library(survival) attach(ghn) # puts the variables into workspace, so you can use 'X6.MP.time' directly dead <- c(X6.MP.dead, control.dead) time <- c(X6.MP.time, control.time) group <- c(rep(1,21),rep(0,21)) ############# survfit ################### MP.km <- survfit(Surv(X6.MP.time,X6.MP.dead), conf.type="log-log") # 'survfit' fits a Kaplan-Meier curve to a single group. # MP.km is an object in R which includes a lot of information. summary(MP.km) # gives a table of the estimated survival, with confidence limits. names(MP.km) # allows us to look at what is included in MP.km: # the output is: #[1] "n" "time" "n.risk" "n.event" "surv" "type" #[7] "std.err" "upper" "lower" "conf.type" "conf.int" "call" # where the [7] tells us that "std.err" is the 7th item in this list. # If you look at the output from summary(MP.km), you can identify some of these. MP.km$n # is the number in the sample MP.km$time # is the list of 16 unique times 6 7 9 10 11 13 16 17 19 20 22 23 25 32 34 35 # compare this with the list of 21 times in X6.MP.time, where we have all the times: X6.MP.time # 6 6 6 6 7 9 10 10 11 13 16 17 19 20 22 23 25 32 32 34 35 # notice also that the table given by summary(MP.km) does not list all these times MP.km$n.event # is the number of events at each of the 16 times in MP.km$time # 3 1 0 1 0 1 1 0 0 0 1 1 0 0 0 0 # the table given by summary(MP.km) gives only the times for which MP.km$n.event > 0 MP.km$n.risk # is the number at risk at each of the 16 times in MP.km$time # 21 17 16 15 13 12 11 10 9 8 7 6 5 4 2 1 MP.km$surv # is the estimated survival function at the times in MP.km$time round(MP.km$surv,2) # is easier to look at #0.86 0.81 0.81 0.75 0.75 0.69 0.63 0.63 0.63 0.63 0.54 0.45 0.45 0.45 0.45 0.45 MP.km$std.err # gives the standard errors for the estimated survival function MP.km$lower # gives the lower 95% CI, MP.km$upper # gives the upper 95% CI, MP.km$type # gives the type of censoring MP.km$conf.type # says which confidence interval formula was used: "log-log" in this case MP.km$conf.int # says what level the confidence interval is: 0.95 in this case MP.km$call # shows what command created MP.km: # in this case it was survfit(formula = Surv(X6.MP.time, X6.MP.dead), conf.type = "log-log") MP.km # automatically gives the estimated median, with confidence interval. # if we want to find out what the estimated survival at 20 weeks is, without # printing all the estimates, we can select part of 'surv' using 'time, as below: MP.km$surv[MP.km$time==20] # gives output [1] 0.627451 # so the estimated survival is 0.63. # Often there will not be an observation at the exact time. # What we want is the largest time before the time of interest. Suppose we want # the estimated survival at 18 weeks max(MP.km$time[MP.km$time<=18]) # '<=' is the symbol for less than or equal to # finds the maximum value of MP.km$time for which MP.km$time is less than or equal to 18 # you can check this by looking at MP.km$time[MP.km$time<=18] # gives output 6 7 9 10 11 13 16 17 # so, to find the survival estimate, we can use MP.km$surv[MP.km$time==max(MP.km$time[MP.km$time<=18])]] # gives outptu 0.627451 # As the survival function is decreasing, there is a simpler formula: min(MP.km$surv[MP.km$time<=18]) # this considers all the survival estimates for times less than or equal to the time 18, ############# survfit with more than one group ################### ghn.km <- survfit(Surv(time,dead)~group, conf.type="log-log") # use the function names() to find out what is in the object ghn.km: names(ghn.km) # "n" "time" "n.risk" "n.event" # "surv" "type" "ntimes.strata" "strata" # "strata.all" "std.err" "upper" "lower" # "conf.type" "conf.int" "call" # some of the names are familiar. ghn.km$n # gives the total number in both groups. ghn.km$time # gives all the unique times, for both groups #ghn.km$n.risk, n.event, surv, std.err lower, upper, conf.type, type, call #are similar to the simple case (see above) ghn.km$ntimes.strata # gives the number of unique times in each group. # 1 2 #12 16 ghn.km$strata # gives the number of unique times in each group, # with the name and value of the variable: # group=0 group=1 # 12 16 ghn.km$strata.all # gives the total number in each group: # group=0 group=1 # 21 21 # to get the results for each group separately, we need to use: ghn.ctl.km <- ghn.km[1] ; ghn.MP.km <- ghn.km[2] # the [1] takes only the results for the first group (0). # the estimated probability of surviving to age 10 week for control is: min(ghn.ctl.km$surv[ghn.ctl.km$time<=10]) # 0.3809524 # and for 6MP is: min(ghn.MP.km$surv[ghn.MP.km$time<=10]) # 0.7529412 # In fact, you do not have to create the separate object ghn.ctl.km # you can just put it in one call (but you might find it more difficult to remember) min(ghn.km[1]$surv[ghn.km[1]$time<=10]) # suggestion: use this approach to find the estimated survival for # cerebral palsy at ages 10, 20, 30 years, for males and females min(ghn.km[1]$surv[ghn.km[1]$time<=10])