# This file gives commands to read in the Gehan data, create suitable vectors # and then calculate a Kaplan-Meier survival estimate. It also shows how to # plot the estimates with confidence intervals (default 95%). # Gehan data on leukaemia remission; group=0 for control; group=1 for 6MP. # Type R in a terminal to start the session. library(survival) # this sets up the functions we will use ghn <- read.table("gehan.dat", header=T) #ghn <- read.table("/home/jane/public_html/gehan.dat", header=T) # read the data into an object called 'ghn' # '<-' is the assignment command # the file 'gehan.dat' has the names of the variables as the first line. ghn # prints the contents of 'ghn' in the terminal window # I suggest you prints the contents of the objects created, to learn # what R does. # *.dead gives the censoring information. '1' is observed, '0' censored names(ghn) # allows you to see what the names are time <- c(ghn$X6.MP.time, ghn$control.time) # 'c(,)' concatenates the two vectors of time; to refer to a #sub-object in an object, you use '$' group <- c(rep(1,21),rep(0,21)) # create an indicator vector for groups. 'rep(1,21)' gives 21 '1's. # use names you can remember. NEVER use 'c' as a name: it is a function! attach(ghn) # puts the variables into workspace, so you can use 'X6.MP.time' directly dead <- c(X6.MP.dead, control.dead) table(dead,group) ; table(time); table(dead,time) ; table(dead,time,group) # Tables to inspect the data. # ';' allows commands to be put on the same line. summary(time); summary(time[group==0]) ; summary(time[group==1]) # summary gives 5 number summary and the mean. # 'name[X]' selects those elements of 'name' for which 'X' is true ######## Kaplan-Meier estimates. Surv(time, dead) # the function 'Surv' creates a survival object # censoring is marked by '+' ghn6MP.surv <- survfit(Surv(X6.MP.time,X6.MP.dead)~1, conf.type="log-log") # 'survfit' fits a Kaplan-Meier curve to a single group. ghn6MP.surv # brief summary of results summary(ghn6MP.surv) # full results names(ghn6MP.surv) # names of all the results of function 'survfit' ghn6MP.surv$type # type of censoring; default is right ghn.surv <- survfit(Surv(time, dead) ~ group, conf.type="log-log") # '~ group' means fit KM for each group. ghn.surv; summary(ghn.surv) # look at the results. help(survfit) # to find out more about, use help() # there is plenty of detail. survfit # shows you the commands in the function plot(ghn.surv) # simplest way of getting a plot plot(ghn.surv, xlab="Weeks", ylab="Estimated survival function") # label the axes title(main="Remission of leukaemia patients") # add a title plot(ghn.surv, xlab="Weeks", ylab="Estimated survival function", conf=T, lty=c(1,2)) # include confidence intervals, use different line types 'lty' for groups title(main="Remission of leukaemia patients") legend(20,0.2, c("Conl","6MP"),lty=c(1,2)) # add a legend, with top right of box at x=25, y=1. plot(ghn.surv, xlab="Weeks", ylab="Estimated survival function", log=T) # changes the scale of the vertical axis. plot(ghn.surv, xlab="Weeks", ylab="Estimated survival function", fun="log", xlim=c(1,max(time)), conf=T, lty=c(1,2)) ######## log-rank test survdiff(Surv(time, dead) ~ group) h ######## plot actuarial life table. printgraph(file="GehanKMAL.ps") plot(ghn6MP.surv, xlab="Weeks", ylab="Estimated survival function",conf=F,lwd=2 , ylim=c(0.3,1.0) ) title(main="Remission of leukaemia patients; KM and lifetable") points(c(0,4),c(1,1),type="l", lty=2,lwd=2) points(c(4,8),c(0.85,0.85),type="l", lty=2,lwd=2) points(c(8,12),c(0.749,0.749),type="l", lty=2,lwd=2) points(c(12,16),c(0.687,0.687),type="l", lty=2,lwd=2) points(c(16,20),c(0.618,0.618),type="l", lty=2,lwd=2) points(c(20,24),c(0.453,0.453),type="l", lty=2,lwd=2) ######## Maximum likelihood estimates. control.km <- survfit(Surv(control.time,control.dead)~1, conf.type="log-log") plot(control.km, xlab="Weeks", ylab="Estimated survival function") # label the axes title(main="Remission of control leukaemia patients") t <- seq(0.1,max(control.time),0.1) # set up time for lines. lines(t, exp(-21*t/182)) control.km ######## Preliminary log plot. q plot(control.km, xlab="Weeks", ylab="Estimated survival function", log=T) plot(control.km, xlab="Weeks", ylab="Estimated survival function", fun="cloglog") names(control.km); round(cbind(control.km$surv[1:11],control.km$time[1:11], log(control.km$surv[1:11]) ),2) length(control.km$surv) plot(control.km$time[1:11],log(control.km$surv[1:11]), xlab="time (weeks)", ylab="log estimated S(t)", cex=2) title(main="Remission of control leukaemia patients: log plot") points(c(0,22),c(0,-2.538462), type="l",lwd=2) ######## log likelihood # Gehan control group. la <- seq(0.05,0.25,0.005) ; la.hat <- 21/182 plot(la,21*log(la)-la*182, type="l",xlab="lambda", ylab="l(lambda, t)", main="Log-likelihood function for Gehan control group",lwd=2) abline(h=21*log(la.hat)-la.hat*182,lwd=2) abline(h=21*log(la.hat)-la.hat*182 - qchisq(df=1,0.95)/2, lty=2,lwd=2) legend(0.17,-67.5,c("log L(^lambda^)-3.84/2"),bty="n" ,cex=1.3) la <- c(seq(0.070,0.075,0.001),seq(0.170,0.175,0.001)) cbind(la,21*log(la)-la*182) qchisq(df=42,0.025)*la.hat/(2*21) qchisq(df=42,0.975)*la.hat/(42) postscript("~/LifeData/Ghn6MPlog.ps") plot(ghn6MP.surv$time,log(ghn6MP.surv$surv), ylim=c(-0.85,0), xlab="time (weeks)", ylab="log estimated S(t)", cex=2) title(main="Remission of 6MP leukaemia patients: log plot") points(c(0,35),c(0,-9/359*35), type="l",lwd=2) dev.off() la <-seq(0.01,0.052,0.001) ; la.hat <- 9/359 # Gehan 6MP plot(la,9*log(la)-la*359, type="l",xlab="lambda", ylab="l(lambda, t)", main="Log-likelihood function for Gehan 6MP group",lwd=2) abline(h=9*log(la.hat)-la.hat*359,lwd=2) abline(h=9*log(la.hat)-la.hat*359 - qchisq(df=1,0.95)/2, lty=2,lwd=2) legend(0.17,-67.5,c("log L(^lambda^)-3.84/2"),bty="n" ,cex=1.3) la <- c(seq(0.013,0.014,0.0001),seq(0.045,0.046,0.0001)) cbind(la,9*log(la)-la*359) ############ log-log not choose x axis sensibly. split.screen(c(2,2)) screen(1) plot(control.km$time[1:11],log(control.km$surv[1:11]), xlab="time (weeks)", ylab="log estimated S(t)", cex=2) title(main="Remission of control leukaemia patients: log plot") points(c(0,22),c(0,-2.538462), type="l",lwd=2) xu <- max(ghn.surv$time[ghn.surv$n.event> 0]) plot(ghn.surv, xlab="Weeks (log scale)", ylab="log{-log(survival function)}", xlim=c(1,xu) , ylim=c(-3.9,1.8), fun="cloglog",conf=T, lty=c(1,2)) max( ghn.surv$time[ghn.surv$n.event> 0] ) ################## Use of survreg to fit parametric models. Gehan data ctl.exp <- survreg(Surv(control.time,control.dead) ~1, dist="exp") ctl.exp # look at the results summary(ctl.exp) # and in detail. mp.exp <- survreg(Surv(time[group==1], dead[group==1]) ~ 1, dist="exp") ghn6MP.surv <- survfit(Surv(X6.MP.time,X6.MP.dead)~1, ghn0.exp <- survreg(Surv(time, dead) ~ 1, dist="exp") ghn.exp <- survreg(Surv(time, dead) ~ group, dist="exp") summary(ghn.exp) names(ghn.exp) ghn.exp$var ctl.wei <- survreg(Surv(control.time,control.dead) ~1, dist="weib") mp.wei <- survreg(Surv(time[group==1], dead[group==1]) ~ 1, dist="weib") ghn.wei <- survreg(Surv(time, dead) ~ group, dist="wei") summary(ghn.wei) ctl.ll <- survreg(Surv(control.time,control.dead) ~1, dist="logl") mp.ll <- survreg(Surv(time[group==1], dead[group==1]) ~ 1, dist="logl") ghn.ll <- survreg(Surv(time, dead) ~ group, dist="logl") ########### estimated medians. medc_hat.exp <- predict(ctl.exp,type="uquantile",p=0.5,se.fit=T) exp(medc_hat.exp$fit[1]) exp(medc_hat.exp$fit[1]-1.96*medc_hat.exp$se.fit[1]) exp(medc_hat.exp$fit[1]+1.96*medc_hat.exp$se.fit[1]) medm_hat.exp <- predict(mp.exp,type="uquantile",p=0.5,se.fit=T) exp(medm_hat.exp$fit[1]) exp(medm_hat.exp$fit[1]-1.96*medc_hat.exp$se.fit[1]) exp(medm_hat.exp$fit[1]+1.96*medc_hat.exp$se.fit[1]) medc_hat.wei <- predict(ctl.wei,type="uquantile",p=0.5,se.fit=T) exp(medc_hat.wei$fit[1]) exp(medc_hat.wei$fit[1]-1.96*medc_hat.wei$se.fit[1]) exp(medc_hat.wei$fit[1]+1.96*medc_hat.wei$se.fit[1]) medm_hat.wei <- predict(mp.wei,type="uquantile",p=0.5,se.fit=T) exp(medm_hat.wei$fit[1]) exp(medm_hat.wei$fit[1]-1.96*medc_hat.wei$se.fit[1]) exp(medm_hat.wei$fit[1]+1.96*medc_hat.wei$se.fit[1]) medc_hat.ll <- predict(ctl.ll,type="uquantile",p=0.5,se.fit=T) exp(medc_hat.ll$fit[1]) exp(medc_hat.ll$fit[1]-1.96*medc_hat.ll$se.fit[1]) exp(medc_hat.ll$fit[1]+1.96*medc_hat.ll$se.fit[1]) medm_hat.ll <- predict(mp.ll,type="uquantile",p=0.5,se.fit=T) exp(medm_hat.ll$fit[1]) exp(medm_hat.ll$fit[1]-1.96*medc_hat.ll$se.fit[1]) exp(medm_hat.ll$fit[1]+1.96*medc_hat.ll$se.fit[1]) ################## postscript("Ghn6MPlog.ps") plot(ghn6MP.surv$time,log(ghn6MP.surv$surv), ylim=c(-0.85,0), xlab="time (weeks)", ylab="log estimated S(t)", cex=2) title(main="Remission of 6MP leukaemia patients: log plot") points(c(0,35),c(0,-9/359*35), type="l",lwd=2) dev.off()