fz <- read.table("~/LifeData/FZ/feiglz.dat", header=T) attach(fz) dead <- rep(1,33) ; lwbc <- log(wbc) ; y <- log(t) # set up the data # now explore the data with numerical summaries; histograms and boxplots. table(ag) ; summary(t); summary(wbc); summary(lwbc) # postscript("/home/jane/LifeData/FZ/boxp.ps") hist(wbc) ; hist(lwbc) boxplot(y~ag, xlab="AG", ylab="log(time)" ,main="Feigl and Zelen data on leukaemia") # 'y' is the variable for the plot, 'ag' is the group variable. boxplot(lwbc~ag, xlab="AG", ylab="log(white blood cell count)" ) plot(lwbc[ag==0], y[ag==0],xlab="log(white blood cell count)", ylab="log(time)", main="Feigl and Zelen data on leukaemia",pch=1) points(lwbc[ag==1], y[ag==1],pch=2) legend(7.5,1.5,legend=c("AG+", "AG-"), pch=c(1,2)) # plot with points for the two groups labeled. # dev.off() # As there are no censored data, we can use ordinary least squares. ols.fit <- lm(log(t)~lwbc) ; summary(ols.fit) # lm(y~x) fits a regression line of y on x. summary() gives the results. plot(lwbc[ag==0], y[ag==0],xlab="log(white blood cell count)", ylab="log(time)", main="Feigl and Zelen data on leukaemia",pch=1) points(lwbc[ag==1], y[ag==1],pch=2) legend(7.2,1.5,legend=c("AG+", "AG-"), pch=c(1,2)) abline(ols.fit,lwd=2) # adds a fitted line to a plot. 'lwd=2' makes the line thicker ols.fit2 <- lm(log(t)~lwbc+ag) ; summary(ols.fit2) # lm(y~x+z) fits a regression line of y on x and z abline(a=ols.fit2$coef[1], b=ols.fit2$coef[2],lty=2,lwd=2) abline(a=ols.fit2$coef[1]+ols.fit2$coef[3], b=ols.fit2$coef[2],lty=5,lwd=2) legend(8,1.5,legend=c("AG+", "AG-"), lty=c(2,4)) # adds the two regression lines ########### Next look at the KM plots: library(survival) fz.ag.km <- survfit(Surv(t, dead) ~ ag, conf.type="log-log",na.action=na.omit) # postscript("/home/jane/LifeData/FZ/KM.ps") plot(fz.ag.km,xlab="Weeks ", ylab="Estimated survival function", lty=c(1,3),conf=T, lwd=2, main="Survival of leukaemia patients") legend(100,1, c("AG +","AG -"),lty=c(3,1), cex=2) abline(h=0.5) ; abline(v=52) # plot with confidence intervals, lines for median and 1 year survival. plot(fz.ag.km,xlab="Weeks (log scale)", ylab="log{-log(survival function)}", xlim=c(1,200), fun="cloglog", lty=c(1,3), main="Survival of leukaemia patients") legend(2,1, c("AG -","AG +"),lty=c(1,3)) # complementary log log plot to see whether Weibull might be a reasonable model # It is better to define the limits of the x-axis: # xlim=c(1,200) starts the axis at 1, ends at 200. # dev.off() fz.ag.km # look at the brief results of the KM fit summary(fz.ag.km) # look at the results of the KM fit # The medians differ by a factor of 8 - might expect the log-rank test to be sig't. survdiff(Surv(t, dead) ~ ag) # AG+ has longer median survival, over a year compared to under 2 months. # Now fit survival models. Consider different simple models first. fz.ag.exp <- survreg(Surv(t, dead) ~ ag, dist="exponential") fz.ag.wei <- survreg(Surv(t, dead) ~ ag, dist="weib") fz.ag.ll <- survreg(Surv(t, dead) ~ ag, dist="logl") fz.ag.ln <- survreg(Surv(t, dead) ~ ag, dist="logn") fz.ag.exp ;fz.ag.wei ;fz.ag.ll ; fz.ag.ln #If we considered the intercept only models, the Weibul and logN are slightly better. #E.g. compare Exp and Weibull. 2*(155.5-153.6) = 3.8 # However, once we allow for the AG group, we can use the simplest model, the exponential summary(fz.ag.exp) exp(fz.ag.exp$coef[2]) # exp(1.25) 3.5 is the factor scaling the life times. -log(0.5)/exp(-fz.ag.exp$coef[1]) # median for AG- group -log(0.5)/exp(-fz.ag.exp$coef[1]-fz.ag.exp$coef[2])# median for AG+ group # Now consider the effect of log white blood cell count: fz.lwbc.exp <- survreg(Surv(t, dead) ~ lwbc, dist="exponential") fz.lwbc.wei <- survreg(Surv(t, dead) ~ lwbc, dist="weib") fz.lwbc.ll<- survreg(Surv(t, dead) ~ lwbc, dist="logl") fz.lwbc.ln <- survreg(Surv(t, dead) ~ lwbc, dist="logn") fz.lwbc.exp ;fz.lwbc.wei ;fz.lwbc.ll ; fz.lwbc.ln # still reasonable to use exponential; in all models, lwbc is significant. summary(fz.lwbc.exp) # The coefficient for lwbc is -0.401: what does this mean? exp(-0.401) # 0.67 is the scale for a unit change in lwbc # Summary of lwbc gave # Min. 1st Qu. Median Mean 3rd Qu. Max. # 6.620 8.575 9.259 9.524 10.370 11.510 # The difference in survival with lwbc of 8.575 vs 9.259: exp( -0.401 * (9.259-8.575)) # = 0.76, so reduce lifetime by 3/4 as lwbc goes from LQ to median. # if the wbc doubles, log(wbc) increases by log(2) = 0.6931472 exp( -0.401 *log(2)) # 0.76 exp( -0.401 *log(10)) # 0.40 tenfold increase , only 40% life time ####### now consider both AG and lwbc: fz2.exp <- survreg(Surv(t, dead) ~ ag + lwbc, dist="exponential") summary(fz2.exp) ######## is an interaction needed, i.e. a difference in slope? fz3.exp <- survreg(Surv(t, dead) ~ ag *lwbc, dist="exponential") summary(fz3.exp) ### ag:lwbc -0.328 0.246 -1.332 0.18298 ### interaction term is unnecesary # for a given lwbc, the AG+ group has lifetimes exp(1.018) # 2.8 times longer than for AG_. # In either group, if the wbc doubles, lifetime is reduced by exp( -0.304*log(2)) # 0.81, 80%. ## put fitted lines on to KM plots # postscript("/home/jane/LifeData/FZ/fit.ps") plot(fz.ag.km,xlab="Weeks ", ylab="Estimated survival function", lty=c(1,3),conf=T, lwd=2, main="Survival of leukaemia patients") legend(100,1, c("AG +","AG -"),lty=c(3,1), cex=2) # plot with confidence intervals, lines for median and 1 year survival. t<- seq(0, max(t),0.1) # time of same range as observed lmdp <- exp(-2.88) ;lmdn <- exp(-2.88-1.25) lines(t, exp(-(lmdp* t)),lty=4,lwd=2 ) lines(t, exp(-(lmdn* t)),lty=5 ,lwd=2 ) # dev.off()