rm(list=ls()) ; library(survival) fz <- read.table("~/LifeData/FZ/feiglz.dat", header=T) attach(fz); names(fz) dead <- rep(1,33) ; lwbc <- log(wbc) ; y <- log(tim) fz.C.ag <- coxph( Surv(tim, dead) ~ ag) # fit a Cox proportional hazard model with covariate 'ag' summary(fz.C.ag) #Call: #coxph(formula = Surv(tim, dead) ~ ag) # # n= 33 # # coef exp(coef) se(coef) z Pr(>|z|) #ag -1.1803 0.3072 0.4173 -2.828 0.00468 ** #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # exp(coef) exp(-coef) lower .95 upper .95 #ag 0.3072 3.255 0.1356 0.696 # #Rsquare= 0.221 (max possible= 0.994 ) #Likelihood ratio test= 8.26 on 1 df, p=0.00405 #Wald test = 8 on 1 df, p=0.00468 #Score (logrank) test = 8.75 on 1 df, p=0.003100 fz.C.lwbc <- coxph( Surv(tim, dead) ~ lwbc) ; summary( fz.C.lwbc ) # coef exp(coef) se(coef) z Pr(>|z|) #lwbc 0.4129 1.5112 0.1367 3.02 0.00253 ** #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # exp(coef) exp(-coef) lower .95 upper .95 #lwbc 1.511 0.6617 1.156 1.976 # #Rsquare= 0.243 (max possible= 0.994 ) #Likelihood ratio test= 9.19 on 1 df, p=0.002431 #Wald test = 9.12 on 1 df, p=0.002528 #Score (logrank) test = 9.53 on 1 df, p=0.002020 fz.C.2 <- coxph( Surv(tim, dead) ~ lwbc+ag) ; summary(fz.C.2) # coef exp(coef) se(coef) z Pr(>|z|) #lwbc 0.3677 1.4444 0.1360 2.703 0.00687 ** #ag -1.0691 0.3433 0.4293 -2.490 0.01276 * #--- #Rsquare= 0.377 (max possible= 0.994 ) #Likelihood ratio test= 15.64 on 2 df, p=0.0004014 #Wald test = 15.06 on 2 df, p=0.0005365 #Score (logrank) test = 16.49 on 2 df, p=0.0002629 fz.C.ag.st <- coxph(Surv(tim, dead)~lwbc+strata(ag));summary(fz.C.ag.st) # coef exp(coef) se(coef) z Pr(>|z|) #lwbc 0.3906 1.4778 0.1426 2.738 0.00618 ** #--- # exp(coef) exp(-coef) lower .95 upper .95 #lwbc 1.478 0.6767 1.117 1.955 # #Rsquare= 0.21 (max possible= 0.98 ) #Likelihood ratio test= 7.78 on 1 df, p=0.005289 #Wald test = 7.5 on 1 df, p=0.00618 #Score (logrank) test = 7.92 on 1 df, p=0.004899 # the 'strata' option allows a different baseline hazard for AG+ and # AG-. However, as the baseline is not regarded as of interest, the estimated # baseline is not given in the summary. Compare this with the fit with 'lwbc + ag' # and with the Weibull accelerated life fit. split.screen(c(2,2)) ; screen(1) plot( survfit(fz.C.ag), xlab="Survival time in weeks", ylab="Cox S_0^(t)", main="Cox estimate; FZ, AG" ) # A plot of the estimated baseline survival, i.e. at ag=0 fz.C.ag$method # [1] "efron" The method used is Efron's method, not Breslow's screen(2) plot( survfit(fz.C.lwbc), xlab="Survival time in weeks", ylab="Cox S_0^(t)", main="Cox estimate; FZ, lwbc" ) screen(3) plot( survfit(fz.C.2), xlab="Survival time in weeks", ylab="Cox S_0^(t)", main="Cox estimate; FZ, AG+lwbc" ,cex=0.5) screen(4) plot( survfit(fz.C.ag.st), xlab="Survival time in weeks", ylab="Cox S_0^(t)", main="Cox estimate; FZ, AG strata" ,cex=0.5) # Now add the Kaplan-Meier estimate. screen(4) ; plot(survfit(Surv(tim,dead) ~ ag ) ,lty=2) # This shows the (small) adjustment made by taking lwbc into account. # ************ Residuals. mresid2 <- residuals(fz.C.2) # Martingale residuals are the default dresid2 <- residuals(fz.C.2, type="deviance") # deviance residuals ( dfbeta2 <-residuals(fz.C.2, type="dfbeta") ) # approx change in coef due to i dfbeta2s <-residuals(fz.C.2, type="dfbetas") # approx change in coef due to i , scaled. sresid2 <- residuals(fz.C.2, type="schoenfeld") # Schoenfeld residuals for each covariate plot(lwbc,mresid2, xlab="log (wbc)", ylab="Martingal residuals", main="Martingal residuals, Cox fit to AG + lwbc", ylim=c(-2.8,1.1) ) plot(lwbc,dresid2, xlab="log (wbc)", ylab="Deviance residuals", main="Deviance residuals, Cox fit to AG vs lwbc") # Nothing showing - fit acceptable. # Compare with the fit without lwbc. mresid.ag <- residuals(fz.C.ag) split.screen(c(2,1) ); screen(1) plot(lwbc,mresid.ag, xlab="log (wbc)", ylab="Martingal residuals", main="Martingal residuals, Cox fit to AG") screen(2) plot(lwbc,mresid2, xlab="log (wbc)", ylab="Martingal residuals", main="Martingal residuals, Cox fit to AG + lwbc", ylim=c(-2.8,1.1) ) close.screen(all=T) split.screen(c(2,1) ); screen(1) plot(dfbeta2s[,1], type="h",xlab="Individuals", ylab="Scaled dfbeta, ", main="Assessment of influence, lwbc") screen(2) plot(dfbeta2s[,2], type="h",xlab="Individuals", ylab="Scaled dfbeta, ", main="Assessment of influence, AG") # log(100000) = 11.51 log(43)=3.8 plot(lwbc[ag==0],y[ag==0],xlab="log(white blood cell count)", ylab="log(time)", xlim=c(6.6,11.6),ylim=c(-0.1,5.1), main="Feigl and Zelen data on leukaemia",pch=2) points(jitter(lwbc[ag==1],amount=0.1), jitter(y[ag==1]),pch="+") legend(7.5,1.5,legend=c("AG+", "AG-"), pch=c("+"2)) points(log(100000), log(43), pch=1,cex=2) time <- tim; time[32] <- NA fz.C.2i <- coxph( Surv(time, dead) ~ lwbc+ag,) ; summary(fz.C.2i)