library(survival) library(survey) # note the newer rms library functions may not work well here # use the Design instead of rms library(Design) #data used for this analysis noNA=read.table("noNA.txt", header = TRUE) attach(noNA) #design used for this analysis dstr2=svydesign(id=~1, strata=~group, prob=~inv_weight, fpc=~ssize, data=noNA) # inputs to svycox.nomogram # .design = a survey design objects # .model = a Cox model specification # .data = the data set on which the model is to be fit (cannot contain NAs) # pred.at = time point at which nomogram prediction axis will be drawn # fun.lab = Label of the Prediction Axis # svycox.nomogram=function(.design,.model,.data,pred.at,fun.lab){ design.call=.design$call svy.cox.fit=svycoxph(.model,x=TRUE,design=.design) pred.lp.cox=predict(svy.cox.fit) pred.survey.cox=predict(svy.cox.fit,type="curve",newdata=.data) .rhs=.model[[3]] f.form=paste("pred.lp.cox~",paste(all.vars(.model)[-(1:2)],collapse="+")) .f=ols(as.formula(f.form),sigma=1,x=TRUE,y=TRUE,data=noNA) .ss3<-c(0.05,0.2,0.4,0.6,0.7,0.8,0.9,0.95,0.99) .ss3.label<-100*.ss3 time.at=pred.survey.cox[[1]]$time[which(pred.survey.cox[[1]]$time>pred.at)[1]-1] .baseline=exp(log(pred.survey.cox[[1]]$surv[names(time.at)])/exp(svy.cox.fit$linear.predictors[1])) .tempfun=function(x) .baseline[[1]]^exp(x) .nom=nomogram(.f, fun=.tempfun, funlabel=fun.lab,fun.at=.ss3,lp=T, vnames="labels") return(list(nomog=.nom,design=.design,svy.cox=svy.cox.fit,preds=pred.survey.cox,pred.at=pred.at)) } #required for Design library #could not make it work within the function so user has to specify before running svycox.nomogram options(datadist="dd") # svycox.nomogram generates a plut and returns a nomogram object # save this object since it is required for the subsequent validation and calibration functions #options used for this analysis dd=datadist(ECOG,Alb,Hb,Age,Differentiation,Gt_1_m1site,liver_only,lymph_only, data=noNA) options(datadist="dd") mynom=svycox.nomogram(.design=dstr2, .model=Surv(survival,surv_cens)~ECOG+liver_only+Alb+Hb+Age+Differentiation+Gt_1_m1site+lymph_only, .data=noNA, pred.at=24, fun.lab="Prob of 2 Yr OS") # generating the bootstrap sample for the matched case-cotrol design # since it is design dependent we did not make it part of validate.svycox # user has to create the bootsrap sample consistent with the design bootit=200 cases=which(noNA$group=="long") controls=which(noNA$group=="<24") boot.index=matrix(NA,nrow(noNA),bootit) for(i in 1:bootit){ boot.index[,i]=c(sample(cases,replace=T),sample(controls,replace=T)) } # Inputs for the validate.svycox function # .boot.index = a matrix of bootstrap sample indicators # number of rows is the same as the number of rows in the data # number of columns is the number of bootstrap samples # .nom = a nomogram object returned from svycox.nomogram # .data = data set on which validation will take place # # validate.svycox=function(.boot.index,.nom,.data){ internal.validate.func=function(.boot.vec,.nom2,.data2){ boot.data=.data2[.boot.vec,] design.boot=svydesign(id=~1,strata=as.formula(paste("~",names(.nom2$design$strata))), prob=as.formula(paste("~",names(.nom2$design$prob))), fpc=as.formula(paste("~",colnames(.nom2$design$fpc$popsize))), data=boot.data) boot.fit<-svycoxph(formula(.nom2$svy.cox), x=TRUE,design=design.boot) lp.boot=boot.fit$x%*%as.matrix(boot.fit$coefficients)- mean(boot.fit$x%*%as.matrix(boot.fit$coefficients)) lp.test=(.nom2$svy.cox)$x%*%as.matrix(boot.fit$coefficients)- mean((.nom2$svy.cox)$x%*%as.matrix(boot.fit$coefficients)) cindex.train<-1-rcorr.cens(lp.boot,Surv(boot.data$survival,boot.data$surv_cens))[[1]] cindex.test<-1-rcorr.cens(lp.test,Surv(.data2$survival,.data2$surv_cens))[[1]] return(cindex.train-cindex.test) } val.res=apply(boot.index,2,internal.validate.func,.nom2=.nom,.data2=.data) print(mean(val.res)) return(val.res) } # validate.svycox prints the estimated optimism # and returns the vector of optimisim values for each bootstrap sample myval=validate.svycox(boot.index,mynom,noNA) # Inputs for calibrate.svyxoc # .nom = A nomogram object from svycox.nomogram # .timept = Time point at which calibration will take place # Defaults to the time value of the prediction axis in the nomogram # .ngroup = Number of groups ot be formed for validation purposes # calibrate.svycox=function(.nom,.timept=.nom$pred.at,.ngroup=5){ .loc=max(which(.nom$preds[[1]]$time<=.timept)) pred.timept=rep(NA,.nom$svy.cox$n) for(i in 1:length(pred.timept)) pred.timept[i]=.nom$preds[[i]]$surv[.loc] pred.timept.grp=cut(pred.timept,quantile(pred.timept,seq(0,1,1/.ngroup)),include.lowest=T, labels=1:.ngroup) .predicted=tapply(pred.timept,pred.timept.grp,median) .observed=matrix(NA,nrow=.ngroup,ncol=3) colnames(.observed)=c("Observed","Lower 95%","Upper 95%") for(i in 1:.ngroup){ .km1=svykm(as.formula(paste(names(.nom$svy.cox$model)[1],"~","1")), design=subset(.nom$design,pred.timept.grp==i), se=TRUE) .km1.timept=.km1[[2]][which(.km1[[1]]>.timept)[1]-1] .varlog1.timept=.km1[[3]][which(.km1[[1]]>.timept)[1]-1] .ll1.timept=exp(log(.km1.timept)-1.96*sqrt(.varlog1.timept)) .ul1.timept=exp(log(.km1.timept)+1.96*sqrt(.varlog1.timept)) .observed[i,]=c(.km1.timept,.ll1.timept,.ul1.timept) } plot(.predicted,.observed[,1],xlim=0:1,ylim=0:1,xlab="Predicted",ylab="Observed",pch=16) arrows(x0=.predicted,y0=.observed[,2],y1=.observed[,3],angle=90,code=3,length=0.1,lwd=2) abline(0,1,lty=2) lines(.predicted,.observed[,1]) return(cbind(Predicted=.predicted,.observed)) } calibrate.svycox(mynom)