library(survival)
library(survey)
# note the newer rms library functions may not work well here
# use the Design instead of rms
library(Design)

#data used for this analysis
noNA=read.table("noNA.txt", header = TRUE)
attach(noNA)

#design used for this analysis
dstr2=svydesign(id=~1, strata=~group, prob=~inv_weight, fpc=~ssize, data=noNA)



# inputs to svycox.nomogram
#   .design  = a survey design objects
#   .model   = a Cox model specification
#   .data    = the data set on which the model is to be fit (cannot contain NAs)
#	pred.at  = time point at which nomogram prediction axis will be drawn
#	fun.lab  = Label of the Prediction Axis
#

svycox.nomogram=function(.design,.model,.data,pred.at,fun.lab){
design.call=.design$call
svy.cox.fit=svycoxph(.model,x=TRUE,design=.design)
pred.lp.cox=predict(svy.cox.fit)
pred.survey.cox=predict(svy.cox.fit,type="curve",newdata=.data)
.rhs=.model[[3]]
f.form=paste("pred.lp.cox~",paste(all.vars(.model)[-(1:2)],collapse="+"))
.f=ols(as.formula(f.form),sigma=1,x=TRUE,y=TRUE,data=noNA)
.ss3<-c(0.05,0.2,0.4,0.6,0.7,0.8,0.9,0.95,0.99)
.ss3.label<-100*.ss3
time.at=pred.survey.cox[[1]]$time[which(pred.survey.cox[[1]]$time>pred.at)[1]-1]
.baseline=exp(log(pred.survey.cox[[1]]$surv[names(time.at)])/exp(svy.cox.fit$linear.predictors[1]))

.tempfun=function(x) .baseline[[1]]^exp(x)

.nom=nomogram(.f, fun=.tempfun, funlabel=fun.lab,fun.at=.ss3,lp=T, vnames="labels")
return(list(nomog=.nom,design=.design,svy.cox=svy.cox.fit,preds=pred.survey.cox,pred.at=pred.at))
}



#required for Design library
#could not make it work within the function so user has to specify before running svycox.nomogram
options(datadist="dd")

# svycox.nomogram generates a plut and returns a nomogram object
# save this object since it is required for the subsequent validation and calibration functions

#options used for this analysis
dd=datadist(ECOG,Alb,Hb,Age,Differentiation,Gt_1_m1site,liver_only,lymph_only, data=noNA)
options(datadist="dd")

mynom=svycox.nomogram(.design=dstr2,
		  .model=Surv(survival,surv_cens)~ECOG+liver_only+Alb+Hb+Age+Differentiation+Gt_1_m1site+lymph_only,
		  .data=noNA,
		  pred.at=24,
		  fun.lab="Prob of 2 Yr OS")
		  


#  generating the bootstrap sample for the matched case-cotrol design
#  since it is design dependent we did not make it part of validate.svycox
#  user has to create the bootsrap sample consistent with the design

bootit=200

cases=which(noNA$group=="long")
controls=which(noNA$group=="<24")

boot.index=matrix(NA,nrow(noNA),bootit)
for(i in 1:bootit){
boot.index[,i]=c(sample(cases,replace=T),sample(controls,replace=T))
}

		  
#  Inputs for the validate.svycox function
#  .boot.index  = a matrix of bootstrap sample indicators
#						number of rows is the same as the number of rows in the data
#						number of columns is the number of bootstrap samples
#  .nom			= a nomogram object returned from svycox.nomogram
#  .data		= data set on which validation will take place
#
#
		  
validate.svycox=function(.boot.index,.nom,.data){

internal.validate.func=function(.boot.vec,.nom2,.data2){
boot.data=.data2[.boot.vec,]
design.boot=svydesign(id=~1,strata=as.formula(paste("~",names(.nom2$design$strata))),
							prob=as.formula(paste("~",names(.nom2$design$prob))),
							fpc=as.formula(paste("~",colnames(.nom2$design$fpc$popsize))),				
						data=boot.data)
boot.fit<-svycoxph(formula(.nom2$svy.cox), x=TRUE,design=design.boot)
lp.boot=boot.fit$x%*%as.matrix(boot.fit$coefficients)-
		mean(boot.fit$x%*%as.matrix(boot.fit$coefficients))
lp.test=(.nom2$svy.cox)$x%*%as.matrix(boot.fit$coefficients)-
		mean((.nom2$svy.cox)$x%*%as.matrix(boot.fit$coefficients))
cindex.train<-1-rcorr.cens(lp.boot,Surv(boot.data$survival,boot.data$surv_cens))[[1]]
cindex.test<-1-rcorr.cens(lp.test,Surv(.data2$survival,.data2$surv_cens))[[1]]	
return(cindex.train-cindex.test)
}

val.res=apply(boot.index,2,internal.validate.func,.nom2=.nom,.data2=.data)
print(mean(val.res))
return(val.res)

}

#  validate.svycox prints the estimated optimism
#  and returns the vector of optimisim values for each bootstrap sample

myval=validate.svycox(boot.index,mynom,noNA)

#  Inputs for calibrate.svyxoc
#  .nom		= A nomogram object from svycox.nomogram
#  .timept  = Time point at which calibration will take place
#					Defaults to the time value of the prediction axis in the nomogram
#  .ngroup	= Number of groups ot be formed for validation purposes
#

calibrate.svycox=function(.nom,.timept=.nom$pred.at,.ngroup=5){
.loc=max(which(.nom$preds[[1]]$time<=.timept))
pred.timept=rep(NA,.nom$svy.cox$n)
for(i in 1:length(pred.timept)) pred.timept[i]=.nom$preds[[i]]$surv[.loc]
pred.timept.grp=cut(pred.timept,quantile(pred.timept,seq(0,1,1/.ngroup)),include.lowest=T, labels=1:.ngroup)
.predicted=tapply(pred.timept,pred.timept.grp,median)
.observed=matrix(NA,nrow=.ngroup,ncol=3)
colnames(.observed)=c("Observed","Lower 95%","Upper 95%")
for(i in 1:.ngroup){
  .km1=svykm(as.formula(paste(names(.nom$svy.cox$model)[1],"~","1")), 
			design=subset(.nom$design,pred.timept.grp==i), se=TRUE)
  .km1.timept=.km1[[2]][which(.km1[[1]]>.timept)[1]-1]
  .varlog1.timept=.km1[[3]][which(.km1[[1]]>.timept)[1]-1]
  .ll1.timept=exp(log(.km1.timept)-1.96*sqrt(.varlog1.timept))
  .ul1.timept=exp(log(.km1.timept)+1.96*sqrt(.varlog1.timept))
  .observed[i,]=c(.km1.timept,.ll1.timept,.ul1.timept)
  }
plot(.predicted,.observed[,1],xlim=0:1,ylim=0:1,xlab="Predicted",ylab="Observed",pch=16)
arrows(x0=.predicted,y0=.observed[,2],y1=.observed[,3],angle=90,code=3,length=0.1,lwd=2)
abline(0,1,lty=2)
lines(.predicted,.observed[,1])
return(cbind(Predicted=.predicted,.observed))
}
calibrate.svycox(mynom)