#######################################################################################
### stdca is an extension to dca for a survival-time endpoint. 
### The program calculates the net benefit associated with
### various treatment strategies.
###                              
### The methods for decision curve analysis with a binary response can be found in:
###		vickers and elkin (2006). Medical decision making Nov-Dec;26(6):565-74 
### The extension to survival-time data was made following the methods described in:
### 		begg, cramer, venkatraman et al (2000). Stat Med 19:1997-2014
### To account for competing risk, the survival probability at time t is calculated using
### the cumulative incidence function, instead of Kaplan-Meier estimates (see cmprsk library).
###                              
### The function has 7 inputs:   
###	  timepoint:	timepoint of interest, for example, 5 for 5-year survival
###	  time:		censoring time or time of event
###	  failure:		for non-competing risk analysis,
###				coded as 0 if censored and 1 if experienced the event;
###	  			for competing risk analysis,
###				coded as 0 if censored, 1 if experienced the event of interest,
###				and 2 if experienced the competing risk event
###     xmatrix: 		probability of failure at the timepoint associated with p
###				single variable or multivariable models, nxp matrix
###     xstart: 		starting values for x-axis (threshold probability)
###               	between 0 and 1; default is 0.01          
###     xstop: 		stopping values for x-axis (threshold probability)
###               	between 0 and 1; default is 0.99          
###     xby: 		increment for threshold probability       
###				default is 0.01                           
###	  ymin:		minimum value for net benefit when plotting default decision curve
###				default is -0.05
###	  ymax:		maximum value for net benefit when plotting default decision curve
###				default is 1.0
###	  cmprsk:		FALSE for Kaplan-Meier analysis and TRUE for competing risk analysis
###				default is FALSE
###                                                           
### The function outputs a matrix with the following columns: 
###     threshold:      threshold probablity                  
###                     default is 1 to 100 in steps of 1     
###     none:           net benefit of treating no patients   
###     all:            net benefit of treating all patients  
###     modelp1:        net benefit of treating patients according to the 1st model 
###     modelp2:        net benefit of treating patients according to the 2nd model
###     ... and so on for all p models specified    
###                                                     
### Examples:                                           
###     stdca(timepoint=5, time=followuptime, failure=cancer, xmatrix=p1)        
###     stdca(timepoint=5, time=followuptime, failure=cancer, xmatrix=p1, xstart=0.10, xstop=0.30) 
###     stdca(timepoint=5, time=followuptime, failure=cancer, xmatrix=cbind(p1, p2))     
###                                                                       
#######################################################################################


stdca <- function (timepoint, time, failure, xmatrix, xstart=0.01, xstop=0.99, xby=0.01, ymin=-0.05, ymax=1.0, cmprsk=FALSE) 
{

# load the Design and cmprsk libraries
library(Design)
library(cmprsk)

# check that the inputs are correctely specified

	# the timepoint of interest is >0
	if (timepoint<=0) {
		stop("timepoint must be greater than 0")
		}

	# xstart is between 0 and 1
	if (xstart<0 | xstart>1) {
		stop("xstart must lie between 0 and 1")
		}

	# xstop is between 0 and 1
	if (xstop<0 | xstop>1) {
		stop("xstop must lie between 0 and 1")
		}
		
	# xby is between 0 and 1
	if (xby<=0 | xby>=1) {
		stop("xby must lie between 0 and 1")
		}

	# xstart is before xstop
	if (xstart>=xstop) {
		stop("xstop must be larger than xstart")
		}

	# all predictors are given as probabilities
	for (i in 1:ncol(cbind(xmatrix)) ) {
		if ( min(cbind(xmatrix)[,i])<0 | max(cbind(xmatrix)[,i])>1   ) {
			stop("xvars must be probabilities and lie between 0 and 1")
			}
		}


	# all predictors are given as the probability of event
	for (i in 1:ncol(cbind(xmatrix)) ) {
		if (cor(failure, cbind(xmatrix)[,i]) < 0) {
			stop("xvars must represent probability of having the event")
			}
		}



# get rid of the missing values among predictors and response
tfx.matrix<-data.frame(time, failure, xmatrix)
for (i in 1:ncol(tfx.matrix)) {
	tfx.matrix<-tfx.matrix[!is.na(tfx.matrix[,i]),]
	}

# assign time, failure and predictor variables according to inputs
t <- tfx.matrix[,1]
f <- tfx.matrix[,2]
nobs = nrow(tfx.matrix)
print(paste("After deleting the missing, the Nobs of this analysis is",nobs,sep=" "))


# initialize the result matrix
result.out<-NULL

# Use loops to calculate the net benefit for each model for all threshold probabilities
for (i in 3:ncol(tfx.matrix)) {
       pred<-tfx.matrix[,i]
       model.n<-i-2


	# initialize variables that contain net benefit
	if(model.n==1) {
		all<-none<-NULL
		}
	modelp <- NULL

	# calculate net benefit for each threshold
      threshold<-seq(xstart,xstop,xby)

      for (j in 1:length(threshold)) {
      	thres.val<-threshold[j]

		# identify the patients above the threshold, and get the empirical probability of being above the threshold
		atrisk <- pred >= thres.val
		px <- mean(atrisk)

		# get the probability of the event for subjects above the threshold 
		which.atrisk <- pred >=thres.val
		t.atrisk <- t[which.atrisk]
		f.atrisk <- f[which.atrisk]

		# if cmprsk=FALSE, use kaplan-meier methods to get probabilities
		if(cmprsk==FALSE) {
			S <- survfit(Surv(t.atrisk, f.atrisk)~1)
			pdgivenx <- (1 - summary.survfit(S, times=timepoint)$surv)
					}
		# if cmprsk=TRUE, use competing risk methods to get probabilities
		if(cmprsk==TRUE) {
			CI <- cuminc(t.atrisk, f.atrisk)
			pdgivenx <- timepoints(CI, times=timepoint)$est[1]
			}

		tp <- pdgivenx * px * nobs
		fp <- (1 - pdgivenx) * px * nobs

		# append the net benefit to the output vector; if <= ymin, then blank out
		tempnb <- (tp - fp * thres.val / (1 - thres.val) ) / nobs 
		if (tempnb <= ymin) {
			modelp <- c(modelp, NA)
			}
		else {
			modelp <- c(modelp, tempnb)
			}


	      # Calculate the "all" and "none" for one time since they should be constant

		# if cmprsk=FALSE, use kaplan-meier methods to get probabilities
		if(cmprsk==FALSE) {
			S <- survfit(Surv(t, f)~1)
			pd <- (1 - summary.survfit(S, times=timepoint)$surv)
			}

		# if cmprsk=TRUE, use competing risk methods to get probabilities
		if(cmprsk==TRUE) {
			CI <- cuminc(t, f)
			pd <- timepoints(CI, times=timepoint)$est[1]
			}

		tempnb <- pd - (1-pd) * thres.val / (1 - thres.val)
		if (model.n==1) {
			# if all net benefit <= ymin, then blank out
			if (tempnb <= ymin) {
				all <- c(all, NA)
				}
			else {
				all <- c(all, tempnb )   
				}
			none <- c(none, 0)
			}
        }

	# append the results to the matrix
	result.out<-cbind(result.out,modelp)

     }
  
# put the threshold as a %
threshold <- threshold * 100

# save the results matrix as a data frame
result.out<-data.frame(as.data.frame(result.out),all,none,threshold)

# assign column names to the results matrix
names(result.out)<-c(paste("modelp",seq(1,model.n),sep=""),"all","none","threshold")

# plot the default decision curve

	# start by plotting the net benefit for treating none
	plot(result.out$threshold, result.out$none, type="l", lwd=2, xlim=c(xstart*100, xstop*100), ylim=c(ymin, ymax), xlab="Threshold probability (%)", ylab="Net benefit")

	# then add the net benefits for treat all; the default is to disregard net benefits < ymin
	lines(result.out$threshold, result.out$all, type="l", col=8, lwd=2)

	# initialize the legend label, color, and width using the standard specs of the none and all lines
	legendlabel <- c("None", "All")
	legendcolor <- c(17, 8)
	legendwidth <- c(2, 2)
	legendpattern <- c(1, 1)
	
	# then add the net benefits for treating according to each model; the default is to disregard net benefits < ymin
	for (i in 1:(ncol(tfx.matrix) - 2)) {
		nb <- result.out[, i]
		lines(result.out$threshold, nb, type="l", col=i, lty=2)
		# add each model to the legend
		legendlabel <- c(legendlabel, paste("Model", i,sep=" "))
		legendcolor <- c(legendcolor, i)
		legendwidth <- c(legendwidth, 1)
		legendpattern <- c(legendpattern, 2)
		}

	# then add the legend	
	legend("topright", legendlabel, cex=0.8, col=legendcolor, lwd=legendwidth, lty=legendpattern)

# return the results matrix so that the results can be saved and used for plots
return(result.out)

}
