# Program name: onestep 
# Date: 07/2005
#======================ucsc.onestep===================================================================

onestep=function(x, y, xtest = NULL, ytest = NULL, threshold, n.threshold, scale.sd =TRUE, 
        threshold.scale = NULL, se.scale = NULL, offset.percent = 50, 
        prior, cost, sign.contrast = "both")
{


        this.call <- match.call()
        y=factor(y)
        ytest <- NULL
        xtest <- NULL

        argy <- ytest
        if(is.null(ytest)) {
                argy <- y
        }
        n.class <- table(y)
        if(min(n.class) == 1) {
                stop("Error: each class must have >1 sample")
        }
        if(is.null(xtest)) {
                xtest <- x
                ytest <- y
        }

		    
		    
        n <- sum(n.class)
        ntest <- ncol(xtest)
        K <- length(prior)
        p <- nrow(x)
### creat a design matrix for y

        Y <- model.matrix( ~ factor(y) - 1, data = list(y = y))

        dimnames(Y) <- list(NULL, names(n.class))

### compute the p by k matrix of class centroids for each gene
        centroids <- scale(x %*% Y, FALSE, n.class)
        

### compute the gene-wise si+s0 a vector of length G (total number of genes)
				sd <- rep(1, p)
                if(scale.sd) {
                xdif <- x - centroids %*% t(Y)
                sd <- (xdif^2) %*% rep(1/(n - K), n)
                sd <- drop(sqrt(sd))
                offset <- quantile(sd, offset.percent/100)
                sd <- sd + offset
        }
### compute the overall centroids a vector of length G
        centroid.overall <- drop(x %*% rep(1/n, n))
        if(is.null(threshold.scale)) {
                threshold.scale <- rep(1, K)
                names(threshold.scale) <- names(n.class)
        }
### Now make an adjustment for the sample sizes in the "t" ratios
        if(is.null(se.scale))
                se.scale <- sqrt(1/n.class - 1/n)
                
### compute d_ik a matrix of p by k
        delta <- (centroids - centroid.overall)/sd
        delta <- scale(delta, FALSE, threshold.scale * se.scale)
        if(sign.contrast == "positive") {
                delta <- delta * (delta > 0)
        }
        if(sign.contrast == "negative") {
                delta <- delta * (delta < 0)
        }

        if(!is.null(threshold)){
	        threshold=as.matrix(threshold)
	        n.threshold=dim(threshold)[1]
	        }
        if(is.null(threshold)){
	        threshold <- seq(0, max(abs(delta)), length = n.threshold)
	        threshold=as.matrix(threshold)
	        }
	    
	    
        #--initialize 
        nonzero <- seq(n.threshold)
        size <- seq(n.threshold)
        errors <- rep(NA, n.threshold)
        yhat <- as.list(seq(n.threshold))
        Dset <- as.list(seq(n.threshold))
        geneid <- as.list(seq(n.threshold))
        posidset <- as.list(seq(n.threshold))
        for(ii in 1:n.threshold){

                delta.shrunk <- sdsc.softshrink(delta, as.numeric(threshold[ii,]))
                delta.shrunk <- scale(delta.shrunk, FALSE, 1/(threshold.scale*se.scale))
                delta.ori=scale(delta,FALSE,1/se.scale)
                nonzero[ii] <- attr(delta.shrunk, "nonzero") #number of nonzero features
                posid <- drop(abs(delta.shrunk) %*% rep(1, K)) > 0 #mark features with nonzero delta
                
                ###spectral decomposition (Ling's code)
                if(nonzero[ii]>1){
	                classifier=GetGenes(GeneLR=t(x[posid,]),VarExplained=0.95,GeneIdx=rownames(x)[posid])
            	}
                if(nonzero[ii]<=1){classifier=rownames(x)[posid]}
                size[ii]=length(classifier)               
                Dset[[ii]]=classifier # get the names of the delta set
                geneid[[ii]]=is.element(rownames(x),classifier)
                dd <- sdsc.diagdisc(x=(xtest - centroid.overall)/sd, centroids=delta.ori, 
                        prior=prior, cost=cost,weight=geneid[[ii]])
                yhat[[ii]] <- sdsc.softmin(dd)

                if(!is.null(ytest)) {
                        errors[ii] <- sum(yhat[[ii]] != ytest)
                }
			}


        object <- list(y = argy, yhat = yhat,  Dset=Dset, geneid=geneid, centroids = 
                centroids, centroid.overall = centroid.overall, sd = sd, 
                threshold = threshold[seq(n.threshold),], nonzero = nonzero[seq(
                n.threshold)], size=size, threshold.scale = threshold.scale, se.scale = 
                se.scale,delta.ori=delta.ori, delta=delta, call = this.call, prior = prior, cost=cost, offset = offset, sign.contrast = 
                sign.contrast)
        if(!is.null(ytest))
                object$errors <- errors[seq(n.threshold)]
        class(object) <- "onestep"
        object
}

#============================sdsc.diagdisc=======================================================

sdsc.diagdisc=function(x, centroids, prior, cost, weight) {
### Computes the class discriminant functions assuming scaled x and centroids
  if(!missing(weight)) {
    posid <- (weight > 0)
    if(any(posid)) {
      weight <- sqrt(weight[posid]) 
      centroids <- centroids[posid,  , drop = FALSE] * weight
      x <- x[posid,  , drop = FALSE] * weight
    }
    else {
      mat <- outer(rep(1, ncol(x)), log(prior), "*")
      dimnames(mat) <- list(NULL, dimnames(centroids)[[2]])
      return(mat)
    }
  }
  dd <- drop(rep(1, nrow(x)) %*% (x^2))/2- t(x) %*% centroids # matrix of n by k
  dd0 <- drop(rep(1, nrow(centroids)) %*% (centroids^2))/2 
  dd=scale(dd,center=-dd0,scale=FALSE)
  dd=scale(exp(-dd),center=FALSE,scale=1/prior)
  K=length(prior)
  I <- matrix(0,nrow=K,ncol=K) 
  I[row(I)!=col(I)] <- 1 
  if(is.null(cost)){cost=I}
  dd%*%cost

}

#================================sdsc.softshrink=================================================
sdsc.softshrink=function(delta, threshold) {
  dif <- abs(delta) - threshold
  delta <- sign(delta) * dif * (dif > 0) # force genes with a dif smaller than threshold to be 0
  nonzero <- sum(drop((dif > 0) %*% rep(1, ncol(delta))) > 0) #number of column with at least one nonzero delta
  attr(delta, "nonzero") <- nonzero # specify the nonzero part of the delta matrix
  delta
}

#=================================sdsc.softmin=====================================================
sdsc.softmin=function(x, gap=FALSE) {
  d <- dim(x)
  mindist <- x[, 1]
  pclass <- rep(1, d[1])
  for(i in seq(2, d[2])) {
    l <- x[, i] < mindist
    pclass[l] <- i
    mindist[l] <- x[l, i]
  }
  dd <- dimnames(x)[[2]]
  if(gap) {
    x <- abs(mindist - x)
    x[cbind(seq(d[1]), pclass)] <- drop(x %*% rep(1, d[2]))
    gaps <- do.call("pmin", data.frame(x))
  }
  pclass <- if(is.null(dd) || !length(dd))
    pclass
  else
    factor(pclass, levels = seq(d[2]), labels = dd)
  if(gap)
    list(class = pclass, gaps = gaps)
  else
    pclass

}