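 # GeneLR is the expression matrix with rows = subjects and columns = genes.
 # VarExplained: proportion of variance to retain, given as a fraction (e.g. 0.90 or 0.95). The best value depends on the data, but 0.95 is the usual choice.
 # GeneIdx is a vector of gene names, one per column of GeneLR, used to keep track of which genes are selected. Plain indices such as 1, 2, 3, 4 also work.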


#' Select genes from the eigen-decomposition of the gene-gene correlation matrix
#'
#' Computes the pairwise-complete correlation matrix of the columns of
#' `GeneLR`, eigen-decomposes it, finds the smallest number of leading
#' components whose cumulative share of the eigenvalue total reaches
#' `VarExplained`, varimax-rotates the eigenvectors, and keeps every gene whose
#' mean absolute rotated loading on those leading components is larger than
#' its mean absolute loading on the remaining components.
#'
#' @param GeneLR Matrix or data frame with one row per subject and one column
#'   per gene.
#' @param VarExplained Single number: the fraction of the eigenvalue total the
#'   leading components must reach (e.g. 0.95). Values that are never reached
#'   (such as anything above 1) cause all of `GeneIdx` to be returned.
#' @param GeneIdx Labels (names or indices), one per column of `GeneLR`.
#' @return `GeneIdx` unchanged when the eigenvalues show no usable correlation
#'   structure or when every component is needed to reach `VarExplained`;
#'   otherwise the selected elements of `GeneIdx` in their original order, or
#'   `NULL` when no gene is selected.
GetGenes <- function(GeneLR, VarExplained, GeneIdx)
{
  if (length(dim(GeneLR)) != 2L) {
    stop("GeneLR must be a matrix or data frame (subjects x genes)",
         call. = FALSE)
  }
  n_genes <- ncol(GeneLR)
  # A label vector of the wrong length would silently yield NA or misaligned
  # gene labels further down, so reject it up front.
  if (length(GeneIdx) != n_genes) {
    stop("GeneIdx must have one entry per column of GeneLR", call. = FALSE)
  }
  if (!is.numeric(VarExplained) || length(VarExplained) != 1L ||
      is.na(VarExplained)) {
    stop("VarExplained must be a single non-missing number", call. = FALSE)
  }

  gene_cor <- cor(GeneLR, use = "pairwise.complete.obs")
  if (anyNA(gene_cor)) {
    stop("some gene-gene correlations are NA ",
         "(constant column or too few complete observations?)",
         call. = FALSE)
  }
  # A correlation matrix is symmetric; saying so guarantees real eigenvalues.
  eig <- eigen(gene_cor, symmetric = TRUE)
  lambda <- eig$values
  total_var <- sum(lambda)

  # Eigenvalue-spread summary: it equals n_genes exactly when all eigenvalues
  # are equal, i.e. when there is no correlation structure to exploit.
  eff_n <- 1 + n_genes * (1 - sum(lambda^2) / total_var^2)
  if (round(eff_n) == n_genes) {
    return(GeneIdx)
  }

  # Smallest j such that the first j eigenvalues reach the requested share of
  # the total; fall back to all components when the share is never reached.
  reached <- which(cumsum(lambda / total_var) >= VarExplained)
  n_keep <- if (length(reached) > 0L) reached[1L] else n_genes
  if (n_keep == n_genes) {
    return(GeneIdx)
  }

  # Compare, per gene (row), the mean absolute rotated loading on the leading
  # components against the mean over the remaining ones.
  abs_load <- abs(unclass(varimax(eig$vectors)$loadings))
  lead <- seq_len(n_keep)
  lead_mean <- rowSums(abs_load[, lead, drop = FALSE]) / n_keep
  rest_mean <- rowSums(abs_load[, -lead, drop = FALSE]) / (n_genes - n_keep)
  keep <- rest_mean < lead_mean

  # Preserve the historical contract: NULL (not a zero-length vector) when no
  # gene passes the comparison.
  if (!any(keep)) {
    return(NULL)
  }
  GeneIdx[keep]
}
