################################
#
# july 28, 2014
#
# 
# bayes estimation of log odds
#
# input parameters:
#
# y = binary vector of N1 1's for N1 cases and N0 0's for N0 controls
# x and z = vectors of length N0+N1 giving the levels of risk factors X and Z
#
#
# lambda.step is a positive number that says the steps to take when searching for an optimal lambda.
# lambda.search.num is an arbitrary integer to tell the function when to stop searching for optimal lambda.
# experience with several practical applications suggest that the default values provided are fine and need
# not be tweaked. there may be some exceptions with certain data sets, though.
#
#
# this function calls the function get.gj.beta.est
################################


get.bayes.est <- function(y, x, z, lambda.step=0.001, lambda.search.num=10000){

  n.beta <- length(unique(x))-1
  n.delta <- length(unique(z))-1

  xf <- as.factor(x)
  zf <- as.factor(z)
  design.mat <- model.matrix(~xf*zf, contrasts=list(xf="contr.helmert", zf="contr.helmert"))
  unique.design.mat <- unique(design.mat)
  

  num.main.eff <- n.beta+n.delta
  add.x.mat <- unique.design.mat[,(1:(1+num.main.eff))]

  unique.row.names <- as.integer(rownames(unique.design.mat))
  row.length <- length(unique.row.names)
  case.data <- rep(0,row.length)
  control.data <- rep(0, row.length)
  temp.xz <- cbind(x,z)
  for(i in 1:row.length){
    val.vec <- temp.xz[unique.row.names[i],]
    y.vec <- y[x==val.vec[1] & z==val.vec[2]]
    case.data[i] <- length(which(y.vec==1))
    control.data[i] <- length(which(y.vec==0))
  }

###########
# get optimal lambda
###########

  lambda.gj <- 0
  add.gj.model <- get.gj.beta.est(case.data, control.data, add.x.mat, lambda=lambda.gj)
  gj.conv <- add.gj.model$converge
  log.like.val <- NULL
  lambda.val <- NULL
  counter <- 0
 
  while(gj.conv){
    lambda.val <- c(lambda.val, lambda.gj)
    log.like.val <- c(log.like.val, add.gj.model$log.like)
    temp.lambda <- lambda.gj + lambda.step
    add.gj.model <- get.gj.beta.est(case.data, control.data, add.x.mat, lambda=temp.lambda)
    gj.conv <- add.gj.model$converge
    lambda.gj <- temp.lambda
    counter <- counter + 1
    if(counter > lambda.search.num){
      gj.conv <- F
    }
  }

  lambda.gj <- -0.01
  add.gj.model <- get.gj.beta.est(case.data, control.data, add.x.mat, lambda=lambda.gj)
  gj.conv <- add.gj.model$converge

  while(gj.conv){
    lambda.val <- c(lambda.val, lambda.gj)
    log.like.val <- c(log.like.val, add.gj.model$log.like)
    temp.lambda <- lambda.gj - lambda.step
    add.gj.model <- get.gj.beta.est(case.data, control.data, add.x.mat, lambda=temp.lambda)
    gj.conv <- add.gj.model$converge
    lambda.gj <- temp.lambda
    counter <- counter+1
    if(counter > lambda.search.num){
      gj.conv <- F
    }
  }

  lambda.val.sort <- sort(lambda.val)
  log.like.val.order <- log.like.val[order(lambda.val)]
  optimal.lambda <- lambda.val.sort[which(log.like.val.order >= max(log.like.val.order))]

  add.gj.model <- get.gj.beta.est(case.data, control.data, add.x.mat, lambda=optimal.lambda)
  full.model <- get.gj.beta.est(case.data, control.data, unique.design.mat, lambda=0)

  gj.log.odds.est <- as.vector( log(add.gj.model$est.or) )
  full.log.odds.est <- as.vector( log(full.model$est.or) )

  gj.var.log.odds <- add.gj.model$var.est.or

  full.var.log.odds <- full.model$var.est.or

  Z.mat <- unique.design.mat[,-(1:(1+num.main.eff))]
  hat.mat <- add.x.mat %*% solve(t(add.x.mat) %*% add.x.mat) %*% t(add.x.mat)
  comp.hat.mat <- diag(nrow(hat.mat)) - hat.mat

  full.model.int <- Z.mat %*% solve(t(Z.mat) %*% Z.mat) %*% t(Z.mat) %*% comp.hat.mat

  zeta.vector <- log(case.data/control.data)
  A.mat <- diag(1/(1/case.data + 1/control.data))
  temp.coeff <- full.model$beta.est[-1]
  beta.est <- temp.coeff[1:n.beta]
  temp.coeff <- temp.coeff[-(1:n.beta)]
  delta.est <- temp.coeff[1:n.delta]
  tt.design.mat <- unique.design.mat[,-1]
  x1.mat <- tt.design.mat[,(1:n.beta)]
  tt.design.mat <- tt.design.mat[,-(1:n.beta)]
  x2.mat <- tt.design.mat[,(1:n.delta)]

  if(n.beta==1){
    x1.mat <- matrix(x1.mat, ncol=1)
    beta.est <- matrix(beta.est, ncol=1)
  }
  if(n.delta==1){
    x2.mat <- matrix(x2.mat, ncol=1)
    delta.est <- matrix(delta.est, ncol=1)
  }
  x1.beta <- as.vector( x1.mat %*% beta.est )
  x2.delta <- as.vector( x2.mat %*% delta.est )
  D.mat <- diag(x1.beta * x2.delta)
  one.mat <- matrix(rep(1,nrow(D.mat)), ncol=1)

  denom.val <- as.vector( t(one.mat) %*% D.mat %*% A.mat %*% D.mat %*% one.mat )

  rem.int <- matrix(x1.beta * x2.delta, ncol=1) %*% t(one.mat) %*% D.mat %*% A.mat %*% comp.hat.mat / denom.val 

  M.mat <- full.model.int - rem.int
  Sigma <- M.mat %*% solve(A.mat) %*% t(M.mat)

  epsilon <- full.log.odds.est - gj.log.odds.est
  prior.var.sigma.sq <- sum(epsilon^2) / length(epsilon)

  inverse.mat <- solve( Sigma + prior.var.sigma.sq * diag(nrow(Sigma)) )
  V.vector <- as.vector( inverse.mat %*% epsilon )
  C.mat <- matrix(V.vector, ncol=1) %*% matrix(epsilon, nrow=1)

  bayes.log.odds.est <- as.vector(prior.var.sigma.sq * inverse.mat %*% full.log.odds.est) + 
                        as.vector( Sigma %*% inverse.mat %*% gj.log.odds.est )

  M1.mat <- Sigma %*% inverse.mat %*% (diag(nrow(Sigma)) - 2/length(epsilon) * C.mat )
  M.mat <- cbind( diag(nrow(M1.mat)) - M1.mat, M1.mat)
  R.mat.top <- cbind(full.var.log.odds, gj.var.log.odds)
  R.mat.bot <- cbind(gj.var.log.odds, gj.var.log.odds)
  R.mat <- rbind(R.mat.top, R.mat.bot)

  bayes.var.mat <- M.mat %*% R.mat %*% t(M.mat)

  log.odds.est <- cbind(full.log.odds.est, gj.log.odds.est,bayes.log.odds.est)
  colnames(log.odds.est) <- c("std.logistic", "additive.gj", "bayes")

  log.odds.std.err <- cbind(sqrt(diag(full.var.log.odds)), sqrt(diag(gj.var.log.odds)), 
                            sqrt(diag(bayes.var.mat)))
  colnames(log.odds.std.err) <- c("std.logistic", "additive.gj", "bayes")

  rmse.std <- sqrt( mean(diag(full.var.log.odds)) )
  rmse.gj <- sqrt( mean( (gj.log.odds.est -full.log.odds.est)^2 + diag(gj.var.log.odds) ) )
  rmse.bayes <- sqrt( mean( (bayes.log.odds.est -full.log.odds.est)^2 + diag(bayes.var.mat) ) )

  rmse.val <- data.frame(std.logistic=rmse.std, additivegj=rmse.gj, bayes=rmse.bayes)

  result <- list(log.odds.est = log.odds.est, log.odds.std.err=log.odds.std.err,
                 rmse=rmse.val,
                 prior.var.sigma.sq=prior.var.sigma.sq, optimal.lambda=optimal.lambda,
                 Sigma=Sigma, bayes.var.mat = bayes.var.mat, gj.var.mat=gj.var.log.odds,
                 std.log.odds.var=full.var.log.odds)

  return(result)

}


