################################
#
# july 28, 2014
#
# hypothesis tests for removable and non-removable interactions
# the output also includes standard deviance test for interactions
#
# input parameters:
#
# y = binary vector of N1 1's for N1 cases and N0 0's for N0 controls
# x and z = vectors of length N0+N1 giving the levels of risk factors X and Z 
#
#
# lambda.step is a positive number that says the steps to take when searching for an optimal lambda.
# lambda.search.num is an arbitrary integer to tell the function when to stop searching for optimal lambda.
# experience with several practical applications suggest that the default values provided are fine and need
# not be tweaked. there may be some exceptions with certain data sets, though.
#
#
# this function calls the function get.gj.beta.est
################################

get.int.test <- function(y, x, z, lambda.step=0.01, lambda.search.num=3000){

  n.beta <- length(unique(x)) - 1
  n.delta <- length(unique(z)) - 1

  xf <- factor(x)
  zf <- factor(z)
  design.matrix <- model.matrix(~ xf*zf, contrasts=list(xf="contr.helmert", zf="contr.helmert"))
  unique.design.matrix <- unique(design.matrix)
  additive.design.matrix <- unique.design.matrix[,1:(n.beta+n.delta+1)]

  unique.row.names <- as.integer(rownames(unique.design.matrix))
  row.length <- length(unique.row.names)
  n.case <- rep(0,row.length)
  n.control <- rep(0, row.length)
  temp.xz <- cbind(x,z)
  for(i in 1:row.length){
    val.vec <- temp.xz[unique.row.names[i],]
    y.vec <- y[x==val.vec[1] & z==val.vec[2]]
    n.case[i] <- length(which(y.vec==1))
    n.control[i] <- length(which(y.vec==0))
  }

#########
########## test for removable interaction 
#########

  zero.count <- length(which(n.control==0)) + length(which(n.case==0))

  if(zero.count == 0){
    obs.log.or <- log(n.case/n.control)

    eqwt.null.glm <- glm(obs.log.or ~ additive.design.matrix[,-1], family=gaussian)
    eqwt.null.est <- eqwt.null.glm$coeff

    eqwt.mu.hat <- eqwt.null.est[1]
    eqwt.beta.hat <- eqwt.null.est[2:(n.beta+1)]
    eqwt.delta.hat <- eqwt.null.est[-(1:(n.beta+1))]
    eqwt.beta.mat <- matrix(eqwt.beta.hat, ncol=1)
    eqwt.delta.mat <- matrix(eqwt.delta.hat, ncol=1)

    eqwt.temp <- as.vector( additive.design.matrix %*% eqwt.null.est )
    eqwt.p.val <- exp(eqwt.temp) / (1 + exp(eqwt.temp))

    eqwt.x.beta <- as.vector(additive.design.matrix[,(2:(n.beta+1))] %*% eqwt.beta.mat)
    eqwt.z.delta <- as.vector(additive.design.matrix[,-(1:(n.beta+1))] %*% eqwt.delta.mat)
    eqwt.diag.mat <- diag(eqwt.x.beta * eqwt.z.delta)

    eqwt.hat.mat <- additive.design.matrix %*% 
                    solve(t(additive.design.matrix) %*% additive.design.matrix) %*% 
                    t(additive.design.matrix) 
    eqwt.hat.mat.comp <- diag(nrow(eqwt.hat.mat)) - eqwt.hat.mat
    eqwt.residual.val <- as.vector( eqwt.hat.mat.comp %*% obs.log.or )
    eqwt.A.mat <- diag( 1 / (1/n.case + 1/n.control ) )
    one.mat <- rep(1,nrow(eqwt.A.mat))

    eqwt.numerator <- ( as.vector( t(one.mat) %*% eqwt.diag.mat %*% eqwt.A.mat %*% eqwt.residual.val ) )^2
    eqwt.denominator <- as.vector( t(one.mat) %*% eqwt.diag.mat %*% eqwt.A.mat %*% eqwt.hat.mat.comp %*% solve(eqwt.A.mat) %*%
                                                                  eqwt.hat.mat.comp %*% eqwt.A.mat %*% eqwt.diag.mat %*% one.mat )

    eqwt.score.test <- eqwt.numerator / eqwt.denominator

    W.mat <- matrix(eqwt.x.beta * eqwt.z.delta, ncol=1)
    lambda.est <- - solve(t(W.mat) %*% eqwt.A.mat %*% W.mat) %*% t(W.mat) %*% eqwt.A.mat %*% eqwt.residual.val 
    cond.var.lambda <- solve(t(W.mat) %*% eqwt.A.mat %*% W.mat) %*% t(W.mat) %*% eqwt.A.mat %*% 
                       eqwt.hat.mat.comp %*% solve(eqwt.A.mat) %*% eqwt.hat.mat.comp %*% eqwt.A.mat %*%
                       W.mat %*% solve(t(W.mat) %*% eqwt.A.mat %*% W.mat)
  }
  else{
    eqwt.score.test <- NA
  }

########
######### standard likelihood ratio test ###########
########

  null.glm <- glm(y ~ design.matrix[,(2:(n.beta+n.delta+1))], family=binomial)
  all.glm <- glm(y ~ design.matrix[,-1], family=binomial)
  std.test <- null.glm$deviance - all.glm$deviance

#########
########## test for non-removable interaction #########
#########
  lambda.gj <- 0
  add.gj.model <- get.gj.beta.est(n.case, n.control, additive.design.matrix, lambda=lambda.gj)
  gj.conv <- add.gj.model$converge
  log.like.val <- NULL
  lambda.val <- NULL
  counter <- 0
  
  while(gj.conv){
    lambda.val <- c(lambda.val, lambda.gj)
    log.like.val <- c(log.like.val, add.gj.model$log.like)
    temp.lambda <- lambda.gj + lambda.step
    add.gj.model <- get.gj.beta.est(n.case, n.control, additive.design.matrix, lambda=temp.lambda)
    gj.conv <- add.gj.model$converge
    lambda.gj <- temp.lambda
    counter <- counter + 1
    if(counter > lambda.search.num){
      gj.conv <- F
    }
  }

  lambda.gj <- -0.01
  add.gj.model <- get.gj.beta.est(n.case, n.control, additive.design.matrix, lambda=lambda.gj)
  gj.conv <- add.gj.model$converge

  while(gj.conv){
    lambda.val <- c(lambda.val, lambda.gj)
    log.like.val <- c(log.like.val, add.gj.model$log.like)
    temp.lambda <- lambda.gj - lambda.step
    add.gj.model <- get.gj.beta.est(n.case, n.control, additive.design.matrix, lambda=temp.lambda)
    gj.conv <- add.gj.model$converge
    lambda.gj <- temp.lambda
    counter <- counter+1
    if(counter > lambda.search.num){
      gj.conv <- F
    }
  }

  lambda.val.sort <- sort(lambda.val)
  log.like.val.order <- log.like.val[order(lambda.val)]
  lambda.opt <- lambda.val.sort[which(log.like.val.order >= max(log.like.val.order))]

##########
########### calculate the test statistic for non-removable interaction ##########
##########

mu.val <- summary(all.glm)$coeff[1,1]
beta.val <- matrix(summary(all.glm)$coeff[(2:(n.beta+1)),1], ncol=1)
delta.val <- matrix(summary(all.glm)$coeff[(n.beta+2):(n.beta+n.delta+1),1], ncol=1)
x.beta <- as.vector( design.matrix[, (2:(n.beta+1))] %*% beta.val )
z.delta <- as.vector( design.matrix[, (n.beta+2):(n.beta+n.delta+1)] %*% delta.val )
rem.fitted <- mu.val + x.beta + z.delta - lambda.opt * x.beta * z.delta
pi.rem <- exp(rem.fitted) / (1 + exp(rem.fitted) )
log.likeli.rem <- sum( y * log(pi.rem) + (1 - y) * log(1 - pi.rem) )
log.likeli.full <- sum( y * log(all.glm$fitted) + (1 - y) * log(1 - all.glm$fitted) )

non.rem.test <- -2 * (log.likeli.rem - log.likeli.full)

#########
########## output the results ##############
#########
  test.result <- c(eqwt.score.test, non.rem.test, std.test)
  test.df <- c(1, (n.beta*n.delta-1), n.beta*n.delta)
  test.p <- c(1 - pchisq(test.result[1], df=test.df[1]),
              1-pchisq(test.result[2],df=test.df[2]),
              1-pchisq(test.result[3],df=test.df[3]))

  result.test <- cbind(test.result, test.df, test.p)
  dimnames(result.test) <- list(c("rem.int.test", "non-rem.int.test", "std.int.test"), c("test", "df", "p.value"))
  result <- list(hypothesis.test = result.test, optimum.lambda = lambda.opt)
  return(result)
}


