########################################################################### ### ### # affy normalization using non-differentially expressed genes # ### ### ########################################################################### # affy normalization equi.gene.norm <- function(data.y, data.x, pi.start=0.1, lamda.hat=0.9){ # starting value theta.hat <- equi.gene.norm.start(data.y, data.x, pi.start, lamda.hat) # EM algorithm est.hat.new <- equi.gene.norm.EM(data.y, data.x, theta.hat) est.hat.new } # starting value equi.gene.norm.start <- function(data.y, data.x, pi.start, lamda.hat){ # variance components sigma2.hat <- 0.5 tau2.hat <- 1 phi2.hat <- 1 xi2.hat <- 1 # differential expression muO.hat <- 3 muU.hat <- -2 # gene proportions pi.hat <- pi.start lamda.hat <- lamda.hat # rank-invariant genes data.y.simple <- apply(data.y, 1:2, mean) G <- dim(data.y.simple)[1] gene.rank <- apply(data.y.simple, 2, rank) gene.rank.var <- apply(gene.rank, 1, var) cutoff <- sort(gene.rank.var)[G*(1-pi.start)] idx.gene <- gene.rank.var < cutoff alpha.hat <- apply(data.y.simple[idx.gene,], 2, median) #alpha.hat <- apply(data.y, 2, median) # overall median theta.hat <- list(alpha.hat=alpha.hat, pi.hat=pi.hat, lamda.hat=lamda.hat, muO.hat=muO.hat, muU.hat=muU.hat, tau2.hat=tau2.hat, phi2.hat=phi2.hat, xi2.hat=xi2.hat, sigma2.hat=sigma2.hat) theta.hat } # EM algorithm: iterate btw E- and M- steps equi.gene.norm.EM <- function(data.y, data.x, theta.hat){ # average probes data.y.simple <- apply(data.y, 1:2, mean) P <- dim(data.y)[3] llh <- -999999999990 llh.old <- -999999999999 while(llh-llh.old>0.01){ llh.old <- llh # E-step e.hat <- equi.gene.norm.Estep(data.y.simple, data.x, theta.hat, P) # M-step theta.hat <- equi.gene.norm.Mstep(data.y.simple, data.x, e.hat, theta.hat) llh <- theta.hat$llh } list(theta.hat=theta.hat, e.hat=e.hat) } library(mvtnorm) # E-step equi.gene.norm.Estep <- function(data.y.simple, data.x, theta.hat, P){ alpha.hat <- theta.hat$alpha.hat pi.hat <- theta.hat$pi.hat lamda.hat <- theta.hat$lamda.hat muO.hat <- theta.hat$muO.hat muU.hat <- theta.hat$muU.hat tau2.hat <- theta.hat$tau2.hat phi2.hat <- theta.hat$phi2.hat xi2.hat <- theta.hat$xi2.hat sigma2.hat <- theta.hat$sigma2.hat # compute residuals after subtracting array effects data.y.simple <- compute.resid.ctd(data.y.simple, alpha.hat) # compute density for o(ver-expression)=1 and u(nder-expression)=1 dens.ND <- compute.dens.ND(data.y.simple, sigma2.hat, tau2.hat) dens.Ov <- compute.dens.D(data.y.simple, data.x, sigma2.hat, tau2.hat, phi2.hat, muO.hat) dens.Un <- compute.dens.D(data.y.simple, data.x, sigma2.hat, tau2.hat, xi2.hat, muU.hat) temp <- 0 - apply(cbind(dens.ND, dens.Ov, dens.Un), 1, max) dens.ND <- dens.ND + temp dens.Ov <- dens.Ov + temp dens.Un <- dens.Un + temp # compute o.hat and u.hat temp <- (1-pi.hat)*exp(dens.ND)+pi.hat*lamda.hat*exp(dens.Ov)+pi.hat*(1-lamda.hat)*exp(dens.Un) o.hat <- pi.hat*lamda.hat*exp(dens.Ov)/temp u.hat <- pi.hat*(1-lamda.hat)*exp(dens.Un)/temp z.hat <- o.hat + u.hat o.hat <- as.integer(o.hat>u.hat & o.hat>(1-o.hat-u.hat)) u.hat <- as.integer(u.hat>o.hat & u.hat>(1-o.hat-u.hat)) print(table(o.hat, u.hat)) list(o.hat=o.hat, u.hat=u.hat, z.hat=z.hat) } library(nlme) # M-step equi.gene.norm.Mstep <- function(data.y.simple, data.x, e.hat, theta.hat){ # delist theta0.hat o.hat <- e.hat$o.hat u.hat <- e.hat$u.hat z.hat <- o.hat + u.hat G <- dim(data.y.simple)[1] n <- dim(data.y.simple)[2] # frequency of each cluster pi.hat <- sum(z.hat)/G lamda.hat <- sum(o.hat)/sum(z.hat) # estimate alpha.hat (array effect) alpha.hat <- apply(data.y.simple[z.hat==0,], 2, mean) # xn=non-differential-expression, xo=over-expression, xu=under-expression y <- compute.resid.ctd(data.y.simple, alpha.hat) group <- t(apply(rbind(1:G), 2, rep, each=n)) xo <- matrix(0, nrow=G, ncol=n) xo[o.hat==1, data.x==1] <- 1 xu <- matrix(0, nrow=G, ncol=n) xu[u.hat==1, data.x==1] <- 1 # estimate variances data.lme <- data.frame(y=as.vector(y), xo=as.vector(xo), xu=as.vector(xu), group=as.vector(group)) temp <- lme(y ~ -1+xo+xu, data=data.lme, random = ~ 1+xo+xu|group, control=list(maxIter=200, msMaxIter=100, opt="optim", optimMethod="L-BFGS-B", lower=c(0,-Inf), upper=c(Inf,0))) muO.hat <- fixed.effects(temp)[1] muU.hat <- fixed.effects(temp)[2] var.hat <- as.double(VarCorr(temp)[,1]) llh <- temp$logLik print(round(llh,3)) print(round(sqrt(var.hat),3)) list(alpha.hat=alpha.hat, pi.hat=pi.hat, lamda.hat=lamda.hat, muO.hat=muO.hat, muU.hat=muU.hat, tau2.hat=var.hat[1], phi2.hat=var.hat[2], xi2.hat=var.hat[3], sigma2.hat=var.hat[4], llh=llh) } # subtract array effects compute.resid.ctd <- function(data.y.simple, alpha.hat){ resid <- t(t(data.y.simple) - alpha.hat) resid } # density for non-differentially expressed genes compute.dens.ND <- function(data.y.simple, sigma2.hat, tau2.hat){ n <- ncol(data.y.simple) temp <- cbind(rep(1, n)) mu <- rep(0, n) sigma <- diag(1,n)*sigma2.hat + temp%*%t(temp)*tau2.hat dens0 <- apply(data.y.simple, 1, dmvnorm, mean=mu, sigma=sigma, log=TRUE) dens0 } # density for differentially expressed genes compute.dens.D <- function(data.y.simple, data.x, sigma2.hat, tau2.hat, phi2.hat, mu.hat){ n <- ncol(data.y.simple) n1 <- sum(data.x) # gene effect mu <- rep(0, n) temp <- cbind(rep(1, n)) sigma <- diag(1,n)*sigma2.hat + temp%*%t(temp)*tau2.hat # treatment effect mu[data.x==1] <- mu.hat temp <- cbind(rep(1, n1)) sigma[data.x==1, data.x==1] <- sigma[data.x==1, data.x==1] + temp%*%t(temp)*phi2.hat dens1 <- apply(data.y.simple, 1, dmvnorm, mean=mu, sigma=sigma, log=TRUE) dens1 }