################################################################################
# Program: MI-lasso.R
#
# NOTE: This R program includes:
#   1) the R function "MI.lasso", which implements the MI-lasso variable
#      selection method;
#   2) an example of how to use the "MI.lasso" function.
#
# Reference:
#   Chen, Q. & Wang, S. (2013). "Variable selection for multiply-imputed data
#   with application to dioxin exposure study," Statistics in Medicine, 32:
#   3646-3659.
################################################################################

MI.lasso = function(mydata, D, lambda, maxiter=200, eps=1e-6) {
  ## D is the number of imputations.
  ## mydata is a list of the D imputed datasets: mydata[[1]] is the first
  ## imputed dataset, mydata[[2]] the second, and so on.
  ## In each mydata[[d]], the first p columns are the covariates X and the
  ## last column is the outcome Y.

  ## number of observations
  n = dim(mydata[[1]])[1]
  ## number of covariates
  p = dim(mydata[[1]])[2] - 1

  ## standardize the covariates X and center the outcome Y
  x = vector("list", D)
  y = vector("list", D)
  meanx = vector("list", D)
  normx = vector("list", D)
  meany = rep(0, D)
  for (d in 1:D) {
    x[[d]] = as.matrix(mydata[[d]][, 1:p])
    y[[d]] = mydata[[d]][, (p+1)]
    meanx[[d]] = apply(x[[d]], 2, mean)
    x[[d]] = scale(x[[d]], meanx[[d]], FALSE)
    normx[[d]] = sqrt(apply(x[[d]]^2, 2, sum))
    x[[d]] = scale(x[[d]], FALSE, normx[[d]])
    meany[d] = mean(y[[d]])
    y[[d]] = y[[d]] - meany[d]
  }

  ## ordinary least squares (OLS) estimates of the beta coefficients
  b.ols = matrix(0, p, D)
  for (d in 1:D) {
    b.ols[,d] = qr.solve(t(x[[d]]) %*% x[[d]]) %*% t(x[[d]]) %*% y[[d]]
  }

  ## estimate beta_dj in Equation (4)
  iter = 0
  dif = 1
  c = rep(1, p)
  b = matrix(0, p, D)
  while (iter <= maxiter & dif >= eps) {
    iter = iter + 1
    b.old = b
    # update beta_d by ridge regression
    for (d in 1:D) {
      xtx = t(x[[d]]) %*% x[[d]]
      diag(xtx) = diag(xtx) + lambda / c
      xty = t(x[[d]]) %*% y[[d]]
      b[,d] = qr.solve(xtx) %*% xty
    }
    # update c in Equation (4); floor c at eps so the ridge update
    # above never divides by zero
    c = sqrt(apply(b^2, 1, sum))
    c[c < eps] = eps
    # check convergence
    dif = max(abs(b - b.old))
  }

  ## set the coefficients of the unselected variables exactly to zero
  b[c <= eps, ] = 0

  ## calculate the BIC
  sse = rep(0, D)
  df = 0
  nzero = (1:p)[c > eps]
  for (d in 1:D) {
    sse[d] = mean((y[[d]] - x[[d]] %*% b[,d])^2)
  }
  for (j in nzero) {
    norm1 = sqrt(sum(b[j,]^2))
    norm2 = sqrt(sum(b.ols[j,]^2))
    df = df + 1 + (D-1) * norm1 / norm2
  }
  bic = log(mean(sse)) + log(D*n) * df / (D*n)

  ## transform the betas and intercepts back to the original scale
  b.scaled = b   # coefficients on the standardized scale, kept for reference
  b0 = rep(0, D)
  coefficients = matrix(0, p+1, D)
  for (d in 1:D) {
    b[,d] = b[,d] / normx[[d]]
    b0[d] = meany[d] - b[,d] %*% meanx[[d]]
    coefficients[,d] = c(b0[d], b[,d])
  }

  ## output the selected variables:
  ## TRUE means "selected", FALSE means "NOT selected"
  varsel = abs(b[,1]) > 0

  return(list(coefficients=coefficients, bic=bic, varsel=varsel))
}

######################################################
# An example of how to use the "MI.lasso" function   #
######################################################

## Note: `newdata` is assumed to be a list of D = 5 imputed datasets in the
## format described above; a sketch of one way to construct such a list
## appears at the end of this file.

## specify the grid of tuning parameters
lamvec = (2^(seq(-1, 4, by=0.05)))^2 / 2

## compute the BIC for each candidate tuning parameter
bicvec = rep(0, length(lamvec))
for (i in 1:length(lamvec)) {
  fit = MI.lasso(newdata, 5, lambda=lamvec[i])
  bicvec[i] = fit$bic
}

## select the tuning parameter with the smallest BIC
opt = which.min(bicvec)

## fit the model with the optimal lambda
fit.opt = MI.lasso(newdata, 5, lambda=lamvec[opt])
selection = which(fit.opt$varsel)
print(fit.opt$coefficients)
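
#########################################################
# A sketch for constructing the example input `newdata` #
#########################################################

## The example above assumes the list `newdata` already exists. The code below
## is a minimal, hypothetical sketch of one way to build such a list: it
## simulates one complete dataset and then perturbs the covariates D times as
## a crude stand-in for D multiply-imputed datasets. In a real analysis the
## imputed datasets would come from an imputation procedure (e.g., the mice
## package), and all names and values below are illustrative only.
## (To run the example above, execute this block first.)

set.seed(2013)
n = 100                             # number of observations
p = 20                              # number of covariates
D = 5                               # number of "imputations"
X = matrix(rnorm(n * p), n, p)
beta = c(3, 2, 1.5, rep(0, p - 3))  # only the first three covariates matter
Y = as.vector(X %*% beta + rnorm(n))
newdata = vector("list", D)
for (d in 1:D) {
  # add small noise to mimic between-imputation variability
  Xd = X + matrix(rnorm(n * p, sd = 0.1), n, p)
  newdata[[d]] = cbind(Xd, Y)       # first p columns: X; last column: Y
}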