###############################################################
############################################################### 
# DEFINE INPUTS OF THE FUNCTION 
############################################################### 

# y                 Data 
# group             Vector that indicates group membership
# nM                Number of mixtures 
# nF                Number of factors (including all periods) 
# nI                Number of instruments 
# freelambda        Matrix indicating normalized and free factor loadings (1=normalized, 2=free)
# mstep.start       Starting values for EM algorithm (M-step in particular) 
# param.start       Starting values for minimum distance estimator 
# conv              Convergence criterion for EM algorithm 


###############################################################
# DEFINITION OF FUNCTION STARTS HERE
############################################################### 
estim.meas.model.group        <- function(y, group, nM, nF, nI, conv, freelambda, freeuniq, freemean, mstep.start, param.start){
 
  
###############################################################
###############################################################
  # DEFINE PARAMETERS THAT WILL BE USED THROUGHOUT 
  # nZ                Number of measurements
  nZI <- ncol(y)       

###############################################################
###############################################################
# DEFINE SUBFUNCTIONS
# scanNA 
# estepfn 
# mstepfn

###############################################################
# Function to scan missing values 
###############################################################
scanNA <- function(x) c(which(complete.cases(x)==TRUE))


###############################################################
# ESTEP FUNCTION 
###############################################################
estepfn<-function(mstep, y){
  mean      <- mstep[[1]]  
  sigma     <- mstep[[2]]
  prop      <- mstep[[3]]
  
  probi <- matrix(0, nobs, nM)   # Probability of being in each mixture 
  
  invsigma_noNA  <- list()       # Compute these objects for people with complete data 
  
  for (m in 1:nM) invsigma_noNA[[m]] <- solve(sigma[[m]])
  
  invsigma_noNA  <- list()
  for (m in 1:nM) invsigma_noNA[[m]] <- solve(make.positive.definite(sigma[[m]]))
  
  Q_noNA <- list()
  for (m in 1:nM) Q_noNA[[m]] <- ((1/(2*pi))^(nZI/2))*((1/det(make.positive.definite(sigma[[m]])))^.5)
  
  # Loop over people
  for (i in 1:nobs){
    for (m in 1:nM){
      
      # If they have zero NA: 
      if (length(comp[[i]])==nZI){
        probi[i,m]  <-  prop[m]*Q_noNA[[m]]*exp(-.5*t(y[i,1:nZI]- mean[m,])%*%invsigma_noNA[[m]]%*%(y[i,1:nZI]-mean[m,]))
        
        # If they have some NA, do the step only over the non-missing values - need to adjust the variances and means   
      } else if (length(comp[[i]])<nZI){ 
        Q           <- ((1/(2*pi))^(length(comp[[i]])/2))*((1/det(make.positive.definite(sigma[[m]][comp[[i]],comp[[i]]]))^.5))
        invsigma    <- solve(make.positive.definite(sigma[[m]][comp[[i]],comp[[i]]]))
        probi[i,m]  <-  prop[m]*Q*exp(-.5*t(y[i,comp[[i]]] - mean[m,comp[[i]]])%*%invsigma%*%(y[i,comp[[i]]] - mean[m,comp[[i]]]))
      }
    } 
  } 
  
  # Calculate probability of each observation in each mixture 
  probiden               <-rowSums(probi*1e50)
  prob                   <-(probi*1e50)/(probiden)
  prob[is.na(prob)]      <-0    #replace NAs with zeros. NAs occur if both pdfs are zero
  estep                  <- prob
  
  loglik                 <- sum(rowSums(probi))
  
  return(list(estep, loglik))
}


###############################################################
# MSTEP FUNCTION 
############################################################### 
mstepfn<- function(estep,y) {
  prob     <- estep
  
  # Update means 
  meani     <-matrix(0, nM, nZI)
  mean      <- matrix(0, nM, nZI)
  propimean <-matrix(0,nM,nZI)
  
  for (m in 1:nM){ 
    for (i in 1:nobs){
      if (length(comp[[i]])==nZI){   
        meani[m,] <- meani[m,] + prob[i,m]*(y[i,1:nZI])  
        propimean[m,] <- propimean[m,]+prob[i,m]  
      } else if (length(comp[[i]])<nZI){ 
        temp <- matrix(0, nM, nZI)
        temp2 <- matrix(0, nM, nZI)
        temp[m,comp[[i]]] <- prob[i,m]*(y[i,comp[[i]]])
        meani[m,] <- meani[m,] + temp[m,]
        temp2[m,comp[[i]]] <- prob[i,m]
        propimean[m,] <- propimean[m,]+temp2[m,]
      }
      mean[m,] <- meani[m,]/propimean[m,]
    } 
  } 
  
  # Update variance covariance matrix   
  sigmai <- list()
  sigma  <- list()
  
  for (m in 1:nM){
    sigmai[[m]] <- mat.or.vec(nZI, nZI)
    
    for (i in 1:nobs){
      if (length(comp[[i]])==nZI){  
        
        sigmai[[m]]<-sigmai[[m]] + prob[i,m]*((y[i,1:nZI]-mean[m,])%*%t(y[i,1:nZI]-mean[m,]))
        
      } else if (length(comp[[i]])<nZI){ 
        temp <- matrix(0,nZI,nZI)
        temp[comp[[i]], comp[[i]]] <- prob[i,m]*((y[i,comp[[i]]]-mean[m,comp[[i]]])%*%t(y[i,comp[[i]]]-mean[m,comp[[i]]]))
        sigmai[[m]] <- sigmai[[m]] + temp 
      } 
      sigma[[m]] <- sigmai[[m]]/propimean[m,]
    }  
  }   
  
  #To deal with symmetric issues
  for (m in 1:nM) {
    sigma[[m]]<-forceSymmetric(sigma[[m]])
  }
  
  #To deal with the positive definite issues
  for (m in 1:nM) {
    sigma[[m]]<-make.positive.definite(sigma[[m]])
  }
  
  # Update weight 
  prop <- colSums(prob)/sum(colSums(prob))
  
  mstep<-list(mean, sigma, prop)
  return(mstep)
}

###############################################################
###############################################################
# IMPLEMENT E-M ALGORITHM 

mstepG <- list()

for (g in 1:nG){
###############################################################
# 1.1. Scan missing values 
###############################################################
  comp         <- apply(y[which(group==g),], 1, scanNA)   
  nobs         <- table(group)[g]
  mstepG.start <- mstep.start[[g]]

###############################################################
# 1.2. Initialize loop with mstep.start values 
###############################################################
  estep     <- estepfn(mstepG.start,y[which(group==g),])
  mstep     <- mstepfn(estep[[1]],y[which(group==g),])
  loglik    <- estep[[2]]
  
  estep     <- estepfn(mstep,y[which(group==g),])
  mstep     <- mstepfn(estep[[1]],y[which(group==g),])
  loglik    <- rbind(loglik, estep[[2]])
  
  iter      <- 1
  
###############################################################
# 1.3. Loop over the E and M steps until convergence 
###############################################################  
  while ((abs((loglik[iter+1]-loglik[iter])/loglik[iter])>conv)){
    if (iter <1500){
      
      iter      <- iter + 1
      estep     <- estepfn(mstep,y[which(group==g),])
      mstep     <- mstepfn(estep[[1]],y[which(group==g),])
      loglik    <- rbind(loglik, estep[[2]])
      print(paste("Iteration:", iter,sep=" ")) 
      print(paste("Convergence criterion:", abs(loglik[iter+1]-loglik[iter])/loglik[iter], sep=" "))  
    } else if (iter==1500){
      mstep <- "NA"
      break
    }
  } 
  mstepG[[g]] <- mstep
} 

###############################################################
# 1.4. Save output for each of the group
###############################################################  
newMstepG <- mstepG
for (g in 1:nG){
  newMstepG[[g]][[1]] <- mstepG[[g]][[1]][,1:nZ]
  for (m in 1:nM)    newMstepG[[g]][[2]][[m]] <- mstepG[[g]][[2]][[m]][1:nZ, 1:nZ]
  newMstepG[[g]][[3]] <- mstepG[[g]][[3]]
}

###############################################################
###############################################################
# IMPLEMENT MINIMUM DISTANCE ESTIMATOR TO ESTIMATE 
# PARAMETERS OF THE FACTOR JOINT DISTRIBUTION
# FACTOR LOADINGS AND VARIANCE OF MEASUREMENT ERROR 


##############################################################
# 2.1. Define objects to fill in the "param" vector properly 
##############################################################

endEps     <- length(which(freeuniq[upper.tri(freeuniq, diag=T)]==1))
upcorr     <- which(freeuniq[upper.tri(freeuniq, diag=T)]==1)

startCov   <- endEps + 1
endCov     <- endEps + nG*nM*0.5*nF*(nF+1)
covpos     <- matrix(seq(startCov, endCov, (0.5*nF*(nF+1))), nM, nG)

startMean  <- endCov + 1
endMean    <- startMean - 1 + length(rep(rep(1, nF), (nM-1) + nM*(nG-1))) 

nLambda    <- length(which(freelambda==2))

startLambda <- endMean + 1 
endLambda   <- startLambda - 1 + nLambda
lambdalong  <- rep(0, nZ*nF)
lambdalong[which(freelambda==1)]  <- 1 


##############################################################
# 2.2. Define function to optimize over 
##############################################################
mindistance      <-function(param) {
  
# 2.2.1 Fill in various objects with elements of param  
##############################################################
# Variances of the uniquenesses 
eps_par                    <- matrix(0, nZ, nZ)
eps_par[upper.tri(eps_par, diag=T)][upcorr] <- param[1:endEps]
eps_par                    <- eps_par + t(eps_par) - diag(nZ) * diag(eps_par)
  
# Variance-covariance matrices of the nM mixtures 
Lcovfactor_par    <- list()
covfactor_par     <- list()
  
for (g in 1:nG){
  Lcovfactor_par[[g]] <- list()  
  covfactor_par[[g]]  <- list()  
  for (m in 1:nM){
    Lcovfactor_par[[g]][[m]]                              <- matrix(0, nF, nF)
    lowerTriangle(Lcovfactor_par[[g]][[m]], diag=TRUE)    <- param[covpos[m,g]:(covpos[m,g]+ 0.5*nF*(nF+1)-1)]
    covfactor_par[[g]][[m]]                               <- Lcovfactor_par[[g]][[m]] %*% t(Lcovfactor_par[[g]][[m]])
  }
} 
  
# Means of the nM-1 mixtures  
# For one group we assume the overall mean to be 0, but not for the other groups 
meanpos                     <- matrix(0, nM, nG) 
meanpos[1:(nM-1),1]         <- seq(startMean, startMean+nF*(nM-2), nF)
meanpos[,2:nG]              <- matrix(seq(startMean+nF*(nM-2)+nF, endMean, nF), nM, nG-1)  
  
meanfactor_par              <- list()
meanfactor_par[[1]]         <- t(matrix(param[startMean:(startMean+(nM-1)*nF-1)], nF, (nM-1)))
for (g in 2:nG){ 
meanfactor_par[[g]]         <- matrix(0, nM, nF)
for (m in 1:nM){
meanfactor_par[[g]][m,]     <- param[meanpos[m,g]:(meanpos[m,g]+nF-1)]
}
}
  
# Factor loadings (same across groups)
lambdalong[which(freelambda==2)]  <- param[startLambda:endLambda]
lambda_par                        <- matrix(lambdalong, nZ, nF)
  

# 2.2.2 Extract output of the EM alrogithm and compute estimated 
# means and variance-covariances 
############################################################## 
mean_hat      <- list()
cov_hat       <- list()
prop_hat      <- list()

for (g in 1:nG){
mean_hat[[g]]       <- newMstepG[[g]][[1]]                                                   
cov_hat[[g]]        <- newMstepG[[g]][[2]]
prop_hat[[g]]       <- newMstepG[[g]][[3]]
}  
  
# Estimated means 
mean_par <- list()  
for (g in 1:nG){
mean_par[[g]] <-  matrix(0, nM, nZ)

for (m in 1:(nM-1)){ 
  mean_par[[g]][m,] <- lambda_par %*% as.matrix(meanfactor_par[[g]][m,])
}

if (length(which(freemean[g,]==1))!=0){
mean_par[[g]][nM,which(freemean[g,]==1)]<- lambda_par[which(freemean[g,]==1),] %*% as.matrix(meanfactor_par[[g]][nM,])
} 

for (j in which(freemean[g,]==0)){
mean_par[[g]][nM,j]<- - 1 * sum(prop_hat[[g]][1:(nM-1)] * mean_par[[g]][1:(nM-1),j])/prop_hat[[g]][nM]
} 
} 

# Squared distance between output of EM and functional form  
mean <- list()
for (g in 1:nG){
mean[[g]]  <- sum((mean_hat[[g]][1,]- mean_par[[g]][1,])^2)
for (m in 2:nM){ 
mean[[g]]  <- mean[[g]] + sum((mean_hat[[g]][m,] - mean_par[[g]][m,])^2)
} 
} 
                  
# Variance-covariance 
cov_par  <- list()
for (g in 1:nG){
cov_par[[g]] <- list()
for (m in 1:nM){
cov_par[[g]][[m]] <-lambda_par%*%covfactor_par[[g]][[m]]%*%t(lambda_par) + eps_par
} 
} 
  
cov <- list()
for (g in 1:nG){
cov[[g]]     <- sum(sum((cov_hat[[g]][[1]]-cov_par[[g]][[1]])^2))
for (m in 2:nM){
cov[[g]]    <- cov[[g]] + sum(sum((cov_hat[[g]][[m]]-cov_par[[g]][[m]])^2))
}
} 

# Sum of least squares 
sos                 <- 0 
for (g in 1:nG) sos <- sos + mean[[g]] + cov[[g]]
  
return(sos)
}


##############################################################
# 3. Perform minimum distance estimator  
##############################################################
out <- optim(param.start,mindistance, method=c("L-BFGS-B"), control=list(maxit=10000),
             lower=c(rep(0, nZ), rep(-Inf, length(param.start)-nZ)), upper=rep(Inf, length(param.start)))

##############################################################
# 4. Organize output from the minimum distance estimator
##############################################################
# Mixture weights 
prob_est <- list(mstepG[[1]][[3]], mstepG[[2]][[3]])

# Variance matrix of measurement error 
eps_est  <- out$par[1:endEps]

# Covariance matrices
Lcovfactor_est    <- list()
covfactor_est     <- list()
covpos <- matrix(seq(startCov, endCov, 0.5*nF*(nF+1)), nM, nG)

for (g in 1:nG){
  Lcovfactor_est[[g]] <- list()  
  covfactor_est[[g]] <- list()  
  for (m in 1:nM){
    Lcovfactor_est[[g]][[m]]                          <- matrix(0, nF, nF)
    lowerTriangle(Lcovfactor_est[[g]][[m]], diag=TRUE) <- out$par[covpos[m,g]:(covpos[m,g]+ 0.5*nF*(nF+1)-1)]
    covfactor_est[[g]][[m]]                           <- Lcovfactor_est[[g]][[m]] %*% t(Lcovfactor_est[[g]][[m]])
  }
} 

# Means  
meanpos                     <- matrix(0, nM, nG) 
meanpos[1:(nM-1),1]         <- seq(startMean, startMean+nF*(nM-2), nF)
meanpos[,2:nG]              <- matrix(seq(startMean+nF*(nM-2)+nF, endMean, nF), nM, nG-1)  

meanfactor_est           <-  list()
meanfactor_est[[1]]      <-  matrix(0, nM, nF)
meanfactor_est[[1]][1:(nM-1),]  <- t(matrix(out$par[startMean:(startMean+(nM-1)*nF-1)], nF, (nM-1)))
for (j in 1:nF){
  meanfactor_est[[1]][nM,j] <- - 1 * sum(prob_est[[1]][1:(nM-1)] * meanfactor_est[[1]][1:(nM-1),j])/prob_est[[1]][nM]
}

for (g in 2:nG){ 
   meanfactor_est[[g]]      <- matrix(0, nM, nF)
   for (m in 1:nM){
     meanfactor_est[[g]][m,]  <- out$par[meanpos[m,g]:(meanpos[m,g]+nF-1)]
   }
}


# Factor Loadings 
lambdalong[which(freelambda==2)]  <- out$par[startLambda:endLambda]
lambda_est  <- matrix(lambdalong, nZ, nF)

  
  
##############################################################
# 5. Fill in with instruments    
##############################################################
if (nZI==nZ){
  covall_est = covfactor_est
  meanall_est = meanfactor_est
}
if (nZI>nZ){
# Vector of means   
meanall_est <- list()
for (g in 1:nG){
  meanall_est[[g]] <- matrix(0,nM,nFI)
  for (m in 1:nM){
    meanall_est[[g]][m,1:nF]              <- meanfactor_est[[g]][m,1:nF]
    meanall_est[[g]][m,(nF+1):(nFI)]      <- mstepG[[g]][[1]][m,(nZ+1):nZI]
  }
} 
  
# Variance covariance matrix  
covall_est  <-list()
  for (g in 1:nG){
  covall_est[[g]] <- list()
  for (m in 1:nM){
  covall_est[[g]][[m]] <- matrix(0, nFI, nFI)
  covall_est[[g]][[m]][1:nF, 1:nF] <-  covfactor_est[[g]][[m]]
  for (ik in 1:nFI) {
    for (jk in (nF+1):nFI){
      covall_est[[g]][[m]][ik,jk] <- mstepG[[g]][[2]][[m]][startSeq[ik],startSeq[jk]]
  }
  }
  covall_est[[g]][[m]][(nF+1):nFI, 1:nF] <- t(covall_est[[g]][[m]][1:nF,(nF+1):nFI] )
} 
} 
} 

##############################################################
# 6. Group output to save from the function  
##############################################################
est <- list(prob=prob_est, eps=eps_est, cov=covall_est, mean=meanall_est, lambda=lambda_est)
all <- list(out, est, mstepG)

return(all)
} 