###################################################################
# Name              : MASTER_MC.R
# 
# Description       : Master program doing the following: 
#                     1. generating the data for the Montecarlo given a particular covariance structure 
#                     2. estimating the measurement system and factor scores for those data 
#                     3. estimating the production function by OLS and IV on those data 
###################################################################



###################################################################
# SETTINGS 
###################################################################

rm(list=ls())                      # Clear all objects from memory
set.seed(08072018)                 # Random number generator seed (This is the seed number that needs to be used to reproduce the work on R Version 3.3.1)

nMC             <- 10              # Number of samples for the MC
covMC           <- c("A")          # Covariance chosen for the error structure of the generated data - Select either "A" or "B"
generate       <-  "TRUE"          # Selected "TRUE" if want to generate the MC data; "FALSE" if want to go straight to estimation of the model

###################################################################
# DIRECTORIES
###################################################################

# Main working directory 
dir                 <- c("AER-2015-0183_data_appendix/")        # Write name of your main working directory
# Sub-directories 
dir_anal           <- paste(dir,c("code/R"), sep="") 
dir_functions      <- paste(dir,c("code/R/functions"), sep="")
dir_pf             <- paste(dir,c("code/R/specsPF"), sep="")
dir_data           <- paste(dir,c("data"), sep="")
dir_dataMC         <- paste(dir,"data_MC",covMC, sep="")

# Specification of the measurement model used to estimate the parameters used then to generate the MC data 
nameFM             <- c("latentfactors_noinstruments")                     # Name of the specification
inputFM            <- paste("SpecsFM/",nameFM, ".R", sep="")               # Location of the file with all the details of the specification

# Directory with estimates of the measurement system and factor scores on the true data 
dir_outputFM_true  <- paste(dir,c("output/"), nameFM, sep="")

# Directory with model estimates we use to generate the MC data 
dir_outputPF_true   <- paste(dir, c("output/"), nameFM, "/FirstStage_toyprice_foodprice_conflict", sep="")

# Directory with estimates of the measurement system and factor scores on the MC simulated data 
dir_outputFM       <- paste(dir,c("output/"), nameFM, "/MC", covMC, sep="")


###################################################################
# LOAD PACKAGES
###################################################################

library("MASS")
library("mixtools")
library("mvtnorm")
library("minpack.lm")
library("corpcor")
library("gdata")
library("Matrix")
library("ks")
library("psych")

###################################################################
# CALL FUNCTIONS
###################################################################

setwd(dir_functions)
source("call.data_func.R")
source("estim.meas.model.nonparametric.group_func.R")
source("bartlett.func.R")
source("bartlett.correction.mixt.func.R")
source("pf.ols.func.MC.R")
source("first.stage.func.MC.R")
source("rdform.func.MC.R")
source("md.flexfunc.MC.R")

if (generate=="TRUE"){
  
###################################################################
# Step 1: Load the true data and estimates of the factor scores, 
# investment and production functions on the true data, 
# which we will use to generate the MC data
###################################################################

data                 <- "measures.csv"

# Load data
setwd(dir_data)                             # Open directory that contains the data (true data and bootstrap datasets)
measures <- call.data(data)

# Load details of the empirical specification
setwd(dir_anal)                             # Open director that contains the detail of the specification
source(inputFM)                             # Load  file specifying the measurement system

# Load estimates of the factor model and factor scores 
setwd(dir_outputFM_true)
load("fs_true.R")
scores <- list(fs_true[[5]],fs_true[[6]])

# Define observables 
Zall         <- cbind(measures$meanprice_juglibros_log_st, measures$FUmeanprice_log_st, measures$tpobc_pop1993_st)
X1           <- cbind(measures$ln_nkids0_st) 
Xnames       <- c("kids")

mscoresall   <- rbind(scores[[1]], scores[[2]])
keep         <- which(complete.cases(cbind(Zall,X1, mscoresall)))
keep1        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==1),]))
keep2        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==2),]))
treatnomiss  <- measures$treat[keep]

fs   <- list()
fs[[1]] <- scores[[1]][keep1,]
fs[[2]] <- scores[[2]][keep2,]


# Assemble data of initial conditions factor scores and instruments
posIC      <- c(2, 7)   # Baseline cognition and mother's cognition 
IC         <- rbind(fs[[1]][,posIC], fs[[2]][,posIC])


###################################################################
# Step 2: Generate correlated errors for investments and production 
# functions (choose covariance A or B)
###################################################################
nV <- 3                # Number of variables we're generating (2 investments and one cognition)

setwd(dir_data)

if (covMC== "A" ){
  load("covA.R") 
  avg_within 
  avg_across 
}   

if (covMC== "B" ){
  load("covB.R") 
  avg_within 
  avg_across 
}     

# Number of villages and number of people by village 
code_village = unique(measures$cod_dane[keep])
le           =  rep(0,length(code_village))
for (v in 1:length(code_village))    le[v] <- length(which(measures$cod_dane[keep]==code_village[v])) 


# Build covariance matrix of spatially correlated errors for investments and cognition for each village 
cov_error <- list()
for (v in 1:length(code_village)) {
  cov_error[[v]] <- matrix(0, le[v]*nV, le[v]*nV)
  seqStart      <- seq(1,le[v]*nV, nV)
  
  
  # Fill in the diagonal blocks 
  for (i in 1:le[v]) {
    cov_error[[v]][seqStart[i]:(seqStart[i]+nV-1), seqStart[i]:(seqStart[i]+nV-1)] <- avg_within 
  } 
  
  # Fill in the off diagonal blocks 
  vec           <- t(combn( seqStart, 2)) 
  pairs         <- rbind(t(combn( seqStart, 2)), cbind(vec[,2], vec[,1]))
  
  for (j in 1:(le[v]*le[v] - le[v])){ 
    cov_error[[v]][(pairs[j,1]:(pairs[j,1]+nV-1)), (pairs[j,2]:(pairs[j,2]+nV-1))] <- avg_across 
  }
  
}
###################################################################
# Step 3: Upload parameter estimates that we willuse to 
# generate the MC data 
###################################################################
# Upload the parameter estimates of the investment function and 
# production function for cognition on the true data
setwd(dir_outputPF_true)
load("OUTPUT_pval.R")
load("OUTPUT_IV_pval.R")
iv_true_new                 <- iv_true
iv_true_new[9,1]            <- 0                  # set coefficient on time investments to 0 

# Upload estimates of the measurement system on the true data 
setwd(dir_outputFM_true)
load("trueFM.R")

# Pick lines of the factor loading matrices that correspond to these measures 
items_to_pick <- c(startSeq[1]:endSeq[1],
                   startSeq[2]:endSeq[2],
                   startSeq[5]:endSeq[5],
                   startSeq[6]:endSeq[6],
                   startSeq[7]:endSeq[7])
lambda_new    <- lambda[items_to_pick,c(1,2,5,6,7)]


###################################################################
# Step 4: Generate MC samples of measurements and save datasets 
###################################################################

for (b in 1:nMC){ 
  # Draw error for each person in each village using the covariance above 
  error     <- rep(0,nV)
  for (v in 1:length(code_village)) {
    error     <- rbind(error, t(matrix(mvrnorm(n = 1, rep(0,le[v]*nV), make.positive.definite(cov_error[[v]]) ),  3, le[v]))) 
  } 
  error <- error[-1,]

  
  X_Invest           <- cbind(rep(1,length(keep)), treatnomiss, IC,  Zall[keep,] )
  gen_Invest          <- X_Invest %*% invest_true[c(1,2,3,5,8:10),] +   error[,1:2]  
  
  X_PF                <- cbind(rep(1,length(keep)), treatnomiss, IC,   gen_Invest) # Only mat investment - Set coef on time investment to 0 
  gen_PF              <- X_PF %*% iv_true_new[c(1,2,3,5,8,9),1] +error[,3]
  
  
gen_latent_factors   <- cbind(gen_PF , 
                            c(fs[[1]][,2], fs[[2]][,2]), 
                            gen_Invest , 
                            c(fs[[1]][,7], fs[[2]][,7]))
  
  gen_Meas              <- gen_latent_factors %*% t(lambda_new)  + mvrnorm(n= length(keep), rep(0,dim(lambda_new)[1]), diag(eps[items_to_pick]))
 
  # Standardize measures so that they have mean 0 and sd 1 in the control group 
  means_control = colMeans(gen_Meas[which(treatnomiss==1),])
  sd_control = apply(gen_Meas[which(treatnomiss==1),], 2,sd)
  gen_Meas= (gen_Meas -  t( replicate(length(keep), means_control)))
  gen_Meas = gen_Meas / t( replicate(length(keep), sd_control))
  
  
  dataExport            <- cbind(gen_Meas,measures$treat[keep], measures$cod_dane[keep], measures$ln_nkids0_st[keep], 
                                 measures$meanprice_juglibros_log_st[keep], measures$FUmeanprice_log_st[keep], measures$tpobc_pop1993_st[keep])
  
  colnames(dataExport)  <- c( colnames(gen_Meas), "treat", "cod_dane", "ln_nkids0", "meanprice_juglibros_log", "FUmeanprice_log", "tpobc_pop1993")
  

  # Save the datasets 
setwd(dir_dataMC)
if (b==1) {
  write.csv(dataExport , file=paste("measures.csv"), row.names=FALSE) 
}
write.csv(dataExport , file=paste("measures_b", b, ".csv", sep=""), row.names=FALSE) 
} 

} 


###################################################################
# ESTIMATION OF MODEL WITH SIMULATED DATA 
###################################################################

# Boostrap settings 
Bootstrap          <- 1                                      # 1 if run the bootstrap; 0 otherwise
onlyboot           <- 0                                      # 1 if we only want to perform the bootstrap; 0 if we want to estimate the model on true data and bootstrap data
node               <- 1                                      # Number of nodes (or R windows) on which to run the estimation for bootstrapped samples)
bsample            <- 10                                    # Number of bootstrap samples on each node
pos                <- seq(1,1,1)                             # Nodes used for the boostrap
nameBootFM         <- paste("BootFM", node, ".R", sep="")    # Name of the output from estimation of the measurement system on the n-th node
nameBootFS         <- paste("BootFS", node, ".R", sep="")    # Name of the output from estimation of the factor scores on the n-th node

# EM estimation algorithm settings
conv               <- 1e-6                                   # Convergence criterion

# Model specification
nameFM             <- c("latentfactors_noinstruments_MC")                    # Name of the measurement system specification file
inputFM            <- paste("specsFM/",nameFM, ".R", sep="")              # Location of the file with all the details of the specification

# Define directories where we will save the results for that measurement system specification 
dir_outputFM       <- paste(dir,c("output/"), nameFM, covMC, sep="")   


###################################################################
# 1. LOAD DATA AND SPECS OF THE MEASUREMENT MODEL
###################################################################
# Load data
dir_data         <- paste(dir,"data_MC",covMC, sep="")

setwd(dir_data)                             # Open directory that contains the data (true data and bootstrap datasets)
measures <- call.data("measures_b1.csv")

# Load details of the empirical specification
setwd(dir_anal)                             # Open director that contains the detail of the specification
source(inputFM)                             # Load  file specifying the measurement system

################################################################### 
# 2. Estimate measurement system and assemble output from parallel 
#    sessions together
################################################################### 

setwd(dir_anal)
source("EstimateMeasurementSystem.R")

setwd(dir_anal)
source("AssembleBootstrapFM.R")

################################################################### 
# 3. Estimate factor scores and assemble output from parallel 
#    sessions together
#################################################################### 
setwd(dir_anal)
source("EstimateFactorScores.R")

setwd(dir_anal)
source("AssembleBootstrapFS.R")


################################################################### 
# 4. Estimate investment and production functions 
#################################################################### 
# Production function by OLS 
namePF            <- c("CobbDouglas_OLS_MC")
dir_outputPF      <- paste(dir,c("output/"), nameFM,covMC, "/", namePF, sep="")
setwd(dir_pf)
source(paste(namePF, ".R", sep=""))

# First stage with toyprice, foodprice, and conflict
nameFirstStage <- c("FirstStage_toyprice_foodprice_conflict_MC")
dir_outputPF      <- paste(dir,c("output/"), nameFM, covMC, "/",  nameFirstStage, sep="")
setwd(dir_pf)
source(paste(nameFirstStage, ".R", sep=""))

# Production function by IV
setwd(dir_pf)
posExo       <- c(1:4)
posInv       <- c(5:7)
source("CobbDouglas_IV_MC.R")
