###################################################################
# Name              : FirstStage_toyprice_foodprice_conflict_MC.R
# Description       : This program estimates of the investment functions 
#                     (first stage) and reduced form equations with 
#                     toy price, food price and conflict as instruments 
#                     for the simpler model used in the MC exercise
###################################################################


Xnames       <- c( "kids")
Znames       <-  c("toyprice", "foodprice", "conflict")

posInv       <- c(5:7)
posInvX      <- c(6:8)

# Load estimates of the investment function on the true data used to generate the data 
# to perform the rank and cragg donald test 
setwd(dir_outputPF_true)
load("OUTPUT_pval.R")
invest_true_param    <- invest_true[c(1,2,3,5,8,9,10),]

# Dimensions of the investment functions 
dim_inv <- dim(invest_true_param)

###################################################################
# Define function to estimate first stage and reduced form by OLS
# This function produces "naive" estimates not correcting for the 
# fact that we're using factor scores as well as adjusted 
# estimates correcting for that fact 
###################################################################

first.rdform.estim.func             <- function(scores, data, lambda, eps, mean, cov,  missing, parametric){
  
  # Load data
  setwd(dir_data)     
  measures <- read.csv(data, header=T)
  measures <- measures[order(measures$treat), ]
  n        <- nrow(measures)
  
  # Get estimates of measurement system Ineed for estimation of correction bias 
    totmean <- cbind(rep(0, nF), mean)
  
  
  
  # Define observables 
  Zall         <- cbind(measures$meanprice_juglibros_log_st, measures$FUmeanprice_log_st, measures$tpobc_pop1993_st)
  X1           <- cbind(measures$ln_nkids0_st) 
  Xnames       <- c("kids")
  
  mscoresall   <- rbind(scores[[1]], scores[[2]])
  keep         <- which(complete.cases(cbind(Zall,X1, mscoresall)))
  keep1        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==1),]))
  keep2        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==2),]))
  treatnomiss  <- measures$treat[keep]
  
  if (missing=="FALSE"){
    fs   <- list()
    fs[[1]] <- scores[[1]][keep1,]  
    fs[[2]] <- scores[[2]][keep2,]  
  } else if (missing=="TRUE"){
    fs   <- scores
  }
  
  pos           <- c(2,5)
  
  # First stage and reduced form with all instruments 
  Z1          <-  cbind(measures$meanprice_juglibros_log, measures$FUmeanprice_log,  measures$tpobc_pop1993)
  Znames      <-  c("toyprice", "foodprice",  "conflict")
  Z           <-  Z1[keep,]
  X           <-  X1[keep,]
  
  # Define specification of first stage and reduced form 
    RHSnames    <- c("treat", "cog0", "cogmo",  Znames)
    pos           <- c(2,5)

  
  
  out.first.stage   <-  first.stage.func.MC(fs, X, Xnames, Z, Znames, treatnomiss, RHSnames, pos, totmean,mean, cov, prob, parametric)
  
  out.rdform        <-  rdform.func.MC(fs, X, Xnames, Z, Znames, treatnomiss, RHSnames, pos, totmean,mean, cov, prob, parametric)
  
  regdata           <- data.frame(cbind(rbind(fs[[1]], fs[[2]]),  Z, treatnomiss))
  names(regdata)    <- c( "cog1",  "cog0", "mat", "time",  "cogmo",  Znames, "treat") 
  
  return(list(out.first.stage, out.rdform, regdata))
} 


###################################################################
# Estimate first stage and reduced form equations on the 
# simulated datasets 
###################################################################

    
setwd(dir_outputFM)
load("fs_bstrap.R")
    
# Define output 
invest_bstrap <- array(0, dim=c(dim_inv[1], dim_inv[2], nof))
investbias_bstrap <- array(0, dim=c(dim_inv[1], dim_inv[2], nof))
cf_bstrap    <- list()
    
rdform_bstrap <- array(0, dim=c(dim_inv[1], dim_inv[2], nof))
rdformbias_bstrap <- array(0, dim=c(dim_inv[1], dim_inv[2], nof))
    
    
for (b in 1:nof){
 out_bstrap   <- first.rdform.estim.func(fs_bstrap_allBFGS[[noflag[b]]], paste("measures_b", noflag[b], ".csv", sep=""), 
                                                lambdaBoot[,,noflag[b]],
                                                epsBoot[,noflag[b]],
                                                meanBoot[,,noflagFS[b]], 
                                                list(covBoot[,,noflagFS[b],1],covBoot[,,noflagFS[b],2]), 
                                                missing="FALSE", parametric=0)
 print(b)
 # Rename output 
 invest_bstrap[,,b]        <- out_bstrap[[1]][[1]]
 investbias_bstrap[,,b]    <- out_bstrap[[1]][[2]]
 cf_bstrap[[b]]            <- out_bstrap[[1]][[3]]
 
 rdform_bstrap[,,b]        <- out_bstrap[[2]][[1]]
 rdformbias_bstrap[,,b]    <- out_bstrap[[2]][[2]]
 
 regdata                    <- out_bstrap[[3]]
 save(regdata, file=paste("regdata_bstrap", b, ".R", sep=""))
 
 }
    
     
      



 

###################################################################
# Compute optimal weighting matrix   
###################################################################
rdform_cog        <- rdform_bstrap[,1,]  
cogrdf_var        <- 1/(nof - 1) * (rdform_cog  - rowMeans(rdform_cog)) %*% t(rdform_cog  - rowMeans(rdform_cog))


####################################################################
# Rank and Cragg Donald tests 
####################################################################

indicator        <- function(condition) ifelse(condition,1,0)

## Rank test
rank.stat.func               <- function(invest, tt){
  coef                       <- as.matrix(invest[tt,])
  rank                       <- min(eigen(t(coef) %*% coef)$values)
  return(rank)
}

rank.pvalue.func          <- function(rank_true, rank_boot, nof){

  test_sd                 <- sd(rank_boot)
  test_centered_boot      <- (rank_boot - rank_true)/test_sd
  pvalue                  <- 1/nof * (sum(indicator(test_centered_boot>(rank_true/test_sd))))
  return(pvalue)
}

rank_true               <- rank.stat.func(invest_true_param, c(posInv ))
rank_bstrap             <- rep(NA, nof)
for (i in 1:nof)        rank_bstrap[i] <- rank.stat.func(invest_bstrap[,,i], c(posInv ))
out_ranktest1           <- rank.pvalue.func(rank_true, rank_bstrap, nof)

rank_true                <- rank.stat.func(invest_true_param, c(2, posInv ))
rank_bstrap              <- rep(NA, nof)
for (i in 1:nof)         rank_bstrap[i] <- rank.stat.func(invest_bstrap[,,i], c(2, posInv ))
out_ranktest2            <- rank.pvalue.func(rank_true, rank_bstrap, nof)

out_ranktest <- c(out_ranktest1, out_ranktest2)

# Cragg-Donald test
cd.stat.func       <- function(data, cf, invest, tt, ttZ){
  n                <- nrow(cf)
  Sv               <- 1/n * t(cf) %*% cf
  Z                <- as.matrix(data[,ttZ])
  pi               <- as.matrix(invest[tt,])
  CD               <- min(eigen(Sv^0.5 %*% t(pi) %*% t(Z) %*% Z %*% pi %*% Sv^0.5)$values)
  return(CD)
}
cd.pvalue.func      <- function(cd_true, cd_boot, nof){
  test_sd                 <- sd(cd_boot)
  test_centered_boot      <- (cd_boot - cd_true)/test_sd
  pvalue                  <- 1/nof * (sum(indicator(test_centered_boot>(cd_true/test_sd))))
  return(pvalue)
}
  
cf_bstrap              <- array(NA, dim=c(nrow(regdata), 2, nof))
regdata_bstrap         <- array(NA, dim=c(nrow(regdata), ncol(regdata), nof))

for (b in 1:nof){
  setwd(dir_data)
  load(paste("regdata_bstrap", b, ".R", sep=""))
  data_Xnoint           <- regdata[,c(-1, -3, -4, -ncol(regdata))]
  n                     <- length( regdata$mat)
  cf_mat                <- as.matrix(regdata$mat, n, 1)  - matrix(1,n, 1) %*% invest_bstrap[1,1,b]    -  as.matrix(regdata$treat, n, 1) %*% invest_bstrap[2,1,b] - as.matrix(data_Xnoint) %*% invest_bstrap[3:7,1,b]
  cf_time               <- as.matrix(regdata$time, n, 1)  - matrix(1,n, 1) %*% invest_bstrap[1,2,b]  -   as.matrix(regdata$treat, n, 1) %*% invest_bstrap[2,2,b] - as.matrix(data_Xnoint) %*% invest_bstrap[3:7,2,b]
  cf_bstrap[,,b]        <- cbind(cf_mat, cf_time)
  regdata_bstrap[,,b]   <- as.matrix(regdata)
  }

cf_true           <- apply(cf_bstrap, c(1,2), mean)
regdata_true      <-  apply(regdata_bstrap, c(1,2), mean )  
invest_true       <- apply(invest_bstrap, c(1,2), mean)
  
cd_true                   <- cd.stat.func(regdata_true, cf_true, invest_true_param, posInv, posInvX)
cd_bstrap                 <-  rep(NA, nof)
for ( i in 1:nof) cd_bstrap[i]        <- cd.stat.func(regdata_true, cf_true, invest_bstrap[,,i], posInv,posInvX)
out_cdtest1                 <- rank.pvalue.func(cd_true, cd_bstrap, nof)

cd_true                   <- cd.stat.func(regdata_true, cf_true, invest_true_param, c(posInv,2), c(posInvX,9))
cd_bstrap                 <-  rep(NA, nof)
for ( i in 1:nof) cd_bstrap[i]        <- cd.stat.func(regdata_true, cf_true, invest_bstrap[,,i], c(posInv ,2),c(posInvX, 9))
out_cdtest2                 <- rank.pvalue.func(cd_true, cd_bstrap, nof)
out_cdtest <- c(out_cdtest1, out_cdtest2)


##############################################################################
# Create table of estimates (point estimate followed by bootstrapped 
# standard errors, 95% ci, p-value one tail test and p-value two tailed test)
############################################################################## 
outtrue     <- list(invest_true, rdform_true)  
outb        <- list(invest_bstrap, rdform_bstrap) 

ncol <- c(2,1)
outtable        <- list()
for (s in 1:2) {
  outtable[[s]]        <- matrix(0, dim_inv[1]*5, ncol[s])
 
   for (i in 1:dim_inv[1]){
    for (j in 1:ncol[s]){

      
      mean             <- mean(outb[[s]][i,j,])
      se               <- sd(outb[[s]][i,j,])
      t_stat           <- mean/se
      t_crit           <- (outb[[s]][i,j,] - mean)/se
      p_val            <- 1 - ecdf(t_crit)(t_stat)
      p_val2           <- 1 - ecdf(t_crit^2)(t_stat^2)
      
      outtable[[s]][(i*5-4), j]  <- round(mean,3)
      outtable[[s]][(i*5-3),j]   <- round(se,3) 
      outtable[[s]][(i*5-2), j]  <- paste("[", round(quantile(outb[[s]][i,j,], .025),3), ",", 
                                          round(quantile(outb[[s]][i,j,], .975),3), "]", sep="") 
      outtable[[s]][(i*5-1), j]  <- p_val 
      outtable[[s]][i*5, j]      <- p_val2 
      
    } 
  } 
} 


# Add rank and cragg donald tests to investment table
outtable[[1]]   <- rbind(outtable[[1]],  out_ranktest, out_cdtest)

# Name output rows and columns 
basic    <- c("Intercept", "", "","","",
              "Treat", "", "", "","",
              "cog0","", "", "","",
              "cogmo", "", "", "","") 

invnames <- basic 
for (l in 1:length(c(Znames))) invnames     <- c(invnames, c(Znames)[l] , "", "", "", "")
rdfnames  <- invnames 
invnames   <- c(invnames, "Rank test", "Cragg-Donald test")


rownames(outtable[[1]]) <- invnames 
rownames(outtable[[2]]) <- rdfnames
colnames(outtable[[2]]) <- c("Cognitive skills") 
colnames(outtable[[1]]) <- c("Material investment", "Time investment")


# Save output 
setwd(dir_outputPF)
save(invest_bstrap,  rdform_bstrap, file=paste("OUTPUT", ".R", sep=""))
write.csv(outtable[[1]], file=paste("INVEST",  ".csv", sep=""))
write.csv(cbind(outtable[[2]]), file=paste("RDF",  ".csv", sep=""))


