###################################################################
# Name              : KernelDensities.R
# Description       : This program generates graphs of kernel
#                     densities and computes Kolmogorov-Smirnov tests 
#                     of equality of densities between treatment 
#                     and control groups
###################################################################


# Fix the RNG seed so every random draw made later in this script is
# reproducible from run to run.
set.seed(08072018) 

# Load estimates from the true data.
# load() restores the objects saved in trueFM.R into the calling environment;
# presumably these include mean.mix, cov.mix and prob.mix used below -- confirm
# against the script that writes trueFM.R.
setwd(dir_outputFM)
load("trueFM.R")


#############################################################################
# 1. COMPUTE KOLMOGOROV-SMIRNOV TESTS OF EQUALITY OF DENSITIES
#############################################################################

#############################################################################
# 1.1. On true data 
#############################################################################

# Load data
setwd(dir_outputFM)
load("trueFM.R")

# Draw a dataset from the estimated mixture; the first element of the result
# is the data frame the tests are run on. It is also saved to alldata.R.
data          <- draw.factor.noins(mean.mix, cov.mix, prob.mix)
alldata       <- data[[1]]
# NOTE(review): attach() is kept on purpose -- `treat` below is resolved
# through the search path exactly as before.
attach(alldata)
save(alldata, file = "alldata.R")
n             <- nrow(alldata)

# Row indices of the control and treated observations, computed once.
rows_control  <- which(treat == 0)
rows_treated  <- which(treat == 1)

# Observed support (min, max) of each of the six variables.
support       <- matrix(NA, 6, 2)
for (j in seq_len(6)) {
  support[j, ] <- range(alldata[, j])
}

# Empirical CDFs of each variable, by treatment arm.
cdfC <- lapply(seq_len(6), function(j) ecdf(alldata[rows_control, j]))
cdfT <- lapply(seq_len(6), function(j) ecdf(alldata[rows_treated, j]))

# Evaluation grid for each variable: from its minimum to its maximum in
# steps of 0.01. The bootstrap statistics are evaluated on these same grids.
grid <- lapply(seq_len(6), function(j) {
  seq(min(alldata[, j]), max(alldata[, j]), 0.01)
})

# KS statistic: largest absolute gap between the control and treated ECDFs
# over the grid. ecdf() returns a vectorized function, so the whole grid is
# evaluated in one call.
maxabsdiff <- vapply(seq_len(6), function(j) {
  max(abs(cdfC[[j]](grid[[j]]) - cdfT[[j]](grid[[j]])))
}, numeric(1))


#############################################################################
# 1.2. On bootstrapped samples 
#############################################################################
# Put all estimates from bootstrapped samples together.
# allbootFM[[core]][[draw]] is flattened into the single list bootFMlong,
# with each core's draws written at offset (core - 1) * (allbsample / cores).
# NOTE(review): this assumes bsample <= allbsample / cores; otherwise entries
# from consecutive cores would overwrite each other -- confirm with the script
# that produces allbootFM.R.
load("allbootFM.R")
bootFMlong <- list()
per_core   <- allbsample / cores
# The loop indices deliberately avoid the name `c`, which would leave a
# numeric global shadowing base::c wherever `c` is passed as a value.
for (core_id in seq_len(cores)) {
  for (draw_id in seq_len(bsample)) {
    bootFMlong[[(core_id - 1) * per_core + draw_id]] <-
      allbootFM[[core_id]][[draw_id]]
  }
}


# Get dataset ready to perform tests.
#
# Draws one dataset from the mixture estimated on bootstrap replication `b`.
#
# Args:
#   b: index into the global list `bootFMlong`.
#
# Returns:
#   The first element of the list returned by draw.factor.noins() for the
#   estimates of replication `b`.
#
# Side effects:
#   attach()es the returned data (under the name "alldatab") to the search
#   path. This is kept because the calling loop looks up `treat` through the
#   search path; callers that loop over many replications should detach it.
#
# Reads the globals `bootFMlong`, `nG` (number of mixture components looped
# over) and `nM` (number of covariance blocks per component).
boot_data_draw <- function(b) {
  est       <- bootFMlong[[b]]
  prob.boot <- est$prob

  mean.boot <- lapply(seq_len(nG), function(g) est$mean[[g]])

  # Stack the nM covariance blocks of each component by rows, after forcing
  # each block to be positive definite. Building the stack from seq_len(nM)
  # also handles nM == 1, where a `2:nM` loop would run backwards and index a
  # block that does not exist.
  cov.boot <- lapply(seq_len(nG), function(g) {
    blocks <- lapply(seq_len(nM), function(m) {
      make.positive.definite(est$cov[[g]][[m]])
    })
    do.call(rbind, blocks)
  })

  alldatab <- draw.factor.noins(mean.boot, cov.boot, prob.boot)[[1]]
  attach(alldatab)
  alldatab
}

# Compute test statistic for each bootstrap.
# For every retained replication in `noflag`, a dataset is drawn from the
# bootstrap estimates and the recentred KS statistic is computed:
#   max over the grid of | (bootC - bootT) - (trueC - trueT) |
# Row k of bootmaxabsdiff holds the statistics of the k-th entry of `noflag`.
bootmaxabsdiff    <- matrix(NA, length(noflag), 6)
bootcdfC          <- list()
bootcdfT          <- list()

# Control-minus-treated ECDF gap in the true data on each grid. It does not
# depend on the bootstrap draw, so it is computed once outside the loop.
truediff <- lapply(seq_len(6), function(j) {
  cdfC[[j]](grid[[j]]) - cdfT[[j]](grid[[j]])
})

for (k in seq_along(noflag)) {
  b               <- noflag[[k]]
  n_search        <- length(search())
  alldatab        <- boot_data_draw(b)[, 1:6]
  print(b)

  # `treat` is looked up through the search path, where boot_data_draw() has
  # just attached the drawn data. Capture the arm indices, then detach that
  # frame so the search path does not grow by one entry per replication.
  rows_control_b  <- which(treat == 0)
  rows_treated_b  <- which(treat == 1)
  if (length(search()) > n_search) {
    detach(pos = 2L)
  }

  for (j in seq_len(6)) {
    bootcdfC[[j]] <- ecdf(alldatab[rows_control_b, j])
    bootcdfT[[j]] <- ecdf(alldatab[rows_treated_b, j])

    # ecdf() objects are vectorized: evaluate the whole grid in one call.
    bootdiff      <- bootcdfC[[j]](grid[[j]]) - bootcdfT[[j]](grid[[j]])
    bootmaxabsdiff[k, j] <- max(abs(bootdiff - truediff[[j]]))
  }
}


# Compute p-value.
# For each variable, the p-value is the number of bootstrap statistics that
# exceed the statistic observed in the true data, divided by `nof`.
# NOTE(review): `nof` is not defined in this file; presumably it equals
# length(noflag), the number of rows of bootmaxabsdiff -- confirm.
# `indicator` stays defined in case code outside this file uses it.
indicator        <- function(condition) ifelse(condition,1,0)

# Comparing against the scalar maxabsdiff[j] avoids building a length-`nof`
# vector that would be recycled silently if `nof` and the number of
# bootstrap rows ever disagreed.
kspvalue <- vapply(seq_len(6), function(j) {
  1 / nof * sum(bootmaxabsdiff[, j] > maxabsdiff[j])
}, numeric(1))

# Bootstrap critical values at the 5% and 10% levels.
cvalue95 <- vapply(seq_len(6), function(j) {
  unname(quantile(bootmaxabsdiff[, j], probs = 0.95))
}, numeric(1))
cvalue90 <- vapply(seq_len(6), function(j) {
  unname(quantile(bootmaxabsdiff[, j], probs = 0.90))
}, numeric(1))

# TRUE where the observed statistic exceeds the critical value (printed when
# the script is run at top level).
cvalue95 < maxabsdiff
cvalue90 < maxabsdiff

setwd(dir_outputFM)
save(maxabsdiff, cvalue95, cvalue90, kspvalue, file = "kstest.R")


#####################################################################################
# 2. GRAPH THE KERNEL DENSITIES FOR EACH GROUP AND INSERT THE P-VALUE OF THE KS TEST 
#####################################################################################
# Draw data from the estimated distribution of latent factors.
# This is a second call to draw.factor.noins() with the true-data estimates;
# it overwrites `data` and `alldata` from section 1.1 (the earlier draw was
# saved to alldata.R). Presumably the draw is random, so it differs from the
# first one -- confirm in draw.factor.noins.
data          <- draw.factor.noins(mean.mix, cov.mix, prob.mix)
alldata       <- data[[1]]
lnalldata     <- data[[2]]
# Both frames are attached; lnalldata is attached last, so its columns mask
# same-named columns of alldata on the search path. The subsetting below
# does not rely on this: it refers to lnalldata$treat explicitly.
attach(alldata)
attach(lnalldata)

# Control (treat == 0) and treated (treat == 1) rows of the first nF columns
# of lnalldata; these are the samples whose densities are plotted below.
gC <- lnalldata[which(lnalldata$treat==0),1:nF]
gT <- lnalldata[which(lnalldata$treat==1),1:nF]

# Load p-values for the KS test estimated above.
# This restores maxabsdiff, cvalue95, cvalue90 and kspvalue as saved at the
# end of section 1.
setwd(dir_outputFM)
load( file="kstest.R")

# Densities treated vs. control. The p-value annotation has to sit at a
# slightly different height on each graph, so the heights are listed per
# graph in `pvalue_ypos` and a single helper draws every graph.

# Descriptive names of the factors. Not referenced by the plotting code in
# this section (the graphs are drawn with an empty x-axis label).
label <- c("Child's cognitive skills at follow-up", 
           "Child's cognitive skills at baseline", 
           "Child's socio-emotional skills at follow-up", 
           "Child's socio-emotional skills at baseline", 
           "Material investment at follow-up", 
           "Time investment at follow-up", 
           "Mother's cognitive skills at baseline", 
           "Mother's socio-emotional at baseline")

# y coordinate of the "p-value diff" annotation on graphs 1 to 6.
pvalue_ypos <- c(0.46, 0.49, 0.50, 0.50, 0.36, 0.81)

# Write Density<f>.pdf in the current working directory: kernel density of
# column f for the treated (solid line) and control (dashed line) samples,
# annotated with the KS p-value.
#
# Args:
#   f:       column index of the factor to plot.
#   treated: data for the treated group (column f is plotted).
#   control: data for the control group (column f is plotted).
#   pvalue:  p-value to print, rounded to 3 decimals.
#   ypos:    y coordinate of the p-value annotation.
plot_density_pair <- function(f, treated, control, pvalue, ypos) {
  pdf(paste0("Density", f, ".pdf"))
  # Close the device even if one of the plotting calls fails.
  on.exit(dev.off(), add = TRUE)

  dens_treated <- density(treated[, f])
  dens_control <- density(control[, f])
  ymax <- max(max(dens_treated$y), max(dens_control$y))

  plot(dens_treated, xlab = "", main = "", ylim = c(0, ymax), xlim = c(-3, 3))
  lines(dens_control, lty = "dashed")
  legend("topleft", c("Treated", "Control"),
         lty = c(1, 2, 0), bty = "n", cex = 1)
  text(-3.2, ypos, paste("p-value diff:", round(pvalue, 3)), cex = 1, pos = 4)
  invisible(NULL)
}

setwd(dir_outputFM)
for (f in seq_along(pvalue_ypos)) {
  plot_density_pair(f, gT, gC, kspvalue[f], pvalue_ypos[f])
}

