

* Load the main analysis file, discarding anything currently in memory.
use "mainsample.dta", clear

**************************************** Table 1 : Summary stats by college type ****************************************************
* Keep only the rows where the variable sample evaluates as true (nonzero).
keep if sample

* Variables whose means are compared between the two groups of public.
local sumvars salary per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 year2 year3 year4 year5 gendercode st_code1 st_code2 st_code3 finalmarks

* One two-sample t test per variable, grouped by public.
foreach v of local sumvars {
    ttest `v', by(public)
}

***********************************************************************************************************************************
*** Table 2: parametric RD test scores z scores ***

* Three control sets that differ only in the per12 terms:
*   ctrl1 uses per12; ctrl2 adds per12sq; ctrl3 adds per12sq and per12cube.
local ctrl1 per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 year3 year4 year5 gendercode st_code1 st_code2
local ctrl2 per12 per12sq dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 year3 year4 year5 gendercode st_code1 st_code2
local ctrl3 per12 per12sq per12cube dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 year3 year4 year5 gendercode st_code1 st_code2

* Full sample: one OLS regression with robust standard errors per control set.
forvalues j = 1/3 {
    regress testscores_n public `ctrl`j'', vce(robust)
}

* The same three regressions inside symmetric windows of centeredScores:
* 15, then 10, then 5 points on either side of zero (endpoints included).
foreach bw of numlist 15 10 5 {
    forvalues j = 1/3 {
        regress testscores_n public `ctrl`j'' if inrange(centeredScores, -`bw', `bw'), vce(robust)
    }
}
  

*******************************************************************************************************************************


*** Table 3: parametric estimates RD training, occupation, and sector ***

use "sample_surveymodulesBFNO.dta", clear
* outcomes: diploma, post graduate degree, professional specialization, studied or worked abroad,
* tertiary sector, skill occupation, public sector firm, number of jobs held

* Right-hand side: public first, followed by the control variables.
local rhs public per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2
* Outcomes that share one specification.
local outcomes dip post_deg prof abroad tert_sec skill_occ public_firm

* OLS with robust standard errors for each outcome, using only observations
* whose centeredScores lies between -5 and 5 (inclusive).
foreach y of local outcomes {
    regress `y' `rhs' if inrange(centeredScores, -5, 5), vce(robust)
}

* Number of jobs held: same specification, additionally restricted to no_jobs below 90.
* NOTE(review): values of 90 and above are presumably nonresponse codes - confirm against the codebook.
regress no_jobs `rhs' if inrange(centeredScores, -5, 5) & no_jobs < 90, vce(robust)

************************************************************************************************************************************

*** Table 4: parametric RD salary using midpoints of the bins ***

use "mainsample.dta", clear

** regressions

* Three control sets that differ only in the per12 terms:
*   ctrl1 uses per12; ctrl2 adds per12sq; ctrl3 adds per12sq and per12cube.
local ctrl1 per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2
local ctrl2 per12 per12sq dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2
local ctrl3 per12 per12sq per12cube dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2

* column 1: full sample
forvalues j = 1/3 {
    regress midpointsalary public `ctrl`j'', vce(robust)
}

* columns 2-4: windows of 15, 10 and 5 points on either side of zero
* in centeredScores (endpoints included), three control sets each.
foreach bw of numlist 15 10 5 {
    forvalues j = 1/3 {
        regress midpointsalary public `ctrl`j'' if inrange(centeredScores, -`bw', `bw'), vce(robust)
    }
}


* summarize to interpret the coefficients
summarize midpointsalary, detail

* IV reported in paper
* Instrument: eligi is 1 when centeredScores is strictly above 0 and 0 otherwise.
* Stata treats a missing value as larger than any number, so a bare "> 0" test
* would flag rows with a missing score as eligible. Those rows are left missing
* here and therefore drop out of the IV estimation instead of being miscoded.
generate byte eligi = centeredScores > 0 if !missing(centeredScores)

* public is instrumented by eligi; robust standard errors.
ivreg midpointsalary (public = eligi) per12 dumCBSE age focc2-focc6 gendercode st_code1 st_code2 year2-year5, robust
   
   *************************************************************************************************************************************
******************************************************* End of Main Tables****************************************************
   
 ***************************** *****************************Main Figures******************************************************************
   * Design changes in graph editor
* cmogram is a user-written command. Download it from SSC only when it is not
* already installed, so the script neither needs a network connection on every
* run nor stops here when working offline with cmogram already present.
capture which cmogram
if _rc {
    ssc install cmogram
}

* Figure 1
cmogram public centeredScores, lfitci cutpoint(0) scatter histopts(bin(15))

* Figure 2
cmogram finalmarks centeredScores, lfitci cutpoint(0) scatter histopts(bin(15))

* Figure 3
cmogram salary centeredScores, lfitci cutpoint(0) scatter histopts(bin(15))

*********************************************************************************************************************************************



**************************************************Appendix Figures and Tables ************************************************************

******** **************************************Appendix Figures******************************************************************************


* Appendix Figure 1

use "sample_surveymodulesBFNO.dta", clear

* Learning from college classes:
* local polynomial smooth of learn_class on centeredScores with a confidence band.
twoway lpolyci learn_class centeredScores, ///
    xtitle(Normalized Senior Secondary School Exam Scores) ///
    ytitle(Learn From College Classes)

**************************************************************************************************************************

* Appendix Figure 2 - earnings effect (salary) using IK 2014 bandwidth
use "mainsample.dta", clear

* rd of salary on centeredScores with the cutoff at 0, restricted to salary
* below 98; graph requests the plot and mbw(100) uses 100 percent of the
* default bandwidth.
rd salary centeredScores if salary < 98, z0(0) mbw(100) graph ox

*****************************************************************************************************************************

* Appendix Figure 3 - continuous density of the running variable
* install the DCdensity ado file
* Available here https://eml.berkeley.edu/~jmccrary/DCdensity/

* DCdensity writes its results into the variables named in generate(), and
* fails if they already exist. Remove leftovers from an earlier run one at a
* time: capture keeps the script going when a variable is absent (as it is on
* a freshly loaded dataset), and looping avoids one missing name blocking the
* removal of the others.
foreach v in Yj Xj r0 fhat se_fhat {
    capture drop `v'
}

DCdensity centeredScores if _followup==3 & open==1 & miss_foc==1 & (q29==1 | q29==6), breakpoint(0.05) generate(Xj Yj r0 fhat se_fhat)

*************************************************************************************************************************
* Appendix Figure 4 - Smooth (continuous) background characteristics; Johnston and Mas 2018 based method

* Private college sample based index of background chars:
* regress salary on the background characteristics using only public==0 rows
* with salary below 98, then store the fitted values.
regress salary dumCBSE st_code1 st_code2 gendercode age focc1-focc7 year3-year5 ///
    if salary < 98 & public == 0, robust

predict sal_private

cmogram sal_private centeredScores if salary < 98, lfitci cutpoint(0) scatter histopts(bin(15))


* Inframarginal sample (outside the 15 point interval) based index.
* The running variable is spelled out in full (centeredScores) so the command
* does not depend on variable-name abbreviation being enabled. Rows with a
* missing score are excluded explicitly, because Stata would otherwise count
* a missing value as satisfying ">= 15".
regress salary dumCBSE st_code1 st_code2 gendercode age focc1-focc7 year3-year5 ///
    if salary < 98 & (centeredScores <= -15 | centeredScores >= 15) & !missing(centeredScores), robust

predict sal_tails15

cmogram sal_tails15 centeredScores if salary < 98, lfitci cutpoint(0) scatter histopts(bin(15))

************************************************************************************************************************ 
 
 *Appendix Figure 5
 * probability of survey success

 * clear is required: the dataset in memory was modified by the preceding
 * steps, and use refuses to replace changed data without it.
 use "surveysuccess15.dta", clear

 * Separate local polynomial smooths (with confidence bands) of insurvey on
 * centeredScores on each side of zero, within 15 points, and a dashed
 * vertical line at zero. Each line-continuation marker is preceded by a
 * blank, which Stata requires for it to be recognised.
 twoway lpolyci insurvey centeredScores if centeredScores < 0 & centeredScores >= -15, ///
     lwidth(medium) ///
     || lpolyci insurvey centeredScores if centeredScores >= 0 & centeredScores <= 15, ///
     lwidth(medium) lcolor(navy) xline(0, lwidth(medium) lpattern(dash)) legend(off) ///
     xtitle(Normalized senior Secondary School Exam Scores) ytitle(Probability of Being Surveyed)

 ************************************************************************************************** 
 
 * Appendix Figure 6
 * Probability of being employed by gender

 * clear makes the load independent of whether the data in memory has been
 * modified, matching the other use commands in this file.
 use "surveydata_mainmodules.dta", clear

 * One figure per value of gendercode (1, then 0), using observations with
 * centeredScores of -20 or above.
 foreach g in 1 0 {
     cmogram emp_2 centeredScores if centeredScores >= -20 & gendercode == `g', lfitci cutpoint(0) scatter histopts(bin(15))
 }

 ********************************************************************************************************
 
 * Appendix Figure 7
 * Prob of reporting salary

 * Local polynomial smooths (with confidence bands) of havesalary on
 * centeredScores, fitted separately below zero (scores above -20) and from
 * zero upward (scores below 20), both with the same fixed bandwidth, plus a
 * dashed vertical line at zero.
 twoway lpolyci havesalary centeredScores if centeredScores < 0 & centeredScores > -20, ///
        lwidth(medium) bwidth(3.5228087) ///
     || lpolyci havesalary centeredScores if centeredScores >= 0 & centeredScores < 20, ///
        lwidth(medium) bwidth(3.5228087) lcolor(navy) ///
        xline(0, lwidth(medium) lpattern(dash)) ///
        legend(off) xtitle(Normalized senior Secondary School Exam Scores)


 
   *************************************************Appendix Tables *************************************************************
   
   *** Appendix Table 1 - Summ stats in 5 point interval ***

   * insample flags those who indicated their salary (salary below 98) and
   * have a nonmissing focc1.
use "mainsample.dta", clear
generate insample = (salary < 98 & focc1 < .)

* Variables compared between the two groups of public.
local sumvars salary per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 year2 year3 year4 year5 gendercode st_code1 st_code2 st_code3 finalmarks

* Two-sample t test per variable, for insample observations whose
* centeredScores lies between -5 and 5 (inclusive).
foreach v of local sumvars {
    ttest `v' if insample == 1 & inrange(centeredScores, -5, 5), by(public)
}
   
   *****************************************************************************************************************************
   *** Appendix Table 2 - Non Parametric RD for salary ***
   ** install rd if not already installed
   ** ssc install rd

   ** reported coeff and std error for lwald

** triangle kernel (the rd default) without controls
rd midpointsalary public centeredScores, z0(0) bw(10)
rd midpointsalary public centeredScores, z0(0) bw(7.5)
rd midpointsalary public centeredScores, z0(0) bw(5)
rd midpointsalary public centeredScores, z0(0) mbw(100)

** rectangle kernels without controls
rd midpointsalary public centeredScores, z0(0) bw(10) k(rec)
rd midpointsalary public centeredScores, z0(0) bw(7.5) k(rec)
rd midpointsalary public centeredScores, z0(0) bw(5) k(rec)
* The kernel option name was missing on this line, leaving a bare "(rec)".
rd midpointsalary public centeredScores, z0(0) mbw(100) k(rec)

  
  *************************************************************************************************************************
  
 *** Appendix Table 3 - Survey success by college type ***

 * clear is required: insample was generated on the dataset currently in
 * memory, and use refuses to replace changed data without it.
 use "surveysuccess.dta", clear

 * Same regression of insurvey on the covariates, fitted separately for
 * public==1 (stored as a2) and public==0 (stored as b2).
 local covs st_code1 st_code2 age focc1 focc2 focc5 year2 year3

 regress insurvey `covs' if public == 1
 estimates store a2
 regress insurvey `covs' if public == 0
 estimates store b2

 * Combine the two fits so coefficients can be compared across them.
 suest a2 b2

 * Test equality of each coefficient between the two fits
 * (same order as the original list of tests).
 foreach v in st_code1 st_code2 age focc1 focc5 focc2 year2 year3 {
     test [a2_mean]`v' = [b2_mean]`v'
 }



  
******************************************************************************************************************************  
  
*** Appendix Table 4 - Parametric RD probability of being employed ***

* clear is required: the stored estimates from the previous table add
* variables to the dataset in memory, and use refuses to replace changed data
* without it.
use "surveydata_mainmodules.dta", clear

* Three control sets that differ only in the per12 terms:
*   l uses per12; s adds per12sq; c adds per12sq and per12cube.
local l per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2
local s per12 per12sq dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2
local c per12 per12sq per12cube dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2

** full sample
foreach spec in l s c {
    regress emp_2 public ``spec'', robust
}

** 15, 10 and 5 point intervals of centeredScores (endpoints included)
foreach bw of numlist 15 10 5 {
    foreach spec in l s c {
        regress emp_2 public ``spec'' if centeredScores >= -`bw' & centeredScores <= `bw', robust
    }
}

****************************************************************************************************************************************

  
  *** Appendix Table 5 - pr of employment by gender ***

* Right-hand side: public first, followed by the control variables.
local rhs public per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 gendercode st_code1 st_code2

* For gendercode 0 and then gendercode 1: OLS with robust standard errors in
* windows of 15, 10 and 5 points of centeredScores (endpoints included).
foreach g in 0 1 {
    foreach bw of numlist 15 10 5 {
        regress emp_2 `rhs' if inrange(centeredScores, -`bw', `bw') & gendercode == `g', vce(robust)
    }
}
     
   
*************************************************************************************************************************************   
  
  
  
*** Appendix Table 6 - Parametric RD Salary using linear control function by Gender ***

* Right-hand side: public first, followed by the controls (gendercode is not
* included because each regression is run within one gendercode value).
local rhs public per12 dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7 year3 year4 year5 st_code1 st_code2

* For gendercode 0 and then gendercode 1: OLS with robust standard errors on
* observations with salary below 98 and emp_2 equal to 1, in windows of 15,
* 10 and 5 points of centeredScores (endpoints included).
foreach g in 0 1 {
    foreach bw of numlist 15 10 5 {
        regress midpointsalary `rhs' if salary < 98 & emp_2 == 1 & gendercode == `g' & inrange(centeredScores, -`bw', `bw'), vce(robust)
    }
}

  ***********************************************************************************************************************************

  
  
  
  *** Appendix Table 7 - chars of individuals reporting salary versus not ***

* Characteristics compared between the two groups of havesalary.
local chars per12 age focc1 focc2 focc3 focc4 focc5 focc6 year2 dumCBSE year3 year4 year5 gendercode st_code1 st_code2 st_code3

* One two-sample t test per characteristic, grouped by havesalary.
foreach v of local chars {
    ttest `v', by(havesalary)
}
  
***************************************************************************************************************************
  
*** Appendix Table 8 - imputed salary ***

* Note - imputation draws are random. rseed() below fixes the random-number
* seed so that re-running this section gives the same estimates each time.
* The seed value itself is arbitrary; remove or change it to obtain a
* different set of draws (estimates will then differ from run to run).

* Control sets: l is used for the imputation model and the first regression in
* each group; s adds per12sq and c adds per12sq and per12cube.
* NOTE(review): l here includes year2 and st_code3 and omits focc7, unlike s
* and c - confirm this difference is intended.
local l per12  age focc1 focc2 focc3 focc4 focc5 focc6 year2 dumCBSE year3 year4 year5 gendercode st_code1 st_code2 st_code3
local s per12 per12sq dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7  year3 year4 year5 gendercode st_code1 st_code2
local c per12 per12sq per12cube dumCBSE age focc1 focc2 focc3 focc4 focc5 focc6 focc7  year3 year4 year5 gendercode st_code1 st_code2

* Declare the data as mi (wide style) and impute missing midpointsalary by
* linear regression, five imputations, among emp_2==1 rows with nonmissing focc1.
mi set wide
mi register imputed midpointsalary
mi impute regress midpointsalary public `l' if emp_2==1 & focc1<. , add(5) rseed(12345)


** regressions

* Restrict the data to the imputation sample, then estimate on the full sample.
keep if emp_2==1 & focc1 <.
mi estimate: regress midpointsalary public `l', robust

* Successively narrower windows of centeredScores (15, 10, then 5 points,
* endpoints included). Each keep permanently drops the rows outside the
* window, so the windows must be processed from widest to narrowest.
foreach bw of numlist 15 10 5 {
    keep if centeredScores >= -`bw' & centeredScores <= `bw'

    foreach spec in l s c {
        mi estimate: regress midpointsalary public ``spec'', robust
    }
}

clear


****************************************************************************************************************************

*** Appendix Table 9 - Lee's Bounds ***
* install leebounds if not already installed
* ssc install leebounds
use "leesbounds_sample.dta", clear

* Lee bounds for midpointsalary with public as the treatment indicator and the
* cie option: first on the whole file, then on observations whose
* centeredScores lies between -5 and 5 (inclusive).
leebounds midpointsalary public, cie
leebounds midpointsalary public if inrange(centeredScores, -5, 5), cie



