******* CREATE THE MAIN ANALYSIS DATASET FOR PRESCHOOL VARIATION BASED ON YEAR OF APPROVAL 
* first generation data
* also creates the "_p1" sample (omitting individuals from the bottom percentile, p1, of the human capital index; see the commented-out block further down)

* Run the project setup do-file first.
* NOTE(review): it presumably defines the globals used throughout this file
* (work and results directories, variable lists) -- confirm.
do "D:\Data\workdata\704665\daycare\dofiles\first.do"

*************************************** DATA MERGE AND COLLAPSE

* Base file for the analysis. The later 1:1 merges on pnr require one row per pnr.
use "$work\pop_daycare_popmunic_yrapproval.dta", clear

* a test: compare municipality coverage with the raw daycare data.
* The path is quoted, like every other file reference in this do-file, so the
* merge does not break when the work directory contains spaces.
merge m:1 Kommune_ID using "$work\rawdaycaredata.dta"
gen rawdaycare=(_merge==2) // 1 unmerged muni due to discrepancy in admin structure over time in DK
* NOTE(review): using-only rows are removed on the next line, so rawdaycare is 0
* for every observation that remains.
keep if _merge!=2
drop _merge

****************************************************************************************
* merge on outcomes from different admin registers
****************************************************************************************

****************************************************************************************
* mortality
****************************************************************************************
* Attach the death file; keep(master match) discards rows that exist only in
* the using file, and nogenerate avoids leaving _merge behind.
merge 1:1 pnr using "$work\death19812013.dta", keep(master match) nogenerate

* A missing death indicator is recoded to 0.
foreach dvar of varlist death55-death65 {
    replace `dvar' = 0 if `dvar' == .
}

* Survival dummies for ages 55 to 65. The comparison also evaluates to 1 when
* age_death is missing, because missing sorts above every number in Stata.
forvalues a = 55/65 {
    gen surv`a' = (age_death >= `a')
}

****************************************************************************************
* added for referee: marriage
****************************************************************************************
* Rows found only in the marriage file are discarded; no _merge is kept.
merge 1:1 pnr using "$work\marriage.dta", keep(master match) nogenerate

* A missing value of married (including persons absent from the marriage file) becomes 0.
replace married = 0 if married == .

****************************************************************************************
*nights and diags
****************************************************************************************
* Nights file. Using-only rows are dropped here as in the other 1:1 merges on
* pnr in this do-file; otherwise persons who appear only in the using file
* would be added to the data with missing municipality, birth date and outcomes.
merge 1:1 pnr using "$work\lpr_nights_ranges.dta"
drop if _merge==2
drop _merge

* Diagnoses file, same treatment of using-only rows.
merge 1:1 pnr using "$work\lpr_diags_withdeath_byage.dta"
drop if _merge==2
drop _merge

****************************************************************************************
* income
****************************************************************************************
* Rows found only in the economic-outcomes file are discarded; no _merge is kept.
merge 1:1 pnr using "$work\daycare_econout.dta", keep(master match) nogenerate

***** any wage income
* Indicator equal to 1 when the wage income of the age range is positive and 0
* otherwise; it is left missing when the income variable is "."
foreach span in 2931 3436 3941 4446 4951 5456 5961 {
    local wage age`span'_wage_inc
    gen any_`wage' = (`wage' > 0) if `wage' != .
}

*log of income
* ln() yields missing for zero or negative values.
foreach inc of varlist $income1 {
    gen ln_`inc' = ln(`inc')
}

****************************************************************************************
* education
****************************************************************************************
* Rows found only in the education file are discarded; no _merge is kept.
merge 1:1 pnr using "$work\edu_3065.dta", keep(master match) nogenerate

*** 1 - basic_ed (to make comparable)
* One complement variable per age 30, 35, ..., 65.
foreach age of numlist 30(5)65 {
    gen morethanbasic`age' = 1 - basic_edu`age'
}

****************************************************************************************
****** merge to Nurse data *****
****************************************************************************************
* use parish structure for merge
* thus create treatment at munic, the relevant unit, after that

* Many individuals share one parish code, hence m:1.
* Unlike most merges earlier in this file, using-only rows are not dropped here
* and _merge stays in memory after this step.
* NOTE(review): confirm that keeping using-only parishes is intended.
merge m:1 Sognekode_IM using "$work\nurseprogram.dta"
sort Kommune_ID
* Give every row of a municipality the largest nurse_date found within it.
by Kommune_ID: egen nurse_date1=max(nurse_date)
drop nurse_date
rename nurse_date1 nurse_date 

* add info on the 28 munics without nurse information among our childcare munics
* add date of book prior to first mention
* The municipalities are grouped by the value assigned to nurse_date; each one
* is also flagged with imputenurse = 1. The ids are all distinct, so the order
* of the groups does not matter.

gen muni=Kommune_ID

gen imputenurse=0

* nurse_date set to 1 Jan 1964:
*   120621 Augustenborg, 120622 Broager, 119659 Ringe, 120636 Sottrup, 119886 Års
foreach id in 120621 120622 119659 120636 119886 {
    replace nurse_date = date("1/1/1964", "MDY") if muni == `id'
    replace imputenurse = 1 if muni == `id'
}

* nurse_date set to 1 Jan 1960:
*   120631 Nordborg
foreach id in 120631 {
    replace nurse_date = date("1/1/1960", "MDY") if muni == `id'
    replace imputenurse = 1 if muni == `id'
}

* nurse_date set to 1 Jan 1950:
*   119265 Faxe, 118980 Herlev, 119289 Hårlev Himlingeøje, 118983 Høje Tåstrup,
*   120701 Skagen, 120677 Skælskør, 120722 Store Heddinge, 120551 Tystrup,
*   120554 Vojens, 119354 Ørslev
foreach id in 119265 118980 119289 118983 120701 120677 120722 120551 120554 119354 {
    replace nurse_date = date("1/1/1950", "MDY") if muni == `id'
    replace imputenurse = 1 if muni == `id'
}

* nurse_date set to missing:
*   120653 Bogense, 120180 Hammel Voldby Søby, 120573 Højer, 120362 Ikast,
*   120577 Løgumkloster, 119590 Nørre Åby, 120707 Rudkøbing, 120587 Skærbaek,
*   120208 Them, 120590 Tinglev, 120549 Toftlund, 119601 Vejlby-Strib
foreach id in 120653 120180 120573 120362 120577 119590 120707 120587 120208 120590 120549 119601 {
    replace nurse_date = . if muni == `id'
    replace imputenurse = 1 if muni == `id'
}

drop muni
* Days between date of birth and the nurse program date (missing if either is missing).
gen DOB_minusnurse = foed_dag - nurse_date

* 1 for all individuals born after the nurse program date, 0 otherwise.
* A missing difference also gives 0: nurse date is missing for munis without a program.
gen nurse_treat = (DOB_minusnurse > 0 & DOB_minusnurse < .)

* Constant used to count individuals per cell when collapsing.
gen N = 1
gen muni = Kommune_ID
* everopen is the municipality-level maximum of operating.
bysort muni: egen everopen = max(operating)

********************************************************************************************
**** summary stats comparing whole sample with munis in "ever open" daycare, Table 1
********************************************************************************************
* Everything between preserve and restore works on a temporary copy with one
* row per municipality; restore brings the individual-level data back.
preserve
bys muni: keep if _n==1
* _merge is still in memory from the nurse-program merge and must go before merging again.
drop _merge
merge m:1 Kommune_ID using "$work\munic_controls_recode2930.dta"
tab _merge

ren _merge muni_merge

drop if muni_merge==2

merge m:1 Kommune_ID using "$work\munic_controls_recode2021.dta"
tab _merge
drop if _merge==2
drop _merge


* Three stored summaries: all municipalities, everopen==1 and everopen==0.
eststo clear
eststo: estpost sum $munichar

eststo: estpost sum $munichar if everopen==1
eststo: estpost sum $munichar if everopen==0 


* NOTE(review): popmunic is renamed to munipop only after restore, so this label
* requires a variable called munipop to exist already at this point (presumably
* from one of the control files) -- confirm.
label var munipop "Avg. Population"
label var femalepop1930 "Pct Female"
label var socialdemo1929 "Pct Social Demo"
label var radicallib1929 "Pct Radical Lib"
label var agrarianlib1929 "Pct Agrarian Lib"
label var conservatives1929 "Pct Conservatives"
label var industrypop1930 "Pct Industrial"
label var urban1930 "Pct Urban"
label var agripop1930 "Pct Agricultural"
label var rural "Rural"
label var incometaxpercent1930 "Pct Paying Income Tax"
label var logtaxableincome1930 "Log Taxable Income"
label var propertytaxpercent1930 "Pct Paying Property Tax"

#delimit ;
esttab using "$results\munichar_byeveropen.tex",  main(mean) 
stats(N, label("Num. Munis") fmt(%12.0fc)) replace
fragment booktabs mtitles("All Munis" "Ever Open Preschool" "No Open Preschool") label wrap ;
#delimit cr

restore


**********************************************************************************
* down to ever implementers  here
**********************************************************************************
* Keep only municipalities whose everopen is neither 0 nor ".".
keep if everopen != 0 & everopen != .


codebook muni if everopen == 1
*138 munis have a daycare center that's ever open during this time period
rename popmunic munipop

**** we have about 53% of our individuals in the "ever open" data set
* Flag for municipality id 120663 (Copenhagen, per the note below).
gen cph = (muni == 120663)
**** if we exclude Copenhagen, we are left with about 42% of individuals


 
****************************************************************************************
***************** create hc index for main outcomes*********
****************************************************************************************
* Each of the three components is standardized with the mean and standard
* deviation taken over observations with operating==0; the index is the row
* mean of the standardized components.
foreach var of varlist morethanbasic50 year50 ln_age3060_mean_wage_inc{

* mean1_ and sd1_ are filled only where operating==0 (missing elsewhere) ...
egen mean1_`var'=mean(`var') if operating==0 
egen sd1_`var'=sd(`var') if operating==0
* ... and max() copies that single value to every observation.
egen  mean_`var'=max(mean1_`var')
egen  sd_`var'=max(sd1_`var')
drop mean1* sd1*

gen stand_`var'= (`var'-mean_`var')/sd_`var'

}

* The index is defined only when all three standardized components are non-missing.
* rowmean(stand*) picks up every variable whose name starts with "stand".
egen index_hc=rowmean(stand*) if stand_morethanbasic50!=. & stand_year50!=. & stand_ln_age3060_mean_wage_inc!=.


/***********************************************************
*at this stage we create  the "omit bottom p1"- sample 
************************************************************
* for Appendix Table A14
************************************************************
* uncomment this block, run the code, and save the result as "_p1.dta"
* then run main analyses

set seed 1234

gen index_hc1=index_hc
replace index_hc1=. if nurse_treat==0
sum index_hc1, det
gen p1=(index_hc1<=`r(p1)')
sample 99 if p1==1 /*keep 99% of those with edu in p(1)*/
sum index_hc1,det

drop p1 index_hc1
***********************************************************/

****************************************************************************************
*** regress all outcomes on month of birth and gender FEs and take residuals before collapsing
****************************************************************************************
* For every outcome in the four lists below: areg on month-of-birth dummies and
* male, absorbing the municipality-by-year cell. The prediction then takes over
* the outcome's name and the untouched outcome is kept with the suffix _raw.
* NOTE(review): the Stata areg postestimation documentation describes the predict
* option d as the absorbed-group effect, with dresiduals and residuals as
* separate options -- confirm that d is the intended statistic.

gen mob = month(foed_dag)

* One group id per combination of municipality and year.
egen muni_yr = group(Kommune_ID year)



foreach out of varlist $mortality {
areg `out' i.mob male , abs(muni_yr)
predict `out'_res if e(sample)==1, d
ren `out' `out'_raw
ren `out'_res `out'
}


foreach out of varlist $diagnights {
areg `out' i.mob male , abs(muni_yr)
predict `out'_res if e(sample)==1, d
ren `out' `out'_raw
ren `out'_res `out'
}


foreach out of varlist $income {
areg `out' i.mob male , abs(muni_yr)
predict `out'_res if e(sample)==1, d
ren `out' `out'_raw
ren `out'_res `out'
}


* index_hc was built from the unadjusted components above and is adjusted here like the other outcomes.
foreach out of varlist $edu index_hc married {
areg `out' i.mob male , abs(muni_yr)
predict `out'_res if e(sample)==1, d
ren `out' `out'_raw
ren `out'_res `out'
}


* Numeric county code from the county name, then store the individual-level file.
encode Amtsnavn, generate(county)
save "$work\indiv_analysisdata_maj2019.dta", replace

****************************************************************************************
* collapse
****************************************************************************************
* One row per municipality and year: means of outcomes and characteristics,
* maxima of the operating and cph flags, N as the count of individuals, and
* the first non-missing county name.
collapse ///
    (mean) male county rural numinst numslots numinstperpop numslotsperpop nurse_treat imputenurse *_raw $mortality $income $edu index_hc $diagnights married ///
    (max) operating cph ///
    (count) N ///
    (firstnm) Amtsnavn ///
    , by(Kommune_ID year) fast

*Municipality controls from different years (not interpolated here)
* First control file: the merge indicator is kept under the name muni_merge.
merge m:1 Kommune_ID using "$work\munic_controls_recode2930.dta"
tab _merge
rename _merge muni_merge
keep if muni_merge != 2

* Second control file: using-only rows and the indicator are both removed.
merge m:1 Kommune_ID using "$work\munic_controls_recode2021.dta"
tab _merge
keep if _merge != 2
drop _merge

* Vote-share variables exist for 1929 and 1920 with identical labels.
foreach yr in 1929 1920 {
    label variable socialdemo`yr' "Pct Social Demo"
    label variable radicallib`yr' "Pct Radical Lib"
    label variable agrarianlib`yr' "Pct Agrarian Lib"
    label variable conservatives`yr' "Pct Conservatives"
}

* Variables that exist for both 1930 and 1921, again with identical labels.
foreach yr in 1930 1921 {
    label variable pop`yr' "Log Population"
    label variable urban`yr' "Pct Urban"
    label variable incometaxpercent`yr' "Pct Paying Income Tax"
    label variable logtaxableincome`yr' "Log Taxable Income"
    label variable propertytaxpercent`yr' "Pct Paying Property Tax"
}

* Variables labelled for 1930 only, plus the rural indicator.
label variable femalepop1930 "Pct Female"
label variable industrypop1930 "Pct Industrial"
label variable agripop1930 "Pct Agricultural"
label variable rural "Rural"

rename Kommune_ID muni


***** exposure at ages 4-7
* The value at age a is read from the row (a - 3) positions further down within
* the municipality, and only where balanced==1.
* NOTE(review): balanced is not among the collapsed variables, so it presumably
* comes from one of the merged control files -- confirm.
sort muni year
foreach v of varlist numinstperpop operating {
    forvalues lead = 1/4 {
        local age = `lead' + 3
        by muni: gen `v'_age`age' = `v'[_n+`lead'] if balanced==1
    }
}

* Fraction of the five years (ages 3 to 7) with an operating preschool:
* 1 if operating at age 3, otherwise (8 - a)/5 where a is the first age in 4-7
* at which operating equals 1, and 0 if there is none.
gen frac_operating = 0 if balanced==1
replace frac_operating = 1 if operating==1 & balanced==1
local closedsofar "operating==0"
forvalues age = 4/7 {
    replace frac_operating = (8-`age')/5 if `closedsofar' & operating_age`age'==1
    local closedsofar "`closedsofar' & operating_age`age'==0"
}

* Average number of centers per population over ages 3 to 7 (row mean over non-missing values).
egen numinstperpop_age3_7 = rowmean(numinstperpop numinstperpop_age4 numinstperpop_age5 numinstperpop_age6 numinstperpop_age7)


* Intermediate file, written and read back immediately.
save "$work\analysis_daycare_ontheway1.dta", replace
use "$work\analysis_daycare_ontheway1.dta", clear

**always operating
* alwaysopen is 1 only if operating equals 1 in every row of the municipality.
sort muni year
by muni: egen alwaysopen = min(operating)

**** first year of approval
cap drop startingyear
sort muni year operating
* afstyear marks each year in which operating is 1 while it was 0 in the
* municipality's previous row; it stays missing in the first row of a municipality.
by muni: gen afstyear = year if operating[_n]==1 & operating[_n-1]==0

* Take the earliest switch year. mean() equals the first year only when there is
* exactly one switch from 0 to 1; after a closure and re-opening it would return
* the average of the switch years instead.
bys muni: egen fstyear = min(afstyear)
*note: this is missing for munis that already had childcare by 1933

***** label key treatment variables
label variable numinstperpop "Num. centers per 1000 pop at age 3"
label variable operating "Any Approved Preschool at Age 3"
label variable frac_operating "Frac. Yrs age 3-7 Exposed to Preschool"
label variable numinstperpop_age3_7 "Avg. num. centers per 1000 pop over age 3-7"

**** interaction with nurse program
* After the collapse nurse_treat is a cell mean; any positive value is set to 1.
replace nurse_treat = 1 if nurse_treat > 0
capture drop daycare_nurse
gen daycare_nurse = nurse_treat * operating

label variable nurse_treat "NHV at Birth"
label variable daycare_nurse "Preschool x NHV"

*** which municipalities ever have a nurse program
bysort muni: egen ever_nurse = max(nurse_treat)
*there are 18 municipalities that never have nurse program in our data

*** HELPNURSE SAMPLE: drop years after 1952 for the 28 munis that have worse program data (i.e. cohorts born after 1949)
* helpnurse is 1 for cells without imputed nurse dates and for imputed cells up
* to year 1952; it is missing everywhere else.
gen helpnurse = 1 if imputenurse==0 | (imputenurse==1 & year<=1952)


* NOTE(review): a missing fstyear gives early = 0 because the comparison is false for missing values.
gen early = (fstyear <= 1939)

****************************************************************************************
***** Merge to Munic x Year Characteristics that are Interpolated
****************************************************************************************
* Master-only cells are kept, so cells without a match have missing controls.
merge 1:1 muni year using "$work/munic_controls_recode_ipolated.dta", keep(match master)
drop _merge
*rescale so that they are within 0-100 range
foreach var of varlist $contrvars{
replace `var'=0 if `var'<0
* Missing counts as larger than any number in Stata, so it is excluded here;
* otherwise every missing control value would be turned into 100.
replace `var'=100 if `var'>100 & `var'<.
}

* Years since the first year of approval (missing when fstyear is missing).
gen yrs_open = year - fstyear

* Age of the cohort in the first year of approval; year refers to age 3.
gen age_atopen = fstyear - year + 3

* Both indicators are 0 when age_atopen is missing.
gen older = (age_atopen>10 & age_atopen<.)
gen younger = (age_atopen<0)


* analysis data
compress
save "$work\analysis_daycare_muni_ever_yrapproval.dta", replace
