/*******************************************************************************

Replication file 2: Place, Peers and the Teenage Years

Author:		Nathan Deutscher
			
Date:		3 May 2019

This program estimates peer effects for Australia. The steps are:
	1. Create the analysis file
	2. Descriptives, baseline regressions and robustness
	3. Examine heterogeneity to investigate mechanisms
	
*******************************************************************************/

*------------------------------------------------------------------------------*
* 	0. PRELIMINARIES													   
*------------------------------------------------------------------------------*

* Start from a clean session
clear all

	version 13.1

	* Run location: home (ie offsite) or ATO - swap the comment marker to switch

	global location "home"
	*global location "ATO"

	* Project root folder for each location

	global home_path "/Users/Nathan/Documents/1. Research - ATO mirror/Project 2 (FOR WEB)"
	global ATO_path "/data/secure/Fast/Researchers/Nathan/Project 2"

	* Close any log left open, then append to the log named after today's date

	capture log close
	log using "${${location}_path}/5. Output/Logs/Log peers - $S_DATE", text append

	* Work from the working data folder

	cd "${${location}_path}/2. Data (working)"

	* Session settings (the scheme and more settings are stored permanently)

	set more off, permanently
	set scheme s1color, permanently
	set matsize 10000
	
*------------------------------------------------------------------------------*
* 	1. CREATE ANALYSIS FILE 										   
*------------------------------------------------------------------------------*

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	1A. Generate outcome file									   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

* Load the master mobility file, restrict the sample and build parent household income
clear all
use "${${location}_path}/1. Data (MASTER)/mobility"

	* derived_variables is a separate do-file that is not part of this file
	do derived_variables

	keep int_id fyob sex expat *p1* *p2* num_kids* *r_total* *r_hdo* *r_depend* d_age m_age family_type num_kids birth_order has_spouse

	* Cohorts born 1978 to 1991 only
	keep if fyob>=1978 & fyob<=1991
	
	* These patterns can only match variables retained by the *p1* / *p2* wildcards above
	drop *r_wages* *r_private* *r_disposable*
	
* Drop expats and those missing parents		
	
	drop if expat==1
	drop if p1_int_id==.
	
* Benchmark definitions
* Parent income is measured over p_syear to p_eyear; p_span is the number of years in that window

	global p_syear=1991
	global p_eyear=2001
	global p_span=(${p_eyear}-${p_syear}+1)

* Household income definitions
* p_h_total is the row total of the p?_r_total variables for the year; c_h_total adds c_r_total and c_s_r_total
* (c_s_ is presumably the child's spouse - TODO confirm). With the missing option the total is missing
* when every component is missing.

	order _all, sequential

	forvalues year=1991(1)2015 {
		egen p_h_total`year'=rowtotal(p?_r_total`year'), missing
		egen c_h_total`year'=rowtotal(c_r_total`year' c_s_r_total`year'), missing
	}

* Parent income and ranks
* The variables are re-ordered alphabetically so that the p_h_total range used below covers exactly the benchmark years

	order _all, sequential
	gen p_inc=.
	gen c_inc=.
		
* Parent household income ranks
* p_inc is total parent household income over the benchmark years divided by the number of years
	
	egen p_h_life_total=rowtotal(p_h_total${p_syear}-p_h_total${p_eyear}), missing
	replace p_inc=p_h_life_total/$p_span

	* ranks is not defined in this file (presumably in derived_variables - TODO confirm);
	* p_rank is expected to exist after this call, as it is counted just below
	ranks, parents(1) children(0)
	
* Some checks
	
	di _N
	count if p_rank==.
					
* Child household income and rank at each even age from 16 to 30

	foreach a of numlist 16(2)30 {

		* Fill c_inc with each cohort's income in the year it reaches this age,
		* using only years with income data (1991 to 2015)
		forvalues yob=1978/1991 {
			local obs_year=`yob'+`a'
			if inrange(`obs_year',1991,2015) {
				replace c_inc=c_h_total`obs_year' if fyob==`yob'
			}
		}

		* Rank children on c_inc and keep the result under an age-specific name
		ranks, parents(0) children(1)
		rename c_rank c`a'_rank

		* Keep the income level itself at age 24
		if `a'==24 {
			gen c24_inc=c_inc
		}

		* Reset before the next age
		replace c_inc=.
	}

* Child outcomes: c24_uni (positive c_r_hdo in the year the cohort is 24) and c_kids (c_r_depend_child2015)

	gen byte c24_uni=.

	* Only cohorts whose age-24 year falls in 2000 to 2015 get a value
	forvalues yob=1978/1991 {
		local y24=`yob'+24
		if `y24'>=2000 & `y24'<=2015 {
			replace c24_uni=(c_r_hdo`y24'>0 & c_r_hdo`y24'!=.) if fyob==`yob'
		}
	}

	* Look at the dependent children variables before copying the 2015 value
	summarize c_r_depend_child2015, detail
	summarize c_r_depend_child20??

	gen byte c_kids=c_r_depend_child2015

* Sample size and number missing the age 24 rank

	display _N
	count if c24_rank==.

* Parent ranks
* Collapse to one record per family-cohort (p1_int_id x fyob) and save the child file and the parent rank file

	count if sex==.
	gen byte female=(sex==1)

	* Number of children, and number of girls, that the family has in this cohort
	bysort p1_int_id fyob: gen byte p_kids=_N
	bysort p1_int_id fyob: egen byte p_girls=total(female)

	* Keep one child per family-cohort. Rows within a family-cohort are ordered by int_id
	* first, so the same child is kept on every run; without a tie-break the sort order
	* within a family-cohort is arbitrary and the retained sibling can change between runs.
	bysort p1_int_id fyob (int_id): keep if _n==1
	
* Indicators if family has a girl or boy
	
	* p_girls and p_kids are constant within the family-cohort, so no by-group is needed
	gen byte famwg=(p_girls!=0)
	gen byte famwb=(p_girls!=p_kids)
	
	table famwg famwb	

save "mobility_peers", replace

	* Family-level file that is merged onto the postcode histories in section 1B
	keep p1_int_id fyob female p_rank p_girls p_kids famw*
	
save "parent_ranks", replace

*** First parent SA4 ***
* Saves one orig_mapid per family-cohort (p1_int_id x fyob) as "parent_SA4".
* NOTE(review): there is no secondary sort key, so which row counts as the first within a
* family-cohort is not pinned down here - confirm parents_moves1 has one row per family-cohort
* or a meaningful stored order. "parent_SA4" is not read again anywhere in this file.

clear
use "${${location}_path}/1. Data (MASTER)/parents_moves1"

	bysort p1_int_id fyob: keep if _n==1
	keep p1_int_id fyob orig_mapid
	
save "parent_SA4", replace	

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	1B. Peer ranks, sizes and composition							   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

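* Peer measures are built from permanent postcode residents only (families observed in a single postcode).
* The aim is to isolate idiosyncratic cohort-to-cohort variation in peer composition.

* Location peer variables are calculated:
	* for each location and cohort; and
	* using only the permanent postcode residents of that location and cohort.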

* Build the list of family-cohorts that stay in one postcode, then attach their parent ranks
clear
use "${${location}_path}/1. Data (MASTER)/parents_moves2"

* Use the real postcode only

	drop postcode
	rename real_postcode postcode

* Consider only history while child was still in school (under 20)
* Rows with a start_year more than 20 years after fyob are dropped

	keep if fyob>=1978 & fyob<=1991
	drop if start_year>20+fyob
	
	drop if p1_int_id==.

* Keep those with only one postcode
* tag marks the first row of each family-cohort-postcode, so its total is the number of distinct postcodes

	bysort p1_int_id fyob postcode: gen byte tag=(_n==1)
	bysort p1_int_id fyob: egen int num_postcodes=total(tag)
	
	keep if num_postcodes==1
	
* Keep only one instance of each family
* All remaining rows of a family-cohort share the same postcode; the other variables come from whichever row is kept

	bysort p1_int_id fyob: keep if _n==1
	
	* This file defines the permanent postcode residents used to restrict the sample in section 1C
	save "parent_peers", replace

* Bring in parent ranks

	merge 1:1 p1_int_id fyob using "parent_ranks", keep(master match) keepusing(p_rank p_girls p_kids famw*)

	* capture hides any error from this rename; the keep below would then fail as rand_postcode_id would not exist
	capture rename postcode rand_postcode_id	
	keep p1_int_id fyob rand_postcode_id num* p_* famw*

* Some counts 

	di _N
	count if p_rank==.
	count if p_girls!=0
	count if p_girls!=p_kids
	
* Generate the peer ranks 
* One set of values per postcode-cohort cell (rand_postcode_id x fyob), built from the
* parent ranks of the permanent residents in that cell
								
	* Mean and standard deviation of parent rank in the cell.
	* sdrank previously used mean(), which made it a copy of the mean; it now uses sd()
	* and is missing for cells with fewer than two non-missing parent ranks.
	bysort rand_postcode_id fyob: egen rand_postcode_peer_rank=mean(p_rank)
	bysort rand_postcode_id fyob: egen rand_postcode_peer_sdrank=sd(p_rank)
			
	* Mean parent rank among families with a girl in the cohort.
	* total() treats a missing p_rank as zero in the numerator, so the family count in the
	* denominator must also skip families without a parent rank.
	bysort rand_postcode_id fyob: egen rand_postcode_peer_grank=total(p_rank*famwg)
	bysort rand_postcode_id fyob: egen rand_postcode_peer_famwg=total(famwg*(p_rank!=.))
		replace rand_postcode_peer_grank=rand_postcode_peer_grank/rand_postcode_peer_famwg
			
	* Same for families with a boy in the cohort
	bysort rand_postcode_id fyob: egen rand_postcode_peer_brank=total(p_rank*famwb)
	bysort rand_postcode_id fyob: egen rand_postcode_peer_famwb=total(famwb*(p_rank!=.))
		replace rand_postcode_peer_brank=rand_postcode_peer_brank/rand_postcode_peer_famwb
		
	* Cell sizes: families with a parent rank, and children
	bysort rand_postcode_id fyob: egen rand_postcode_peer_fams=total(p_rank!=.)
	bysort rand_postcode_id fyob: egen rand_postcode_peer_kids=total(p_kids)
	
	* Reduce to one row per postcode-cohort cell
	keep rand_postcode* fyob 
	bysort rand_postcode_id fyob: keep if _n==1
		
	* Moving averages
	* Centred moving averages of the peer ranks over 3, 5 and 7 cohorts (h cohorts either side).
	* A value is kept only when the whole window is present and has no missing peer rank.
			                                                            
	tsset rand_postcode_id fyob
							       
	forvalues h=1(1)3 {									       
		local w=2*`h'+1
											       
		* fullwindow is 1 when the rows h before and h after are exactly 2h cohorts apart,
		* ie there are no gaps in the window. The subscripts only make sense in fyob order
		* within a postcode, so that order is stated explicitly in the by-group.
		bysort rand_postcode_id (fyob): gen byte fullwindow=((fyob[_n+`h']-fyob[_n-`h'])==2*`h')
			
		foreach outcome in rank brank grank {
	
			* Share of cohorts in the window whose peer rank is missing
			tssmooth ma anymissinginwindow=(rand_postcode_peer_`outcome'==.), window(`h' 1 `h' )
			tssmooth ma rand_postcode_peer_`outcome'_ma`w'=rand_postcode_peer_`outcome', window(`h' 1 `h' )
				replace rand_postcode_peer_`outcome'_ma`w'=. if fullwindow==0 | anymissinginwindow!=0
				
			drop anymissinginwindow
		}
			
		drop fullwindow				
	}
							       
	tsset, clear                                            
		    
	* Long file
	* Strip the leading "rand_" (5 characters) from every variable name
		
	foreach var of varlist rand_* {
		local newname=substr("`var'",6,.)
		rename `var' `newname'
	}
			
	save "rand_postcode_peers", replace

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	1C.  Create analysis dataset								   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

* Start from the child-level outcome file
use "mobility_peers", clear

	keep c*_rank sex p_* p1_int_id fyob fam* d_age m_age family_type num_kids birth_order c24_inc *uni *kids *spouse
		
	* Only children with a parent rank are analysed
	
	drop if p_rank==.
	
	* Permanent postcode residents only: keep children whose family-cohort is in parent_peers

	merge m:1 p1_int_id fyob using "parent_peers", keep(match) nogenerate
	
	* Attach the postcode-cohort peer measures; children without a match are kept

	rename postcode postcode_id
	
	merge m:1 postcode_id fyob using "rand_postcode_peers", keep(master match) nogenerate
	
	* Convert to leave one out
	* Removes the child's own family from each peer mean: mean - (own - mean)/(n - 1), where n is the
	* number of families in the cell. The result is missing when n is 1.
	* For the moving averages the own cohort is one of `width' cohorts, so the correction is scaled by 1/`width'.
	* The girl and boy versions multiply the correction by famwg / famwb, so it only applies when
	* the own family is part of that mean.
	
	foreach width in 3 5 7 {
				
		gen postcode_peer_rank_ma`width'_L=postcode_peer_rank_ma`width'	-(1/`width')*(p_rank-postcode_peer_rank)/(postcode_peer_fams-1) 							
			
		gen postcode_peer_grank_ma`width'_L=postcode_peer_grank_ma`width'-(1/`width')*(p_rank-postcode_peer_grank)/(postcode_peer_famwg-1)*famwg 	
		gen postcode_peer_brank_ma`width'_L=postcode_peer_brank_ma`width'-(1/`width')*(p_rank-postcode_peer_brank)/(postcode_peer_famwb-1)*famwb 	
					
	}	
		
	* Own-cohort leave-one-out means
	gen postcode_peer_rank_L=postcode_peer_rank-(p_rank-postcode_peer_rank)/(postcode_peer_fams-1)		
	gen postcode_peer_grank_L=postcode_peer_grank-(p_rank-postcode_peer_grank)/(postcode_peer_famwg-1)*famwg 
	gen postcode_peer_brank_L=postcode_peer_brank-(p_rank-postcode_peer_brank)/(postcode_peer_famwb-1)*famwb 
			
	* Demeaned variables
	* Own-cohort leave-one-out mean minus the leave-one-out moving average of the same width
	
	foreach outcome in rank grank brank {
		forvalues width=3(2)7 {
			gen postcode_peer_`outcome'_dm`width'_L=postcode_peer_`outcome'_L-postcode_peer_`outcome'_ma`width'_L
		}
	}	
		
	* Family fixed effects and sizes
	* family_sex_groups numbers each combination of p1_int_id and sex;
	* fam_size is the number of rows in the data with the same p1_int_id
	
	egen long family_sex_groups=group(p1_int_id sex)
	
	bysort p1_int_id: gen fam_size=_N
	table fam_size
	
	* Merge in postcode characteristics
	* pc_density is matched on postcode only, pc_trends on postcode and cohort; unmatched children are kept
	
	merge m:1 postcode_id using "${${location}_path}/2. Data (working)/pc_density", keep(master match) nogen
	merge m:1 postcode_id fyob using "${${location}_path}/2. Data (working)/pc_trends", keep(master match) nogen
	
* Save the dataset
* This overwrites the child-level "mobility_peers" file saved in section 1A
	
drop if p1_int_id==.	
	
save "mobility_peers", replace

*------------------------------------------------------------------------------*
* 	2.  BASELINE INVESTIGATION									   
*------------------------------------------------------------------------------*

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	2A.  Descriptive								   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

* Load the analysis file
clear
use "mobility_peers"

* Describe the distributions of postcode-cohort sizes, mean parental rank and demeaned values
* Each tabstat result is transposed into a one-row matrix; the rows are stacked at the end

	tabstat postcode_peer_kids, stat(mean p10 p50 p90 N) format(%9.2g) save
		matrix Sizes=r(StatTotal)'
		
	tabstat postcode_peer_rank_L, stat(mean p10 p50 p90 N) format(%9.2g) save
		matrix Peers_rank=r(StatTotal)'
		
	* Deviations from the 3, 5 and 7 cohort moving averages
	forvalues w=3(2)7 {
		tabstat postcode_peer_rank_dm`w'_L, stat(mean p10 p50 p90 N) format(%9.2g) save
		matrix Peer_rank_shock`w'=r(StatTotal)'
	}
	
	* Deviation from the postcode mean over all cohorts.
	* Defined as value minus mean, the same sign as the moving-average deviations above
	* (it was previously mean minus value, which flips the percentiles).
	bysort postcode_id: egen mean_prank=mean(postcode_peer_rank_L)
	gen postcode_peer_rank_dm_L=postcode_peer_rank_L-mean_prank
	
	tabstat postcode_peer_rank_dm_L, stat(mean p10 p50 p90 N) format(%9.2g) save
		matrix Peer_rank_shock=r(StatTotal)'
	
	matrix samples_rank = Sizes \ Peers_rank \ Peer_rank_shock3 \ Peer_rank_shock5 \ Peer_rank_shock7 \ Peer_rank_shock
	matrix list samples_rank, format(%9.3gc)
			
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	2B.  Regression analysis							   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

* Load the analysis file and create the placeholder regressors used in this section
clear
use "mobility_peers"

	gen peers=.
	gen ma_peers=.
	
	table fam_size
	
	* Home runs use a 1-in-25 sample of postcodes to speed things up; ATO runs use every postcode.
	* The sample follows the location global, so no line has to be commented out by hand.
	if "${location}"=="home" {
		keep if mod(postcode_id,25)==9
	}
	
*** Preferred specification ***
	
	* Outcome and regressors shared by every column: child rank at age 24 on own parent rank and leave-one-out peer rank

	local core c24_rank p_rank postcode_peer_rank_L

	* Column 1: no location controls

	reg `core', cluster(postcode_id)
	estadd scalar df_reg = e(N)-e(df_m)-1
	estimates store peers_raw
	
	* Columns 2 to 4: control for the 3, 5 and 7 cohort moving average of peer rank

	foreach w in 3 5 7 {
		replace ma_peers=postcode_peer_rank_ma`w'_L

		reg `core' ma_peers, cluster(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-1
			estimates store peers_w`w'
	}

	* Columns 5 to 7: postcode fixed effects with progressively more controls.
	* mean_* is not created in this section (presumably it arrives with the postcode merges - TODO confirm)

	local controls1 i.fyob
	local controls2 i.postcode_id#c.fyob i.fyob
	local controls3 i.postcode_id#c.fyob i.fyob mean_*

	forvalues s=1/3 {
		quietly areg `core' `controls`s'', cluster(postcode_id) absorb(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-e(df_a)-1
			estimates store peers_pc`s'
	}

* TABLE 4 - Peers		
		
	estimates table peers_raw peers_w* peers_pc*, keep(p_rank postcode_peer_rank_L) star(0.1 0.05 0.01)
	esttab peers_raw peers_w* peers_pc* using "${${location}_path}/5. Output/Tables/TAB4_Peers", keep(*rank* *peers* *mean*) se star(* 0.1 ** 0.05 *** 0.01) scalars(df_reg) fixed b(%9.3fc) replace   
	estimates drop _all
	
* APPENDIX TABLE B4 - Peers	
	
	* Moving average specifications with the additional controls
	* For each window width: postcode fixed effects, family fixed effects with the postcode
	* controls (best effort), and family fixed effects alone

	* Names of the best-effort models that were actually estimated
	local pcfamfe_models
	
	forvalues w=3(2)7 {		
		replace ma_peers=postcode_peer_rank_ma`w'_L			
		
		areg c24_rank p_rank postcode_peer_rank_L ma_peers i.postcode_id#c.fyob i.fyob mean_*, cluster(postcode_id) absorb(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-e(df_a)-1
			estimates store peers_pcfe_w`w'	
		
		* This model is allowed to fail. It is stored only when it ran: otherwise estadd and
		* estimates store would act on the previous model and save it under the family FE name.
		capture areg c24_rank p_rank postcode_peer_rank_L ma_peers i.postcode_id#c.fyob i.fyob mean_*, cluster(postcode_id) absorb(p1_int_id)
		local rc=_rc
		if `rc'==0 {
			estadd scalar df_reg = e(N)-e(df_m)-e(df_a)-1
			estimates store peers_pcfamfe_w`w'	
			local pcfamfe_models `pcfamfe_models' peers_pcfamfe_w`w'
		}
		else {
			di as error "Family FE model with postcode controls failed for window `w' (rc=`rc'); column left out of the table"
		}
		
		areg c24_rank postcode_peer_rank_L ma_peers, absorb(p1_int_id) cluster(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-e(df_a)-1
			estimates store peers_famfe_w`w'	
	}

	* Columns appear in the same order as before; failed best-effort models are simply absent
	estimates table peers_pcfe_w* `pcfamfe_models' peers_famfe_w*, keep(p_rank postcode_peer_rank_L) star(0.1 0.05 0.01)
	esttab peers_pcfe_w* `pcfamfe_models' peers_famfe_w* using "${${location}_path}/5. Output/Tables/ATAB4_Peers_robust", keep(*rank* *peers* *mean*) se star(* 0.1 ** 0.05 *** 0.01) scalars(df_reg) fixed b(%9.3fc) replace   
	estimates drop _all

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	2C.  Neighbouring cohorts					   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*	

* Attach the peer measures of the 13 cohorts from 6 before to 6 after each child's own cohort
clear
use "mobility_peers"

	* Own family's contribution to its cell mean, as used in the leave-one-out correction
	gen L_deviation=(p_rank-postcode_peer_rank)/(postcode_peer_fams-1)
	
	keep c24_rank p_rank postcode_id  fyob postcode_peer_rank_L postcode_peer_rank_ma?_L L_deviation p1_int_id p_inc

* Bring in the placebo cohort predictions
* The child's own cohort is parked in _fyob while fyob is reused as the merge key for each offset

	rename fyob _fyob
	
	forvalues t=1(1)13 {
		noisily di "Loop `t'"
		* Offsets run from -6 (t=1) to +6 (t=13); t=7 is the child's own cohort
		gen fyob=_fyob+`t'-7
		
		merge m:1 postcode_id fyob using "rand_postcode_peers", keepusing(postcode_peer_rank postcode_peer_rank_ma?) keep(master match) nogen  
		
		* The own family is only taken out of the other cohort's moving average when the own
		* cohort falls inside that cohort's window, ie the offset is at most (width-1)/2
		forvalues width=3(2)7 {
			gen ma`width'_peers_t`t'=postcode_peer_rank_ma`width'	-(1/`width')*L_deviation*(abs(`t'-7)<=(`width'-1)/2)
		}
		
		* Suffix the merged variables with the offset index so the next pass can merge again
		rename postcode_peer_rank peers_t`t'
		rename postcode_peer_rank_ma? postcode_peer_rank_ma?_t`t'
		rename fyob fyob_t`t'
	}

	rename _fyob fyob
	
	* For the own cohort use the leave-one-out peer rank rather than the raw cell mean
	replace peers_t7=postcode_peer_rank_L
	
*** Estimation ***
* Results are written into the first 13 rows of the data, one row per cohort offset t
	
	* x holds the offset index; x2 to x5 shift it slightly so the five series plot side by side
	gen x=_n		if _n<14
	gen x2=_n+0.1	if _n<14
	gen x3=_n+0.2	if _n<14
	gen x4=_n+0.3	if _n<14
	gen x5=_n+0.4	if _n<14
	
	* Coefficient, standard error and degrees of freedom for each specification:
	* moving average controls (w3 w5 w7), postcode FE (pc), postcode FE with linear trends (pcl)
	foreach spec in w3 w5 w7 pc pcl {
		gen y_peers_coef_`spec'=.
		gen y_peers_se_`spec'=.
		gen y_peers_df_`spec'=.
	}
	
	forvalues t=1(1)13 {
		forvalues w=3(2)7 {
			reg c24_rank p_rank peers_t`t' ma`w'_peers_t`t', cluster(postcode_id)
				replace y_peers_coef_w`w'=_b[peers_t`t']		if x==`t'
				replace y_peers_se_w`w'=_se[peers_t`t']			if x==`t'
				replace y_peers_df_w`w'=e(N)-e(df_m)-1			if x==`t'		
		}
		
		* NOTE(review): unlike the areg models in section 2B these two are not clustered and
		* have no cohort dummies - confirm that is intended.
		* The degrees of freedom now also subtract the absorbed postcode effects, e(df_a),
		* as the areg models in section 2B do.
		quietly areg c24_rank p_rank peers_t`t', absorb(postcode_id)
			replace y_peers_coef_pc=_b[peers_t`t']			if x==`t'
			replace y_peers_se_pc=_se[peers_t`t']			if x==`t'
			replace y_peers_df_pc=e(N)-e(df_m)-e(df_a)-1	if x==`t'
		
		quietly areg c24_rank p_rank peers_t`t' i.postcode_id#c.fyob, absorb(postcode_id)
			replace y_peers_coef_pcl=_b[peers_t`t']			if x==`t'
			replace y_peers_se_pcl=_se[peers_t`t']			if x==`t'
			replace y_peers_df_pcl=e(N)-e(df_m)-e(df_a)-1	if x==`t'
	}

	* Confidence bounds: coefficient plus or minus 1.96 standard errors
	foreach spec in w3 w5 w7 pc pcl {
		gen y_peers_upper_`spec'=y_peers_coef_`spec'+1.96*abs(y_peers_se_`spec')
		gen y_peers_lower_`spec'=y_peers_coef_`spec'-1.96*abs(y_peers_se_`spec')
	}
	
	
* APPENDIX FIGURE A5 - Placebo test for peer effects	
* Plots the coefficient on peers_t and its upper and lower bounds for each of the 13 cohort offsets.
* The horizontal axis is the offset index 1 to 13, where 7 is the child's own cohort.
* Each specification is drawn against its own slightly shifted x variable (x, x2, ... x5).
	
	* Similar results across all specifications
	#delimit ;
		graph twoway
			(rspike y_peers_upper_w3 y_peers_lower_w3 x, color(ebblue*0.33))
				(scatter y_peers_coef_w3 x, mcolor(ebblue*0.33))
			(rspike y_peers_upper_w5 y_peers_lower_w5 x2, color(ebblue*0.67))
				(scatter y_peers_coef_w5 x2, mcolor(ebblue*0.67))
			(rspike y_peers_upper_w7 y_peers_lower_w7 x3, color(ebblue))
				(scatter y_peers_coef_w7 x3, mcolor(ebblue))
			(rspike y_peers_upper_pc y_peers_lower_pc x4, color(green))
				(scatter y_peers_coef_pc x4, mcolor(green))
			(rspike y_peers_upper_pcl y_peers_lower_pcl x5, color(red))
				(scatter y_peers_coef_pcl x5, mcolor(red))
			,
			xtitle("Lag")
			ytitle("Peer effect")
			yline(0, lcolor(red) lpattern(dash))
			legend(order(2 "3-year ma" 4 "5-year ma" 6 "7-year ma" 8 "PC FE" 10 "PC FE linear") rows(1))
		;
	#delimit cr
	
	graph export "${${location}_path}/5. Output/Charts/FA5_Peers_cohorttest.png", replace
	
	* Removes x and the y_ result variables; x2 to x5 remain until the data are cleared
	drop x y_*
	
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	2D.  Across all ages						   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*	

* Load the analysis file; results are written into the first 8 rows, one row per age
clear
use "mobility_peers"

	gen peers=.
	gen ma_peers=.

	* Rows 1 to 8 correspond to ages 16, 18, ... 30
	gen x_age=14+_n*2 if _n<=8
	
	* The peer measure is the same for every window width, so it is set once
	quietly replace peers=postcode_peer_rank_L 		
	
	quietly forvalues w=3(2)7 {		
		replace ma_peers=postcode_peer_rank_ma`w'_L
		
		noisily di "Width of `w'"
		
		gen y`w'_p_rank=.
		gen y`w'_p_rank_lower=.
		gen y`w'_p_rank_upper=.
		gen y`w'_peers=.
		gen y`w'_peers_lower=.
		gen y`w'_peers_upper=.
		gen y`w'_df=.
		
		forvalues age=16(2)30 {
			* Row that holds the results for this age
			local row=(`age'-14)/2
			
			reg c`age'_rank p_rank peers ma_peers, cluster(postcode_id)
				estimates store peers`age'_w`w'
				
			replace y`w'_p_rank=_b[p_rank] if _n==`row'
			replace y`w'_peers=_b[peers] if _n==`row'
			
			replace y`w'_p_rank_lower=_b[p_rank]-1.96*_se[p_rank] if _n==`row'
			replace y`w'_p_rank_upper=_b[p_rank]+1.96*_se[p_rank] if _n==`row'
			replace y`w'_peers_lower=_b[peers]-1.96*_se[peers] if _n==`row'
			replace y`w'_peers_upper=_b[peers]+1.96*_se[peers] if _n==`row'
		
			* Same degrees of freedom definition as the other reg models in this file
			replace y`w'_df=e(N)-e(df_m)-1	if _n==`row'
		}		
		
		noisily estimates table peers*,  
		estimates drop _all	
	}
	
* APPENDIX FIGURE A4 - Later ages
	
	* Similar results with different window widths
	* The exported file name carries the window width, so adding widths to the list does not overwrite the chart
	foreach w in 7 {
		#delimit ;
			graph twoway
				(rspike y`w'_p_rank_upper y`w'_p_rank_lower x_age, color(gs8))
				(scatter y`w'_p_rank x_age, msym(O) mcolor(ebblue))
				(rspike y`w'_peers_upper y`w'_peers_lower x_age, color(gs8))
				(scatter y`w'_peers x_age, msym(Oh) mcolor(ebblue))
			if _n<=8
			,
			xtitle("Age at observation")
			ytitle("Coefficient on own and peer parent ranks")
			xlabel(,grid)
			ylabel(,grid)
			legend(order(2 "Parents" 4 "Peers"))
			;
		#delimit cr

		graph export "${${location}_path}/5. Output/Charts/FA4_Peers_byage`w'.png", replace
	}

	drop x_* y?_*	
	
*------------------------------------------------------------------------------*
* 	3.  HETEROGENEITY								   
*------------------------------------------------------------------------------*
	
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	3A.  For different subpopulations						   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*		
	
* Load the analysis file
clear
use "mobility_peers"

	gen peers=.
	gen ma_peers=.
	
	* Subpopulations
	* Each characteristic is split at its median into a pair of indicators.
	* Stata treats a missing value as larger than any number, so without the missing()
	* condition children with no value for the characteristic would all fall in the
	* "above the median" group. They now get a missing indicator and are in neither group.

	sum density, detail
	gen byte sparse=(density<=r(p50)) if !missing(density)
	gen byte dense=(density>r(p50)) if !missing(density)
	
	sum area, detail
	gen byte small=(area<=r(p50)) if !missing(area)
	gen byte large=(area>r(p50)) if !missing(area)

	sum frac_nesb, detail
	gen byte nonediverse=(frac_nesb<=r(p50)) if !missing(frac_nesb)
	gen byte ediverse=(frac_nesb>r(p50)) if !missing(frac_nesb)
	
	sum postcode_peer_sdrank, detail
	gen byte nonidiverse=(postcode_peer_sdrank<=r(p50)) if !missing(postcode_peer_sdrank)
	gen byte idiverse=(postcode_peer_sdrank>r(p50)) if !missing(postcode_peer_sdrank)
	
	* Every model controls for the 7 cohort moving average
	replace ma_peers=postcode_peer_rank_ma7_L		
		
	foreach subpop of varlist sparse dense small large nonediverse ediverse nonidiverse idiverse  {
		reg c24_rank p_rank postcode_peer_rank_L ma_peers if `subpop'==1,  cluster(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-1
			estimates store peers_`subpop'
	}
	
* TABLE 5 - Peers by subpopulations	
* The table file is replaced on each run, like the other single-panel tables; with append a
* re-run would add a second copy of the table to the existing file.
	
	#delimit ;	
		esttab 	peers_sparse peers_dense peers_large peers_small peers_ediverse peers_nonediverse peers_idiverse peers_nonidiverse
				using "${${location}_path}/5. Output/Tables/T5_Peers_bysub", 
				se fixed b(%9.3f) star(* 0.1 ** 0.05 *** 0.01) scalars(df_reg) replace 
		;
	#delimit cr
	
	estimates drop _all
	
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* 	3B. By sex					   
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*	

* Load the analysis file and set up the regressors
use "mobility_peers", clear

	* All peers, with the 7 cohort moving average as the control

	gen peers=postcode_peer_rank_L
	gen ma_peers=postcode_peer_rank_ma7_L

	* Peers from families with a boy (b) and with a girl (g), each with its own moving average

	foreach prefix in b g {
		gen `prefix'peers=postcode_peer_`prefix'rank_L
		gen ma_`prefix'peers=postcode_peer_`prefix'rank_ma7_L
	}

	* Four models for each value of sex: all peers, boy-family peers, girl-family peers, both.
	* Each is stored as peers<sex>_r<model number>.

	quietly foreach s of numlist 1 2 {

		local r=0

		foreach rhs in "peers ma_peers" "bpeers ma_bpeers" "gpeers ma_gpeers" "bpeers ma_bpeers gpeers ma_gpeers" {
			local ++r

			reg c24_rank p_rank `rhs' if sex==`s', cluster(postcode_id)
			estadd scalar df_reg = e(N)-e(df_m)-1
			estimates store peers`s'_r`r'
		}
	}
	
* TABLE 6 - Peers by sex
* The sex==2 models start the file and the sex==1 models are appended below them
	
	esttab peers2_r? using "${${location}_path}/5. Output/Tables/T6_Peers_bysex", fixed se star(* 0.1 ** 0.05 *** 0.01) scalars(df_reg) keep(p_rank peers gpeers bpeers) b(%9.3fc) replace 
	esttab peers1_r? using "${${location}_path}/5. Output/Tables/T6_Peers_bysex", fixed se star(* 0.1 ** 0.05 *** 0.01) scalars(df_reg) keep(p_rank peers gpeers bpeers) b(%9.3fc) append   

	estimates drop _all
	
