* ------------------------------------------------------------------
* Session setup, then per-year extracts of key_log keyed on the
* ayear4 / ayear2 / ayear3 variables, stacked into ayear4.dta and
* ayear3.dta.
* ------------------------------------------------------------------
clear
set more off
cd d:\data\contact

* One file per ayear4 value: distinct (id, ayear4, ayear3, ayear2) rows.
local a4_years 1998 2000 2001 2002 2003
foreach yr of local a4_years {
    use key_log
    keep if ayear4 == `yr'
    keep id ayear2 ayear3 ayear4
    duplicates drop
    sort id
    save ayear4_`yr', replace
}

* Single extract for ayear2 == 2000.
use key_log
keep if ayear2 == 2000
keep id ayear2 ayear4
duplicates drop
sort id
save ayear2_2000, replace

* One file per ayear3 value, 2001 through 2004.
forvalues yr = 2001/2004 {
    use key_log
    keep if ayear3 == `yr'
    keep id ayear ayear3 ayear4
    duplicates drop
    sort id
    save ayear3_`yr', replace
}

* Stack the ayear4 extracts (1998 first, then the remaining years in order).
use ayear4_1998
append using ayear4_2000 ayear4_2001 ayear4_2002 ayear4_2003
sort id
save ayear4, replace

* Stack the ayear2 == 2000 extract with the ayear3 extracts.
use ayear2_2000
append using ayear3_2001 ayear3_2002 ayear3_2003 ayear3_2004
sort id
save ayear3, replace
* For each year, keep key_log rows whose ayear equals the year but whose
* ayear4 does not, reduce them to distinct (id, squad4, ayear, ayear4),
* and tag them with the year in -upper-.
foreach yr in 1998 2000 2001 2002 2003 {
    use key_log
    keep if ayear == `yr' & ayear4 != `yr'
    keep id ayear ayear4 squad4
    duplicates drop
    generate upper = `yr'
    save upper_`yr', replace
}

* The 2003 extract is still in memory after the loop; add the other years.
append using upper_1998 upper_2000 upper_2001 upper_2002
duplicates drop
sort ayear4 squad4
save upper, replace


* Flag (ayear4, squad4) cells containing at least one black cadet who has
* a squad4 value but no squad3 value.
use ayear4
sort id
merge m:1 id using ..\studies\c00-14demographics, keep(match) nogenerate
keep if squad3 == . & squad4 != . & black == 1
keep ayear4 squad4
generate fresh_black_dropout = 1
duplicates drop
sort ayear4 squad4
save fresh_black_dropout, replace

* Mean aca_cmp and SAT of black cadets in the -upper- file by (ayear, squad3),
* re-keyed as (ayear4, squad4) and saved to bu.dta.
use upper
sort id
merge m:1 id using ..\studies\c00-14demographics, keep(match) nogenerate
keep if black == 1
generate sat = sat_v + sat_m

sort ayear squad3 id
by ayear squad3: egen fresh_bu_aca = mean(aca_cmp)
by ayear squad3: egen fresh_bu_sat = mean(sat)

keep squad3 cyear ayear fresh_bu_aca fresh_bu_sat
rename squad3 squad4
drop if squad4 == .
* One row per (ayear, squad4); the first row in current order is retained.
duplicates drop ayear squad4, force

* Squad 40 is recoded by hand: means blanked, year fields overwritten,
* then the squad number itself is changed to 20 (must come last).
foreach m of varlist fresh_bu_aca fresh_bu_sat {
    replace `m' = . if squad4 == 40
}
replace ayear = 1998 if squad4 == 40
replace cyear = 2002 if squad4 == 40
replace squad4 = 20 if squad4 == 40

drop if squad4 > 36
rename ayear ayear4
sort ayear4 squad4
save bu, replace


* Build residence.dta: region indicators plus percent-black flags for each
* record of residence_00_07, keyed on id for the later -merge m:1 id-.
use residence_00_07, clear

* south = 1 for the listed states, 0 otherwise (including blank residence).
* inlist() accepts at most 9 string values per call, hence two calls.
* The original expression tested "Arkansas" twice; the duplicate is removed.
gen south = inlist(residence, "Missouri", "Alabama", "Arkansas", "Georgia", "Virginia", "Florida", "Louisiana", "West Virginia") ///
          | inlist(residence, "North Carolina", "South Carolina", "Texas", "Oklahoma", "Kentucky", "Mississippi", "Tennessee")
gen north = 1 - south
gen missing_residence = residence == ""
replace residence = "missing" if residence == ""
sort residence
merge m:1  residence using pct_black

* r(p50) from this summarize is used for pct_black_top below; the summary is
* deliberately taken before any rows are dropped so the median is unchanged.
sum pct_black, detail
* gen pct_black_bot  = pct_black <= r(p50) & pct_black != .
* NOTE(review): "bottom" uses a hard-coded 10.6 cutoff while "top" uses the
* sample median - confirm the asymmetry is intended.
gen pct_black_bot = pct_black < 10.6 & pct_black != .
gen pct_black_top = .
replace pct_black_top = pct_black > r(p50) if pct_black != .

* Rows that exist only in pct_black have no id. Left in place, two or more of
* them make id non-unique and break the later -merge m:1 id using residence-.
* They can never match a cadet, so removing them does not change that merge.
drop if _merge == 2
drop _merge
duplicates drop
sort id
save residence, replace

* same_fresh_black.dta: ids of non-black, non-hispanic, non-asian males who
* share (ayear4, squad3, squad4) with at least one black male.
use ayear4
sort id
merge m:1 id using ..\studies\c00-14demographics, keep(match) nogenerate
keep if female == 0
sort ayear4 squad3 squad4
save demo_roommates, replace

* Cells that contain a black cadet.
keep if black == 1
keep ayear4 squad3 squad4
sort ayear4 squad3 squad4
save demo_black_roommates, replace

* Join the remaining group to those cells; any match sets the flag.
use demo_roommates, clear
keep if black == 0 & hispanic == 0 & asian == 0
sort ayear4 squad3 squad4
joinby ayear4 squad3 squad4 using demo_black_roommates
keep id
generate same_fresh_black = 1
duplicates drop
sort id
save same_fresh_black, replace

* same_team.dta: ids of non-black, non-hispanic, non-asian males who share
* (ayear4, squad3, team) with at least one black male.
* Starts from the demo_roommates.dta written by the preceding section and
* overwrites it with the team-merged version.
use demo_roommates
sort id
* Old-syntax merge (no 1:1 / m:1 match type given).
* NOTE(review): presumably team_02-07 is already sorted by id - confirm.
merge id using team_02-07
keep if _merge == 3
drop _merge
save demo_roommates, replace
* (ayear4, squad3, team) cells that contain a black male.
keep if black == 1 & female == 0
keep ayear4 squad3  team
sort ayear4 squad3 team
save demo_black_roommates, replace
use demo_roommates, clear
keep if black == 0 & hispanic == 0 & asian == 0 & female == 0
sort ayear4 squad3 team
joinby ayear4 squad3 team using demo_black_roommates
* Matches on a missing team value do not count.
drop if team == .
keep id
gen same_team=1
duplicates drop
sort id
save same_team, replace

* same_section.dta: ids of non-black, non-hispanic, non-asian males who share
* (squad3, ayear4, fall, course, section) with at least one black male.
use ayear4, clear
sort id
merge m:1 id using ..\studies\c00-14demographics
keep if _merge == 3
drop _merge
keep if female == 0
sort id
* Old-syntax merge (no match type given).
* NOTE(review): presumably sections_02-07 is already sorted by id - confirm.
merge id using sections_02-07
keep if _merge == 3
drop _merge
sort squad3 ayear4 fall course section
save demo_roommates, replace
* Section cells that contain a black cadet.
keep if black == 1 
keep squad3 ayear4 fall course section
sort squad3 ayear4 fall course section
save demo_black_roommates, replace
use demo_roommates, clear
keep if black == 0 & hispanic == 0 & asian == 0 
* Fixed: the sort key was written as -ayear-, which is an ambiguous
* abbreviation of ayear2/ayear3/ayear4 (or a different variable) here.
* The join key throughout this section is ayear4.
sort squad3 ayear4 fall course section
joinby squad3 ayear4 fall course section using demo_black_roommates
keep id
gen same_section=1
duplicates drop
sort id
save same_section, replace

* black_classmate.dta: for each non-black, non-hispanic, non-asian male, the
* average aca_cmp / SAT of black males sharing a (ayear4, fall, course,
* section) cell with him, standardized within ayear4, plus a count.
use ayear4, clear
sort id
merge m:1 id using ..\studies\c00-14demographics
keep if _merge == 3
drop _merge
keep if female == 0
sort id
merge 1:m id using sections_02-07
keep if _merge == 3
drop _merge
sort ayear4 fall course section
save demo_roommates, replace

* Black classmates: one row per black cadet per section, with b_-prefixed scores.
keep if black == 1 
gen b_sat = sat_v + sat_m
ren sat_v b_sat_v
ren sat_m b_sat_m
ren aca_cmp b_aca_cmp
keep ayear4 fall course section b_sat b_sat_m b_sat_v b_aca_cmp
sort ayear4 fall course section
save demo_black_roommates, replace

use demo_roommates, clear
keep if black == 0 & hispanic == 0 & asian == 0 
* Fixed: the sort key was written as -ayear-, which is an ambiguous
* abbreviation of ayear2/ayear3/ayear4 (or a different variable) here.
* The join key is ayear4.
sort ayear4 fall course section
* unm(m) keeps cadets with no black classmate (their b_* values stay missing).
joinby ayear4 fall course section using demo_black_roommates, unm(m)
sort id
by id: egen bc_aca = mean(b_aca_cmp)
by id: egen bc_sat = mean(b_sat)
by id: egen bc_sat_m = mean(b_sat_m)
by id: egen bc_sat_v = mean(b_sat_v)
* count() tallies non-missing b_sat rows across all joined rows for the id.
by id: egen num_black_classmate = count(b_sat)
sort ayear4 id
* -center- is not an official Stata command; presumably the SSC package,
* which must be installed. It writes c_-prefixed variables.
by ayear4: center bc_aca bc_sat bc_sat_m bc_sat_v , standardize 
foreach v in aca sat sat_m sat_v {
   ren c_bc_`v' black_classmate_`v'
   }
keep id black_classmate_aca black_classmate_sat* num_black_classmate
duplicates drop
sort id
save black_classmate, replace

* gpa_hat.dta: linear prediction of cumgpa from admission characteristics,
* estimated on semester == 2 records of ids present in ayear4.dta.
use ..\peeracademy\peer_master3, clear
sort id
joinby id using ayear4
keep if semester == 2

* Cluster-robust standard errors; only the fitted values are used below.
xi: regress cumgpa sat_v sat_m leadership_cmp aca_cmp cft_score rathlete pschool black hispanic asian female i.clsyr, vce(cluster class_sqd_yr_sem)
predict gpa_hat, xb

keep id gpa_hat
duplicates drop
sort id
save gpa_hat, replace

* Start of the demo.dta build: one row per (id, ayear2, ayear3, ayear4) with
* demographics, predicted GPA, group identifiers and race/sex indicators.
use ayear4
keep id ayear2 ayear3 ayear4
duplicates drop
sort id
merge m:1 id using ..\studies\c00-14demographics, keep(match) nogenerate
keep sat_v sat_m leadership_cmp cft_score aca_cmp rathlete pschool black hispanic asian female ayear2 ayear3 ayear4 cyear squad4 squad3 id

sort id
merge m:1 id using gpa_hat, keep(master match) nogenerate

* Keep squads numbered 36 or below; missing squad4 is removed as well.
drop if squad4 == . | squad4 > 36

sort id
* Group identifiers for squad-by-year cells.
egen sq4_ayear4 = group(squad4 ayear4)
egen sq3_ayear3 = group(squad3 ayear3)
egen sq3_ayear2 = group(squad3 ayear2)

generate sat = (sat_m + sat_v)
generate white = black == 0 & hispanic == 0 & asian == 0
generate wm = white & female == 0
generate bm = black == 1 & female == 0

* Tercile cut-points of aca_cmp within ayear4, and black cadets by tercile.
egen aca_66pctile = pctile(aca_cmp), p(66.67) by(ayear4)
egen aca_33pctile = pctile(aca_cmp), p(33.33) by(ayear4)
generate aca_bl = (black == 1 & aca_cmp < aca_33pctile)
generate aca_bm = (black == 1 & aca_cmp < aca_66pctile & aca_cmp >= aca_33pctile)
generate aca_bh = (black == 1 & aca_cmp >= aca_66pctile)

* ---- Peer composition within squad4 x ayear4 cells ----
* Step 1: subgroup means, defined only on rows that belong to the subgroup.
egen w4a = mean(aca_cmp) if wm == 1, by(sq4_ayear4)
egen w4s = mean(sat) if wm == 1, by(sq4_ayear4)

* Suffix -> source variable lookup for the black / black-male means.
local src_a  aca_cmp
local src_g  gpa_hat
local src_s  sat
local src_sm sat_m
local src_sv sat_v
local src_l  leadership_cmp
local src_c  cft_score
foreach k in a g s sm sv l c {
    egen b4`k' = mean(`src_`k'') if black == 1, by(sq4_ayear4)
}
foreach k in a s sm sv l c {
    egen bm4`k' = mean(`src_`k'') if bm == 1, by(sq4_ayear4)
}

* Step 2: spread each subgroup mean to every row of the cell (max over the
* cell picks up the single non-missing value), and count members.
local part  w4a w4s b4a b4g b4s b4sm b4sv b4l b4c
local whole w4aca w4sat b4aca b4gpa_hat b4sat b4sat_m b4sat_v b4ldr b4cft
local n : word count `part'
forvalues i = 1/`n' {
    local p : word `i' of `part'
    local w : word `i' of `whole'
    egen `w' = max(`p'), by(sq4_ayear4)
}
egen b4num = sum(black), by(sq4_ayear4)

local part  bm4a bm4s bm4sm bm4sv bm4l bm4c
local whole bm4aca bm4sat bm4satm bm4satv bm4ldr bm4cft
local n : word count `part'
forvalues i = 1/`n' {
    local p : word `i' of `part'
    local w : word `i' of `whole'
    egen `w' = max(`p'), by(sq4_ayear4)
}
egen bm4num = sum(bm), by(sq4_ayear4)
egen fresh_num_bm = sum(bm), by(sq4_ayear4)
foreach t in l m h {
    egen fresh_num_black_`t' = sum(aca_b`t'), by(sq4_ayear4)
}

* Absolute gap between own aca_cmp and the cell's black mean, scaled by the
* within-ayear4 standard deviation of that gap.
generate d_aca = abs(aca_cmp - b4aca)
egen d_aca_sd = sd(d_aca), by(ayear4)
generate d_fresh_black_aca = d_aca / d_aca_sd

* Step 3: standardize within ayear4; -center- writes c_-prefixed copies.
sort ayear4 id

by ayear4: center w4aca w4sat b4aca b4gpa_hat b4sat b4sat_m b4sat_v b4ldr b4cft bm4aca bm4sat bm4satm bm4satv bm4ldr bm4cft , standardize 

drop w4a w4s b4a b4s b4l b4c w4aca w4sat bm4a bm4s bm4l bm4c d_aca

* Step 4: give the standardized variables (and the black head-count) their
* analysis names.
local old c_w4aca c_w4sat c_b4aca c_b4gpa_hat c_b4sat c_b4sat_m c_b4sat_v c_b4ldr c_b4cft b4num c_bm4aca c_bm4sat c_bm4satm c_bm4satv c_bm4ldr c_bm4cft
local new fresh_wm_aca fresh_wm_sat fresh_black_aca fresh_black_gpa_hat fresh_black_sat fresh_black_sat_m fresh_black_sat_v fresh_black_lead fresh_black_cft fresh_num_black fresh_black_m_aca fresh_black_m_sat fresh_black_m_sat_m fresh_black_m_sat_v fresh_black_m_lead fresh_black_m_cft
local n : word count `old'
forvalues i = 1/`n' {
    local o : word `i' of `old'
    local r : word `i' of `new'
    rename `o' `r'
}

* Counts of black cadets with pschool / rathlete set, spread to the cell.
egen fresh_black_p = sum(pschool) if black == 1, by(sq4_ayear4)
egen fresh_black_ra = sum(rathlete) if black == 1, by(sq4_ayear4)
egen fresh_black_pschool = max(fresh_black_p), by(sq4_ayear4)
egen fresh_black_rathlete = max(fresh_black_ra), by(sq4_ayear4)

* Missing-indicator treatment: flag missing values, then set them to zero.
foreach s in aca sat sat_m sat_v lead cft pschool rathlete gpa_hat {
    generate missing_fresh_black_`s' = fresh_black_`s' == .
    replace fresh_black_`s' = 0 if fresh_black_`s' == .
}

foreach s in aca sat sat_v sat_m {
    generate missing_fresh_black_m_`s' = fresh_black_m_`s' == .
    replace fresh_black_m_`s' = 0 if fresh_black_m_`s' == .
}

generate no_black_m_fresh = fresh_num_bm == 0
generate no_blacks_fresh = fresh_num_black == 0

* ---- Peer composition within squad3 x ayear3 cells (soph_*) and
*      squad3 x ayear2 cells (j_*) ----
* Suffix -> source variable lookup for the black-male means.
local src_a aca_cmp
local src_s sat
local src_l leadership_cmp
local src_c cft_score
* Output suffixes, aligned position-by-position with the w?/b? inputs below.
local out nonblack_aca nonblack_sat black_aca black_sat black_lead black_cft

* squad3 x ayear3
egen w3a = mean(aca_cmp) if wm, by(sq3_ayear3)
egen w3s = mean(sat) if wm, by(sq3_ayear3)
foreach k in a s l c {
    egen b3`k' = mean(`src_`k'') if black == 1 & female==0, by(sq3_ayear3)
}

local in3 w3a w3s b3a b3s b3l b3c
forvalues i = 1/6 {
    local p : word `i' of `in3'
    local o : word `i' of `out'
    egen soph_`o' = max(`p'), by(sq3_ayear3)
}
egen soph_num_black = sum(black) if female==0, by(sq3_ayear3)
egen soph_num_wm = sum(wm), by(sq3_ayear3)
generate male = 1 - female
egen soph_num_male = sum(male), by(sq3_ayear3)

* squad3 x ayear2
egen w2a = mean(aca_cmp) if wm, by(sq3_ayear2)
egen w2s = mean(sat) if wm, by(sq3_ayear2)
foreach k in a s l c {
    egen b2`k' = mean(`src_`k'') if black == 1 & female==0, by(sq3_ayear2)
}

local in2 w2a w2s b2a b2s b2l b2c
forvalues i = 1/6 {
    local p : word `i' of `in2'
    local o : word `i' of `out'
    egen j_`o' = max(`p'), by(sq3_ayear2)
}
egen j_num_black = sum(black) if female==0, by(sq3_ayear2)
egen j_num_wm = sum(wm), by(sq3_ayear2)
egen j_num_male = sum(male), by(sq3_ayear2)

* Rows with ayear2 == 2000 take the ayear2-based values in the soph_* slots.
foreach s in nonblack_aca nonblack_sat black_aca black_sat black_lead black_cft num_black num_wm num_male {
    replace soph_`s' = j_`s' if ayear2==2000
}

generate no_blacks_soph = soph_num_black == 0

* Rank of each freshman peer measure among white males in the same
* (cyear, squad3) cell.
foreach m in fresh_black_aca fresh_black_sat fresh_black_sat_v fresh_black_sat_m fresh_num_black {
    egen r_`m' = rank(`m') if wm == 1, by(cyear squad3)
}

drop w3a w3s b3a b3s b3l b3c w2a w2s b2a b2s b2l b2c 

* Zero-fill missing soph_black_* means (no indicator is created for these).
foreach s in aca sat lead cft {
    replace soph_black_`s' = 0 if soph_black_`s' == .
}

* Own characteristics: flag missing values, then set them to zero.
foreach x in sat_v sat_m aca_cmp sat leadership_cmp cft_score rathlete {
    generate missing_`x' = `x' == .
    replace `x' = 0 if `x' == .
}


* ---- Attach the auxiliary files built earlier in this script ----
* Every merge keeps all master rows and discards using-only rows.

* bu.dta, with missing-indicator treatment of its two means.
sort ayear4 squad4
merge m:1 ayear4 squad4 using bu, keep(master match) nogenerate
foreach s in aca sat {
    generate missing_bu_`s' = fresh_bu_`s' == .
    replace fresh_bu_`s' = 0 if fresh_bu_`s' == .
}

* Residence: region and percent-black indicators.
sort id
merge m:1 id using residence, keep(master match) nogenerate
replace pct_black_bot = 0 if pct_black_bot == .
replace missing_residence = 1 if residence == "" | residence == "missing"
replace residence = "missing" if residence == ""

* Interactions of the freshman peer measures with the residence indicators.
foreach m in fresh_black_aca fresh_black_sat fresh_black_sat_m fresh_black_sat_v fresh_num_black d_fresh_black_aca fresh_black_gpa_hat {
    generate north_`m' = north*`m'
    generate south_`m' = south*`m'
    generate bot_`m' = pct_black_bot*`m'
    generate top_`m' = pct_black_top*`m'
}

* Contact flags: each file holds one indicator named after the file;
* ids absent from the file get 0.
foreach flag in same_fresh_black same_team same_section {
    sort id
    merge m:1 id using `flag', keep(master match) nogenerate
    replace `flag' = 0 if `flag' == .
}

* Black-classmate averages (left missing when unmatched).
sort id
merge m:1 id using black_classmate, keep(master match) nogenerate

* Cell-level dropout flag; unmatched cells get 0.
sort ayear4 squad4
merge m:1 ayear4 squad4 using fresh_black_dropout, keep(master match) nogenerate
replace fresh_black_dropout = 0 if fresh_black_dropout == .

* ---- Variable labels, final derived variable, and save of demo.dta ----
* Labels containing $\times$ and \% are LaTeX fragments and are kept verbatim.
* NOTE(review): the labels below contain the spellings "Freshmem" and "Fesh";
* left unchanged here because downstream output may depend on the exact text.
label var b4aca "Black Fresh HS Performance"
label var fresh_black_aca "Black Fresh HS Performance"
label var fresh_black_gpa_hat "Black Fresh Predicted GPA"
label var fresh_black_sat "Black Freshman SAT"
label var fresh_black_sat_m "Black Freshman SAT-M"
label var fresh_black_sat_v "Black Freshman SAT-V"
label var b4cft "Black Freshman Fitness"
label var fresh_black_cft "Black Freshman Fitness"
label var b4ldr "Black Freshman Leadership" 
label var fresh_black_lead "Black Freshman Leadership"
label var fresh_num_black "Number of Black Freshmen"
label var fresh_black_m_aca "Black Male Fresh HS Performance"
label var fresh_black_m_sat "Black Male Freshman SAT"
label var fresh_black_m_sat_v "Black Male Freshman SAT-V"
label var fresh_black_m_sat_m "Black Male Freshman SAT-M"
label var fresh_black_m_cft "Black Male Freshman Fitness"
label var fresh_black_m_lead "Black Male Freshman Leadership"
* fresh_num_bm was labelled twice; only the later label ever took effect,
* so the earlier, overwritten one has been removed.
label var aca_cmp "High School Performance"
label var sat "SAT Score"
label var leadership_cmp "Leadership Score"
label var cft_score "Fitness Score"
label var white "Caucasian"
label var black "African American"
label var hispanic "Hispanic"
label var asian "Asian"
label var rathlete "Recruited Athlete"
label var female "Female"
label var south_fresh_black_aca "South $\times$ Black Fresh Academic Composite"
label var north_fresh_black_aca "North $\times$ Black Fresh Academic Composite"
label var south_fresh_black_sat "South $\times$ Black Freshman SAT"
label var north_fresh_black_sat "North $\times$ Black Freshman SAT"
label var south_fresh_num_black "South $\times$ Number of Black Freshmem"
label var north_fresh_num_black "North $\times$ Number of Black Freshmem"
label var fresh_black_dropout "Freshman Black Dropout"
label var r_fresh_black_aca "Rank Fresh Black HS Performance"
label var r_fresh_black_sat "Rank Fresh Black SAT"
label var r_fresh_black_sat_v "Rank Fresh Black SAT-V"
label var r_fresh_black_sat_m "Rank Fresh Black SAT-M"
label var r_fresh_num_black "Rank Number Fresh Black"
label var d_fresh_black_aca "Abs Diff HS Performance"
label var fresh_num_bm "Number of Freshman Black Males"
label var bot_fresh_black_aca "Low Black $\times$ Black Fresh HS Performance"
label var bot_fresh_black_sat_v "Low Black $\times$ Black Fresh SAT-V"
label var bot_fresh_black_sat_m "Low Black $\times$ Black Fesh SAT-M"
label var bot_fresh_num_black "Low Black $\times$ Number of Black Freshmen"
label var fresh_num_black_l "Number of Low HS Performance Black Freshmen"
label var fresh_num_black_m "Number of Med HS Performance Black Freshmen"
label var fresh_num_black_h "Number of High HS Performance Black Freshmen"
label var pct_black_bot "Low \% Black State"
* fresh_black_alo is not created anywhere in this script, so an unguarded
* -label var- stops the do-file with r(111) before demo.dta is saved.
* -capture- applies the label only when the variable exists.
capture label var fresh_black_alo "Black Fresh Admissions Rating"


sort id 


* Share of black males times share of other males in the sophomore cell.
gen w_b = (soph_num_black/soph_num_male)*((soph_num_male-soph_num_black)/soph_num_male)
save demo, replace

* Build key_return.dta: RETURN records from the key log, keeping the latest
* return day when an id has several for the same room and action year.
use key_log
sort room id day
* Drop a RETURN logged fewer than 5 days after the immediately preceding
* ISSUE record for the same room and id.
by room id: drop if day[_n] - day[_n-1] < 5 &  action[_n] == "RETURN" &  action[_n-1] == "ISSUE"
sort id
merge m:1 id using ..\studies\c00-14demographics
keep if _merge==3
drop _merge
* Action year: dates in months 1-7 are assigned to the previous calendar year.
gen ayear_action = year(day)
gen month_action= month(day)
replace ayear_action = ayear_action - 1 if  month_action < 8
* Term code. After the sequential replaces: 1 = months 8-12 and month 1,
* 2 = months 2-6, 3 = month 7.
gen fall = month_action > 7
replace fall = 2 if month_action < 7
replace fall = 1 if month_action == 1
replace fall = 3 if month_action == 7 
duplicates drop
* Within (id, action, ayear_action, room), keep only the record(s) on the
* latest day.
duplicates tag id action ayear_action room, g(dup)
sort id action ayear_action room
by id action ayear_action room: egen max_day = max(day)
drop if dup > 0 & day < max_day
drop dup
keep if action == "RETURN"
duplicates drop
ren day day_return
keep dorm room ayear_action month_action fall id day_return ayear4 ayear3 ayear2 ayear1
sort dorm room id
save key_return, replace

   
* Build the first version of key_issue.dta: ISSUE records from the key log
* paired with return dates from key_return.dta by (dorm, room, id).
use key_log
sort room id day
* Drop a RETURN logged fewer than 5 days after the immediately preceding
* ISSUE record for the same room and id.
by room id: drop if day[_n] - day[_n-1] < 5 &  action[_n] == "RETURN" &  action[_n-1] == "ISSUE"
sort id
merge m:1 id using ..\studies\c00-14demographics
keep if _merge==3
drop _merge
* Action year: dates in months 1-7 are assigned to the previous calendar year.
gen ayear_action = year(day)
gen month_action= month(day)
replace ayear_action = ayear_action - 1 if  month_action < 8
* Term code: 1 = months 8-12, 2 = months 1-5 (months 6-7 are dropped below).
* This differs from the coding used when key_return.dta is built.
gen fall = month_action > 7
replace fall = 2 if month_action < 7
gen sat = sat_m + sat_v
drop if month_action > 5 & month_action < 8
keep if action == "ISSUE"
duplicates drop
* Within (id, ayear_action, room), keep only the record(s) on the earliest day.
duplicates tag id ayear_action room, g(dup)
sort id ayear_action room
by id ayear_action room: egen min_day = min(day)
drop if dup > 0 & day > min_day
drop dup
duplicates drop
gen day_issue = day
sort dorm room id
* unm(m) keeps issues with no matching return; those have _merge == 1 and a
* missing day_return.
joinby dorm room id using key_return, unm(m)
duplicates tag id, g(dup)
sort id day
* NOTE(review): the by-group is (id, day), so day[_n+1] can only be another
* row with the same day (or missing at the end of the group). If the intent
* was "next issue date for this id", the prefix would need to be -by id:-.
* Confirm before changing; the rows this produces have length 0.
by id day: replace day_return = day[_n+1] if _merge == 1 & dup > 0
* length = days between issue and return; 999 marks "no return date yet".
gen length = 999
replace length = day_return - day if day_return != .
drop _merge
sort dorm room ayear_action fall id
save key_issue, replace
* Recovery pass 1: for issues still lacking a return date, search key_log for
* a RETURN of the same room by the same id at any date.
* Continues from the key_issue data left in memory by the previous section.
keep if day_return == .
keep id room 
duplicates drop
gen action = "RETURN"
sort  id room action
save missing_return, replace
use key_log, clear
sort room id day
* Same short-turnaround filter as in the key_return / key_issue builds.
by room id: drop if day[_n] - day[_n-1] < 5 &  action[_n] == "RETURN" &  action[_n-1] == "ISSUE"
sort id room action
* Keeps only key_log RETURN rows whose (id, room) is in missing_return.
joinby id room action using missing_return
ren day day_return
keep id room day_return
sort id room
save recovered_return, replace
use key_issue
sort id room
* Old-syntax merge; -update replace- lets the using file's values overwrite
* the master's for variables the two files share.
merge id room using recovered_return, update replace
replace length = day_return - day if day_return != .
drop _merge
sort dorm room ayear_action fall id
save key_issue, replace
* Recovery pass 2: for issues still lacking a return date, use the date of
* the id's next ISSUE record (in another room) as the return date.
* Continues from the key_issue data left in memory by the previous section.
keep if day_return == .
keep id room day
duplicates drop
gen action = "ISSUE"
sort  id action
ren room old_room
ren day day_issue_missing
save missing_return, replace
use key_log, clear
sort room id day
* Same short-turnaround filter as in the key_return / key_issue builds.
by room id: drop if day[_n] - day[_n-1] < 5 &  action[_n] == "RETURN" &  action[_n-1] == "ISSUE"
sort id action
joinby id action using missing_return
* Keep candidate issues on or after the unreturned issue, excluding the
* unreturned issue itself (same room, same or earlier day).
drop if day < day_issue_missing
drop if room == old_room & day <= day_issue_missing
sort id day
* Earliest remaining candidate per id becomes the return date.
by id:  egen min_day = min(day)
drop if day > min_day
ren day day_return
drop room
ren old_room room
ren day_issue_missing day_issue
keep id room day_issue day_return
gen missing_return = 1
duplicates drop
sort id day_return
* If an id still has several rows, keep the one(s) with the earliest return.
duplicates tag id, g(dup)
by id: egen min_day = min(day_return)
drop if dup > 0 & day_return > min_day
drop dup min_day
sort id room day_issue missing_return
save recovered_return, replace
use key_issue
* missing_return = 1 restricts the merge to issues that still lack a return.
gen missing_return = day_return == .
sort id room day_issue missing_return
merge m:1 id room day_issue missing_return using recovered_return, update replace
replace length = day_return - day if day_return != .
* dup here is the id-level tag carried in key_issue.dta from its first build.
drop _merge dup
* Removes zero- and negative-length stays.
drop if length < 1
duplicates drop
* For repeated (id, room, day) rows, keep those with the id's earliest return.
duplicates tag id room day, g(dup)
sort id
by id: egen min_return = min(day_return)
drop if dup > 0 & day_return > min_return
drop dup min_return missing_return
sort dorm room ayear_action fall id
save key_issue, replace
* Recovery pass 3: for issues still lacking a return date, use a later ISSUE
* record of the same room to the same id as the return date, then fall back
* to fixed end-of-year dates.
* Continues from the key_issue data left in memory by the previous section.
keep if day_return == .
keep id room day
duplicates drop
gen action = "ISSUE"
sort  id room action 
ren day day_issue_missing
save missing_return, replace
use key_log, clear
sort room id day
* Same short-turnaround filter as in the key_return / key_issue builds.
by room id: drop if day[_n] - day[_n-1] < 5 &  action[_n] == "RETURN" &  action[_n-1] == "ISSUE"
sort id room action 
joinby id room action using missing_return
* Only issues strictly after the unreturned one are candidates.
drop if day <= day_issue_missing
sort id day
* Unlike pass 2, the LATEST candidate per id is kept here.
by id:  egen max_day = max(day)
drop if day < max_day
ren day day_return
ren day_issue_missing day_issue
keep id room day_issue day_return
gen missing_return = 1
duplicates drop
sort id day_return
* If an id still has several rows, keep the one(s) with the earliest return.
duplicates tag id, g(dup)
by id: egen min_day = min(day_return)
drop if dup > 0 & day_return > min_day
drop dup min_day
sort id room day_issue missing_return
save recovered_return, replace
use key_issue
* missing_return = 1 restricts the merge to issues that still lack a return.
gen missing_return = day_return == .
sort id room day_issue missing_return
merge m:1 id room day_issue missing_return using recovered_return, update replace
replace length = day_return - day if day_return != .
drop _merge 
* Removes zero- and negative-length stays.
drop if length < 1
duplicates drop
* For repeated (id, room, day) rows, keep those with the id's earliest return.
duplicates tag id room day, g(dup)
sort id
by id: egen min_return = min(day_return)
drop if dup > 0 & day_return > min_return
drop dup min_return missing_return
* Last resort: impute June 4 of the following calendar year for the listed
* combinations of ayear and ayear1/ayear2/ayear3.
* NOTE(review): the second line tests ayear1 while every other line tests
* ayear3 or ayear2 - confirm this is intended.
replace day_return = date("6/04/99","MDY",2000) if ayear == 1998 & ayear3 == 1999 & day_return == .
replace day_return = date("6/04/01","MDY",2050) if ayear == 2000 & ayear1 == 2001 & day_return == .
replace day_return = date("6/04/01","MDY",2050) if ayear == 2000 & ayear3 == 2001 & day_return == .
replace day_return = date("6/04/02","MDY",2050) if ayear == 2001 & ayear3 == 2002 & day_return == .
replace day_return = date("6/04/03","MDY",2050) if ayear == 2002 & ayear3 == 2003 & day_return == .
replace day_return = date("6/04/04","MDY",2050) if ayear == 2003 & ayear3 == 2004 & day_return == .
replace day_return = date("6/04/02","MDY",2050) if ayear == 2001 & ayear2 == 2002 & day_return == .
replace day_return = date("6/04/03","MDY",2050) if ayear == 2002 & ayear2 == 2003 & day_return == .
replace day_return = date("6/04/04","MDY",2050) if ayear == 2003 & ayear2 == 2004 & day_return == .
replace day_return = date("6/04/05","MDY",2050) if ayear == 2004 & ayear2 == 2005 & day_return == .
replace length = day_return - day if day_return != .
sort dorm room ayear_action fall id
save key_issue, replace

* Split key_issue.dta into one file per action year (1998 through 2004).
forvalues yr = 1998/2004 {
    use key_issue
    keep if ayear_action == `yr'
    sort dorm room id
    save key_issue_`yr', replace
}
 
* For each action year, self-join key_issue_<year> on (dorm, room,
* ayear_action, fall) to form ordered pairs of cadets issued the same room in
* the same term. Side 1 of the pair is prefixed r1_, side 2 r2_.
forvalues y = 1998/2004 { 
   use key_issue_`y', clear
   * Prefix the master copy so it can be told apart from the joined copy.
   foreach v in id action day_issue day_return length ayear4 ayear3 ayear2 ayear1 sat leadership_cmp cft_score aca_cmp rathlete pschool black hispanic asian female cyear squad3 squad4 {
     ren `v' r1_`v'
     }
   sort dorm room ayear_action fall
   joinby dorm room ayear_action fall using key_issue_`y'
   * Remove self-pairs.
   drop if id == r1_id
   * Overlap window: later of the two issue dates to earlier of the two returns.
   gen roommate_start = max(day_issue,r1_day_issue)
   gen roommate_end = min(day_return,r1_day_return)
   replace roommate_end = day_return if missing(roommate_end)
   replace roommate_end = r1_day_return if missing(roommate_end)
   gen roommate_length = roommate_end - roommate_start
   * Keep only pairs with a positive, known overlap.
   drop if roommate_length <= 0
   drop if roommate_length == .
   * c = number of ordered-pair rows in the room-term; 6 is what three
   * mutually overlapping occupants produce (3 x 2).
   egen roommates = group(dorm room ayear_action fall)
   sort roommates id
   by roommates: egen c =count(id)
   gen triple = c == 6 
   * In 6-row room-terms drop pairs issued more than 30 days apart; drop
   * room-terms with more than 6 rows altogether.
   drop if c == 6 & abs(day_issue - r1_day_issue) > 30
   drop if c > 6 
   * Prefix the joined copy as side 2.
   foreach v in id action day_issue day_return length ayear4 ayear3 ayear2 ayear1 sat leadership_cmp cft_score aca_cmp rathlete pschool black hispanic asian female cyear squad3 squad4 {
     ren `v' r2_`v'
     }
   * Collapse repeated (r1_id, r2_id) pairs to one row spanning the earliest
   * start to the latest end.
   sort r1_id r2_id roommate_start
   duplicates tag r1_id r2_id, g(dup)
   by r1_id r2_id:  egen min_start = min(roommate_start)
   by r1_id r2_id:  egen max_end = max(roommate_end)
   replace roommate_start = min_start
   replace roommate_end = max_end
   replace roommate_length = max_end - min_start
   duplicates drop r1_id r2_id, force 
   * NOTE(review): loce* must match at least one existing variable
   * (presumably from key_log or the demographics file) or this line errors.
   drop loce* month_action dup min_start max_end
   gen same_squad3 = r1_squad3 == r2_squad3
   sort r1_id room
   duplicates drop
   save roommate_`y', replace
   }

* For every action year and every ayear1..ayear4 variable, keep the pairs
* whose side-1 cadet has r1_ayear<k> equal to the action year and whose
* shared stay starts in August.
forvalues y = 1998/2004 {
    forvalues degree = 1/4 {
        use roommate_`y'
        keep if r1_ayear`degree' == `y' & month(roommate_start) == 8
        save roommate_`degree'_`y', replace
    }
}

* fresh_black_roommate.dta: per id, 1 if any roommate in the ayear4-based
* August pairs is black (maximum of r2_black within ayear and id).
use roommate_4_1998
append using roommate_4_2000 roommate_4_2001 roommate_4_2002 roommate_4_2003

rename r1_id id
bysort ayear id: egen fresh_black_roommate = max(r2_black)
keep id fresh_black_roommate
duplicates drop
sort id
save fresh_black_roommate, replace
   
   
* Stack the ayear2 == 2000 pairs with the ayear3 pairs for 2001-2004.
use roommate_2_2000
append using roommate_3_2001 roommate_3_2002 roommate_3_2003 roommate_3_2004
save roommate, replace

* Ids that already appear as side 1 of a pair.
rename r1_id id
keep id
duplicates drop
sort id
save matched, replace

* missed.dta: key_issue rows for ids with no pair so far, limited to the same
* class-year window as the stacked pairs.
use key_issue
sort id
merge m:1 id using matched, keep(master) nogenerate
keep if ayear2 == 2000 | (ayear3 > 2000 & ayear3 < 2005)
duplicates drop
sort room ayear_action fall
save missed, replace
* Pair up the "missed" cadets among themselves, without the August-start
* restriction, using the same steps as the yearly roommate loop.
* Operates on the missed data left in memory by the previous section.
* NOTE(review): unlike the yearly loop there is no -drop if c > 6- here -
* confirm whether that omission is intended.
foreach v in id action day_issue day_return length ayear4 ayear3 ayear2 ayear1 sat leadership_cmp cft_score aca_cmp rathlete pschool black hispanic asian female cyear squad3 squad4 {
  ren `v' r1_`v'
  }
sort dorm room ayear_action fall
joinby dorm room ayear_action fall using missed
* Remove self-pairs.
drop if id == r1_id
* Overlap window: later of the two issue dates to earlier of the two returns.
gen roommate_start = max(day_issue,r1_day_issue)
gen roommate_end = min(day_return,r1_day_return)
replace roommate_end = day_return if missing(roommate_end)
replace roommate_end = r1_day_return if missing(roommate_end)
gen roommate_length = roommate_end - roommate_start
* Keep only pairs with a positive, known overlap.
drop if roommate_length <= 0
drop if roommate_length == .
* c = number of ordered-pair rows in the room-term.
egen roommates = group(dorm room ayear_action fall)
sort roommates id
by roommates: egen c =count(id)
gen triple = c == 6 
drop if c == 6 & abs(day_issue - r1_day_issue) > 30
* Prefix the joined copy as side 2.
foreach v in id action day_issue day_return length ayear4 ayear3 ayear2 ayear1 sat leadership_cmp cft_score aca_cmp rathlete pschool black hispanic asian female cyear squad3 squad4 {
  ren `v' r2_`v'
  }
* Collapse repeated (r1_id, r2_id) pairs to one row spanning the earliest
* start to the latest end.
sort r1_id r2_id roommate_start
duplicates tag r1_id r2_id, g(dup)
by r1_id r2_id:  egen min_start = min(roommate_start)
by r1_id r2_id:  egen max_end = max(roommate_end)
replace roommate_start = min_start
replace roommate_end = max_end
replace roommate_length = max_end - min_start
duplicates drop r1_id r2_id, force 
* NOTE(review): loce* must match at least one existing variable or this errors.
drop loce* month_action dup min_start max_end
gen same_squad3 = r1_squad3 == r2_squad3
sort r1_id room
duplicates drop
save recovered, replace

* ---- Final analysis file ----
* Combine the recovered pairs (in memory) with the main roommate pairs, then
* summarise each cadet's exposure to black / hispanic / asian roommates.
append using roommate

foreach v in id ayear4 ayear3 ayear2 ayear1 {
    rename r1_`v' `v'
}

* Longest stay with a roommate of each group (0 when there is none).
* Variable prefixes: br_ = black, hr_ = hispanic, ar_ = asian.
foreach grp in black hispanic asian {
    local p = substr("`grp'", 1, 1)
    generate `p'r_l = roommate_length if r2_`grp' == 1
    sort id
    by id: egen `p'r_length = max(`p'r_l)
    replace `p'r_length = 0 if `p'r_length == .
}

sort id
by id: egen roommate_length_mean = mean(roommate_length)

keep id ayear ayear3 ayear4 br_length hr_length ar_length roommate_length_mean
duplicates drop

* Threshold indicators: roomed with a member of the group for at least N days.
foreach d of numlist 1 7 30 60 90 120 240 {
    foreach grp in Black Hispanic Asian {
        local p = lower(substr("`grp'", 1, 1))
        generate `p'r_`d' = `p'r_length >= `d'
        label var `p'r_`d' "`grp' Roommate `d' Days"
    }
}

* Attach demo.dta (matched ids only) and drop cadets without a squad3.
sort id
merge m:1 id using demo, keep(match) nogenerate
drop if squad3 == .

* Attach the black-roommate flag, keeping every row already present.
sort id
merge m:1 id using fresh_black_roommate, keep(master match) nogenerate


save roommate_data_20170806, replace


* ==================== remove intermediate .dta files ====================

* Per-year pair files, per-year key_issue files, and the class-year splits.
forvalues yr = 1998/2004 {
    erase roommate_`yr'.dta
    erase key_issue_`yr'.dta
    forvalues deg = 1/4 {
        erase roommate_`deg'_`yr'.dta
    }
}

* Per-year upper / ayear4 extracts.
foreach yr in 1998 2000 2001 2002 2003 {
    erase upper_`yr'.dta
    erase ayear4_`yr'.dta
}

* Per-year ayear3 extracts.
forvalues yr = 2001/2004 {
    erase ayear3_`yr'.dta
}

* Remaining single-file intermediates.
local scratch matched missing_return recovered recovered_return gpa_hat black_classmate ///
    demo_black_roommates key_issue key_return fresh_black_roommate fresh_black_dropout ///
    same_fresh_black same_team residence same_section missed upper bu ayear2_2000
foreach f of local scratch {
    erase `f'.dta
}

