* Create data set for doing GT2017 at hospital level


********************************************************************
* import hospital level data for US

* Load the lab-level file and restrict it to US records.
use data/labid, clear
drop if country != "US"
* Remove rows whose diagnostic value or quantity is recorded as zero
* (original note: a handful of cases of missing data).
keep if diagnostic != 0 & q != 0

* Numeric panel identifiers:
*   j = manufacturer x product, h = lab_id, t = yyyy x mm
egen j = group(manufacturer product)
egen h = group(lab_id)
egen t = group(yyyy mm)

count

********************************************************************
* clean up diagnostic data

* Missing diagnostic values are recoded to zero before aggregating.
replace diagnostic = 0 if diagnostic == .
* pM: twice the largest diagnostic value within each h-t cell.
by h t, sort: egen pM = max(2 * diagnostic)
* Blank out pM outside its 1st-99th percentile range.
summarize pM, detail
generate flag_pM = inrange(pM, r(p1), r(p99))
replace pM = . if !flag_pM

* Regress log(pM) on t dummies with absorbed h effects, and keep the
* fitted t part (xb) and the absorbed h effect (d).
generate ln_pM = ln(pM)
quietly tabulate t, generate(tFE)
quietly areg ln_pM tFE*, absorb(h)
predict pM_t, xb
predict pM_h, d
* Group means give each row of a t (resp. h) group the group's component.
by t, sort: egen temp_t = mean(pM_t)
by h, sort: egen temp_h = mean(pM_h)
* Mht: pM where it survived trimming, otherwise the fitted value in levels;
* rows that still have no value are removed.
generate Mht = pM
replace Mht = exp(temp_t + temp_h) if Mht == .
drop if Mht == .

* s0 = 1 - (total q in the h-t cell) / Mht.
by h t, sort: egen Qht = total(q)
generate s0 = 1 - Qht / Mht
* Keep s0 within its 5th-95th percentile range, then drop s0 <= 0.
summarize s0, detail
generate flag_s0 = inrange(s0, r(p5), r(p95))
keep if flag_s0
keep if s0 > 0

summarize Mht s0
count
drop temp*

********************************************************************
* fillin zeros at product-hospital-month level

* Add a row for every missing (j, h, t) combination; fillin marks the
* added rows with _fillin == 1.
fillin j h t
count

* A j-t or h-t cell whose minimum _fillin is 1 consists only of added rows,
* i.e. it never appeared in the data: remove those cells.
by j t, sort: egen out_jt = min(_fillin)
by h t, sort: egen out_ht = min(_fillin)
keep if out_jt != 1 & out_ht != 1
count

save data/USdata_fillin, replace
********************************************************************
* clean up price data

* Build the price variable p: observed unit price where available, otherwise
* imputed through three successive fallbacks (each only fills rows where p
* is still missing).

* Unit price = revenue / q.
gen pp=revenue/q
* Set pp outside its 1st-99th percentile range to missing. inrange() returns
* 0 when pp is missing, so flag_pp is also 0 on rows with no price.
su pp, detail
gen flag_pp = inrange(pp, r(p1), r(p99))
replace pp = . if flag_pp==0

* Fallback 1: log price regressed on h dummies with absorbed j-t effects.
egen jt=group(j t)
quietly tab h, gen(hFE)
gen ln_pp = log(pp)
quietly areg ln_pp hFE*, a(jt)
* xb = fitted part from the h dummies; d = absorbed jt effect.
predict pp_h, xb
predict pp_jt, d
* Group means so that rows whose own prediction is missing inherit the
* value of their j-t (resp. h) group.
bysort j t: egen temp_jt=mean(pp_jt)
bysort h: egen temp_h=mean(pp_h)
* Start from the trimmed observed price; fill gaps with the fitted value,
* converted back from logs to levels.
gen p=pp
replace p = exp(temp_jt + temp_h) if p==.
* Fallback 2: log price regressed on j dummies with absorbed h effects,
* applied only where fallback 1 left p missing.
quietly tab j, gen(jFE)
quietly areg ln_pp jFE*, a(h)
predict pp_j, xb
predict pp_h2, d
bysort j: egen temp_j=mean(pp_j)
bysort h: egen temp_h2=mean(pp_h2)
replace p = exp(temp_j + temp_h2) if p==.
* Trim p (observed and imputed) to its 1st-99th percentile range; trimmed
* values become missing and are refilled by fallback 3 below.
su p, detail
gen flag_p = inrange(p, r(p1), r(p99))
replace p=. if flag_p==0
* Fallback 3: regression of pp in levels (not logs) on j dummies only; the
* fitted value is used directly, without exp().
quietly reg pp jFE*
predict pp_j2, xb
bysort j: egen temp_j2=mean(pp_j2)
replace p = temp_j2 if p==.
* Inspect the final distribution; the second trimming pass is disabled.
su p, detail
*gen flag_p = inrange(p, r(p1), r(p99))
*drop if flag_p==0

su p
count
* Remove the temp_* helper variables created in this section.
drop temp*

********************************************************************
* clean up quantity/share data

* Rows without an Mht value are recoded to zero, then M takes the largest
* Mht within each h-t cell so every row of the cell carries it.
replace Mht = 0 if Mht == .
by h t, sort: egen M = max(Mht)

* zeros flags rows with no recorded quantity; those rows then count as q = 0.
generate zeros = (q == .)
replace q = 0 if q == .
by h t, sort: egen Q = total(q)

* Rebuild s0 = 1 - Q/M, replacing the version computed earlier.
drop s0 flag_s0
generate s0 = 1 - Q / M
* Keep s0 within its 5th-95th percentile range and strictly between 0 and 1.
summarize s0, detail
generate flag_s0 = inrange(s0, r(p5), r(p95))
keep if flag_s0
keep if s0 > 0 & s0 < 1

summarize s0
count

*save data/EUdata_Hstuff_all, replace
********************************************************************
* restore identifier variables on filled-in rows and save (no merge is performed in the code below)

* Restore the original identifier variables on rows where they are blank
* (presumably the rows added by fillin -- confirm), then keep the final
* variables and save.
*
* Pattern: within each group id, sort the identifier in descending order so
* populated rows come first, then carry the previous row's value down.
* replace works through observations in order, so the value propagates
* through consecutive blank rows.
* NOTE(review): the =="" tests assume manufacturer, product and lab_id are
* strings, and the ==. tests assume mm and yyyy are numeric.
gsort j -manufacturer
by j: replace manufacturer = manufacturer[_n-1] if manufacturer==""
* product is not part of the sort; this relies on product being blank on
* exactly the rows where manufacturer is blank -- TODO confirm.
by j: replace product = product[_n-1] if product==""
gsort h -lab_id
by h: replace lab_id = lab_id[_n-1] if lab_id==""
* Relies on gsort placing missing values last in descending order (its
* default when mfirst is not specified), so a populated mm leads each t group.
gsort t -mm
by t: replace mm = mm[_n-1] if mm==.
* Same assumption as for product: yyyy is missing on the same rows as mm.
by t: replace yyyy = yyyy[_n-1] if yyyy==.

*replace product="Resolute" if product=="Resolute Integrity"
* Keep only the output variables, in this column order.
keep manufacturer product yyyy mm lab_id M Q q zeros p
order manufacturer product yyyy mm lab_id M Q q zeros p
sort manufacturer product yyyy mm
save data/USdata_h_raw, replace
