set scheme s1color

/* This is the main do file for "The Evolution of Work in the United States." */

/* Section switches: each section below is wrapped in an "if switch==1" test,
   so setting a switch to anything other than 1 skips that section. */
local compute_decomp=1  /* Compute the Section 3 decompositions */
local compile_decomp=1  /* Take the Section 3 decompositions that were produced in the "compute" step
                         and then produce tables in a relatively nice format */
local top_occupations=1 /* Compute the top job titles (top in terms of the frequency of various task measures) */
local vintage_analysis=1  /* Perform the calculations related to Section 4.2 (and corresponding sensitivity analysis) */
local narrative_plots=1 /* Construct the plots that will go in Section 4 of the paper */
local appendix_b1=1 /* Perform the calculations related to the "Comparison to the Decennial Census" appendix */
local appendix_b2=1 /* Perform the calculations related to the "Comparison to O*NET" appendix */

/* "log using" stops with an error when a log is already open (for example
   after an interrupted earlier run), so close any open log first. The
   capture prefix makes this a no-op when no log is open. */
capture log close
log using aej_calculations, text replace

/*

Tables 1, 2, 8 are constructed in the top_occupations portion of the code.
Tables 3-5, 12-20 are constructed in the compile_decomp portion of the code.
Tables 6, 21 are constructed in the vintage_analysis portion of the code. 
Tables 7 and 22 are constructed in the vintage_analysis portion of the code.
(Tables 9-10 require micro data from EMSI which we cannot distribute)
 Table 11 summarizes the estimation of our LDA model, which we used to classify pages of
 ads as representing either job ads or some other set of advertisements */


/* Section 3 decompositions. Everything down to the matching closing brace
   runs only when the compute_decomp switch is 1. */
if `compute_decomp'==1 {


/* Iteration 0 is the original dataset. Iterations 1 to 40 are
   different bootstrapped samples. */
    
forvalues iter=0/40 {
forvalues dset=0/2 {
       
   /* Map the dataset index to the file-name stem (dsetnm) and to the highest
      aggregation level (levelmax) computed for that dataset: levels 0-2 for
      "comp", level 0 only for the two "nyt" datasets. */
   if `dset'==0 {
       local dsetnm="comp"
       local levelmax=2
   }
   if `dset'==1 {
       local dsetnm="nyt_dis"
       local levelmax=0
   }
   if `dset'==2 {
       local dsetnm="nyt_cla"
       local levelmax=0
   }

/* Here we are looping through whether we want to weight
   4-digit occupations according to their employment (1) or not (0).
   Then, we loop over the aggregation levels: 0 = job title,
   1 = 6-digit SOC code, 2 = 4-digit SOC code. */
    
forvalues employment_weight=0/1 {   
forvalues level=0/`levelmax' {

/* Load the source data. Iteration 0 uses the original sample; every other
   iteration loads only the columns of the corresponding bootstrap draw and
   strips the draw suffix from the ad-count and word-count variables. */
if `iter'>0 {
use year description_new description_new_miss *_`iter' using boot_`dsetnm'_title, replace
ren ct2_`iter' ct2
ren words_`iter' words
}
else {
use summary_by_year_`dsetnm' , replace
}

/* description_new_miss equals 1 for observations for which we believe the job
   title field represents a spurious job title. See the documentation file
   "Identifying Job Titles and the Boundaries between Job" on our data library:
   https://occupationdata.github.io/ for more of an explanation.
   Observations before 1950 are also discarded. */
drop if description_new_miss==1 | year<1950

/* Our task measures are measured at the edge of each decade, combining nearby years
   (e.g., 1958 to 1962 into the 1960 observation) to reduce sampling error.
   At the two ends of the sample, 1953 is additionally folded into 1950 and
   1997 into 2000. Years that are not assigned to a decade are dropped. */

replace year=round(year,10) if inrange(year-round(year,10),-2,2)
replace year=cond(year==1953,1950,2000) if inlist(year,1953,1997)
drop if mod(year,10)~=0

/* Level = 0: observations are combined at the job-title level
   (the description_new variable). */
    
if `level'==0 {
 /* Attach SOC codes; job titles without a code get the placeholder 999999. */
 merge n:1 description_new using soc_codes_corresp
 drop if _merge==2
 drop _merge
 replace soc_new=999999 if soc_new==.
 /* Total ad count of the job title in each year. */
 bys description_new year: egen s1=sum(ct2)
 gen occsoc=floor(soc_new/100)
 /* Combine occupation codes with one another so that we may merge to the decennial census.
    The decennial census data do not use the SOC classification. We have constructed a
    mapping between census occupation codes and SOC codes, but this mapping is n:1, meaning
    that certain SOC codes do not map to an individual census occupation code. For this
    reason we need to combine SOC codes, as we do below. */
 recode occsoc (1131=1130) (2511=2510) (2530=2520) (2990=2911) (3120=3190) (3510=3520) (3970 3940=3990) (4510=4520)
 if `iter'==0 {
    collapse (mean) occsoc soc_new spitz* deming*C words s1 [aw=ct2], by(description_new year)
 }
 else {
    collapse (mean) occsoc soc_new spitz*_`iter' deming*C_`iter' words s1 [aw=ct2], by(description_new year)
    /* Strip the bootstrap-draw suffix so the rest of the code sees the same
       variable names as in the original sample. */
    foreach stem in spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_manual spitz_r_cognitive {
       ren `stem'_C_`iter' `stem'_C
       ren `stem'_`iter' `stem'
    }
    foreach stem in deming_character deming_computer deming_customer_service deming_financial deming_people_management deming_problem_solving deming_project_management deming_social deming_writing {
       ren `stem'_C_`iter' `stem'_C
    }
 }
 /* From here on ct2 is the total ad count of the cell. */
 ren s1 ct2
}

/* Level = 1: observations are combined at the level of the 6-digit
   SOC code (the soc_new variable). */
    
if `level'==1 {
 /* Attach SOC codes; job titles without a code get the placeholder 999999. */
 merge n:1 description_new using soc_codes_corresp
 drop if _merge==2
 drop _merge
 replace soc_new=999999 if soc_new==.
 /* Total ad count of the 6-digit occupation in each year. */
 bys soc_new year: egen s1=sum(ct2)
 gen occsoc=floor(soc_new/100)
 /* Combine 4-digit codes that have no census counterpart of their own. */
 recode occsoc (1131=1130) (2511=2510) (2530=2520) (2990=2911) (3120=3190) (3510=3520) (3970 3940=3990) (4510=4520)
 if `iter'==0 {
    collapse (mean) occsoc spitz* deming*C words s1 [aw=ct2], by(soc_new year)
 }
 else {
    collapse (mean) occsoc spitz*_`iter' deming*C_`iter' words s1 [aw=ct2], by(soc_new year)
    /* Strip the bootstrap-draw suffix so the rest of the code sees the same
       variable names as in the original sample. */
    foreach stem in spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_manual spitz_r_cognitive {
       ren `stem'_C_`iter' `stem'_C
       ren `stem'_`iter' `stem'
    }
    foreach stem in deming_character deming_computer deming_customer_service deming_financial deming_people_management deming_problem_solving deming_project_management deming_social deming_writing {
       ren `stem'_C_`iter' `stem'_C
    }
 }
 /* From here on ct2 is the total ad count of the cell. */
 ren s1 ct2
}

/* Level = 2 means we are combining observations at the 4-digit SOC code level
   (as contained in the occsoc variable) */
    
if `level'==2 {
 /* Attach SOC codes; job titles without a code get the placeholder 999999. */
 merge n:1 description_new using soc_codes_corresp
 drop if _merge==2
 replace soc_new=999999 if soc_new==.
 drop _merge
  gen occsoc=floor(soc_new/100)
  /* Combine 4-digit codes that have no census counterpart of their own. */
  replace occsoc=1130 if occsoc==1131
  replace occsoc=2510 if occsoc==2511
  replace occsoc=2520 if occsoc==2530 
  replace occsoc=2911 if occsoc==2990
  replace occsoc=3190 if occsoc==3120 
  replace occsoc=3520 if occsoc==3510
  replace occsoc=3990 if occsoc==3970
  replace occsoc=3990 if occsoc==3940
  replace occsoc=4520 if occsoc==4510 
  /* Total ad count of the 4-digit occupation in each year. The total has to
     be taken over the same cell that the collapse below uses (occsoc year).
     Summing by soc_new instead, as the level-1 code does, would turn ct2
     into a weighted average of the 6-digit totals inside each 4-digit
     occupation rather than the 4-digit total, which would distort the
     vacancy shares computed from ct2. */
  bys occsoc year: egen s1=sum(ct2)
  if `iter'==0 {
     collapse (mean)  spitz* deming*C  words s1 [aw=ct2], by(occsoc year)
  }
  if `iter'>0 {
     collapse (mean)  spitz*_`iter' deming*C_`iter' words s1 [aw=ct2], by(occsoc year)
     /* Strip the bootstrap-draw suffix so the rest of the code sees the same
        variable names as in the original sample. */
     foreach stem in spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_manual spitz_r_cognitive {
        ren `stem'_C_`iter' `stem'_C
        ren `stem'_`iter' `stem'
     }
     foreach stem in deming_character deming_computer deming_customer_service deming_financial deming_people_management deming_problem_solving deming_project_management deming_social deming_writing {
        ren `stem'_C_`iter' `stem'_C
     }
  }
  /* From here on ct2 is the total ad count of the cell. */
  ren s1 ct2    
}

/* Drop the placeholder occupation (9999 is the 4-digit code of soc_new
   999999) and any 1940 observation, then compute each cell's share of the
   ads of its year. */
drop if year==1940 | occsoc==9999
bys year : egen ads=sum(ct2)
gen share=ct2/ads

/* Merge in the ς_j variable (see equation 6).
   keep(1 3) discards occupations that appear only in the move-rate file.
   Without it those rows would be carried along with missing year and share,
   and Stata treats a missing year as larger than any number, so later
   conditions such as "year>1950" would be true for them. */
    
merge n:1 occsoc using   move_rates_by_occsoc, keep(1 3)
drop _merge
/* Occupations without a move rate get the share-weighted average rate. */
sum move  [aw=share]
replace move=r(mean) if move==.

/* Identifier of the unit of observation at the current aggregation level. */
if `level'==0 {
    local unit "description_new"
}
if `level'==1 {
    local unit "soc_new"
}
if `level'==2 {
    local unit "occsoc"
}

/* Build the "_m" version of every task measure. The first observation of a
   unit takes the contemporaneous value; each later observation is a weighted
   average of the previous "_m" value and the contemporaneous value, where the
   weight on the previous value is (1-move) raised to the number of years
   elapsed. replace works through the observations in order, so the lagged
   value it reads has already been updated. */
sort `unit' year
foreach var of varlist spitz* deming* {
    by `unit': gen temp_lag= `var' if _n==1
    replace temp_lag=temp_lag[_n-1]*(1-move)^(year-year[_n-1])+`var'*(1-(1-move)^(year-year[_n-1])) if `unit'==`unit'[_n-1]
    gen `var'_m=temp_lag
    drop temp_lag
}

        
/* Choose the weighting scheme and the matching file-name suffix. With
   vacancy weights ("vac") the ad shares are used as they are. With employment
   weights ("emp") each 4-digit occupation-year is rescaled to its size in
   ipums_sizes_ (occsize), split across the cells inside it in proportion to
   their ad shares. */
if `employment_weight'==0 {
  local suffix="vac"
}
else {
  merge n:1 occsoc year using ipums_sizes_, keep(1 3)
  drop _merge
  bys occsoc year: egen s3=sum(share)
  gen share2=occsize*share/s3
  replace share=share2
  local suffix="emp"
}

/* Here, we are computing the sample mean of each of the variables within our dataset.
   We use these sample means as part of the normalization that is discussed in
   Equations 7-8, used in the decompositions presented in Tables 19-20. The _W suffix
   variables are the benchmark task measures, given as the number of task mentions
   per 1000 job ad words. */
    
/* Share-weighted means, stored in locals named after the variable plus "_loc". */
foreach var of varlist spitz_nr_analytic_C spitz_nr_inter_C spitz_nr_manual_C spitz_r_manual_C spitz_r_cognitive_C deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C spitz_nr_analytic_C_m spitz_nr_inter_C_m spitz_nr_manual_C_m spitz_r_manual_C_m spitz_r_cognitive_C_m {
  sum `var' [aw=share]
  local `var'_loc=r(mean)
}

/* The turnover-adjusted Deming measures use the shorter "_l" suffix for
   their locals. */
foreach var of varlist deming_writing_C_m deming_social_C_m deming_project_management_C_m deming_problem_solving_C_m deming_people_management_C_m deming_financial_C_m deming_customer_service_C_m deming_computer_C_m deming_character_C_m {
  sum `var' [aw=share]
  local `var'_l=r(mean)
}

foreach var of varlist spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_manual spitz_r_cognitive {
  sum `var' [aw=share]
  local `var'_loc=r(mean)
}

/* For each measure create _sh (the measure divided by its sample mean) and
   _W (mentions per 1000 ad words). The variables are created in a fixed
   sequence on purpose: the compile section selects them with "first-last"
   variable ranges, which depend on the order of creation. */
foreach var of varlist spitz*C spitz*C_m spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_manual spitz_r_cognitive deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C {
   gen `var'_sh=`var'/``var'_loc'
   gen `var'_W=`var'/words*1000
}

foreach var of varlist deming_writing_C_m deming_social_C_m deming_project_management_C_m deming_problem_solving_C_m deming_people_management_C_m deming_financial_C_m deming_customer_service_C_m deming_computer_C_m deming_character_C_m {
   gen `var'_sh=`var'/``var'_l'
   gen `var'_W=`var'/words*1000
}

/* One of the drawbacks of the Equation 7-8 normalization is that it requires (for a given
   occupation-year cell) there to be at least some task mentions (otherwise, one would be
   dividing by zero). As sensitivity analysis for this set of normalizations, we impute
   values for these occupation-year cells. The trailing-underscore variables are the
   normalized shares themselves; the suffix "I" marks the versions in which a missing
   share is replaced by an equal split across the measures of the family (.2 for the five
   Spitz-Oener measures, 1/9 for the nine Deming measures). */
    
/* Denominators: the sum of the _sh variables within each family of measures. */
local so_C "spitz_nr_analytic_C_sh+spitz_nr_inter_C_sh+spitz_nr_manual_C_sh+spitz_r_manual_C_sh+spitz_r_cognitive_C_sh"
local so_C_m "spitz_nr_analytic_C_m_sh+spitz_nr_inter_C_m_sh+spitz_nr_manual_C_m_sh+spitz_r_manual_C_m_sh+spitz_r_cognitive_C_m_sh"
local so_raw "spitz_nr_analytic_sh+spitz_nr_inter_sh+spitz_nr_manual_sh+spitz_r_manual_sh+spitz_r_cognitive_sh"
local dem_C "deming_writing_C_sh+deming_social_C_sh+deming_project_management_C_sh+deming_problem_solving_C_sh+deming_people_management_C_sh+deming_financial_C_sh+deming_customer_service_C_sh+deming_computer_C_sh+deming_character_C_sh"

foreach var of varlist spitz*_C_sh  {
   gen `var'_=`var'/(`so_C')
   gen `var'I=cond(`var'_==., .2, `var'_)
}

foreach var of varlist spitz*_C_m_sh  {
   gen `var'_=`var'/(`so_C_m')
   gen `var'I=cond(`var'_==., .2, `var'_)
}

foreach var of varlist  spitz_nr_analytic_sh spitz_nr_inter_sh spitz_nr_manual_sh spitz_r_manual_sh spitz_r_cognitive_sh {
   gen `var'_=`var'/(`so_raw')
   gen `var'I=cond(`var'_==., .2, `var'_)
}

foreach var of varlist deming_writing_C_sh deming_social_C_sh deming_project_management_C_sh deming_problem_solving_C_sh deming_people_management_C_sh deming_financial_C_sh deming_customer_service_C_sh deming_computer_C_sh deming_character_C_sh {
  gen `var'_=`var'/(`dem_C')
  gen `var'I=cond(`var'_==., 1/9, `var'_)
}

/* Pick the identifier of the unit of observation and the label used in the
   output file names for the current aggregation level. */
if `level'==0 {
   local unit "description_new"
   local levelt="job_title"
}
if `level'==1 {
   local unit "soc_new"
   local levelt="soc6"
}
if `level'==2 {
   local unit "occsoc"
   local levelt="soc4"
}

/* shareM: the unit's 1950 share, carried to all of its observations. A unit
   whose first observation falls after 1950 gets zero. */
sort `unit' year
by `unit': gen fi=1 if _n==1
gen shareB=share if year==1950
replace shareB=0 if  fi==1 & year>1950
bys `unit': egen shareM=min(shareB)

/* shareF: the unit's 2000 share, carried to all of its observations. A unit
   whose last observation falls before 2000 gets zero. */
gsort `unit' -year
by `unit': gen Fi=1 if _n==1
gen shareE=share if year==2000
replace shareE=0 if  Fi==1 & year<2000
bys `unit': egen shareF=min(shareE)

/* The three datasets (all vs. end vs. beg) differ according to the period
   in which occupations' sizes are measured: in period t ("all", weight
   share), in 2000 ("end", weight shareF), or in 1950 ("beg", weight
   shareM). Each one holds the weighted yearly means of the normalized,
   imputed and per-1000-words measures. */
    
foreach spec in all end beg {
   if "`spec'"=="all" {
      local wgt "share"
   }
   if "`spec'"=="end" {
      local wgt "shareF"
   }
   if "`spec'"=="beg" {
      local wgt "shareM"
   }
   preserve
   collapse (mean) *sh_ *shI *W [aw=`wgt'] , by(year)
   save  decomp_`dsetnm'_`levelt'_`spec'_`suffix'_`iter', replace
   restore
}
}
}
}
}
}

/* In this section of the do file, we take the "decomp" datasets that were
  constructed in the compute_decomp section of the do file and then construct
  the tables that will go in the paper. We need to compute the standard deviations
  from the iter=1... 40 files, and the averages from the iter=0 file.  */
  
if `compile_decomp'==1 {
forvalues employment_weight=0/1 {

/* File-name suffix of the weighting scheme: vacancy (0) or employment (1). */
if `employment_weight'==0 {
  local suffix="vac"
}
if `employment_weight'==1 {
  local suffix="emp"
}
/* NOTE: the level index here only selects a file-name label and is numbered
   differently from the compute_decomp section (there 0 = job title and
   2 = 4-digit SOC). Only the job-title files exist for all three datasets,
   so dsetmax is 2 for job_title and 0 otherwise. */
forvalues level=0/2 {
   if `level'==0 {
      local levelt="soc4"
      local dsetmax=0
    }
   if `level'==1 {
      local levelt="soc6"
      local dsetmax=0
    }
   if `level'==2 {
      local levelt="job_title"
      local dsetmax=2
    }

forvalues dset=0/`dsetmax' {
   if `dset'==0 {
       local dsetnm="comp"
   }
   if `dset'==1 {
       local dsetnm="nyt_dis"
   }
   if `dset'==2 {
       local dsetnm="nyt_cla"
   }

/* Echo the combination being processed to the log. */
di "`dsetnm'"
di "`levelt'"
di "`suffix'"

/* Bootstrap standard errors of the 1950 levels: stack the 40 bootstrap
   draws of the "all" file, keep the 1950 rows and take the standard
   deviation across draws. */
use decomp_`dsetnm'_`levelt'_all_`suffix'_1, replace
gen iter=1
forvalues draw=2/40 {
   append using decomp_`dsetnm'_`levelt'_all_`suffix'_`draw'
   replace iter=`draw' if iter==.
}
drop if year~=1950
collapse (sd) spitz_nr_analytic_C_sh_-deming_character_C_m_W
/* Give every statistic the "d" suffix carried by the columns of the change
   tables, so that the files can be appended to one another. */
foreach stat of varlist s* d* {
  ren `stat' `stat'd
}
/* Convert to text (two decimals for the per-1000-words measures, three for
   the normalized ones) and wrap in parentheses, as usual for standard errors. */
tostring *Wd, format(%12.2f) replace force
tostring *shId  *sh_d, format(%12.3f) replace force
foreach stat of varlist *d {
  replace `stat'="("+`stat'+")"
}
gen sd=1
gen year=1940  /* Year=1940 is just saying we are taking the "zeroth row";
    the observation corresponding to 1950 is the first row. This is the
    sample standard error for the first observation. */
gen group=0
save decomp_`dsetnm'_`levelt'_all_`suffix'_sd_50, replace
   
/* Sample average for the first observation: the 1950 levels taken from the
   original (iteration 0) sample, formatted like the standard-error file. */
use decomp_`dsetnm'_`levelt'_all_`suffix'_0, replace
drop if year~=1950
foreach stat of varlist s* d* {
  ren `stat' `stat'd
}
tostring *Wd, format(%12.2f) replace force
tostring *shId  *sh_d, format(%12.3f) replace force
gen sd=0
/* Only the renamed statistics and sd end in "d"; this removes the original
   year variable, which is re-created as the "zeroth row" marker below. */
keep *d
gen year=1940
gen group=0
save decomp_`dsetnm'_`levelt'_all_`suffix'_co_50, replace

/* Below we are looking at the growth rates for each decade and for
   the five-decade growth rate. This part produces their bootstrap standard
   errors. */
/* Stack the 40 bootstrap draws of the "all" files (group 0) ... */
use decomp_`dsetnm'_`levelt'_all_`suffix'_1, replace
gen iter=1
forvalues xx=2/40 {
   append using decomp_`dsetnm'_`levelt'_all_`suffix'_`xx'
   replace iter=`xx' if iter==.
}
gen group=0
/* ... followed by the 40 draws of the "beg" files (group 1). */
forvalues xx=1/40 {
   append using decomp_`dsetnm'_`levelt'_beg_`suffix'_`xx'
   replace iter=`xx' if iter==.
}
replace group=1 if group==.
sort group iter year
/* Change to the next row for every year but 2000; the year-2000 row instead
   holds the change relative to the row five positions earlier.
   NOTE(review): this and the hard-coded row offsets below presumably rely on
   every file having exactly six rows (1950, 1960, ..., 2000) - confirm. */
foreach var of varlist spitz_nr_analytic_C_sh_-deming_character_C_m_W {
   gen `var'd=`var'[_n+1]-`var' if year~=2000
   replace `var'd=`var'-`var'[_n-5] if year==2000
}
sort group iter year
/* Duplicate the data; the copies become two further groups. The thresholds
   481 and 721 assume 240 rows per group. */
expand 2
replace group=2 if _n>=481
replace group=3 if _n>=721
/* Group 2: the group 0 change minus the group 1 change of the same draw and
   year. (Rows 721 onward are also touched here but are overwritten by the
   next loop.) */
foreach var of varlist spitz_nr_analytic_C_sh_d-deming_character_C_m_Wd {
   replace `var'=`var'[_n-480]-`var'[_n-240]  if _n>=481
}
/* Group 3: the group 1 change divided by the group 0 change. */
foreach var of varlist spitz_nr_analytic_C_sh_d-deming_character_C_m_Wd {
   replace `var'=`var'[_n-480]/`var'[_n-720]  if _n>=721
}

/* Standard deviation across bootstrap draws, converted to text and wrapped
   in parentheses. */
collapse (sd) *d , by(year group)
tostring *Wd, format(%12.2f) replace force
tostring *shId  *sh_d, format(%12.3f) replace force

foreach var of varlist *d {
  replace `var'="("+`var'+")"
 }
gen sd=1
save decomp_`dsetnm'_`levelt'_all_`suffix'_sd_d1, replace

/* Point estimates of the changes, from the original (iteration 0) sample:
   the "all" file is group 0 and the "beg" file is group 1. */
use decomp_`dsetnm'_`levelt'_all_`suffix'_0, replace
gen group=0
append using decomp_`dsetnm'_`levelt'_beg_`suffix'_0
replace group=1 if group==.
sort group year
/* Change to the next row for every year but 2000; the year-2000 row instead
   holds the change relative to the row five positions earlier.
   NOTE(review): this and the row offsets below presumably rely on each file
   having exactly six rows (1950, 1960, ..., 2000) - confirm. */
foreach var of varlist spitz_nr_analytic_C_sh_-deming_character_C_m_W {
   gen `var'd=`var'[_n+1]-`var' if year~=2000
   replace `var'd=`var'-`var'[_n-5] if year==2000
}
/* Duplicate the data; the copies become two further groups. The thresholds
   13 and 19 assume six rows per group. */
expand 2
replace group=2 if _n>=13
replace group=3 if _n>=19
/* Group 2: the group 0 change minus the group 1 change of the same year.
   (Rows 19 onward are also touched here but are overwritten by the next loop.) */
foreach var of varlist spitz_nr_analytic_C_sh_d-deming_character_C_m_Wd {
   replace `var'=`var'[_n-12]-`var'[_n-6]  if _n>=13
}
/* Group 3: the group 1 change divided by the group 0 change. */
foreach var of varlist spitz_nr_analytic_C_sh_d-deming_character_C_m_Wd {
   replace `var'=`var'[_n-12]/`var'[_n-18]  if _n>=19
}
keep *d year group
tostring *Wd, format(%12.2f) replace force
tostring *shId  *sh_d, format(%12.3f) replace force
gen sd=0
save decomp_`dsetnm'_`levelt'_all_`suffix'_co_d1, replace
/* Stack the point estimates of the changes (currently in memory) with their
   standard errors and with the 1950 level and its standard error. */
append  using  decomp_`dsetnm'_`levelt'_all_`suffix'_sd_d1  decomp_`dsetnm'_`levelt'_all_`suffix'_co_50 decomp_`dsetnm'_`levelt'_all_`suffix'_sd_50
   
/* Shorten the longest names so that they remain valid variable names once
   reshape appends the group number. */
ren deming_project_management_C_Wd deming_project_mgmt_C_Wd
ren deming_project_management_C_sh_d deming_project_mgmt_C_sh_d
ren deming_project_management_C_shId deming_project_mgmt_C_shId
ren deming_project_management_C_m_Wd deming_project_mgmt_C_m_Wd

/* One row per year and statistic type (sd=0 estimate, sd=1 standard error),
   with the groups side by side as columns ending in 0-3. */
reshape wide spitz* deming* , i(year sd) j(group)
   
/* Column order of the output tables. The list is one single command, so
   every physical line but the last must end in the /// continuation marker;
   without it Stata would try to execute the remainder as a separate
   (unknown) command. */
order year ///
   spitz_nr_analytic_C_sh_d* spitz_nr_inter_C_sh_d* spitz_nr_manual_C_sh_d* spitz_r_cognitive_C_sh_d* spitz_r_manual_C_sh_d* ///
   spitz_nr_analytic_sh_d* spitz_nr_inter_sh_d* spitz_nr_manual_sh_d* spitz_r_cognitive_sh_d* spitz_r_manual_sh_d* ///
   spitz_nr_analytic_C_shId* spitz_nr_inter_C_shId* spitz_nr_manual_C_shId* spitz_r_cognitive_C_shId* spitz_r_manual_C_shId* ///
   spitz_nr_analytic_shId* spitz_nr_inter_shId* spitz_nr_manual_shId* spitz_r_cognitive_shId* spitz_r_manual_shId* ///
   spitz_nr_analytic_C_Wd* spitz_nr_inter_C_Wd* spitz_nr_manual_C_Wd* spitz_r_cognitive_C_Wd* spitz_r_manual_C_Wd* ///
   spitz_nr_analytic_Wd* spitz_nr_inter_Wd* spitz_nr_manual_Wd* spitz_r_cognitive_Wd* spitz_r_manual_Wd* ///
   deming_character_C_sh_d* deming_computer_C_sh_d* deming_customer_service_C_sh_d* deming_financial_C_sh_d* deming_people_management_C_sh_d* deming_problem_solving_C_sh_d* deming_project_mgmt_C_sh_d* deming_social_C_sh_d* deming_writing_C_sh_d* ///
   deming_character_C_shId* deming_computer_C_shId* deming_customer_service_C_shId* deming_financial_C_shId* deming_people_management_C_shId* deming_problem_solving_C_shId* deming_project_mgmt_C_shId* deming_social_C_shId* deming_writing_C_shId* ///
   deming_character_C_Wd* deming_computer_C_Wd* deming_customer_service_C_Wd* deming_financial_C_Wd* deming_people_management_C_Wd* deming_problem_solving_C_Wd* deming_project_mgmt_C_Wd* deming_social_C_Wd* deming_writing_C_Wd* ///
   spitz_nr_analytic_C_m_sh_d* spitz_nr_inter_C_m_sh_d* spitz_nr_manual_C_m_sh_d* spitz_r_cognitive_C_m_sh_d* spitz_r_manual_C_m_sh_d* ///
   spitz_nr_analytic_C_m_shId* spitz_nr_inter_C_m_shId* spitz_nr_manual_C_m_shId* spitz_r_cognitive_C_m_shId* spitz_r_manual_C_m_shId* ///
   spitz_nr_analytic_shId* spitz_nr_inter_shId* spitz_nr_manual_shId* spitz_r_cognitive_shId* spitz_r_manual_shId* ///
   spitz_nr_analytic_C_m_Wd* spitz_nr_inter_C_m_Wd* spitz_nr_manual_C_m_Wd* spitz_r_cognitive_C_m_Wd* spitz_r_manual_C_m_Wd* ///
   deming_character_C_m_Wd* deming_computer_C_m_Wd* deming_customer_service_C_m_Wd* ///
   deming_financial_C_m_Wd* deming_people_management_C_m_Wd* deming_problem_solving_C_m_Wd* deming_project_mgmt_C_m_Wd* deming_social_C_m_Wd* deming_writing_C_m_Wd*
    
/* Replace the numeric year by the row label used in the tables. Each change
   is stored in the row of the year in which the period starts, except that
   the year-2000 row holds the 1950-2000 change and the artificial year 1940
   marks the 1950 level. */
tostring year, replace force
replace year="1950 Level" if year=="1940"
replace year="1950-2000" if year=="2000"
replace year="1950-1960" if year=="1950"
replace year="1960-1970" if year=="1960"
replace year="1970-1980" if year=="1970"
replace year="1980-1990" if year=="1980"
replace year="1990-2000" if year=="1990"
  save decomposition_results_`levelt'_`dsetnm'_`suffix', replace
   
   }
}
}

/* Below, we take the output that is in decomposition_results_XXX.dta
   and then isolate the results that will go in the published paper.
   The numbers that appear in observations 1-14 correspond to the
    material within Tables 3-5, 13-20. The numbers that appear in
    observation 15 correspond to the "summary" numbers described in
    Table 12.

    _C_sh_ : normalized task measures, appearing in Tables 19-20
    _C_W   : benchmark measures appearing in Tables 3-5, 13-16
    _W     : do not include words only appearing as a result of the
             CBOW model (Table 17)
    _C_m_W : account for employee turnover (Table 18)

*/
    
use decomposition_results_job_title_comp_emp, replace

local solist="spitz_nr_analytic  spitz_nr_manual  spitz_nr_inter spitz_r_manual  spitz_r_cognitive"
local demlist="deming_character deming_computer deming_customer_service deming_financial deming_people_management deming_problem_solving deming_project_mgmt deming_social deming_writing"
local demtypelist = "_C_sh_ _C_W"
local sotypelist = "_C_sh_ _C_W _W _C_m_W"

/* Keep year plus, for every measure and measure type listed above, the four
   group columns (suffixes d0-d3). The list is assembled in a local because
   the literal list is too long for one physical line, and a keep command
   broken across lines without a continuation marker does not run. */
local keepvars "year"
foreach x of local solist {
foreach t1 of local sotypelist {
forvalues g=0/3 {
 local keepvars "`keepvars' `x'`t1'd`g'"
}
}
}
foreach x of local demlist {
foreach t1 of local demtypelist {
forvalues g=0/3 {
 local keepvars "`keepvars' `x'`t1'd`g'"
}
}
}
keep `keepvars'

/* Observation 15 is an extra, initially empty row that receives the summary
   numbers. */
set obs 15

/* Summary number for each measure type: a weighted average, over the
   measures of a family, of the d3 column in observation 13, where the weight
   of a measure is abs(log(1 + d0[13]/d0[1])).
   NOTE(review): presumably observation 1 is the 1950 level and observation
   13 the 1950-2000 row (the file is built sorted by year and sd) - confirm.
   The sums are assembled as expression text so that they cover every measure
   in the list, whatever its length. */
foreach t1 of local sotypelist {
local numer ""
local denom ""
local plus ""
foreach x of local solist {
qui {
 destring  `x'`t1'd0, gen(temp) force
 destring  `x'`t1'd3, gen(temp2) force
 local chg = temp[13]
 local lvl = temp[1]
 local within = temp2[13]
 drop temp temp2
}
 local wt "abs(log(1+`chg'/`lvl'))"
 local numer "`numer'`plus'`wt'*`within'"
 local denom "`denom'`plus'`wt'"
 local plus "+"
}
replace spitz_nr_analytic`t1'd0=string((`numer')/(`denom')) if _n==15
}

/* Same summary for the Deming measures. All nine measures enter the weighted
   average; the earlier hand-written formula read the values of all nine but
   only used the first five of them. */
foreach t1 of local demtypelist {
local numer ""
local denom ""
local plus ""
foreach x of local demlist {
qui {
 destring  `x'`t1'd0, gen(temp) force
 destring  `x'`t1'd3, gen(temp2) force
 local chg = temp[13]
 local lvl = temp[1]
 local within = temp2[13]
 drop temp temp2
}
 local wt "abs(log(1+`chg'/`lvl'))"
 local numer "`numer'`plus'`wt'*`within'"
 local denom "`denom'`plus'`wt'"
 local plus "+"
}
replace deming_character`t1'd0=string((`numer')/(`denom')) if _n==15
}
save decomposition_results_collected, replace

/* Repeat the summary calculation for the benchmark (_C_W) Spitz-Oener
   measures in each alternative results file, and save each one with the
   "_collected" suffix. Observation 15 receives the weighted average of the
   d3 column in observation 13, with weights abs(log(1 + d0[13]/d0[1])). */
local dsetlist=" decomposition_results_job_title_comp_vac  decomposition_results_job_title_nyt_dis_emp  decomposition_results_job_title_nyt_cla_emp decomposition_results_soc4_comp_emp  decomposition_results_soc6_comp_emp"
foreach dset of local dsetlist {
qui {
use `dset', replace
keep spitz*C_W*
set obs 15
local solist=" spitz_nr_analytic  spitz_nr_manual  spitz_nr_inter spitz_r_manual  spitz_r_cognitive"
/* Numerator and denominator of the weighted average, assembled as
   expression text one measure at a time. */
local top ""
local bottom ""
local plus ""
foreach x of local solist {
 destring  `x'_C_Wd0, gen(temp) force
 destring  `x'_C_Wd3, gen(temp2) force
 local chg = temp[13]
 local base = temp[1]
 local ratio = temp2[13]
 drop temp temp2
 local wt "abs(log(1+`chg'/`base'))"
 local top "`top'`plus'`wt'*`ratio'"
 local bottom "`bottom'`plus'`wt'"
 local plus "+"
}
}
replace spitz_nr_analytic_C_Wd0=string((`top')/(`bottom')) if _n==15
save `dset'_collected, replace
}

}
if `top_occupations'==1 {

use summary_by_year_comp, replace
keep if year>=1950
merge n:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
bys description_new: egen ctT=sum(ct2)
by description_new: gen fi=1 if _n==1
keep if fi==1
gsort -ctT
keep if _n<=10
gen rk=_n
keep description_new ctT rk
save table_1, replace

use summary_by_year_comp, replace
keep if year>=1950
merge n:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
bys soc_new: egen ct6=sum(ct2)
by soc_new: gen fi=1 if _n==1
keep if fi==1
gsort -ct6
keep if _n<=10
keep soc_new ct6 
gen rk=_n
merge 1:1 rk using table_1
drop _merge
save table_1, replace

/* table_1, soc4 column: the ten soc4 codes (soc_new with its last two digits
   removed) with the largest total of ct2 over 1950 onward, matched by rank to
   the rows already in table_1. */
use summary_by_year_comp, replace
drop if year<1950
merge n:1 description_new using soc_codes_corresp, keep(master match)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
gen soc4=floor(soc_new/100)
bysort soc4: egen ct4=total(ct2)
by soc4: keep if _n==1
gsort -ct4
gen rk=_n
keep if rk<=10
keep soc4 ct4 rk
merge 1:1 rk using table_1, nogenerate
save table_1, replace
    
/* Average task mentions per 1000 ad words, by job title, for the 200 job
   titles with the largest total of ct2 (1950 onward). The output feeds
   Tables 2 and 8. */
use summary_by_year_comp, replace
keep if year>=1950
merge n:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
bys year : egen ads=sum(ct2)
bys description_new: egen sum_ct=sum(ct2)
/* Flag one observation per job title; used below to rank titles. */
bys description_new: gen description_new_fi=1 if _n==1

/* share: the observation's ct2 as a fraction of that year's total ct2. */
gen share=ct2/ads
bys description_new: egen tot_share=sum(share)
/* 51 is presumably the number of sample years (1950-2000) -- confirm. */
replace tot_share=tot_share/51

/* Task mentions per 1000 words. */
foreach var of varlist spitz*C deming*C {
  gen `var'_W=`var'/words*1000
}

/* Flagged rows (one per title) sort first, in decreasing order of sum_ct, so
   the first 200 rows identify the 200 most frequent titles. */
gsort description_new_fi -sum_ct
/* We look for "top occupations" among the 200 most frequently occurring job titles. */
gen in_sample_temp=1 if _n<=200
/* Spread the flag to every observation of a selected title. The source
   variable is named in full: the previous min(in_sample) only worked through
   variable-name abbreviation and would break under "set varabbrev off". */
bys description_new: egen in_sample=min(in_sample_temp)
keep if in_sample==1
collapse (mean) tot_share  spitz*C_W deming*C_W [aw=share], by(description_new)
/* These averages will go into Table 2 and 8 */
save top_occupations_by_measure_raw, replace

/* Below we have some code that produces analogous tables where the measure is
   constructed using the normalization given in Equations 7 and 8 of the paper */

/* Same exercise as top_occupations_by_measure_raw, but each task count is
   divided by its share-weighted sample mean (the "_sh" variables) before
   averaging by job title. The 200 titles are chosen here by tot_share. */
use summary_by_year_comp, replace
keep if year>=1950
merge n:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
gen soc4=floor(soc_new/100)
bys year : egen ads=sum(ct2)
gen share=ct2/ads
bys description_new: egen tot_share=sum(share)
/* 51 is presumably the number of sample years (1950-2000) -- confirm. */
replace tot_share=tot_share/51
by description_new: gen description_new_fi=1 if _n==1

/* Share-weighted sample mean of each task count, stored in local <var>_loc. */
foreach var in spitz_nr_analytic_C spitz_nr_inter_C spitz_nr_manual_C spitz_r_manual_C spitz_r_cognitive_C deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C {
  sum `var' [aw=share]
  local `var'_loc=r(mean)
}

/* Task count relative to its sample mean. */
foreach var of varlist spitz*C deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C  {
   gen `var'_sh=`var'/``var'_loc'
}

/* Flagged rows (one per title) sort first, in decreasing order of tot_share. */
gsort description_new_fi -tot_share
gen in_sample_temp=1 if _n<=200
/* Spread the flag to every observation of a selected title. The source
   variable is named in full: the previous min(in_sample) only worked through
   variable-name abbreviation and would break under "set varabbrev off". */
bys description_new: egen in_sample=min(in_sample_temp)
keep if in_sample==1
collapse (mean) *_sh tot_share [aw=share], by(description_new)
save top_occupations_by_measure, replace

}

/* The material in this chunk of code corresponds to material presented in Section 4.2 and
   the appendix which is titled "Robustness Checks on Section 4" */
if `vintage_analysis' == 1 {

/* Build the job-title "vintage" measures: for each title, the share-weighted
   mean year in which it appears (title_vintage) and the share-weighted 5th,
   50th and 95th percentiles of year (p5_vintage, p50_vintage, p95_vintage). */
use summary_by_year_comp, replace
drop if year<1950
merge n:1 description_new using soc_codes_corresp, keep(master match)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
drop _merge
gen soc4=floor(soc_new/100)
bysort year: egen ads=total(ct2)
gen share=ct2/ads
bysort description_new: egen title_vintage=total(share*year)
bysort description_new: egen tot_share=total(share)
/* Turn the share-weighted sum of years into a share-weighted mean. */
replace title_vintage=title_vintage/tot_share
rename soc_new occsoc

/* Percentiles of year by title, computed on a copy of the data and merged
   back onto every observation of the title. */
preserve
collapse (p5) p5_vintage=year (p50) p50_vintage=year (p95) p95_vintage=year [aweight=share], by(description_new)
save p5_p95_vintage, replace
restore
merge n:1 description_new using p5_p95_vintage

bysort description_new: egen tot_ads=total(ct2)
/* Flag one observation per title for the title-level regressions. */
by description_new: gen description_new_fi=1 if _n==1

/* Totals of the five Spitz-Oener counts (s1) and the nine Deming counts (sA). */
gen s1=spitz_nr_analytic_C+spitz_nr_inter_C+spitz_nr_manual_C+spitz_r_manual_C+spitz_r_cognitive_C
gen sA=deming_writing_C+ deming_social_C+ deming_project_management_C+ deming_problem_solving_C+ deming_people_management_C+ deming_financial_C+ deming_customer_service_C+ deming_computer_C+ deming_character_C

local sp_vars "spitz_nr_analytic_C spitz_nr_inter_C spitz_nr_manual_C spitz_r_manual_C spitz_r_cognitive_C"
local dm_vars "deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C"

/* Share-weighted sample mean of every task count, stored in local <var>_loc. */
foreach var in `sp_vars' `dm_vars' {
   summarize `var' [aweight=share]
   local `var'_loc=r(mean)
}

/* Each count relative to its sample mean. */
foreach var of varlist spitz*C `dm_vars' {
   generate `var'_sh=`var'/``var'_loc'
}

/* Spitz-Oener measures: <var>_ rescales the five relative counts to sum to
   one; <var>I equals <var>_ with missing values replaced by 1/5. */
local sp_den "spitz_nr_analytic_C_sh+spitz_nr_inter_C_sh+spitz_nr_manual_C_sh+spitz_r_manual_C_sh+spitz_r_cognitive_C_sh"
foreach var of varlist spitz*_C_sh {
   generate `var'_=`var'/(`sp_den')
   generate `var'I=cond(`var'_==., .2, `var'_)
}

/* Deming measures: same construction over the nine relative counts, with
   missing values replaced by 1/9. */
local dm_sh
foreach var of local dm_vars {
   local dm_sh `dm_sh' `var'_sh
}
local dm_den : subinstr local dm_sh " " "+", all
foreach var of local dm_sh {
   generate `var'_=`var'/(`dm_den')
   generate `var'I=cond(`var'_==., 1/9, `var'_)
}

/* Vintage regressions (Tables 7 and 22).

   For every task measure two result columns are filled, one number per row:
     <var>P : observation-level regressions of <var>_W on a vintage
              percentile and year dummies, weighted by share (Table 22);
     <var>p : title-level regressions of <var>_MW on a vintage percentile,
              one observation per title, weighted by tot_share (Table 7).
   Row layout, repeated for p5 (rows 1-6), p50 (rows 7-12), p95 (rows 13-18):
     +1 coefficient, +2 standard error, +3 RMSE of the regression without the
     vintage variable; +4, +5, +6 are the same with the a(occsoc) option.
   NOTE(review): a(occsoc) presumably absorbs occsoc fixed effects; regress
   does not document such an option -- confirm. */

foreach var of varlist spitz*C deming_computer_C {
   /* Mentions per 1000 ad words, and their share-weighted mean by title. */
   gen `var'_W=`var'/words*1000

   by description_new: egen `var'_MW=sum(share*`var'_W)
   replace `var'_MW=`var'_MW/tot_share

   /* Observation-level regressions. */
   gen `var'P=.
   local row=0
   foreach pct in p5 p50 p95 {
      qui reg `var'_W `pct'_vintage i.year [aw=share], rob
      replace `var'P=_b[`pct'_vintage] if _n==`row'+1
      replace `var'P=_se[`pct'_vintage] if _n==`row'+2
      qui reg `var'_W `pct'_vintage i.year [aw=share], rob a(occsoc)
      replace `var'P=_b[`pct'_vintage] if _n==`row'+4
      replace `var'P=_se[`pct'_vintage] if _n==`row'+5
      local row=`row'+6
   }

   qui reg `var'_W i.year [aw=share], rob
   replace `var'P=e(rmse) if inlist(_n,3,9,15)

   qui reg `var'_W i.year [aw=share], rob a(occsoc)
   replace `var'P=e(rmse) if inlist(_n,6,12,18)

   /* Title-level regressions. */
   gen `var'p=.
   local row=0
   foreach pct in p5 p50 p95 {
      qui reg `var'_MW `pct'_vintage if description_new_fi==1 [aw=tot_share], rob
      replace `var'p=_b[`pct'_vintage] if _n==`row'+1
      replace `var'p=_se[`pct'_vintage] if _n==`row'+2
      qui reg `var'_MW `pct'_vintage if description_new_fi==1 [aw=tot_share], rob a(occsoc)
      replace `var'p=_b[`pct'_vintage] if _n==`row'+4
      replace `var'p=_se[`pct'_vintage] if _n==`row'+5
      local row=`row'+6
   }

   qui reg `var'_MW if description_new_fi==1 [aw=tot_share], rob
   replace `var'p=e(rmse) if inlist(_n,3,9,15)

   qui reg `var'_MW if description_new_fi==1 [aw=tot_share], rob a(occsoc)
   replace `var'p=e(rmse) if inlist(_n,6,12,18)
}

/* Save the result columns only. Table 7 corresponds to the variables ending
   with a p; Table 22 corresponds to the variables ending with a P. */
preserve
keep *p *P
drop software*
drop if _n>30
save job_title_vintage_regressions, replace
restore

/* Within-occupation shares of job titles by decade. Year 2000 is folded into
   the 1990s, so that decade covers 11 years and the earlier ones 10. */
gen decade=cond(floor(year/10)*10==2000, 1990, floor(year/10)*10)
bysort occsoc decade: egen sum_share_decade_soc=total(share)
bysort description_new decade: egen sum_share_decade=total(share)
/* Convert decade sums into per-year averages. */
foreach tot in sum_share_decade sum_share_decade_soc {
   replace `tot'=`tot'/10 if decade<1990
   replace `tot'=`tot'/11 if decade==1990
}
gen share_within_soc=sum_share_decade/sum_share_decade_soc

/* Flag, within each decade and occsoc, the row with the largest
   within-occupation share. */
gsort decade occsoc -share_within_soc
by decade occsoc : gen decade_occsoc_fi=1 if _n==1
gen early_top_temp=(decade_occsoc_fi==1 & decade==1950)
gen late_top_temp=(decade_occsoc_fi==1 & decade==1990)
bysort description_new decade : gen description_new_decade=1 if _n==1
/* A title is "early top" ("late top") if it held the top spot in its occsoc
   in the 1950s (1990s). */
foreach era in early late {
   by description_new: egen `era'_top=max(`era'_top_temp)
}

/* Mean within-occupation share, by decade, of the early-top and late-top
   titles (one row per title and decade). */
foreach era in early late {
   tabstat share_within_soc if `era'_top==1 & description_new_decade==1, stats(mean N) by(decade)
}

/* Restrict the data to the job titles used in the narrative plots and
   tabulations below, then rectangularize by title and year so that years in
   which a title has no observation enter with a share of zero. */
gen byte narrative_title=0
foreach title in "diamond cutter" "jewelry setter" "plate maker" "linotype operator" "staff accountant" "auditor" "assistant typist" "software engineer" "database administrator" "developer" "network engineer" "teletype operator" "secretary receptionist" "data processing" "pressman" "offset stripper" {
   replace narrative_title=1 if description_new=="`title'"
}
keep if narrative_title==1
drop narrative_title

drop _merge
fillin description_new year
replace share=0 if share==.

/* Plots of the yearly ad share of selected job titles. In the first four
   figures, each title gets a local polynomial smooth of share on year
   (lpoly, bandwidth 4) plus a scatter of the raw yearly shares; the legend
   shows the scatter series only (order(3 4)). Each figure is exported as an
   .eps file. */

/* Pressman vs. offset stripper. */
graph twoway (lpoly share year if description_new=="pressman" , bw(4) color(red)) (lpoly share year if description_new=="offset stripper" ,  bw(4) color(blue) lpattern(dash)) (scatter share year if description_new=="pressman", msym("+") color(red)) (scatter share year if description_new=="offset stripper", msym("x") color(blue))  , xtitle("Year") ytitle("Share of Ads") xlab(1950(10)2000) legend(lab(3 "Pressman") lab(4 "Offset Stripper") order(3 4) row(1)) title("Printing Press Operators")
graph export "narrative_printing.eps" , replace
/* Auditor vs. staff accountant. */
graph twoway (lpoly share year if description_new=="auditor" ,   bw(4) color(red)) (lpoly share year if description_new=="staff accountant" ,  bw(4) color(blue) lpattern(dash)) (scatter share year if description_new=="auditor", msym("+") color(red)) (scatter share year if description_new=="staff accountant", color(blue) msym("x"))  , xtitle("Year") ytitle("Share of Ads") xlab(1950(10)2000) legend(lab(3 "Auditor") lab(4 "Staff Accountant") order(3 4) row(1)) title("Accountants and Auditors")
graph export "narrative_accountant.eps" , replace
/* Assistant typist vs. secretary receptionist. */
graph twoway (lpoly share year if description_new=="assistant typist" ,  bw(4) color(red)) (lpoly share year if description_new=="secretary receptionist" ,  bw(4) colo(blue) lpattern(dash)) (scatter share year if description_new=="assistant typist", msym("+") color(red)) (scatter share year if description_new=="secretary receptionist", colo(blue)  msym("x"))   , xtitle("Year") ytitle("Share of Ads") xlab(1950(10)2000) legend(lab(3 "Assistant Typist") lab(4 "Secretary Receptionist") order(3 4) row(1)) title("Secretaries and Administrative Assistants")
graph export "narrative_secretary.eps" , replace
/* Data processing vs. teletype operator. */
graph twoway (lpoly share year if description_new=="data processing" ,  bw(4) color(red)) (lpoly share year if description_new=="teletype operator" ,  bw(4) lpattern(dash) color(blue)) (scatter share year if description_new=="data processing", msym("+") color(red)) (scatter share year if description_new=="teletype operator",  msym("x") color(blue)) , xtitle("Year") ytitle("Share of Ads") xlab(1950(10)2000)  legend(lab(3 "Data Processing") lab(4 "Teletype Operator") order(3 4) row(1)) title("Data Entry Keyers")
graph export "narrative_data_entry.eps" , replace

/* Four computer-related titles; smoothed lines only, no scatter. */
graph twoway (lpoly share year if description_new=="software engineer" , bw(4)  color(red)) (lpoly share year if description_new=="developer" ,    bw(4) lpattern(shortdash) lwidth(medthick) color(black))  (lpoly share year if description_new=="database administrator" ,  bw(4)  lpattern(dash) color(blue))  (lpoly share year if description_new=="network engineer",   lpattern(longdash) bw(4) color(green)),  legend(lab(1 "Software Engineer")  lab(2 "Developer") lab(3 "Database Administrator") lab(4 "Network Engineer")  row(2)) xlab(1950(10)2000) xtitle("Year")  ytitle("Share of Ads") 
graph export "narrative_computer.eps", replace
/* Vintage (share-weighted mean year) of the four computer-related titles. */
tabstat title_vintage if description_new=="software engineer" | description_new=="database administrator" | description_new=="developer" | description_new=="network engineer", by(description_new)

}

/* The material in this chunk of code corresponds to material presented in Section 4.1 of the paper. */

if `narrative_plots'==1 {    
/* Data for the Section 4.1 plots: yearly ad shares, share-weighted sample
   means of the task counts, and, for the plotted measures, mentions per 1000
   ad words (<var>_W) and the same relative to the sample average (<var>_WN). */
use summary_by_year_comp, replace
drop if year<1950
merge n:1 description_new using soc_codes_corresp, keep(master match)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
bysort year: egen ads=total(ct2)
gen share=ct2/ads

/* Share-weighted sample means, stored in local <var>_loc. */
foreach var in spitz_nr_analytic_C spitz_nr_inter_C spitz_nr_manual_C spitz_r_manual_C spitz_r_cognitive_C deming_writing_C deming_social_C deming_project_management_C deming_problem_solving_C deming_people_management_C deming_financial_C deming_customer_service_C deming_computer_C deming_character_C {
   summarize `var' [aweight=share]
   local `var'_loc=r(mean)
}
/* For the activity measures the mean is stored in a local named after the
   variable itself. */
foreach var of varlist activity*C {
   summarize `var' [aweight=share]
   local `var'=r(mean)
}

foreach var of varlist spitz*C software_cnc  activity_*C {
   generate `var'_W=`var'/words*1000
   quietly summarize `var'_W [aweight=share]
   generate `var'_WN=`var'_W/r(mean)
}

/* Task-content plots for individual job titles. Every line is a local
   polynomial smooth (lpoly, bandwidth 4, weighted by share) of a "_WN"
   measure -- mentions per 1000 ad words relative to the sample average -- on
   year. Each figure is exported as an .eps file. */

/* Machinist: the five Spitz-Oener measures, on a log y-axis. */
graph twoway  (lpoly  spitz_nr_analytic_C_WN  year  if description_new=="machinist" [aw=share], bw(4)  ysca(log) lpattern(dash_dot) lwidth(thick)) (lpoly  spitz_nr_inter_C_WN  year if description_new=="machinist" [aw=share], bw(4) ysca(log)   lpattern(longdash))   (lpoly  spitz_nr_manual_C_WN  year if description_new=="machinist" [aw=share],  ysca(log)  bw(4) ) (lpoly spitz_r_cognitive_C_WN  year if description_new=="machinist" [aw=share], bw(4) ysca(log)  lpattern(dash)) (lpoly  spitz_r_manual_C_WN  year if description_new=="machinist" [aw=share], bw(4) ysca(log)  lwidth(medthick) lpattern(dash_dot)) , xtitle("Year") xlab(1950(10)2000) legend(lab(1 "NR: Analytic") lab(2 "NR: Interactive") lab(3 "NR: Manual") lab(4 "R: Cognitive") lab(5 "R: Manual") row(2)) ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") ylab(.5 1 2 5 10 20)
graph export "narrative1.eps", replace
/* This is the right panel of Figure 4 */
/* NOTE(review): narrative2.eps below is also described as the right panel of
   Figure 4; one of the two is presumably the left panel -- confirm against
   the paper. */

/* Cashier: the five Spitz-Oener measures. */
graph twoway  (lpoly  spitz_nr_analytic_C_WN  year  if description_new=="cashier" [aw=share], bw(4)   lpattern(dash_dot) lwidth(thick)) (lpoly  spitz_nr_inter_C_WN  year if description_new=="cashier" [aw=share], bw(4)    lpattern(longdash))   (lpoly  spitz_nr_manual_C_WN  year if description_new=="cashier" [aw=share],    bw(4) ) (lpoly spitz_r_cognitive_C_WN  year if description_new=="cashier" [aw=share], bw(4)   lpattern(dash)) (lpoly  spitz_r_manual_C_WN  year if description_new=="cashier" [aw=share], bw(4)   lwidth(medthick) lpattern(dash_dot)) , xtitle("Year") xlab(1950(10)2000) legend(lab(1 "NR: Analytic") lab(2 "NR: Interactive") lab(3 "NR: Manual") lab(4 "R: Cognitive") lab(5 "R: Manual") row(2)) title("Cashier") ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") 
graph export "narrative5.eps", replace
/* This is the left panel of Figure 6 */

/* Manager: the five Spitz-Oener measures. */
graph twoway  (lpoly  spitz_nr_analytic_C_WN  year  if description_new=="manager" [aw=share], bw(4)   lpattern(dash_dot) lwidth(thick)) (lpoly  spitz_nr_inter_C_WN  year if description_new=="manager" [aw=share], bw(4)    lpattern(longdash))   (lpoly  spitz_nr_manual_C_WN  year if description_new=="manager" [aw=share],    bw(4) ) (lpoly spitz_r_cognitive_C_WN  year if description_new=="manager" [aw=share], bw(4)   lpattern(dash)) (lpoly  spitz_r_manual_C_WN  year if description_new=="manager" [aw=share], bw(4)   lwidth(medthick) lpattern(dash_dot)) , xtitle("Year") xlab(1950(10)2000) legend(lab(1 "NR: Analytic") lab(2 "NR: Interactive") lab(3 "NR: Manual") lab(4 "R: Cognitive") lab(5 "R: Manual") row(2)) ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") 
graph export "narrative3.eps", replace
/* This is the left panel of Figure 5 */

/* Real estate sales: the five Spitz-Oener measures. */
graph twoway  (lpoly  spitz_nr_analytic_C_WN  year  if description_new=="real estate sales" [aw=share], bw(4)   lpattern(dash_dot) lwidth(thick)) (lpoly  spitz_nr_inter_C_WN  year if description_new=="real estate sales" [aw=share], bw(4)    lpattern(longdash))   (lpoly  spitz_nr_manual_C_WN  year if description_new=="real estate sales" [aw=share],    bw(4) ) (lpoly spitz_r_cognitive_C_WN  year if description_new=="real estate sales" [aw=share], bw(4)   lpattern(dash)) (lpoly  spitz_r_manual_C_WN  year if description_new=="real estate sales" [aw=share], bw(4)   lwidth(medthick) lpattern(dash_dot)) , xtitle("Year") xlab(1950(10)2000) legend(lab(1 "NR: Analytic") lab(2 "NR: Interactive") lab(3 "NR: Manual") lab(4 "R: Cognitive") lab(5 "R: Manual") row(2)) title("Real Estate Sales") ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") 
/* This is the right panel of Figure 6 */
graph export "narrative6.eps", replace

/* Machinist: mentions of the software_cnc measure. */
graph twoway  (lpoly  software_cnc_WN  year if description_new=="machinist"  [aw=share], bw(4)  lwidth(medthick)), xtitle("Year") ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") legend(off) xlab(1950(10)2000)
/* This is the right panel of Figure 4 */
graph export "narrative2.eps", replace

/* Manager: four activity measures. */
graph twoway (lpoly  activity_building_teams_C_WN   year  if description_new=="manager" [aw=share], bw(4) lpattern(dash_dot) lwidth(thick)) (lpoly activity_coaching_others_C_WN  year  if description_new=="manager" [aw=share], bw(4) lpattern(longdash))   (lpoly  activity_establishing_inte_C_WN  year  if description_new=="manager" [aw=share], bw(4)) (lpoly  activity_work_with_public_C_WN  year if description_new=="manager" [aw=share], bw(4) lpattern(dash)) , xtitle("Year") xlab(1950(10)2000) legend(lab(1 "Building Teams") lab(2 "Coaching Others") lab(3 "Establishing Relationships") lab(4 "Working with the Public") row(2))  ytitle("Mentions per 1000 Ad Words," "Relative to Sample Average") 
graph export "narrative4.eps", replace
/* This is the right panel of Figure 5 */

/* <task>_sh: each Spitz-Oener count divided by its share-weighted sample
   mean. <task>_sh_: the five relative counts rescaled to sum to one. */
foreach task of varlist spitz*C {
   summarize `task' [aweight=share]
   generate `task'_sh=`task'/r(mean)
}

local sh_total "spitz_nr_analytic_C_sh+spitz_nr_inter_C_sh+spitz_nr_manual_C_sh+spitz_r_cognitive_C_sh+spitz_r_manual_C_sh"
foreach task of varlist spitz*C {
   generate `task'_sh_=`task'_sh/(`sh_total')
}

/* "Near" job titles, presented in Tables 6 and 21: sample selection and
   reference coordinates.

   The data are cut to the first 51*200 rows in decreasing order of the
   titles' mean yearly share, rectangularized by title and year, and cut to
   51*200 rows again. */
bysort description_new: egen mean_share=total(share)
replace mean_share=mean_share/51
gsort -mean_share
drop if _n>51*200
drop _merge
fillin description_new year
bysort description_new: egen mean_share_=min(mean_share)
gsort -mean_share_ year
drop if _n>51*200

preserve
/* Task order for the numbered locals:
   1 NR analytic, 2 NR interactive, 3 NR manual, 4 R cognitive, 5 R manual. */
local nb_tasks "spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_cognitive spitz_r_manual"

/* l1-l5: share-weighted sample means of the per-1000-words measures. */
local nb_k=0
foreach task of local nb_tasks {
   local ++nb_k
   summarize `task'_C_W [aweight=share]
   local l`nb_k'=r(mean)
}
/* m1-m5: share-weighted sample means of the normalized-share measures. */
local nb_k=0
foreach task of local nb_tasks {
   local ++nb_k
   summarize `task'_C_sh_ [aweight=share]
   local m`nb_k'=r(mean)
}

collapse (mean) *_W *sh_ [aweight=share], by(description_new )

/* Store, for later, the coordinates of each job title. We try two distance
   measures: one based on the frequency of words (per 1000 job ad words), the
   other using the normalizations given in Equations 7 and 8. The closest job
   titles under the two measures are given in Tables 6 and 21, respectively.
   For row xx of the collapsed data: l1_xx-l5_xx hold the per-1000-words
   coordinates, m1_xx-m5_xx the normalized shares, l6_xx the job title. */
forvalues xx=1/200 {
   local nb_k=0
   foreach task of local nb_tasks {
      local ++nb_k
      local l`nb_k'_`xx'=`task'_C_W[`xx']
      local m`nb_k'_`xx'=`task'_C_sh_[`xx']
   }
   local l6_`xx'=description_new[`xx']
}
restore

/* Closest job title by decade, using mentions per 1000 ad words (Table 6). */
preserve
gen decade=cond(floor(year/10)*10==2000, 1990, floor(year/10)*10)
collapse (mean) spitz*_W [aweight=share], by(description_new decade)

/* Distance between every job title-decade combination and each of the 200
   stored job titles (averaged over the sample period). Each squared
   difference is divided by the square of that measure's sample mean. */
local dist_tasks "spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_cognitive spitz_r_manual"
forvalues zz=1/200 {
   local sq
   local sep
   local k=0
   foreach task of local dist_tasks {
      local ++k
      local sq "`sq'`sep'1/`l`k''/`l`k''*(`task'_C_W-`l`k'_`zz'')^2"
      local sep "+"
   }
   gen distance_`zz'=sqrt((`sq'))
}

/* Running minimum over the 200 candidates: keep the candidate's coordinates
   (min_1-min_5) and title (closest) whenever its distance is no larger than
   the best so far. distance_min starts missing, so the first candidate is
   always accepted. */
gen closest=""
forvalues k=1/5 {
   gen min_`k'=.
}
gen distance_min=.
forvalues zz=1/200 {
   forvalues k=1/5 {
      replace min_`k'=`l`k'_`zz'' if distance_`zz'<=distance_min
   }
   replace closest="`l6_`zz''" if distance_`zz'<=distance_min
   replace distance_min=distance_`zz' if distance_`zz'<=distance_min
}
drop distance_*
/* This is the dataset that contains Table 6. To produce the table, keep only
   the observations with description_new=="manager", "machinist", "cashier"
   or "real estate sales". */
save closest_job_titles, replace
restore
/* Closest job title by decade, using the normalized shares of Equations 7
   and 8 (Table 21). Same procedure as for closest_job_titles, with the "m"
   coordinates in place of the "l" ones. */
gen decade=cond(floor(year/10)*10==2000, 1990, floor(year/10)*10)
collapse (mean) spitz*_sh_ [aweight=share], by(description_new decade)

local dist_tasks "spitz_nr_analytic spitz_nr_inter spitz_nr_manual spitz_r_cognitive spitz_r_manual"
forvalues zz=1/200 {
   local sq
   local sep
   local k=0
   foreach task of local dist_tasks {
      local ++k
      local sq "`sq'`sep'1/`m`k''/`m`k''*(`task'_C_sh_-`m`k'_`zz'')^2"
      local sep "+"
   }
   gen distance_`zz'=sqrt((`sq'))
}

/* Running minimum over the 200 candidates; the job-title names are stored
   in the l6_ locals. */
gen closest=""
forvalues k=1/5 {
   gen min_`k'=.
}
gen distance_min=.
forvalues zz=1/200 {
   forvalues k=1/5 {
      replace min_`k'=`m`k'_`zz'' if distance_`zz'<=distance_min
   }
   replace closest="`l6_`zz''" if distance_`zz'<=distance_min
   replace distance_min=distance_`zz' if distance_`zz'<=distance_min
}
drop distance_*
/* This is the dataset that corresponds to Table 21. */
save closest_job_titles_share, replace

}

if `appendix_b1'==1 { 

/* Newspaper data by 4-digit occupation and census year, merged with the
   IPUMS occupation-size and education files. */
use summary_by_year_comp, replace
merge n:1 description_new using soc_codes_corresp, keep(master match) nogenerate
drop if description_new_miss==1 | soc_new==. | soc_new==999999
drop if year<1950

/* Assign years to census years: within two years of a multiple of ten, plus
   1953 -> 1950 and 1997 -> 2000; all other years are dropped. */
replace year=round(year,10) if abs(year-round(year,10))<=2
replace year=1950 if year==1953
replace year=2000 if year==1997
keep if mod(year,10)==0

/* 4-digit occupation code, with some codes folded into a neighbouring code
   (presumably to match the coding of the IPUMS files -- confirm). */
gen occsoc=floor(soc_new/100)
recode occsoc (1120=1110) (1131=1130) (2511=2510) (2590=2520) (2990=2911) (3110=3190) (3510=3520) (3590=3530) (3970=3990) (4140=4130) (4540=4530)

/* Occupation share of the year's total ct, and ct-weighted means of the
   degree variables. */
bysort year: egen sum_ads=total(ct)
bysort year occsoc: egen sum_ads_=total(ct)
gen share=sum_ads_/sum_ads
collapse (mean) degree* share [aweight=ct], by(occsoc year)

merge 1:1 occsoc year using ipums_sizes_nybos, nogenerate
rename occsize occsizenybos
merge 1:1 occsoc year using ipums_sizes_, nogenerate
merge 1:1 occsoc year using ipums_occupation_education, nogenerate

/* Pool the graduate degree measures into degree_ma and the two bachelor's
   measures into degree_ba. */
replace degree_ma=degree_ma+degree_mba+degree_phd+degree_cpa+degree_ms
replace degree_ba=degree_ba+degree_bs

/* Year-by-year comparison of the newspaper and census measures. Row k of the
   cor_* variables holds the correlation for the k-th census year
   (1950, 1960, ..., 2000); one scatter plot per year and measure is saved as
   a .gph file for the combined figures. */
foreach stat in ba size sizenybos ma {
   gen cor_`stat'=.
}
/* Use the newspaper degree measures only where the census counterpart exists. */
replace degree_ba=. if ba==.
replace degree_ma=. if ma==.

local count=1
forvalues yr=1950(10)2000 {
   correlate ma degree_ma [aweight=share] if year==`yr'
   replace cor_ma=r(rho) if _n==`count'
   correlate ba degree_ba [aweight=share] if year==`yr'
   replace cor_ba=r(rho) if _n==`count'
   correlate share occsize if year==`yr'
   replace cor_size=r(rho) if _n==`count'
   correlate share occsizenybos if year==`yr'
   replace cor_sizenybos=r(rho) if _n==`count'

   /* Options shared by the four scatters: invisible markers labelled with the
      occupation code, no legend, the year as title. */
   local common `"msym(i) mlab(occsoc) mlabcolor(black) mlabpos(0) legend(off) note("") caption("") title("`yr'") mlabsize(small)"'

   scatter degree_ma ma if year==`yr' [aweight=share], `common' xtitle("Graduate Degree Share: Census") ytitle("Graduate Degree" "Frequency: Newspaper")
   graph save "occupation_ma_`yr'.gph", replace
   scatter degree_ba ba if year==`yr' [aweight=share], `common' xtitle("Undergrad. Degree Share: Census") ytitle("Undergrad. Degree" "Frequency: Newspaper") ylab(0(.06).18)
   graph save "occupation_ba_`yr'.gph", replace
   scatter share occsize if year==`yr' [aweight=share], `common' xtitle("Employment Share: Census") ytitle("Vacancy" "Share: Newspaper")
   graph save "occupation_size_`yr'.gph", replace
   scatter share occsizenybos if year==`yr' [aweight=share], `common' xtitle("Employment Share: Census") ytitle("Vacancy" "Share: Newspaper")
   graph save "occupation_sizeny_`yr'.gph", replace

   local ++count
}
    
/* Combine the six yearly panels of each measure into one figure:
     occupation_prof.eps          (ma panels)     is figure 13 in the paper
     occupation_undergraduate.eps (ba panels)     is figure 12 in the paper
     occupation_shares_3.eps      (sizeny panels) is figure 11 in the paper
     occupation_shares_1.eps      (size panels)   is figure 10 in the paper */
foreach spec in "ma occupation_prof" "ba occupation_undergraduate" "sizeny occupation_shares_3" "size occupation_shares_1" {
   local stub : word 1 of `spec'
   local outfile : word 2 of `spec'
   local panels
   forvalues yr=1950(10)2000 {
      local panels `panels' occupation_`stub'_`yr'.gph
   }
   graph combine `panels', xcommon ycommon
   graph export "`outfile'.eps", replace
}
/* Aggregate the newspaper and census shares to 2-digit occupations. After
   this step occsoc holds the 2-digit code; occsoc1 is the same code shifted
   by .5, used to draw the newspaper bars next to the census bars. */
gen soc2=floor(occsoc/100)
collapse (sum) share occsize, by(soc2 year)
rename soc2 occsoc
gen occsoc1=occsoc+.5
drop if inlist(occsoc,0,55)

/* Short names of the 2-digit occupation groups, used as axis labels. */
local short11 "Management"
local short13 "Financial Operations"
local short15 "Computer/Math"
local short17 "Architecture/Engineering"
local short19 "Science"
local short21 "Social Services"
local short23 "Legal"
local short25 "Education"
local short27 "Entertainment"
local short29 "Healthcare Practitioners"
local short31 "Healthcare Support"
local short33 "Protective Service"
local short35 "Food Prep/Serving"
local short37 "Building/Grounds Cleaning"
local short39 "Personal Care"
local short41 "Sales"
local short43 "Administrative Support"
local short45 "Farming/Fishing"
local short47 "Construction"
local short49 "Installation/Maintenance"
local short51 "Production"
local short53 "Transportation"

/* The six panels of figure 9: horizontal bars of the newspaper share (red,
   drawn at occsoc1) and the census share (blue, drawn at occsoc) by 2-digit
   occupation, one panel per census year. */

/* Axis labels "code name" for the panels that carry occupation names. */
local occ_ticks
forvalues code=11(2)53 {
   local occ_ticks `"`occ_ticks' `code' "`short`code''" "'
}

/* 1950, 1970 and 1990 panels: occupation names on the axis, no legend. */
foreach panel in "1950 a" "1970 c" "1990 e" {
   local yr : word 1 of `panel'
   local tag : word 2 of `panel'
   graph twoway (bar share occsoc1 if year==`yr', color(red) barw(.8) horizontal ) ///
      (bar occsize occsoc if year==`yr', color(blue) barw(.8) lwidth(.3) fintensity(1) horizontal ) , ///
      legend(off) ylab(none) xtitle("Frequency") title("`yr'") xlab(0(.09).27) ///
      ylabel(`occ_ticks', labsize(small) angle(0))
   graph export "occupation_shares2`tag'.eps", replace
}

/* 1960, 1980 and 2000 panels: no axis labels, narrower; only the 2000 panel
   carries the legend. */
foreach panel in "1960 b" "1980 d" "2000 f" {
   local yr : word 1 of `panel'
   local tag : word 2 of `panel'
   if `yr'==2000 {
      local leg `"legend(lab(1 "Newspaper") lab(2 "Census") bplace(5) ring(0) region(lp(blank)) row(2) size(small)  symysize(*.8) symxsize(*.3) )"'
   }
   else {
      local leg "legend(off)"
   }
   graph twoway (bar share occsoc1 if year==`yr', color(red) barw(.8) horizontal ) ///
      (bar occsize occsoc if year==`yr', color(blue) barw(.8) lwidth(.3) fintensity(1) horizontal ) , ///
      `leg' ylab(none) xtitle("Frequency") title("`yr'") xlab(0(.09).27) xsize(4.5)
   graph export "occupation_shares2`tag'.eps", replace
}
/* The six files above form the panels of figure 9. */
}

if `appendix_b2'==1 {

/* Crosswalk from O*NET element codes to our variable names; the first row of
   the csv is discarded. */
insheet using onet_varname_crosswalk.csv, clear
keep if _n>1
rename v1 code
rename v2 varname
save onet_varname_crosswalk, replace

/* Stack the four O*NET files, attach our variable names (with a "W" suffix)
   and a 4-digit occupation code. */
use Knowledge, replace
append using Skills Styles Activities
merge n:1 code using onet_varname_crosswalk, nogenerate
replace varname=varname+"W"
gen soc4=floor(onetsoccode/100)
save onet_to_merge, replace

 /* Here we compare our occupations' task measures to the corresponding
    measures in O*NET. This part builds the newspaper side: one row per
    soc4-year cell holding weighted means of the task variables. */

/* The documented option for discarding the data in memory is "clear". */
use summary_by_year_comp, clear
keep if year>=1950
/* keep(1 3): retain master-only and matched rows, drop using-only rows. */
merge n:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999
gen soc4=floor(soc_new/100)
/* share is the row's ct2 divided by the year total of ct2.
   total() is the documented egen function for a group sum. */
bys year : egen ads=total(ct2)
gen share=ct2/ads
/* Despite its name, mean_share holds the soc4-year TOTAL of ct2 at this
   point; it is averaged in the collapse and renamed to share afterwards. */
bys soc4 year: egen mean_share=total(ct2)
/* The "W" variables are mentions per 1000 words. */
foreach var of varlist activity*C style*C requirement*C skill*C {
  gen `var'W=`var'/words*1000
}

collapse (mean) activity*C style*C requirement*C skill*C *W mean_share [aw=share] , by(soc4 year)
ren mean_share share

/* The Equations 7-8 "share" variables constructed below are not used in the
   end; our plots are based on the "W" variables instead. We still build the
   share variables here. For each of the activity-, style-, knowledge
   requirement-, and skill-related word counts we divide by the (weighted)
   sample average, and then normalize within each category so that the
   share variables sum to 1.
    */

/* Pass 1: weighted sample mean of every count variable, stored in a
   numbered macro. The summary tables are deliberately left in the log. */
local k=0
foreach v of varlist activity*C   style*C requirement*C skill*C {
   local ++k
   summarize `v' [aw=share]
   local avg`k'=r(mean)
}

/* Pass 2: same variable list in the same order, so the k-th macro pairs
   with the k-th variable. The "S" variables are counts relative to the mean. */
local k=0
foreach v of varlist  activity*C style*C requirement*C skill*C {
   local ++k
   generate `v'S=`v'/`avg`k''
}

/* Category totals of the rescaled variables. */
foreach grp in activity requirement skill style {
  generate `grp'_sum=0
}
foreach grp in activity style skill requirement {
  foreach v of varlist `grp'*C {
    replace `grp'_sum=`grp'_sum+`v'S
  }
}

/* Within-category shares (lower-case "s" suffix); they sum to 1 by category. */
foreach grp in activity skill style requirement {
  foreach v of varlist `grp'*C {
    generate `v's=`v'S/`grp'_sum
  }
}

/* Average over years to one row per soc4, weighting by share. */
bysort soc4: egen mean_share=mean(share)
collapse (mean) *Cs *W  mean_share [aw=share], by(soc4)
rename mean_share share

  /* Given that we are keeping the "W" variables here, our plots
    compare O*NET importance measures with the frequency of activity
    related words (mentions per 1000 job ad words) in our newspaper text. */
preserve
keep soc4 activity*CW share
/* Go long: one row per soc4 and activity variable, then rebuild the full
   variable name so that it matches varname in onet_to_merge. */
reshape long activity, i(soc4) j(varname) string
replace varname=substr(varname, 2,.)
replace varname="activity_"+varname
merge 1:n varname soc4 using onet_to_merge
collapse (mean) datavalueIM activity share, by(soc4 code)
keep if activity~=.
/* Exclude occupations in major group 55. */
drop if floor(soc4/100)==55
gen cor_activityUW=.
gen cor_activityW=.
sort code soc4
/* Number the O*NET codes 1, 2, ... in sort order. Rows with a blank code
   sort first and get id 0, so they never enter a correlation. Selecting
   rows by this id, rather than by fixed 108-row position windows, keeps
   every correlation within a single code even if the number of soc4 rows
   per code is not exactly 108. */
gen long elem_id=sum(code~=code[_n-1])
local n_elem=elem_id[_N]
/* The x-th code's unweighted and share-weighted correlations are parked in
   row x only so that the summarize command below can report on them. */
forvalues  x=1/`n_elem' {
    qui cor datavalueIM activity if elem_id==`x'
    replace cor_activityUW=r(rho) if _n==`x'
    qui cor datavalueIM activity if elem_id==`x' [aw=share]
    replace cor_activityW=r(rho) if _n==`x'
}
sum cor*
scatter activity datavalueIM  if code=="4.A.3.a.4", mlabel(soc4) mlabpos(0) msym(i) mlabcolor(black) xtitle("O*NET Importance") ytitle("Newspaper Frequency") title("Activity: Operating Vehicles or Equipment, 4.A.3.a.4") ylab(0(3)12)  xlab(1(1)5)
graph export "correlations_with_onet_composite_10.eps", replace
restore
preserve

/* Skills: correlate the newspaper frequency of skill-related words
   (mentions per 1000 job ad words) with the O*NET importance measure,
   separately for each O*NET code. */
keep soc4 skill*CW share
/* Go long: one row per soc4 and skill variable, then rebuild the full
   variable name so that it matches varname in onet_to_merge. */
reshape long skill, i(soc4) j(varname) string
replace varname=substr(varname, 2,.)
replace varname="skill_"+varname
merge 1:n varname soc4 using onet_to_merge
collapse (mean) datavalueIM skill share, by(soc4 code)
keep if skill~=.
/* Exclude occupations in major group 55. */
drop if floor(soc4/100)==55
gen cor_skillUW=.
gen cor_skillW=.
sort code soc4
/* Number the O*NET codes 1, 2, ... in sort order. Rows with a blank code
   sort first and get id 0, so they never enter a correlation. Selecting
   rows by this id, rather than by fixed 108-row position windows, keeps
   every correlation within a single code even if the number of soc4 rows
   per code is not exactly 108. */
gen long elem_id=sum(code~=code[_n-1])
local n_elem=elem_id[_N]
/* The x-th code's unweighted and share-weighted correlations are parked in
   row x only so that the summarize command below can report on them. */
forvalues  x=1/`n_elem' {
    qui cor datavalueIM skill if elem_id==`x'
    replace cor_skillUW=r(rho) if _n==`x'
    qui cor datavalueIM skill if elem_id==`x' [aw=share]
    replace cor_skillW=r(rho) if _n==`x'
}
sum cor*
scatter skill datavalueIM  if code=="2.A.1.b", mlabel(soc4) mlabpos(0) msym(i) mlabcolor(black) xtitle("O*NET Importance") ytitle("Newspaper Frequency") title("Skills: Active Listening, 2.A.1.b")  xlab(1(1)5)
graph export "correlations_with_onet_composite_7.eps", replace
 restore
preserve

/* Knowledge requirements: correlate the newspaper frequency of
   requirement-related words (mentions per 1000 job ad words) with the
   O*NET importance measure, separately for each O*NET code. */
keep soc4 requirement*CW share
/* Go long: one row per soc4 and requirement variable, then rebuild the full
   variable name so that it matches varname in onet_to_merge. */
reshape long requirement, i(soc4) j(varname) string
replace varname=substr(varname, 2,.)
replace varname="requirement_"+varname
merge 1:n varname soc4 using onet_to_merge
collapse (mean) datavalueIM requirement share, by(soc4 code)
keep if requirement~=.
/* Exclude occupations in major group 55. */
drop if floor(soc4/100)==55
gen cor_requirementUW=.
gen cor_requirementW=.
sort code soc4
/* Number the O*NET codes 1, 2, ... in sort order. Rows with a blank code
   sort first and get id 0, so they never enter a correlation. Selecting
   rows by this id, rather than by fixed 108-row position windows, keeps
   every correlation within a single code even if the number of soc4 rows
   per code is not exactly 108. */
gen long elem_id=sum(code~=code[_n-1])
local n_elem=elem_id[_N]
/* The x-th code's unweighted and share-weighted correlations are parked in
   row x only so that the summarize command below can report on them. */
forvalues  x=1/`n_elem' {
    qui cor datavalueIM requirement if elem_id==`x'
    replace cor_requirementUW=r(rho) if _n==`x'
    qui cor datavalueIM requirement if elem_id==`x' [aw=share]
    replace cor_requirementW=r(rho) if _n==`x'
}
sum cor*
 scatter requirement datavalueIM  if code=="2.C.1.f", mlabel(soc4) mlabpos(0) msym(i) mlabcolor(black) xtitle("O*NET Importance") ytitle("Newspaper Frequency") title("Knowledge: Personnel and Human Resources, 2.C.1.f")  xlab(1(1)5)
graph export "correlations_with_onet_composite_8.eps", replace

restore
preserve

/* Work styles: correlate the newspaper frequency of style-related words
   (mentions per 1000 job ad words) with the O*NET importance measure,
   separately for each O*NET code. */
keep soc4 style*CW share
/* Go long: one row per soc4 and style variable, then rebuild the full
   variable name so that it matches varname in onet_to_merge. */
reshape long style, i(soc4) j(varname) string
replace varname=substr(varname, 2,.)
replace varname="style_"+varname
merge 1:n varname soc4 using onet_to_merge
collapse (mean) datavalueIM style share, by(soc4 code)
keep if style~=.
/* Exclude occupations in major group 55. */
drop if floor(soc4/100)==55
gen cor_styleUW=.
gen cor_styleW=.
sort code soc4
/* Number the O*NET codes 1, 2, ... in sort order. Rows with a blank code
   sort first and get id 0, so they never enter a correlation. Selecting
   rows by this id, rather than by fixed 108-row position windows, keeps
   every correlation within a single code even if the number of soc4 rows
   per code is not exactly 108. */
gen long elem_id=sum(code~=code[_n-1])
local n_elem=elem_id[_N]
/* The x-th code's unweighted and share-weighted correlations are parked in
   row x only so that the summarize command below can report on them. */
forvalues  x=1/`n_elem' {
    qui cor datavalueIM style if elem_id==`x'
    replace cor_styleUW=r(rho) if _n==`x'
    qui cor datavalueIM style if elem_id==`x' [aw=share]
    replace cor_styleW=r(rho) if _n==`x'
}
sum cor*
 scatter style datavalueIM  if code=="1.C.3.a", mlabel(soc4) mlabpos(0) msym(i) mlabcolor(black) xtitle("O*NET Importance") ytitle("Newspaper Frequency") title("Style: Cooperation, 1.C.3.a") xlab(1(1)5)
graph export "correlations_with_onet_composite_9.eps", replace

 restore
}

/* Close the log file. */
log close
