/* Execute this file by running: run "/Users/miguel/Documents/Research/Stata/personal programs.do" run "/Users/miguel/Documents/Research/Stata/personal programs (faculty drive).do" */ // syntax varlist (real), dist(real) force // Today's date local today1 `c(current_date)' local today2 = date("`today1'", "DMY") local year = mod(year(`today2'),100) local month = month(`today2') local day = day(`today2') global today if (`month' < 10) { local month = "0`month'" } if (`day' < 10) { local day = "0`day'" } global today = "`year'`month'`day'" /////////////////////////////////////////////////// // Faculty drive stuff /////////////////////////////////////////////////// global common_folder_mac "/Volumes/morin research/Stata/Census of Population/Data" global mac 1 global gex global image_name global image_folder "/Users/miguel/Documents/Research/images/" capture program drop gex program define gex // graph export "$image_folder $image_name.png", as(png) replace graph export "$image_folder$today $image_name.pdf", as(pdf) replace end // Winsorize a variable at default threshold of 1% on each side capture program drop winsorize program define winsorize syntax varlist (numeric min=1) [, lbl] if ("$winsor_threshold" == "") local winsor_threshold 0.01 else local winsor_threshold $winsor_threshold if ("$winsor_drop_outliers" == "" | "$winsor_drop_outliers" == "0") local drop_outliers 0 else local drop_outliers 1 foreach v in `0' { qui count if !mi(`v') local total_obs = r(N) local win_thresh_num = ceil(`total_obs' * `winsor_threshold') winsor `v', h(`win_thresh_num') gen(`v'w) // Drop outliers? 
if (`drop_outliers') { // di "dropping outliers for `v'" // list `v' `v'w state_code if `v'w ~= `v' drop if `v'w ~= `v' rename `v'w `v'd local new_var `v'd } else local new_var `v'w local label_v : var label `v' drop `v' if ("`lbl'" ~= "") local add " (w)" if ("`label_v'" == "v") label variable `new_var' "`v'`add'" else label variable `new_var' "`label_v'`add'" } end //////////////////////////////////////////// // // Plot a graph with regression details // //////////////////////////////////////////// capture program drop plot_regression program define plot_regression syntax varlist (min=2 max=2 numeric) [if] [, tabname(string) /// nomarkers weight(string) clustering(string) markers(string)] local dep_var `: word 1 of `varlist'' local ind_var `: word 2 of `varlist'' local tab_option if "`tabname'" ~= "" { local tab_option name(`tabname', replace) } if ("`weight'" ~= "") local weighting [aweight=`weight'] capture drop `dep_var'p // Get labels for the variables, if any local label_dep : var label `dep_var' local label_ind : var label `ind_var' if ("`label_dep'" == "") local label_dep "`dep_var'" if ("`label_ind'" == "") local label_ind "`ind_var'" if (0) { if ("$plot_title" ~= "") local title_line title("$plot_title") } else { // local title_line title("`label_dep'") // local label_dep "" } // Get correlation corr `dep_var' `ind_var' `if' local correlation = r(rho) reg `dep_var' `ind_var' `if' `weighting', `clustering' capture drop `dep_var'p predict `dep_var'p, xb local coeff = _b[`ind_var'] local sdev = _se[`ind_var'] local tstat = `coeff' / `sdev' local R2 = e(r2) local N = e(N) //local F = e(F) local coeff2 : display %6.2f `coeff' local tstat2 : display %6.2f `tstat' local R3 : display %6.2f `R2' local corr2 : display %6.0f (100 * `correlation') //local F2 : display %6.0f `F' // if thresh is specified, only plot if the t-stat is above the threshold local to_plot 1 if "$thresh" ~= "" { local to_plot = (abs(`tstat') > $thresh) global tstat `tstat' } if (`to_plot') { 
// Change if to exclude missing points if ("`if'" ~= "") local if `if' & !mi(`ind_var') & !mi(`dep_var') else local if if !mi(`ind_var') & !mi(`dep_var') // If this regression is at the state-level, use this line qui { checkfor2 state_code local av = r(available) qui count local num = r(N) } // Markers if ("$no_markers" == "") { local scatter_plot (scatter `dep_var' `ind_var' `if', /// /// msymbol(none) mlabel(state_code) mlabsize($symbol_size) /// mlabcolor(navy) mlabpos(3) mlabgap(-3)) } else { // otherwise use this one local scatter_plot (scatter `dep_var' `ind_var' `if', color(navy)) } // If the user asked for a weighted regression, add this line if ("`weighting'" ~= "") { local weighted_plot (scatter `dep_var' `ind_var' [aw=`weight'], msymbol(Oh) mlstyle(dot) mlcolor(gs8)) // If there is a weight, display that instead of the correlation local label_weight : var label `weight' if ("`label_weight'" == "") local label_weight "`weight'" local extra_details "Weight: `label_weight'" } else local extra_details "Correlation: `corr2'%" twoway /// $background_scatter /// (line `dep_var'p `ind_var' `if', lcolor(black)) /// `scatter_plot' `weighted_plot', /// xtitle("`label_ind'") ytitle("`label_dep'") `title_line' /// legend(off) /// note("Slope: `coeff2' t-statistic: `tstat2'" "R2: `R3' Observations: `N' `extra_details'") /// `tab_option' $graph_options global counter = $counter + 1 } drop `dep_var'p end /////////////////////////////////////////////////// /////////////////////////////////////////////////// // // Geography programs // /////////////////////////////////////////////////// /////////////////////////////////////////////////// global new_york_counties fips_county == 36005 | fips_county == 36047 | fips_county == 36061 | fips_county == 36081 | fips_county == 36085 // For a variable defined at the county-level, this program computes // the total of that variable for N miles around each county, // imputing the contribution of counties where the N-mile line // cuts 
across with the proportionality rule by area included divided // by area of target county capture program drop intersect_aoi program define intersect_aoi syntax varlist (min=1 numeric) , dist(int) if (`dist' == 50) local dist_meters 80467 else if (`dist' == 100) local dist_meters 160934 else if (`dist' == 150) local dist_meters 241402 else if (`dist' == 200) local dist_meters 321868 if ("`dist_meters'" == "") { di "Problem in the code" return } // Load intersect in CSV format? if (0) { preserve insheet using "/Users/miguel/Documents/Research/Stata/Geography/Docs/GIS county intersects/Intersect `dist_meters'.csv", comma clear names save "/Users/miguel/Documents/Research/Stata/Geography/Data/Intersect `dist_meters'", replace restore } rename fips_county fips_trg merge 1:m fips_trg using "/Users/miguel/Documents/Research/Stata/Geography/Data/Intersect `dist_meters'" // verify_merge drop if _merge ~= 3 // Compute the variable imputed to this fragment with proportionality rule // of the area of the target county that is in the AOI of the source county foreach v in `varlist' { gen `v'_frag = `v' * area_frag / area_trg } // Collapse at level of the source county gen area_self = area_trg if fips_src == fips_trg collapse (sum) *_frag area_self, by (fips_src) fast rename fips_src fips_county rename area_frag aoi_`dist' rename area_self area_self_`dist' // Rename all variables foreach v in `varlist' { rename `v'_frag `v'_aoi`dist' } end capture program drop get_county_details program define get_county_details rename `0' fips_county merge m:1 fips_county using "$counties_folder/xwalk_fips_county_details" drop if _merge == 2 drop _merge rename fips_county `0' end capture program drop drop_outside_counties program define drop_outside_counties // Get type of county and proceed if it's a string local t: type county if ("`t'" == "str") { checkfor2 state_code local un = r(unavailable) if ("`un'" ~= "state_code") local state_name state else local state_name state_code // Drop 
consular service and non-continental areas drop if regexm(lower(`state_name'), "^(cs|vi|pr|as|ak|hi|gu|pc)$") drop if regexm(lower(`state_name'), "^(puerto rico|military and naval forces|guam|american samoa|virgin islands|united states|panama canal zone)$") drop if mi(`state_name') drop if lower(county) == "mesa verde national park" & regexm(lower(`state_name'), "^(co|colorado)$") drop if lower(county) == "arundel" & regexm(lower(`state_name'), "^(fl|florida)$") drop if lower(county) == "yellowstone national park" & regexm(lower(`state_name'), "^(id|idaho)$") } end /////////////////////////////////////////////////// /////////////////////////////////////////////////// // // Number and text functions // /////////////////////////////////////////////////// /////////////////////////////////////////////////// capture program drop my_destring program define my_destring syntax varlist (min=1 string) [, force] local vars = regexr("`0'", ",(.*)", "") // di "`vars'" foreach v in `vars' { tempvar t_real t_string destring `v', gen(`t_real') force tostring `t_real', gen(`t_string') local sel (`v' ~= `t_string') & ~(`v' == "" & `t_string' == ".") count if `sel' local temp = r(N) if (`temp' & ("`force'" == "")) { list `v' `t_real' if `sel' di "The observations above have a problem de-stringing, please advise" stop } else { drop `v' rename `t_real' `v' } } end /////////////////////////////////////////////////// /////////////////////////////////////////////////// //ÊPrograms /////////////////////////////////////////////////// /////////////////////////////////////////////////// global state_codes AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY global state_codes_mainland AL AR AZ CA CO CT DC DE FL GA IA ID IL IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY global state_names Alaska Alabama Arkansas Arizona California 
Colorado Connecticut DistrictOfColumbia Delaware Florida Georgia Hawaii Iowa Idaho Illinois Indiana Kansas Kentucky Louisiana Massachusetts Maryland Maine Michigan Minnesota Missouri Mississippi Montana NorthCarolina NorthDakota Nebraska NewHampshire NewJersey NewMexico Nevada NewYork Ohio Oklahoma Oregon Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah Virginia Vermont Washington Wisconsin WestVirginia Wyoming global countries "/Users/miguel/Documents/Research/Stata/Geography/Data/countries.dta" global states "/Users/miguel/Documents/Research/Stata/Geography/Data/states.dta" global geoFolder "/Users/miguel/Documents/Research/Stata/Geography/Data" global counties_folder "$geoFolder" //////////////////////////////////////////// //////////////////////////////////////////// //////////////////////////////////////////// // // // Micro-data programs // // //////////////////////////////////////////// //////////////////////////////////////////// //////////////////////////////////////////// capture program drop sym_chg program define sym_chg local v `: word 1 of `0'' local year2 `: word 2 of `0'' local year1 `: word 3 of `0'' if (`year2' < `year1') { local t `year2' local year2 `year1' local year1 `t' } tempvar t1 t2 gen `t1' = `v'`year1' replace `t1' = 0 if mi(`v'`year1') gen `t2' = `v'`year2' replace `t2' = 0 if mi(`v'`year2') capture drop `v'_schg gen `v'_`year1'_`year2'_schg = (`t2'-`t1') / (0.5 * (`t2'+`t1')) if ("$normalize" ~= "") { replace `v'_`year1'_`year2' = `v'_`year1'_`year2' / (`year2'-`year1') } end capture program drop verify_merge program define verify_merge count if _merge ~= 3 local temp = r(N) if (`temp' > 0) { stop } else drop _merge end capture program drop propagate program define propagate syntax varlist if ("`varlist'" == "") { foreach v of varlist _all { local var_type: type `v' if (regexm("`var_type'", "^str")) replace `v' = `v'[_n-1] if `v' == "" else replace `v' = `v'[_n-1] if `v' == . 
} } else { foreach v in `varlist' { // Check that this variable exists checkfor2 `v' local un = r(unavailable) if ("`un'" ~= "`v'") { local var_type: type `v' if (regexm("`var_type'", "^str")) replace `v' = `v'[_n-1] if `v' == "" else replace `v' = `v'[_n-1] if `v' == . } } } end capture program drop compute_measures program define compute_measures syntax [, panel] // This program computes changes in common measures, like employment, value // added, etc.. It can be run before or after reshape, and before or after // aggregation. If after aggregation, it drops the variables before computing // them to replace, for example, \sum \Delta VA from a (compute_measures - collapse) // step with a \Delta (\sum VA) after a (compute_measures - collapse - compute_measures) step // if a panel, compute these year by year if ("`panel'" ~= "") { local years 29 35 } else local years 0 local num_years `: word count `years'' forvalues i = 1/`num_years' { if ("`panel'" == "") local year else local year `: word `i' of `years'' // Compute measures gen VA`year' = VP`year' - cost_materials_energy`year' gen profits`year' = VA`year' - wage_bill_large`year' gen cct_price`year' = concrete_tons_value`year' / concrete_tons`year' foreach coverage in $coverages { // capture drop emp_hp_`coverage'`year' // gen emp_hp_`coverage'`year' = emp_`coverage'`year' if !mi(elec_hp`year') capture drop LSVP_`coverage'`year' gen LSVP_`coverage'`year' = wage_bill_`coverage'`year'/ VP`year' capture drop LSVA_`coverage'`year' gen LSVA_`coverage'`year' = wage_bill_`coverage'`year'/ VA`year' capture drop prodR_VP`coverage'`year' gen prodR_VP`coverage'`year' = VP`year' / emp_`coverage'`year' capture drop prodR_VA`coverage'`year' gen prodR_VA`coverage'`year' = VA`year' / emp_`coverage'`year' capture drop prodQ_`coverage'`year' gen prodQ_`coverage'`year' = concrete_tons`year' / emp_`coverage'`year' capture drop prodRH_VA`coverage'`year' gen prodRH_VA`coverage'`year' = VA`year' / hours_`coverage'`year' capture drop 
prodQH_`coverage'`year' gen prodQH_`coverage'`year' = concrete_tons`year' / hours_`coverage'`year' capture drop wage_`coverage'`year' gen wage_`coverage'`year' = wage_bill_`coverage'`year'/ emp_`coverage'`year' capture drop hourlywage_`coverage'`year' gen hourlywage_`coverage'`year' = wage_bill_`coverage'`year'/ hours_`coverage'`year' capture drop materials_wages_`coverage'`year' gen materials_wages_`coverage'`year' = cost_materials`year' / wage_bill_`coverage'`year' capture drop elec_cost_emp_`coverage'`year' gen elec_cost_emp_`coverage'`year' = elec_cost`year' / emp_`coverage'`year' capture drop elec_kw_emp_`coverage'`year' gen elec_kw_emp_`coverage'`year' = elec_kw`year' / emp_`coverage'`year' capture drop elec_num_emp_`coverage'`year' gen elec_num_emp_`coverage'`year' = elec_num`year' / emp_`coverage'`year' capture drop elec_hp_emp_`coverage'`year' gen elec_hp_emp_`coverage'`year' = elec_hp`year' / emp_`coverage'`year' capture drop elec_hp_hours_`coverage'`year' gen elec_hp_hours_`coverage'`year' = elec_hp`year' / hours_`coverage'`year' // capture drop elec_hp_emp_hp_`coverage'`year' // gen elec_hp_emp_hp_`coverage'`year' = elec_hp`year' / emp_hp_`coverage'`year' capture drop elec_wages_`coverage'`year' gen elec_wages_`coverage'`year' = elec_cost`year' / wage_bill_`coverage'`year' capture drop fuel_wages_`coverage'`year' gen fuel_wages_`coverage'`year' = cost_fuel`year' / wage_bill_`coverage'`year' // gen elec_worker_`coverage'2 = cost_purchased_current / employment_`coverage'2 // gen kw_imp_worker_`coverage' = kilowatts`year' / emp_`coverage' } capture drop elec_share_VP`year' gen elec_share_VP`year' = elec_cost`year' / VP`year' capture drop elec_share_VA`year' gen elec_share_VA`year' = elec_cost`year' / VA`year' capture drop fuel_share_VA`year' gen fuel_share_VA`year' = cost_fuel`year' / VA`year' capture drop fuel_share_VP`year' gen fuel_share_VP`year' = cost_fuel`year' / VP`year' capture drop fuel_elec_share`year' egen fuel_elec_share`year' = 
rowtotal(cost_fuel`year' elec_cost`year') replace fuel_elec_share`year' = fuel_elec_share`year' / VP`year' capture drop materials_share_VP`year' gen materials_share_VP`year' = cost_materials`year' / VP`year' capture drop materials_share_VA`year' gen materials_share_VA`year' = cost_materials`year' / VA`year' capture drop materials_energy_share_VP`year' gen materials_energy_share_VP`year' = cost_materials_energy`year' / VP`year' capture drop elec_ton`year' gen elec_ton`year' = elec_cost`year' / concrete_tons`year' label variable elec_ton`year' "Electricity spending per concrete ton" capture drop elec_price`year' gen elec_price`year' = elec_cost`year' / elec_kw`year' capture drop elec_price_avail`year' gen elec_price_avail`year' = elec_cost_kw`year' / elec_kw`year' // gen elec_share_VA = cost_purchased_current / value_added // gen fuel_share_VA = cost_fuel / value_added // gen elec_cost_plant_imputed`year' = cost_purchased_current`year' / kilowatts_imputed`year' } end capture program drop impute_wem program define impute_wem tempvar temp rename wage_earners_total wem egen `temp' = rowtotal(wage_earners_jan wage_earners_feb wage_earners_mar wage_earners_apr wage_earners_may wage_earners_jun wage_earners_jul wage_earners_aug wage_earners_sep wage_earners_oct wage_earners_nov wage_earners_dec) qui count if `temp' ~= wem & `temp' ~= . & wem ~= . & `temp' ~= 0 local errors = r(N) if (`errors' > 0) { di "Problem in the data" list id `temp' wem if `temp' ~= wem & `temp' ~= . & wem ~= . return } replace wem = `temp' if wem == . & `temp' ~= . 
gen wage_earners = wem / 12 end capture program drop get_county_coarse_fips_from_fips program define get_county_coarse_fips_from_fips // Merge by county fips merge m:1 county_fips_original using "$counties", keepusing(fips_coarse) qui count if _merge == 1 local errors = r(N) if (`errors' > 0) { di "Failed matching of county FIPS" error } drop if _merge == 2 drop _merge end /////////////////////////////////////////////////// // County-level pograms /////////////////////////////////////////////////// capture program drop clean_name_from_state_details program define clean_name_from_state_details syntax varlist (string max=1 min=1) local variable `: word 1 of `0'' local start_sep "(, |\. |,|\.| )" local end_sep "(|, |\. |,|\.| )$" foreach state_ind in state_name /// state_code /// state_other_name_1 /// state_other_name_2 /// state_other_name_3 /// state_other_name_nodot_1 /// state_other_name_nodot_2 /// state_other_name_nodot_3 /// state_other_name_nodots_1 /// state_other_name_nodots_2 /// state_other_name_nodots_3 /// { replace `variable' = regexr(`variable', "`start_sep'"+lower(`state_ind')+"`end_sep'","") } end // This program uses county and state_code to get the fips code by county capture program drop getCountyFipsFromName program define getCountyFipsFromName syntax varlist (min=1 max=1 string) [, hist] // alternative: // syntax [, clean] /////////////////////////////////////////////////// // just cleaning up /////////////////////////////////////////////////// local var_fips fips_county // fips_county or fips_coarse gen county_lower = `varlist' rename `varlist' original_`varlist' // Get state's names to clean up the county names replace state_code = upper(state_code) merge m:1 state_code using "$states" drop if _merge == 2 drop _merge // Replace some counties by hand: DC, Baltimore city, St Louis replace county_lower = "District of Columbia" if "DC" == state_code // if city is available, i.e. 
this is micro-data, fix some common issues checkfor2 city local un = r(unavailable) if ("`un'" ~= "city") { replace county_lower = "Baltimore city" if "MD" == state_code & regexm(lower(trim(city)), "^baltim(o|a)re( city)?$") replace county_lower = "St Louis City" if "MO" == state_code & regexm(lower(trim(city)), "^(st.?)|(saint) louis$") } // Clean up useless characters and trailing spaces replace county_lower = lower(county_lower) local separator "( |\.|,|-)" replace county_lower = regexr(county_lower,"`separator'*$", "") // For some reason, this expression matches everything and gives a problem: replace county = regexr(county,"^(`separator'*)", "") // instead, I do it several times: forvalues i = 1/3 { replace county_lower = regexr(county_lower,"^`separator'", "") } // Replace this weird character as an apostrophe: ´ replace county_lower = regexr(county_lower, "´", "'") replace county_lower = regexr(county_lower, "å«", "'") // Remove state information clean_name_from_state_details county_lower // Remove parishes in Louisiana replace county_lower = regexr(county_lower, "^((parish of)|(par.)|(parish,)|(parish)) ", "") if state_code == "LA" replace county_lower = regexr(county_lower, "^parish-", "") if state_code == "LA" replace county_lower = regexr(county_lower, "(,?) (parish|\(parish\)|parihs|paresh|perish)$", "") if state_code == "LA" // Remove county_lower name to purge trailing spaces, " co." 
and " county" replace county_lower = regexr(county_lower," co\.*$", "") replace county_lower = regexr(county_lower," county$", "") replace county_lower = regexr(county_lower, " $", "") // this symbol appears in some of Vickers' counties replace county_lower = regexr(county_lower, "^ ", "") // this symbol appears in some of Hardish's beverages counties replace county_lower = regexr(county_lower,"^(st\.|saint) ", "st ") replace county_lower = regexr(county_lower,"^(mt\.|mount) ", "mt ") replace county_lower = regexr(county_lower,"^(ft\.|fort) ", "ft ") replace county_lower = trim(county_lower) /////////////////////////////////////////////////// // merge // note: this happens only once because multiple // spellings go in a different record each /////////////////////////////////////////////////// merge m:1 state_code county_lower using "$counties_folder/xwalk_county_names_fips", keepusing(`var_fips' county_proper county_notes) // todo: adapt historical as optional parameter // merge m:1 state_code county_lower using "$counties_folder/xwalk_hist_county_names_fips", keepusing(`var_fips' county_proper county_notes) drop if _merge == 2 /* // Verify that the corrections are OK duplicates drop state_code county county_clean, force drop if county_clean == "" list state_code county county_clean if lower(county) ~= lower(county_clean) return // */ /* qui count if _merge == 1 local missed = r(N) if (`missed' > 0 & ("$clean" ~= "")) { di "`missed' counties are still missing." keep if _merge == 1 count local temp = r(N) di "`temp' observations are still missing:" duplicates drop state_code county_lower, force list state_code county_lower di "I am going to try to find them with a bigram matching" rename county_lower county_source drop county_proper joinby state_code using "$counties_folder/xwalk_fips_county_details" gen county_target = lower(county_proper) bigram county_source county_target, gen(score) continuous(.) 
keep if score > 0.5 list state_code county_source county_target score gsort state_code county_target outsheet state county_source county_target score $o $e di "Found these potential matches for counties, please treat them" stop } else { // */ drop _merge rename county_proper county drop county_notes rename county_lower county_clean rename original_`varlist' `varlist'_original drop state state_no_spaces state_other* state_fips division region end //////////////////////////////////////////// // // Sate-level programs // //////////////////////////////////////////// // add state_code capture program drop addStateCodes program addStateCodes syntax varlist (string min = 1 max = 1) gen stateNameLower = lower(`0') replace stateNameLower = trim(itrim(stateNameLower)) merge m:1 stateNameLower using "$geoFolder/xwalk_stateNameLower_state_code", keepusing(state_code) drop if _merge == 2 gen OK = _merge == 3 count if ~OK local temp = r(N) if (`temp') { list stateNameLower if ~ OK di "Problem: the `temp' observation(s) above could not be matched to state code" } drop OK _merge end capture program drop add_state_details program define add_state_details syntax [, regions] capture drop _merge /////////////////////////////////////////////////// // Either add state code or add state name, // depending on available variables /////////////////////////////////////////////////// // If the data has state_code but not state, add state_code checkfor2 state_code local a_state_code = r(available) checkfor2 state local a_state = r(available) local missing_state = r(manymissings) // Add state codes if ("`a_state_code'" ~= "state_code" & ("`a_state'" == "state" | "`missing_state'" == "state")) { replace state = proper(state) replace state = "District of Columbia" if state == "District Of Columbia" // If the state variable has no spaces at all, merge to that variable tempvar spaces gen `spaces' = regexm(state, " ") qui count if `spaces' local with_spaces = r(N) if (`with_spaces') { di "Adding 
state codes" capture drop state_code merge m:1 state using "$states", keepusing(state_code) di "Not matched:" list state _merge if _merge ~= 3 drop if _merge == 2 // observations from using dataset only drop _merge } else { di "Adding state codes and names with spaces" rename state state_no_spaces replace state_no_spaces = lower(state_no_spaces) merge m:1 state_no_spaces using "$states", keepusing(state_code state) di "Not matched:" list state _merge if _merge ~= 3 drop if _merge == 2 // observations from using dataset only drop _merge } } else if ("`a_state_code'" == "state_code" & "`a_state'" ~= "state") { di "Adding state names" merge m:1 state_code using "$states", keepusing(state) di "Not matched:" list state _merge if _merge ~= 3 drop if _merge == 2 // observations from using dataset only drop _merge } else di "Adding nothing" /////////////////////////////////////////////////// //ÊTreat region names and add regions to states /////////////////////////////////////////////////// // Treat regions too and assign each state to a region if ("`regions'" ~= "") { gen is_region = regexm(lower(state), "(new england)|(middle atlantic)|(east north central)|(west north central)|(south atlantic)|(east south central)|(west south central)|(mountain)|(pacific)") gen region = lower(state) if is_region gen is_US = regexm(lower(state), "united states") // all names without a state code are either a region, the US, or // they need to be dropped or inspected manually tempvar t gen `t' = (state_code ~= "") | is_region | is_US count if `t' == 0 local temp = r(N) if (`temp' > 0) { keep if ~`t' duplicates drop state, force list state di "### You have a problem with the above state names: they don't match to regions!" 
stop } // Give a region to each state replace region = "new england" if regexm(lower(state), "(maine)|(new hampshire)|(vermont)|(massachusetts)|(rhode island)|(connecticut)") replace region = "middle atlantic" if regexm(lower(state), "(new york)|(new jersey)|(pennsylvania)") replace region = "east north central" if regexm(lower(state), "(ohio)|(indiana)|(illinois)|(michigan)|(wisconsin)") replace region = "west north central" if regexm(lower(state), "(minnesota)|(iowa)|(missouri)|(north dakota)|(south dakota)|(nebraska)|(kansas)") replace region = "south atlantic" if regexm(lower(state), "(delaware)|(maryland)|(district of columbia)|(virginia)|(west virginia)|(north carolina)|(south carolina)|(georgia)|(florida)") replace region = "east south central" if regexm(lower(state), "(kentucky)|(tennessee)|(alabama)|(mississippi)") replace region = "west south central" if regexm(lower(state), "(arkansas)|(louisiana)|(oklahoma)|(texas)") replace region = "mountain" if regexm(lower(state), "(montana)|(idaho)|(wyoming)|(colorado)|(new mexico)|(arizona)|(utah)|(nevada)") replace region = "pacific" if regexm(lower(state), "(washington)|(oregon)|(california)") // Check that all states have a region count if region == "" & ~is_US local temp = r(N) if (`temp' > 0) { list state region if region == "" & ~is_US di "## You have a problem with names and regions" stop } } end capture program drop clean_city_names program define clean_city_names syntax varlist (string min = 1 max = 1) local city `: word 1 of `0'' merge m:1 state_code using "../Geography/Data/states" drop if _merge == 2 drop _merge replace `city' = trim(itrim(lower(`city'))) // Purge state details clean_name_from_state_details `city' // Common replacements replace `city' = regexr(`city', "(^`city' of )|(^`city' )| (^`city', )|( `city'$)", "") replace `city' = regexr(`city', "(^town of )|(^township of )|( township$)|( townships$)|( town$)|( twp$)|( tsp$)|( turk$)|( tup$)","") replace `city' = regexr(`city', "(^boro of 
)|(^borough of )|( boro$)|( borough$)", "") replace `city' = regexr(`city', "(^village of )", "") replace `city' = regexr(`city', "^(saint|st\.) ", "st ") replace `city' = regexr(`city', "^(mount|mt\.) ", "mt ") replace `city' = regexr(`city', "^(fort|ft\.) ", "ft ") replace `city' = regexr(`city', "^(east (saint|st\.)) ", "east st ") /* // Clean up city names gen merged = 0 rename city city_original gen city_clean = "" foreach variable in city city_other_name_1 city_other_name_2 city_other_name_3 { di "Round: `variable'" gen `variable' = lower(city_original) if merged == 0 save "$tf/temp", replace // Purge duplicates from city list use "$cities", clear replace `variable' = lower(`variable') duplicates drop `variable' state_code, force drop if `variable' == "" save "$tf/cities", replace // Return to original data use "$tf/temp", clear merge m:1 state_code `variable' using "$tf/cities", keepusing(city) drop if _merge == 2 replace merged = 1 if _merge == 3 replace city_clean = city if _merge == 3 gsort -_merge gen tag = 0 replace tag = 1 if _merge == 3 & _n < 10 list _merge `variable' state_code city if tag drop _merge city `variable' tag } /* // Verify that the corrections are OK duplicates drop state_code county county_clean, force drop if county_clean == "" list state_code county county_clean if lower(county) ~= lower(county_clean) return // */ qui count if merged == 0 local missed = r(N) if (`missed' > 0) { keep if merged == 0 duplicates drop state_code city_original, force di "These observations are still missing:" list state_code city_original di "I am going to try to find them with a bigram matching" gen city_source = lower(city_original) joinby state_code using "$cities" rename city city_target bigram city_source city_target, gen(score) continuous(.) 
keep if score > 0.5
        list state_code city_source city_target score
        gsort +city_source
        outsheet state_code city_target city_source score $o
        $e
        di "Found these potential matches for counties, please treat them"
        error
    }
    drop merged
    rename city_clean city
// */
end

// clean_names: match a raw name variable against a reference dataset.
// All configuration is read from global macros, not from arguments:
//   $to_clean_var       variable holding the raw names
//   $other_merge_vars   extra merge keys used together with name_lower
//   $clean_dataset      reference dataset used by merge and joinby
//   $other_dup_vars     extra keys when de-duplicating the unmatched rows
//   $correct_var        variable(s) dropped before the fuzzy-matching pass
//   $other_instructions expanded verbatim after the score filter
// If non-empty names remain unmatched, candidate pairs with a bigram score
// above 0.7 are written out through $o / $e and `stop' is called.
capture program drop clean_names
program define clean_names
    // Exact match on the trimmed, lower-cased name
    gen name_lower = trim(lower($to_clean_var))
    merge m:1 $other_merge_vars name_lower using "$clean_dataset"
    count if _merge == 1 & name_lower ~= ""
    local missed = r(N)
    if (`missed' > 0) {
        di "`missed' names are still missing."
        // From here on the data in memory is reduced to the unmatched rows
        keep if _merge == 1
        count
        local temp = r(N)
        duplicates drop $to_clean_var $other_dup_vars, force
        // Drop variables from merge
        drop $correct_var
        di "`temp' observations are still missing:"
        list source name_lower
        di "I am going to try to find them with a bigram matching"
        rename name_lower name_source
        // Pair every unmatched name with every reference name of the same year
        joinby year using "$clean_dataset"
        rename name_lower name_target
        // NOTE(review): presumably read by the external `bigram' command - confirm
        global no_winkler 0
        bigram name_source name_target, gen(score) continuous(.)
        keep if score > 0.7
        // NOTE(review): assumed to hold a stand-alone command; verify against callers
        $other_instructions
        count
        local matched = r(N)
        if (`matched' > 0) {
            list source name_source name_target score
            gsort - score state_code id
            // $o supplies the `using' clause, $e opens the exported file
            outsheet filename source $to_clean_var $correct_var score $o
            $e
            di "Found these potential matches for product names, please treat them"
            stop
        }
        else di "No matches on this variable"
    }
end

// compare_two_vars v1 v2 [, visual]
// Flags observations where |log(v1/v2)| exceeds $threshold (missing
// differences are ignored). Offending rows, plus $extra_vars, are listed and
// exported through $o / $e, and `stop' is called. With `visual', a scatter
// of log(v1) against log(v2) is drawn first.
// Side effects: re-sorts the data by descending difference and, when problems
// are found, changes the display format of both variables.
capture program drop compare_two_vars
program define compare_two_vars
    syntax varlist (max=2 min=2) [, visual]
    local v1 `: word 1 of `varlist''
    local v2 `: word 2 of `varlist''
    tempvar v1l v2l diff
    gen `v1l' = log(`v1')
    gen `v2l' = log(`v2')
    gen `diff' = abs(log(`v1'/`v2'))
    gsort - `diff'
    if ("`visual'" ~= "") {
        twoway scatter `v1l' `v2l'
    }
    // Expression reused by count, list and outsheet below
    local selector `diff' > $threshold & `diff' ~= .
    // di "count if `selector'"
    qui count if `selector'
    local mistakes = r(N)
    if (`mistakes' > 0) {
        format %16.0gc `v1' `v2'
        list `v1' `v2' `diff' $extra_vars if `selector'
        outsheet `v1' `v2' $extra_vars if `selector' $o
        $e
        di "Problem with variable `v1'"
        stop
    }
    else di "Variable `v1' is fine, good job!"
end

// verify_sum total part1 part2 [...]
// Checks that the first variable equals the row total of the remaining ones
// by handing both to compare_two_vars (so $threshold applies).
// Uses the fixed scratch variable name _sum_var.
capture program drop verify_sum
program define verify_sum
    syntax varlist (numeric min=3)
    local num_vars `: word count `varlist''
    local sum_var `: word 1 of `varlist''
    // Collect every variable after the first one
    local other_vars
    forvalues i=2/`num_vars' {
        local this_var `: word `i' of `varlist''
        local other_vars `other_vars' `this_var'
    }
    egen _sum_var = rowtotal(`other_vars')
    compare_two_vars `sum_var' _sum_var
    drop _sum_var
end

////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////
//
//
// Global macros
//
//
////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////

global concrete "/Users/miguel/Documents/Research/Great Depression/Stata/Concrete/"
global cement "Cement from Ziebarth/Data/cement_panel"
global autos "Bresnahan and Raff/Data/autos_panel"
// $alert and $lyx_image expand to complete shell commands
global alert shell osascript "/Users/miguel/Documents/Programming/AppleScript/Utilities/Notify user.scpt"
global home_folder "/Users/miguel"
global geography "/Users/miguel/Documents/Research/Stata/Geography"
global cities "$geography/Data/cities.dta"
global state_level "$geography/Data/state_level.dta"

// Common folders
global tf "/Users/miguel/Documents/Temp/Stata"
global df "/Volumes/Data/Data/"
global lyx_image shell osascript "/Users/miguel/Documents/Programming/AppleScript/LyX/LyX include latest image.scpt"
/*
global doc_folder "/Users/miguel/Dropbox/Replication codes/Documents"
global data_folder "/Users/miguel/Dropbox/Replication codes/Data"
global ampl_folder "/Users/miguel/Dropbox/Replication codes/From AMPL"
*/

// Excel shortcuts
// $t is the scratch CSV; $o is appended to outsheet as its `using' clause
global t "$tf/temp.csv"
global o using "$t", comma replace
global
e shell open -a "Microsoft Excel" "$t"
global excel shell open -a "Microsoft Excel"

// Reset macros every time
global cell_range
global plot_title

// Regression export
global lyx_table shell osascript "/Users/miguel/Documents/Programming/AppleScript/LyX/LyX include latest table.scpt"
// call from Stata as: $lyx_table "`folder'"

// Log use
global log_file "Logs/$today.txt"
global cumul_log "Logs/$today.cumul"

// log_start: close any open log (via log_end), delete today's log file and
// open a fresh text log at $log_file.
capture program drop log_start
program define log_start
    log_end
    capture rm "$log_file"
    log using "$log_file", replace text
end

// log_end: close the current log, if any, and append today's log file to the
// cumulative log $cumul_log.
capture program drop log_end
program define log_end
    capture log close
    shell cat $log_file >> $cumul_log
end

// Colors
global color1948 gs12
global color1953 emidblue
global color1957 sienna
global color1960 dkorange
global color1969 olive
global color1973 pink
global color1980 red
global color1981 cranberry
global color1990 blue
global color2001 dkgreen
global color2007 black

////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////
//
//
// String programs
//
//
////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////

// keep_consonants strvar, gen(newvar)
// Copies strvar into newvar (replacing newvar if it exists) and removes the
// lower-case letters a e i o u y. Spaces are kept.
capture program drop keep_consonants
program define keep_consonants
    syntax varlist (string max=1 min=1), gen(str)
    cap drop `gen'
    gen `gen' = `varlist'
    foreach vowel in a e i o u y {
        replace `gen' = subinstr(`gen', "`vowel'", "", .)
    }
    // replace `gen' = subinstr(`gen', " ", "", .)
end

// keep_vowels strvar, gen(newvar)
// Copies strvar into newvar (replacing newvar if it exists) and removes the
// lower-case consonants listed below as well as spaces.
capture program drop keep_vowels
program keep_vowels
    syntax varlist (string max=1 min=1), gen(str)
    cap drop `gen'
    gen `gen' = `varlist'
    foreach consonant in b c d f g h j k l m n p q r s t v w x z {
        replace `gen' = subinstr(`gen', "`consonant'", "", .)
    }
    replace `gen' = subinstr(`gen', " ", "", .)
end

// bigram_original2 source target, gen(score)
// Row-wise bigram similarity between two string variables. It is the
// two-variable counterpart of bigram_original, which compares one variable
// with a literal string.
//   source  first variable of the varlist: bigrams are searched in it
//   target  second variable of the varlist: bigrams are taken from it
//   gen()   name of the score variable; reset to 0 if it already exists
// The score is 1 for identical strings and 0 when the source is empty.
capture program drop bigram_original2
program define bigram_original2
    version 8.2
    *! version 2.2 M Blasnik 19-Sep-2005
    // Edited by Miguel Morin on 16 July 2014
    syntax varlist (max=2) , gen(str)
    tempvar source_length target_length wink
    cap confirm var `gen'
    if _rc==0 replace `gen'=0
    else gen `gen'=0
    local v1 `: word 1 of `varlist''
    local v2 `: word 2 of `varlist''
    gen `target_length' = length(`v2')
    gen `source_length' = length(`v1')

    // r(max) is only available after summarize; the bigram loop must run up
    // to the longest target string in the data (previously this read a stale
    // r(max) and an undefined macro, so the loop never executed).
    qui summarize `target_length'
    local max_length = r(max)
    if (`max_length' >= .) local max_length 0
    local poss = `max_length'-1

    // Count the bigrams of the target string that also occur in the source
    // string. The target is referenced as a variable (not as a quoted
    // literal) so each observation uses its own value, and position `i' only
    // counts when the observation's target really has two characters there.
    if `max_length'>2 {
        forval i=1(1)`poss' {
            qui replace `gen'=`gen'+1 if `i' < `target_length' & index(`v1',substr(`v2',`i',2))>0
        }
        qui replace `gen'=`gen'*2/(`source_length'+`target_length'-2)
    }

    * deal with strings <3 characters
    // If either the match string or the source string is less than 3 characters
    // long, replace score with 0, unless the strings are the same
    qui replace `gen'=0 if (`target_length'<3 | `source_length'<3) ///
        & !index(`v2',`v1') ///
        & !index(`v1',`v2')

    // If either the match string or the source string is less than 3 characters
    // long, replace score with a low number if one string contains the other
    qui replace `gen'=min(`target_length',`source_length')/(`target_length'+`source_length'-1) ///
        if (`target_length'<3 | `source_length'<3) ///
        & ( index(`v2',`v1') ///
        | index(`v1',`v2') )

    * Winkler adjustment: adjusts score upward based on first 1,2,3, or 4 characters matching
    // This adjustment gives more importance to the first 4 characters than the others
    gen byte `wink'=0
    forval i=1(1)4 {
        qui replace `wink'=`wink'+1 if substr(`v1',1,`i')==substr(`v2',1,`i')
    }
    qui replace `gen'=`gen'+`wink'*(1-`gen')/10

    * make sure exact matches =1 and missing =0
    qui replace `gen'=1 if `v1'==`v2'
    qui replace `gen'=0 if `source_length'==0
end

capture program drop bigram_original
program define
bigram_original
    // bigram_original strvar, gen(score) match(string)
    // Bigram similarity between each value of strvar and the literal string
    // given in match(). gen() names the score variable, which is reset to 0
    // if it already exists. The score is 1 for exact matches and 0 when the
    // source value is empty.
    version 8.2
    *! version 2.2 M Blasnik 19-Sep-2005
    // Edited by Miguel Morin on 16 July 2014
    syntax varlist (max=1) , gen(str) match(str)
    tempvar source_length wink
    cap confirm var `gen'
    if _rc==0 replace `gen'=0
    else gen `gen'=0
    local match_length = length("`match'")
    gen `source_length' = length(`varlist')
    // Number of two-character windows in the match string
    local poss = `match_length'-1

    // Compute the sum of the lengths of series of characters in match string
    // also present in source string. This is neatly done by incrementing 1
    // each time that the source string contains a sequence of 2 characters.
    // This variable is also associative, i.e. result(a,b) = result(b,a).
    if `match_length'>2 {
        forval i=1(1)`poss' {
            qui replace `gen'=`gen'+1 if index(`varlist',substr("`match'",`i',2))>0
        }
        qui replace `gen'=`gen'*2/(`source_length'+`match_length'-2)
    }

    * deal with strings <3 characters
    // If either the match string or the source string is less than 3 characters
    // long, replace score with 0, unless the strings are the same
    qui replace `gen'=0 if (`match_length'<3 | `source_length'<3) ///
        & !index("`match'",`varlist') ///
        & !index(`varlist',"`match'")

    // If either the match string or the source string is less than 3 characters
    // long, replace score with a low number if one string contains the other
    qui replace `gen'=min(`match_length',`source_length')/(`match_length'+`source_length'-1) ///
        if (`match_length'<3 | `source_length'<3) ///
        & ( index("`match'",`varlist') ///
        | index(`varlist',"`match'") )

    * Winkler adjustment: adjusts score upward based on first 1,2,3, or 4 characters matching
    // This adjustment gives more importance to the first 4 characters than the others
    gen byte `wink'=0
    forval i=1(1)4 {
        qui replace `wink'=`wink'+1 if substr(`varlist',1,`i')==substr("`match'",1,`i')
    }
    qui replace `gen'=`gen'+`wink'*(1-`gen')/10

    * make sure exact matches =1 and missing =0
    qui replace `gen'=1 if `varlist'=="`match'"
    qui replace `gen'=0 if `source_length'==0
end

// split_state_combinations numvar
// For each multi-state aggregate listed below that is present in state_agg,
// appends one new observation per member state, carrying the mean of numvar
// over the aggregate's rows. Only `state' and numvar are filled in on the
// new rows. Expects string variables state_agg and state in memory.
// Side effect: commas in state_agg are rewritten to " and ".
capture program drop split_state_combinations
program define split_state_combinations
    syntax varlist(min=1 max=1 numeric)
    local variable `: word 1 of `0''
    // Underscores stand for spaces so each aggregate is one foreach token
    local combinations ///
        Delaware_and_Maryland_and_DC ///
        Delaware_and_Maryland_and_District_of_Columbia ///
        Vermont_and_Rhode_Island ///
        Montana_and_Utah ///
        Delaware_and_Maryland_and_District_of_Columbia_and_West_Virginia ///
        Alabama_and_Mississippi ///
        New_Mexico_and_Arizona ///
        South_Carolina_and_Georgia

    // Treat the state aggregate name for commas and the like
    replace state_agg = subinstr(state_agg, ", and ", " and ", .)
    replace state_agg = subinstr(state_agg, ", ", " and ", .)

    // Iterate over combinations of states
    foreach combination in `combinations' {
        // Count how many states in this combination
        local num_states 1
        local to_continue 1
        local combination_space = subinstr("`combination'", "_", " ", .)
        local combination "`combination_space'"
        // Peel off one state per pass: the first " and " is replaced by a
        // marker, the text before it becomes state<k>, the rest is re-scanned
        while (`to_continue') {
            local to_continue = regexm("`combination'", " and ")
            if (`to_continue') {
                local temp = subinstr("`combination'", " and ", " ### ", 1)
                local state`num_states' = regexr("`temp'", " ### .*", "")
                local combination = regexr("`temp'", ".* ### ", "")
                local num_states = `num_states' + 1
            }
            else {
                // Whatever is left is the last state of the aggregate
                local state`num_states' `combination'
            }
        }
        // di "Combination: `state1' and `state2' and `state3'"

        // Get the variable for this combination
        summ `variable' if state_agg == "`combination_space'"
        local var_ref = r(mean)
        local N = r(N)

        // If this combination is in the data,
        // create a new observation for each split state
        if (`N' > 0) {
            forvalues j=1/`num_states' {
                local new = _N + 1
                set obs `new'
                replace state = "`state`j''" if _n == _N
                replace `variable' = `var_ref' if _n == _N
            }
        }
    }
end

////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////
//
//
// Input / output programs
//
//
////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////

// Stop Stata
capture
program drop stop
// stop: print a message and end with a bare `return'.
// NOTE(review): a bare `return' is presumably relied upon to abort the
// caller - confirm that it actually halts execution.
program define stop
    // log_end
    //br
    display "Stopping Stata because of an error thrown somewhere..."
    return
end

// Load an Excel file into memory if the Stata file does not exist or
// if it is older than the Excel file
//
// load_excel [, force]
// Inputs are global macros:
//   $excel_file   path of the workbook
//   $excel_sheet  sheet to import
//   $stata_name   base name of the cached .dta file, stored in $tf
//   $cell_range   optional cell range for import excel
// With `force' the workbook is always re-imported.
// Returns r(reload): 1 if the workbook was imported, 0 if the cached .dta
// file was opened instead.
capture program drop load_excel
program define load_excel, rclass
    syntax [, force]
    /*
    syntax varlist(min=3 string)
    local excel_file `: word 1 of `0''
    local excel_sheet `: word 2 of `0''
    local stata_name `: word 3 of `0''
    */

    // Default: reload Excel file
    local reload 1
    local stata_file "$tf/$stata_name.dta"
    if ("`force'" == "") {
        // Check if Stata file exists
        capture confirm file "`stata_file'"
        if (_rc == 0) {
            // The Stata file exists. Is it older than the Excel file?
            // This PERL script returns the age in seconds of the file,
            // (i.e., the seconds since the file was last modified)
            global S_SHELL "/bin/bash"
            qui ashell perl $home_folder/Documents/Programming/Perl/last_modified.pl "$excel_file"
            local last_excel = real(r(o1))
            // Age of Stata file
            qui ashell perl $home_folder/Documents/Programming/Perl/last_modified.pl "`stata_file'"
            local last_stata = real(r(o1))
            // If Excel is older, do not reload
            // (a larger age means the file was modified longer ago)
            if (`last_excel' > `last_stata') {
                local reload 0
            }
            else {
                local options , replace
            }
        }
    }
    else local options , replace

    if (`reload') {
        di "Excel file is more recent, loading sheet $excel_sheet of $excel_file into $stata_name"
        if ("$cell_range" ~= "") {
            di " import excel $excel_file, sheet($excel_sheet) cellrange($cell_range) firstrow clear"
            import excel "$excel_file", sheet("$excel_sheet") cellrange("$cell_range") firstrow clear
        }
        else {
            di "import excel $excel_file, sheet($excel_sheet) firstrow clear"
            import excel "$excel_file", sheet("$excel_sheet") firstrow clear
        }
        // `options' is empty only when the .dta file did not exist yet
        save "`stata_file'" `options'
    }
    else {
        di "Opening Stata file $stata_name"
        use "`stata_file'", clear
    }
    return scalar reload=`reload'
end

// view_excel: export the data in memory to a randomly named .tab file in $tf
// and open it with the $excel shortcut.
capture program drop view_excel
program define view_excel
    // Random file name
    // (digits of a uniform draw, reversed, with the "." turned into ".tab")
    local fname = regexr(reverse(string(uniform())),"\.",".tab")
outsheet * using "$tf/`fname'"
    $excel "$tf/`fname'"
end

////////////////////////////////////////////
//
// NBER peaks and troughs in quarters
// except for 1980 (too short)
//
////////////////////////////////////////////

// NBER peaks and troughs
global peaks 01oct1948 01jul1953 01jul1957 01apr1960 01oct1969 01oct1973 01jan1980 01jul1981 01jul1990 01jan2001 01oct2007
global troughs 01oct1949 01apr1954 01apr1958 01jan1961 01oct1970 01jan1975 01jul1980 01oct1982 01jan1991 01oct2001 01apr2009

// NBER peaks and troughs in quarters
// (peaks_quarters has one more entry than troughs_quarters; the extra last
// date serves as the "next peak" of the final recession)
global peaks_quarters 01oct1948 01jul1953 01jul1957 01apr1960 01oct1969 01oct1973 01jul1981 01jul1990 01jan2001 01oct2007 01jan2013
global troughs_quarters 01oct1949 01apr1954 01apr1958 01jan1961 01oct1970 01jan1975 01oct1982 01jan1991 01oct2001 01apr2009
global number_peaks `: word count $peaks_quarters'

////////////////////////////////////////////
//
// Folders and image settings
//
////////////////////////////////////////////

global label_size medium
global title_size large
/*
global top_title_size vlarge
global caption_size medium
global line_width thick
*/
global symbol_size 4
global graph_options scheme(dissertation)
global routine_color red
global nonroutine_color black
global model_color mint
global data_color black

////////////////////////////////////////////
//
// Compute recovery in a variable
// 2 years after the NBER trough
// (2 years because 3 years gives an overlap
// between the 1958 recovery and the 1960 recession)
//
////////////////////////////////////////////

// recovery numvar
// For every recession in $peaks_quarters / $troughs_quarters, computes the
// log growth (in %) of numvar between the trough and the earlier of
// (trough + $recovery_length years) and the next peak, then draws a bar
// chart by recession with the pre-1990 and post-1990 averages.
// Requires a daily-date variable `daten' in memory.
// Globals read: $recovery_length (defaults to 2), $sd_level (optional bands
// at +/- $sd_level standard deviations), $graph_options.
// Creates/overwrites: <numvar>_recovery, recession, var_<year>, years_<year>.
capture program drop recovery
program define recovery
    syntax varlist(min=1 max=1 numeric)
    // For some reason, this syntax fails when the program is called:
    //syntax varlist(min=1 max=1 numeric) [, tabname(string)]
    local variable `: word 1 of `0''

    // Get variable label, if any
    local the_label : var label `variable'
    if ("`the_label'" == "") {
        local the_label "`variable'"
    }

    // Get the recovery length (default = 2 years)
    // FIX: the test used to read "$recovery_length'" (stray apostrophe), which
    // is never empty, so the default was never applied and the mdy() call
    // below was malformed whenever the global was unset.
    if ("$recovery_length" == "") {
        global recovery_length 2
    }

    capture drop var_*
    capture drop years_*
    capture drop name*
    capture drop `variable'_recovery
    gen `variable'_recovery = .
    capture drop recession
    gen recession = .

    local number = `:word count $troughs_quarters'
    forvalues i = 1/`number' {
        local peak `: word `i' of $peaks_quarters'
        local next = `i' + 1
        local next_peak `:word `next' of $peaks_quarters'
        local trough `: word `i' of $troughs_quarters'
        local ref_date `trough'
        // Recessions are identified by the year of their peak
        local recession = year(td(`peak'))

        // Limit of the window: 1 year before trough to 2 years after
        // local start = mdy(month(td(`ref_date')),1,year(td(`ref_date'))-1)
        local end = min(mdy(month(td(`ref_date')),1,year(td(`ref_date'))+$recovery_length),td(`next_peak'))

        // Turn variable at the trough into a macro
        qui summ `variable' if daten == td(`ref_date')
        local var_ref = r(mean)

        // Report growth to a new variable
        gen var_`recession' = 100 * log(`variable'/`var_ref') if daten == `end'
        // gen var_`recession' = 100 * (`variable' - `var_ref')/`var_ref' if daten >= `start' & daten <= `end'
        // gen var_`recession' = `variable' - `var_ref' if daten >= `start' & daten <= `end'

        // Compute time from reference date
        gen years_`recession' = (year(daten) - year(td(`ref_date'))) + (month(daten) - month(td(`ref_date')))/12 if daten == `end'

        // Report the recovery in output and employment to the appropriate variable
        replace `variable'_recovery = var_`recession' if daten == `end'
        replace recession = `recession' if daten == `end'
    }

    /*
    // Average before and after 1990
    gen num = 1
    generate period = .
    replace period = 0
    replace period = 1
    capture drop `variable'_rec_tot
    capture drop count_total
    bysort period: egen `variable'_rec_tot = total(`variable'_recovery)
    bysort period: egen count_total = total(num)
    capture drop `variable'_recovery_avg
    gen `variable'_recovery_avg = `variable'_rec_tot / count_total
    list daten `variable'_recovery `variable'_recovery_avg if `variable'_recovery ~= .
    */

    // Grab averages
    summ `variable'_recovery if recession < 1990
    local avg_0 = r(mean)
    local count_0 = r(N)
    local sd_0 = r(sd)
    local min_0 = r(min)
    summ `variable'_recovery if recession >= 1990 & recession ~= .
    local avg_1 = r(mean)
    local count_1 = r(N)
    local sd_1 = r(sd)
    local min_1 = r(min)

    // Add zero line?
    if (`min_1' < 0 | `min_0' < 0) {
        local added_lines yline(0, color(black) lwidth(medium))
    }

    // Plot standard errors too?
    if ("$sd_level" ~= "") {
        local lower_0 = `avg_0' - $sd_level * `sd_0'
        local upper_0 = `avg_0' + $sd_level * `sd_0'
        local lower_1 = `avg_1' - $sd_level * `sd_1'
        local upper_1 = `avg_1' + $sd_level * `sd_1'
        local style_0 , lpattern(dash) lwidth(medthick)
        local style_1 , lpattern(shortdash) lwidth(medthick)
        local added_lines `added_lines' yline(`lower_0' `style_0') yline(`upper_0' `style_0') yline(`lower_1' `style_1') yline(`upper_1' `style_1')
    }

    local avg_early : di %3.1f `avg_0'
    local avg_late : di %3.1f `avg_1'

    // Do a bar plot
    graph bar `variable'_recovery ///
        , ///
        over(recession) ytitle(Recovery in `the_label' (%)) ///
        graphregion(color(white)) ///
        yline(`avg_0') yline(`avg_1') `added_lines' ///
        note("Average of first `count_0' recoveries: `avg_early'%. Average of last `count_1' recoveries: `avg_late'%.") ///
        $graph_options
    // drop num period count_total `variable'_rec_tot
    di "`added_lines'"
end

////////////////////////////////////////////
//
// Compute recovery in a variable
// after the NBER trough, for a given
// recovery of 5% in another variable
//
////////////////////////////////////////////

capture program drop recovery_Y
program define recovery_Y
    syntax varlist(min=2 numeric)
    local variable `: word 1 of `0''
    local gdp_var `: word 2 of `0''

    // Get label, if any
    local var_label : var label `variable'
    if ("`var_label'" == "") {
        local var_label "`variable'"
    }

    // Output recovery (in %) at which the first variable is evaluated
    local threshold 5

    capture drop var_*
    capture drop years_*
    capture drop name*
    capture drop `variable'_rec
    gen `variable'_rec = .
    capture drop gdp_rec
    gen gdp_rec = .
capture drop recession
    gen recession = .
    capture drop length
    gen length = .

    local number = `:word count $troughs_quarters'
    forvalues i = 1/`number' {
        local peak `: word `i' of $peaks_quarters'
        local next = `i' + 1
        local next_peak `:word `next' of $peaks_quarters'
        local trough `: word `i' of $troughs_quarters'
        local ref_date `trough'
        // Recessions are identified by the year of their peak
        local recession = year(td(`peak'))

        // Limit of the window: 1 year before trough to 3 years after
        // NOTE(review): as coded, the window runs from the trough itself to
        // the earlier of trough + 4 years and the next peak.
        local start = mdy(month(td(`ref_date')),1,year(td(`ref_date'))-0)
        local end = min(mdy(month(td(`ref_date')),1,year(td(`ref_date'))+4),td(`next_peak'))

        // Turn variable at the trough into a variable
        qui summ `variable' if daten == td(`ref_date')
        local var_ref = r(mean)

        // Same for output
        qui summ `gdp_var' if daten == td(`ref_date')
        local y_ref = r(mean)

        // Report growth to a new variable
        gen var_`recession' = 100 * log(`variable'/`var_ref') if daten >= `start' & daten <= `end'
        // gen var_`recession' = `variable' - `var_ref' if daten >= `start' & daten <= `end'
        gen y_`recession' = 100 * log(`gdp_var'/`y_ref') if daten >= `start' & daten <= `end'

        // Compute time from reference date
        gen years_`recession' = (year(daten) - year(td(`ref_date'))) + (month(daten) - month(td(`ref_date')))/12 if daten >= `start' & daten <= `end'

        // Find earliest time period when output recovery is above and below the threshold
        // (relies on the data being in date order - presumably sorted by the caller)
        capture drop temp
        gen temp = 1 if y_`recession' ~= . & y_`recession' > `threshold'
        gen threshold_`recession' = 1 if temp == 1 & temp[_n-1] == .
        drop temp

        // Average the recovery before and after the threshold, with linear interpolation
        capture drop weight
        gen weight = (`threshold'-y_`recession'[_n-1])/(y_`recession'- y_`recession'[_n-1]) if threshold_`recession' == 1
        gen var_`recession'_w = ///
            weight * var_`recession' + (1-weight)* var_`recession'[_n-1]
        gen gdp_`recession'_w = ///
            weight * y_`recession' + (1-weight)* y_`recession'[_n-1]
        gen length_`recession' = ///
            weight * years_`recession' + (1-weight)* years_`recession'[_n-1]

        // Report the recovery in output and employment to the appropriate variable
        replace `variable'_rec = var_`recession'_w if threshold_`recession' == 1
        replace gdp_rec = gdp_`recession'_w if threshold_`recession' == 1
        replace recession = `recession' if threshold_`recession' == 1
        replace length = length_`recession' if threshold_`recession' == 1
    }

    // Normalize by length to get a growth rate
    // FIX: drop leftovers from a previous call first; this variable and the
    // two averages below are kept after the program ends, so a second call
    // used to fail with "already defined".
    capture drop `variable'_rec_norm
    gen `variable'_rec_norm = `variable'_rec / length

    // Average before and after 1990
    gen num = 1
    generate period = .
    replace period = 0 if recession < 1990
    replace period = 1 if recession >= 1990 & recession ~= .
    capture drop `variable'_rec_total
    capture drop count_total
    // FIX: this was a bare `capture drop' with no variable name
    capture drop `variable'_rec_norm_avg
    capture drop length_total
    bysort period: egen `variable'_rec_total = total(`variable'_rec)
    bysort period: egen `variable'_rec_norm_avg = mean(`variable'_rec_norm)
    bysort period: egen length_total = total(length)
    bysort period: egen count_total = total(num)
    // FIX: this used to drop `variable'_recovery_avg, which is not the
    // variable generated on the next line
    capture drop `variable'_rec_avg
    gen `variable'_rec_avg = `variable'_rec_total / count_total
    capture drop length_rec_avg
    gen length_rec_avg = length_total / count_total
    sort daten
    list daten `variable'_rec `variable'_rec_avg `variable'_rec_norm `variable'_rec_norm_avg length length_rec_avg if `variable'_rec ~= .

    // Grab averages
    summ `variable'_rec_avg if period == 0
    local avg_0 = r(mean)
    local count_0 = r(N)
    summ `variable'_rec_avg if period == 1
    local avg_1 = r(mean)
    local count_1 = r(N)

    local avg_early : di %3.1f `avg_0'
    local avg_late : di %3.1f `avg_1'

    // Do a bar plot
    graph bar `variable'_rec ///
        , ///
        over(recession, label(labsize(medium))) ///
        ytitle(Recovery in `var_label' (%)) ///
        graphregion(color(white)) yline(0) yline(`avg_0') yline(`avg_1') ///
        /// ylabel(-1(1)2) ///
        note("Average of first `count_0' recoveries: `avg_early'%. Average of last `count_1' recoveries: `avg_late'%.") ///
        $graph_options

    // Remove the per-recession scratch variables
    drop num period count_total var_* y_* years_* threshold_* gdp_*_w length_* weight*
end

////////////////////////////////////////////
//
// Plot graph with recessions shaded
//
////////////////////////////////////////////

capture program drop plot_with_recessions
program define plot_with_recessions
    syntax varlist(numeric) [if] [, tabname(string)]

    // Find if this data relates to after 1945 with the variable daten
    // or before 1945, without that variable
    checkfor2 daten
    local unavailable = r(unavailable)
    if ("`unavailable'" == "daten") {
        local after_1945 0
        local time_var year
    }
    else {
        local after_1945 1
        local time_var daten
    }

    local number = `:word count `varlist''
    local var1 `: word 1 of `varlist''
    forvalues i=1/`number' {
        local variable `: word `i' of `varlist''
        // Get variable label, if any
        local `variable'_label : var label `variable'
        if ("``variable'_label'" == "") {
            local `variable'_label "`variable'"
        }
    }

    local tab_option
    if "`tabname'" ~= "" {
        local tab_option name(`tabname', replace)
    }

    // Track the overall min and max across all plotted variables
    qui summ `var1' `if'
    local max_val = r(max)
    local min_val = r(min)
    local mlabels
    local msymbols
    local connects
    foreach variable in `varlist' {
        qui summ `variable' `if'
        local max_temp = r(max)
        local min_temp = r(min)
        if (`max_temp' > `max_val') {
            local max_val `max_temp'
        }
        if (`min_temp' < `min_val') {
            local min_val `min_temp'
        }
        qui summ `time_var' if `variable' ~= .
local date_max = r(max)
        // Flag the last date with data so the series label is drawn there
        gen last_obs = 0
        replace last_obs = 1 if `time_var' == `date_max'
        // Generate a label with this name
        capture drop `variable'_lbl
        gen `variable'_lbl = "``variable'_label'" if last_obs
        local mlabels `mlabels' `variable'_lbl
        local msymbols `msymbols' none
        local connects `connects' l
        drop last_obs
    }

    // Titles and colors depend on how many series are plotted
    if (`number' == 1) {
        local mlabels
        local ytit ytitle(``var1'_label')
    }
    if (`number' == 2) {
        local ytit ytitle("$ytitle")
        local colors lcolor($colors) mlabcolor($colors)
    }
    if (`number' == 4) {
        local ytit ytitle("$ytitle")
        local colors lcolor(black green blue red) mlabcolor(black green blue red)
    }

    // Vertical extent of the shaded areas; $min / $max override the data range
    local min : di %6.2f `min_val'
    local max : di %6.2f `max_val'
    if ("$min" ~= "") {
        local min $min
    }
    if ("$max" ~= "") {
        local max $max
    }
    di "Min: `min', max: `max'"

    // Prepare code for shaded areas of recessions
    // Did the user specify a recession to start with?
    if ("$first_recession" == "") {
        global first_recession 1
    }
    if (`after_1945') {
        local number `: word count $peaks'
        forvalues i=$first_recession/`number' {
            local peak `: word `i' of $peaks'
            local trough `: word `i' of $troughs'
            // Two areas per recession: one up to the maximum, one down to
            // the minimum
            local shaded_area `shaded_area' ///
                (scatteri `max' `=td(`peak')' `max' `=td(`trough')', ///
                bcolor(gs10) recast(area) lwidth(none)) ///
                (scatteri `min' `=td(`peak')' `min' `=td(`trough')', ///
                bcolor(gs10) recast(area) lwidth(none))
            if ("`peak'" ~= "01jan1980") local tlabels `tlabels' `=td(`peak')'
        }
        local tlabels tlabel(`tlabels', format(%tdYY) )
    }
    else {
        local peaks 1918 1920 1923 1926 1929 1937 // 1910 1913
        local troughs 1919 1921 1924 1927 1933 1938 // 1912 1914
        local number `: word count `peaks''
        forvalues i=1/`number' {
            local peak `: word `i' of `peaks''
            local trough `: word `i' of `troughs''
            // FIX: the second area used `max' again; it now uses `min', as
            // in the post-1945 branch, so negative ranges are shaded too.
            local shaded_area `shaded_area' ///
                (scatteri `max' `=`peak'' `max' `=`trough'', ///
                bcolor(gs10) recast(area)) ///
                (scatteri `min' `=`peak'' `min' `=`trough'', ///
                bcolor(gs10) recast(area))
        }
        local tlabels xlabel(1918 1920 1923 1926 1929 1933 1939, labsize($label_size))
    }

    twoway `shaded_area' ///
        (scatter `varlist' `time_var' `if', c(`connects') msymbol(`msymbols') mlabel(`mlabels') `colors' $mlab), legend(off) ///
        /// (scatter `varlist' `time_var' `if', c(`connects') msymbol(`msymbols') `colors'), ///
        `tab_option' `tlabels' ///
        $graph_options ///
        graphregion(margin(medlarge)) ///
        xtitle(Years) `ytit' $ylab
end

///////////////////////////
//
// Program to create maps
// of the US
//
///////////////////////////

** created 2012 10 03 by Boone (using prog_getMap.do)
* write a program to easily make STATE-level map of US
// The data in memory is preserved on entry; the map is drawn from the state
// database "stdb" merged with the values to plot, using the coordinate file
// "stcoord". A scratch file "tmp_mapdata" is saved in the working directory.
capture program drop getMap_state
program define getMap_state
    syntax , stateVar(string) valueToMap(string) [otheroptions(string) clm(string) clnumber(integer 5) keepif(string) legend(string) title(string) useShpFile(integer 0)]
    * stateVar: name of variable containing state FIPS code
    * valueToMap: variable whose value will be displayed on map
    preserve
    if "`legend'"=="" {
        local legendOption ""
    }
    else {
        local legendOption "legend(`legend')"
    }
    if "`clm'"=="" {
        local clmOption ""
    }
    else {
        local clmOption "clm(`clm')"
    }
    if "`title'"=="" {
    }

    * option to create data set from shp file, only need to do this once
    if `useShpFile' == 1 {
        * first remove files if they already exist
        cap erase stcoord.dta
        cap erase stdb.dta
        * downloaded shp file from http://www.nws.noaa.gov/geodata/catalog/national/html/us_state.htm on 2012.10.03
        * create stata file from shp file for US State;
        shp2dta using "shpfiles/s_28au12", database(stdb) coordinates(stcoord) genid(stid)
    }

    if "`keepif'" ~= "" {
        keep if `keepif'
    }
    keep `stateVar' `valueToMap'
    save "tmp_mapdata", replace

    use "stdb",clear
    // Drop the listed territories plus Alaska and Hawaii
    drop if STATE=="PR" | STATE=="AK" | STATE=="AS" | STATE=="GU" | STATE=="HI" | STATE=="PR" | STATE=="VI"
    keep stid FIPS LON STATE
    rename FIPS `stateVar'
    cap destring `stateVar',replace
    merge m:1 `stateVar' using "tmp_mapdata"
    drop if _m==1
    spmap `valueToMap' using stcoord, id(stid) title("`title'") `legendOption' `clmOption' clnumber(`clnumber') `otheroptions'
    cap erase
tmp_mapdata.dta
end

////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////
//
//
// Programs for Micro
//
//
////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////

///////////////////////
// Summarize mergings
///////////////////////

// summarize_mergings K
// Prints how many observations are flagged by each of the indicator
// variables round1 ... roundK and how many are flagged by none of them.
// Creates/overwrites the variable still_missing (1 = not matched in any round).
capture program drop summarize_mergings
program define summarize_mergings
    local number_rounds `: word 1 of `0''
    capture drop still_missing
    gen still_missing = 1
    qui count
    local obs = r(N)
    di "## Merge summary"
    di "## Total observations: `obs'"
    forvalues i=1(1)`number_rounds' {
        qui count if round`i'
        qui replace still_missing = 0 if round`i'
        // r(N) still holds the result of the count above
        local obs = r(N)
        di "## Matched on round `i': `obs'"
    }
    qui summ still_missing
    local obs = r(sum)
    if (`obs' == 0) {
        di "## All set, congratulations!"
    }
    else {
        di "## Still missing: `obs'"
    }
end

////////////////////////////////////////////////////////////
// Show warning
////////////////////////////////////////////////////////////

// show_warning text
// Prints the first word of the argument list between two banner lines
// (quote the message to pass several words as one argument).
capture program drop show_warning
program define show_warning
    local temp `: word 1 of `0''
    di _newline(1)
    di "#########################################################"
    di "## `temp'"
    di "#########################################################"
    di _newline(1)
end

////////////////////////////////////////////////////////////
// Call applescript to show latest results
////////////////////////////////////////////////////////////

// show_results: close the graph window and run the "Show results" AppleScript.
capture program drop show_results
program define show_results
    window manage close graph
    shell osascript "/Users/miguel/Documents/Programming/AppleScript/Stata/Show results.scpt"
end

// show_outliers: run the "Show outliers" AppleScript.
capture program drop show_outliers
program define show_outliers
    //window manage close graph
    shell osascript "/Users/miguel/Documents/Programming/AppleScript/Archives/Show outliers.scpt"
end

////////////////////////////////////////////////////////////
// Tabulate missing, zero, and other information
////////////////////////////////////////////////////////////

// tabmiss0 var1 var2
// Cross-tabulates the status of two numeric variables, each coded as
// 0 "zero", 1 "OK" or 2 "missing". Uses scratch variables info1/info2 and
// the value label `tabs'; every variable matching info* is dropped at the end.
capture program drop tabmiss0
program define tabmiss0
    local variable1 `: word 1 of `0''
    local variable2 `: word 2 of `0''
    gen info1 = 1
    replace info1 = 0 if `variable1' == 0
    replace info1 = 2 if `variable1' == .
    label variable info1 "`variable1'"
    gen info2 = 1
    replace info2 = 0 if `variable2' == 0
    replace info2 = 2 if `variable2' == .
    label variable info2 "`variable2'"
    capture label drop tabs
    label define tabs 0 "zero" 1 "OK" 2 "missing"
    label values info1 tabs
    label values info2 tabs
    tab info1 info2
    drop info*
end

////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////
//
//
// Programs for Macro
//
//
////////////////////////////////////////////
////////////////////////////////////////////
////////////////////////////////////////////

// Generate date variable from a real variable
// clean_dates yearvar
// Builds the daily date `daten' (first day of the month) from a numeric year
// with a fractional part: the integer part is the year, the fraction times
// 12 plus 1 is the month. Zeros in yearvar are set to missing in place.
// NOTE(review): assumes the fraction maps to a whole month (e.g. .25 steps
// for quarterly data) - confirm for other frequencies.
capture program drop clean_dates
program define clean_dates
    capture drop daten
    local variable `: word 1 of `0''
    replace `variable' = . if `variable' == 0
    gen daten = mdy(1 + 12 * (`variable' - int(`variable')), 1, int(`variable'))
    format daten %td
end

///////////////////////////////////////////////////
// Tag business cycles
///////////////////////////////////////////////////

// tag_cycles: using $peaks / $troughs and the date variable `daten', creates
//   cycle_tag  -1 recession, 0 recovery, 1 expansion
//   recession  year of the peak that opens the cycle containing the date
capture program drop tag_cycles
program define tag_cycles
    local number = `:word count $troughs'
    gen cycle_tag = .
    gen recession = .
forvalues i = 1/`number' {
        local peak `: word `i' of $peaks'
        local trough `: word `i' of $troughs'
        local recession = year(td(`peak'))
        // The last cycle has no following peak in $peaks: use a fixed end date
        if (`i' < `number') {
            local next = `i' + 1
            local next_peak `:word `next' of $peaks'
        }
        else local next_peak 01oct2014
        // The recovery lasts 3 years after the trough, or until the next
        // peak if that comes first
        local end_recovery = min(td(`next_peak'), ///
            mdy(month(td(`trough')),1,3+year(td(`trough'))) ///
            )
        replace cycle_tag = -1 if daten > td(`peak') & daten <= td(`trough')
        replace cycle_tag = 0 if daten > td(`trough') & daten <= `end_recovery'
        replace cycle_tag = 1 if daten > `end_recovery' & daten <= td(`next_peak')
        replace recession = `recession' if daten > td(`peak') & daten <= td(`next_peak')
    }
    label define cycle_tags -1 "recession" 0 "recovery" 1 "expansion"
    label values cycle_tag cycle_tags
end

////////////////////////////////////////////
//
// Compute drop in a variable
// between the NBER peak and trough
//
////////////////////////////////////////////

// recession_bars numvar
// For every recession in $peaks_quarters / $troughs_quarters, computes the
// annualized log change (in %) of numvar between peak and trough and draws a
// bar chart by recession, with the pre-1990 and post-1990 averages and
// +/- 1.96 standard-deviation lines for each period.
// Requires the date variable `daten'; the data is left sorted by daten.
// Creates/overwrites: <numvar>_recession, <numvar>_drop_avg, years_recession,
// recession.
capture program drop recession_bars
program define recession_bars
    local variable `: word 1 of `0''

    // Get variable label, if any
    local the_label : var label `variable'
    if ("`the_label'" == "") {
        local the_label "`variable'"
    }

    capture drop var_*
    capture drop years_*
    capture drop name*
    capture drop `variable'_recession
    gen `variable'_recession = .
    capture drop years_recession
    gen years_recession = .
    capture drop recession
    gen recession = .

    local number = `:word count $troughs_quarters'
    forvalues i = 1/`number' {
        local peak `: word `i' of $peaks_quarters'
        local next = `i' + 1
        local next_peak `:word `next' of $peaks_quarters'
        local trough `: word `i' of $troughs_quarters'

        // Turn variable at the trough into a variable
        // (the reference value is the level at the peak)
        qui summ `variable' if daten == td(`peak')
        local var_peak = r(mean)

        // Compute length of recession
        replace years_recession = (year(daten) - year(td(`peak'))) ///
            + (month(daten) - month(td(`peak')))/12 ///
            if daten == td(`trough')

        // Compute annualized growth of the variable
        replace `variable'_recession = 100 * log(`variable'/`var_peak') ///
            / years_recession if daten == td(`trough')

        // Keep track of recession year
        replace recession = year(td(`peak')) if daten == td(`trough')
    }

    // Average before and after 1990
    gen num = 1
    generate period = .
    replace period = 0 if recession < 1990
    replace period = 1 if recession >= 1990 & recession ~= .
    capture drop `variable'_drop_tot
    capture drop count_total
    bysort period: egen `variable'_drop_tot = total(`variable'_recession)
    bysort period: egen count_total = total(num)
    capture drop `variable'_drop_avg
    gen `variable'_drop_avg = `variable'_drop_tot / count_total
    list daten `variable'_recession `variable'_drop_avg if `variable'_recession ~= .

    // Grab averages
    // NOTE(review): `variable'_drop_avg is constant within a period, so these
    // standard deviations look degenerate; the sd of `variable'_recession may
    // have been intended - confirm.
    summ `variable'_drop_avg if period == 0
    local avg_0 = r(mean)
    local sd_0 = r(sd)
    summ `variable'_drop_avg if period == 1
    local avg_1 = r(mean)
    local sd_1 = r(sd)

    local avg_early : di %3.1f `avg_0'
    local avg_late : di %3.1f `avg_1'
    local lower_0 = `avg_0' - 1.96 * `sd_0'
    local upper_0 = `avg_0' + 1.96 * `sd_0'
    // FIX: the bounds of the later period used sd_0 (copy-paste slip); they
    // now use that period's own standard deviation.
    local lower_1 = `avg_1' - 1.96 * `sd_1'
    local upper_1 = `avg_1' + 1.96 * `sd_1'
    di "upper_0 = `upper_0'"

    // Do a bar plot
    graph bar `variable'_recession ///
        , ///
        over(recession) ytitle(Annualized change in `the_label' (%)) ///
        graphregion(color(white)) yline(0) ///
        yline(`avg_0') yline(`lower_0') yline(`upper_0') ///
        yline(`avg_1') yline(`lower_1') yline(`upper_1') ///
        note("Average of first seven recessions: `avg_early'%. Average of last three recessions: `avg_late'%.") ///
        // ylabel(-1(1)2)

    drop num period count_total `variable'_drop_tot
    sort daten
end

////////////////////////////////////////////
//
// Compute recovery in a variable
// after the NBER trough, by the
// time another variable recovers
// back to trend
//
////////////////////////////////////////////

capture program drop recovery_Ycyc
program define recovery_Ycyc
    syntax varlist(min=2 numeric)
    local variable `: word 1 of `0''
    local gdp_var `: word 2 of `0''

    capture drop var_* years_* name*
    capture drop `variable'_recovery
    gen `variable'_recovery = .
    capture drop gdp_recovery
    gen gdp_recovery = .
    capture drop recession
    gen recession = .
    capture drop length
    gen length = .
// HP filter the series capture drop y_log capture drop hp_y_log_sm_1 capture drop hp_y_log_1 capture drop y_cyc gen y_log = log(`gdp_var') hprescott y_log, stub(hp) smooth(1600) rename hp_y_log_1 y_cyc local threshold 0 local number = `:word count $troughs' forvalues i = 1/`number' { local peak `: word `i' of $peaks' local next = `i' + 1 local next_peak `:word `next' of $peaks' local trough `: word `i' of $troughs' local ref_date `trough' local recession = year(td(`peak')) // Limit of the window local start = mdy(month(td(`ref_date')),1,year(td(`ref_date'))-0) local end = min(mdy(month(td(`ref_date')),1,year(td(`ref_date'))+5),td(`next_peak')) // Turn variable at the trough into a variable qui summ `variable' if daten == td(`ref_date') local var_ref = r(mean) // Same for output qui summ `gdp_var' if daten == td(`ref_date') local y_ref = r(mean) // Report growth to a new variable gen var_`recession' = 100 * log(`variable'/`var_ref') if daten >= `start' & daten <= `end' // gen var_`recession' = `variable' - `var_ref' if daten >= `start' & daten <= `end' // gen y_`recession' = 100 * log(`gdp_var'/`y_ref') if daten >= `start' & daten <= `end' gen y_`recession' = y_cyc if daten >= `start' & daten <= `end' // Compute time from reference date gen years_`recession' = (year(daten) - year(td(`ref_date'))) + (month(daten) - month(td(`ref_date')))/12 if daten >= `start' & daten <= `end' // Find earliest time period when output recovery is above and below the threshold capture drop temp gen temp = 1 if y_`recession' ~= . & y_`recession' > `threshold' gen threshold_`recession' = 1 if temp == 1 & temp[_n-1] == . 
drop temp // Average the recovery before and after the threshold, with linear interpolation capture drop weight gen weight = (`threshold'-y_`recession'[_n-1])/(y_`recession'- y_`recession'[_n-1]) if threshold_`recession' == 1 gen var_`recession'_w = /// weight * var_`recession' + (1-weight)* var_`recession'[_n-1] gen gdp_`recession'_w = /// weight * y_`recession' + (1-weight)* y_`recession'[_n-1] gen length_`recession' = /// weight * years_`recession' + (1-weight)* years_`recession'[_n-1] // Report the recovery in output and employment to the appropriate variable replace `variable'_recovery = var_`recession'_w if threshold_`recession' == 1 replace gdp_recovery = gdp_`recession'_w if threshold_`recession' == 1 replace recession = `recession' if threshold_`recession' == 1 replace length = length_`recession' if threshold_`recession' == 1 } // Average before and after 1990 gen num = 1 generate period = . replace period = 0 if recession < 1990 replace period = 1 if recession >= 1990 & recession ~= . bysort period: egen `variable'_rec_tot = total(`variable'_recovery) bysort period: egen count_total = total(num) gen `variable'_rec_avg = `variable'_rec_tot / count_total sort daten list daten `variable'_recovery `variable'_recovery_avg if `variable'_recovery ~= . // Grab averages summ `variable'_rec_avg if period == 0 local avg_0 = r(mean) summ `variable'_rec_avg if period == 1 local avg_1 = r(mean) local avg_early : di %3.2f `avg_0' local avg_late : di %3.2f `avg_1' // Do a bar plot graph bar `variable'_recovery /// , /// over(recession) ytitle(Recovery in `variable' from trough (%)) /// graphregion(color(white)) yline(0) yline(`avg_0') yline(`avg_1') /// /// ylabel(-1(1)2) /// note("Average of first seven recoveries: `avg_early'%. 
Average of last three recoveries: `avg_late'%.") /// drop num period count_total var_* y_* years_* threshold_* gdp_*_w length_* weight* end //////////////////////////////////////////// // // Compute recession quantities with NBER quarters // //////////////////////////////////////////// /* capture program drop recession program define recession local size 4 local variable `: word 1 of `0'' // NBER peaks and troughs local peaks 01oct1948 01jul1953 01jul1957 01apr1960 01oct1969 01oct1973 01jan1980 01jul1981 01jul1990 01jan2001 01oct2007 local troughs 01oct1949 01apr1954 01apr1958 01jan1961 01oct1970 01jan1975 01jul1980 01oct1982 01jan1991 01oct2001 01apr2009 capture drop `variable'_* capture drop years_* capture drop name* local number = `:word count $troughs' forvalues i = 1/`number' { local peak `: word `i' of $peaks' local trough `: word `i' of $troughs' local next = `i' + 1 local next_peak `:word `next' of $peaks' local ref_date `peak' local recession = year(td(`peak')) // Turn variable at the peak into a variable qui summ `variable' if daten == td(`peak') local var_ref = r(mean) // Limit of the window: 1 year before peak to 3 years after trough local start = mdy(month(td(`peak')),1,year(td(`peak'))-1) local end = min(mdy(month(td(`trough')),1,year(td(`trough'))+3),td(`next_peak')) // Report growth to a new variable gen `variable'_`recession' = 100 * log(`variable'/`var_ref') if daten >= `start' & daten <= `end' // Compute time from reference date gen years_`recession' = (year(daten) - year(td(`ref_date'))) + (month(daten) - month(td(`ref_date')))/12 if daten >= `start' & daten <= `end' gen name_`recession' = "" replace name_`recession' = "`recession'" if daten == `end' } twoway /// (scatter `variable'_1948 years_1948, connect(direct) msymbol(none) lcolor(`color1948') mlabel(name_1948) mlabcolor(`color1948') mlabsize(`size') mlabpos(2)) /// (scatter `variable'_1953 years_1953, connect(direct) msymbol(none) lcolor(`color1953') mlabel(name_1953) 
mlabcolor(`color1953') mlabsize(`size') mlabpos(2)) /// (scatter `variable'_1957 years_1957, connect(direct) msymbol(none) lcolor(`color1957') mlabel(name_1957) mlabcolor(`color1957') mlabsize(`size') mlabpos(4)) /// (scatter `variable'_1960 years_1960, connect(direct) msymbol(none) lcolor(`color1960') mlabel(name_1960) mlabcolor(`color1960') mlabsize(`size') mlabpos(4)) /// (scatter `variable'_1969 years_1969, connect(direct) msymbol(none) lcolor(`color1969') mlabel(name_1969) mlabcolor(`color1969') mlabsize(`size') mlabpos(2)) /// (scatter `variable'_1973 years_1973, connect(direct) msymbol(none) lcolor(`color1973') mlabel(name_1973) mlabcolor(`color1973') mlabsize(`size') mlabpos(3)) /// (scatter `variable'_1980 years_1980, connect(direct) msymbol(none) lcolor(`color1980') mlabel(name_1980) mlabcolor(`color1980') mlabsize(`size') mlabpos(3)) /// (scatter `variable'_1981 years_1981, connect(direct) msymbol(none) lcolor(`color1981') mlabel(name_1981) mlabcolor(`color1981') mlabsize(`size') mlabpos(4)) /// (scatter `variable'_1990 years_1990, connect(direct) msymbol(none) lcolor(`color1990') mlabel(name_1990) mlabcolor(`color1990') mlabsize(`size') mlabpos(3)) /// (scatter `variable'_2001 years_2001, connect(direct) msymbol(none) lcolor(`color2001') mlabel(name_2001) mlabcolor(`color2001') mlabsize(`size') mlabpos(3)) /// (scatter `variable'_2007 years_2007, connect(direct) msymbol(none) lcolor(`color2007') mlabel(name_2007) mlabcolor(`color2007') mlabsize(`size') mlabpos(3)) /// , /// /// title("Recoveries in nonfarm business employment") ytitle(Deviation of `variable' employment from NBER peak (%)) xtitle(Years from NBER peak) /// legend(off) yline(0) xline(0) /// graphregion(color(white)) end */ //////////////////////////////////////////// // // Compute peaks and troughs // //////////////////////////////////////////// capture program drop peaks_troughs program define peaks_troughs local gdp_var `:word 1 of `0'' // Define recessions as two quarters of 
contraction, after 1948 gen recession = 1 if _n > 3 & `gdp_var' > `gdp_var'[_n+1] // Fill up pattern 1001 replace recession = 1 if recession[_n-2] == 1 & recession[_n+1] == 1 // Fill up pattern 101 replace recession = 1 if recession[_n-1] == 1 & recession[_n+1] == 1 // Remove lonely quarters of recession replace recession = . if recession == 1 & recession[_n-1] == . & recession[_n+1] == . // Fill up the last quarter of the recession gen temp = recession replace temp = 1 if recession == . & recession[_n-1] == 1 drop recession rename temp recession // Compute peaks and troughs gen peak = 1 if recession == 1 & recession[_n-1] == . gen trough = 1 if recession == 1 & recession[_n+1] == . end // Add the start of the log, since I often forget it! /* if missing("$log_entries") { noisily { display "##############################" display "Please specify that you don't want to log your entries" display "##############################" return } } else { if ($log_entries) { log_start } } // */ /////////////////////////////////////////////////// // occ1950 labels /////////////////////////////////////////////////// capture label drop occ1950_lbl label define occ1950_lbl 0 `"Accountants and auditors"' label define occ1950_lbl 1 `"Actors and actresses"', add label define occ1950_lbl 2 `"Airplane pilots and navigators"', add label define occ1950_lbl 3 `"Architects"', add label define occ1950_lbl 4 `"Artists and art teachers"', add label define occ1950_lbl 5 `"Athletes"', add label define occ1950_lbl 6 `"Authors"', add label define occ1950_lbl 7 `"Chemists"', add label define occ1950_lbl 8 `"Chiropractors"', add label define occ1950_lbl 9 `"Clergymen"', add label define occ1950_lbl 10 `"College presidents and deans"', add label define occ1950_lbl 12 `"Agricultural sciences"', add label define occ1950_lbl 13 `"Biological sciences"', add label define occ1950_lbl 14 `"Chemistry"', add label define occ1950_lbl 15 `"Economics"', add label define occ1950_lbl 16 `"Engineering"', add label 
define occ1950_lbl 17 `"Geology and geophysics"', add label define occ1950_lbl 18 `"Mathematics"', add label define occ1950_lbl 19 `"Medical sciences"', add label define occ1950_lbl 23 `"Physics"', add label define occ1950_lbl 24 `"Psychology"', add label define occ1950_lbl 25 `"Statistics"', add label define occ1950_lbl 26 `"Natural science (n.e.c.)"', add label define occ1950_lbl 27 `"Social sciences (n.e.c.)"', add label define occ1950_lbl 28 `"Nonscientific subjects"', add label define occ1950_lbl 29 `"Subject not specified"', add label define occ1950_lbl 31 `"Dancers and dancing teachers"', add label define occ1950_lbl 32 `"Dentists"', add label define occ1950_lbl 33 `"Designers"', add label define occ1950_lbl 34 `"Dietitians and nutritionists"', add label define occ1950_lbl 35 `"Draftsmen"', add label define occ1950_lbl 36 `"Editors and reporters"', add label define occ1950_lbl 41 `"Engineers, aeronautical"', add label define occ1950_lbl 42 `"Engineers, chemical"', add label define occ1950_lbl 43 `"Engineers, civil"', add label define occ1950_lbl 44 `"Engineers, electrical"', add label define occ1950_lbl 45 `"Engineers, industrial"', add label define occ1950_lbl 46 `"Engineers, mechanical"', add label define occ1950_lbl 47 `"Engineers, metallurgical, metallurgists"', add label define occ1950_lbl 48 `"Engineers, mining"', add label define occ1950_lbl 49 `"Engineers (n.e.c.)"', add label define occ1950_lbl 51 `"Entertainers (n.e.c.)"', add label define occ1950_lbl 52 `"Farm and home management advisors"', add label define occ1950_lbl 53 `"Foresters and conservationists"', add label define occ1950_lbl 54 `"Funeral directors and embalmers"', add label define occ1950_lbl 55 `"Lawyers and judges"', add label define occ1950_lbl 56 `"Librarians"', add label define occ1950_lbl 57 `"Musicians and music teachers"', add label define occ1950_lbl 58 `"Nurses, professional"', add label define occ1950_lbl 59 `"Nurses, student professional"', add label define occ1950_lbl 61 
`"Agricultural scientists"', add label define occ1950_lbl 62 `"Biological scientists"', add label define occ1950_lbl 63 `"Geologists and geophysicists"', add label define occ1950_lbl 67 `"Mathematicians"', add label define occ1950_lbl 68 `"Physicists"', add label define occ1950_lbl 69 `"Miscellaneous natural scientists"', add label define occ1950_lbl 70 `"Optometrists"', add label define occ1950_lbl 71 `"Osteopaths"', add label define occ1950_lbl 72 `"Personnel and labor relations workers"', add label define occ1950_lbl 73 `"Pharmacists"', add label define occ1950_lbl 74 `"Photographers"', add label define occ1950_lbl 75 `"Physicians and surgeons"', add label define occ1950_lbl 76 `"Radio operators"', add label define occ1950_lbl 77 `"Recreation and group workers"', add label define occ1950_lbl 78 `"Religious workers"', add label define occ1950_lbl 79 `"Social and welfare workers, except group"', add label define occ1950_lbl 81 `"Economists"', add label define occ1950_lbl 82 `"Psychologists"', add label define occ1950_lbl 83 `"Statisticians and actuaries"', add label define occ1950_lbl 84 `"Miscellaneous social scientists"', add label define occ1950_lbl 91 `"Sports instructors and officials"', add label define occ1950_lbl 92 `"Surveyors"', add label define occ1950_lbl 93 `"Teachers (n.e.c.)"', add label define occ1950_lbl 94 `"Technicians, medical and dental"', add label define occ1950_lbl 95 `"Technicians, testing"', add label define occ1950_lbl 96 `"Technicians (n.e.c.)"', add label define occ1950_lbl 97 `"Therapists and healers (n.e.c.)"', add label define occ1950_lbl 98 `"Veterinarians"', add label define occ1950_lbl 99 `"Professional, technical and kindred workers (n.e.c.)"', add label define occ1950_lbl 100 `"Farmers (owners and tenants)"', add label define occ1950_lbl 123 `"Farm managers"', add label define occ1950_lbl 200 `"Buyers and department heads, store"', add label define occ1950_lbl 201 `"Buyers and shippers, farm products"', add label define 
occ1950_lbl 203 `"Conductors, railroad"', add label define occ1950_lbl 204 `"Credit men"', add label define occ1950_lbl 205 `"Floormen and floor managers, store"', add label define occ1950_lbl 210 `"Inspectors, public administration"', add label define occ1950_lbl 230 `"Managers and superintendents, building"', add label define occ1950_lbl 240 `"Officers, pilots, pursers and engineers, ship"', add label define occ1950_lbl 250 `"Officials and administrators (n.e.c.), public administration"', add label define occ1950_lbl 260 `"Officials, lodge, society, union, etc."', add label define occ1950_lbl 270 `"Postmasters"', add label define occ1950_lbl 280 `"Purchasing agents and buyers (n.e.c.)"', add label define occ1950_lbl 290 `"Managers, officials, and proprietors (n.e.c.)"', add label define occ1950_lbl 300 `"Agents (n.e.c.)"', add label define occ1950_lbl 301 `"Attendants and assistants, library"', add label define occ1950_lbl 302 `"Attendants, physician's and dentist's office"', add label define occ1950_lbl 304 `"Baggagemen, transportation"', add label define occ1950_lbl 305 `"Bank tellers"', add label define occ1950_lbl 310 `"Bookkeepers"', add label define occ1950_lbl 320 `"Cashiers"', add label define occ1950_lbl 321 `"Collectors, bill and account"', add label define occ1950_lbl 322 `"Dispatchers and starters, vehicle"', add label define occ1950_lbl 325 `"Express messengers and railway mail clerks"', add label define occ1950_lbl 335 `"Mail carriers"', add label define occ1950_lbl 340 `"Messengers and office boys"', add label define occ1950_lbl 341 `"Office machine operators"', add label define occ1950_lbl 342 `"Shipping and receiving clerks"', add label define occ1950_lbl 350 `"Stenographers, typists, and secretaries"', add label define occ1950_lbl 360 `"Telegraph messengers"', add label define occ1950_lbl 365 `"Telegraph operators"', add label define occ1950_lbl 370 `"Telephone operators"', add label define occ1950_lbl 380 `"Ticket, station, and express 
agents"', add label define occ1950_lbl 390 `"Clerical and kindred workers (n.e.c.)"', add label define occ1950_lbl 400 `"Advertising agents and salesmen"', add label define occ1950_lbl 410 `"Auctioneers"', add label define occ1950_lbl 420 `"Demonstrators"', add label define occ1950_lbl 430 `"Hucksters and peddlers"', add label define occ1950_lbl 450 `"Insurance agents and brokers"', add label define occ1950_lbl 460 `"Newsboys"', add label define occ1950_lbl 470 `"Real estate agents and brokers"', add label define occ1950_lbl 480 `"Stock and bond salesmen"', add label define occ1950_lbl 490 `"Salesmen and sales clerks (n.e.c.)"', add label define occ1950_lbl 500 `"Bakers"', add label define occ1950_lbl 501 `"Blacksmiths"', add label define occ1950_lbl 502 `"Bookbinders"', add label define occ1950_lbl 503 `"Boilermakers"', add label define occ1950_lbl 504 `"Brickmasons, stonemasons, and tile setters"', add label define occ1950_lbl 505 `"Cabinetmakers"', add label define occ1950_lbl 510 `"Carpenters"', add label define occ1950_lbl 511 `"Cement and concrete finishers"', add label define occ1950_lbl 512 `"Compositors and typesetters"', add label define occ1950_lbl 513 `"Cranemen, derrickmen, and hoistmen"', add label define occ1950_lbl 514 `"Decorators and window dressers"', add label define occ1950_lbl 515 `"Electricians"', add label define occ1950_lbl 520 `"Electrotypers and stereotypers"', add label define occ1950_lbl 521 `"Engravers, except photoengravers"', add label define occ1950_lbl 522 `"Excavating, grading, and road machinery operators"', add label define occ1950_lbl 523 `"Foremen (n.e.c.)"', add label define occ1950_lbl 524 `"Forgemen and hammermen"', add label define occ1950_lbl 525 `"Furriers"', add label define occ1950_lbl 530 `"Glaziers"', add label define occ1950_lbl 531 `"Heat treaters, annealers, temperers"', add label define occ1950_lbl 532 `"Inspectors, scalers, and graders, log and lumber"', add label define occ1950_lbl 533 `"Inspectors (n.e.c.)"', 
add label define occ1950_lbl 534 `"Jewelers, watchmakers, goldsmiths, and silversmiths"', add label define occ1950_lbl 535 `"Job setters, metal"', add label define occ1950_lbl 540 `"Linemen and servicemen, telegraph, telephone, and power"', add label define occ1950_lbl 541 `"Locomotive engineers"', add label define occ1950_lbl 542 `"Locomotive firemen"', add label define occ1950_lbl 543 `"Loom fixers"', add label define occ1950_lbl 544 `"Machinists"', add label define occ1950_lbl 545 `"Mechanics and repairmen, airplane"', add label define occ1950_lbl 550 `"Mechanics and repairmen, automobile"', add label define occ1950_lbl 551 `"Mechanics and repairmen, office machine"', add label define occ1950_lbl 552 `"Mechanics and repairmen, radio and television"', add label define occ1950_lbl 553 `"Mechanics and repairmen, railroad and car shop"', add label define occ1950_lbl 554 `"Mechanics and repairmen (n.e.c.)"', add label define occ1950_lbl 555 `"Millers, grain, flour, feed, etc."', add label define occ1950_lbl 560 `"Millwrights"', add label define occ1950_lbl 561 `"Molders, metal"', add label define occ1950_lbl 562 `"Motion picture projectionists"', add label define occ1950_lbl 563 `"Opticians and lens grinders and polishers"', add label define occ1950_lbl 564 `"Painters, construction and maintenance"', add label define occ1950_lbl 565 `"Paperhangers"', add label define occ1950_lbl 570 `"Pattern and model makers, except paper"', add label define occ1950_lbl 571 `"Photoengravers and lithographers"', add label define occ1950_lbl 572 `"Piano and organ tuners and repairmen"', add label define occ1950_lbl 573 `"Plasterers"', add label define occ1950_lbl 574 `"Plumbers and pipe fitters"', add label define occ1950_lbl 575 `"Pressmen and plate printers, printing"', add label define occ1950_lbl 580 `"Rollers and roll hands, metal"', add label define occ1950_lbl 581 `"Roofers and slaters"', add label define occ1950_lbl 582 `"Shoemakers and repairers, except factory"', add label 
define occ1950_lbl 583 `"Stationary engineers"', add label define occ1950_lbl 584 `"Stone cutters and stone carvers"', add label define occ1950_lbl 585 `"Structural metal workers"', add label define occ1950_lbl 590 `"Tailors and tailoresses"', add label define occ1950_lbl 591 `"Tinsmiths, coppersmiths, and sheet metal workers"', add label define occ1950_lbl 592 `"Tool makers, and die makers and setters"', add label define occ1950_lbl 593 `"Upholsterers"', add label define occ1950_lbl 594 `"Craftsmen and kindred workers (n.e.c.)"', add label define occ1950_lbl 595 `"Members of the armed services"', add label define occ1950_lbl 600 `"Apprentice auto mechanics"', add label define occ1950_lbl 601 `"Apprentice bricklayers and masons"', add label define occ1950_lbl 602 `"Apprentice carpenters"', add label define occ1950_lbl 603 `"Apprentice electricians"', add label define occ1950_lbl 604 `"Apprentice machinists and toolmakers"', add label define occ1950_lbl 605 `"Apprentice mechanics, except auto"', add label define occ1950_lbl 610 `"Apprentice plumbers and pipe fitters"', add label define occ1950_lbl 611 `"Apprentices, building trades (n.e.c.)"', add label define occ1950_lbl 612 `"Apprentices, metalworking trades (n.e.c.)"', add label define occ1950_lbl 613 `"Apprentices, printing trades"', add label define occ1950_lbl 614 `"Apprentices, other specified trades"', add label define occ1950_lbl 615 `"Apprentices, trade not specified"', add label define occ1950_lbl 620 `"Asbestos and insulation workers"', add label define occ1950_lbl 621 `"Attendants, auto service and parking"', add label define occ1950_lbl 622 `"Blasters and powdermen"', add label define occ1950_lbl 623 `"Boatmen, canalmen, and lock keepers"', add label define occ1950_lbl 624 `"Brakemen, railroad"', add label define occ1950_lbl 625 `"Bus drivers"', add label define occ1950_lbl 630 `"Chainmen, rodmen, and axmen, surveying"', add label define occ1950_lbl 631 `"Conductors, bus and street railway"', add label 
define occ1950_lbl 632 `"Deliverymen and routemen"', add label define occ1950_lbl 633 `"Dressmakers and seamstresses, except factory"', add label define occ1950_lbl 634 `"Dyers"', add label define occ1950_lbl 635 `"Filers, grinders, and polishers, metal"', add label define occ1950_lbl 640 `"Fruit, nut, and vegetable graders, and packers, except factory"', add label define occ1950_lbl 641 `"Furnacemen, smeltermen and pourers"', add label define occ1950_lbl 642 `"Heaters, metal"', add label define occ1950_lbl 643 `"Laundry and dry cleaning operatives"', add label define occ1950_lbl 644 `"Meat cutters, except slaughter and packing house"', add label define occ1950_lbl 645 `"Milliners"', add label define occ1950_lbl 650 `"Mine operatives and laborers"', add label define occ1950_lbl 660 `"Motormen, mine, factory, logging camp, etc."', add label define occ1950_lbl 661 `"Motormen, street, subway, and elevated railway"', add label define occ1950_lbl 662 `"Oilers and greaser, except auto"', add label define occ1950_lbl 670 `"Painters, except construction or maintenance"', add label define occ1950_lbl 671 `"Photographic process workers"', add label define occ1950_lbl 672 `"Power station operators"', add label define occ1950_lbl 673 `"Sailors and deck hands"', add label define occ1950_lbl 674 `"Sawyers"', add label define occ1950_lbl 675 `"Spinners, textile"', add label define occ1950_lbl 680 `"Stationary firemen"', add label define occ1950_lbl 681 `"Switchmen, railroad"', add label define occ1950_lbl 682 `"Taxicab drivers and chauffers"', add label define occ1950_lbl 683 `"Truck and tractor drivers"', add label define occ1950_lbl 684 `"Weavers, textile"', add label define occ1950_lbl 685 `"Welders and flame cutters"', add label define occ1950_lbl 690 `"Operative and kindred workers (n.e.c.)"', add label define occ1950_lbl 700 `"Housekeepers, private household"', add label define occ1950_lbl 710 `"Laundressses, private household"', add label define occ1950_lbl 720 `"Private 
household workers (n.e.c.)"', add label define occ1950_lbl 725 `"[holding category for 1860/70 "domestic"]"', add label define occ1950_lbl 730 `"Attendants, hospital and other institution"', add label define occ1950_lbl 731 `"Attendants, professional and personal service (n.e.c.)"', add label define occ1950_lbl 732 `"Attendants, recreation and amusement"', add label define occ1950_lbl 740 `"Barbers, beauticians, and manicurists"', add label define occ1950_lbl 750 `"Bartenders"', add label define occ1950_lbl 751 `"Bootblacks"', add label define occ1950_lbl 752 `"Boarding and lodging house keepers"', add label define occ1950_lbl 753 `"Charwomen and cleaners"', add label define occ1950_lbl 754 `"Cooks, except private household"', add label define occ1950_lbl 760 `"Counter and fountain workers"', add label define occ1950_lbl 761 `"Elevator operators"', add label define occ1950_lbl 762 `"Firemen, fire protection"', add label define occ1950_lbl 763 `"Guards, watchmen, and doorkeepers"', add label define occ1950_lbl 764 `"Housekeepers and stewards, except private household"', add label define occ1950_lbl 770 `"Janitors and sextons"', add label define occ1950_lbl 771 `"Marshals and constables"', add label define occ1950_lbl 772 `"Midwives"', add label define occ1950_lbl 773 `"Policemen and detectives"', add label define occ1950_lbl 780 `"Porters"', add label define occ1950_lbl 781 `"Practical nurses"', add label define occ1950_lbl 782 `"Sheriffs and bailiffs"', add label define occ1950_lbl 783 `"Ushers, recreation and amusement"', add label define occ1950_lbl 784 `"Waiters and waitresses"', add label define occ1950_lbl 785 `"Watchmen (crossing) and bridge tenders"', add label define occ1950_lbl 790 `"Service workers, except private household (n.e.c.)"', add label define occ1950_lbl 810 `"Farm foremen"', add label define occ1950_lbl 820 `"Farm laborers, wage workers"', add label define occ1950_lbl 830 `"Farm laborers, unpaid family workers"', add label define occ1950_lbl 
840 `"Farm service laborers, self-employed"', add label define occ1950_lbl 910 `"Fishermen and oystermen"', add label define occ1950_lbl 920 `"Garage laborers and car washers and greasers"', add label define occ1950_lbl 930 `"Gardeners, except farm, and groundskeepers"', add label define occ1950_lbl 940 `"Longshoremen and stevedores"', add label define occ1950_lbl 950 `"Lumbermen, raftsmen, and woodchoppers"', add label define occ1950_lbl 960 `"Teamsters"', add label define occ1950_lbl 970 `"Laborers (n.e.c.)"', add label define occ1950_lbl 975 `"Works, occupation undetermined"', add label define occ1950_lbl 980 `"Keeps house/housekeeping at home/housewife"', add label define occ1950_lbl 981 `"Imputed keeping house (1850-1900)"', add label define occ1950_lbl 982 `"Helping at home/helps parents/housework"', add label define occ1950_lbl 983 `"At school/student"', add label define occ1950_lbl 984 `"Retired"', add label define occ1950_lbl 985 `"Unemployed/without occupation"', add label define occ1950_lbl 986 `"Invalid/disabled w/ no occupation reported"', add label define occ1950_lbl 987 `"Inmate"', add label define occ1950_lbl 990 `"New Worker"', add label define occ1950_lbl 991 `"Gentleman/lady/at leisure"', add label define occ1950_lbl 995 `"Other non-occupational response"', add label define occ1950_lbl 996 `"Illegible"', add label define occ1950_lbl 997 `"Occupation missing/unknown"', add label define occ1950_lbl 998 `"Illegible"', add label define occ1950_lbl 999 `"N/A (blank)"', add // label values occ1950 occ1950_lbl capture label drop classwkr_lbl label define classwkr_lbl 0 `"N/A"' label define classwkr_lbl 10 `"Self-employed"', add label define classwkr_lbl 11 `"Employer"', add label define classwkr_lbl 12 `"Working on own account"', add label define classwkr_lbl 13 `"Self-employed, not incorporated"', add label define classwkr_lbl 14 `"Self-employed, incorporated"', add label define classwkr_lbl 20 `"Works for wages or salary"', add label define 
classwkr_lbl 21 `"Works on salary (1920)"', add label define classwkr_lbl 22 `"Wage/salary, private"', add label define classwkr_lbl 23 `"Wage/salary at non-profit"', add label define classwkr_lbl 24 `"Wage/salary, government"', add label define classwkr_lbl 25 `"Federal government employee"', add label define classwkr_lbl 26 `"Armed forces"', add label define classwkr_lbl 27 `"State government employee (in Puerto Rico, Commonwealth)"', add label define classwkr_lbl 28 `"Local government employee"', add label define classwkr_lbl 29 `"Unpaid family worker"', add label define classwkr_lbl 97 `"Unknown"', add label define classwkr_lbl 98 `"Illegible"', add label define classwkr_lbl 99 `"Missing"', add label define classwkr_lbl 7 `"1960s cases to be allocated"', add // label values classwkr classwkr_lbl capture label drop incwage_lbl label define incwage_lbl 999998 `"Missing"' label define incwage_lbl 999999 `"N/A"', add label define incwage_lbl 99998 `"1960s cases to be allocated"', add // label values incwage incwage_lbl