********************************************************************************
/*
Supplementary Material: Stata do-file 
Data used: Understanding Society or UK Household Longitudinal Study (UKHLS): Waves 1-10, SN: 6931 (restricted main data) and SN: 6666 (restricted local authority districts): 
University of Essex, Institute for Social and Economic Research. (2022). Understanding Society: Waves 1-11, 2009-2020 and Harmonised BHPS: Waves 1-18, 1991-2009: Special Licence Access. [data collection]. 15th Edition. UK Data Service. SN: 6931, DOI: 10.5255/UKDA-SN-6931-14. SN: 6666, DOI: 10.5255/UKDA-SN-6666-14. 
Paper Title: Wages of UK immigrant men across generations: who catches up?
Author: Nico Ochmann 
Affiliation: University of Manchester, Department of Economics, UK  
*/
********************************************************************************
* Session setup: pin the interpreter version, empty memory, switch off output paging,
* and close a log if one happens to be open (capture suppresses the error when none is).
version 17 
clear all 
set more off
capture log close 
 
* Directory globals used by the rest of the file. They are empty here and have to be
* filled in before the file is run: Data is read from, mergeddata receives the
* intermediate and final datasets, graphs_tables receives the esttab output.
* The log global is defined but not referenced in the visible part of the file.
global Data "" 
global mergeddata ""
global graphs_tables ""
global log ""

**Creating 10 datasets for the local authority district (LAD)**
* For every wave a-j: read the household identifier and the LAD code, strip the wave
* prefix so that the file can be merged on hidp later, and store one lookup file per wave.
* All output paths are quoted so that a directory name containing blanks cannot split
* the file name into several tokens.
foreach w in a b c d e f g h i j {
	use `w'_hidp `w'_oslaua using "$Data/`w'_oslaua_protect", clear
	rename `w'_hidp hidp
	rename `w'_oslaua oslaua
	save "$mergeddata/`w'_oslaua_protect_2", replace
}

**Combining the time-invariant variables across the 10 waves**
* Only the listed variables are read from the cross-wave file.
use pidp hhorig sex birthy feend_dv ukborn plbornc_all scend_dv yr2uk4  racel_dv pacob_all macob_all mgmrob_all mgprob_all pgprob_all pgmrob_all paedqf maedqf j1soc00 using "$Data/xwavedat_protect", clear
save "$mergeddata/xwavedat", replace


**Merging the time-varying variables for a given wave with the local authority districts (LAD) for a given wave**
* One pass per wave: read the selected individual-response variables, add a numeric wave
* counter, remove the wave prefix from the variable names, attach the LAD code of the
* household and store the result as <wave letter>wave.dta.
foreach w in a b c d e f g h i j {

	use  pidp `w'_hidp  `w'_intdaty_dv `w'_jbhrs `w'_qfhigh_dv  `w'_hiqual_dv  `w'_dvage `w'_marstat  `w'_jbstat `w'_lingua `w'_jbotpd `w'_jbot  ///
		`w'_paygu_dv `w'_paynu_dv `w'_payu `w'_payg_dv `w'_payn_dv `w'_seearnnet_dv `w'_fimnlabgrs_dv  `w'_jbsize `w'_jbsect  `w'_jbsemp  `w'_nnatch `w'_preason  ///
		`w'_gor_dv  `w'_urban_dv `w'_jshrs   `w'_jbnssec8_dv `w'_jbiindb_dv `w'_jbisco88  `w'_jbsoc00 `w'_jbmngr `w'_oprlg `w'_oprlg1 `w'_nirel `w'_health `w'_oprlg0 `w'_oprlg0ni `w'_pasoc00 `w'_masoc00 `w'_citzn1 `w'_citzn2 `w'_citzn3 `w'_ndepchl_dv `w'_jbft_dv    ///
		using "$Data/`w'_indresp_protect", clear

	* The wave number (1-10) is the position of the wave letter in the string.
	gen wave = strpos("abcdefghij","`w'")

	* Remove the wave prefix from every variable that carries it.
	renpfix `w'_

	* Many individuals share one household record, hence m:1 on hidp.
	merge m:1  hidp using  "$mergeddata/`w'_oslaua_protect_2", gen(_wmerege7)

	* Quoted so that a directory name containing blanks does not break the command.
	save "$mergeddata/`w'wave", replace
}

**Append the previous 10 individual datasets into one long dataset containing all 10 waves**
* Wave a is loaded with the documented clear option (the original passed replace, which
* is not a documented option of use); waves b-j are stacked underneath.
use "$mergeddata/awave", clear
foreach w in b c d e f g h i j {
	append using "$mergeddata/`w'wave.dta"
}

save "$mergeddata/abcdefghij_long", replace

**Erase all 10 individual datasets previously merged**
* Paths are quoted so that directory names containing blanks are handled.
foreach w in a b c d e f g h i j {
	erase "$mergeddata/`w'wave.dta"
}

*Note that the qfhighoth and higheduk variables are available in Waves 6 (starting with the IEMB subsample), 9, and 10.*
*Creating 3 individual datasets (waves f, i, j) containing these 2 variables and the cross-wave personal identifier pidp*
* Output paths are quoted so that directory names containing blanks are handled.
foreach w in f i j {
	use pidp `w'_qfhighoth `w'_higheduk using "$Data/`w'_indresp_protect", clear
	save "$mergeddata/`w'_short", replace
}

*Merging the 2 variables from these 3 datasets into 1 dataset to generate 2 time-invariant derived variables (highother_dv and highuk_dv)*
use "$mergeddata/f_short", clear
merge 1:1 pidp using "$mergeddata/i_short", nogen
merge 1:1 pidp using "$mergeddata/j_short", nogen

* Both derived variables start at -9 and take the first non-negative answer found when
* scanning the waves in the order f, i, j. The guard on the derived variable keeps an
* answer from an earlier wave from being overwritten by a later one.
generate highother_dv = -9
generate highuk_dv = -9
foreach w in f i j {
	replace highother_dv = `w'_qfhighoth if `w'_qfhighoth>=0 & `w'_qfhighoth<. & (highother_dv==. | highother_dv<0)
	replace highuk_dv    = `w'_higheduk  if `w'_higheduk >=0 & `w'_higheduk <. & (highuk_dv ==. | highuk_dv <0)
}

keep pidp highother_dv highuk_dv

save "$mergeddata/high_dv", replace


* Load the long file. clear is given explicitly so that the command does not abort
* when the data in memory have unsaved changes; paths are quoted to tolerate blanks.
use "$mergeddata/abcdefghij_long", clear

*Merging the 1 dataset with the 2 time-invariant derived variables with the long dataset containing all 10 waves*
merge m:1 pidp using "$mergeddata/high_dv", force generate(_wemerge_1)

*Merging the other selected time-invariant variables from the raw datasets with the long dataset containing all 10 waves*
merge m:1 pidp using "$mergeddata/xwavedat", force generate(_wemerge_2)


save "$mergeddata/abcdefghij_long_timeinvarvar", replace
 
*The qualoc variable is only asked of new participants once, hence the answer has to be carried across waves for each individual*
* Step 1: one small file per wave holding pidp and that wave's qualoc variable.
* Output paths are quoted so that directory names containing blanks are handled.
foreach w in a b c d e f g h i j {
	use pidp `w'_qualoc using "$Data/`w'_indresp_protect", clear
	save "$mergeddata/`w'", replace
}

*Merging this variable from these 10 datasets (10 waves) into 1 dataset to generate 1 time-invariant derived variable (qualoc_dv)*
* Step 2: put the ten wave-specific variables side by side, one row per person.
use "$mergeddata/a", clear
foreach w in b c d e f g h i j {
	merge 1:1 pidp using "$mergeddata/`w'", nogenerate
}

* Step 3: take the first value above -7 found when scanning waves a to j. Once a value
* has been taken the derived variable is no longer -9, so later waves are ignored.
* Negative codes above -7 can be taken here; the list passed to mvdecode further down
* in this file contains -1 and -2.
generate qualoc_dv = -9
foreach w in a b c d e f g h i j {
	replace qualoc_dv = `w'_qualoc if `w'_qualoc>-7  & `w'_qualoc<. & qualoc_dv==-9
}
recode qualoc_dv 2=0
label define qualoc_dv 0 "no" 1 "yes" -8 "inapplicable"
label values qualoc_dv qualoc_dv

*Forwarding information across waves for each individual*
merge 1:m pidp using "$mergeddata/abcdefghij_long_timeinvarvar", force generate(_wemerge_3)


save "$mergeddata/noweight", replace

*The kidlang variable is only asked in Waves 2, 6, 8.
*Creating 3 individual datasets (3 waves) containing this variable and the cross-wave personal identifier pidp*
* Wave b is processed last on purpose: its data stay in memory and serve as the master
* for the two merges below. Output paths are quoted to tolerate blanks in directory names.
foreach w in h f b {
	use pidp `w'_kidlang using "$Data/`w'_indresp_protect", clear
	save "$mergeddata/`w'_indresp_short_protect", replace
}

*Merging this variable from these 3 datasets (3 waves) into 1 dataset to generate 1 time-invariant derived variable (kid_dv)*
merge 1:1 pidp using "$mergeddata/h_indresp_short_protect", keepusing(h_kidlang) nogen
merge 1:1 pidp using "$mergeddata/f_indresp_short_protect", keepusing(f_kidlang) nogen

* First strictly positive answer in the order b, f, h; -9 when none is found.
generate kid_dv = -9
foreach w in b f h {
	replace kid_dv = `w'_kidlang if `w'_kidlang>0 & `w'_kidlang<. & (kid_dv==. | kid_dv<0)
}

*Merging the time-invariant variable with the long dataset containing all 10 waves*
keep pidp kid_dv
merge 1:m pidp using "$mergeddata/noweight", generate(_wemerge_4)

* Rows without a person identifier or without a wave are removed.
drop if pidp==.
drop if wave==.

save "$mergeddata/noweight", replace

****************************constructing of the weights, although weights are not used******************
* The original repeated the same four commands once per wave. They are folded into one
* loop that reproduces the same sequence:
*   - wave 1 reads indinus_xw, waves 2-5 read indinub_xw, waves 6-10 read indinui_xw
*   - each wave file is merged 1:1 on pidp and wave with the file produced by the
*     previous step (noweight for wave 1)
*   - merge indicators are named _wemerge_50, _wemerge_60, ... , _wemerge_140
*   - intermediate files weighta, weightab, ... , weightabcdefghi are written as before;
*     nothing is written after wave j because the data continue in memory
* Paths are quoted so that directory names containing blanks are handled.
local chain ""
local previous "noweight"
forvalues k = 1/10 {
	local w = substr("abcdefghij", `k', 1)

	if `k' == 1 {
		local wgt "indinus_xw"
	}
	else if `k' <= 5 {
		local wgt "indinub_xw"
	}
	else {
		local wgt "indinui_xw"
	}

	use pidp `w'_`wgt' using "$Data/`w'_indresp_protect", clear
	gen wave = strpos("abcdefghij","`w'")
	renpfix `w'_

	local code = 40 + 10*`k'
	merge 1:1 pidp wave using "$mergeddata/`previous'", force generate(_wemerge_`code')

	local chain "`chain'`w'"
	local previous "weight`chain'"
	if `k' < 10 {
		save "$mergeddata/`previous'", replace
	}
}

* One weight column: the wave decides which of the three source weights is copied.
gen newwgt = indinus_xw if wave==1
forvalues k = 2/5 {
	replace newwgt = indinub_xw if wave==`k'
}
forvalues k = 6/10 {
	replace newwgt = indinui_xw if wave==`k'
}


save "$mergeddata/noweight", replace
 

**************************************** CONSTRUCTING VARIABLES ****************************************
* Negative survey codes become system missing. Code -8 is left out of the list because
* the religion variables below still need to see it.
mvdecode _all, mv(-1 -2 -9 -7 -10 -11 -20)

* Calendar year of the interview is the time index of the panel. A person can appear
* twice in one calendar year; flag marks those cases and only the first row is kept.
gen year = intdaty_dv
duplicates tag pidp year, gen(flag)
duplicates drop pidp year, force
xtset pidp year

* Gender: 0 when sex is 1, 1 when sex is 2, missing otherwise.
gen female = (sex == 2) if inlist(sex, 1, 2)

* Parental country of birth, used for the definition of the second generation.
gen father = pacob_all
gen mother = macob_all

* UK-born indicator: codes 1-4 count as born in the UK, code 5 as not; -8 is missing.
gen adjustnative = ukborn if ukborn != -8
gen ukb = inlist(adjustnative, 1, 2, 3, 4) if inlist(adjustnative, 1, 2, 3, 4, 5)

******** being Muslim or not ********
* The religion questions are only asked of new participants once, so every answer is
* carried forward within person in year order.

* religious: yes / no
gen religious = oprlg if oprlg < . & oprlg != -8
by pidp (year), sort: replace religious = religious[_n-1] if religious==. & religious[_n-1]<.

* affiliation: which religion. Respondents with religious equal to 2 receive the
* arbitrary value 1000 regardless of their own answer; otherwise the answer is used
* unless it is -8.
gen affiliation = 1000 if religious == 2
replace affiliation = oprlg1 if religious != 2 & oprlg1 < . & oprlg1 != -8
by pidp (year), sort: replace affiliation = affiliation[_n-1] if affiliation==. & affiliation[_n-1]<.

* affil: the same construction based on nirel.
gen affil = nirel if nirel < . & nirel != -8
by pidp (year), sort: replace affil = affil[_n-1] if affil==. & affil[_n-1]<.

* muslim is defined whenever one of the two affiliation variables is known.
gen muslim = (affiliation == 12 | affil == 13) if affiliation < . | affil < .

*********** industry ***********
* Non-positive industry codes are treated as missing.
gen industry = jbiindb_dv if jbiindb_dv > 0

* Origin groups. Odd codes are first-generation groups (own country of birth), even codes
* are second-generation groups (born in the UK with at least one parent born in the
* group's countries). Parental country of birth is not considered for the first generation.
* The replace statements run in a fixed order and later ones override earlier ones.

* Country-code lists, written once and reused for own, father's and mother's country.
local afr_head "22, 23, 24, 25, 26, 103, 105, 106, 112, 134, 141, 273, 178, 152, 351, 235, 386, 387, 326, 148, 255, 203, 149, 191, 193, 264, 265"
local afr_tail "330, 216, 215, 286, 343"
local carib    "27, 174, 129, 339, 341, 211, 182, 388, 218, 183, 217, 359"
local eu_old   "123, 132, 179, 198, 6, 7, 209, 5, 8, 353, 312, 9, 346, 296, 347"
local eu_new   "147, 173, 11, 176, 192, 223, 252, 259, 269, 10, 318, 332, 333"

* 0: UK-born with both parents coded 1-4.
gen native = 0 if ukb==1 & inrange(mother, 1, 4) & inrange(father, 1, 4)

* 1/2, 3/4, 5/6: single-country groups with codes 18, 19 and 20.
local g = 0
foreach c in 18 19 20 {
	local ++g
	replace native = `g' if plbornc_all==`c'
	local ++g
	replace native = `g' if (father==`c' | mother==`c') & ukb==1
}

* 7/8: the list labelled A1/A2 below.
* NOTE(review): the first-generation list contains code 355 where the second-generation
* list contains code 335; every other entry is identical. One of the two looks like a
* typo. Both are reproduced unchanged here - confirm against the country codebook.
replace native = 7 if inlist(plbornc_all, `afr_head', 355, `afr_tail')
replace native = 8 if ukb==1 & (inlist(father, `afr_head', 335, `afr_tail') | inlist(mother, `afr_head', 335, `afr_tail'))

* 9/10: the list labelled C1/C2 below.
replace native = 9  if inlist(plbornc_all, `carib')
replace native = 10 if ukb==1 & (inlist(father, `carib') | inlist(mother, `carib'))

* 11/12: pre2004 EU countries plus Norway and Switzerland.
replace native = 11 if inlist(plbornc_all, `eu_old')
replace native = 12 if ukb==1 & (inlist(father, `eu_old') | inlist(mother, `eu_old'))

* 13/14: post2004 EU countries.
replace native = 13 if inlist(plbornc_all, `eu_new')
replace native = 14 if ukb==1 & (inlist(father, `eu_new') | inlist(mother, `eu_new'))

* One dummy per observed category. The code below treats f1 as category 0 and f15 as
* category 14, which holds only when all fifteen categories occur in the data.
tab native, gen(f)

label define native 0 "N" 1 "I1" 2 "I2" 3 "P1" 4 "P2" 5 "B1" 6 "B2" 7 "A1" 8 "A2" 9 "C1" 10 "C2" 11 "E1" 12 "E2" 13 "EE1" 14 "EE2"
label values native native

* nat: 0 for f1, 1 for the even-numbered dummies f2-f14, 2 for the odd-numbered f3-f15.
gen nat = 0 if f1==1
forvalues k = 2(2)14 {
	replace nat = 1 if f`k'==1
}
forvalues k = 3(2)15 {
	replace nat = 2 if f`k'==1
}

* Creates foreign1, foreign2 and foreign3 from the three values of nat.
tab nat, gen(foreign)

***********generate english first language indicator**************
* 1 when kid_dv is 1 or 2, 0 when it lies between 3 and 97, missing otherwise.
gen first = 0  if kid_dv>=3 & kid_dv<=97
replace first = 1 if kid_dv==1 | kid_dv==2

* generate number of hrs normally worked per week
gen jobhrs = jbhrs
replace jobhrs = . if jbhrs==-8

* potential work experience *
gen sla0 = scend_dv  // school leaving age
replace sla0 =. if sla0==0  // code 0 for missing age
replace sla0=0 if sla0==-8 // code -8 for never been in school or still in school

gen sla = feend_dv  // further education leaving age


*****************************************education dummies*************************************
* Eight categories. The statements run in order and a later match overrides an earlier one.
* The highest-qualification variable is referred to by its full name hiqual_dv; the
* original wrote the abbreviation hiqual, which only resolves while variable
* abbreviation is switched on and no other variable name starts with the same letters.
gen education = 1 if qfhigh_dv == 1 | (highother_dv>=1 & highother_dv<=3) // postgraduate
replace education = 2 if qfhigh_dv == 2 | highother_dv==4   // bachelor
replace education = 3 if qfhigh_dv == 3 | highother_dv==5  // 2-year diploma
replace education = 4 if qfhigh_dv == 4 | qfhigh_dv==5 | (highother_dv>=6 & highother_dv<=8) // nursing/teaching/other
replace education = 5 if hiqual_dv==3   // A-level etc.
replace education = 6 if hiqual_dv==4 | highother_dv==9 // high-school
replace education = 7 if hiqual_dv==5 | highother_dv==96    // other qualification
replace education = 8 if hiqual_dv==9 | highother_dv==10    // no qualification

* Creates edu1-edu8, one dummy per observed education category.
tab education, gen(edu)


* Age at which education ended: school leaving age for categories 5-8,
* further education leaving age for categories 1-4.
gen als = sla0 if education>=5 & education<=8
replace als = sla if education>=1 & education<=4

* Monthly and hourly wage from paygu_dv (usual gross payment), only where jbstat is 2.
* Missings are generated; this variable does NOT include self employment income.
* A value of -8 is treated as missing. The hourly wage divides by 4.33 and by jobhrs.
gen mw = paygu_dv if jbstat==2 & paygu_dv != -8
gen hw = (mw/4.33)/jobhrs if jbstat==2

* Age and its square; non-positive ages are left missing.
gen age  = dvage if dvage>0 & dvage<.
gen age2 = age*age

* Condition shared by the three variables below: natives (foreign1) and the second
* generation (foreign3) are assigned zero.
local born_here "(foreign1==1 | foreign3==1)"

***** year arrived in the UK *****
gen year1 = yr2uk4 if foreign2==1
replace year1 = 0 if `born_here'

***** years in the UK and its square *****
gen yuk = year - year1 if foreign2==1
replace yuk = 0 if `born_here'
gen yuk2 = yuk*yuk

***** arrival age *****
* Rows with a negative arrival age are removed before the zeros are filled in.
gen aage = age - yuk if foreign2==1
drop if aage<0
replace aage = 0 if `born_here'


* One foreign-degree dummy for all degrees. The second statement overrides the first
* wherever both conditions hold.
gen fd = 0 if (aage<=sla0 & sla0<.) | qualoc_dv==1 | highuk_dv==1
replace fd = 1 if (aage>sla0 & aage<.) | qualoc_dv==0 | highuk_dv==2


* Source country of qualification: own country of birth for a foreign qualification,
* the value 10000 for a UK qualification (UK as country of birth versus UK as country
* of qualification).
gen degree = cond(fd==1, plbornc_all, 10000) if inlist(fd, 0, 1)



**************UK regions****************
* region copies the codes 1-12 of gor_dv and is missing for any other value.
* The variable is addressed by its full name gor_dv; the original wrote the abbreviation
* gor, which only resolves while variable abbreviation is switched on and unambiguous.
gen region = .
forvalues k = 1/12 {
	replace region = `k' if gor_dv==`k'
}

* Creates one dummy per observed region (region1, region2, ...).
tab region, gen(region)

// total work experience 
* total = age minus the age at which education ended (als); rows with a negative value are dropped.
gen total = age - als if foreign1==1 | foreign2==1 | foreign3==1
drop if total<0  

* Squared term scaled by 100.
gen total2 = (total^2)/100


* UK experience (pexpuk). The statements below run in order and a later match overrides
* an earlier one, so the sequence must not be rearranged.
* Natives and the second generation: all experience counts as UK experience.
gen pexpuk = total if foreign1==1 | foreign3==1   // foreign1 are natives, and foreign3 are second generation immigrants 
* First generation with a foreign qualification (fd==1): years since arrival.
replace pexpuk = age-aage  if foreign2==1 & (fd==1) // aage is arrival age and foreign2 are first generation immigrants  
* First generation with a UK qualification (fd==0) who arrived by age 18: total experience.
replace pexpuk = total if aage<=18 & foreign2==1 & (fd==0)  
* UK qualification, arrival age 19-22: years since arrival minus a deduction that depends on the education category.
replace pexpuk = age-aage - 4.5 if aage>=19 & aage<=22 & (fd==0) & edu1==1 
replace pexpuk = age-aage - 3 if aage>=19 & aage<=22 & (fd==0) & (edu2==1)
replace pexpuk = age-aage - 2 if aage>=19 & aage<=22 & (fd==0) & (edu3==1)
replace pexpuk = age-aage - 2 if aage>=19 & aage<=22 & (fd==0) & (edu4==1)
replace pexpuk = age-aage - 2 if aage>=19 & aage<=22 & (fd==0) & (edu5==1)
replace pexpuk = age-aage - 1 if aage>=19 & aage<=22 & (fd==0) & (edu6==1)
* UK qualification, arrival age above 22.
* NOTE(review): the deduction for edu1 is 1.5 here but 4.5 in the 19-22 band, while edu2-edu5 use the same values in both bands - confirm that 1.5 is intended.
* NOTE(review): no statement covers fd==0 with arrival age 19 or above for edu7 and edu8, nor edu6 with arrival age above 22; pexpuk stays missing for those first-generation rows - confirm.
replace pexpuk = age-aage - 1.5 if aage <. & aage>22 & (fd==0) & edu1==1 
replace pexpuk = age-aage - 3 if aage <. & aage>22 & (fd==0) & (edu2==1)   
replace pexpuk = age-aage - 2 if aage <. & aage>22 & (fd==0) & (edu3==1)   
replace pexpuk = age-aage - 2 if aage <. & aage>22 & fd==0 & edu4==1 & foreign2==1     
replace pexpuk = age-aage - 2 if aage <. & aage>22 & fd==0 & edu5==1 & foreign2==1      

* Rows with negative UK experience are removed; squared term scaled by 100.
drop if pexpuk<0 
gen pexpuk2 = (pexpuk^2)/100 

* Foreign experience: the remainder of total experience for the first generation, zero for natives and the second generation.
gen pexpabroad = total - pexpuk  if foreign2==1 
replace pexpabroad = 0 if foreign1==1 | foreign3==1  
drop if pexpabroad<0 
gen pexpabroad2 = (pexpabroad^2)/100  

 
************ occupation ************
* Four digit occupation code; values outside the open interval (0, 10000) stay missing.
gen occupation = jbisco88 if jbisco88<10000 & jbisco88>0

* Deflator: CDID D7BT, source dataset ID MM23, (UK) Office for National Statistics, CPI all items.
* The values are listed in calendar order starting with 2009; years outside 2009-2019
* keep a missing deflator.
generate cpi = .
local yr = 2009
foreach index in 86.6 89.4 93.4 96.1 98.5 100 100 100.7 103.4 105.9 107.8 {
	replace cpi = `index' if year == `yr'
	local ++yr
}

***** real wages *****
* Real hourly wage and its natural logarithm, only where jbstat is 2.
gen rhw = (hw/cpi)*100 if jbstat==2
gen logrhw = ln(rhw)

***********generate local authority districts**********
* oslaua mixes letters and digits and is difficult to destring, so a running number is
* built from the dummies that tabulate creates (d1, d2, ... in the order of the table).
* The number of districts is read from r(r) right after tabulate instead of being
* hard-coded as 433, so the loop matches whatever number of categories the data hold.
tab oslaua, gen(d)
local n_lad = r(r)
gen lad = 1 if d1==1
forvalues i = 2/`n_lad' {
	replace lad = `i' if d`i'==1
}

**********************************************************************************
* Sample restrictions: natives and second-generation rows flagged as holding a foreign
* qualification are removed, as are all rows from 2020.
drop if ((foreign1==1 | foreign3==1) & fd==1) | year==2020
*************************************************************************************
* Sample-condition macros. pooled and pooled2 are defined again in the ESTIMATION
* section before any regression in the visible part of the file uses them.
global pooled (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1|edu7==1) & (f1==1  | f2==1  | f4==1 | f6==1 |f8==1 |f10==1 | f12==1 | f14==1) & (age>=16 & age<65)  

global pooled2 (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1|edu7==1) & (f1==1  | f3==1  | f5==1 | f7==1 |f9==1 |f11==1| f13==1 | f15==1) & (age>=16 & age<65) 

global pooled3 (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1 | edu7==1) & (f1==1  | f2==1  | f3==1|  f4==1 | f5==1 | f6==1 | f7==1 |f8==1 | f9==1 | f10==1 | f11==1 | f12==1 | f13==1 | f14==1) & (age>=16 & age<65) 

******************************************* for Gelbach Decomposition: b1x2 command, which does not allow for factor variables
* Explicit dummy sets: ja for year, ab for degree, oc for occupation, in for industry, la for lad.
local stubs "ja ab oc in la"
local pos = 0
foreach v in year degree occupation industry lad {
	local ++pos
	local stub : word `pos' of `stubs'
	tab `v', gen(`stub')
}
********************************************************************************
* Percentages for the summary statistics of the first and second generation:
* each 0/1 variable is multiplied by 100 and stored under the same name plus an underscore.
forvalues i = 1/8 {
	generate edu`i'_ = edu`i'*100
}

foreach v in fd first muslim {
	gen `v'_ = `v'*100
}

forvalues i = 1/12 {
	generate region`i'_ = region`i'*100
}

*************** labelling for ethnicity ***************
* f2/f3, f4/f5, ... , f14/f15 are pairs that share one origin label.
local k = 2
foreach origin in "India" "Pakistan" "Bangladesh" "Africa" "Caribbean" "Europe" "Eastern Europe" {
	label variable f`k' "`origin'"
	local ++k
	label variable f`k' "`origin'"
	local ++k
}

* Wage and experience variables.
label var rhw         "Hourly Wage"
label var pexpuk      "UK Exp.(yrs)"
label var pexpuk2     "UK Exp. square(/100)"
label var total       "Work Exp. (yrs)"
label var total2      "Work Exp. square(/100)"
label var pexpabroad  "Foreign Exp. (yrs)"
label var pexpabroad2 "Foreign Exp. square(/100)"

* Percentage versions of the education dummies.
label var edu1_ "Postgraduate (\%)"
label var edu2_ "Bachelor"
label var edu3_ "Two-Year Diploma"
label var edu4_ "Nursing/Teaching"
label var edu5_ "A-level"
label var edu6_ "High School"
label var edu7_ "Other Qual."
label var edu8_ "No Qual."

* Percentage versions of the remaining indicators and of four region dummies.
label var fd_      "Foreign Qual. (\%)"
label var first_   "English Lang. (\%)"
label var muslim_  "Muslim (\%)"
label var region7_ "London (\%)"
label var region8_ "Southeast"
label var region5_ "West Midlands"
label var region2_ "Northwest"


***********save and erase datasets***************
* The analysis file is written first, so the result is already on disk should one of
* the erase commands stop the run (erase fails when a file is absent).
* All paths are quoted so that directory names containing blanks are handled.
save "$mergeddata/noweight_all", replace

* Intermediate files that are no longer needed.
erase "$mergeddata/xwavedat.dta"
foreach w in a b c d e f g h i j {
	erase "$mergeddata/`w'_oslaua_protect_2.dta"
	erase "$mergeddata/`w'.dta"
}
erase "$mergeddata/abcdefghij_long_timeinvarvar.dta"
erase "$mergeddata/noweight.dta"
erase "$mergeddata/h_indresp_short_protect.dta"


********************************************************************************ESTIMATION*******************************************************************************
*************************************************************************************************************************************************************************
* Sample conditions. These two definitions replace the macros of the same name set earlier in the file.
* pooled: any of the eight education dummies, natives or first generation, age 16-64.
global pooled (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1|edu7==1 | edu8==1) & (foreign1==1 | foreign2==1) & (age>=16 & age<=64)    

* pooled2: second generation plus those natives who were in the estimation sample of the
* regression run immediately before the macro is used (it contains e(sample)), age 16-64.
global pooled2 (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1|edu7==1 | edu8==1) & ((foreign1==1 & e(sample)) | foreign3==1) & (age>=16 & age<=64) 


* Regressor lists for the first generation (even-numbered origin dummies f2-f14).
* algan: year, education, total experience and region.
* all_occup: UK/foreign experience, local authority, country of qualification, language and religion.
* all: all_occup plus industry and occupation.
global algan f2 f4  f6  f8  f10 f12 f14   i.year i.education  total total2  i.region 
global all_occup f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2  i.lad  ib10000.degree first muslim  
global all   f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2  i.lad  ib10000.degree first muslim  i.industry i.occupation    
 
* Regressor lists for the second generation (odd-numbered origin dummies f3-f15).
* These use total experience and contain neither the UK/foreign experience split nor degree.
global algan2 f3  f5  f7  f9  f11 f13  f15  i.year i.education  total total2  i.region 
global all2_occup f3  f5  f7  f9  f11 f13  f15  i.year i.education  total total2  i.lad first muslim          
global all2   f3  f5  f7  f9  f11 f13  f15  i.year i.education  total total2  i.lad first muslim i.industry i.occupation           


**Table 1 Results**
set more off
* Tests of the difference between first- and second-generation coefficients, men, POLS.
* Two passes with identical steps:
*   pass "algan" - basic specification (BS): globals algan and algan2
*   pass "all"   - full specification (FS):  globals all and all2
* Inside a pass the specification name is held in the local spec, and the global is
* reached through nested macro expansion.
foreach spec in algan all {

	* First generation: the full model fixes the estimation sample, the chosen
	* specification is then estimated on that sample (without cluster option).
	reg logrhw $all if $pooled & female==0
	reg logrhw ${`spec'} if e(sample) & $pooled & female==0
	estimates store first

	* Second generation: the first-generation full model is re-run quietly because
	* pooled2 refers to its e(sample); the second-generation full model then fixes
	* the sample on which the chosen specification is estimated.
	quietly reg logrhw $all  if $pooled  & female==0
	quietly reg logrhw $all2 if $pooled2 & female==0
	reg logrhw ${`spec'2} if e(sample) & female==0
	estimates store second

	* Joint covariance of both equations, with cluster option.
	suest first second, vce(cluster pidp)

	* lincom gives the difference and its s.e. for each origin pair
	* (f2 with f3, f4 with f5, ... , f14 with f15).
	forvalues k = 2(2)14 {
		local m = `k' + 1
		lincom [first_mean]f`k' - [second_mean]f`m'
	}
}

**Table 2 Results** 
********************************************************************************Gelbach Decomposition, first generation  
* b1x2 is run for men in the pooled sample: x1all holds the first-generation origin dummies,
* x2all the added covariates, x2delta the groups into which the change is split; standard
* errors are clustered on pidp and the result is stored as basefull.
* The dummy ranges (la2-la433, ja2-ja11, ab1-ab161, in2-in34, oc2-oc236) are hard-coded and
* have to match the number of dummies that the tab, gen() commands earlier in the file produce.
* NOTE(review): the degree dummies start at ab1 whereas every other set starts at 2 - confirm this is intended.
eststo basefull: b1x2 logrhw if $pooled & female==0, x1all(f2 f4  f6  f8  f10 f12 f14) x2all(edu2-edu8 pexpuk pexpuk2 pexpabroad pexpabroad2 la2-la433 ja2-ja11 ab1-ab161 first muslim in2-in34 oc2-oc236) x2delta(Education = edu2-edu8 : LocalEffects =la2-la433 : YearEffects=ja2-ja11 : UKExp=pexpuk pexpuk2 : ForeignExp=pexpabroad pexpabroad2 : CountryofStudy=ab1-ab161 : English=first : Muslim = muslim : Ind./Occ. = in2-in34 oc2-oc236)  cluster(pidp) 
* The stored result is written to table6_.tex in the graphs_tables directory; the constant rows of every group are dropped from the table.
esttab basefull using $graphs_tables/table6_.tex, replace label star(* 0.10 ** 0.05 *** 0.01) nonotes b(3) se(3) unstack  mtitles("" "" "" "" "" "" "") nonumbers stats(N_clust N, fmt(%9.0f) labels(Individuals Observations))  compress drop(_cons:Education _cons:LocalEffects _cons:YearEffects _cons:UKExp _cons:ForeignExp _cons:CountryofStudy _cons:English _cons:Muslim _cons:Ind./Occ.  _cons:__TC)    


**Figure 1 Results**
* 1.5 generation: first-generation members who arrived at age 0-18. young is 0 for
* natives and 1-7 for the seven origin groups, taken from the dummies f2, f4, ... , f14.
gen young = 0 if native==0
local g = 0
forvalues k = 2(2)14 {
	local ++g
	replace young = `g' if f`k'==1 & aage>=0 & aage<=18
}

* Sample condition and regressor lists for the 1.5 generation.
global pooled1_5 (edu1==1| edu2==1 | edu3==1 | edu4==1|edu5==1 |edu6==1|edu7==1 | edu8==1) & (foreign1==1 | (aage>=0 & aage<=18 & foreign2==1)) & (age>=16 & age<65)       
global algan1_5 i.young   i.year i.education  total total2  i.region   
global all1_5   i.young   i.year i.education  total total2  i.lad first muslim  i.occupation i.industry  

* First generation without the 1.5 generation. The exclusion is temporary: the data are
* preserved before the drop and restored after the two stored regressions.
* BS = basic specification, FS = full specification; both are estimated on the sample
* fixed by the preceding regression and clustered on pidp.
preserve
drop if (aage>=0 & aage<=18 & foreign2==1)

regress logrhw $all if $pooled & female==0
eststo BS1: regress logrhw $algan if e(sample) & $pooled & female==0, vce(cluster pidp)
eststo FS1: regress logrhw $all   if e(sample) & $pooled & female==0, vce(cluster pidp)
restore

* 1.5 generation against natives.
regress logrhw $all1_5 if $pooled1_5 & female==0
eststo BS1_5: regress logrhw $algan1_5 if e(sample) & $pooled1_5 & female==0, vce(cluster pidp)
eststo FS1_5: regress logrhw $all1_5   if e(sample) & $pooled1_5 & female==0, vce(cluster pidp)

* Second generation: the first-generation full model is run quietly first because
* pooled2 refers to its e(sample); the second-generation full model then fixes the sample.
quietly regress logrhw $all  if $pooled  & female==0
quietly regress logrhw $all2 if $pooled2 & female==0

eststo BS2: regress logrhw $algan2 if e(sample) & female==0, vce(cluster pidp)
eststo FS2: regress logrhw $all2   if e(sample) & female==0, vce(cluster pidp)


coefplot (BS1, msymbol(T))  (FS1, msymbol(O)) (BS1_5, msymbol(Th))  (FS1_5, msymbol(Oh)) (BS2, msymbol(S)) (FS2, msymbol(D)), keep(f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 1.young 2.young 3.young 4.young 5.young 6.young 7.young)  order(f2 f4 f6  f8  f10 f12 f14 1.young 2.young 3.young 4.young 5.young 6.young 7.young f3 f5 f7 f9 f11 f13 f15) xline(0)  title(Wage Differentials: Three Generations) levels(95)  xlabel(-0.7(0.1)0.7) xtitle(Log-Points) /// // rescale(100)
coeflabels(f2 = "India_1" f2 = ""  f4 = "Pakistan_1" f4 = "" f6 = "Bangladesh_1" f6= "" f8 = "Africa_1" f8= "" f10 = "Caribbean_1" f10 = ""   f12 = "Europe_1" f12= ""  ///
  f14 = "EasternEurope_1" f14 = "" 1.young = "India_1.5" 1.young = ""  2.young = "Pakistan_1.5" 2.young = "" 3.young = "Bangladesh_1.5" 3.young= "" 4.young = "Africa_1.5" 4.young= "" 5.young = "Caribbean_1.5" 5.young = ""   6.young = "Europe_1.5" 6.young= ""  ///
 7.young = "EasternEurope_1.5" 7.young = "" f3 = "India_2" f3 = ""  f5 = "Pakistan_2" f5 = "" f7 = "Bangladesh_2" f7= "" f9 = "Africa_2" f9= "" f11 = "Caribbean_2" f11 = ""   f13 = "Europe_2" f13= "" f15 = "EasternEurope_2" f15 = "" ) grid(none)  /// 
graphregion(color(white)) bgcolor(white)   
graph save $graphs_tables/younggeneration, replace 
graph export $graphs_tables/younggeneration.pdf, replace  

**Table A.3.4 Results** 
********************************************************************************Summary Stats Men, first generation
* One column of means per group (f1, f2, f4, ..., f14; column titles N, I, P, B,
* A, C, E, EE), all restricted to the estimation sample of the full
* first-generation model for men.

global all   f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2  i.lad  ib10000.degree first muslim i.industry i.occupation              
global all2   f3  f5  f7  f9  f11 f13  f15  i.year i.education  total total2  i.lad first muslim i.industry i.occupation           
 
global summary1 rhw pexpuk pexpabroad edu1_ edu2_ edu3_ edu4_ edu5_ edu6_ edu7_ edu8_ fd_ first_ muslim_  region7_  region5_ region8_ region2_ 

* The estimation sample is the same for every column. The regression used to be
* re-run before each estpost/xtsum call only to refresh e(sample); fit it once
* and freeze the sample in a temporary variable instead.
quietly regress logrhw $all if $pooled & female==0, cluster(pidp)
tempvar insample1
generate byte `insample1' = e(sample)

local cols   A  B  C  D  E  F   G   H
local groups f1 f2 f4 f6 f8 f10 f12 f14

forvalues k = 1/8 {
    local col : word `k' of `cols'
    local grp : word `k' of `groups'

    * esttab reads e(); summarize only leaves r(), so estpost reposts it in e().
    eststo `col': estpost summarize $summary1 if `insample1' & `grp'==1 & female==0

    * Attach the r(n) left behind by xtsum to the stored column as scalar n
    * (reported as "Individuals" below).
    * NOTE(review): with a varlist, r(n) presumably refers to the last variable
    * summarized -- confirm.
    xtsum $summary1 if `insample1' & `grp'==1 & female==0
    estadd scalar n = r(n): `col'
}

esttab A B C D E F G H  using $graphs_tables/table2_.tex, replace main(mean 1) nostar  obs nonote label nonumber   ///  // aux(sd 1)
	     refcat(pexpuk  "\emph{Human Capital}" region7_ "\emph{Regional Distribution}", nolabel) compress nogaps  ///       // gaps 
		 mtitle("N" "I" "P" "B" "A" "C" "E" "EE") stats(n N, fmt(%9.0f) labels(Individuals Observations)) 
		 
		 
		 
		
**Table A.3.5 Results** 

**********************************************************************************************************************Second Generation Men Summary Stats 
* One column of means per group (f1, f3, f5, ..., f15; column titles N, I, P, B,
* A, C, E, EE), all restricted to the estimation sample of the full
* second-generation pooled OLS model for men.
global summary2 rhw total edu1_ edu2_ edu3_ edu4_ edu5_ edu6_ edu7_ edu8_ first_ muslim_ region7_  region5_ region8_ region2_ 

* The three-step recipe below was previously repeated before every estpost and
* every xtsum call (48 regressions) just to refresh e(sample). The sample is
* identical each time, so run the recipe once and freeze it.
quietly regress logrhw $all if $pooled & female==0
quietly regress logrhw $all2 if $pooled2 & female==0
quietly regress logrhw $all2 if e(sample) & female==0   // pooled OLS
tempvar insample2
generate byte `insample2' = e(sample)

local cols   I  J  K  L  M  N   O   P
local groups f1 f3 f5 f7 f9 f11 f13 f15

forvalues k = 1/8 {
    local col : word `k' of `cols'
    local grp : word `k' of `groups'

    * estpost puts the summarize results into e() so they can be stored.
    estpost summarize $summary2 if `insample2' & `grp'==1 & female==0
    estimates store `col'

    * Attach the r(n) left behind by xtsum to the stored column as scalar n
    * (reported as "Individuals" below).
    * NOTE(review): with a varlist, r(n) presumably refers to the last variable
    * summarized -- confirm.
    xtsum $summary2 if `insample2' & `grp'==1 & female==0
    estadd scalar n = r(n): `col'
}

esttab I J K L M N O P  using $graphs_tables/table3_.tex, replace main(mean 1) nostar  obs nonote label nonumber   ///  // aux(sd 1)  
	     refcat(total   "\emph{Human Capital}" region7_ "\emph{Regional Distribution}", nolabel) compress nogaps  ///  // gaps
		 mtitle("N" "I" "P" "B" "A" "C" "E" "EE") stats(n N, fmt(%9.0f) labels(Individuals Observations))
		
**Table A.3.6 Results**
* Pooled OLS, men, standard errors clustered on pidp. Columns est1-est3 are the
* first-generation models, est4-est6 the second-generation models.
set more off

* First generation. The quiet fit fixes the sample; each stored model is then
* restricted to the e(sample) of the fit immediately before it.
quietly regress logrhw $all if $pooled & female==0
eststo est1: regress logrhw $algan if e(sample) & $pooled & female==0, vce(cluster pidp)
eststo est2: regress logrhw $all_occup if e(sample) & $pooled & female==0, vce(cluster pidp)
eststo est3: regress logrhw $all if e(sample) & $pooled & female==0, vce(cluster pidp)

* Second generation. The sample comes from the $all2 fit on $pooled2. The
* dummies f3 ... f15 are temporarily renamed f2 ... f14 so that esttab lines the
* two generations up in the same rows; restore undoes the rename.
quietly regress logrhw $all if $pooled & female==0
quietly regress logrhw $all2 if $pooled2 & female==0
preserve
keep logrhw f3 f5 f7 f9 f11 f13 f15 year education total total2 lad region ///
     edu1 edu2 edu3 edu4 edu5 edu6 edu7 edu8 foreign1 foreign3 age female ///
     pidp first muslim occupation industry
rename (f3 f5 f7 f9 f11 f13 f15) (f2 f4 f6 f8 f10 f12 f14)
local base2 f2 f4 f6 f8 f10 f12 f14  i.year i.education  total total2
eststo est4: regress logrhw `base2' i.region if e(sample) & female==0, vce(cluster pidp)
eststo est5: regress logrhw `base2' i.lad first muslim if e(sample) & female==0, vce(cluster pidp)
eststo est6: regress logrhw `base2' i.lad first muslim i.industry i.occupation if e(sample) & female==0, vce(cluster pidp)
restore

esttab est1 est2 est3 est4 est5 est6 using $graphs_tables/table0_.tex, ///
    label replace star(* 0.10 ** 0.05 *** 0.01) unstack nonotes b(3) se(3) numbers ///
    keep(f2 f4 f6 f8 f10 f12 f14) ///
    mtitles("BS1" "FS1w/oOcc" "FS1" "BS2" "FS2w/oOcc" "FS2") ///
    stats(N_clust N, fmt(%9.0f) labels(Individuals Observations))

 
**Table A.3.7 Results**
* Random-effects counterpart of the pooled OLS table: men, standard errors
* clustered on pidp. est1-est3 first generation, est4-est6 second generation.
set more off

* First generation. A quiet pooled OLS fit fixes the sample; each xtreg is then
* restricted to the e(sample) of the fit immediately before it.
quietly regress logrhw $all if $pooled & female==0
eststo est1: xtreg logrhw $algan if e(sample) & $pooled & female==0, re vce(cluster pidp)
eststo est2: xtreg logrhw $all_occup if e(sample) & $pooled & female==0, re vce(cluster pidp)
eststo est3: xtreg logrhw $all if e(sample) & $pooled & female==0, re vce(cluster pidp)

* Second generation. The sample comes from the $all2 fit on $pooled2; f3 ... f15
* are temporarily renamed f2 ... f14 so both generations share table rows.
quietly regress logrhw $all if $pooled & female==0
quietly regress logrhw $all2 if $pooled2 & female==0
preserve
keep logrhw f3 f5 f7 f9 f11 f13 f15 year education total total2 lad region ///
     edu1 edu2 edu3 edu4 edu5 edu6 edu7 edu8 foreign1 foreign3 age female ///
     pidp first muslim occupation industry
rename (f3 f5 f7 f9 f11 f13 f15) (f2 f4 f6 f8 f10 f12 f14)
local base2 f2 f4 f6 f8 f10 f12 f14  i.year i.education  total total2
eststo est4: xtreg logrhw `base2' i.region if e(sample) & female==0, re vce(cluster pidp)
eststo est5: xtreg logrhw `base2' i.lad first muslim if e(sample) & female==0, re vce(cluster pidp)
eststo est6: xtreg logrhw `base2' i.lad first muslim i.industry i.occupation if e(sample) & female==0, re vce(cluster pidp)
restore

esttab est1 est2 est3 est4 est5 est6 using $graphs_tables/table1_.tex, ///
    label replace star(* 0.10 ** 0.05 *** 0.01) unstack nonotes b(3) se(3) numbers ///
    keep(f2 f4 f6 f8 f10 f12 f14) ///
    mtitles("BS1" "FS1w/oOcc" "FS1" "BS2" "FS2w/oOcc" "FS2") ///
    stats(N_clust N, fmt(%9.0f) labels(Individuals Observations))

**Table A.3.8 Results**
* Fortin/Lemieux table, part 1: four nested first-generation specifications for
* men, each tagged with Yes/No flags that esttab prints as footer rows.
global all   f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2  i.lad  ib10000.degree first muslim i.industry i.occupation    
global fortin f2 f4  f6  f8  f10 f12 f14   i.year i.education  total total2     
global fortin1  f2 f4  f6  f8  f10 f12 f14   i.year i.education  total total2  ib10000.degree 
global fortin2  f2 f4  f6  f8  f10 f12 f14   i.year i.education  total total2 i.education#fd  ib10000.degree    
global fortinall   f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2 i.education#fd ib10000.degree    

* Fix the sample: full model first, then the richest specification on that
* sample. Every later fit is restricted to the e(sample) of the fit before it.
regress logrhw $all if $pooled & female==0, vce(cluster pidp)
regress logrhw $fortinall if e(sample) & $pooled & female==0

* est9: total experience only.
regress logrhw $fortin if e(sample) & $pooled & female==0, vce(cluster pidp)
estadd local controls    "Yes", replace
estadd local exp         "Yes", replace
estadd local studyfe     "No",  replace
estadd local interaction "No",  replace
estadd local foreignexp  "No",  replace
eststo est9

* est10: adds ib10000.degree.
regress logrhw $fortin1 if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "Yes", replace
quietly estadd local studyfe     "Yes", replace
quietly estadd local interaction "No",  replace
quietly estadd local foreignexp  "No",  replace
eststo est10

* est11: adds i.education#fd.
regress logrhw $fortin2 if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "Yes", replace
quietly estadd local studyfe     "Yes", replace
quietly estadd local interaction "Yes", replace
quietly estadd local foreignexp  "No",  replace
eststo est11

* est12: replaces total/total2 by pexpuk/pexpabroad and their squares.
regress logrhw $fortinall if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "No",  replace
quietly estadd local studyfe     "Yes", replace
quietly estadd local interaction "Yes", replace
quietly estadd local foreignexp  "Yes", replace
eststo est12

esttab est9 est10 est11 est12 using $graphs_tables/table4_.tex, ///
    replace label star(* 0.10 ** 0.05 *** 0.01) nonotes b(3) se(3) ///
    keep(f2 f4 f6 f8 f10 f12 f14) mtitles("" "" "" "") ///
    s(controls exp studyfe interaction foreignexp N_clust N, fmt(%9.0f) label("Edu, Year" "Exp" "Country of Study" "Edu*ForDegree" "UK Exp, Foreign Exp" "Individuals" "Observations")) ///
    numbers

**Table A.3.9 Results**
* Fortin/Lemieux table, part 2: same layout as the previous table, but the
* experience split is added before the degree control.
global lemieux f2 f4  f6  f8  f10 f12 f14   i.year i.education  total total2     
global lemieux1  f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2  
global lemieux2  f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2 i.education#fd    
global lemieuxall   f2 f4  f6  f8  f10 f12 f14   i.year i.education  pexpuk pexpuk2 pexpabroad pexpabroad2 i.education#fd ib10000.degree    

* Fix the sample: full model first, then the richest specification on that
* sample. Every later fit is restricted to the e(sample) of the fit before it.
regress logrhw $all if $pooled & female==0, vce(cluster pidp)
regress logrhw $lemieuxall if e(sample) & $pooled & female==0

* est9: total experience only.
regress logrhw $lemieux if e(sample) & $pooled & female==0, vce(cluster pidp)
estadd local controls    "Yes", replace
estadd local exp         "Yes", replace
estadd local studyfe     "No",  replace
estadd local interaction "No",  replace
estadd local foreignexp  "No",  replace
eststo est9

* est10: pexpuk/pexpabroad (and squares) instead of total/total2.
regress logrhw $lemieux1 if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "No",  replace
quietly estadd local studyfe     "No",  replace
quietly estadd local interaction "No",  replace
quietly estadd local foreignexp  "Yes", replace
eststo est10

* est11: adds i.education#fd.
regress logrhw $lemieux2 if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "No",  replace
quietly estadd local studyfe     "No",  replace
quietly estadd local interaction "Yes", replace
quietly estadd local foreignexp  "Yes", replace
eststo est11

* est12: adds ib10000.degree.
regress logrhw $lemieuxall if e(sample) & $pooled & female==0, vce(cluster pidp)
quietly estadd local controls    "Yes", replace
quietly estadd local exp         "No",  replace
quietly estadd local studyfe     "Yes", replace
quietly estadd local interaction "Yes", replace
quietly estadd local foreignexp  "Yes", replace
eststo est12

esttab est9 est10 est11 est12 using $graphs_tables/table5_.tex, ///
    replace label star(* 0.10 ** 0.05 *** 0.01) nonotes b(3) se(3) ///
    keep(f2 f4 f6 f8 f10 f12 f14) mtitles("" "" "" "") ///
    s(controls exp studyfe interaction foreignexp N_clust N, fmt(%9.0f) label("Edu, Year" "Exp" "Country of Study" "Edu*ForDegree" "UK Exp, Foreign Exp" "Individuals" "Observations")) ///
    numbers

**Table A.3.10 Results** 
********************************************************************************Gelbach Decomposition, second generation  
* The two quiet regressions leave e(sample) set to the sample of the $all2
* model on $pooled2 for men; b1x2 is then restricted to that sample.
*   x1all()   : the seven second-generation origin dummies (f3 f5 ... f15).
*   x2all()   : the additional covariates entered as explicit dummy ranges.
*   x2delta() : splits those covariates into seven named groups (Education,
*               LocalEffects, YearEffects, UKExp, English, Muslim, Ind./Occ.).
* The result is stored as "basefull" (replacing any earlier estimate of that
* name) and exported to table66_.tex; the _cons rows of every group and of __TC
* are dropped from the table.
quietly reg  logrhw $all   if  $pooled & female==0 
quietly reg  logrhw  $all2    if  $pooled2 & female==0                                        
eststo basefull: b1x2 logrhw if e(sample) & $pooled2 & female==0, x1all(f3 f5 f7 f9 f11 f13 f15) x2all(edu2-edu8 total total2 la2-la433 ja2-ja11 first muslim in2-in34 oc2-oc236) x2delta(Education = edu2-edu8 : LocalEffects =la2-la433 : YearEffects =ja2-ja11 : UKExp=total total2 : English=first : Muslim = muslim : Ind./Occ. = in2-in34 oc2-oc236)  cluster(pidp) 
esttab basefull using $graphs_tables/table66_.tex, replace label star(* 0.10 ** 0.05 *** 0.01) nonotes b(3) se(3) unstack  mtitles("" "" "" "" "" "" "") nonumbers stats(N_clust N, fmt(%9.0f) labels(Individuals Observations))  compress drop(_cons:Education _cons:LocalEffects _cons:YearEffects _cons:UKExp  _cons:English _cons:Muslim _cons:Ind./Occ.  _cons:__TC)
***************************
**Figure A.4.1 Results**POLS
* Coefficient plot of the pooled OLS wage differentials: baseline (BS) and full
* (FS) specification for the first and the second generation, men.

* First generation: sample fixed by the full model, then baseline and full fits.
regress logrhw $all if $pooled & female==0
eststo BS1: regress logrhw $algan if e(sample) & $pooled & female==0, vce(cluster pidp)
eststo FS1: regress logrhw $all if e(sample) & $pooled & female==0, vce(cluster pidp)

* Second generation: sample fixed by the $all2 fit on $pooled2.
quietly regress logrhw $all if $pooled & female==0
quietly regress logrhw $all2 if $pooled2 & female==0
eststo BS2: regress logrhw $algan2 if e(sample) & female==0, vce(cluster pidp)
eststo FS2: regress logrhw $all2 if e(sample) & female==0, vce(cluster pidp)

* rescale(100) is left disabled as in the original.
coefplot (BS1, msymbol(T)) (FS1, msymbol(O)) ///
         (BS2, msymbol(S)) (FS2, msymbol(D)), ///
    keep(f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15) ///
    order(f2 f4  f6  f8  f10 f12 f14 f3 f5 f7 f9 f11 f13 f15) ///
    xline(0) title(Wage Differentials: First-/Second-Generation) levels(95) ///
    xlabel(-0.5(0.1)0.2) xtitle(Log-Points) ///
    coeflabels( ///
        f2 = "India_1" f2 = "" ///
        f4 = "Pakistan_1" f4 = "" ///
        f6 = "Bangladesh_1" f6= "" ///
        f8 = "Africa_1" f8= "" ///
        f10 = "Caribbean_1" f10 = "" ///
        f12 = "Europe_1" f12= "" ///
        f14 = "EasternEurope_1" f14 = "" ///
        f3 = "India_2" f3 = "" ///
        f5 = "Pakistan_2" f5 = "" ///
        f7 = "Bangladesh_2" f7= "" ///
        f9 = "Africa_2" f9= "" ///
        f11 = "Caribbean_2" f11 = "" ///
        f13 = "Europe_2" f13= "" ///
        f15 = "EasternEurope_2" f15 = "" ) ///
    grid(none) graphregion(color(white)) bgcolor(white)
graph save $graphs_tables/pointestimates, replace
graph export $graphs_tables/pointestimates.pdf, replace
 
****************************
**Figure A.4.2 Results**Random Effects
* Same plot as the pooled OLS figure, with the stored models estimated by
* random-effects xtreg; the samples are still fixed by pooled OLS fits.

* First generation.
regress logrhw $all if $pooled & female==0, vce(cluster pidp)
eststo BS1: xtreg logrhw $algan if e(sample) & $pooled & female==0, re vce(cluster pidp)
eststo FS1: xtreg logrhw $all if e(sample) & $pooled & female==0, re vce(cluster pidp)

* Second generation: sample fixed by the $all2 fit on $pooled2.
quietly regress logrhw $all if $pooled & female==0
quietly regress logrhw $all2 if $pooled2 & female==0
eststo BS2: xtreg logrhw $algan2 if e(sample) & female==0, re vce(cluster pidp)
eststo FS2: xtreg logrhw $all2 if e(sample) & female==0, re vce(cluster pidp)

* rescale(100) is left disabled as in the original.
coefplot (BS1, msymbol(T)) (FS1, msymbol(O)) ///
         (BS2, msymbol(S)) (FS2, msymbol(D)), ///
    keep(f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15) ///
    order(f2 f4  f6  f8  f10 f12 f14 f3 f5 f7 f9 f11 f13 f15) ///
    xline(0) title(Wage Differentials: First-/Second-Generation) levels(95) ///
    xlabel(-0.5(0.1)0.2) xtitle(Log-Points) ///
    coeflabels( ///
        f2 = "India_1" f2 = "" ///
        f4 = "Pakistan_1" f4 = "" ///
        f6 = "Bangladesh_1" f6= "" ///
        f8 = "Africa_1" f8= "" ///
        f10 = "Caribbean_1" f10 = "" ///
        f12 = "Europe_1" f12= "" ///
        f14 = "EasternEurope_1" f14 = "" ///
        f3 = "India_2" f3 = "" ///
        f5 = "Pakistan_2" f5 = "" ///
        f7 = "Bangladesh_2" f7= "" ///
        f9 = "Africa_2" f9= "" ///
        f11 = "Caribbean_2" f11 = "" ///
        f13 = "Europe_2" f13= "" ///
        f15 = "EasternEurope_2" f15 = "" ) ///
    grid(none) graphregion(color(white)) bgcolor(white)
graph save $graphs_tables/pointestimates_re, replace
graph export $graphs_tables/pointestimates_re.pdf, replace