加载中…
个人资料
  • 博客等级:
  • 博客积分:
  • 博客访问:
  • 关注人气:
  • 获赠金笔:0支
  • 赠出金笔:0支
  • 荣誉徽章:
正文 字体大小:

工业企业数据库匹配

(2016-05-08 23:21:12)
标签:

杂谈

分类: 计量/STATA

//Brandt(2012)

//stata code

clear

set mem 900m

set more off

 

 

* current directory should contain m2000, m2001, ..., m2006

 

 

 

*****************************************

                        *

* STAGE 1: Match Two Consecutive Years *

                        *

*****************************************

 

 

* change lower case to upper case in "firm id"

local i = 1998

while `i' <= 2006{

use m`i'.dta, clear

replace id`i'=upper(id`i')

save m`i'.10.dta, replace

local i = `i' + 1

}

 

 

local i = 1998

while `i' < 2006{

 

local j=`i'+1

 

 

**step 10: match by firm ID**

 

*deal with duplicates of IDs (there are a few firms that have same IDs)*

use m`i'.10.dta, clear

bysort id`i': keep if _N>1

save duplicates_ID`i'.dta, replace

 

use m`i'.10.dta, clear

bysort id`i': drop if _N>1

rename id`i' id

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id input`i' legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

sort id

save match`i'.1.dta, replace

 

use m`j'.10.dta, clear

bysort id`j': keep if _N>1

save duplicates_ID`j'.dta, replace

 

use m`j'.10.dta, clear

bysort id`j': drop if _N>1

rename id`j' id

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

sort id

save match`j'.1.dta, replace

 

use match`i'.1.dta, clear

merge id using match`j'.1.dta

keep if _merge==3

gen id`i'=id

rename id id`j'

drop _merge

gen match_method_`i'_`j'="ID"

gen match_status_`i'_`j'="3"

save matched_by_ID`i'_`j'.dta, replace

 

 

 

**step 20: match by firm names**

 

*match those unmatched firms in previous step by firm names*

 

use match`i'.1.dta, clear

merge id using match`j'.1.dta

keep if _merge==1

rename id id`i'

append using duplicates_ID`i'.dta

bysort name`i': keep if _N>1

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save duplicates_name`i'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`j'.1.dta

keep if _merge==1

rename id id`i'

append using duplicates_ID`i'.dta

bysort name`i': drop if _N>1

rename name`i' name

sort name

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched_by_ID`i'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`j'.1.dta

keep if _merge==2

rename id id`j'

append using duplicates_ID`j'.dta

bysort name`j': keep if _N>1

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j'    legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save duplicates_name`j'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`j'.1.dta

keep if _merge==2

rename id id`j'

append using duplicates_ID`j'.dta

bysort name`j': drop if _N>1

rename name`j' name

sort name

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j'    legal_person`j' name new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save unmatched_by_ID`j'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`j'.dta

keep if _merge==3

gen name`i'=name

rename name name`j'

drop _merge

gen match_method_`i'_`j'="firm name"

gen match_status_`i'_`j'="3"

save matched_by_name`i'_`j'.dta, replace

 

 

 

**step 30: match by the names of legal person representatives**

 

*match those unmatched firms in previous steps by firm legal person representatives*

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`j'.dta

keep if _merge==1

rename name name`i'

append using duplicates_name`i'.dta

replace legal_person`i'="." if legal_person`i'==""

gen code1=legal_person`i'+substr(dq`i',1,4)

bysort code1: keep if _N>1

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save duplicates_code1_`i'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`j'.dta

keep if _merge==1

rename name name`i'

append using duplicates_name`i'.dta

replace legal_person`i'="." if legal_person`i'==""

gen code1=legal_person`i'+substr(dq`i',1,4)

bysort code1: drop if _N>1

sort code1

keep code1 bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched_by_ID_and_name`i'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`j'.dta

keep if _merge==2

rename name name`j'

append using duplicates_name`j'.dta

gen code1=legal_person`j'+substr(dq`j',1,4)

bysort code1: keep if _N>1

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save duplicates_code1_`j'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`j'.dta

keep if _merge==2

rename name name`j'

append using duplicates_name`j'.dta

gen code1=legal_person`j'+substr(dq`j',1,4)

bysort code1: drop if _N>1

sort code1

keep code1 bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save unmatched_by_ID_and_name`j'.dta, replace

 

use unmatched_by_ID_and_name`i'.dta, clear

display _N

merge code1 using unmatched_by_ID_and_name`j'.dta

keep if _merge==3

drop _merge code1

gen match_method_`i'_`j'="legal person"

gen match_status_`i'_`j'="3"

save matched_by_legalperson`i'_`j'.dta, replace

 

 

 

**step 40: match by phone number + city code**

 

*match those unmatched firms in previous steps by phone number + city code*

 

use unmatched_by_ID_and_name`i'.dta, clear

merge code1 using unmatched_by_ID_and_name`j'.dta

keep if _merge==1

drop code1

append using duplicates_code1_`i'

replace phone`i'="." if phone`i'==""

gen code2=substr(dq`i',1,4)+substr(cic`i',1,3)+phone`i'

bysort code2: keep if _N>1

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save duplicates_code2_`i'.dta, replace

 

use unmatched_by_ID_and_name`i'.dta, clear

merge code1 using unmatched_by_ID_and_name`j'.dta

keep if _merge==1

drop code1

append using duplicates_code1_`i'

replace phone`i'="." if phone`i'==""

gen code2=substr(dq`i',1,4)+substr(cic`i',1,3)+phone`i'

bysort code2: drop if _N>1

sort code2

keep code2 bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched_by_ID_and_name_and_legalperson`i'.dta, replace

 

 

use unmatched_by_ID_and_name`i'.dta, clear

merge code1 using unmatched_by_ID_and_name`j'.dta

keep if _merge==2

drop code1

append using duplicates_code1_`j'

gen code2=substr(dq`j',1,4)+substr(cic`j',1,3)+phone`j'

bysort code2: keep if _N>1

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j'    legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save duplicates_code2_`j'.dta, replace

 

use unmatched_by_ID_and_name`i'.dta, clear

merge code1 using unmatched_by_ID_and_name`j'.dta

keep if _merge==2

drop code1

append using duplicates_code1_`j'

gen code2=substr(dq`j',1,4)+substr(cic`j',1,3)+phone`j'

bysort code2: drop if _N>1

sort code2

keep code2 bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save unmatched_by_ID_and_name_and_legalperson`j'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear

merge code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta

keep if _merge==3

drop _merge code2

gen match_method_`i'_`j'="phone number"

gen match_status_`i'_`j'="3"

save matched_by_phone`i'_`j'.dta, replace

 

 

 

**step 50: match by code = founding year + geographic code + industry code + name of town + name of main product**

 

*match those unmatched firms in previous steps by founding year + geographic code + industry code + name of town + name of main product*

 

use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear

merge code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta

keep if _merge==1

drop code2

append using duplicates_code2_`i'.dta

replace town`i'="." if town`i'==""

replace product1_`i'="." if product1_`i'==""

gen code3=bdat`i'+substr(dq`i',1,6)+substr(cic`i',1,4)+town`i'+product1_`i'

bysort code3: keep if _N>1

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save duplicates_code3_`i'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear

merge code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta

keep if _merge==1

drop code2

append using duplicates_code2_`i'.dta

replace town`i'="." if town`i'==""

replace product1_`i'="." if product1_`i'==""

gen code3=bdat`i'+substr(dq`i',1,6)+substr(cic`i',1,4)+town`i'+product1_`i'

bysort code3: drop if _N>1

sort code3

keep code3 bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i' fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched_by_ID_and_name_and_legalperson_and_phone`i'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear

merge code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta

keep if _merge==2

drop code2

append using duplicates_code2_`j'.dta

gen code3=bdat`j'+substr(dq`j',1,6)+substr(cic`j',1,4)+town`j'+product1_`j'

bysort code3: keep if _N>1

keep bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save duplicates_code3_`j'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson`i'.dta,clear

merge code2 using unmatched_by_ID_and_name_and_legalperson`j'.dta

keep if _merge==2

drop code2

append using duplicates_code2_`j'.dta

gen code3=bdat`j'+substr(dq`j',1,6)+substr(cic`j',1,4)+town`j'+product1_`j'

bysort code3: drop if _N>1

sort code3

keep code3 bdat`j' cic`j' dq`j' e_HMT`j' e_collective`j' e_foreign`j' e_individual`j' e_legal_person`j' e_state`j' employment`j' export`j' fa_net`j' fa_original`j' a_dep`j' c_dep`j' id`j' input`j' legal_person`j' name`j' new_product`j' output`j' phone`j' product1_`j' profit`j' revenue`j' street`j' town`j' type`j' va`j' village`j' wage`j' zip`j'

save unmatched_by_ID_and_name_and_legalperson_and_phone`j'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson_and_phone`i'.dta,clear

display _N

merge code3 using unmatched_by_ID_and_name_and_legalperson_and_phone`j'.dta

keep if _merge==3

drop _merge code3

gen match_method_`i'_`j'="code 3"

gen match_status_`i'_`j'="3"

save matched_by_code3_`i'_`j'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson_and_phone`i'.dta,clear

merge code3 using unmatched_by_ID_and_name_and_legalperson_and_phone`j'.dta

keep if _merge==1

drop _merge code3

append using duplicates_code3_`i'.dta

gen match_method_`i'_`j'=""

gen match_status_`i'_`j'="1"

save unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2_`i'.dta, replace

 

use unmatched_by_ID_and_name_and_legalperson_and_phone`i'.dta,clear

merge code3 using unmatched_by_ID_and_name_and_legalperson_and_phone`j'.dta

keep if _merge==2

drop _merge code3

append using duplicates_code3_`j'.dta

gen match_method_`i'_`j'=""

gen match_status_`i'_`j'="2"

save unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2_`j'.dta, replace

 

 

 

**step 60: merge the matched and unmatched files to create files of two consecutive years**

 

use matched_by_ID`i'_`j'.dta, clear

append using matched_by_name`i'_`j'.dta

append using matched_by_legalperson`i'_`j'.dta

append using matched_by_phone`i'_`j'.dta

append using matched_by_code3_`i'_`j'.dta

append using unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2_`i'.dta

append using unmatched_by_ID_and_name_and_legalperson_and_phone_and_code2_`j'.dta

save m`i'-m`j'.dta, replace

 

 

local i = `i' + 1

}

 

 

local i = 1998

while `i' < 2006{

 

local j=`i'+1

 

use m`i'-m`j'.dta, clear

tab match_method_`i'_`j'

tab match_status_`i'_`j'

 

local i = `i' + 1

}

 

 

 

******************************************

                        *

* STAGE 2: Match Three Consecutive Years *

                        *

******************************************

 

 

 

local i = 1998

while `i' < 2005{

 

local j=`i'+1

local k=`i'+2

 

 

**step 70: create a three-year balanced sample**

 

use m`i'-m`j'.dta, clear

keep if match_status_`i'_`j'=="1"

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i' legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched`i'.10.dta, replace

 

use m`i'-m`j'.dta, clear

drop if match_status_`i'_`j'=="1"

gen code=id`j'+string(revenue`j')+string(employment`j')+string(profit`j')

sort code

save m`i'-m`j'.10.dta, replace

 

use m`j'-m`k'.dta, clear

keep if match_status_`j'_`k'=="2"

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k' legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

save unmatched`k'.10.dta, replace

 

use m`j'-m`k'.dta, clear

drop if match_status_`j'_`k'=="2"

gen code=id`j'+string(revenue`j')+string(employment`j')+string(profit`j')

sort code

save m`j'-m`k'.10.dta, replace

 

use m`i'-m`j'.10.dta, clear

merge code using m`j'-m`k'.10.dta

drop _merge code

keep if match_status_`i'_`j'=="3"& match_status_`j'_`k'=="3"

gen match_status_`i'_`k'="3"

gen match_method_`i'_`k'="`j'"

save balanced.m`i'-m`j'-m`k'.dta, replace

 

 

 

**step 80: create files for unmatched `i' firms and `k' firms**

 

 

use m`i'-m`j'.10.dta, clear

merge code using m`j'-m`k'.10.dta

drop _merge code

drop if match_status_`i'_`j'=="3"& match_status_`j'_`k'=="3"

drop if id`i'==""

gen code=id`i'+string(revenue`i')+string(employment`i')+string(profit`i')

sort code

save unmatched`i'.15.dta, replace

 

use unmatched`i'.15.dta, clear

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i' legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

append using unmatched`i'.10.dta

save unmatched`i'.20.dta, replace

 

 

use m`i'-m`j'.10.dta, clear

merge code using m`j'-m`k'.10.dta

drop _merge code

drop if match_status_`i'_`j'=="3"& match_status_`j'_`k'=="3"

drop if id`k'==""

gen code=id`k'+string(revenue`k')+string(employment`k')+string(profit`k')

sort code

save unmatched`k'.15.dta, replace

 

use unmatched`k'.15.dta, clear

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k' legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

append using unmatched`k'.10.dta

save unmatched`k'.20.dta, replace

 

use m`i'-m`j'.10.dta, clear

merge code using m`j'-m`k'.10.dta

drop _merge code

drop if match_status_`i'_`j'=="3"| match_status_`j'_`k'=="3"

gen code=id`j'+string(revenue`j')+string(employment`j')+string(profit`j')

sort code

save unmatched`j'.15.dta, replace

 

 

 

**step 90: match `i' firms and `k' firms by firm ID and name**

 

 

*ID*

 

use unmatched`i'.20.dta, clear

bysort id`i': keep if _N>1

save duplicates_ID`i'.dta, replace

 

use unmatched`i'.20.dta, clear

bysort id`i': drop if _N>1

rename id`i' id

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id input`i' legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

sort id

save match`i'.1.dta, replace

 

use unmatched`k'.20.dta, clear

bysort id`k': keep if _N>1

save duplicates_ID`k'.dta, replace

 

use unmatched`k'.20.dta, clear

bysort id`k': drop if _N>1

rename id`k' id

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id input`k' legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

sort id

save match`k'.1.dta, replace

 

use match`i'.1.dta, clear

merge id using match`k'.1.dta

keep if _merge==3

gen id`i'=id

rename id id`k'

drop _merge

gen match_method_`i'_`k'="ID"

gen match_status_`i'_`k'="3"

save matched_by_ID`i'_`k'.dta, replace

 

*name*

 

use match`i'.1.dta, clear

merge id using match`k'.1.dta

keep if _merge==1

rename id id`i'

append using duplicates_ID`i'.dta

bysort name`i': keep if _N>1

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save duplicates_name`i'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`k'.1.dta

keep if _merge==1

rename id id`i'

append using duplicates_ID`i'.dta

bysort name`i': drop if _N>1

rename name`i' name

sort name

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

save unmatched_by_ID`i'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`k'.1.dta

keep if _merge==2

rename id id`k'

append using duplicates_ID`k'.dta

bysort name`k': keep if _N>1

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k'    legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

save duplicates_name`k'.dta, replace

 

use match`i'.1.dta, clear

merge id using match`k'.1.dta

keep if _merge==2

rename id id`k'

append using duplicates_ID`k'.dta

bysort name`k': drop if _N>1

rename name`k' name

sort name

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k'    legal_person`k' name new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

 

compress

save unmatched_by_ID`k'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`k'.dta

keep if _merge==3

gen name`i'=name

rename name name`k'

drop _merge

gen match_method_`i'_`k'="firm name"

gen match_status_`i'_`k'="3"

compress

save matched_by_name`i'_`k'.dta, replace

 

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`k'.dta

keep if _merge==1

rename name name`i'

keep bdat`i' cic`i' dq`i' e_HMT`i' e_collective`i' e_foreign`i' e_individual`i' e_legal_person`i' e_state`i' employment`i' export`i'    fa_net`i' fa_original`i' a_dep`i' c_dep`i' id`i' input`i'    legal_person`i' name`i' new_product`i' output`i' phone`i' product1_`i' profit`i' revenue`i' street`i' town`i' type`i' va`i' village`i' wage`i' zip`i'

append using duplicates_name`i'.dta

gen match_method_`i'_`k'=""

gen match_status_`i'_`k'="1"

compress

save unmatched_by_ID_and_name_`i'.dta, replace

 

use unmatched_by_ID`i'.dta, clear

merge name using unmatched_by_ID`k'.dta

keep if _merge==2

rename name name`k'

keep bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k'    legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

append using duplicates_name`k'.dta

gen match_method_`i'_`k'=""

gen match_status_`i'_`k'="2"

compress

save unmatched_by_ID_and_name_`k'.dta, replace

 

 

 

**step 100: merge the files**

 

use matched_by_ID`i'_`k'.dta, clear

append using matched_by_name`i'_`k'.dta

append using unmatched_by_ID_and_name_`i'.dta

append using unmatched_by_ID_and_name_`k'.dta

save m`i'-m`k'.dta, replace

 

use m`i'-m`k'.dta, clear

gen code=id`i'+string(revenue`i')+string(employment`i')+string(profit`i')

sort code

merge code using unmatched`i'.15.dta

drop code _merge

sort id`i'

compress

save m`i'-m`k'.05.dta, replace

 

*deal with disagreement (_merge==5 if "update" is used)*

 

use m`i'-m`k'.05.dta, clear

gen code=id`k'+string(revenue`k')+string(employment`k')+string(profit`k')

sort code

merge code using unmatched`k'.15.dta, update

keep if _merge==5

drop bdat`k' cic`k' dq`k' e_HMT`k' e_collective`k' e_foreign`k' e_individual`k' e_legal_person`k' e_state`k' employment`k' export`k'    fa_net`k' fa_original`k' a_dep`k' c_dep`k' id`k' input`k'    legal_person`k' name`k' new_product`k' output`k' phone`k' product1_`k' profit`k' revenue`k' street`k' town`k' type`k' va`k' village`k' wage`k' zip`k'

drop code _merge

sort id`i'

compress

save m`i'-m`k'.disagree.dta, replace

 

use m`i'-m`k'.05.dta, clear

merge id`i' using m`i'-m`k'.disagree.dta

drop if _merge==3

drop _merge

append using m`i'-m`k'.disagree.dta

 

gen code=id`k'+string(revenue`k')+string(employment`k')+string(profit`k')

sort code

merge code using unmatched`k'.15.dta, update

drop code _merge

gen code=id`j'+string(revenue`j')+string(employment`j')+string(profit`j')

sort code

merge code using unmatched`j'.15.dta, update

drop code _merge

 

compress

save m`i'-m`k'.dta.10.dta, replace

 

use m`i'-m`k'.dta.10.dta, clear

append using balanced.m`i'-m`j'-m`k'.dta

drop match_status_`i'_`j'

drop match_status_`j'_`k'

drop match_status_`i'_`k'

drop match_method_`i'_`j'

drop match_method_`j'_`k'

drop match_method_`i'_`k'

gen match_status_`i'_`j'_`k'="`i'-`j'-`k'" if id`i'!=""&id`j'!=""&id`k'!=""

replace match_status_`i'_`j'_`k'="`i'-`j' only" if id`i'!=""&id`j'!=""&id`k'==""

replace match_status_`i'_`j'_`k'="`j'-`k' only" if id`i'==""&id`j'!=""&id`k'!=""

replace match_status_`i'_`j'_`k'="`i'-`k' only" if id`i'!=""&id`j'==""&id`k'!=""

replace match_status_`i'_`j'_`k'="`i' no match" if id`i'!=""&id`j'==""&id`k'==""

replace match_status_`i'_`j'_`k'="`j' no match" if id`i'==""&id`j'!=""&id`k'==""

replace match_status_`i'_`j'_`k'="`k' no match" if id`i'==""&id`j'==""&id`k'!=""

 

compress

save unbalanced.`i'-`j'-`k'.dta, replace

 

 

local i = `i' + 1

}

 

 

local i = 1998

while `i' < 2005{

 

local j=`i'+1

local k=`i'+2

 

use unbalanced.`i'-`j'-`k'.dta, clear

tab match_status_`i'_`j'_`k'

 

local i = `i' + 1

}

 

 

 

*************************************

                    *

* STAGE 3: Create a Nine-Year Panel *

*                      *

*************************************

 

 

use unbalanced.1998-1999-2000.dta, clear

tab match_status_1998_1999_2000

gen code=id2000+string(revenue2000)+string(employment2000)+string(profit2000)

sort code

save test1.dta, replace

 

 

**step 110: add 2001 from 1999-2000-2001**

 

use unbalanced.1999-2000-2001.dta, clear

tab match_status_1999_2000_2001

keep if match_status_1999_2000_2001=="1999-2000-2001"|match_status_1999_2000_2001=="2000-2001 only"

gen code=id2000+string(revenue2000)+string(employment2000)+string(profit2000)

sort code

save test2.dta, replace

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id1999+string(revenue1999)+string(employment1999)+string(profit1999)

sort code

save test3.dta, replace

 

use unbalanced.1999-2000-2001.dta, clear

tab match_status_1999_2000_2001

keep if match_status_1999_2000_2001=="1999-2001 only"

gen code=id1999+string(revenue1999)+string(employment1999)+string(profit1999)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2001    bdat2001 cic2001 dq2001 e_HMT2001 e_collective2001 e_foreign2001 e_individual2001 e_legal_person2001 e_state2001 employment2001 export2001    fa_net2001 fa_original2001 a_dep2001 c_dep2001 input2001    name2001 new_product2001 output2001 profit2001 revenue2001 type2001 va2001 wage2001 legal_person2001 phone2001 product1_2001 street2001 town2001 village2001 zip2001

save test6.dta, replace

 

use unbalanced.1999-2000-2001.dta, clear

keep if match_status_1999_2000_2001=="2001 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

dis _N

append using test7.dta

dis _N

gen code=id2001+string(revenue2001)+string(employment2001)+string(profit2001)

sort code

save test1.dta, replace

 

 

 

**step 120: add 2002 from 2000-2001-2002**

 

use unbalanced.2000-2001-2002.dta, clear

tab match_status_2000_2001_2002

keep if match_status_2000_2001_2002=="2000-2001-2002"|match_status_2000_2001_2002=="2001-2002 only"

gen code=id2001+string(revenue2001)+string(employment2001)+string(profit2001)

sort code

save test2.dta, replace

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id2000+string(revenue2000)+string(employment2000)+string(profit2000)

sort code

save test3.dta, replace

 

use unbalanced.2000-2001-2002.dta, clear

tab match_status_2000_2001_2002

keep if match_status_2000_2001_2002=="2000-2002 only"

gen code=id2000+string(revenue2000)+string(employment2000)+string(profit2000)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2002    bdat2002 cic2002 dq2002 e_HMT2002 e_collective2002 e_foreign2002 e_individual2002 e_legal_person2002 e_state2002 employment2002 export2002    fa_net2002 fa_original2002 a_dep2002 c_dep2002 input2002    name2002 new_product2002 output2002 profit2002 revenue2002 type2002 va2002 wage2002 legal_person2002 phone2002 product1_2002 street2002 town2002 village2002 zip2002

save test6.dta, replace

 

use unbalanced.2000-2001-2002.dta, clear

keep if match_status_2000_2001_2002=="2002 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

dis _N

append using test7.dta

dis _N

gen code=id2002+string(revenue2002)+string(employment2002)+string(profit2002)

sort code

save test1.dta, replace

 

 

 

**step 130: add 2003 from 2001-2002-2003**

 

use unbalanced.2001-2002-2003.dta, clear

tab match_status_2001_2002_2003

keep if match_status_2001_2002_2003=="2001-2002-2003"|match_status_2001_2002_2003=="2002-2003 only"

gen code=id2002+string(revenue2002)+string(employment2002)+string(profit2002)

sort code

save test2.dta, replace

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id2001+string(revenue2001)+string(employment2001)+string(profit2001)

sort code

save test3.dta, replace

 

use unbalanced.2001-2002-2003.dta, clear

tab match_status_2001_2002_2003

keep if match_status_2001_2002_2003=="2001-2003 only"

gen code=id2001+string(revenue2001)+string(employment2001)+string(profit2001)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2003    bdat2003 cic2003 dq2003 e_HMT2003 e_collective2003 e_foreign2003 e_individual2003 e_legal_person2003 e_state2003 employment2003 export2003    fa_net2003 fa_original2003 a_dep2003 c_dep2003 input2003    name2003 new_product2003 output2003 profit2003 revenue2003 type2003 va2003 wage2003 legal_person2003 phone2003 product1_2003 street2003 town2003 village2003 zip2003

save test6.dta, replace

 

use unbalanced.2001-2002-2003.dta, clear

keep if match_status_2001_2002_2003=="2003 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

dis _N

append using test7.dta

dis _N

gen code=id2003+string(revenue2003)+string(employment2003)+string(profit2003)

sort code

save test1.dta, replace

 

 

 

**step 140: add 2004 from 2002-2003-2004 **

 

use unbalanced.2002-2003-2004.dta, clear

tab match_status_2002_2003_2004

keep if match_status_2002_2003_2004=="2002-2003-2004"|match_status_2002_2003_2004=="2003-2004 only"

gen code=id2003+string(revenue2003)+string(employment2003)+string(profit2003)

sort code

save test2.dta, replace

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id2002+string(revenue2002)+string(employment2002)+string(profit2002)

sort code

save test3.dta, replace

 

use unbalanced.2002-2003-2004.dta, clear

tab match_status_2002_2003_2004

keep if match_status_2002_2003_2004=="2002-2004 only"

gen code=id2002+string(revenue2002)+string(employment2002)+string(profit2002)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2004    bdat2004 cic2004 dq2004 e_HMT2004 e_collective2004 e_foreign2004 e_individual2004 e_legal_person2004 e_state2004 employment2004 export2004    fa_net2004 fa_original2004 a_dep2004 c_dep2004 input2004    name2004 new_product2004 output2004 profit2004 revenue2004 type2004 va2004 wage2004 legal_person2004 phone2004 product1_2004 street2004 town2004 village2004 zip2004

save test6.dta, replace

 

use unbalanced.2002-2003-2004.dta, clear

keep if match_status_2002_2003_2004=="2004 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

dis _N

append using test7.dta

dis _N

gen code=id2004+string(revenue2004)+string(employment2004)+string(profit2004)

sort code

save test1.dta, replace

 

 

 

**step 150: add 2005 from 2003-2004-2005 **

 

use unbalanced.2003-2004-2005.dta, clear

tab match_status_2003_2004_2005

keep if match_status_2003_2004_2005=="2003-2004-2005"|match_status_2003_2004_2005=="2004-2005 only"

gen code=id2004+string(revenue2004)+string(employment2004)+string(profit2004)

sort code

save test2.dta, replace

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id2003+string(revenue2003)+string(employment2003)+string(profit2003)

sort code

save test3.dta, replace

 

use unbalanced.2003-2004-2005.dta, clear

tab match_status_2003_2004_2005

keep if match_status_2003_2004_2005=="2003-2005 only"

gen code=id2003+string(revenue2003)+string(employment2003)+string(profit2003)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2005    bdat2005 cic2005 dq2005 e_HMT2005 e_collective2005 e_foreign2005 e_individual2005 e_legal_person2005 e_state2005 employment2005 export2005    fa_net2005 fa_original2005 a_dep2005 c_dep2005 input2005    name2005 new_product2005 output2005 profit2005 revenue2005 type2005 va2005 wage2005 legal_person2005 phone2005 product1_2005 street2005 town2005 village2005 zip2005

save test6.dta, replace

 

use unbalanced.2003-2004-2005.dta, clear

keep if match_status_2003_2004_2005=="2005 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

dis _N

append using test7.dta

dis _N

gen code=id2005+string(revenue2005)+string(employment2005)+string(profit2005)

sort code

save test1.dta, replace

 

 

**step 160: add 2006 from 2004-2005-2006 **

 

 

use unbalanced.2004-2005-2006.dta, clear

tab match_status_2004_2005_2006

keep if match_status_2004_2005_2006=="2004-2005-2006"|match_status_2004_2005_2006=="2005-2006 only"

gen code=id2005+string(revenue2005)+string(employment2005)+string(profit2005)

sort code

save test2.dta, replace

 

 

use test1.dta, clear

merge code using test2.dta

tab _merge

drop _merge code

gen code=id2004+string(revenue2004)+string(employment2004)+string(profit2004)

sort code

save test3.dta, replace

 

use unbalanced.2004-2005-2006.dta, clear

tab match_status_2004_2005_2006

keep if match_status_2004_2005_2006=="2004-2006 only"

gen code=id2004+string(revenue2004)+string(employment2004)+string(profit2004)

sort code

save test4.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update

tab _merge

drop code _merge

save test5.dta, replace

 

use test3.dta, clear

merge code using test4.dta, update replace

keep if _merge==5

keep id2006    bdat2006 cic2006 dq2006 e_HMT2006 e_collective2006 e_foreign2006 e_individual2006 e_legal_person2006 e_state2006 employment2006 export2006    fa_net2006 fa_original2006 a_dep2006 c_dep2006 input2006    name2006 new_product2006 output2006 profit2006 revenue2006 type2006 va2006 wage2006 legal_person2006 phone2006 product1_2006 street2006 town2006 village2006 zip2006

save test6.dta, replace

 

use unbalanced.2004-2005-2006.dta, clear

keep if match_status_2004_2005_2006=="2006 no match"

display _N

save test7.dta, replace

 

use test5.dta, clear

append using test6.dta

append using test7.dta

aorder

drop match_status_1998_1999_2000- match_status_2004_2005_2006

compress

save unbalanced.1998--2006.dta, replace

0

阅读 收藏 喜欢 打印举报/Report
前一篇:资料分享
  

新浪BLOG意见反馈留言板 欢迎批评指正

新浪简介 | About Sina | 广告服务 | 联系我们 | 招聘信息 | 网站律师 | SINA English | 产品答疑

新浪公司 版权所有