******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 1: House Prices
******************************************************************************
* --- Session setup ---
* Wipe the Results window and drop whatever data are in memory.
cls
clear
* Move to the folder holding the Day 2 material.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 2"
* Load the data set.
use "[Link]"
* Data Description: summary statistics for the five variables analysed below
summarize price housesize bdrms lotsize assess
* Histograms
* One 20-bin histogram with a normal-density overlay per variable.
* The three lists below are parallel: variable, graph name, title stem.
local histvars   price housesize bdrms lotsize assess
local histnames  histprices housesize histbrms histlotsize histassess
local histtitles `""House Prices" "House Sizes" "Number of Bedrooms" "Lot Sizes" "Assessment Values""'
local k = 0
foreach v of local histvars {
    local ++k
    local gname  : word `k' of `histnames'
    local gtitle : word `k' of `histtitles'
    * Replace the variable label with a single blank before plotting.
    label variable `v' " "
    histogram `v', normal bin(20) title("`gtitle' for 88 Homes") name(`gname', replace)
}
* OLS Estimation
* Regressors shared by both models
local basex housesize bdrms
* First Model: price on house size and number of bedrooms
regress price `basex'
predict pricehat1, xb          // linear prediction from the first model
list price pricehat1 in 1      // actual and predicted price, first observation
estat ic                       // information criteria for the first model
* Second Model: first model plus lot size and assessment value
regress price `basex' lotsize assess
estat ic                       // information criteria for the second model
predict pricehat2, xb          // linear prediction from the second model
list price pricehat2 in 1      // actual and predicted price, first observation
* Calculating Ceteris Paribus Effect (Frisch-Waugh Result)
* Variables to be partialled out of price, housesize and bdrms
local partial lotsize assess
* Auxiliary Regression 1: variation in price not driven by lotsize and assess
quietly regress price `partial'
predict residpriceadj, resid
* Auxiliary Regression 2a: variation in housesize not driven by lotsize and assess
quietly regress housesize `partial'
predict residhsadj, resid
* Auxiliary Regression 2b: variation in bdrms not driven by lotsize and assess
quietly regress bdrms `partial'
predict residbradj, resid
* Ceteris Paribus Effect: regress the price residuals on the two regressor
* residuals to obtain the marginal effects of housesize and bdrms on price
regress residpriceadj residhsadj residbradj
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 2: Election Recount
******************************************************************************
* --- Session setup ---
* Wipe the Results window and drop whatever data are in memory.
cls
clear
* Move to the folder holding the Day 3 material.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 3"
* Load the data set.
use "[Link]"
* Data Description: summary statistics for every variable
summarize
* OLS Estimation
* First Model: BUCHANAN on GORE
regress BUCHANAN GORE
* Scatter plot overlaid with the OLS fit and its 90% confidence band
twoway (scatter BUCHANAN GORE)              ///
       (lfitci BUCHANAN GORE, level(90)),   ///
       title("67 Counties") name(scatterlfitci1, replace)
* Remove the outlier and redo the regression analysis
* OBS is converted to the numeric identifier COUNTY; county 50 is excluded.
destring OBS, generate(COUNTY)
keep if COUNTY != 50
* First Model, re-estimated without county 50
regress BUCHANAN GORE
* Scatter plot overlaid with the OLS fit and its 90% confidence band
twoway (scatter BUCHANAN GORE)              ///
       (lfitci BUCHANAN GORE, level(90)),   ///
       title("66 Counties") name(scatterlfitci2, replace)
* Return to Full Data Set
clear
use "[Link]"
destring OBS, generate(COUNTY)
* Create Dummy Variable for Palm Beach County (COUNTY == 50)
generate PBC = (COUNTY == 50)
* Second Model: BUCHANAN on GORE and the Palm Beach dummy
regress BUCHANAN GORE PBC
* Two-sided Wald test of H0: coefficient on PBC = 975
test _b[PBC] = 975
* One-sided p-value. sqrt(r(F)) equals |t|, so the sign of the t statistic
* must be restored from the sign of (estimate - 975). 1 - ttail(df, t) is
* Pr(T < t), i.e. the p-value for H0: coef >= 975 against H1: coef < 975.
local tsign = sign(_b[PBC] - 975)
display "H0: coef >= 975 p-value = " 1-ttail(r(df_r), `tsign'*sqrt(r(F)))
* Heteroskedasticity
* Breusch-Pagan tests using TOTAL as the variable explaining the variance,
* run after each of the two count-level specifications.
foreach rhs in "GORE" "GORE PBC" {
    quietly regress BUCHANAN `rhs'
    * Default version of the test (normally distributed disturbances)
    estat hettest TOTAL, mtest
    * Version that only assumes iid disturbances
    estat hettest TOTAL, iid
}
* Fraction of Votes
* Express both candidates' votes as shares of the county total.
generate FBUCHANAN = BUCHANAN/TOTAL
generate FGORE = GORE/TOTAL
* First Model: vote shares
regress FBUCHANAN FGORE
* Scatter Plot with OLS Regression Line
* Stored under its own name: scatterlfitci1 is already used by the
* count-level plot earlier in this file and would otherwise be overwritten.
twoway (scatter FBUCHANAN FGORE) (lfitci FBUCHANAN FGORE, level(90)), ///
    title("67 Counties") name(scatterlfitci3, replace)
* Breusch-Pagan Test, with 1/TOTAL as the variable explaining the variance
generate TOTALINV = TOTAL^(-1)
estat hettest TOTALINV, mtest
estat hettest TOTALINV, iid
* Second Model: vote shares plus the Palm Beach dummy
regress FBUCHANAN FGORE PBC
estat hettest TOTALINV, mtest
estat hettest TOTALINV, iid
* Feasible GLS Estimation
generate TOTAL2 = TOTAL^2
* Analytic weights w_i = 1/TOTAL2: the variance of the i-th disturbance is
* taken to be Var(eps)/w_i = Var(eps)*TOTAL2, which is equivalent to dividing
* the i-th observation by TOTAL before running OLS.
* Each specification (First Model, then Second Model with PBC) is estimated
* with conventional and then with HC3 standard errors.
foreach rhs in "GORE" "GORE PBC" {
    regress BUCHANAN `rhs' [aw = 1/TOTAL2]
    regress BUCHANAN `rhs' [aw = 1/TOTAL2], vce(hc3)
}
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 3: Individual Retirements Accounts (IRAs)
******************************************************************************
* --- Session setup ---
* Wipe the Results window and drop whatever data are in memory.
cls
clear
* Move to the folder holding the Day 4 material.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 4"
* Load the data set.
use "[Link]"
* Data Description: summary statistics for every variable
summarize
* Exogenous regressors shared by every equation below
global xlistexo inc incsq age agesq
* OLS Estimation: pira on p401k and the exogenous regressors
regress pira p401k $xlistexo
* Instrument Analysis: p401k on the instrument e401k and the exogenous regressors
regress p401k e401k $xlistexo
* Just-Identified IV Estimation (p401k instrumented by e401k),
* first with conventional and then with robust standard errors;
* the first-stage regression is displayed in both cases
ivregress 2sls pira $xlistexo (p401k = e401k), first
ivregress 2sls pira $xlistexo (p401k = e401k), first vce(robust)
* Quality of Instruments: first-stage diagnostics
estat firststage
* Comparing OLS and IV Estimators (Hausman Test): endogeneity test for p401k
estat endogenous
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 4: Deciding Whether to See a Medical Doctor
******************************************************************************
* --- Session setup ---
* Wipe the Results window and drop whatever data are in memory.
cls
clear
* Move to the folder holding the Day 5 material.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 5"
* Load the data set.
use "[Link]"
* dmed: 1 if annual USD medical expenditure (excluding dental and outpatient
* mental expenditure) > 0, 0 otherwise
* linc: logarithm of annual USD family income
* lc: log(coinsrate+1) where coinsurance rate is 0 to 100
* idp: 1 if individual deductible plan
* lpi: log(annual participation incentive payment) or 0 if no payment
* fmde: log(max(medical deductible expenditure (mde))) if idp=1 and mde>1
* or 0 otherwise.
* ndisease: number of chronic diseases
* physlim: 1 if physical limitation
* hlthg: 1 if good health
* hlthf: 1 if fair health
* hlthp: 1 if poor health (omitted is excellent)
* lfam: log of family size
* educdec: years of schooling of decision maker
* age: exact age
* black: 1 if black
* female: 1 if female
* child: 1 if child
* femchild: 1 if female child
* Data Description: summary statistics for every variable
summarize
* Independent-variable list shared by all binary-choice models below
global xlist1 linc lc lpi fmde idp ndisease physlim hlthg hlthf hlthp ///
    lfam educdec age black female child femchild

* Probit Model for dmed
probit dmed $xlist1
estimates store ProbitEst
* Average marginal effects of every regressor (probit)
margins, dydx(*)

* Logit Model for dmed
logit dmed $xlist1
estimates store LogitEst
* Probit and logit coefficients side by side
estimates table ProbitEst LogitEst, b
* Average marginal effects of every regressor (logit)
margins, dydx(*)

* Odds Ratios: same logit specification, reported as odds ratios
logistic dmed $xlist1
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 5: Ambulatory Expenditure
******************************************************************************
* --- Session setup ---
* Wipe the Results window and drop whatever data are in memory.
cls
clear
* Move to the folder holding the Day 8 material.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 8"
* Load the data set.
use "[Link]"
* y: ambulatory expenditure
* lny: logarithm of ambulatory expenditure, with zeros replacing NA's
* dy: 1 if ambulatory expenditure is greater than zero
* educ: educational attainment, in years
* age: age / 10
* income: income
* female: 1 if female
* totch: number of chronic diseases
* blhisp: 1 if black or hispanic ethnicity
* ins: 1 if insured
* Data Description
summarize
* Define Independent Variable List
global xlist1 ins totch age age2 educ blhisp
* Tobit I Model
* ll without a value: the lower censoring limit is the minimum of y in the data
tobit y $xlist1 income, ll
* Marginal Effects on the censored mean E(y*|x), with y* = max(0, y).
* NOTE(review): the original heading read "E(y|x,y>0)"; that truncated mean
* is requested with predict(e(0,.)), not ystar(0,.) - confirm which is wanted.
margins, dydx(*) predict(ystar(0,.))
* Tobit II Model (same regressors in the outcome and selection equations)
heckman lny $xlist1 income, select(dy = $xlist1 income)
// Author's note on the test of independent equations: "Prob >>>" (a large
// p-value) => fail to reject H0: corr(e1, e2) = 0 ==> "can have correlation".
// This note used to follow a /// continuation, which pulled its second line
// into the heckman command itself and produced a syntax error.
* Marginal Effects on the censored mean (see the note on ystar above)
margins, dydx(*) predict(ystar(0,.))
* Tobit II Model With Exclusion Restrictions
* income enters the selection equation only
heckman lny $xlist1, select(dy = $xlist1 income)
* Marginal Effects on the censored mean (see the note on ystar above)
margins, dydx(*) predict(ystar(0,.))
* Tobit I Predictions
* Built in two parts: a probit for dy and an OLS regression for lny on y>0.
* Part 1: probit, predicted probability of a positive expenditure
probit dy $xlist1 income
predict probpart1, p
* Part 2: OLS for lny on the observations with y>0
regress lny $xlist1 income if y>0
scalar sert1 = e(rmse)                       // root MSE of the log regression
predict predlnaepos1, xb
* Back-transform from logs, adding half the squared root MSE
generate predaepos1 = exp(predlnaepos1 + 0.5*sert1^2)
* Prediction for all observations: probability times the positive-part level
generate predaeall1 = probpart1*predaepos1

* First Set of Tobit II Predictions (income in both equations)
heckman lny $xlist1 income, select(dy = $xlist1 income)
scalar sert2 = e(sigma)                      // sigma stored by heckman
predict predlaepos2, ycond
predict probpart2, psel
generate predaepos2 = exp(predlaepos2 + 0.5*sert2^2)
generate predaeall2 = probpart2*predaepos2

* Second Set of Tobit II Predictions (income in the selection equation only)
heckman lny $xlist1, select(dy = $xlist1 income)
scalar shml3 = e(sigma)                      // sigma stored by heckman
predict predlaepos3, ycond
predict probpart3, psel
generate predaepos3 = exp(predlaepos3 + 0.5*shml3^2)
generate predaeall3 = probpart3*predaepos3

* Prediction Results
* Positive-expenditure observations: actual y against the three level predictions
summarize y predaepos1 predaepos2 predaepos3 if y>0
* All observations: actual y against the three overall predictions
summarize y predaeall1 predaeall2 predaeall3
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2022
* Prof. Michael Binder, Ph.D.
*
* Problem Set 6: Firm Investment
******************************************************************************
cls // clear display in results window
clear // clear previous work out of memory
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 9" // change working directory
use "[Link]" // Load Data
* Generate identifiers for cross-sectional and time-series observation numbers
* The data are stacked in blocks of 20 consecutive rows per firm
* (rows 1-20 firm 1, 21-40 firm 2, ..., rows above 80 firm 5).
generate id = _n
* Firm number: block index of the row, capped at 5 as in the original coding
generate firm = min(ceil(id/20), 5)
* Time index: position of the row within its firm's block (1, 2, ...)
generate t = id - 20*(firm - 1)
* Declare identifiers for cross-sectional and time-series observation numbers
* The panel variable is firm, not the row number id: declaring id would make
* every observation its own one-period panel.
xtset firm t
* Data Description: summary statistics for I, F and C
summarize I F C
* Generate Firm Dummies by hand: Dum`j' flags rows 20*(j-1)+1 through 20*j
forvalues j = 1/5 {
    generate Dum`j' = inrange(_n, 20*`j' - 19, 20*`j')
}
* Alternatively (Generate Firm Dummies):
* remove the hand-made dummies so that they can be recreated with tabulate
drop Dum1-Dum5
* Let tabulate create the firm dummies Dum1-Dum5 (author's note: easier to do)
tabulate firm, gen(Dum)
* Fixed Effects Model
* Least Squares Dummy Variables
* (a) all five firm dummies and no intercept
regress I Dum1 Dum2 Dum3 Dum4 Dum5 F C, noconstant
* (b) four firm dummies plus an intercept: five dummies and an intercept
*     cannot be included together because of perfect multicollinearity
regress I Dum1 Dum2 Dum3 Dum4 F C
/*
Output of:  regress I Dum1 Dum2 Dum3 Dum4 F C

      Source |       SS       df       MS              Number of obs =     100
-------------+------------------------------           F(  6,    93) =  232.32
       Model |  6659149.41     6  1109858.23           Prob > F      =  0.0000
    Residual |  444288.411    93  4777.29474           R-squared     =  0.9375
-------------+------------------------------           Adj R-squared =  0.9334
       Total |  7103437.82    99  71751.8972           Root MSE      =  69.118
------------------------------------------------------------------------------
           I |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
        Dum1 |  -168.6053   41.50614    -4.06   0.000    -251.0282    -86.1823
        Dum2 |  -121.9121   29.05297    -4.20   0.000    -179.6056   -64.21868
        Dum3 |  -334.7093   22.01613   -15.20   0.000     -378.429   -290.9896
        Dum4 |   -150.438   29.19629    -5.15   0.000     -208.416   -92.45992
           F |   .1059799    .015891     6.67   0.000     .0744236    .1375363
           C |   .3466596   .0241612    14.35   0.000     .2986803    .3946388
       _cons |   92.53855   33.23551     2.78   0.006     26.53941    158.5377
------------------------------------------------------------------------------
*/
* Stata Panel Data Command Syntax
* Fixed effects (within) estimator with firm as the panel variable
xtreg I F C, fe i(firm)
// Author's note: H0 that the individual effects are all zero is strongly rejected.
* Random Effects Model
* Maximum-likelihood random-effects estimator
xtreg I F C, mle i(firm)
* GLS random-effects estimator
xtreg I F C, re i(firm)
// Author's note: the FE and RE estimates differ ==> strongly reject Corr = 0.
* Hypothesis Testing
* Individual Effects: test that the five firm-dummy coefficients are equal
quietly regress I Dum1 Dum2 Dum3 Dum4 Dum5 F C, nocons
testparm Dum1-Dum5, equal
* Hausman Test: fixed effects (b) against random effects (B)
quietly xtreg I F C, fe i(firm)
estimates store fixed
quietly xtreg I F C, re i(firm)
estimates store random
hausman fixed random
// Author's notes on the Hausman output (translated; kept as written):
//   - "lower than the standard error"
//   - b - B: positive
//   - Var(b) - Var(B): depends on the sign of the variance difference;
//     Var(FE) increase ==> negative Hausman test statistic
//     ==> therefore the model (test) fails
// These notes used to follow /// continuations or stand as bare text, so
// Stata tried to execute them as part of the commands.
******************************************************************************
* Advanced Fundamentals of Microeconometrics and Data Science (AFMD)
* Vietnamese German University
* Fall Semester 2023
* Prof. Michael Binder, Ph.D.
*
* Problem Set 7: Individual Retirements Accounts (IRAs)
******************************************************************************
cls // clear display in results window
clear // clear previous work out of memory
* change working directory
* NOTE(review): the original path read ".../ Day 11" with a blank before
* "Day"; every other cd in this file uses ".../Day N", so the blank was
* treated as a typo - confirm against the actual folder name.
cd "/Users/macbook/Desktop/AFMD 30.9_PhD.Binder/Day 11"
use "[Link]" // Load Data
* Setting up Lists of Regressors
* xlistexo: the four exogenous regressors
* rlist: xlistexo fully interacted with itself (levels, squares, cross products)
global xlistexo inc incsq age agesq
global rlist c.($xlistexo)##c.($xlistexo)
* Note: Stata will remove redundant regressors
regress pira $rlist
* Predictions
* Create Training and Prediction Samples
* split(1 4) with values(0 1): train = 0 receives one part of the sample and
* train = 1 receives four parts; the seed makes the split reproducible.
splitsample pira, generate(train) split(1 4) values(0 1) rseed(90210)
* training sample: train = 1 (models are fit on these observations)
* test (prediction) sample: train = 0 (held out for the Test MSE)
tabulate train
* Each model is fit on the training sample only; predict then fills in the
* linear prediction for all observations, training and test alike.
* Ridge (elastic net with alpha = 0)
elasticnet linear pira p401k $rlist if train==1, alpha(0)
lassoinfo
predict y_ridge, xb
* Lasso
lasso linear pira p401k $rlist if train==1
lassoinfo
predict y_lasso, xb
* Elastic Net
elasticnet linear pira p401k $rlist if train==1
lassoinfo
predict y_elanet, xb
* Multiple Linear Regression Model
* Restricted to the training sample like the three penalized estimators, so
* that its Test MSE is a genuine out-of-sample figure (it was previously fit
* on the full sample, test observations included).
regress pira p401k $xlistexo if train==1
predict y_mlr, xb
* Compare Predictive Performance
* For each predictor: squared prediction error, then its mean over the
* train == 1 and train == 0 observations, stored as scalars and displayed.
foreach p of varlist y_ridge y_lasso y_elanet y_mlr {
    quietly {
        generate `p'errorsq1 = (`p'-pira)^2
        summarize `p'errorsq1 if train == 1
        scalar mse`p'train1 = r(mean)
        summarize `p'errorsq1 if train == 0
        scalar mse`p'test1 = r(mean)
    }
    display "Predictor: " "`p'" _col(21) " Train MSE = " %8.6f mse`p'train1 " Test MSE = " %8.6f mse`p'test1
}
* For Comparison: OLS Estimation of Multiple Linear Regression
* ($xlistexo holds inc incsq age agesq)
regress pira p401k $xlistexo
* Partialed-Out Lasso-OLS: coefficient on p401k with controls selected from $rlist
poregress pira p401k, controls($rlist)
lassoinfo
predict y_lassools, xb
* For Comparison: IV Estimation of Multiple Linear Regression
* (p401k instrumented by e401k)
ivregress 2sls pira $xlistexo (p401k = e401k)
* Partialed-Out Lasso-IV: same instrument, controls selected from $rlist
poivregress pira (p401k=e401k), controls($rlist)
lassoinfo
predict y_lassoiv, xb
* Compare Predictive Performance
* Same comparison as before, now including the two partialed-out lasso
* predictors; results go into the ...errorsq2 variables and ...2 scalars.
foreach p of varlist y_ridge y_lasso y_elanet y_mlr y_lassools y_lassoiv {
    quietly {
        generate `p'errorsq2 = (`p'-pira)^2
        summarize `p'errorsq2 if train == 1
        scalar mse`p'train2 = r(mean)
        summarize `p'errorsq2 if train == 0
        scalar mse`p'test2 = r(mean)
    }
    display "Predictor: " "`p'" _col(21) " Train MSE = " %8.6f mse`p'train2 " Test MSE = " %8.6f mse`p'test2
}