第 4 章 R for IV

4.1 setup

library(dplyr)
library(magrittr)

library(AER)

data("CigarettesSW")

4.2 資料結構觀察

CigarettesSW %<>% as_tibble
CigarettesSW
## # A tibble: 96 x 9
##    state year    cpi population packs income   tax
##    <fct> <fct> <dbl>      <dbl> <dbl>  <dbl> <dbl>
##  1 AL    1985   1.08    3973000  116. 4.60e7  32.5
##  2 AR    1985   1.08    2327000  129. 2.62e7  37
##  3 AZ    1985   1.08    3184000  105. 4.40e7  31
##  4 CA    1985   1.08   26444000  100. 4.47e8  26
##  5 CO    1985   1.08    3209000  113. 4.95e7  31
##  6 CT    1985   1.08    3201000  109. 6.01e7  42
##  7 DE    1985   1.08     618000  144. 9.93e6  30
##  8 FL    1985   1.08   11352000  122. 1.67e8  37
##  9 GA    1985   1.08    5963000  127. 7.84e7  28
## 10 IA    1985   1.08    2830000  114. 3.79e7  34
## # … with 86 more rows, and 2 more variables:
## #   price <dbl>, taxs <dbl>

CigarettesSW %<>% filter(year=="1995")

4.3 產生新變數

CigarettesSW %<>%
mutate(
rprice=price/cpi,
rincome=income/population/cpi,
tdiff=(taxs-tax)/cpi
)

4.4 迴歸模型

\begin{align} log(packs) &= \beta_0+\beta_1 log(rprice)+\epsilon \tag{4.1}\\ log(packs) &= \beta_0+\beta_1 log(rprice)+\beta_2 log(rincome)+\epsilon \tag{4.2} \end{align}

\begin{align} \mbox{one IV} &: \mbox{tdiff} \tag{4.3}\\ \mbox{two IVs} &: \mbox{tdiff, tax/cpi} \tag{4.4} \end{align}

設定formulae

model1<-log(packs) ~ log(rprice)
model2<-log(packs) ~ log(rprice) + log(rincome)

4.5 OLS估計

(4.1)：ols1
(4.2)：ols2

ols1<-lm(model1,CigarettesSW)
ols2<-lm(model2,CigarettesSW)

library(sandwich)
library(lmtest)
library(stargazer)

#使用vcovHC函數來計算HC1型的異質變異（即橫斷面資料下的線性迴歸模型）
coeftest(ols1, vcov. = vcovHC, type="HC1") -> ols1_coeftest
coeftest(ols2, vcov. = vcovHC, type="HC1") -> ols2_coeftest

OLS結果比較

library(stargazer)
stargazer(ols1,ols2,type="html")
 Dependent variable: log(packs) (1) (2) log(rprice) -1.213*** -1.407*** (0.216) (0.251) log(rincome) 0.344 (0.235) Constant 10.340*** 10.340*** (1.035) (1.023) Observations 48 48 R2 0.406 0.433 Adjusted R2 0.393 0.408 Residual Std. Error 0.190 (df = 46) 0.187 (df = 45) F Statistic 31.410*** (df = 1; 46) 17.160*** (df = 2; 45) Note: p<0.1; p<0.05; p<0.01

4.6 TSLS估計

tsls_1iv <- ivreg(
log(packs) ~ log(rprice) + log(rincome) | log(rincome) + tdiff,
data=CigarettesSW
)

tsls_2iv <- ivreg(
log(packs) ~ log(rprice) + log(rincome) | log(rincome) + tdiff + I(tax/cpi),
data=CigarettesSW
)

假設檢定

vcov=sandwich:使用HC0異質變異設定
diagonostic=TRUE: 同時進行TSLS的三項檢定
df=Inf: 檢定p值使用z表而非t表

summary(tsls_1iv, vcov = sandwich, diagnostics = TRUE, df=Inf) -> tsls_1iv_tests
summary(tsls_2iv, vcov = sandwich, diagnostics = TRUE, df=Inf) -> tsls_2iv_tests

[Optional]原始sandwich(x,adjust=FALSE)使用HC0的異質變異設定，若要改成HC1必需使adjust=TRUE:

sandwich_HC1<-function(x,adjust=TRUE){
summary(tsls_2iv, vcov = sandwich_HC1, diagnostics = TRUE, df=Inf) -> tsls_2iv_tests_hc1
stargazer(tsls_1iv,tsls_2iv,type="html",
))