## —- warning=FALSE, message=FALSE, eval=TRUE, echo=FALSE, purl=TRUE—————————–
# load the data set and get an overview
library(“AER”)
data(“CigarettesSW”)
summary(CigarettesSW)
## ————————————————————————————————
# compute real per capita prices
CigarettesSW$rprice <- with(CigarettesSW, price / cpi)
# compute the sales tax
CigarettesSW$salestax <- with(CigarettesSW, (taxs - tax) / cpi)
# check the correlation between sales tax and price
cor(CigarettesSW$salestax, CigarettesSW$price)
# generate a subset for the year 1995
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# perform the first stage regression
cig_s1 <- lm(log(rprice) ~ salestax, data = c1995)
coeftest(cig_s1, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# inspect the R^2 of the first stage regression
summary(cig_s1)$r.squared
## ------------------------------------------------------------------------------------------------
# store the predicted values
lcigp_pred <- cig_s1$fitted.values
## ------------------------------------------------------------------------------------------------
# run the stage 2 regression
cig_s2 <- lm(log(c1995$packs) ~ lcigp_pred)
coeftest(cig_s2, vcov = vcovHC)
## ------------------------------------------------------------------------------------------------
# perform TSLS using 'ivreg()'
cig_ivreg <- ivreg(log(packs) ~ log(rprice) | salestax,
data = c1995)
coeftest(cig_ivreg, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# add real income to the dataset (cpi: consumer price index)
CigarettesSW$rincome <- with(CigarettesSW,
income / population / cpi)
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# estimate the model
cig_ivreg2 <- ivreg(log(packs) ~ log(rprice) +
log(rincome) | log(rincome) +
salestax, data = c1995)
coeftest(cig_ivreg2, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# add cigtax to the data set
CigarettesSW$cigtax <- with(CigarettesSW, tax/cpi)
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# estimate the model
cig_ivreg3 <- ivreg(log(packs) ~ log(rprice) + log(rincome) |
log(rincome) + salestax + cigtax,
data = c1995)
coeftest(cig_ivreg3, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# subset data for year 1985
c1985 <- subset(CigarettesSW, year == "1985")
# define differences in variables
packsdiff <- log(c1995$packs) - log(c1985$packs)
pricediff <- log(c1995$price/c1995$cpi) - log(c1985$price/c1985$cpi)
incomediff <- log(c1995$income/c1995$population/c1995$cpi) -
log(c1985$income/c1985$population/c1985$cpi)
salestaxdiff <- (c1995$taxs - c1995$tax)/c1995$cpi - (c1985$taxs - c1985$tax)/c1985$cpi
cigtaxdiff <- c1995$tax/c1995$cpi - c1985$tax/c1985$cpi
## ------------------------------------------------------------------------------------------------
# estimate the three models
cig_ivreg_diff1 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
salestaxdiff)
cig_ivreg_diff2 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
cigtaxdiff)
cig_ivreg_diff3 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
salestaxdiff + cigtaxdiff)
## ------------------------------------------------------------------------------------------------
# robust coefficient summary for 1.
coeftest(cig_ivreg_diff1, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 2.
coeftest(cig_ivreg_diff2, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 3.
coeftest(cig_ivreg_diff3, vcov = vcovHC, type = "HC1")
## ---- eval = FALSE, echo=TRUE--------------------------------------------------------------------
## # gather robust standard errors in a list
## rob_se <- list(sqrt(diag(vcovHC(cig_ivreg_diff1, type = "HC1"))),
## sqrt(diag(vcovHC(cig_ivreg_diff2, type = "HC1"))),
## sqrt(diag(vcovHC(cig_ivreg_diff3, type = "HC1"))))
##
## # generate table
## stargazer(cig_ivreg_diff1, cig_ivreg_diff2, cig_ivreg_diff3,
## header = FALSE,
## type = "latex",
## omit.table.layout = "n",
## digits = 3,
## column.labels = c("IV: salestax", "IV: cigtax",
## "IVs: salestax, cigtax"),
## dep.var.labels.include = FALSE,
## dep.var.caption =
## "Dependent Variable: 1985-1995 Difference in Log per Pack Price",
## se = rob_se)
## ------------------------------------------------------------------------------------------------
# first-stage regressions
mod_relevance1 <- lm(pricediff ~ salestaxdiff + incomediff)
mod_relevance2 <- lm(pricediff ~ cigtaxdiff + incomediff)
mod_relevance3 <- lm(pricediff ~ incomediff + salestaxdiff +
cigtaxdiff)
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (1)
linearHypothesis(mod_relevance1,
"salestaxdiff = 0",
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (2)
linearHypothesis(mod_relevance2,
"cigtaxdiff = 0",
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (3)
linearHypothesis(mod_relevance3,
c("salestaxdiff = 0", "cigtaxdiff = 0"),
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# compute the J-statistic
cig_iv_OR <- lm(residuals(cig_ivreg_diff3) ~ incomediff +
salestaxdiff + cigtaxdiff)
cig_OR_test <- linearHypothesis(cig_iv_OR,
c("salestaxdiff = 0",
"cigtaxdiff = 0"),
test = "Chisq")
cig_OR_test
## ------------------------------------------------------------------------------------------------
# compute correct p-value for J-statistic
pchisq(cig_OR_test[2, 5], df = 1, lower.tail = FALSE)