round(var(beta_hat_2), 5)
library(“scales”)
curve(expr = dnorm(x, mean = beta_true_2,
sd=sqrt(var_true_beta_2)),
xlab=””,ylab=””, col=gray(.2), lwd=3, lty=1,
xlim=c(2,4), ylim=c(0,3),main=paste0(“n=”,n))
lines(density(beta_hat_2, bw = bw.SJ(beta_hat_2)),
col=alpha(“blue”,.5), lwd=3)
legend(“topleft”, lty=c(1,1), lwd=c(3,3),
col=c(gray(.2), alpha(“blue”,.5)), bty=”n”, legend=
c(expression(
“Theoretical (Asymptotic) Gaussian Density of”~hat(beta)[2]),
expression(
“Empirical Density Estimation based on MC realizations from”~
hat(beta)[2])))
suppressMessages(library(“car”))
library(“sandwich”)
## Generate data
MC_data <- myDataGenerator(n = 100,
beta = beta_true)
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
vcovHC3_mat <- sandwich::vcovHC(lm_obj, type="HC3")
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
vcovHC3_mat <- sandwich::vcovHC(lm_obj, type="HC3")
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = c("X_2=3", "X_3=5"),
vcov=vcovHC3_mat)
R <- rbind(c(0,1,0),
c(0,0,1))
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = R,
rhs = c(3,5),
vcov=vcovHC3_mat)
suppressMessages(library("car")) # for linearHyothesis()
# ?linearHypothesis
library("sandwich")
## Generate data
MC_data <- myDataGenerator(n = 100,
beta = beta_true)
## Estimate the linear regression model parameters
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
vcovHC3_mat <- sandwich::vcovHC(lm_obj, type="HC3")
## Option 1:
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = c("X_2=3", "X_3=5"),
vcov=vcovHC3_mat)
R <- rbind(c(0,1,0),
c(0,0,1))
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = R,
rhs = c(3,5),
vcov=vcovHC3_mat)
suppressMessages(library("car")) # for linearHyothesis()
library("sandwich")
## Generate data
MC_data <- myDataGenerator(n = 100,
beta = beta_true)
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
vcovHC3_mat <- sandwich::vcovHC(lm_obj, type="HC3")
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = "X_3=5",
vcov=vcovHC3_mat)
install.packages("sandwich")
install.packages("lmtest")
install.packages("car")
install.packages("AER")
install.packages("car")
install.packages("car")
install.packages("AER")
install.packages("lmtest")
install.packages("sandwich")
library("lmtest") # for coeftest()
library("MASS") # for Boston housing data
data("Boston") # ?Boston; names(Boston)
# focusing here on a small sample case (n=20):
Boston_small <- Boston[1:20,]
lm_obj <- lm(medv ~ ptratio + lstat + nox + crim, data = Boston_small)
round(coeftest(lm_obj), 2)
library("car")
linearHypothesis(lm_obj, c("lstat=0", "nox=0"))
install.packages("car")
install.packages("car")
library("car")
linearHypothesis(lm_obj, c("lstat=0", "nox=0"))
## Below I use the same data that was used to produce the
## results in Table 6.1 of our script. However, you
## can produce new data by setting another seed-value
theta_true <- 0.2 # unknown true theta value
n <- 5 # sample size
## Use a common Random Number Generator:
RNGkind(sample.kind = "Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler used
set.seed(1)
# simulate data: n many (unfair) coin tosses
x <- sample(x = c(0,1),
size =n,
replace = TRUE,
prob = c(1-theta_true, theta_true))
## number of heads (i.e., the number of "1"s in x)
h <- sum(x)
## First derivative of the log-likelihood function
Lp_fct <- function(theta, h = h, n = n){
(h/theta) - (n - h)/(1 - theta)
}
## Second derivative of the log-likelihood function
Lpp_fct <- function(theta, h = h, n = n){
- (h/theta^2) - (n - h)/(1 - theta)^2
}
t <- 1e-10 # convergence criterion
check <- TRUE # for stopping the while-loop
i <- 0 # count iterations
theta <- 0.4 # starting value
Lp <- Lp_fct( theta, h=h, n=n)
Lpp <- Lpp_fct(theta, h=h, n=n)
while(check){
i <- i + 1
##
theta_new <- theta[i] - (Lp_fct(theta[i], h=h, n=n) / Lpp_fct(theta[i], h=h, n=n))
Lp_new <- Lp_fct( theta_new, h = h, n = n)
Lpp_new <- Lpp_fct(theta_new, h = h, n = n)
##
theta <- c(theta, theta_new)
Lp <- c(Lp, Lp_new)
Lpp <- c(Lpp, Lpp_new)
##
if(abs(Lp_fct(theta_new, h=h, n=n)) < t ){check <- FALSE}
}
cbind(theta, Lp, Lp/Lpp)
?RNGkind()
?sample
x
set.seed(1)
# ?sample
# simulate data: n many (unfair) coin tosses
x <- sample(x = c(0,1),
size = n,
replace = TRUE,
prob = c(1-theta_true, theta_true))
x
h <- sum(x)
h
t
library("AER")
data("CigarettesSW")
summary(CigarettesSW)
pchisq(cig_OR_test[2, 5], df = 1, lower.tail = FALSE)
library("AER")
data("CigarettesSW")
summary(CigarettesSW)
#> state year cpi population packs income
#> AL : 2 1985:48 Min. :1.076 Min. : 478447 Min. : 49.27 Min. : 6887097
#> AR : 2 1995:48 1st Qu.:1.076 1st Qu.: 1622606 1st Qu.: 92.45 1st Qu.: 25520384
#> AZ : 2 Median :1.300 Median : 3697472 Median :110.16 Median : 61661644
#> CA : 2 Mean :1.300 Mean : 5168866 Mean :109.18 Mean : 99878736
#> CO : 2 3rd Qu.:1.524 3rd Qu.: 5901500 3rd Qu.:123.52 3rd Qu.:127313964
#> CT : 2 Max. :1.524 Max. :31493524 Max. :197.99 Max. :771470144
#> (Other):84
#> tax price taxs
#> Min. :18.00 Min. : 84.97 Min. : 21.27
#> 1st Qu.:31.00 1st Qu.:102.71 1st Qu.: 34.77
#> Median :37.00 Median :137.72 Median : 41.05
#> Mean :42.68 Mean :143.45 Mean : 48.33
#> 3rd Qu.:50.88 3rd Qu.:176.15 3rd Qu.: 59.48
#> Max. :99.00 Max. :240.85 Max. :112.63
# P168
## ————————————————————————————————
# compute real per capita prices
CigarettesSW$rprice <- with(CigarettesSW, price / cpi)
# compute the sales tax
CigarettesSW$salestax <- with(CigarettesSW, (taxs - tax) / cpi)
# check the correlation between sales tax and price
cor(CigarettesSW$salestax, CigarettesSW$price)
# generate a subset for the year 1995
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# perform the first stage regression
cig_s1 <- lm(log(rprice) ~ salestax, data = c1995)
coeftest(cig_s1, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# inspect the R^2 of the first stage regression
summary(cig_s1)$r.squared
## ------------------------------------------------------------------------------------------------
# store the predicted values
lcigp_pred <- cig_s1$fitted.values
## ------------------------------------------------------------------------------------------------
# run the stage 2 regression
cig_s2 <- lm(log(c1995$packs) ~ lcigp_pred)
coeftest(cig_s2, vcov = vcovHC)
## ------------------------------------------------------------------------------------------------
# perform TSLS using 'ivreg()'
cig_ivreg <- ivreg(log(packs) ~ log(rprice) | salestax,
data = c1995)
coeftest(cig_ivreg, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# add real income to the dataset (cpi: consumer price index)
CigarettesSW$rincome <- with(CigarettesSW,
income / population / cpi)
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# estimate the model
cig_ivreg2 <- ivreg(log(packs) ~ log(rprice) +
log(rincome) | log(rincome) +
salestax, data = c1995)
coeftest(cig_ivreg2, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# add cigtax to the data set
CigarettesSW$cigtax <- with(CigarettesSW, tax/cpi)
c1995 <- subset(CigarettesSW, year == "1995")
## ------------------------------------------------------------------------------------------------
# estimate the model
cig_ivreg3 <- ivreg(log(packs) ~ log(rprice) + log(rincome) |
log(rincome) + salestax + cigtax,
data = c1995)
coeftest(cig_ivreg3, vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# subset data for year 1985
c1985 <- subset(CigarettesSW, year == "1985")
# define differences in variables
packsdiff <- log(c1995$packs) - log(c1985$packs)
pricediff <- log(c1995$price/c1995$cpi) - log(c1985$price/c1985$cpi)
incomediff <- log(c1995$income/c1995$population/c1995$cpi) -
log(c1985$income/c1985$population/c1985$cpi)
salestaxdiff <- (c1995$taxs - c1995$tax)/c1995$cpi - (c1985$taxs - c1985$tax)/c1985$cpi
cigtaxdiff <- c1995$tax/c1995$cpi - c1985$tax/c1985$cpi
## ------------------------------------------------------------------------------------------------
# estimate the three models
cig_ivreg_diff1 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
salestaxdiff)
cig_ivreg_diff2 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
cigtaxdiff)
cig_ivreg_diff3 <- ivreg(packsdiff ~ pricediff +
incomediff | incomediff +
salestaxdiff + cigtaxdiff)
## ------------------------------------------------------------------------------------------------
# robust coefficient summary for 1.
coeftest(cig_ivreg_diff1, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 2.
coeftest(cig_ivreg_diff2, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 3.
coeftest(cig_ivreg_diff3, vcov = vcovHC, type = "HC1")
## ---- eval = FALSE, echo=TRUE--------------------------------------------------------------------
## # gather robust standard errors in a list
## rob_se <- list(sqrt(diag(vcovHC(cig_ivreg_diff1, type = "HC1"))),
## sqrt(diag(vcovHC(cig_ivreg_diff2, type = "HC1"))),
## sqrt(diag(vcovHC(cig_ivreg_diff3, type = "HC1"))))
##
## # generate table
## stargazer(cig_ivreg_diff1, cig_ivreg_diff2, cig_ivreg_diff3,
## header = FALSE,
## type = "latex",
## omit.table.layout = "n",
## digits = 3,
## column.labels = c("IV: salestax", "IV: cigtax",
## "IVs: salestax, cigtax"),
## dep.var.labels.include = FALSE,
## dep.var.caption =
## "Dependent Variable: 1985-1995 Difference in Log per Pack Price",
## se = rob_se)
## ------------------------------------------------------------------------------------------------
# first-stage regressions
mod_relevance1 <- lm(pricediff ~ salestaxdiff + incomediff)
mod_relevance2 <- lm(pricediff ~ cigtaxdiff + incomediff)
mod_relevance3 <- lm(pricediff ~ incomediff + salestaxdiff +
cigtaxdiff)
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (1)
linearHypothesis(mod_relevance1,
"salestaxdiff = 0",
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (2)
linearHypothesis(mod_relevance2,
"cigtaxdiff = 0",
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# check instrument relevance for model (3)
linearHypothesis(mod_relevance3,
c("salestaxdiff = 0", "cigtaxdiff = 0"),
vcov = vcovHC, type = "HC1")
## ------------------------------------------------------------------------------------------------
# compute the J-statistic
cig_iv_OR <- lm(residuals(cig_ivreg_diff3) ~ incomediff +
salestaxdiff + cigtaxdiff)
cig_OR_test <- linearHypothesis(cig_iv_OR,
c("salestaxdiff = 0",
"cigtaxdiff = 0"),
test = "Chisq")
cig_OR_test
## ------------------------------------------------------------------------------------------------
# the p-value reported by linearHypothesis() is wrong
# because the degrees of freedom are set to 2.
# This differs from the degree of overidentification (m − (K − 1) = 2 − (2 − 1) = 1)
# so the J -statistic is χ21 distributed instead of following a χ2 distribution
# as assumed defaultly by linearHypothesis().
# compute correct p-value for J-statistic
pchisq(cig_OR_test[2, 5], df = 1, lower.tail = FALSE)
library("sandwich")
library("lmtest")
library("car")
library("AER")
data("Affairs")
lm_obj <- lm(affairs ~ age + yearsmarried + religiousness + rating, data = Affairs)
Var_beta_hat_robust <- sandwich::vcovHC(lm_fit, type="HC3")
Var_beta_hat_robust <- sandwich::vcovHC(lm_obj, type="HC3")
Var_beta_hat_robust
vcovHC3 <- sandwich::vcovHC(lm_obj, type="HC3")
vcovHC3 <- sandwich::vcovHC(lm_obj, type="HC3")
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = c("age=o", "yearsmarried=0"),
vcov=vcovHC3)
vcovHC3 <- sandwich::vcovHC(lm_obj, type="HC3")
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = c("age=0", "yearsmarried=0"),
vcov=vcovHC3)
car::linearHypothesis(model = lm_obj,
hypothesis.matrix = c("age+yearsmarried=0"),
vcov=vcovHC3)
n <- 100
X_2 <- runif(n, 1, 4)
V <- rnorm(n)
X_3 <- 2 * X_2 + V
eps <- rnorm(n, sd=sqrt(2/3))
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
beta_hat_2[r] <- coef(lm_obj)[2]
}
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
c(2,3,4)
beta <- c(2,3,4)
X <- c(X_2, X_3)
Y <- beta %*% X + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
beta <- c(2,3,4)
X <- c(X_2, X_3)
Y <- beta %*% X + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
n <- 100
X_2 <- runif(n, 1, 4)
V <- rnorm(n)
X_3 <- 2 * X_2 + V
eps <- rnorm(n, sd=sqrt(2/3))
## a.
beta <- c(2,3,4)
X <- c(X_2, X_3)
Y <- beta %*% X + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
## a.
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
beta_true_2
round(mean(beta_hat_2), 3)
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
round(var(beta_hat_2), 5)
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
n <- 100
X_2 <- runif(n, 1, 4)
V <- rnorm(n)
X_3 <- 2 * X_2 + V
eps <- rnorm(n, sd=sqrt(2/3))
## a.
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
myDataGenerator <- function(n, beta, X=NULL, sigma=3){
if (is.null(X)){
X <- cbind(rep(1,n)),
runif(n,2,10),
runif(n,12,22)
}
}
for(r in 1:rep){
MC_data_2 <- myDataGenerator(n=n,
beta=beta_true,
X=X_cond)
lm_obj_2 <- lm(Y ~ X_2, data = MC_data_2)
beta_hat_2[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2)
beta_hat_2[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
round(mean(beta_hat_2_b), 5)
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
round(mean(beta_hat_2_b), 5)
beta_hat_2_b[r] <- coef(lm_obj)[2]
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2 <- rep(NA, times=rep)
beta_hat_2_b <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2[r] <- coef(lm_obj)[2]
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(var(beta_hat_2), 5)
round(mean(beta_hat_2_b), 5)
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2_b <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(mean(beta_hat_2_b), 5)
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2_b <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 )
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(mean(beta_hat_2_b), 5)
beta_1 <- 2
beta_2 <- 3
beta_3 <- 4
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2_b <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 + X_3)
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(mean(beta_hat_2_b), 5)
## b
Y <- beta_1 + beta_2*X_2 + beta_3*X_3 + eps
rep <- 500
beta_hat_2_b <- rep(NA, times=rep)
for(r in 1:rep){
lm_obj <- lm(Y ~ X_2 )
beta_hat_2_b[r] <- coef(lm_obj)[2]
}
round(mean(beta_hat_2_b), 5)