## Exercise Chapter 3
## Problem 4
## Background
set.seed(109) # Set the “seed” of the random number generators
n <- 100 # number of observations
## Generate two explanatory variables plus an intercept-variable:
X_1 <- rep(1, n) # Intercept # 1重复n次
X_2 <- rnorm(n, mean = 10, sd = 1.5) # Draw realizations form a normal distribution
X_3 <- runif(n, min = 0.2, max = 8) # Draw realizations form a t-distribution
X <- cbind(X_1, X_2, X_3) # Save as a Nx3-dimensional data matrix, 合并列;rbind是合并行
beta <- c(1, -5, 5) # c默认列向量
## Generate realizations from the heteroscadastic error term
eps <- rnorm(n, mean = 0, sd = abs(X_3)) # sd = sigma
## Dependent variable:
Y <- X %*% beta + eps # 如果拆开向量Y <- X_1*beta_1 + X_2*beta_2 + X_3*beta_3 + eps,不方便后续计算
## Problem 4a compute the theoretical variance of the OLS estimator beta_hat
## Solution
Var_theo <- solve(t(X) %*% X) %*% t(X) %*% diag(X_3^2) %*% X %*% solve(t(X) %*% X) # Var(beta_hat|X) 公式要记住,solve()是逆,t()是转置
rownames(Var_theo) <- c("", "", "") # remove row name, 如果不remove,会显示X_1, X_2, X_3
colnames(Var_theo) <- c("", "", "") # remove col name, 如果不remove,会显示X_1, X_2, X_3
## 如果不remove
## X_1 X_2 X_3
## X_1 7.22896936 -0.683424680 -0.072931570
## X_2 -0.68342468 0.069232031 -0.007624144
## X_3 -0.07293157 -0.007624144 0.057342462
Var_theo
## 7.22896936 -0.683424680 -0.072931570
## -0.68342468 0.069232031 -0.007624144
## -0.07293157 -0.007624144 0.057342462
## above 2. and 3. diag elements are true variance of beta_hat_2 and beta_hat_3
## Problem 4b Monte-Carlo Simulation for Var_beta2_hat|x, and Var_beta3_hat|x
## Solution
## R code for Monte-Carlo Simulation
library("sandwich") # HC robust variance estimation
MC_reps <- 10000 # Number of Mont-Carlo replications
VarHC3_estims <- matrix(NA, 3, MC_reps) # Container to collect the results, matrix(data, nrow, ncol, ...)
# 3行因为三种数据,和上面Var_theo行格式一样,10000列用来做模拟
for(r in 1:MC_reps){
## Generate new relisations from the heteroscedastic error term, 因为conditionally on X,generate new eps
eps <- rnorm(n, mean = 0, sd = abs(X_3)) # 10000个eps
## Generate new relizations from the dependent variable,得到new Y
Y <- X %*% beta + eps # %*%矩阵乘法 # 10000个Y
## OLS estimation
lm_fit <- lm(Y ~ X)
## Robust estimation of the variance of \hat\beta
VarHC3_estims[,r] <- diag(sandwich::vcovHC(lm_fit, type = "HC3"))} # 10000个var
# 容器是matrix,每次生成的三个元素,以列的形式填充进去
VarHC3_estims_means <- rowMeans(VarHC3_estims) # 返回()内的行均值组成的向量,每种数据的10000个值做平均,得出均值,返回新的向量,返回后默认排列为列向量
## Compare the theoretical variance Var(\hat\beta_2) and Var(\hat\beta_3) with the means of 10000 variance estimations \hat{Var}(\hat\beta_2) and \hat{Var}(\hat\beta_3)
cbind(diag(Var_theo)[c(2,3)], VarHC3_estims_means[c(2,3)])
## 老师给的模拟值,好像是HC算出来的,理论值是对的
## [,1] [,2]
## 0.06923203 0.06578514
## 0.05734246 0.05449071
## HC3模拟值
## [,1] [,2]
## 0.06923203 0.07258430
## 0.05734246 0.05945045
plot(x = c(1,2), y = c(0,0), ylim = range(VarHC3_estims[c(2,3),]), type = "n", axes = FALSE, xlab = "", ylab = "")
box() # 盒型
axis(1, c(1,2), labels = c(2,3))
axis(2)
points(x = rep(1, MC_reps), y = VarHC3_estims[2,], pch = 21, col = gray(.5, .25), bg = gray(.5, .25))
points(x = 1, y = VarHC3_estims_means[2], pch = 32, col = "black", bg = gray(.5, .25))
points(x = 1, y = diag(Var_theo)[2], pch = 1)
points(x = rep(2, MC_reps), y = VarHC3_estims[3,], pch = 21, col = gray(.5, .25), bg = gray(.5, .25))
points(x = 2, y = VarHC3_estims_means[3], pch = 21, col = "black", bg = "black")
points(x = 2, y = diag(Var_theo)[3], pch = 1)
legend("top",
legend = c("10000 Variance estimates", "Mean of variance estimates", "True Variance"),
bty = "n", pt.bg = c(gray(.5, .75), "black", "black"),
pch = c(21, 21, 1), col = c(gray(.5, .75), "black", "black"))