## --------------------------------------------------------------
## Function to generate artificial data
myDataGenerator <- function(n, beta){
  ## Design matrix: intercept plus two independent uniform regressors
  X <- cbind(rep(1, n),
             runif(n, -4, 4),
             runif(n, -5, 5))
  ## Conditionally heteroskedastic errors: Var(eps | X) = X_2^2 / 12
  eps <- runif(n, -.5 * abs(X[,2]), +.5 * abs(X[,2]))
  Y <- X %*% beta + eps
  data <- data.frame("Y" = Y,
                     "X_1" = X[,1], "X_2" = X[,2], "X_3" = X[,3])
  ##
  return(data)
}
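## Quick sanity check of the generator (illustrative addition, not part of the
## original script): the returned data frame should contain Y, the constant
## X_1, and the two uniform regressors X_2 and X_3.
head(myDataGenerator(n = 5, beta = c(2, 3, 4)))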
## ---- eval=FALSE, echo=FALSE-----------------------------------
## library("fractional")
## moment_fun <- function(m=3, a, b){fractional((b^(m+1)-a^(m+1))/((m+1)*(b-a)))}
## moment_fun(m=4, a=-4,b=4)
## ---- fig.align="center", echo=TRUE, fig.width = 8, fig.height = 5, out.width = "1\\textwidth"----
set.seed(123)
n <- 100 # a largish sample size
beta_true <- c(2,3,4) # true parameter vector
## Mean and variance of the true asymptotic
## normal distribution of beta_hat_2:
# true mean
beta_true_2 <- beta_true[2]
# true variance (sandwich form): (E[X X'])^{-1} E[eps^2 X X'] (E[X X'])^{-1} / n
var_true_beta_2 <- (solve(diag(c(1, 16/3, 25/3))) %*%
                      diag(c(4/9, 64/15, 100/27)) %*%
                      solve(diag(c(1, 16/3, 25/3))))[2,2]/n
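## The diagonal entries above follow from the uniform design and the
## heteroskedastic error: E[X_2^2] = 16/3, E[X_3^2] = 25/3,
## E[eps^2] = E[X_2^2]/12 = 4/9, E[eps^2 X_2^2] = E[X_2^4]/12 = 64/15,
## E[eps^2 X_3^2] = E[X_2^2] E[X_3^2]/12 = 100/27.
## A short numerical check, mirroring the commented-out moment_fun() above
## (the helper unif_moment() is an illustrative addition, not part of the
## original script):
unif_moment <- function(m, a, b){(b^(m+1) - a^(m+1))/((m+1)*(b - a))}
c("E[X_2^2]"       = unif_moment(2, -4, 4),                            # 16/3
  "E[X_3^2]"       = unif_moment(2, -5, 5),                            # 25/3
  "E[eps^2]"       = unif_moment(2, -4, 4)/12,                         # 4/9
  "E[eps^2 X_2^2]" = unif_moment(4, -4, 4)/12,                         # 64/15
  "E[eps^2 X_3^2]" = unif_moment(2, -4, 4)*unif_moment(2, -5, 5)/12)   # 100/27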
## Let's generate 5000 realizations of beta_hat_2 (drawing a new X in each
## replication) and check whether their distribution is close to the true
## asymptotic normal distribution
n_rep <- 5000 # MC replications
beta_hat_2 <- rep(NA, times = n_rep)
##
for(r in 1:n_rep){
  MC_data <- myDataGenerator(n = n,
                             beta = beta_true)
  lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
  beta_hat_2[r] <- coef(lm_obj)[2]
}
## Compare:
## True beta_2 versus average of beta_hat_2 estimates
beta_true_2
round(mean(beta_hat_2), 3)
## True variance of beta_hat_2 versus
## empirical variance of beta_hat_2 estimates
round(var_true_beta_2, 5)
round(var(beta_hat_2), 5)
## True normal distribution of beta_hat_2 versus
## empirical density of beta_hat_2 estimates
library("scales")
curve(expr = dnorm(x, mean = beta_true_2,
                   sd = sqrt(var_true_beta_2)),
      xlab = "", ylab = "", col = gray(.2), lwd = 3, lty = 1,
      xlim = range(beta_hat_2), ylim = c(0, 14.1), main = paste0("n=", n))
lines(density(beta_hat_2, bw = bw.SJ(beta_hat_2)),
      col = alpha("blue", .5), lwd = 3)
legend("topleft", lty = c(1, 1), lwd = c(3, 3),
       col = c(gray(.2), alpha("blue", .5)), bty = "n",
       legend = c(expression(
         "Theoretical (Asymptotic) Gaussian Density of"~hat(beta)[2]),
         expression(
           "Empirical Density Estimation based on MC realizations from"~
             hat(beta)[2])))
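## Additional check of the normal approximation (illustrative addition, not
## part of the original script): QQ-plot of the standardized MC realizations
## against the standard normal distribution.
z <- (beta_hat_2 - beta_true_2)/sqrt(var_true_beta_2)
qqnorm(z, main = paste0("Normal QQ-plot of standardized MC realizations, n=", n))
abline(a = 0, b = 1, lwd = 2, col = gray(.2))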
## ---- fig.align="center", echo=TRUE, fig.width = 8, fig.height = 5, out.width = "1\\textwidth"----
set.seed(123)
n <- 5 # a small sample size
beta_true <- c(2,3,4) # true parameter vector
## Mean and variance of the true asymptotic
## normal distribution of beta_hat_2:
# true mean
beta_true_2 <- beta_true[2]
# true variance
var_true_beta_2 <- (solve(diag(c(1, 16/3, 25/3))) %*%
                      diag(c(4/9, 64/15, 100/27)) %*%
                      solve(diag(c(1, 16/3, 25/3))))[2,2]/n
## Let's generate 5000 realizations of beta_hat_2 (drawing a new X in each
## replication) and check whether their distribution is close to the true
## asymptotic normal distribution
n_rep <- 5000 # MC replications
beta_hat_2 <- rep(NA, times = n_rep)
##
for(r in 1:n_rep){
  MC_data <- myDataGenerator(n = n,
                             beta = beta_true)
  lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
  beta_hat_2[r] <- coef(lm_obj)[2]
}
## Compare:
## True beta_2 versus average of beta_hat_2 estimates
beta_true_2
round(mean(beta_hat_2), 3)
## True variance of beta_hat_2 versus
## empirical variance of beta_hat_2 estimates
round(var_true_beta_2, 5)
round(var(beta_hat_2), 5)
## True normal distribution of beta_hat_2 versus
## empirical density of beta_hat_2 estimates
library("scales")
curve(expr = dnorm(x, mean = beta_true_2,
                   sd = sqrt(var_true_beta_2)),
      xlab = "", ylab = "", col = gray(.2), lwd = 3, lty = 1,
      xlim = c(2, 4), ylim = c(0, 3), main = paste0("n=", n))
lines(density(beta_hat_2, bw = bw.SJ(beta_hat_2)),
      col = alpha("blue", .5), lwd = 3)
legend("topleft", lty = c(1, 1), lwd = c(3, 3),
       col = c(gray(.2), alpha("blue", .5)), bty = "n",
       legend = c(expression(
         "Theoretical (Asymptotic) Gaussian Density of"~hat(beta)[2]),
         expression(
           "Empirical Density Estimation based on MC realizations from"~
             hat(beta)[2])))
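## To quantify how well the asymptotic approximation works at this small
## sample size, one further illustrative check (not part of the original
## script) is the empirical coverage of the nominal 95% interval implied by
## the asymptotic normal distribution:
mean(abs(beta_hat_2 - beta_true_2) <= qnorm(.975) * sqrt(var_true_beta_2))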
## --------------------------------------------------------------
suppressMessages(library("car")) # for linearHypothesis()
# ?linearHypothesis
library("sandwich")
## Generate data
MC_data <- myDataGenerator(n = 100,
                           beta = beta_true)
## Estimate the linear regression model parameters
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
vcovHC3_mat <- sandwich::vcovHC(lm_obj, type="HC3")
## Option 1:
car::linearHypothesis(model = lm_obj,
                      hypothesis.matrix = c("X_2=3", "X_3=5"),
                      vcov = vcovHC3_mat)
## Option 2:
R <- rbind(c(0,1,0),
           c(0,0,1))
car::linearHypothesis(model = lm_obj,
                      hypothesis.matrix = R,
                      rhs = c(3,5),
                      vcov = vcovHC3_mat)
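## What linearHypothesis() computes here, written out by hand (illustrative
## sketch; under the default F test it should reproduce the statistic reported
## above): F = (R b - r)' [R V R']^{-1} (R b - r) / q with q = 2 restrictions.
b_hat <- coef(lm_obj)
r_vec <- c(3, 5)
q <- nrow(R)
W <- t(R %*% b_hat - r_vec) %*% solve(R %*% vcovHC3_mat %*% t(R)) %*%
  (R %*% b_hat - r_vec)
F_stat <- as.numeric(W)/q
F_stat
pf(F_stat, df1 = q, df2 = lm_obj$df.residual, lower.tail = FALSE)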
## --------------------------------------------------------------
# Load libraries
library("lmtest") # for coeftest()
library("sandwich") # for vcovHC()
## Generate data
n <- 100
MC_data <- myDataGenerator(n = n,
                           beta = beta_true)
## Estimate the linear regression model parameters
lm_obj <- lm(Y ~ X_2 + X_3, data = MC_data)
## Robust t test
## Robust standard error for \hat{\beta}_3:
SE_rob <- sqrt(vcovHC(lm_obj, type = "HC3")[3,3])
## hypothetical (H0) value of \beta_3:
beta_3_H0 <- 5
## estimate for beta_3:
beta_3_hat <- coef(lm_obj)[3]
## robust t-test statistic
t_test_stat <- (beta_3_hat - beta_3_H0)/SE_rob
## p-value
K <- length(coef(lm_obj))
##
p_value <- 2 * min(pt(q = t_test_stat, df = n - K),
                   1 - pt(q = t_test_stat, df = n - K))
p_value
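## Cross-check of the robust inference (illustrative addition, not part of the
## original script): coeftest() from lmtest, loaded above, reports HC3-robust
## standard errors and t statistics for the default null hypotheses beta_k = 0
## (not beta_3 = 5), using the same n - K degrees of freedom.
coeftest(lm_obj, vcov. = vcovHC(lm_obj, type = "HC3"))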