---
title: "Untitled"
output:
  word_document: default
  pdf_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# 2
```{r}
# Attach readr for read_csv(), then import the survey data from disk.
# NOTE(review): the absolute path below is machine-specific; the document only
# knits on a machine where the file exists at exactly this location.
library(readr)
german_healthcare_usage <- read_csv(file = "D:/german_healthcare_usage.csv")
```
## (a)
We consider the multiple regression model $DOCVIS_i=\beta_0+\beta_1AGE_i+\beta_2HEALTHY_i+\beta_3HSAT_i+\beta_4HANDPER_i+\beta_5HANDDUM_i+\beta_6UNEMPLOY_i+\varepsilon_i$. The independent variables were chosen because they are the variables most strongly correlated with $DOCVIS$.
```{r}
# Keep only complete cases: any row with a missing value in any column is
# dropped. The later chunks all work from this `newdata` object.
newdata <- na.omit(german_healthcare_usage)

# Linear model of DOCVIS on the six chosen regressors, followed by the
# coefficient table and fit statistics.
reg <- lm(
  formula = DOCVIS ~ AGE + HEALTHY + HSAT + HANDPER + UNEMPLOY + HANDDUM,
  data = newdata
)
summary(reg)
```
The regression output shows that most of the estimated coefficients on the independent variables are highly significant (p-values close to zero). The p-value of the overall model is less than 0.05, which means that the model as a whole is significant. The R-squared is 0.1531, so the model explains 15.31% of the variation in $DOCVIS$.

## (b)
### i.
The control group consists of all observations on men, and the treatment group consists of all observations on women. The null hypothesis is $H_0:\delta \geq0$, versus the alternative $H_1:\delta<0$.
```{r}
# Split the complete-case data by sex: women form the treatment group and
# men the control group. `female` is reused by a later chunk.
female <- newdata[newdata$FEMALE == 1, ]
male <- newdata[newdata$FEMALE == 0, ]

# Period indicators built from the year dummies: 1984-1986 is "before",
# 1987/1988/1991/1994 is "after".
female$before <- as.numeric(with(female, YEAR1984 | YEAR1985 | YEAR1986))
female$after <- as.numeric(
  with(female, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)
male$before <- as.numeric(with(male, YEAR1984 | YEAR1985 | YEAR1986))
male$after <- as.numeric(
  with(male, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)

# The four cells of the design. Every row not flagged "before" is treated
# as "after" (the `after` column itself is not used for the split).
control_before <- male[male$before == 1, ]
control_after <- male[male$before == 0, ]
treatment_before <- female[female$before == 1, ]
treatment_after <- female[female$before == 0, ]

# Difference-in-differences of mean DOCVIS: the treatment-minus-control gap
# after, minus the same gap before.
D_in_D <- (mean(treatment_after$DOCVIS) - mean(control_after$DOCVIS)) -
  (mean(treatment_before$DOCVIS) - mean(control_before$DOCVIS))
D_in_D
```
The difference-in-differences estimate for women is less than 0, which suggests that the policy worked for women.

### ii.
The control group consists of all observations on employed individuals, and the treatment group consists of all observations on unemployed individuals. The null hypothesis is $H_0:\delta \geq0$, versus the alternative $H_1:\delta<0$.
```{r}
# Split the complete-case data by employment status: the unemployed form the
# treatment group and the employed the control group.
unemployed <- newdata[newdata$UNEMPLOY == 1, ]
employed <- newdata[newdata$UNEMPLOY == 0, ]

# Period indicators built from the year dummies: 1984-1986 is "before",
# 1987/1988/1991/1994 is "after".
unemployed$before <- as.numeric(
  with(unemployed, YEAR1984 | YEAR1985 | YEAR1986)
)
unemployed$after <- as.numeric(
  with(unemployed, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)
employed$before <- as.numeric(with(employed, YEAR1984 | YEAR1985 | YEAR1986))
employed$after <- as.numeric(
  with(employed, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)

# The four cells of the design. Every row not flagged "before" is treated
# as "after" (the `after` column itself is not used for the split).
control_before <- employed[employed$before == 1, ]
control_after <- employed[employed$before == 0, ]
treatment_before <- unemployed[unemployed$before == 1, ]
treatment_after <- unemployed[unemployed$before == 0, ]

# Difference-in-differences of mean DOCVIS: the treatment-minus-control gap
# after, minus the same gap before.
D_in_D <- (mean(treatment_after$DOCVIS) - mean(control_after$DOCVIS)) -
  (mean(treatment_before$DOCVIS) - mean(control_before$DOCVIS))
D_in_D
```
The difference-in-differences estimate for the unemployed is less than 0, which suggests that the policy worked for the unemployed.

## (c)
The null hypothesis is $H_0:DOCVIS_{women}\leq DOCVIS_{men}$ and the alternative hypothesis is $H_1:DOCVIS_{women}> DOCVIS_{men}$, where $DOCVIS_{women}$ and $DOCVIS_{men}$ denote the mean number of doctor visits in each group. We perform a one-sided two-sample t-test here:
```{r}
# Re-select the men from the complete-case data; `female` is reused from the
# earlier difference-in-differences chunk on women vs. men.
men <- newdata[newdata$FEMALE == 0, ]

# One-sided two-sample t-test: the alternative is that mean DOCVIS is greater
# in the first sample (women) than in the second (men). var.equal is left at
# its default, so equal variances are not assumed.
t.test(x = female$DOCVIS, y = men$DOCVIS, alternative = "greater")
```
The p-value of the two-sample t-test is less than 0.05, so we reject the null hypothesis at the 5% level and conclude that the mean number of doctor visits a patient has over a 3-month period is greater for women than for men.

## (d)
We are interested in whether the policy worked for civil servants. The control group consists of all observations on individuals who are not civil servants, and the treatment group consists of all observations on civil servants. The null hypothesis is $H_0:\delta \geq0$ and the alternative hypothesis is $H_1:\delta<0$.
```{r}
# Split the complete-case data on the BEAMT flag: rows with BEAMT == 1 form
# the treatment group and rows with BEAMT == 0 the control group.
BEAMT <- newdata[newdata$BEAMT == 1, ]
unBEAMT <- newdata[newdata$BEAMT == 0, ]

# Period indicators built from the year dummies: 1984-1986 is "before",
# 1987/1988/1991/1994 is "after".
BEAMT$before <- as.numeric(with(BEAMT, YEAR1984 | YEAR1985 | YEAR1986))
BEAMT$after <- as.numeric(
  with(BEAMT, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)
unBEAMT$before <- as.numeric(with(unBEAMT, YEAR1984 | YEAR1985 | YEAR1986))
unBEAMT$after <- as.numeric(
  with(unBEAMT, YEAR1987 | YEAR1988 | YEAR1991 | YEAR1994)
)

# The four cells of the design. Every row not flagged "before" is treated
# as "after" (the `after` column itself is not used for the split).
control_before <- unBEAMT[unBEAMT$before == 1, ]
control_after <- unBEAMT[unBEAMT$before == 0, ]
treatment_before <- BEAMT[BEAMT$before == 1, ]
treatment_after <- BEAMT[BEAMT$before == 0, ]

# Difference-in-differences of mean DOCVIS: the treatment-minus-control gap
# after, minus the same gap before.
D_in_D <- (mean(treatment_after$DOCVIS) - mean(control_after$DOCVIS)) -
  (mean(treatment_before$DOCVIS) - mean(control_before$DOCVIS))
D_in_D
```
The difference-in-differences estimate for civil servants is greater than 0, so the policy did NOT work for civil servants.