# Assignment 1 Code — Empirical Finance
# Author: Chris Hansman
# Email: chansman@imperial.ac.uk
# Date: 09/02/21
# install.packages("tidyverse")
# install.packages("glmnet")
# install.packages("glmnetUtils")
library(tidyverse)
library(readxl)
library(lubridate)
library(glmnet)
library(glmnetUtils)
library(stargazer)
# Problem 4 ----
# Cleaning data: read the Manhattan rolling-sales workbook, skipping its first
# four rows, and replace every space in the column names with an underscore so
# the columns can be referenced without back-ticks.
msales <- read_xls("rollingsales_manhattan.xls", skip = 4) %>%
  setNames(gsub(" ", "_", names(.), fixed = TRUE))

# Keep only rows whose sale price is not zero, then add the log of the sale
# price and the building age measured relative to 2020.
msales <- msales %>%
  filter(SALE_PRICE != 0) %>%
  mutate(
    log_price = log(SALE_PRICE),
    age = 2020 - YEAR_BUILT
  )
# Part a: mean sale price within each neighbourhood, ordered from the lowest
# average price to the highest.
neighborhood_price <- msales %>%
  group_by(NEIGHBORHOOD) %>%
  summarise(n_price = mean(SALE_PRICE)) %>%
  arrange(n_price)
# Part b: regress the log sale price on building age and NEIGHBORHOOD, then
# print the fitted model as a plain-text table with four decimal places.
# The stargazer `type` argument uses straight double quotes: typographic
# (curly) quotes are not string delimiters in R and make the script fail to
# parse.
msales %>%
  lm(log_price ~ age + NEIGHBORHOOD, data = .) %>%
  stargazer(type = "text", digits = 4)
# Part c: the same age regression restricted to a single neighbourhood,
# "UPPER EAST SIDE (59-79)", printed as a plain-text table.
# String literals use straight double quotes: typographic (curly) quotes are
# not string delimiters in R and make the script fail to parse.
msales %>%
  filter(NEIGHBORHOOD == "UPPER EAST SIDE (59-79)") %>%
  lm(log_price ~ age, data = .) %>%
  stargazer(type = "text", digits = 4)
# Part d: one row per calendar month of SALE_DATE. Sale prices are divided by
# 100,000 before aggregating, so mean_price and median_price are expressed in
# units of 100k; n counts the sales recorded in that month.
msales_summary <- msales %>%
  mutate(
    month = month(SALE_DATE),
    SALE_PRICE = SALE_PRICE / 1e5
  ) %>%
  group_by(month) %>%
  summarise(
    mean_price = mean(SALE_PRICE),
    median_price = median(SALE_PRICE),
    n = n()
  )
# Line chart of the monthly mean (red) and median (blue) sale price, in units
# of 100k USD, with the legend placed above the panel.
# String literals use straight double quotes; typographic quotes do not parse.
price_plot <- ggplot(data = msales_summary) +
  geom_line(aes(x = month, y = mean_price, color = "Mean")) +
  geom_line(aes(x = month, y = median_price, color = "Median")) +
  xlab("Month") +
  ylab("Price (100k USD)") +
  ylim(0, 100) +
  scale_x_continuous(breaks = c(3, 6, 9, 12)) +
  scale_colour_manual(
    "",
    breaks = c("Mean", "Median"),
    # Named values tie each colour to its legend key explicitly.
    values = c(Mean = "red", Median = "blue")
  ) +
  theme_classic() +
  theme(legend.position = "top")

# Display the chart.
price_plot

# ggsave() is an ordinary function call, not a plot layer, so it must not be
# chained with `+`. Passing the plot explicitly guarantees that this chart,
# rather than whichever plot was drawn last, is written to disk.
ggsave("prices.pdf", plot = price_plot, width = 6, height = 4)
# Line chart of the number of sales recorded in each calendar month.
# String literals use straight double quotes; typographic quotes do not parse.
quantity_plot <- ggplot(data = msales_summary) +
  geom_line(aes(x = month, y = n)) +
  xlab("Month") +
  ylab("Quantity of Sales") +
  ylim(0, 1200) +
  scale_x_continuous(breaks = c(3, 6, 9, 12)) +
  theme_classic()

# Display the chart.
quantity_plot

# ggsave() is an ordinary function call, not a plot layer, so it must not be
# chained with `+`. Passing the plot explicitly guarantees that this chart,
# rather than whichever plot was drawn last, is written to disk.
ggsave("quantities.pdf", plot = quantity_plot, width = 6, height = 4)
# Problem 5 ----
# Loading data
patriots <- read_csv("patriots.csv")

# Difference-in-differences variables:
#   d  - the `date` column parsed as day-month-year
#   T  - TRUE for observations dated strictly after 1 February 2015
#   D  - TRUE for observations whose exchange is "nyse"
#   DT - the interaction D * T (numeric 0/1)
# The cut-off date is passed to dmy() as a string: written as the bare number
# 01022015 the R parser drops the leading zero before lubridate ever sees it.
# NOTE(review): the column name `T` is kept because it labels the regression
# output; inside these data-masked calls it takes precedence over the base
# shorthand T for TRUE.
patriots <- patriots %>%
  mutate(
    d = dmy(date),
    T = d > dmy("01022015"),
    D = exchange == "nyse",
    DT = D * T
  )

# Regress price on the two indicators and their interaction, and print the
# fitted model as a plain-text table with four decimal places.
patriots %>%
  lm(price ~ D + T + DT, data = .) %>%
  stargazer(type = "text", digits = 4)
# Plotting: mean price for every date/exchange pair. `.groups = "drop"`
# returns an ungrouped table instead of one that silently stays grouped by
# date (and avoids summarise()'s regrouping message).
daily_mean <- patriots %>%
  group_by(d, exchange) %>%
  summarise(mean_p = mean(price), .groups = "drop")

# One line per exchange showing its daily mean price over time.
# String literals use straight double quotes; typographic quotes do not parse.
exchange_plot <- ggplot(
  daily_mean,
  aes(y = mean_p, x = d, group = exchange, color = exchange)
) +
  geom_line() +
  theme_classic() +
  xlab("Date") +
  ylab("Price") +
  ylim(0, 200)

# Display the chart.
exchange_plot

# ggsave() is an ordinary function call, not a plot layer, so it must not be
# chained with `+`. Passing the plot explicitly guarantees that this chart,
# rather than whichever plot was drawn last, is written to disk.
ggsave("nysesse.pdf", plot = exchange_plot, width = 6, height = 4)
# Problem 6 ----
# NOTE: the seed below is deliberately left disabled, so the fold assignment
# made by cv.glmnet(), and therefore the selected lambdas, can change from one
# run to the next.
# set.seed(1234)
train <- read_csv("regularization_train.csv")
test <- read_csv("regularization_test.csv")

# Three cross-validated fits of y on every other column of `train`:
#   cv_glmnet_10    - 10 folds (the default)
#   cv_glmnet_20    - 20 folds
#   cv_glmnet_alpha - default folds, with the mixing parameter alpha = 0.1
cv_glmnet_10 <- cv.glmnet(y ~ ., data = train, nfolds = 10)
cv_glmnet_20 <- cv.glmnet(y ~ ., data = train, nfolds = 20)
cv_glmnet_alpha <- cv.glmnet(y ~ ., data = train, alpha = 0.1)
# Part a: 10-fold fit.
# Show the two candidate penalties, then the coefficients at lambda.1se.
cv_glmnet_10$lambda.min
cv_glmnet_10$lambda.1se
coef(cv_glmnet_10, s = "lambda.1se")

# Predict the test set at lambda.1se and report the mean squared error.
y_pred_10 <- predict(cv_glmnet_10, newdata = test, s = "lambda.1se")
mse_10 <- mean((test[["y"]] - y_pred_10)^2)
mse_10
# Part b: 20-fold fit.
# Show the two candidate penalties, then the coefficients at lambda.1se.
cv_glmnet_20$lambda.min
cv_glmnet_20$lambda.1se
coef(cv_glmnet_20, s = "lambda.1se")

# Predict the test set at lambda.1se and report the mean squared error.
y_pred_20 <- predict(cv_glmnet_20, newdata = test, s = "lambda.1se")
mse_20 <- mean((test[["y"]] - y_pred_20)^2)
mse_20
# Part c: fit with alpha = 0.1.
# Show the two candidate penalties, then the coefficients at lambda.1se.
cv_glmnet_alpha$lambda.min
cv_glmnet_alpha$lambda.1se
coef(cv_glmnet_alpha, s = "lambda.1se")

# Predict the test set at lambda.1se and report the mean squared error.
y_pred_alpha <- predict(cv_glmnet_alpha, newdata = test, s = "lambda.1se")
mse_alpha <- mean((test[["y"]] - y_pred_alpha)^2)
mse_alpha