# Empirical Finance Lecture 4 Analysis (Part 1)
# Author: Chris Hansman
# Email: chansman@imperial.ac.uk
# Date : 28/01/21
# Loading Libraries
library(tidyverse)
# Load the polynomial data set (path is relative to the working directory)
poly <- read_csv("polynomial.csv")
# Scatter plot of y against x, with points coloured by the `test` column
ggplot(poly, aes(x = x, y = y)) +
  geom_point(aes(color = test)) +
  theme_classic()
# Test and Training Data
# Split the sample on the `test` column: rows labelled "train" are used for
# estimation, rows labelled "test" are held out for out-of-sample evaluation.
# String literals must use plain ASCII double quotes; typographic quotes are
# not valid string delimiters in R and prevent the script from parsing.
poly_train <- poly %>%
  filter(test == "train")
poly_test <- poly %>%
  filter(test == "test")
# Linear Fit
# Training points in red with the fitted line from lm(y ~ x) drawn in black.
# se = FALSE suppresses the confidence band (spelled out rather than `F`,
# which is an ordinary variable that can be reassigned).
ggplot(data = poly_train, aes(x = x, y = y)) +
  theme_classic() +
  geom_point(color = "red") +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black")
# Quadratic Fit
# Training points in red with the fitted curve from lm(y ~ x + x^2) in black.
# I() protects x^2 so that `^` is arithmetic rather than formula syntax.
ggplot(data = poly_train, aes(x = x, y = y)) +
  theme_classic() +
  geom_point(color = "red") +
  geom_smooth(
    method = "lm",
    formula = y ~ x + I(x^2),
    se = FALSE,
    color = "black"
  )
# Quadratic and High-Order Fits with poly(), plus Test Data
# Training points in red; degree-2 fit in black; degree-40 fit in purple;
# held-out test points overlaid in blue. Both curves are estimated on the
# training data only, because the plot's default data is poly_train.
# NOTE(review): a raw (non-orthogonal) degree-40 basis is likely to be
# numerically ill-conditioned, so lm may drop terms -- confirm this is the
# intended illustration. The regression section of this script uses degree 25.
ggplot(data = poly_train, aes(x = x, y = y)) +
  theme_classic() +
  geom_point(color = "red") +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2, raw = TRUE),
    se = FALSE,
    color = "black"
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 40, raw = TRUE),
    se = FALSE,
    color = "purple"
  ) +
  geom_point(data = poly_test, color = "blue")
# Quadratic Regression Model
# Estimated on the training sample only.
ols_quad <- lm(y ~ poly(x, 2, raw = TRUE), data = poly_train)
# Prediction on Training Data
# predict() without newdata returns predictions for the estimation sample.
# Prediction error is defined as (prediction - outcome) throughout.
poly_train <- poly_train %>%
  mutate(yhat_quad = predict(ols_quad)) %>%
  mutate(pe_quad = yhat_quad - y)
# Mean Squared Error (in sample)
mse_train_quad <- mean((poly_train$pe_quad)^2)
# Prediction on Testing Data
# Same sign convention as the training sample (prediction - outcome), so that
# pe_quad is directly comparable across the two data frames. The squared
# error, and therefore the MSE, does not depend on the sign.
poly_test <- poly_test %>%
  mutate(yhat_quad = predict(ols_quad, newdata = poly_test)) %>%
  mutate(pe_quad = yhat_quad - y)
# Mean Squared Error out of Sample
mse_test_quad <- mean((poly_test$pe_quad)^2)
# Comparing MSE: in sample vs out of sample
mse_train_quad
mse_test_quad
# High Order Regression Model
# Degree-25 raw polynomial in x, estimated on the training sample.
ols_25 <- lm(y ~ poly(x, 25, raw = TRUE), data = poly_train)
yhat_train_25 <- predict(ols_25)
mse_train_25 <- with(poly_train, mean((yhat_train_25 - y)^2))
# Model Fit: in-sample MSE, quadratic vs degree 25
mse_train_quad
mse_train_25
# High Order Out of Sample Fit
# Predictions for the held-out rows use the training-sample coefficients.
yhat_test_25 <- predict(ols_25, newdata = poly_test)
mse_test_25 <- with(poly_test, mean((yhat_test_25 - y)^2))
# Comparing MSE: out-of-sample, quadratic vs degree 25
mse_test_quad
mse_test_25