# Lecture 8 Analysis (Part 1)
# Author: Chris Hansman
# Email: chansman@imperial.ac.uk
# Date : 01/03/21
#install.packages(“kader”)
#install.packages(“KernSmooth”)
library(kader)
library(tidyverse)
library(KernSmooth)
theme_set(theme_classic())
#———————————————-
# Part 1: Kernel Density Estimation
#———————————————-
# Loading Data
density <- read_csv("density.csv")
# Scatter Plot
ggplot(data=density,aes(x=x, y=y)) +
geom_point(alpha=0.5) +
ylim(-0.005,0.1) +
xlim(-20,20)
# Estimating Parameters
mean_x <- mean(density$x)
sd_x <- sd(density$x)
n <- length(density$x)
# Parametric Density
ggplot(data=density) +
geom_point(alpha=0.5,aes(x=x, y=y)) +
ylim(-0.005,0.1) +
xlim(-20,20) +
stat_function(fun = dnorm, args=c(mean=mean_x, sd=sd_x))
# Histogram
ggplot(data=density) +
geom_point(alpha=0.5,aes(x=x, y=y)) +
ylim(-0.005,0.1) +
xlim(-20,20) +
stat_function(fun = dnorm, args=c(mean=mean_x, sd=sd_x)) +
geom_histogram(aes(x=x, y=..density..),
colour="black", fill="white", binwidth=1)
# Number of x near a point u:
u <- -10.6
dist_ux <- abs(u-density$x)
near_ux <- as.integer(dist_ux<=0.5)
count_ux <-sum(near_ux)
# Converting to density
density_u <- count_ux/n
density_u
# Computing Density for a fine grid
u_grid = seq(-20, 20, by=0.01)
#matrix of distances
distmatrix_ux = abs(outer(u_grid, density$x, "-"))
nearmatrix_ux = distmatrix_ux<=0.5
countvec_ux = rowSums(nearmatrix_ux)
densityvec_u=countvec_ux/n
#Generating Tibble to Plot
densityplot <- tibble(
u_grid,
countvec_ux,
densityvec_u
)
# Plotting
ggplot(data=densityplot) +
geom_line(aes(x=u_grid, y=densityvec_u))
# All at Once
h=1/2
kdensity_unif <- tibble(
u_grid = seq(-20, 20, by=0.01),
count_ux = rowSums(abs(outer(u_grid, density$x, "-"))/h<1),
countperunit_ux = (1/h)*(1/2)*count_ux,
densityvec_u=(1/n)*countperunit_ux
)
# Plotting
ggplot(data=kdensity_unif) +
geom_line(aes(x=u_grid, y=densityvec_u))
# Gaussian Kernel
h_g=3
kdensity_gaussian <- tibble(
u_grid = seq(-20, 20, by=0.01),
weight_ux = rowSums(dnorm(abs(outer(u_grid, density$x, "-"))/h_g)),
weightperunit_ux = (1/h_g)*weight_ux,
densityvec_u=(1/n)*weightperunit_ux
)
# Plotting
ggplot(data=kdensity_gaussian) +
geom_line(aes(x=u_grid, y=densityvec_u), color="blue") +
geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_g, alpha=.2, fill="red", color="red")
# Rule of Thumb bandwidth
h_rot=1.06*sd_x*(n^(-1/5))
ggplot(data=kdensity_gaussian) +
geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_rot, alpha=.2, fill="red", color="red")
# epanechnikov(x)
h_e=h_rot
kdensity_epi <- tibble(
u_grid = seq(-20, 20, by=0.01),
weight_ux = rowSums(kader:::epanechnikov(abs(outer(u_grid, density$x, "-"))/h_e)),
weightperunit_ux = (1/h_e)*weight_ux,
densityvec_u=(1/n)*weightperunit_ux
)
ggplot() +
geom_line(data=kdensity_epi, aes(x=u_grid, y=densityvec_u), color="blue") +
geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_rot, alpha=.2, fill="red", color="red")
#----------------------------------------------
# Part 2: Non-parametric Regression
#----------------------------------------------
lpdata <- read_csv("lpdata.csv")
# NW Approach at a point
u <- 1.5 # Point
h=1 # bandwidth
diff_ux <- abs(u-lpdata$x) # difference
kernel_ux <- (1/h)*dnorm(diff_ux/h) # Kernel
weight_ux <- kernel_ux/sum(kernel_ux) # Converting Kernels to weights
hu=sum(lpdata$y*weight_ux) # estimate
hu
# NW approach on a Grid
h_nw=0.4 # bandwidth
u_grid = seq(-4,4, by=0.01) # Grid
kernelgrid_ux=(1/h_nw)*dnorm(abs(outer(u_grid, lpdata$x, "-"))/h_nw) # diff matrix
kernelsum_ux=rowSums(kernelgrid_ux) # Sum of Kernels
nwweights_ux = kernelgrid_ux/kernelsum_ux # Matrix of weights
nwestimates <- nwweights_ux%*%lpdata$y # Estimates
# Converting to tibble for plotting
nw_gaussian <- tibble(
u_grid,
nwestimates
)
# Plotting
ggplot( ) +
geom_point(data=lpdata,aes(x=x, y=y)) +
geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates))
# Using Pre-built command for NW
locpoly0 <- as_tibble(locpoly(x = lpdata$x, y = lpdata$y,
bandwidth = h_nw, degree = 0,
range.x = c(-4,4), gridsize = 801,
kernel = "normal"))
# Plotting Pre-Built NW
ggplot( ) +
geom_point(data=lpdata,aes(x=x, y=y)) +
geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates)) +
geom_line(data=locpoly0, aes(x=x, y=y), color="red")
# Using Pre-built command for Local Linear Regression
locpoly1 <- as_tibble(locpoly(x = lpdata$x, y = lpdata$y,
bandwidth = h_nw, degree = 3,
range.x = c(-3,3), gridsize = 801,
kernel = "normal"))
# Plotting Pre-Built local Linear
ggplot( ) +
geom_point(data=lpdata,aes(x=x, y=y)) +
geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates)) +
geom_line(data=locpoly1, aes(x=x, y=y), color="red")+
geom_smooth(data=lpdata,aes(x=x, y=y), span=0.1)