CS计算机代考程序代写 # Lecture 8 Analysis (Part 1)

# Lecture 8 Analysis (Part 1)
# Author: Chris Hansman
# Email: chansman@imperial.ac.uk
# Date : 01/03/21
#install.packages(“kader”)
#install.packages(“KernSmooth”)
library(kader)
library(tidyverse)
library(KernSmooth)
theme_set(theme_classic())
#———————————————-
# Part 1: Kernel Density Estimation
#———————————————-
# Loading Data
density <- read_csv("density.csv") # Scatter Plot ggplot(data=density,aes(x=x, y=y)) + geom_point(alpha=0.5) + ylim(-0.005,0.1) + xlim(-20,20) # Estimating Parameters mean_x <- mean(density$x) sd_x <- sd(density$x) n <- length(density$x) # Parametric Density ggplot(data=density) + geom_point(alpha=0.5,aes(x=x, y=y)) + ylim(-0.005,0.1) + xlim(-20,20) + stat_function(fun = dnorm, args=c(mean=mean_x, sd=sd_x)) # Histogram ggplot(data=density) + geom_point(alpha=0.5,aes(x=x, y=y)) + ylim(-0.005,0.1) + xlim(-20,20) + stat_function(fun = dnorm, args=c(mean=mean_x, sd=sd_x)) + geom_histogram(aes(x=x, y=..density..), colour="black", fill="white", binwidth=1) # Number of x near a point u: u <- -10.6 dist_ux <- abs(u-density$x) near_ux <- as.integer(dist_ux<=0.5) count_ux <-sum(near_ux) # Converting to density density_u <- count_ux/n density_u # Computing Density for a fine grid u_grid = seq(-20, 20, by=0.01) #matrix of distances distmatrix_ux = abs(outer(u_grid, density$x, "-")) nearmatrix_ux = distmatrix_ux<=0.5 countvec_ux = rowSums(nearmatrix_ux) densityvec_u=countvec_ux/n #Generating Tibble to Plot densityplot <- tibble( u_grid, countvec_ux, densityvec_u ) # Plotting ggplot(data=densityplot) + geom_line(aes(x=u_grid, y=densityvec_u)) # All at Once h=1/2 kdensity_unif <- tibble( u_grid = seq(-20, 20, by=0.01), count_ux = rowSums(abs(outer(u_grid, density$x, "-"))/h<1), countperunit_ux = (1/h)*(1/2)*count_ux, densityvec_u=(1/n)*countperunit_ux ) # Plotting ggplot(data=kdensity_unif) + geom_line(aes(x=u_grid, y=densityvec_u)) # Gaussian Kernel h_g=3 kdensity_gaussian <- tibble( u_grid = seq(-20, 20, by=0.01), weight_ux = rowSums(dnorm(abs(outer(u_grid, density$x, "-"))/h_g)), weightperunit_ux = (1/h_g)*weight_ux, densityvec_u=(1/n)*weightperunit_ux ) # Plotting ggplot(data=kdensity_gaussian) + geom_line(aes(x=u_grid, y=densityvec_u), color="blue") + geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_g, alpha=.2, fill="red", color="red") # Rule of Thumb bandwidth h_rot=1.06*sd_x*(n^(-1/5)) ggplot(data=kdensity_gaussian) + geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_rot, alpha=.2, fill="red", color="red") # epanechnikov(x) h_e=h_rot kdensity_epi <- tibble( u_grid = seq(-20, 20, by=0.01), weight_ux = rowSums(kader:::epanechnikov(abs(outer(u_grid, density$x, "-"))/h_e)), weightperunit_ux = (1/h_e)*weight_ux, densityvec_u=(1/n)*weightperunit_ux ) ggplot() + geom_line(data=kdensity_epi, aes(x=u_grid, y=densityvec_u), color="blue") + geom_density(data=density, aes(x=x), kernel = "epanechnikov", bw=h_rot, alpha=.2, fill="red", color="red") #---------------------------------------------- # Part 2: Non-parametric Regression #---------------------------------------------- lpdata <- read_csv("lpdata.csv") # NW Approach at a point u <- 1.5 # Point h=1 # bandwidth diff_ux <- abs(u-lpdata$x) # difference kernel_ux <- (1/h)*dnorm(diff_ux/h) # Kernel weight_ux <- kernel_ux/sum(kernel_ux) # Converting Kernels to weights hu=sum(lpdata$y*weight_ux) # estimate hu # NW approach on a Grid h_nw=0.4 # bandwidth u_grid = seq(-4,4, by=0.01) # Grid kernelgrid_ux=(1/h_nw)*dnorm(abs(outer(u_grid, lpdata$x, "-"))/h_nw) # diff matrix kernelsum_ux=rowSums(kernelgrid_ux) # Sum of Kernels nwweights_ux = kernelgrid_ux/kernelsum_ux # Matrix of weights nwestimates <- nwweights_ux%*%lpdata$y # Estimates # Converting to tibble for plotting nw_gaussian <- tibble( u_grid, nwestimates ) # Plotting ggplot( ) + geom_point(data=lpdata,aes(x=x, y=y)) + geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates)) # Using Pre-built command for NW locpoly0 <- as_tibble(locpoly(x = lpdata$x, y = lpdata$y, bandwidth = h_nw, degree = 0, range.x = c(-4,4), gridsize = 801, kernel = "normal")) # Plotting Pre-Built NW ggplot( ) + geom_point(data=lpdata,aes(x=x, y=y)) + geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates)) + geom_line(data=locpoly0, aes(x=x, y=y), color="red") # Using Pre-built command for Local Linear Regression locpoly1 <- as_tibble(locpoly(x = lpdata$x, y = lpdata$y, bandwidth = h_nw, degree = 3, range.x = c(-3,3), gridsize = 801, kernel = "normal")) # Plotting Pre-Built local Linear ggplot( ) + geom_point(data=lpdata,aes(x=x, y=y)) + geom_line(data=nw_gaussian, aes(x=u_grid, y=nwestimates)) + geom_line(data=locpoly1, aes(x=x, y=y), color="red")+ geom_smooth(data=lpdata,aes(x=x, y=y), span=0.1)