MAST90083 Computational Statistics & Data Mining — Regression Splines

MAST90083 Computational Statistics & Data Mining Regression Splines
Figure 1: Solution of Question 1
# Setup: clear the workspace and load required packages.
# NOTE(review): rm(list = ls()) is kept to preserve the original script's
# behaviour, but clearing the global environment in a script is discouraged.
rm(list = ls())
library(splines)  # ns(): natural cubic spline basis
library(gam)      # gam() with smoothing terms s()
library(pracma)   # pinv(): Moore-Penrose pseudoinverse
################################################################################
# Question 1: simulate noisy data from f(x) = cos(2*pi*x) - 0.2*x and plot
# the sample together with the true curve.
n <- 50
set.seed(5)  # fix the RNG seed so the simulation is reproducible
e <- rnorm(n, 0, 0.2)          # Gaussian noise, sd = 0.2
x <- sort(runif(n, 0, 1))      # sorted design points on [0, 1]
a <- seq(0, 1, length = n)     # fine grid for drawing the true function
y <- cos(2 * pi * x) - 0.2 * x + e  # noisy observations
b <- cos(2 * pi * a) - 0.2 * a      # true (noise-free) curve
plot(x, y)
lines(a, b)

################################################################################
# Question 2: natural cubic spline regression with two knot choices.
# ns() generates a B-spline basis matrix for natural cubic splines;
# intercept = TRUE keeps the constant column in the basis.
myknots <- quantile(x, probs = c(0.2, 0.4, 0.6, 0.8))
xns <- ns(x, knots = myknots, intercept = TRUE,
          Boundary.knots = range(c(0, 1)))
# Least-squares projection onto the basis via the pseudoinverse;
# equivalent to fitted values of lm(y ~ -1 + xns).
y.fit <- xns %*% pinv(xns) %*% y
plot(x, y)
lines(a, b, col = "dodgerblue", lty = 1)       # true curve
lines(x, y.fit, col = "forestgreen", lty = 2)  # spline fit (4 knots)

# Repeat with 8 interior knots spread over the quantiles of x.
myknots <- quantile(x, probs = seq(0.05, 0.95, length = 8))
xns <- ns(x, knots = myknots, intercept = TRUE,
          Boundary.knots = range(c(0, 1)))
y.fit <- xns %*% pinv(xns) %*% y
plot(x, y)
lines(a, b, col = "dodgerblue", lty = 1)
lines(x, y.fit, col = "forestgreen", lty = 2)
# At around 8 knots, overfitting starts.

################################################################################
# Question 3: smoothing-spline fit via gam() with 6 degrees of freedom.
xss <- gam(y ~ s(x, df = 6))
yfit <- predict(xss)
plot(x, y)
lines(a, b, type = "l", col = "dodgerblue3", lty = 1)
lines(x, yfit, type = "l", col = "forestgreen", lty = 2)

################################################################################
# Question 4: choose the degrees of freedom by minimising the MSE between
# the fitted values and the true curve b.
results <- numeric(15)
for (i in seq_len(15)) {
  xss <- gam(y ~ s(x, df = i))
  yfit <- predict(xss)
  results[i] <- sum((yfit - b)^2) / length(yfit)
}
plot(2:15, results[2:15], type = "b", col = "dodgerblue2",
     xlab = "DoF", ylab = "MSE", pch = 19, lwd = 3)
df <- which.min(results)  # optimum found at index 7, so df = 7 is optimal

################################################################################
# Question 5: smoothing spline on external data with a fixed smoothing
# parameter. Change the path according to your file location.
data <- read.table("D:/R/data.txt")
x <- as.numeric(data[2:222, 1])
y <- as.numeric(data[2:222, 2])
xps <- smooth.spline(x, y, spar = 0.9, all.knots = FALSE)
yfit <- predict(xps, x)$y
plot(x, y)
lines(x, yfit, type = "l", col = "dodgerblue3", lty = 2)
# Checked manually: overfitting starts at around spar = 0.5 and
# underfitting at around spar = 1.