
Practical 10 Solution


Libraries and least-squares estimate needed for Question 1

rm(list=ls())
library(boot)
z <- matrix(0, 13, 2)
z[, 1] <- c(0.01, 0.48, 0.71, 0.95, 1.19, 0.01, 0.48, 1.44, 0.71, 1.96, 0.01, 1.44, 1.96)
z[, 2] <- c(127.6, 124.0, 110.8, 103.9, 101.5, 130.1, 122.0, 92.3, 113.1, 83.7, 128.0, 91.4, 86.2)
temp <- lm(z[, 2] ~ z[, 1])
temp$coef[2] / temp$coef[1]

##     z[, 1]
## -0.1850722

Question 1.1

lm1.bt = function(x, i) {
  temp <- lm(x[i, 2] ~ x[i, 1])$coef
  ratio <- temp[2] / temp[1]
  return(ratio)
}
set.seed(5); boot1 <- boot(data = z, stat = lm1.bt, R = 20)
print(boot1)

## ORDINARY NONPARAMETRIC BOOTSTRAP
##
## Call:
## boot(data = z, statistic = lm1.bt, R = 20)
##
## Bootstrap Statistics :
##       original        bias    std. error
## t1* -0.1850722 0.000468238 0.006386187

mean(boot1$t) - boot1$t0

##     x[i, 1]
## 0.000468238

sd(boot1$t)

## [1] 0.006386187

boot.ci(boot1, type = "norm")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 20 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot1, type = "norm")
##
## Intervals :
## Level      Normal
## 95%   (-0.1981, -0.1730 )
## Calculations and Intervals on Original Scale

ba <- boot.array(boot1, indices = T)
res <- rep(0, 5)
for (i in 1:5) {
  z2 <- z[ba[i, ], 1:2]
  temp <- lm(z2[, 2] ~ z2[, 1])
  res[i] <- temp$coef[2] / temp$coef[1]
}
res - boot1$t

##               [,1]
##  [1,]  0.000000000
##  [2,]  0.000000000
##  [3,]  0.000000000
##  [4,]  0.000000000
##  [5,]  0.000000000
##  [6,] -0.015447394
##  [7,] -0.001291957
##  [8,] -0.007105341
##  [9,]  0.002143070
## [10,] -0.013688289
## [11,] -0.015129832
## [12,]  0.006220850
## [13,] -0.013476829
## [14,]  0.011677981
## [15,] -0.011416528
## [16,] -0.009643222
## [17,]  0.013992953
## [18,] -0.010335582
## [19,]  0.006080048
## [20,] -0.006988010

Question 1.2

lm2.bt = function(x, i) {
  temp <- lm(x[, 2] ~ x[, 1])
  y.star <- temp$fitted + temp$residual[i]
  temp1 <- lm(y.star ~ x[, 1])$coef
  ratio <- temp1[2] / temp1[1]
  return(ratio)
}
set.seed(5); boot2 <- boot(data = z, statistic = lm2.bt, R = 5); boot2

## ORDINARY NONPARAMETRIC BOOTSTRAP
##
## Call:
## boot(data = z, statistic = lm2.bt, R = 5)
##
## Bootstrap Statistics :
##       original         bias    std. error
## t1* -0.1850722 -0.004011592 0.01066213

print(boot2)

## ORDINARY NONPARAMETRIC BOOTSTRAP
##
## Call:
## boot(data = z, statistic = lm2.bt, R = 5)
##
## Bootstrap Statistics :
##       original         bias    std. error
## t1* -0.1850722 -0.004011592 0.01066213

boot.ci(boot2, type = "norm")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 5 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot2, type = "norm")
##
## Intervals :
## Level      Normal
## 95%   (-0.2020, -0.1602 )
## Calculations and Intervals on Original Scale

Question 1.3

lm3.bt = function(x) {
  temp <- lm(x[, 2] ~ x[, 1])$coef
  ratio <- temp[2] / temp[1]
  ratio
}
lm.gen = function(x, mle) {
  n <- nrow(x)
  err <- rnorm(n, mean = 0, sd = mle$sigma)
  y <- mle$beta0 + x[, 1] * mle$beta1 + err
  return(cbind(x[, 1], y))
}
temp <- lm(z[, 2] ~ z[, 1])
mle.list <- list(beta0 = temp$coef[1], beta1 = temp$coef[2])
mle.list$sigma <- sqrt(sum(temp$resid^2) / temp$df.resid)
set.seed(5)
boot3 <- boot(data = z, stat = lm3.bt, R = 20, sim = "parametric", ran.gen = lm.gen, mle = mle.list)
print(boot3)

## PARAMETRIC BOOTSTRAP
##
## Call:
## boot(data = z, statistic = lm3.bt, R = 20, sim = "parametric",
##     ran.gen = lm.gen, mle = mle.list)
##
## Bootstrap Statistics :
##       original         bias    std. error
## t1* -0.1850722 0.0001882346 0.006385126

boot.ci(boot3, type = "norm")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 20 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot3, type = "norm")
##
## Intervals :
## Level      Normal
## 95%   (-0.1978, -0.1727 )
## Calculations and Intervals on Original Scale

Question 1.4

cor.bt = function(x, i) {
  cor(x[i, 1], x[i, 2])
}
set.seed(5); boot4 <- boot(data = z, stat = cor.bt, R = 5000); boot4

## ORDINARY NONPARAMETRIC BOOTSTRAP
##
## Call:
## boot(data = z, statistic = cor.bt, R = 5000)
##
## Bootstrap Statistics :
##       original          bias    std. error
## t1* -0.9847435 -5.809659e-05 0.006875357

bc <- boot.ci(boot4, type = "norm")
plot(density(boot4$t), ylim = c(0, 65), lwd = 2)
hist(boot4$t, breaks = 30, freq = F, add = T)
abline(v = boot4$t0, col = "red", lwd = 2)
abline(v = bc$normal[, 2], col = "green", lwd = 2)
abline(v = bc$normal[, 3], col = "green", lwd = 2)

[Figure: density and histogram of the 5000 bootstrap correlation estimates (density.default(x = boot4$t), N = 5000, Bandwidth = 0.00104), with the original estimate marked in red and the normal-interval limits in green.]
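As an aside (not part of the original solution), the case-resampling bootstrap of Question 1.1 can also be written directly, without the boot package; this is essentially what boot() does internally when it calls lm1.bt with resampled indices. A minimal sketch, assuming z from the setup above is still in the workspace (the replicates will not match boot1 exactly, because boot() draws its resampling indices differently, but the bias and standard error should be comparable):

# By-hand case-resampling bootstrap for the slope-to-intercept ratio (illustrative sketch)
fit0 <- lm(z[, 2] ~ z[, 1])
ratio.hat <- fit0$coef[2] / fit0$coef[1]        # original estimate, about -0.185
R <- 20
ratio.star <- numeric(R)
set.seed(5)
for (r in 1:R) {
  idx <- sample(1:nrow(z), replace = TRUE)      # resample the 13 cases with replacement
  cf <- lm(z[idx, 2] ~ z[idx, 1])$coef
  ratio.star[r] <- cf[2] / cf[1]                # ratio for this bootstrap sample
}
mean(ratio.star) - ratio.hat                    # bootstrap estimate of bias
sd(ratio.star)                                  # bootstrap standard error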
Libraries and data needed for Question 2

rm(list = ls())
library(e1071)
set.seed(114)
f1 <- 1
f2 <- 1
m1 <- cbind(rnorm(10) + f1, rnorm(10))
m2 <- cbind(rnorm(10), rnorm(10) + f2)
sds <- sqrt(0.2)
n <- 100
xx1 <- xx2 <- matrix(0, n, 2)
set.seed(5)
t1 <- t2 <- rep(0, n)
for (i in 1:n) {
  t1[i] <- sample(1:10, 1)
  xx1[i, ] <- c(rnorm(1, m1[t1[i], 1], sds), rnorm(1, m1[t1[i], 2], sds))
  t2[i] <- sample(1:10, 1)
  xx2[i, ] <- c(rnorm(1, m2[t2[i], 1], sds), rnorm(1, m2[t2[i], 2], sds))
}
xl1 <- min(xx1[, 1], xx2[, 1])
xl2 <- max(xx1[, 1], xx2[, 1])
yl1 <- min(xx1[, 2], xx2[, 2])
yl2 <- max(xx1[, 2], xx2[, 2])
plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3, xlim = c(xl1, xl2), ylim = c(yl1, yl2))    # plus
points(xx2[, 1], xx2[, 2], col = "forestgreen", pch = 24, xlim = c(xl1, xl2), ylim = c(yl1, yl2)) # tri

[Figure: scatter plot of the two simulated classes, xx1 (blue plus signs) and xx2 (green triangles).]

gnum <- 200
x1 <- seq(xl1, xl2, length.out = gnum)
y1 <- seq(yl1, yl2, length.out = gnum)
x <- rep(x1, gnum)        # rep(c(1,2,3),10)
y <- rep(y1, each = gnum)

Question 2.1

# par(mfrow=c(3,1)); plot(dis1); plot(dis2); plot(dis1-dis2)
dis1 <- dis2 <- numeric(length(y))
for (j in 1:10) {
  dis1 <- dis1 + dnorm(x, m1[j, 1], sds) * dnorm(y, m1[j, 2], sds)
  dis2 <- dis2 + dnorm(x, m2[j, 1], sds) * dnorm(y, m2[j, 2], sds)
}
z <- matrix(dis1 - dis2, gnum, gnum)
contour(x1, y1, z, levels = 0, xlim = c(xl1, xl2), ylim = c(yl1, yl2), lwd = 2, lty = 2, col = "black")
points(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3, xlim = c(xl1, xl2), ylim = c(yl1, yl2))   # plus
points(xx2[, 1], xx2[, 2], col = "forestgreen", pch = 24, xlim = c(xl1, xl2), ylim = c(yl1, yl2))  # tri

[Figure: Bayes decision boundary (dashed contour at level 0 of dis1 - dis2) overlaid on the two classes.]

Question 2.2

plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3, xlim = c(xl1, xl2), ylim = c(yl1, yl2))    # plus
points(xx2[, 1], xx2[, 2], col = "forestgreen", pch = 24, xlim = c(xl1, xl2), ylim = c(yl1, yl2)) # tri
X <- rbind(xx1, xx2)
Y <- factor(c(rep(-1, 100), rep(1, 100)))
svmfit <- svm(X, Y, cost = 1.5, kernel = "polynomial", degree = 1, coef = 1)
zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE), "decision.values")[, 1], gnum, gnum)
mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0)
lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick")

[Figure: linear SVM (degree-1 polynomial kernel, cost = 1.5) decision boundary in red over the two classes.]
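The practical judges each SVM fit by eye from its plotted boundary. As an optional check (not part of the original solution), the training confusion matrix and misclassification rate of the fit above can be computed in two lines; the same lines apply unchanged to the fits in Questions 2.3 to 2.5.

# Training confusion matrix and error rate for the current svmfit (illustrative addition)
pred <- predict(svmfit, X)
table(truth = Y, predicted = pred)   # how the 200 training points are classified
mean(pred != Y)                      # proportion of training points misclassified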
"polynomial", degree = 3, coef = 1) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] 10 Question 2.4 plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #plus points(xx2[, 1], xx2[, 2], col = "forestgreen", pch =24,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #tri svmfit <- svm(X, Y, cost = 1.5, kernel = "radial", gamma = 1) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] 11 Question 2.5 polynomial with increased cost plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #plus points(xx2[, 1], xx2[, 2], col = "forestgreen", pch =24,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #tri svmfit <- svm(X, Y, cost = 1500, kernel = "polynomial", degree = 3, coef = 1) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] 12 Question 2.5 radial with cost versus gamma plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #plus points(xx2[, 1], xx2[, 2], col = "forestgreen", pch =24,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #tri svmfit <- svm(X, Y, cost = 1500, kernel = "radial", gamma = 1) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #plus points(xx2[, 1], xx2[, 2], col = "forestgreen", pch =24,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #tri svmfit <- svm(X, Y, cost = 1.5, kernel = "radial", gamma = 50) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") 13 −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] plot(xx1[, 1], xx1[, 2], col = "dodgerblue3", pch = 3,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #plus points(xx2[, 1], xx2[, 2], col = "forestgreen", pch =24,xlim=c(xl1,xl2),ylim=c(yl1,yl2)) #tri svmfit <- svm(X, Y, cost = 1500, kernel = "radial", gamma = 50) zpred <- matrix(attr(predict(svmfit, cbind(x, y), decision.values = TRUE),"decision.values")[, 1], gnum, gnum) mycontour <- contourLines(x = x1, y = y1, zpred, levels = 0) lines(mycontour[[1]]$x, mycontour[[1]]$y, lwd = 2, lty = 1, col = "firebrick") 14 −3 −2 −1 0 1 2 3 4 − 3 − 2 − 1 0 1 2 3 xx1[, 1] xx 1 [, 2 ] A small cost (C) creates a soft margin and allows more misclassifications. On the other hand, a large cost creates a hard margin and permits fewer misclassifications. 
A small cost (C) creates a soft margin and allows more misclassifications; a large cost creates a hard margin and permits fewer misclassifications. A large C therefore gives low bias and high variance: the bias is low because misclassification is penalized heavily, so the fitted boundary follows the training data closely. For an SVM with a radial basis kernel, a large value of gamma makes the decision boundary very flexible, so it classifies the training points more accurately, at the risk of overfitting. When gamma is large, the impact of C becomes negligible; when gamma is small, the effect of C becomes significant.
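To make the bias/variance point above concrete, the following optional sketch (not part of the original solution) draws a fresh test sample from the same two mixtures used to generate xx1 and xx2 and compares training and test error across the cost/gamma combinations from Question 2.5. The very flexible fits should do best on the training data but not necessarily on the test data.

# Compare training and test misclassification for several radial-kernel fits
# (illustrative sketch; assumes m1, m2, sds, X, Y from the Question 2 setup are still in the workspace)
make.sample <- function(m, nobs, sds) {
  id <- sample(1:10, nobs, replace = TRUE)                   # pick a mixture component for each point
  cbind(rnorm(nobs, m[id, 1], sds), rnorm(nobs, m[id, 2], sds))
}
set.seed(6)
Xtest <- rbind(make.sample(m1, 500, sds), make.sample(m2, 500, sds))
Ytest <- factor(c(rep(-1, 500), rep(1, 500)))
for (g in c(1, 50)) {
  for (co in c(1.5, 1500)) {
    fit <- svm(X, Y, cost = co, kernel = "radial", gamma = g)
    cat("gamma =", g, ", cost =", co,
        ": train error =", mean(predict(fit, X) != Y),
        ", test error =", mean(predict(fit, Xtest) != Ytest), "\n")
  }
}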