---


title: "assignment"
output: html_document
---
```{r setup, include=FALSE}
# Show the R source of every chunk in the rendered HTML by default.
knitr::opts_chunk$set(echo = TRUE)
```

## Question 1

### Load Data

```{r}
# Load the helper script and the MNIST data; `load_mnist()` is expected
# to define the `train` and `test` objects used throughout this report.
# (The original text used curly quotes, which are a syntax error in R.)
source("loader.R")
load_mnist()
```

### 1.a
```{r}
# Query the 3 nearest training neighbours of every test image.
library(RANN)
res <- nn2(train$x, query = test$x, k = 3)
```

```{r}
# Majority vote among the 3 nearest neighbours, falling back to the
# single nearest neighbour when there is no agreement.
#
# With k = 3 a label has a majority iff it occurs at least twice.  If
# neighbours 2 and 3 agree, their shared label wins (it has 2 or 3
# votes).  Otherwise any repeated label must involve neighbour 1, so
# the nearest label wins either way -- one comparison suffices.
n_queries <- nrow(res$nn.idx)
predLabels <- rep(0, n_queries)

for (row in seq_len(n_queries)) {
  neighbour_labels <- train$y[res$nn.idx[row, ]]
  if (neighbour_labels[2] == neighbour_labels[3]) {
    predLabels[row] <- neighbour_labels[2]
  } else {
    predLabels[row] <- neighbour_labels[1]
  }
}

```

```{r}
# Fraction of test images whose predicted digit matches the truth.
n_correct <- sum(predLabels == test$y)
accuracy <- n_correct / test$n
accuracy
```

The achieved accuracy on the test set is 0.8556.

### 1.b

```{r}
# Distance-weighted k-NN label prediction.
#
# Each of the k nearest neighbours of a query votes for its label with
# weight 1/distance; the label with the largest total weight wins, and
# ties keep the earlier (nearer) candidate.  When k == 1, or when the
# nearest distance is numerically zero (1/distance would blow up), the
# nearest neighbour's label is returned directly.
#
# Args:
#   idx:   matrix of neighbour indices, one query per row (nn2()$nn.idx).
#   dists: matrix of neighbour distances matching idx (nn2()$nn.dists).
#   y:     training labels in 0..9, indexed by the values in idx.
#   k:     number of neighbours to use (at most ncol(idx)).
#
# Returns: a numeric vector of predicted labels, one per row of idx.
#
# Fixes vs. the original: the statements were collapsed onto one line
# (a syntax error), line "l – 1" used an en-dash instead of a minus,
# and the inner loops reused `k` as their index, shadowing the
# parameter.
getPredLabel <- function(idx, dists, y, k) {
  n_queries <- nrow(idx)
  pred <- rep(0, n_queries)

  for (i in seq_len(n_queries)) {
    labels <- y[idx[i, 1:k]]
    d <- dists[i, 1:k]

    if (k == 1 || d[1] < 1e-8) {
      # Trivial case, or an (almost) exact match: take the nearest label.
      pred[i] <- labels[1]
    } else {
      # Accumulate inverse-distance votes per digit; slot j holds digit j-1.
      weights <- rep(0, 10)
      for (j in seq_len(k)) {
        slot <- labels[j] + 1
        weights[slot] <- weights[slot] + 1 / d[j]
      }

      # Strict ">" keeps the nearest candidate on tied weights.
      best <- labels[1] + 1
      for (j in 2:k) {
        if (weights[labels[j] + 1] > weights[best]) {
          best <- labels[j] + 1
        }
      }
      pred[i] <- best - 1
    }
  }

  return(pred)
}

# Weighted 3-NN predictions for the full test set.
predLabelsb <- getPredLabel(res$nn.idx, res$nn.dists, train$y, 3)

```

```{r}
# Test-set accuracy of the distance-weighted 3-NN classifier.
accuracb <- sum(predLabelsb == test$y) / test$n
accuracb
```
The achieved accuracy of the weighted version on the test set is 0.8561,
which is an improvement over the unweighted version's 0.8556.

## Question 2
```{r}
# Split the 60000 training images into three folds of 20000 rows each.
# Fold `foldNum` (0, 1 or 2) is used to fit, and the remaining 40000
# rows are held out for evaluation.  Neighbours are computed once with
# k = 10 so that every k <= 10 can be scored from the same nn2 result.
#
# Returns a list with the fold's train/test matrices and labels plus
# the precomputed nn2 result.
#
# (The original chunk was collapsed onto a single line, which breaks
# both the R syntax and the Markdown chunk fences; reconstructed here
# with the same logic.)
getFold <- function(foldNum) {
  trainSeq <- 20000 * foldNum + (1:20000)
  trainx <- train$x[trainSeq, ]
  trainy <- train$y[trainSeq]
  testx <- train$x[-trainSeq, ]
  testy <- train$y[-trainSeq]
  res <- nn2(trainx, query = testx, k = 10)
  return(list(trainx = trainx, trainy = trainy,
              testx = testx, testy = testy, res = res))
}

f0 <- getFold(0)
f1 <- getFold(1)
f2 <- getFold(2)
```

```{r}
# Accuracy of the weighted k-NN classifier on one fold's held-out rows.
getFoldAccuracy <- function(f, k) {
  predLabels <- getPredLabel(f$res$nn.idx, f$res$nn.dists, f$trainy, k)
  accuracy <- sum(predLabels == f$testy) / length(f$testy)
  return(accuracy)
}

# Mean held-out accuracy over the three folds for a given k.
getAverageAccuracy <- function(k) {
  return((getFoldAccuracy(f0, k) +
            getFoldAccuracy(f1, k) +
            getFoldAccuracy(f2, k)) / 3)
}

# Score every k from 1 to 10 and locate the best one.
ks <- 1:10
accus <- rep(0, 10)
for (k in ks) {
  accus[k] <- getAverageAccuracy(k)
}
accus
max(accus)
which(accus == max(accus))
```

```{r}
plot(ks, accus, type = "l", xlab = "k", ylab = "accuracy")
```

I use 3-fold cross-validation to compute the average accuracy for
k = 1, ..., 10. When k = 6, it achieves the highest average accuracy,
0.8414083.

```{r}
res2 <- nn2(train$x, query = test$x, k = 6)
```

```{r}
predk6 <- getPredLabel(res2$nn.idx, res2$nn.dists, train$y, 6)
accuracyK6 <- sum(predk6 == test$y) / test$n
accuracyK6
```

With k = 6, the accuracy over the test data is 0.8575, which is a
further improvement over the 0.8561 achieved in 1.b.