---
title: "Student Performance Analysis"
date: "October 16, 2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(randomForest)
library(car)

# Load both student data sets.
# stringsAsFactors = TRUE is set explicitly: since R 4.0 read.csv() keeps
# text columns as character, and randomForest() needs a factor response to
# fit a classification forest.
standard <- read.csv(
  "Student_data_standard.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# The first column is renamed to "Year" regardless of what the CSV header
# says (presumably the header is read in mangled, e.g. by a byte-order
# mark -- TODO confirm against the raw file).
colnames(standard)[1] <- "Year"

advanced <- read.csv(
  "Student_data_advanced.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
colnames(advanced)[1] <- "Year"
```

## Distribution of grades by year

The chart below shows the distribution of grades by year.

```{r}
# Count the rows for every Year x Grade combination; data.frame() on a
# table yields one row per combination with the count in the third column.
d <- data.frame(table(standard$Year, standard$Unit.of.Study.Grade))
colnames(d) <- c("year", "Unit.of.Study.Grade", "Freq")

# Grouped bar chart: one bar per grade within each year.
ggplot() +
  geom_col(
    data = d,
    aes(x = year, y = Freq, fill = Unit.of.Study.Grade),
    position = "dodge"
  )

# Line chart: one line per grade across years.
ggplot(data = d, aes(x = year, y = Freq, group = Unit.of.Study.Grade)) +
  geom_line(aes(color = Unit.of.Study.Grade)) +
  geom_point(aes(color = Unit.of.Study.Grade))
```

## Classification

```{r}
# Split the data into training and validation sets.
set.seed(100)
dat <- advanced[, -1] # drop the first column (Year)
# Assign each row to set 1 or 2 with probabilities 0.7 / 0.3; the split is
# random per row, so the realised proportions are only approximately 70/30.
ind <- sample(2, nrow(dat), replace = TRUE, prob = c(0.7, 0.3))
training <- dat[ind == 1, ]
testing <- dat[ind == 2, ]

# Random forest predicting the grade from all remaining columns.
classi <- randomForest(
  Unit.of.Study.Grade ~ .,
  data = training,
  importance = TRUE,
  ntree = 10
)
classi
```

## Prediction

```{r}
# Predicting on the training set.
# type = "response" returns the predicted class; the older spelling "class"
# is only accepted by randomForest as a backward-compatibility alias.
predictionTrain <- predict(classi, training, type = "response")
# Confusion table for the training set.
table(predictionTrain, training$Unit.of.Study.Grade)

# Predicting on the validation set.
predictionTest <- predict(classi, testing, type = "response")
# Classification accuracy (share of correct predictions) and confusion table.
mean(predictionTest == testing$Unit.of.Study.Grade)
table(predictionTest, testing$Unit.of.Study.Grade)
```

## Checking for important variables

```{r}
importance(classi)
varImpPlot(classi)
```

## Checking homogeneity of variance

<!-- NOTE(review): this heading originally read "iterating mtry to obtain the
best classification", but the chunk below only runs Levene's test and no mtry
tuning code is present in the document. Confirm whether that code was removed
on purpose. -->

```{r}
# Levene's test for equal variance of Count across all combinations of the
# four grouping variables.
leveneTest(
  Count ~ Gender * Mode * Unit.of.Study.Level * Domestic.Intl,
  data = standard
)
```

## Linear model

An additive (main-effects) ANOVA to check whether the number of grades with
the preceding attributes differs across the variables Gender, Mode,
Unit.of.Study.Level and Domestic.Intl. The model is as below.

```{r}
# Same model as before, written with data = so the terms in the summary are
# labelled by column name, consistent with the leveneTest() and
# kruskal.test() calls.
fit <- aov(
  Count ~ Gender + Mode + Unit.of.Study.Level + Domestic.Intl,
  data = standard
)
summary(fit)
```

```{r}
# Standard diagnostic plots for the fitted model, then the residual
# distribution.
plot(fit)
hist(residuals(fit), col = "cyan")
```

```{r}
# Kruskal-Wallis rank-sum tests of Count against each grouping variable
# separately.
kruskal.test(Count ~ Gender, data = standard)
kruskal.test(Count ~ Unit.of.Study.Level, data = standard)
kruskal.test(Count ~ Domestic.Intl, data = standard)
kruskal.test(Count ~ Mode, data = standard)
```