# Python functions for Statistical Learning

# Python functions for Statistical Learning
# , The University of School
# August 2017

import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

def rmse_jack(response, predicted):
    """Return the RMSE of a set of predictions and its jackknife standard error.

    Parameters
    ----------
    response : array-like
        Observed values; flattened with ``np.ravel`` before use.
    predicted : array-like
        Predicted values; flattened with ``np.ravel`` before use.

    Returns
    -------
    rmse : float
        Square root of the mean squared error.
    se : float
        Jackknife standard error of ``rmse``. NaN when fewer than two
        observations are supplied, since no leave-one-out replicate exists.
    """
    sq_err = np.asarray(
        (np.ravel(response) - np.ravel(predicted)) ** 2, dtype=float
    )
    n = sq_err.size

    # Degenerate inputs: return NaN explicitly rather than through 0/0.
    if n == 0:
        return np.nan, np.nan

    total = np.sum(sq_err)
    rmse = np.sqrt(total / n)
    if n == 1:
        return rmse, np.nan

    # Leave-one-out RMSE replicates: drop each squared error in turn.
    resample = np.sqrt((total - sq_err) / (n - 1))

    # Jackknife variance is (n-1)/n * sum((theta_i - mean)^2), which equals
    # (n-1) times the population variance (ddof=0) of the replicates.
    se = np.sqrt((n - 1) * np.var(resample))

    return rmse, se

def r2_jack(response, predicted):
    """Return R-squared of a set of predictions and its jackknife standard error.

    Parameters
    ----------
    response : array-like
        Observed values; flattened with ``np.ravel`` before use.
    predicted : array-like
        Predicted values; flattened with ``np.ravel`` before use.

    Returns
    -------
    r2 : float
        ``1 - RSS / TSS``, with TSS taken around the mean of ``response``.
    se : float
        Jackknife standard error of ``r2``.

    Notes
    -----
    The leave-one-out replicates keep the full-sample mean of ``response``
    when removing an observation from the total sum of squares; the mean is
    not recomputed for each replicate.
    """
    # Flatten once and reuse for both the residuals and the centred response.
    y = np.asarray(np.ravel(response), dtype=float)
    y_hat = np.asarray(np.ravel(predicted), dtype=float)

    e2 = (y - y_hat) ** 2
    y2 = (y - np.mean(y)) ** 2

    rss = np.sum(e2)
    tss = np.sum(y2)
    n = len(e2)

    # Leave-one-out R^2: remove each observation's contribution to RSS and TSS.
    resample = 1 - (rss - e2) / (tss - y2)

    r2 = 1 - rss / tss
    # (n-1) * population variance of the replicates is the jackknife variance.
    se = np.sqrt((n - 1) * np.var(resample))

    return r2, se

def forwardselection(X, y):
    """Forward variable selection based on the Scikit learn API.

    At each step the candidate column that gives the highest in-sample
    ``LinearRegression.score`` when added to the current subset is selected.
    The enlarged model is then scored with ``cross_val_score`` (negative mean
    squared error, default folds), and the subset with the best
    cross-validation score over all steps is kept.

    Parameters
    ----------
    X : pandas.DataFrame
        Predictors; columns are addressed by position through ``X.iloc``.
    y : array-like
        Response values.

    Returns
    -------
    bestcv : sklearn.linear_model.LinearRegression
        Scikit learn OLS regression object for the best model, fitted on the
        selected columns.
    subset : list of int
        Column positions of the selected predictors, in order of inclusion.

    Raises
    ------
    ValueError
        If ``X`` has no columns.
    """
    # Functions
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_val_score

    # Initialisation
    p = X.shape[1]
    if p == 0:
        raise ValueError("X must contain at least one column")
    base = []
    candidates = list(range(p))

    # Forward recursion: one variable enters the model per pass.
    bestcvscore = -np.inf
    for _ in range(p):
        # Start from -inf so the first candidate is always accepted and
        # ``newvariable`` can never carry over from the previous pass.
        bestscore = -np.inf
        for variable in candidates:
            columns = base + [variable]
            ols = LinearRegression()
            ols.fit(X.iloc[:, columns], y)
            score = ols.score(X.iloc[:, columns], y)
            if score > bestscore:
                bestscore = score
                best = ols
                newvariable = variable
        base.append(newvariable)
        candidates.remove(newvariable)

        cvscore = cross_val_score(
            best, X.iloc[:, base], y, scoring='neg_mean_squared_error'
        ).mean()

        if cvscore > bestcvscore:
            bestcvscore = cvscore
            bestcv = best
            # Copy: ``base`` keeps growing on later passes.
            subset = base[:]

    return bestcv, subset

class forward:
    """Linear regression with forward variable selection.

    Wraps ``forwardselection`` in an object with ``fit`` / ``predict`` /
    ``cv_score`` methods. Inputs are indexed with ``X.iloc``, so ``X`` is
    expected to be a pandas DataFrame.
    """

    def __init__(self):
        # Both attributes are populated by fit(); None until then.
        self.ols = None
        self.subset = None

    def fit(self, X, y):
        """Run forward selection and store the chosen model and column subset."""
        self.ols, self.subset = forwardselection(X, y)

    def predict(self, X):
        """Predict with the selected model, using only the selected columns of X."""
        return self.ols.predict(X.iloc[:, self.subset])

    def cv_score(self, X, y, cv=10):
        """Return the cross-validated RMSE of the selected model.

        The variable subset is held fixed; only the regression on that subset
        is re-estimated within each of the ``cv`` folds.
        """
        from sklearn.model_selection import cross_val_score
        scores = cross_val_score(
            self.ols, X.iloc[:, self.subset], np.ravel(y),
            cv=cv, scoring='neg_mean_squared_error',
        )
        # Scores are negative MSE values; flip the sign before the root.
        return np.sqrt(-1 * np.mean(scores))

class PCR:
    """Principal components regression.

    Projects the predictors onto the first ``M`` principal components and
    fits an ordinary least squares regression on the component scores.
    """

    def __init__(self, M=1):
        # Number of principal components; read by fit() as self.M.
        self.M = M
        # Populated by fit(); None until then.
        self.pca = None
        self.pcr = None

    def fit(self, X, y):
        """Fit the PCA projection on X and the regression of y on the scores."""
        from sklearn.decomposition import PCA
        from sklearn.linear_model import LinearRegression

        self.pca = PCA(n_components=self.M)
        Z = self.pca.fit_transform(X)
        self.pcr = LinearRegression().fit(Z, y)

    def predict(self, X):
        """Project X with the fitted PCA and predict with the fitted regression."""
        return self.pcr.predict(self.pca.transform(X))

    def cv_score(self, X, y, cv=10):
        """Return an approximate cross-validated RMSE.

        The already-fitted PCA projection is applied to all of X before
        cross-validation, so only the regression step is re-estimated within
        each of the ``cv`` folds.
        """
        from sklearn.model_selection import cross_val_score
        Z = self.pca.transform(X)
        scores = cross_val_score(
            self.pcr, Z, np.ravel(y), cv=cv, scoring='neg_mean_squared_error'
        )
        # Scores are negative MSE values; flip the sign before the root.
        return np.sqrt(-1 * np.mean(scores))

def pcrCV(X, y):
    """Select the number of components for PCR by approximate cross-validation.

    For every ``m`` from 1 to the number of columns of ``X``, a ``PCR`` model
    is fitted on the full data and its regression step is scored with 10-fold
    cross-validation (negative mean squared error) on the component scores.
    The cross-validation is approximate because the PCA projection is
    estimated once on all of ``X`` rather than within each fold.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Predictors.
    y : array-like
        Response values.

    Returns
    -------
    PCR
        The fitted model with the best score. Its ``cv_scores`` attribute is
        a pandas Series of the scores for every ``m``, indexed by ``m``.

    Raises
    ------
    ValueError
        If ``X`` has no columns.
    """
    # Approximate cross-validation
    from sklearn.model_selection import cross_val_score

    p = X.shape[1]
    if p == 0:
        raise ValueError("X must contain at least one column")

    bestscore = -np.inf
    cv_scores = []
    for m in range(1, p + 1):
        model = PCR(M=m)
        model.fit(X, y)
        Z = model.pca.transform(X)
        score = cross_val_score(
            model.pcr, Z, y, cv=10, scoring='neg_mean_squared_error'
        ).mean()
        cv_scores.append(score)
        if score > bestscore:
            bestscore = score
            best = model

    best.cv_scores = pd.Series(cv_scores, index=np.arange(1, p + 1))
    return best

def plsCV(X, y):
    """Select the number of PLS components by 10-fold cross-validation.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Predictors.
    y : array-like
        Response values.

    Returns
    -------
    sklearn.cross_decomposition.PLSRegression
        The model, fitted on all of the data, whose number of components gave
        the best mean cross-validation score (negative mean squared error).
    """
    from sklearn.cross_decomposition import PLSRegression
    from sklearn.model_selection import cross_val_score

    p = X.shape[1]
    bestscore = -np.inf
    # Not fitting with M=p avoids occasional problems. With a single
    # predictor, M=1 is the only option, so it is still tried.
    for m in range(1, max(p, 2)):
        pls = PLSRegression(n_components=m).fit(X, y)
        score = cross_val_score(
            pls, X, y, cv=10, scoring='neg_mean_squared_error'
        ).mean()
        if score > bestscore:
            bestscore = score
            # Keep the winning model; without this the return below fails.
            best = pls
    return best

import matplotlib.pyplot as plt
import itertools

# This function is from the scikit-learn documentation
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    cm : array-like, two-dimensional
        Confusion matrix; rows are drawn on the 'True label' axis and columns
        on the 'Predicted label' axis.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool
        If True, each row is divided by its sum before plotting.
    title : str
        Plot title.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    """
    cm = np.asarray(cm)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.3f' if normalize else 'd'
    # Cells above half of the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# End of module.