TA Week 5_Logistic Regression

TA Week 5_Logistic Regression

Table of Contents

import math
import numpy as np
import pandas as pd

from pandas import DataFrame
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# import self-defined functions
from util import Cost_Function, Gradient_Descent, Cost_Function_Derivative, Cost_Function, Prediction, Sigmoid

########################################################################
########################### Step-1: data preprocessing #################
########################################################################
# Scaler configured to rescale every feature column into the range [-1, 1].
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
# Load the dataset; header=0 takes the column names from the first CSV row.
# (The file name must be delimited by plain ASCII quotes -- typographic
# quotes are not valid Python string delimiters.)
df = pd.read_csv("data.csv", header=0)

grade1 grade2 label;;;;
0 34.623660 78.024693 0;;;;
1 30.286711 43.894998 0;;;;
2 35.847409 72.902198 0;;;;
3 60.182599 86.308552 1;;;;
4 79.032736 75.344376 1;;;;

# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns. As a bare expression its result is only echoed in an interactive
# session; in a plain script the returned frame is discarded.
df.describe()

grade1 grade2
count 100.000000 100.000000
mean 65.644274 66.221998
std 19.458222 18.582783
min 30.058822 30.603263
25% 50.919511 48.179205
50% 67.032988 67.682381
75% 80.212529 79.360605
max 99.827858 98.869436

## Clean data
# Rename columns so that all three have plain, predictable names.
df.columns = ["grade1", "grade2", "label"]

# Regressors / Rescaling: take the two grade columns as a NumPy array and
# rescale them with the scaler configured during preprocessing.
X = min_max_scaler.fit_transform(np.array(df[["grade1", "grade2"]]))

# Dependent Variable: each label cell is text that may end in ';' characters,
# so strip those before converting the value to float.
Y = np.array(df["label"].map(lambda x: float(x.rstrip(";"))))

# Sanity check: X and Y must have the same number of rows.
print(X.shape)
print(Y.shape)

########################################################################
########################### Step-2: data splitting #################
########################################################################
# Partition the samples into a training subset and a testing subset.
# test_size=0.33 holds out 33% of the samples for testing. No random_state
# is passed, so the partition (and every score below) can differ per run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
)

########################################################################
#################Step-3: training and testing using sklearn #########
########################################################################

# use sklearn class; fit_intercept=False fits the model without a bias term
clf = LogisticRegression(fit_intercept=False)
# call the function fit() to train the class instance
clf.fit(X_train, Y_train)
# scores over testing samples
print(clf.score(X_test, Y_test))

# visualize data using functions in the library pylab
# pos / neg hold the indices of the samples labelled 1 and 0 respectively
pos = where(Y == 1)
neg = where(Y == 0)
# String arguments must use plain ASCII quotes; typographic quotes are a
# SyntaxError in Python.
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature 1: score 1')
ylabel('Feature 2: score 2')
# Legend entries follow the order of the scatter() calls above.
legend(['Label: Admitted', 'Label: Not Admitted'])
# NOTE(review): show() is never called in the visible code; outside an
# interactive/inline plotting session the figure may not be displayed.

0.8181818181818182

array([[2.21098873, 2.30679671]])

########################################################################
################# Step-5: comparing two models #########
########################################################################
##comparing accuracies of two models.

# NOTE: this step reads `theta`, the parameter vector produced by the
# gradient-descent loop of Step-4, so Step-4 must have been executed first.
winner = ""
# accuracy for sklearn
scikit_score = clf.score(X_test, Y_test)

# accuracy for the self-developed model: the fraction of test samples whose
# rounded prediction equals the true label
length = len(X_test)
score = 0  # hit counter; has to be initialised before it is incremented
for sample, answer in zip(X_test, Y_test):
    prediction = round(Prediction(sample, theta))
    if prediction == answer:
        score += 1

my_score = float(score) / float(length)

# Report the winner; exactly one of the three messages is printed.
if my_score > scikit_score:
    print('You won!')
elif my_score == scikit_score:
    print('Its a tie!')
else:
    print('Scikit won.. :(')
print('Your score: ', my_score)
print('Scikits score: ', scikit_score)

Scikit won.. 🙁
Your score: 0.5151515151515151
Scikits score: 0.8181818181818182

########################################################################
##############Step-4: training and testing using self-developed model ##
########################################################################
theta = [2, 2]  # initial model parameters
alpha = 0.001  # learning rate
max_iteration = 1000  # maximal iterations

m = len(Y_train)  # number of samples

for iteration in range(max_iteration):
    # call the function for the gradient descent method; it returns the
    # updated parameters, which replace the current ones
    theta = Gradient_Descent(X_train, Y_train, theta, m, alpha)
    if iteration % 200 == 0:
        # every 200 iterations, report the present theta and the cost
        # function evaluated with it (computed once, only for printing)
        print('Theta: ', theta)
        print('Cost is ', Cost_Function(X_train, Y_train, theta, m))

Theta: [2.000038305235856, 2.000043946147299]
Cost is 0.3304005933824229
Theta: [2.007676039480249, 2.008804872343667]
Cost is 0.32972515768762545
Theta: [2.0152677628696973, 2.0175099123665325]
Cost is 0.32905810598591195
Theta: [2.022814040646683, 2.0261597143265235]
Cost is 0.32839928716627875
Theta: [2.030315427775724, 2.0347549154254727]
Cost is 0.3277485536709123

程序代写 CS代考加微信: powcoder QQ: 1823890830 Email: powcoder@163.com

Related Posts