CS代考 L1

L1

APS1070 Fall 2021
Lecture 1
K Nearest Neighbours for Iris dataset

Example 1a: KNN in scikit-learn

In [1]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets

# Load the Iris dataset bundled with scikit-learn; the cells below read its
# `.data` (feature matrix) and `.target` (class labels) attributes.
iris = datasets.load_iris()

In [2]:

# Class labels, plus the two feature columns used for the 2-D plots
# (columns 2 and 3: petal length and petal width, per the axis labels below).
y = iris.target
feature_columns = [2, 3]
X = iris.data[:, feature_columns]

In [3]:

# Build the mesh of query points over which the decision regions are drawn.
step = 0.2  # spacing between neighbouring mesh points, in feature units

# Pad each feature's range by 1 on both sides so the outermost samples do not
# sit on the border of the plot.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# xx / yy hold the first / second feature coordinate of every mesh point.
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))

In [4]:

# Fit a k-nearest-neighbours classifier on the two selected features.
n_neighbors = 5
knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
# Left as the last expression so the notebook displays the fitted estimator.
knn.fit(X, y)

Out[4]:

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [5]:

# Stack the flattened mesh coordinates into one (x, y) row per mesh point,
# classify every point, then fold the flat predictions back into the mesh
# shape so they can be drawn as a filled contour plot.
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = knn.predict(grid_points).reshape(xx.shape)

In [6]:

# Draw the predicted class regions, then overlay the training samples
# coloured by their true class.
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.show()

Example 1b: Change Units

In [7]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets

# Reload the dataset and take the same two feature columns (2 and 3) and the
# class labels as in Example 1a.
iris = datasets.load_iris()
y = iris.target
feature_columns = [2, 3]
X = iris.data[:, feature_columns]

In [8]:

# Change petal_width from cm to m.
# The value is derived from the untouched source column (iris.data[:, 3], the
# column X[:, 1] was copied from) rather than from X itself, so re-running
# this cell always yields the same result instead of dividing by 100 again.
X[:, 1] = iris.data[:, 3] / 100

In [9]:

# Build the mesh of query points over which the decision regions are drawn.
# The step is left at 0.2, as in Example 1a, although the second feature was
# rescaled in the cell above.
step = 0.2
# Pad each feature's range by 1 on both sides.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))

# Training classifiers
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Make predictions on grid: one (x, y) row per mesh point
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result back into the mesh shape for the colour plot
Z = Z.reshape(xx.shape)

# Draw the predicted class regions, then overlay the training samples
# coloured by their true class.
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (m)')
plt.show()

Example 1c: Normalization

In [10]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets

# Reload the dataset and take feature columns 2 and 3 plus the class labels.
iris = datasets.load_iris()
y = iris.target
X = iris.data[:, [2, 3]]

# Scale the second selected feature by 10 (petal width: cm -> mm), so the two
# features are on different scales before normalization.
X[:, 1] *= 10

In [11]:

# Standardize the two selected iris features
from sklearn.preprocessing import StandardScaler

# Keep an independent copy of the data as it was before normalization
X_orig = np.array(X)

# Fit the scaler on X and produce the normalized copy in a single call
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)

# From here on, X refers to the normalized data
X = X_norm

In [12]:

# Build the mesh of query points over which the decision regions are drawn.
step = 0.2  # spacing between neighbouring mesh points, in feature units
# Pad each feature's range by 1 on both sides.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))

# Training classifiers
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Make predictions on grid: one (x, y) row per mesh point
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result back into the mesh shape for the colour plot
Z = Z.reshape(xx.shape)

# Draw the predicted class regions, then overlay the training samples
# coloured by their true class.
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length normalized')
plt.ylabel('petal width normalized')
plt.show()

In [13]:

# Display the mean and standard deviation of each feature, before and after
# normalization: the marker is the per-column mean and the error bar is the
# per-column standard deviation.
import matplotlib.pyplot as plt

# x positions 2 and 3 are the feature column indices selected from iris.data.
plt.errorbar([2, 3], X_orig.mean(axis=0), X_orig.std(axis=0),
             linestyle='None', marker='x', label="Original Data")
plt.errorbar([2, 3], X_norm.mean(axis=0), X_norm.std(axis=0),
             linestyle='None', linewidth=2, marker='o', label="Normalized Data")
plt.xlabel('Feature number')
plt.ylabel('Data distribution')
plt.xticks([2, 3])
plt.legend()
plt.show()

In [13]: