APS1070 Fall 2021¶
Lecture 1¶
K Nearest Neighbours for Iris dataset
Example 1a: KNN in scikit-learn¶
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
# Loading some example data
iris = datasets.load_iris()
In [2]:
# Selecting features and the target
X = iris.data[:, [2, 3]]
y = iris.target
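Columns 2 and 3 of iris.data are the two petal measurements. As a quick check (a sketch, not part of the original notebook), the feature and class names can be printed:
In [ ]:
# Columns 2 and 3 of iris.data are petal length and petal width (cm)
print(iris.feature_names)
# The three species encoded as 0/1/2 in y
print(iris.target_names)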
In [3]:
# Plotting decision regions
step = 0.2
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))
In [4]:
# Training classifiers
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
Out[4]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
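The fitted classifier can be scored directly. As a minimal sanity check (not in the original run; note this is accuracy on the training data, which is optimistic):
In [ ]:
# Mean accuracy on the data the model was fit on (training accuracy)
print(knn.score(X, y))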
In [5]:
# Make predictions on grid
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
In [6]:
# plot
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.show()
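KNN predicts by majority vote among the k closest training points. As a sketch of that mechanism (the query point [4.5, 1.5] is an arbitrary illustrative example, not from the lecture), kneighbors returns the distances and indices of the 5 nearest training samples:
In [ ]:
# Distances and row indices of the 5 nearest training points
dist, idx = knn.kneighbors([[4.5, 1.5]])
print(dist)
# Class labels of those neighbours; the majority class is the prediction
print(y[idx])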
Example 1b: Change Units¶
In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
In [8]:
# Change petal_width from cm to m
X[:,1] = X[:,1]/100
In [9]:
# Plotting decision regions
step = 0.2
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))
# Training classifiers
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
# Make predictions on grid
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
# plot
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (m)')
plt.show()
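With petal width in metres, width differences (~0.01 m) are tiny next to length differences (~1 cm), so the Euclidean distance is dominated by petal length and the decision regions collapse into near-vertical bands. A quick numeric sketch (the flower values are illustrative, not from the dataset):
In [ ]:
# Two flowers that differ only by 1 cm in petal width
a = np.array([4.0, 1.0]); b = np.array([4.0, 2.0])    # width in cm
print(np.linalg.norm(a - b))   # 1.0  -> width contributes fully
a = np.array([4.0, 0.01]); b = np.array([4.0, 0.02])  # width in m
print(np.linalg.norm(a - b))   # 0.01 -> width is nearly invisible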
Example 1c: Normalization¶
In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
# Change petal_width from cm to mm
X[:, 1] = X[:, 1] * 10
In [11]:
# Normalization of iris dataset
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Normalize
scaler.fit(X)
X_norm = scaler.transform(X)
# Save the data from before normalization
X_orig = np.array(X)
# Use the normalized data from here on
X = X_norm
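StandardScaler subtracts the per-feature mean and divides by the per-feature standard deviation (a z-score). A minimal sketch verifying this against a manual computation (NumPy's default std with ddof=0 matches the scaler's):
In [ ]:
# Manual z-score should reproduce StandardScaler's output
X_manual = (X_orig - X_orig.mean(axis=0)) / X_orig.std(axis=0)
print(np.allclose(X_manual, X_norm))   # expected: True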
In [12]:
# Plotting decision regions
step = 0.2
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))
# Training classifiers
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
# Make predictions on grid
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
# plot
plt.contourf(xx, yy, Z, alpha=0.4, levels=2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('KNN')
plt.xlabel('petal length (normalized)')
plt.ylabel('petal width (normalized)')
plt.show()
In [13]:
# Display the mean and standard deviation
import matplotlib.pyplot as plt
plt.errorbar([2, 3], X_orig.mean(axis=0), X_orig.std(axis=0), linestyle='None', marker='x', label="Original Data")
plt.errorbar([2, 3], X_norm.mean(axis=0), X_norm.std(axis=0), linestyle='None', linewidth=2, marker='o', label="Normalized Data")
plt.xlabel('Feature number')
plt.ylabel('Data distribution')
plt.xticks([2, 3])
plt.legend()
plt.show()
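The statistics behind this plot are also stored on the fitted scaler itself, as mean_ and scale_. A short sketch inspecting them; they should match the mean and standard deviation of X_orig shown above:
In [ ]:
# Per-feature statistics learned by StandardScaler during fit
print(scaler.mean_)    # means of the (rescaled) original features
print(scaler.scale_)   # standard deviations of the original features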