Unsupervised learning

In unsupervised learning our goal is to find structure in unlabelled data. In machine learning terms that means we only have the samples $\vec{x}_i$ without the target values $y_i$. For example, could you say anything about the CIFAR-10 images without the class labels?

Example 5.1 Floo powder is used by wizards (see the Harry Potter saga), and one floo powder company is interested in finding out why certain of its customers give up their monthly contract for floo powder delivery. Provide them with a reply based on your analysis.

In [27]:
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs

# Generate sample data
#np.random.seed(13)

np.random.seed(42)

mu1 = [15, 1000]
mu2 = [42, 40000]
mu3 = [90, 26000]
N1 = 300
N2 = 150
N3 = 290
cov1 = [[2**2,0],[0,250**2]]
cov2 = [[13**2,0],[0,4000**2]]
cov3 = [[8**2,0],[0,3000**2]]

X1 = np.random.multivariate_normal(mu1, cov1, N1).T
X2 = np.random.multivariate_normal(mu2, cov2, N2).T
X3 = np.random.multivariate_normal(mu3, cov3, N3).T
X = np.concatenate((X1,X2,X3),axis=1)
plt.figure(figsize=(10,4))
plt.plot(X[0,:], X[1,:], 'w', markerfacecolor='black', marker='.')
plt.xlabel('age [y]')
plt.ylabel('annual income [Sickles]')
plt.show()
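Note that the two features live on completely different scales (age in tens of years, income in tens of thousands of Sickles), so a Euclidean-distance method such as K-means would be dominated by the income axis. A minimal sketch of standardizing the features first, using the StandardScaler imported above (the cell and variable names are illustrative):

In [ ]:
# X holds features as rows, so transpose to the samples-as-rows layout
# expected by scikit-learn, then scale each feature to zero mean / unit variance.
X_scaled = StandardScaler().fit_transform(X.T)
print(X_scaled.mean(axis=0), X_scaled.std(axis=0))  # approximately [0, 0] and [1, 1]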

Clustering

The idea in clustering is to find those "clusters" in the data automatically.

K-means clustering
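K-means partitions the samples into $K$ clusters by choosing centers $\vec{\mu}_1, \dots, \vec{\mu}_K$ that minimize the within-cluster sum of squared distances

$$J = \sum_{k=1}^{K} \sum_{\vec{x}_i \in C_k} \lVert \vec{x}_i - \vec{\mu}_k \rVert^2 ,$$

alternating between assigning every sample to its nearest center and recomputing each center as the mean of the samples assigned to it.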

In [28]:
plt.figure(figsize=(10,4))
plt.plot(X[0,:], X[1,:], 'w', markerfacecolor='black', marker='.')
plt.xlabel('age [y]')
plt.ylabel('annual income [Sickles]')
# Mark the true component means
plt.plot(mu1[0], mu1[1], 'o', markerfacecolor='black', markeredgecolor='k')
plt.plot(mu2[0], mu2[1], 'o', markerfacecolor='black', markeredgecolor='k')
plt.plot(mu3[0], mu3[1], 'o', markerfacecolor='black', markeredgecolor='k')
plt.axis()
plt.text(mu1[0]+5, mu1[1]+1500, '(%.1f, %.1f)' % (mu1[0], mu1[1]), color='red')
plt.text(mu2[0]+5, mu2[1]+1500, '(%.1f, %.1f)' % (mu2[0], mu2[1]), color='red')
plt.text(mu3[0]+5, mu3[1]+1500, '(%.1f, %.1f)' % (mu3[0], mu3[1]), color='red')
plt.show()

In [34]:
# LET'S TEST K-MEANS - TEST n_iters = 1, 2, ...
n_iters = 1000

from sklearn.datasets import make_blobs
from sklearn.datasets import make_moons

# A suitable seed for nice pictures
#np.random.seed(666) # very bad
#np.random.seed(3) # very good
#np.random.seed(4) # super good

# Generate sample data: easy

centers = [[0, 0], [1, 0], [1, 1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, centers=centers,
                            cluster_std=[0.1, 0.1, 0.1])  # assumed: equal per-cluster stds (list truncated in source)

#
# Compute clustering with K-means

k_means = KMeans(init='random', n_clusters=3, max_iter=n_iters, n_init=1)
k_means.fit(X)

#
# Plot result

fig = plt.figure(figsize=(4, 3))
#fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
#colors = ['cyan', 'magenta', 'yellow']

k_means_cluster_centers = k_means.cluster_centers_

# Assign every sample to its nearest final center
k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)

for k, col in zip(range(n_clusters), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
plt.title('K-means after %2d iterations' % n_iters)
plt.axis('equal')
plt.show()
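Because n_init=1 starts from a single random initialization, K-means can get stuck in a poor local minimum of the objective (compare the "very bad" and "super good" seeds above). A minimal sketch of guarding against this by comparing restarts through the final objective value, the inertia_ attribute (smaller is better):

In [ ]:
# More restarts: scikit-learn keeps the run with the lowest inertia.
for n_init in (1, 10):
    km = KMeans(init='random', n_clusters=3, n_init=n_init).fit(X)
    print(f'n_init={n_init}: inertia={km.inertia_:.2f}')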

In [36]:
from sklearn.datasets import make_blobs
from sklearn.datasets import make_moons

# A suitable seed for nice pictures
#np.random.seed(666) # very bad
#np.random.seed(3) # very good
#np.random.seed(4) # super good

# Generate sample data: easy

centers = [[0, 0], [1, 0], [1, 1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, centers=centers,
                            cluster_std=[0.1, 0.1, 0.1])  # assumed: equal per-cluster stds (list truncated in source)

#
# Compute clustering with K-means

n_iters = 1000
k_means = KMeans(init='random', n_clusters=3, max_iter=n_iters, n_init=1)
k_means.fit(X)

#
# Plot result

fig = plt.figure(figsize=(4, 3))
#fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
#colors = ['cyan', 'magenta', 'yellow']

k_means_cluster_centers = k_means.cluster_centers_

k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)

for k, col in zip(range(n_clusters), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
plt.title('K-means after %2d iterations' % n_iters)
plt.axis('equal')
plt.show()

#
# Generate sample data: difficult

X, labels_true = make_blobs(n_samples=500, random_state=170)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = np.dot(X, transformation)
X = X_aniso

#
# Compute clustering with K-means

n_iters = 1000
k_means = KMeans(init='random', n_clusters=3, max_iter=n_iters, n_init=1)
k_means.fit(X)

#
# Plot result

fig = plt.figure(figsize=(4, 3))

k_means_cluster_centers = k_means.cluster_centers_

k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)

for k, col in zip(range(n_clusters), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
plt.title('K-means after %2d iterations' % n_iters)
plt.axis('equal')
plt.show()
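Here K-means struggles: assigning points to the nearest center by Euclidean distance implicitly assumes roughly spherical clusters of similar extent, while the linear transformation above has made the blobs elongated and anisotropic, so the boundaries between centers cut through the true clusters.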

#
# Generate sample data: difficult 2

X, labels_true = make_blobs(n_samples=1500, random_state=170,
                            cluster_std=[1.0, 2.5, 0.5])  # assumed: std list was cut off in the source

#
# Compute clustering with K-means

n_iters = 1000
k_means = KMeans(init='random', n_clusters=3, max_iter=n_iters, n_init=1)
k_means.fit(X)

#
# Plot result

fig = plt.figure(figsize=(4, 3))
#fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
#colors = ['cyan', 'magenta', 'yellow']

k_means_cluster_centers = k_means.cluster_centers_

k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)

for k, col in zip(range(n_clusters), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
plt.title('K-means after %2d iterations' % n_iters)
plt.axis('equal')
plt.show()

#
# Generate sample data: difficult 3

X, labels_true = make_moons(n_samples=1500, noise=0.05)

#
# Compute clustering with K-means (2 clusters for the two moons)

n_iters = 1000
k_means = KMeans(init='random', n_clusters=2, max_iter=n_iters, n_init=1)
k_means.fit(X)

#
# Plot result

fig = plt.figure(figsize=(4, 3))

k_means_cluster_centers = k_means.cluster_centers_

k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)

for k, col in zip(range(2), colors):
    my_members = k_means_labels == k
    cluster_center = k_means_cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)
plt.title('K-means after %2d iterations' % n_iters)
plt.axis('equal')
plt.show()
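On the two-moons data K-means fails by construction: each moon is non-convex, while nearest-center assignment can only produce convex (Voronoi) regions. Methods that grow clusters from local neighborhoods, such as single-linkage agglomerative clustering in the next section, can follow such shapes.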

Hierarchical clustering
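
Agglomerative hierarchical clustering starts with every sample as its own cluster and repeatedly merges the two closest clusters until the requested number remains. The linkage criterion defines "closest": single uses the minimum pairwise distance between clusters, complete the maximum, average the mean, and ward the merge that least increases within-cluster variance. The merge history forms a tree; a minimal sketch of visualizing it with SciPy (not otherwise used in this notebook), on a subsample of the current data X so the tree stays readable:

In [ ]:
# Build the full merge tree with Ward linkage and draw it as a dendrogram.
from scipy.cluster.hierarchy import dendrogram, linkage

Z = linkage(X[:50], method='ward')  # 50-sample subsample of the moons data X
dendrogram(Z)
plt.xlabel('sample index')
plt.ylabel('merge distance')
plt.show()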

In [46]:
# LET'S PLAY WITH AGGLOMERATIVE CLUSTERING AND ITS LINKAGE VALUES (TRY THEM ALL)
agglo_linkage = 'ward'  # single / ward / complete (max) / average

from sklearn.cluster import AgglomerativeClustering

# A suitable seed for nice pictures
#np.random.seed(666) # very bad
#np.random.seed(3) # very good
#np.random.seed(4) # super good

# Generate sample data: easy

centers = [[0, 0], [1, 0], [1, 1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, centers=centers,
                            cluster_std=[0.1, 0.1, 0.1])  # assumed: equal per-cluster stds (list truncated in source)

#
# Compute clustering with hierarchical agglomerative clustering
agglo = AgglomerativeClustering(n_clusters=3, linkage=agglo_linkage,
                                connectivity=None)

agglo.fit(X)
y_pred = agglo.labels_.astype(int)

#
# Plot result

fig = plt.figure(figsize=(4, 3))
#fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
#colors = ['cyan', 'magenta', 'yellow']

for k, col in zip(range(n_clusters), colors):
    my_members = y_pred == k
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    #plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
    #         markeredgecolor='k', markersize=6)

plt.title(f'Agglomerative hierarchical clustering (linkage={agglo_linkage})')
plt.axis('equal')
plt.show()

#
# Generate sample data: difficult

X, labels_true = make_blobs(n_samples=500, random_state=170)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = np.dot(X, transformation)
X = X_aniso

#
# Compute clustering with hierarchical agglomerative clustering
agglo = AgglomerativeClustering(n_clusters=3, linkage=agglo_linkage,
                                connectivity=None)

agglo.fit(X)
y_pred = agglo.labels_.astype(int)

#

# Plot result

fig = plt.figure(figsize=(4, 3))

for k, col in zip(range(n_clusters), colors):
    my_members = y_pred == k
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    #plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
    #         markeredgecolor='k', markersize=6)

plt.title(f'Agglomerative hierarchical clustering (linkage={agglo_linkage})')
plt.axis('equal')
plt.show()

#
# Generate sample data: difficult 2

X, labels_true = make_blobs(n_samples=1500, random_state=170,
                            cluster_std=[1.0, 2.5, 0.5])  # assumed: std list was cut off in the source

#
# Compute clustering with hierarchical agglomerative clustering
agglo = AgglomerativeClustering(n_clusters=3, linkage=agglo_linkage,
                                connectivity=None)

agglo.fit(X)
y_pred = agglo.labels_.astype(int)

#
# Plot result

fig = plt.figure(figsize=(4, 3))
#fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
#colors = ['cyan', 'magenta', 'yellow']

for k, col in zip(range(n_clusters), colors):
    my_members = y_pred == k
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    #plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
    #         markeredgecolor='k', markersize=6)

plt.title(f'Agglomerative hierarchical clustering (linkage={agglo_linkage})')
plt.axis('equal')
plt.show()

#
# Generate sample data: difficult 3

X, labels_true = make_moons(n_samples=1500, noise=0.05)

#
# Compute clustering with hierarchical agglomerative clustering
agglo = AgglomerativeClustering(n_clusters=2, linkage=agglo_linkage,
                                connectivity=None)

agglo.fit(X)
y_pred = agglo.labels_.astype(int)

#
# Plot result

fig = plt.figure(figsize=(4, 3))

for k, col in zip(range(2), colors):
    my_members = y_pred == k
    plt.plot(X[my_members, 0], X[my_members, 1], 'w',
             markerfacecolor=col, marker='.')
    #plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
    #         markeredgecolor='k', markersize=6)

plt.title(f'Agglomerative hierarchical clustering (linkage={agglo_linkage})')
plt.axis('equal')
plt.show()
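With single linkage the two moons are typically separated correctly, because chains of nearby points keep each moon connected, whereas ward, complete, and average linkage favor compact clusters and tend to cut each moon in half. Try all four agglo_linkage values above to see the difference.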
