CS计算机代考程序代写 matlab python Unsupervised learning 2 – Manifold learning

Unsupervised learning 2 – Manifold learning

In manifold learning the goal is to find a lower-dimensional manifold ($\mathbb{R}^{\hat{D}}$) that represents the data in a high-dimensional space ($\mathbb{R}^{D}$) so that $\hat{D} \ll D$. Since we often want to visualize the data somehow, the dimension $\hat{D}$ is often 1, 2 or 3.

Many of these methods can be found at https://scikit-learn.org/stable/modules/manifold.html

Multidimensional Scaling (MDS)

Highly popular in many fields.

RD̂

RD̂ D̂ << D D̂ lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l... 1 of 10 9/20/21, 15:47 https://scikit-learn.org/stable/modules/manifold.html https://scikit-learn.org/stable/modules/manifold.html In [19]: from collections import OrderedDict from functools import partial from time import time import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from matplotlib.ticker import NullFormatter from sklearn import manifold, datasets, random_projection n_points = 500 X, color = datasets.make_s_curve(n_points, random_state=0, noise=0.1) n_neighbors = 10 n_components = 2 # Create figure fig = plt.figure() #fig = plt.figure(figsize=(15, 8)) #fig.suptitle("Manifold Learning with %i points, %i neighbors" # % (1000, n_neighbors), fontsize=14) # Add 3d scatter plot #ax = fig.add_subplot(projection='3d') #ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral) #ax.view_init(4, -72) plt.scatter(X[:,0],X[:,2], c=color, cmap=plt.cm.Spectral) plt.show() X2d = np.concatenate(([X[:,0]], [X[:,2]]),axis=0).T print(X2d.shape) mds = manifold.MDS(1, max_iter=1000, n_init=10, random_state=666) # Plot results t0 = time() Y = mds.fit_transform(X2d) t1 = time() print("%s: %.2g sec" % ("MDS", t1 - t0)) #ax = fig.add_subplot(2, 5, 2 + i + (i > 3))
# Draw the 1-D embedding Y along the horizontal axis (all points at y = 0),
# colored with the same per-point colors as the input scatter plot.
plt.scatter(Y, np.zeros(Y.size), c=color, cmap=plt.cm.Spectral)
#ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
#ax.xaxis.set_major_formatter(NullFormatter())
#ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')
plt.show()

## Next line to silence pyflakes. This import is needed.
Axes3D

# Show the S-curve samples as a 3-D scatter plot, colored by `color`.
fig = plt.figure() #fig = plt.figure(figsize=(15, 8))
ax = fig.add_subplot(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
# Fix the camera (elevation 4, azimuth -72 degrees).
ax.view_init(4, -72)
plt.show()

# Embed the 3-column data X into 2 dimensions with MDS
# (max_iter=1000, n_init=10; no random_state is passed here).
mds = manifold.MDS(2, max_iter=1000, n_init=10)

# Plot results
# t0 marks the start of the timed fit.
t0 = time()
Y = mds.fit_transform(X)

lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l…

2 of 10 9/20/21, 15:47

(500, 2)
MDS: 1.2 sec

MDS: 2.5 sec

# Tighten the axis limits around the data and render the figure.
plt.axis('tight')
plt.show()

lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l…

3 of 10 9/20/21, 15:47

Random Projections

Often surprisingly powerful, but the result depends on the random seed.

Random: 0.0019 sec

Self-organizing Map (SOM)

The main usage is data visualization, but this method is surprisingly powerful and would deserve to be implemented in the scikit-learn package.

Super implementation (Matlab) http://www.cis.hut.fi/somtoolbox/

Python implementation https://pypi.org/project/sklearn-som/

In [23]:
# Re-run multiple times: no random_state is passed to the projection, so the
# embedding is expected to differ from run to run.

# Time a 2-D Gaussian random projection of X.
t0 = time()
rp = random_projection.GaussianRandomProjection(n_components=2)
Y = rp.fit_transform(X)
t1 = time()
print("%s: %.2g sec" % ("Random", t1 - t0))
plt.scatter(Y[:,0], Y[:,1], c=color, cmap=plt.cm.Spectral)
#ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
# Hide the tick labels on the axes that plt.scatter just drew on. The
# module-level `ax` still refers to the earlier 3-D figure, so fetch the
# current axes instead of formatting that stale object.
current_ax = plt.gca()
current_ax.xaxis.set_major_formatter(NullFormatter())
current_ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')
plt.show()

lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l…

4 of 10 9/20/21, 15:47

http://www.cis.hut.fi/somtoolbox/
http://www.cis.hut.fi/somtoolbox/
https://pypi.org/project/sklearn-som/
https://pypi.org/project/sklearn-som/

/project/sklearn-som/
In [24]:

# Re-run multiple times !!

import numpy as np

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import datasets

# Import custom SOM implementation
import importlib.util
spec = importlib.util.spec_from_file_location(“sklearn_som.som”, “/home/kamarain/Work/ext/s
sklearn_som = importlib.util.module_from_spec(spec)
spec.loader.exec_module(sklearn_som)

# Sample a noisy S-curve and keep only columns 0 and 2, giving a 2-D "S".
n_points = 1000
X, color = datasets.make_s_curve(n_points, random_state=0, noise=0.1)
n_neighbors = 10
n_components = 2
X2d = np.concatenate(([X[:,0]], [X[:,2]]),axis=0).T

# Create figure
fig = plt.figure()
plt.scatter(X2d[:,0],X2d[:,1], c=color, cmap=plt.cm.Spectral)
plt.show()

# Fit a 1 x 13 SOM (a chain of 13 units) to the 2-D points.
som = sklearn_som.SOM(m=1, n=13, dim=2)
som.fit(X2d, shuffle=False)
#bmus = som.predict(X2d)
#X_som = som._locations[bmus,:]
# NOTE(review): presumably one weight vector per unit, shape (13, 2) --
# confirm against the custom sklearn_som implementation.
X_w = som.weights

# Overlay the unit weights on the data: a black line connecting consecutive
# units plus a black dot at each unit.
plt.scatter(X2d[:,0],X2d[:,1], c=color, cmap=plt.cm.Spectral)
plt.plot(X_w[:,0],X_w[:,1],'k-')
plt.plot(X_w[:,0],X_w[:,1],'ko')
plt.show()

lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l…

5 of 10 9/20/21, 15:47

https://pypi.org/project/sklearn-som/
https://pypi.org/project/sklearn-som/

lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l…

6 of 10 9/20/21, 15:47

In [25]:
# THIS CODE IS MODIFIED VERSION OF SCIKIT-LEARN CODE
# Original Authors: Fabian Pedregosa
# Olivier Grisel # Mathieu Blondel < >
# Gael Varoquaux
# License: BSD 3 clause (C) INRIA 2011

#from time import time
#import numpy as np
#import matplotlib.pyplot as plt
from matplotlib import offsetbox
#from sklearn import (manifold, datasets, decomposition, ensemble,
# discriminant_analysis, random_projection, neighbors)

#import importlib.util
#spec = importlib.util.spec_from_file_location(“sklearn_som.som”, “/home/kamarain/Work/ext/
#sklearn_som = importlib.util.module_from_spec(spec)
#spec.loader.exec_module(sklearn_som)

digits = datasets.load_digits(n_class=6)
X = digits.data
y = digits.target
n_samples, n_features = X.shape
n_neighbors = 30

print(X.shape)

# ———————————————————————-
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None):

x_min, x_max = np.min(X, 0), np.max(X, 0)
X = (X – x_min) / (x_max – x_min)

plt.figure()
ax = plt.subplot(111)
for i in range(X.shape[0]):

plt.text(X[i, 0], X[i, 1], str(y[i]),
color=plt.cm.Set1(y[i] / 10.),
fontdict={‘weight’: ‘bold’, ‘size’: 9})

if hasattr(offsetbox, ‘AnnotationBbox’):
# only print thumbnails with matplotlib > 1.0
shown_images = np.array([[1., 1.]]) # just something big
for i in range(X.shape[0]):

dist = np.sum((X[i] – shown_images) ** 2, 1)
if np.min(dist) < 4e-3: #if np.min(dist) < 5e-3: # Change this to plot more digit examples # don't show points that are too close continue shown_images = np.r_[shown_images, [X[i]]] imagebox = offsetbox.AnnotationBbox( offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r), X[i]) ax.add_artist(imagebox) plt.xticks([]), plt.yticks([]) if title is not None: plt.title(title) # ---------------------------------------------------------------------- # Plot images of the digits lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l... 7 of 10 9/20/21, 15:47 plt.imshow(img, cmap=plt.cm.binary) plt.xticks([]) plt.yticks([]) plt.title('Esimerkkejä 8x8 Digits-datajoukosta') plt.show() # ---------------------------------------------------------------------- # Random 2D projection using a random unitary matrix print("Computing random projection") #rp = random_projection.SparseRandomProjection(n_components=2, random_state=42) rp = random_projection.GaussianRandomProjection(n_components=2, random_state X_projected = rp.fit_transform(X) plot_embedding(X_projected, "Satunnaisprojektio") print(X_projected) #from pprint import pprint #pprint(vars(X_projected)) plt.show() # ---------------------------------------------------------------------- # MDS embedding of the digits dataset print("Computing MDS embedding") clf = manifold.MDS(n_components=2, n_init=1, max_iter=100) t0 = time() X_mds = clf.fit_transform(X) print("Done. 
Stress: %f" % clf.stress_) plot_embedding(X_mds, "MDS-projektio") plt.show() # ---------------------------------------------------------------------- # SOM embedding of the digits dataset print("Computing SOM embedding") print(X.size) som = sklearn_som.SOM(m=20, n=20, dim=64) t0 = time() som.fit(X) bmus = som.predict(X) X_som = som._locations[bmus,:] #from pprint import pprint #pprint(vars(som)) #print(X_som) print("Done.") plot_embedding(X_som, "SOM-kartta") plt.show() # LATEEEEEEEER ## ---------------------------------------------------------------------- ## t-SNE embedding of the digits dataset #print("Computing t-SNE embedding") #tsne = manifold.TSNE(n_components=2, init='pca', random_state=0) lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l... 8 of 10 9/20/21, 15:47 (1083, 64) Computing random projection [[ -68.26635221 0.30075222] [ -65.64810565 3.01885825] [ -73.54228309 21.47372818] ... [-106.99211851 33.18923775] [-113.40774221 26.87927004] [ -88.78495966 7.37088605]] Computing MDS embedding Done. Stress: 150988651.554803 lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l... 9 of 10 9/20/21, 15:47 Computing SOM embedding 69312 Done. References • A.R. Web and K.D. Copsey: Statistical Pattern Recognition, 3rd ed, 2011. Chapters 10-11 • T. Hastie and R. Tibshirani and J. Friedman: The Elements of Statistical Learning, 2009, Springer. Chapter 15 • T. Kohonen (2014): MATLAB Implementations and Applications of the Self-Organizing Map (PDF: http://docs.unigrafia.fi/publications/kohonen_teuvo/) lec07_manifold_learning http://localhost:8888/nbconvert/html/lec07_manifold_l... 10 of 10 9/20/21, 15:47 http://docs.unigrafia.fi/publications/kohonen_teuvo/ http://docs.unigrafia.fi/publications/kohonen_teuvo/