CS代考 L4

L4

APS1070 Fall 2021¶
Lecture 4¶

Example for K-Means¶
Image Compression

In [ ]:

#source: https://towardsdatascience.com/image-compression-using-k-means-clustering-aa0c91bb0eeb

from skimage import io
from sklearn.cluster import KMeans
import numpy as np

In [ ]:

#Read the image (fetched over HTTP, so this cell needs network access)
image = io.imread('https://www.eecg.utoronto.ca/~hadizade/APS1070/Lion.png')
io.imshow(image)
io.show()

In [ ]:

#Dimension of the original image
rows, cols = image.shape[:2]

#Flatten the image to one RGB pixel per row.
#The flattened copy gets its own name so `image` keeps its original layout
#and this cell can be re-run without failing on an already-flattened array.
#Slicing to the first three channels lets an RGBA input reshape cleanly.
pixels = image[..., :3].reshape(rows * cols, 3)

#Implement k-means clustering to form k clusters
#(random_state is fixed so a re-run produces the same centroids)
kmeans = KMeans(n_clusters=4, random_state=0)
kmeans.fit(pixels)

#Replace each pixel value with its nearest centroid
compressed_image = kmeans.cluster_centers_[kmeans.labels_]
#Clip while the values are still floats, then cast; clipping after the uint8
#cast can never change a value.
compressed_image = np.clip(compressed_image, 0, 255).astype('uint8')

#Reshape the image to original dimension
compressed_image = compressed_image.reshape(rows, cols, 3)

#Save and display output image
io.imsave('compressed_image_4.png', compressed_image)
io.imshow(compressed_image)
io.show()

In [ ]:

Example 1: Plotting a Gaussian¶

In [ ]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# mean and standard deviation
mu, sigma = 0, 0.1

# sample from Gaussian (normal) distribution
s = np.random.normal(mu, sigma, 2000)

# split samples into 40 bins; density=True scales the bars so they can be
# compared directly with the probability density curve below
count, bins, ignored = plt.hist(s, 40, density=True)

# evaluate the Gaussian density at the bin edges and draw it over the histogram
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
plt.plot(bins, pdf, linewidth=2, color='r')
plt.show()

Example 2a: Plotting a Bivariate Gaussian¶

In [ ]:

# make_blobs is imported from sklearn.datasets directly; the old
# sklearn.datasets.samples_generator path no longer exists in current releases
from sklearn.datasets import make_blobs
X, y_true = make_blobs(n_samples=1000, centers=3, cluster_std=1)

# trailing semicolons suppress the repr of the returned matplotlib objects
plt.scatter(X[:,0],X[:,1],marker="x");
plt.axis([-12,12,-12,12]);

In [ ]:

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from numpy.random import multivariate_normal

# provide mean and covariance
mean = [0, 0]
cov = np.array([[1, 0], [0, 1]])
# alternative covariance matrices to try (uncomment one at a time):
#cov = np.array([[5, 0], [0, 5]])
#cov = np.array([[5, 0], [0, 1]])
#cov = np.array([[1, 0.5], [0.5, 1]])
#cov = np.array([[2, -1.9], [-1.9, 2]])

# sample from Gaussian distribution
data = multivariate_normal(mean, cov, size=1000000)

# split samples into bins and plot
plt.hist2d(data[:, 0], data[:, 1], bins=100)

plt.xlim(-5, 5)
plt.ylim(-5, 5)
# equal aspect ratio so one unit on x is drawn the same length as one unit on y
plt.gca().set_aspect('equal', adjustable='box')

plt.show()

# obtain the correlation: off-diagonal covariance divided by the product of
# the two standard deviations
correlation = cov[0,1]/((cov[0,0]**0.5)*(cov[1,1]**0.5))
print(cov)
print(correlation)

Example 2b: Example from Lecture¶

In [ ]:

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from numpy.random import multivariate_normal

# mean vector and covariance matrix for this example
mean = [0.1, 0.2]
cov = np.array([[0.69, -0.08], [-0.08, 0.16]])

# sample from Gaussian distribution
data = multivariate_normal(mean, cov, size=1000000)

# split samples into bins and plot
plt.hist2d(data[:, 0], data[:, 1], bins=100)

plt.xlim(-3, 3)
plt.ylim(-1.5, 2)
# equal aspect ratio so one unit on x is drawn the same length as one unit on y
plt.gca().set_aspect('equal', adjustable='box')

plt.show()

# calculate the correlation: off-diagonal covariance divided by the product
# of the two standard deviations
correlation = cov[0,1]/((cov[0,0]**0.5)*(cov[1,1]**0.5))
print(cov)
print(correlation)

We can compare this to a scatter plot of the same samples.

In [ ]:

# Scatter plots do not show the distribution properly

# imported from sklearn.datasets directly; the old
# sklearn.datasets.samples_generator path no longer exists in current releases
from sklearn.datasets import make_blobs
X = data
# trailing semicolons suppress the repr of the returned matplotlib objects
plt.scatter(X[:,0],X[:,1],marker="x");
plt.axis([-4,4,-4,4]);

Example 3: Covariance from Real Data¶

In [ ]:

# Small example data set: one column per measured variable
df = pd.DataFrame({'Length': [4.0, 4.2, 3.9, 4.3, 4.1],
                   'Width': [2.0, 2.1, 2.0, 2.1, 2.2],
                   'Height': [0.60, 0.59, 0.58, 0.62, 0.63]})
df

Each row is an observation. See source example here.

In [ ]:

# Matrix of deviations from the mean: subtract each column's mean from that column
df_dev = df - df.mean(axis=0)
df_dev

In [ ]:

# Transposed deviations: rows and columns of df_dev swapped
df_dev.transpose()

$S=\frac{1}{n-1}\sum_{i=1}^{n}\left( X_{i} - \bar{X} \right)\left( X_{i} - \bar{X} \right)^{\top}$

In [ ]:

# implement the above equation: accumulate the outer product of every
# observation's deviation vector, then divide by (n - 1)
n_obs, n_vars = df_dev.shape
# named outer_sum rather than `sum` so the builtin sum() is not shadowed
outer_sum = np.zeros((n_vars, n_vars))
for i in range(n_obs):
    # positional row access, reshaped into a column vector
    dev = df_dev.iloc[i].values.reshape(n_vars, 1)
    outer_sum += dev @ dev.T
df_cov0 = outer_sum / (n_obs - 1)
df_cov0

Alternate method using matrices: link.

In [ ]:

# Same covariance in a single matrix product: (deviations^T @ deviations) / (n - 1)
df_cov1 = (df_dev.T.values @ df_dev.values) / (len(df_dev) - 1)
df_cov1

Another method, this time using np.cov

In [ ]:

# rowvar=False tells np.cov that each column is a variable and each row an
# observation, which is the layout df already has
df_cov2 = np.cov(df.values, rowvar=False)
df_cov2

Example 4: Correlation¶

In [ ]:

import seaborn as sns

# Pairwise correlation matrix of the columns, drawn as an annotated heatmap
df_cor = df.corr()
sns.heatmap(
    df_cor,
    annot=True,
    xticklabels=df_cor.columns,
    yticklabels=df_cor.columns,
)

In [ ]: