L4
APS1070 Fall 2021¶
Lecture 4¶
Example for K-Means¶
Image Compression
In [ ]:
#source: https://towardsdatascience.com/image-compression-using-k-means-clustering-aa0c91bb0eeb
from skimage import io
from sklearn.cluster import KMeans
import numpy as np
In [ ]:
# Read the image from the course website and display it.
# (The URL must be delimited by plain ASCII quotes; typographic quotes are a
# syntax error in Python.)
image = io.imread('https://www.eecg.utoronto.ca/~hadizade/APS1070/Lion.png')
io.imshow(image)
io.show()
In [ ]:
# Number of colours (clusters) kept in the compressed image. Defined once so
# that the model and the output file name cannot drift apart.
n_clusters = 4
# Dimensions of the original image
rows = image.shape[0]
cols = image.shape[1]
# Flatten the image: one row per pixel, one column per colour channel
image = image.reshape(rows*cols, 3)
# Run k-means to group the pixel colours into n_clusters clusters
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(image)
# Replace each pixel value with the centroid of the cluster it was assigned to
compressed_image = kmeans.cluster_centers_[kmeans.labels_]
# Clip BEFORE casting: once the values are uint8 they are already confined to
# 0..255, so clipping after the cast would have no effect.
compressed_image = np.clip(compressed_image, 0, 255).astype('uint8')
# Reshape the image back to its original dimensions
compressed_image = compressed_image.reshape(rows, cols, 3)
# Save and display the output image
io.imsave(f'compressed_image_{n_clusters}.png', compressed_image)
io.imshow(compressed_image)
io.show()
In [ ]:
Example 1: Plotting a Gaussian¶
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# mean and standard deviation
mu, sigma = 0, 0.1
# draw 2000 samples from the Gaussian (normal) distribution
s = np.random.normal(mu, sigma, 2000)
# split the samples into 40 bins; density=True normalises the histogram so it
# is comparable with the probability density function plotted below
count, bins, ignored = plt.hist(s, 40, density=True)
# overlay the analytical Gaussian density evaluated at the bin edges
plt.plot(bins, 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2)), linewidth=2, color='r')
plt.show()
Example 2a: Plotting a Bivariate Gaussian¶
In [ ]:
# make_blobs is imported from sklearn.datasets directly: the private
# sklearn.datasets.samples_generator module has been removed from scikit-learn.
from sklearn.datasets import make_blobs
# 1000 points drawn around 3 centres, each with unit standard deviation
X, y_true = make_blobs(n_samples=1000, centers=3, cluster_std=1)
# trailing semicolons suppress the notebook's echo of the return value
plt.scatter(X[:,0],X[:,1],marker="x");
plt.axis([-12,12,-12,12]);
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from numpy.random import multivariate_normal
# provide mean and covariance
mean = [0, 0]
cov = np.array([[1, 0], [0, 1]])
# Alternative covariance matrices to try (uncomment one at a time):
#cov = np.array([[5, 0], [0, 5]])
#cov = np.array([[5, 0], [0, 1]])
#cov = np.array([[1, 0.5], [0.5, 1]])
#cov = np.array([[2, -1.9], [-1.9, 2]])
# sample from the bivariate Gaussian distribution
data = multivariate_normal(mean, cov, size=1000000)
# split the samples into a 100x100 grid of bins and plot the 2-D histogram
plt.hist2d(data[:, 0], data[:, 1], bins=100)
plt.xlim(-5, 5)
plt.ylim(-5, 5)
# equal axis scaling so the shape of the distribution is not distorted
plt.gca().set_aspect('equal', adjustable='box')
plt.show()
# correlation = covariance divided by the product of the standard deviations
correlation = cov[0,1]/((cov[0,0]**0.5)*(cov[1,1]**0.5))
print(cov)
print(correlation)
Example 2b: Example from Lecture¶
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from numpy.random import multivariate_normal
# mean vector and covariance matrix of the lecture example
mean = [0.1, 0.2]
cov = np.array([[0.69, -0.08], [-0.08, 0.16]])
# sample from the bivariate Gaussian distribution
data = multivariate_normal(mean, cov, size=1000000)
# split the samples into a 100x100 grid of bins and plot the 2-D histogram
plt.hist2d(data[:, 0], data[:, 1], bins=100)
plt.xlim(-3, 3)
plt.ylim(-1.5, 2)
# equal axis scaling so the shape of the distribution is not distorted
plt.gca().set_aspect('equal', adjustable='box')
plt.show()
# correlation = covariance divided by the product of the standard deviations
correlation = cov[0,1]/((cov[0,0]**0.5)*(cov[1,1]**0.5))
print(cov)
print(correlation)
We can compare this to the scatter plot of the same samples:
In [ ]:
# Scatter plots do not show the distribution properly: with this many samples
# the markers overlap, so the point density is hidden.
# make_blobs is imported from sklearn.datasets directly: the private
# sklearn.datasets.samples_generator module has been removed from scikit-learn.
from sklearn.datasets import make_blobs
X = data
# trailing semicolons suppress the notebook's echo of the return value
plt.scatter(X[:,0],X[:,1],marker="x");
plt.axis([-4,4,-4,4]);
Example 3: Covariance from Real Data¶
In [ ]:
# Five observations (rows) of three measured variables (columns).
df = pd.DataFrame({'Length': [4.0, 4.2, 3.9, 4.3, 4.1],
                   'Width': [2.0, 2.1, 2.0, 2.1, 2.2],
                   'Height': [0.60, 0.59, 0.58, 0.62, 0.63]})
df
Each row is an observation. See source example here.
In [ ]:
# Matrix of deviations from the mean: subtract each column's mean from every
# entry in that column (df.mean(axis=0) gives one mean per column).
df_dev = df - df.mean(axis=0)
df_dev
In [ ]:
df_dev.T  # Take the transpose: each column is now one observation's deviations
$S=\frac{1}{n-1}\sum_{i=1}^{n}\left( X_{i} - \bar{X} \right)\left( X_{i} - \bar{X} \right)^{\top}$
In [ ]:
# Implement the above equation: accumulate the outer product of every
# observation's deviation vector with itself, then divide by n - 1.
n_obs, n_vars = df_dev.shape
# Named `scatter` rather than `sum` so the builtin sum() is not shadowed.
scatter = np.zeros((n_vars, n_vars))
for i in range(n_obs):
    # iloc selects by position, matching the positional range() index;
    # reshape makes an (n_vars, 1) column vector for the outer product.
    dev = df_dev.iloc[i].values.reshape(n_vars, 1)
    scatter += dev @ dev.T
df_cov0 = scatter/(n_obs-1)
df_cov0
Alternative method using matrices: link.
In [ ]:
# Same covariance as one matrix product: (deviations^T x deviations) / (n - 1)
df_cov1 = (df_dev.T.values @ df_dev.values)/(df_dev.shape[0]-1)
df_cov1
Another method, this time using np.cov
In [ ]:
# rowvar=False tells np.cov that columns are the variables and rows are the
# observations, so no explicit transpose of the data is needed.
df_cov2 = np.cov(df.values, rowvar=False)
df_cov2
Example 4: Correlation¶
In [ ]:
import seaborn as sns
# Pairwise correlation matrix of the DataFrame's columns
df_cor = df.corr()
# Annotated heatmap, with both axes labelled by the column names
sns.heatmap(
    df_cor,
    xticklabels=df_cor.columns,
    yticklabels=df_cor.columns,
    annot=True,
)
In [ ]: