## ——————————————
##
## ST4060 / ST6015 / ST6040
## R script – Fri 26 Nov 2021 lecture
## Eric Wolsztynski
##
## Clustering examples…
##
## ——————————————
# ————————————————
# (1) Hierarchical clustering:
# Road distances between 21 European cities (built-in dist object):
eurodist
# Agglomerate with Ward's criterion and inspect the dendrogram:
h1 <- hclust(eurodist, method = "ward.D")
plot(h1)
?hclust
# Now cluster the iris measurements (the 4 numeric features only):
x <- iris[, 1:4]
COLS <- c(1, 2, 4) # black, red, blue
plot(x[, c(1, 3)], col = COLS[iris$Species], pch = 20, cex = 1.3)
# Default agglomeration (complete linkage) on Euclidean distances:
dx <- dist(x)
h1 <- hclust(dx)
plot(h1)
rect.hclust(h1, k = 3) # visualise a 3-cluster outcome
h2 <- cutree(h1, 3)    # labels for a 3-cluster cut of the tree
table(h2, iris$Species)
# Trying now with a different agglomeration method (Ward):
h1 <- hclust(dx, method = "ward.D")
h2 <- cutree(h1, 3)
table(h2, iris$Species)
plot(x[, c(1, 3)], col = COLS[h2], pch = 20)
# (2) Partition-based clustering (k-means here):
# Standardise the features first, since k-means relies on Euclidean
# distance and would otherwise be dominated by the larger-scale variables.
# scale() applies directly to the data frame (clearer than apply(x, 2, scale)):
xs <- scale(x)
# k-means only finds a local optimum from a random initialisation, so a
# single unseeded start is neither reproducible nor reliably good: fix the
# seed and take the best of several random starts instead.
set.seed(6040)
k1 <- kmeans(xs, centers = 3, nstart = 25)
table(k1$cluster, iris$Species)
plot(x[, 1:2], col = COLS[k1$cluster], pch = 20)
table(k1$cluster, h2) # compare with hclust output
# How to 'decide' on optimal number of clusters?
library(NbClust) # this package is handy...
?NbClust
# Evaluate a battery of internal validity indices over k-means partitions:
nbo <- NbClust(x, method = "kmeans")
names(nbo)
nbo$All.index      # index values across the candidate numbers of clusters
nbo$Best.partition # labelling under the majority-vote best number of clusters
plot(x[, 1:2], col = c(1, 2, 4)[nbo$Best.partition], pch = 20)
# ... but we're still none the wiser!