## ——————————————
##
## ST4060 / ST6015 / ST6040
## R script – Fri 26 Nov 2021 lecture
## Eric Wolsztynski
##
## Clustering examples…
##
## ——————————————

# ————————————————

# (1) Hierarchical clustering:

eurodist # built-in 'dist' object of distances between European cities
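# (optional check) a 'dist' object can be viewed as a full symmetric matrix;
# the 1:5 slice below is just an illustrative subset:
as.matrix(eurodist)[1:5, 1:5]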
h1 <- hclust(eurodist, method="ward.D")
plot(h1)
?hclust

x = iris[,1:4]
COLS = c(1,2,4) # black, red, blue
plot(x[,c(1,3)], col=COLS[iris$Species], pch=20, cex=1.3)

h1 = hclust(dist(x))
plot(h1)
rect.hclust(h1, k=3) # visualise a 3-cluster outcome
h2 = cutree(h1, 3)   # obtain labels for 3 clusters from hclust output
table(h2, iris$Species)

# Trying now with a different agglomeration method:
h1 = hclust(dist(x), method="ward.D")
h2 = cutree(h1, 3)
table(h2, iris$Species)
plot(x[,c(1,3)], col=COLS[h2], pch=20)

# ————————————————

# (2) Partition-based clustering (k-means here):

# better to scale since Euclidean distance is used:
xs = apply(x, 2, scale)
k1 = kmeans(xs, 3)
table(k1$cluster, iris$Species)
plot(x[,1:2], col=COLS[k1$cluster], pch=20)
table(k1$cluster, h2) # compare with hclust output

# How to 'decide' on the optimal number of clusters?
library(NbClust) # this package is handy...
?NbClust
nbo = NbClust(x, method="kmeans")
names(nbo)
nbo$All.index
nbo$Best.partition
plot(x[,1:2], col=c(1,2,4)[nbo$Best.partition], pch=20)
# ... but we're still none the wiser!
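
# ————————————————

# A minimal sketch of the 'elbow' heuristic as a complement to NbClust,
# assuming the scaled data xs defined above; the range 1:10 and nstart=25
# are illustrative choices, not prescribed values:
wss = sapply(1:10, function(k) kmeans(xs, centers=k, nstart=25)$tot.withinss)
plot(1:10, wss, type="b", pch=20,
     xlab="number of clusters k", ylab="total within-cluster SS")
# look for a 'kink' (elbow) in this curve as a rough indication of a suitable k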