## ——————————————
##
## ST4060 / ST6015 / ST6040
## R script – Wed 24 Nov 2021 lecture
## Eric Wolsztynski
##
## Types of challenges…
##
## ——————————————
# ------------------------------------------------
# (1) Data scales: effect of feature scaling on k-means clustering.
# Features used: iris columns 2-4 (Sepal.Width, Petal.Length, Petal.Width).
# NB: kmeans() starts from randomly chosen centres, so the cluster labels
# (and hence the colours) may differ from one run to the next.
x <- iris[, 2:4]
y <- iris[, 5]   # Species column (not used in this section)
K <- 2           # number of clusters requested
# two stacked panels with tight margins:
par(mfrow = c(2, 1), mar = c(3, 3, 1, 1))
#
# clustering without scaling:
ko <- kmeans(x, K)
is <- c(1, 3)    # columns of x shown in the scatterplots
plot(x[, is], col = c(1, 2, 4)[ko$cluster], pch = 20, cex = 2)
#
# clustering with scaling (each column centred, then divided by its sd);
# scale() standardises the columns directly, no apply() loop required:
z <- scale(x)
koz <- kmeans(z, K)
# shown on the original (unscaled) axes so both panels are comparable:
plot(x[, is], col = c(1, 2, 4)[koz$cluster], pch = 20, cex = 2)
# ------------------------------------------------
# (2) Very large P...
# Scatterplot matrix of the four numeric iris measurements
# (column 5, the Species factor, is dropped):
pairs(iris[, -5], pch = 20)
# Pairwise correlations of the same four columns:
# features 3 and 4 (Petal.Length, Petal.Width) are redundant.
cor(iris[, -5])
# See JMI for another example with large P
# ------------------------------------------------
# (3) Very large D...
# Raw series: EuStockMarkets is a multivariate time series (4 indices).
plot(EuStockMarkets, lwd = 3)
dev.new()
# PCA of the four series; prcomp() centres the columns but, by default,
# does not rescale them:
pca <- prcomp(EuStockMarkets)
# one stacked panel per principal component:
par(mfcol = c(4, 1), mar = c(1, 3, 0, 1))
for (i in seq_len(ncol(pca$x))) {
  # scores on component i, drawn as a line (plain ASCII quotes are
  # required here: typographic quotes are a syntax error in R)
  plot(pca$x[, i], type = "l", lwd = 2)
}
dev.new()
# variances of the principal components (scree-type plot):
plot(pca)
# same kind of plot for iris, without and then with scaling of the features:
plot(prcomp(iris[, 1:4]))
plot(prcomp(iris[, 1:4], scale. = TRUE))