# Remove every visible object from the current environment before starting.
rm(list = ls())

# Seed for the random number generator: replace the default value with the
# last three digits of your student ID.
seed <- 111

# While the seed still has its default value, print the package installation
# hints and abort; nothing below this check runs until the seed is edited.
if (seed == 111) {
  cat(
    "NOTE: to run this example you need to install and load two external packages.\n",
    "To install them, execute the following two commands:\n\n",
    " install.packages(\"e1071\")\n",
    " install.packages(\"clue\")\n",
    sep = ""
  )
  stop(
    "\nThe random generator seed is still set to its default value.\nEdit the script and change it with the last three digits of your student ID\n\n",
    call. = FALSE
  )
}

set.seed(seed)
################################# MAIN CODE ###########################################
library(e1071)
library(clue)
# Introduce the exercise and the Iris dataset, pause for the user, then load
# the dataset and display it.
cat("Welcome to the fourth (and last!) PAMI homework. This time we are going to test\n")
cat("some clustering algorithms on the Iris flower dataset. This is a quite classical\n")
cat("dataset, consisting in 50 samples from each of three species of Iris. For each sample,\n")
cat("four features were measured. You can find more information about this dataset here:\n\n")
cat("http://en.wikipedia.org/wiki/Iris_flower_data_set\n\n")
# End the message with a newline so the prompt that follows starts on its own
# line instead of being glued to the end of the sentence.
cat("Let us now load the dataset (which is available by default in R) and look at its contents.\n")
invisible(readline(prompt = "Press [enter] to continue"))
data(iris)
# Print explicitly: a bare `iris` is auto-printed only at the interactive
# prompt or under Rscript, not when this file is run through source().
print(iris)
# Explain the column layout, pause for the user, then draw a pairs plot of the
# four measurement columns with each point coloured by its species.
cat(
  "The first four columns hold, respectively, length and width of sepal and petal of each\n",
  "sample. The fifth column holds the ground truth, e.g. the specie the sample belongs to.\n",
  "Let us see how the data looks like: as it is 4D, we will plot pairs and use the ground\n",
  "truth information to see how data is distributed in the different dimensions.\n",
  sep = ""
)
invisible(readline("Press [enter] to continue"))

# Numeric species codes following the listed level order
# (setosa = 1, versicolor = 2, virginica = 3); used as the colour index.
classes <- as.numeric(
  factor(
    iris[["Species"]],
    levels = c("setosa", "versicolor", "virginica")
  )
)
plot(iris[1:4], col = classes)
# Comment on what the pairs plot shows, then wait for the user.
cat(
  "As you can see, while one cluster (Iris setosa) is well separated from the others,\n",
  "the other two are very close to each other. This means that some of the samples,\n",
  "despite being classified into two different families, share very common features.\n",
  sep = ""
)
invisible(readline("Press [enter] to continue"))
# Announce the K-Means / elbow-method experiment, then wait for the user.
cat(
  "Let us first run K-Means on these data. We actually know K already, but let us use\n",
  "it to verify whether the \"elbow method\" gives us some reasonable results.\n",
  "We will now run K-Means 10 times, with K=1:10, calculate the within sum of squares\n",
  "for each run, and finally plot the results.\n",
  sep = ""
)
invisible(readline("Press [enter] to continue"))
wss = 0
for (i in 1:10) wss[i]