Lab 4A: Clustering (Easy)

1. Explore the Data

  1. Load Iris data
# Load the built-in iris data set (150 flowers, four measurements, one
# Species factor) into the workspace.
data("iris")
  1. Load color brewer library
library(RColorBrewer)
  1. Create a color palette
# Three colors from the ColorBrewer "Set2" scheme; later plots index this
# vector by the numeric species code to get one color per species.
palette <- brewer.pal(n = 3, name = "Set2")
  1. Create a scatterplot matrix colored by species
# Plotting a data frame of the four measurement columns draws a pairwise
# scatterplot matrix. Each point is a filled circle (pch 19) colored by the
# numeric code of its species.
plot(iris[1:4], pch = 19, col = palette[as.numeric(iris$Species)])

  1. View scatterplot of petal length vs width
# Petal length (x) against petal width (y), filled circles colored by species.
# The coordinate expressions are kept as `iris$...` because the default axis
# labels are taken from them.
plot(iris$Petal.Length, iris$Petal.Width,
     pch = 19,
     col = palette[as.numeric(iris$Species)])

2. Create Clusters with K-Means

  1. Set seed to make randomness reproducible
# Fix the random number generator state so the randomly initialized k-means
# run below gives the same result on every execution.
set.seed(seed = 42)
  1. Create K-means clusters
# Partition the flowers into three groups using only the four numeric
# measurement columns (Species is excluded). nstart = 10 tries ten random
# initializations and keeps the best one.
clusters <- kmeans(iris[, 1:4], centers = 3, nstart = 10)
  1. Plot each cluster as a shape
# Same petal scatterplot, but now the plotting symbol encodes the k-means
# cluster id (pch 1, 2 or 3) while the color still encodes the true species,
# so disagreements between the two are visible.
plot(iris$Petal.Length, iris$Petal.Width,
     pch = clusters$cluster,
     col = palette[as.numeric(iris$Species)])

  1. Plot centroid of clusters
# Redraw the cluster-shape scatterplot; points() can only add to an existing
# plot, so the base layer is drawn first.
plot(iris$Petal.Length, iris$Petal.Width,
     pch = clusters$cluster,
     col = palette[as.numeric(iris$Species)])

# Overlay the three k-means centroids as thick blue crosses. The two-column
# matrix is interpreted as (x, y) = (Petal.Length, Petal.Width).
points(clusters$centers[, c("Petal.Length", "Petal.Width")],
       pch = 4, lwd = 4, col = "blue")

3. Create Hierarchical Clusters

  1. Create hierarchical clusters
# Agglomerative clustering on the pairwise distances between flowers, using
# the four measurement columns and the default settings of dist() and hclust().
# The dist() call stays nested: the dendrogram plot labels its x axis from it.
hclusters <- hclust(dist(iris[, 1:4]))
  1. Plot dendrogram of clusters
# Draw the dendrogram, labelling each leaf with the numeric code of its
# species instead of the row name.
plot(hclusters, labels = as.numeric(iris$Species))

  1. Cut tree into three clusters
# Convert the hclust result to a dendrogram and cut it at height 4. The result
# holds the part of the tree above the cut and the branches below it.
cuts <- cut(as.dendrogram(hclusters), h = 4)
  1. Get clusters as vector
# One cluster id (1, 2 or 3) per flower, in the row order of the data, taken
# from the hierarchical tree.
cuts2 <- cutree(hclusters, k = 3)
  1. Plot clusters and color by species
# Petal scatterplot with the plotting symbol set by the hierarchical cluster
# id and the color set by the true species.
plot(iris$Petal.Length, iris$Petal.Width,
     pch = cuts2,
     col = palette[as.numeric(iris$Species)])