mycolors <- c("orange", "seagreen", "red")

data(iris)

str(iris)

iris.sub <- iris[51:150, ]
iris.sub$Species <- droplevels(iris.sub$Species)
str(iris.sub)


fm <- lm(as.numeric(Species) ~ Sepal.Length + Sepal.Width, iris.sub)

plot(jitter(Sepal.Width) ~ jitter(Sepal.Length), iris.sub, col = Species)

g <- with(iris.sub,
          expand.grid(Sepal.Length = do.breaks(range(Sepal.Length), 100),
                      Sepal.Width = do.breaks(range(Sepal.Width), 100)))

g$pred <- predict(fm, newdata = g)


plot(Sepal.Width ~ Sepal.Length, data = g,
     col = ifelse(pred < 1.5, "orange", "seagreen"),
     pch = ".", cex = 3)

points(jitter(Sepal.Width) ~ jitter(Sepal.Length),
       iris.sub, 
       col = ifelse(Species == "versicolor", "orange", "seagreen"),
       pch = 16)


library(class)

g$pred.1nn <- knn(train = iris.sub[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris.sub$Species, k = 1)

g$pred.2nn <- knn(train = iris.sub[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris.sub$Species, k = 2)

g$pred.5nn <- knn(train = iris.sub[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris.sub$Species, k = 5)


plot(Sepal.Width ~ Sepal.Length, data = g,
     col = mycolors[pred.2nn],
     pch = ".", cex = 3)


points(jitter(Sepal.Width) ~ jitter(Sepal.Length),
       iris.sub, 
       col = mycolors[Species],
       pch = 16)

## Doesn't need to be two classes

g <- with(iris,
          expand.grid(Sepal.Length = do.breaks(range(Sepal.Length), 100),
                      Sepal.Width = do.breaks(range(Sepal.Width), 100)))

g$pred.1nn <- knn(train = iris[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris$Species, k = 1)

g$pred.2nn <- knn(train = iris[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris$Species, k = 2)

g$pred.5nn <- knn(train = iris[, c("Sepal.Length", "Sepal.Width")],
                  test = g[, c("Sepal.Length", "Sepal.Width")],
                  cl = iris$Species, k = 5)


plot(Sepal.Width ~ Sepal.Length, data = g,
     col = mycolors[pred.5nn],
     pch = ".", cex = 3)

points(jitter(Sepal.Width) ~ jitter(Sepal.Length),
       iris, 
       col = mycolors[Species],
       pch = 16)

## Since we are repeatedly doing the same plot, make a function to
## avoid repitition.

plotPredictionIris <- function(pred)
{
    plot(Sepal.Width ~ Sepal.Length, data = g,
         col = mycolors[pred],
         pch = ".", cex = 3)
    points(jitter(Sepal.Width) ~ jitter(Sepal.Length),
           iris, 
           col = mycolors[Species],
           pch = 16)
}

plotPredictionIris(g$pred.5nn)
plotPredictionIris(g$pred.2nn)

## LDA, QDA

library(MASS)

fm1 <- lda(Species ~ Sepal.Length + Sepal.Width, iris)
g$pred.lda <- predict(fm1, newdata = g)$class
plotPredictionIris(g$pred.lda)

fm2 <- qda(Species ~ Sepal.Length + Sepal.Width, iris)
g$pred.qda <- predict(fm2, newdata = g)$class
plotPredictionIris(g$pred.qda)


library(nnet)

fm3 <- nnet(Species ~ Sepal.Length + Sepal.Width, data = iris,
            size = 10, maxit = 500)
g$pred.nnet <- predict(fm3, newdata = g, type = "class")
g$pred.nnet <- factor(g$pred.nnet, levels = levels(iris$Species))
plotPredictionIris(g$pred.nnet)


## More realistic example: handwritten digits

digits <- read.table("data/zip.train", header = FALSE)
## digits <- digits[sample(1:7291, 2000), ]
digit.class <- factor(digits[[1]])
digit.data <- t( data.matrix(digits[-1]) )

levelplot(matrix(digit.data[, 6], 16, 16)[, 16:1],
          col.regions = rev(gray.colors(100)))


P <- 9
levelplot((digit.data[, 1:P]) ~ rep(rep(1:16, P), 16) +
               rep(rep(1:16, each = 16), P) |
               gl(P, 256,
                  labels = paste(1:P,
                                 as.character(digit.class[1:P]), sep = " : ")),
          as.table = TRUE,
          col.regions = rev(gray.colors(100)),
          ylim = c(17, 0))


fm4 <- nnet(x = t(digit.data), y = class.ind(digit.class),
            size = 15,
            entropy = TRUE,
            MaxNWts = 5000, decay = 5e-04,
            maxit = 200)

pfm4 <- predict(fm4)
predicted.fm4 <- levels(digit.class)[ apply(pfm4, 1, which.max) ]
predicted.fm4 <- factor(predicted.fm4, levels = as.character(0:9))

class.tab <- table( digit.class, predicted.fm4)
class.tab
(sum(class.tab) - sum(diag(class.tab))) / sum(class.tab)

## test set

digits.test <- read.table("data/zip.test", header = FALSE)
digit.test.class <- factor(digits.test[[1]])
digit.test.data <- t( data.matrix(digits.test[-1]) )


pfm4.test <- predict(fm4, newdata = t(digit.test.data))
predicted.fm4.test <- levels(digit.class)[ apply(pfm4.test, 1, which.max) ]
predicted.fm4.test <- factor(predicted.fm4.test, levels = as.character(0:9))

class.tab.test <- table(digit.test.class, predicted.fm4.test)
(sum(class.tab.test) - sum(diag(class.tab.test))) / sum(class.tab.test)

## KNN (slow)

zip.knn <- knn(train = t(digit.data),
               test = t(digit.test.data),
               cl = digit.class, k = 5)

class.tab.test <- table(digit.test.class, zip.knn)
(sum(class.tab.test) - sum(diag(class.tab.test))) / sum(class.tab.test)


### Summarize by features

extractFeatures <- function(x)
{
    xm <- matrix(x, 16, 16)
    cl <- contourLines(z = xm, levels = c(0, 0.5))
    angles <- unlist(lapply(cl, function(cont) 2 * atan(diff(cont$y) / diff(cont$x)) / pi ))
    breaks <- do.breaks(c(-1, 1), 20)
    counts <- sqrt(hist(angles, breaks = breaks, plot = FALSE)$counts)
    counts / max(counts)
    ## (2 * counts / max(counts)) - 1
    ## quantile(angles, probs = ppoints(15), names = FALSE)
}

features.train <- apply(digit.data, 2, extractFeatures)
features.test <- apply(digit.test.data, 2, extractFeatures)

fm5 <- nnet(x = t(features.train), y = class.ind(digit.class),
            size = 50,
            entropy = TRUE,
            MaxNWts = 5000, decay = 5e-04,
            maxit = 100)

pfm5 <- predict(fm5)
predicted.fm5 <- levels(digit.class)[ apply(pfm5, 1, which.max) ]
predicted.fm5 <- factor(predicted.fm5, levels = as.character(0:9))

class.tab <- table( digit.class, predicted.fm5)

(sum(class.tab) - sum(diag(class.tab))) / sum(class.tab)

## test set

pfm5.test <- predict(fm5, newdata = t(features.test))
predicted.fm5.test <- levels(digit.class)[ apply(pfm5.test, 1, which.max) ]
predicted.fm5.test <- factor(predicted.fm5.test, levels = as.character(0:9))

class.tab.test <- table(digit.test.class, predicted.fm5.test)
(sum(class.tab.test) - sum(diag(class.tab.test))) / sum(class.tab.test)