Feature Importance on MNIST with feature counts

Select a subset of 3’s and 5’s from MNIST

Here we train a forest using RerF with the RandMatImagePatch option with patch.min = 1 and patch.max = 5.

## Keep every MNIST image labelled 3 or 5 (no subsampling is done here).
## Training rows are ordered with all 3's first, followed by all 5's;
## test rows keep their original order.
threes <- which(is.element(mnist$Ytrain, 3))
fives <- which(is.element(mnist$Ytrain, 5))
numsub <- c(threes, fives)

Xtrain <- mnist$Xtrain[numsub, ]
Ytrain <- mnist$Ytrain[numsub]

Xtest <- mnist$Xtest[is.element(mnist$Ytest, c(3, 5)), ]
Ytest <- mnist$Ytest[is.element(mnist$Ytest, c(3, 5))]

# Forest parameters:
#   p         number of dimensions (columns of Xtrain)
#   iw, ih    image width and height (images are square, so both are sqrt(p))
#   d         number of random features to evaluate, ceiling(sqrt(p))
#   patch.min min width of the square patch to sample pixels from
#   patch.max max width of the square patch
p <- ncol(Xtrain)
iw <- sqrt(p)
ih <- iw
d <- ceiling(iw)
patch.min <- 1L
patch.max <- 5L

# Train the forest that samples features with RandMatImagePatch, with tree
# depth capped at 8.
forestS <- RerF(
  Xtrain, Ytrain,
  FUN = RandMatImagePatch,
  paramList = list(
    p = p, d = d, iw = iw, ih = ih,
    pwMin = patch.min, pwMax = patch.max
  ),
  max.depth = 8,
  num.cores = numCores
)

# Predict the held-out 3's and 5's; the parentheses store and print the
# misclassification rate in one step.
predS <- Predict(Xtest, forestS, num.cores = numCores)
(mnist.error.rate <- mean(predS != Ytest))

## [1] 0.01892744

# Train the forest that samples features with RandMatBinary, with tree depth
# capped at 8.
forestRerF <- RerF(
  Xtrain, Ytrain,
  FUN = RandMatBinary,
  paramList = list(p = p, d = d),
  max.depth = 8,
  num.cores = numCores
)

# Predict the held-out 3's and 5's; the parentheses store and print the
# misclassification rate in one step.
predRerF <- Predict(Xtest, forestRerF, num.cores = numCores)
(mnist.error.rate <- mean(predRerF != Ytest))

## [1] 0.02208202

# Train the forest that samples features with RandMatRF, with tree depth
# capped at 8.
forestRF <- RerF(
  Xtrain, Ytrain,
  FUN = RandMatRF,
  paramList = list(p = p, d = d),
  max.depth = 8,
  num.cores = numCores
)

# Predict the held-out 3's and 5's; the parentheses store and print the
# misclassification rate in one step.
predRF <- Predict(Xtest, forestRF, num.cores = numCores)
(mnist.error.rate <- mean(predRF != Ytest))

## [1] 0.02050473

# Time the count-based (type = "C") feature importance of the image-patch
# forest; the result is kept in FeatImpS.
system.time(
  FeatImpS <- FeatureImportance(forestS, type = "C", num.cores = numCores)
)

## Message: Computing feature importance as counts of unique feature combinations.

##     user   system  elapsed
## 1229.062    6.560  314.241

# One row per feature combination, one column per column of Xtrain: row i
# holds FeatImpS$imp[i] at the column indices listed in
# FeatImpS$features[[i]] and 0 everywhere else.
ni <- matrix(0, length(FeatImpS$features), ncol(Xtrain))

# seq_along() gives an empty loop when no features were returned, whereas
# 1:length() would iterate over c(1, 0) and fail on the first index.
for (i in seq_along(FeatImpS$features)) {
  ni[i, FeatImpS$features[[i]]] <- FeatImpS$imp[i]
}

# Average each column over all feature combinations, reshape column-wise
# into a 28 x 28 image, and save it for later use.
NN <- matrix(colSums(ni) / nrow(ni), 28, 28, byrow = FALSE)
saveRDS(NN, file = "NNsrerf.rds")

# Time the count-based (type = "C") feature importance of the RandMatBinary
# forest; the result is kept in FeatImpRerF.
system.time(
  FeatImpRerF <- FeatureImportance(forestRerF, type = "C", num.cores = numCores)
)

## Message: Computing feature importance as counts of unique feature combinations.

##     user   system  elapsed
## 1750.585   11.082  444.860

# One row per feature combination, one column per column of Xtrain: row i
# holds FeatImpRerF$imp[i] at the column indices listed in
# FeatImpRerF$features[[i]] and 0 everywhere else.
nir <- matrix(0, length(FeatImpRerF$features), ncol(Xtrain))

# seq_along() gives an empty loop when no features were returned, whereas
# 1:length() would iterate over c(1, 0) and fail on the first index.
for (i in seq_along(FeatImpRerF$features)) {
  nir[i, FeatImpRerF$features[[i]]] <- FeatImpRerF$imp[i]
}

# Average each column over all feature combinations, reshape column-wise
# into a 28 x 28 image, and save it for later use.
NNr <- matrix(colSums(nir) / nrow(nir), 28, 28, byrow = FALSE)
saveRDS(NNr, file = "NNrerf.rds")

# Time the count-based (type = "C") feature importance of the RandMatRF
# forest; the result is kept in FeatImpRF.
system.time(
  FeatImpRF <- FeatureImportance(forestRF, type = "C", num.cores = numCores)
)

## Message: Computing feature importance as counts of unique feature combinations.

##    user  system elapsed
##  14.579   0.323   5.549

# One row per feature combination, one column per column of Xtrain: row i
# holds FeatImpRF$imp[i] at the column indices listed in
# FeatImpRF$features[[i]] and 0 everywhere else.
nirf <- matrix(0, length(FeatImpRF$features), ncol(Xtrain))

# seq_along() gives an empty loop when no features were returned, whereas
# 1:length() would iterate over c(1, 0) and fail on the first index.
for (i in seq_along(FeatImpRF$features)) {
  nirf[i, FeatImpRF$features[[i]]] <- FeatImpRF$imp[i]
}

# Average each column over all feature combinations, reshape column-wise
# into a 28 x 28 image, and save it for later use.
NNrf <- matrix(colSums(nirf) / nrow(nirf), 28, 28, byrow = FALSE)
saveRDS(NNrf, file = "NNrf.rds")

Plot the average 3 and 5 from the training set

# Training rows for each digit.
the3s <- Xtrain[Ytrain == 3, ]
the5s <- Xtrain[Ytrain == 5, ]

# Per-column mean over each digit's rows, reshaped column-wise into a
# 28 x 28 image.
sum3 <- matrix(colSums(the3s) / sum(Ytrain == 3), nrow = 28, ncol = 28)
sum5 <- matrix(colSums(the5s) / sum(Ytrain == 5), nrow = 28, ncol = 28)

# Raster coordinates for a 28 x 28 image (x varies fastest).
g <- expand.grid(x = 1:28, y = 1:28)
gg <- rbind(g, g)

# Flatten a 28 x 28 image into a vector: reverse the row order, then read
# the result out row by row.
flatten_image <- function(m) {
  flipped <- m[28:1, ]
  as.vector(t(flipped))
}

# Feature-count heatmaps for the three forests.
nn <- flatten_image(NN)
nnrerf <- flatten_image(NNr)
nnrf <- flatten_image(NNrf)

# Average digits and their absolute difference.
s3 <- flatten_image(sum3)
s5 <- flatten_image(sum5)
s3m5 <- abs(s3 - s5)

# Long-format table for plotting: the coordinate grid `g` is recycled once
# per image, and `Alg` labels which image each weight belongs to.
Z <- data.frame(
  g,
  weight = c(nn, nnrerf, nnrf, s3, s5, s3m5),
  Alg = rep(
    c("MF", "Sporf", "RF", "Average 3", "Average 5", "x3m5"),
    each = length(nn)
  )
)

# Fill scales: sc0 uses the viridis palette, sc1 the inferno palette.
sc0 <- scale_fill_gradientn(colours = viridis(255))
sc1 <- scale_fill_gradientn(colours = inferno(255))

# One raster panel per image with the fill legend hidden.
# guides(fill = "none") replaces the deprecated guides(fill = FALSE).
a1 <- ggplot(data = Z[Z$Alg == "Average 3", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  guides(fill = "none") +
  sc1 +
  ggtitle("Average 3")
a2 <- ggplot(data = Z[Z$Alg == "Average 5", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  guides(fill = "none") +
  sc1 +
  ggtitle("Average 5")
a3 <- ggplot(data = Z[Z$Alg == "x3m5", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  guides(fill = "none") +
  sc1 +
  ggtitle("abs(avg(3) - avg(5))")

# Show the three panels side by side.
grid.arrange(a1, a2, a3, ncol = 3)

#ggslackr(grid.arrange(a1, a2, a3, ncol=3), channels="#manifold-forest")

Feature heatmap

These are the features that S-RerF (labelled "MF" in the plots), RerF (labelled "Sporf"), and RF used, plotted as averaged heatmaps.

# One heatmap of averaged feature counts per forest, on the inferno scale
# (sc1) with the fill legend left visible.
p1 <- ggplot(data = Z[Z$Alg == "MF", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  sc1 +
  labs(title = "MF")
p2 <- ggplot(data = Z[Z$Alg == "Sporf", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  sc1 +
  labs(title = "Sporf")
p3 <- ggplot(data = Z[Z$Alg == "RF", ], aes(x = x, y = y, fill = weight)) +
  geom_raster() +
  theme_void() +
  sc1 +
  labs(title = "RF")

# Show the three heatmaps side by side.
grid.arrange(p1, p2, p3, ncol = 3)