A short demo using The Simpsons’ Couch Gag
Today I’ll be demonstrating how to take a `.jpeg`, convert it to a tidy dataset (one pixel per row of \(\langle R, G, B, \text{row}, \text{column} \rangle\)), run \(k\)-means clustering on the data, create a color palette with the result, and use that color palette to create some generative art.[1]
color_rounder
Rounds integer (0 to 255) color values to the midpoint of their `round.by`-wide bin and converts them to two-character hexadecimal strings, padding with a leading “0” where needed.
#' Bin 0-255 colour intensities and encode them as two-digit hex strings.
#'
#' Each value is snapped to the midpoint of its `round.by`-wide bin
#' (`floor(int / round.by) * round.by + round.by / 2`, truncated to an
#' integer) and then formatted as lower-case, zero-padded hexadecimal.
#'
#' @param int Numeric vector of channel intensities, expected in 0..255.
#' @param round.by Bin width. With `round.by = 1` values pass through unchanged.
#' @return Character vector the same length as `int`, e.g. "fc".
color_rounder = function(int, round.by = 8) {
  midpoint = as.integer(floor(int / round.by) * round.by + round.by / 2)
  # Bin widths such as 3 or 5 push the top bin's midpoint to 256 or beyond,
  # which would need three hex digits; cap at the largest 8-bit value.
  midpoint = pmin(midpoint, 255L)
  # sprintf() zero-pads to two digits itself, so no string-length check is
  # needed.
  sprintf("%02x", midpoint)
}
## Example usage
# color_rounder(254)
# [1] "fc"
RGBtohex
Converts three integers \(R\), \(G\), \(B\) to a single R hexadecimal color string preceded by a `#`.
image.as.tidy.frame and images.as.tidy.frame
Converts a `.jpg` image (or multiple images) to a tidy `data.frame`.
#' Read a JPEG and reshape it into a tidy, one-row-per-pixel data frame.
#'
#' @param image Path to a JPEG file, handed to `readJPEG()`.
#' @param round.by Bin width forwarded to `RGBtohex()` when building the
#'   `color` column. The default of 1 matches the previously hard-coded value.
#' @return A data frame with integer columns `x` and `y` (positions along the
#'   first and second dimension of the pixel array), `R`, `G`, `B` (0..255)
#'   and the hex string `color`.
image.as.tidy.frame = function(image, round.by = 1) {
  # NOTE(review): intensities are scaled by 256 and values of 256 are then
  # capped at 255. If readJPEG() yields value / 255 this overstates brighter
  # values by one; confirm before switching to a 255 scale.
  Simp_df = readJPEG(image) %>%
    `*`(., 256) %>%
    round() %>%
    # One row per array cell; Var1/Var2/Var3 index the three array dimensions
    # and Freq holds the cell value.
    as.data.frame.table() %>%
    mutate(
      x = as.integer(Var1),
      y = as.integer(Var2),
      # The third dimension is labelled "A", "B", "C" by the table conversion.
      channel = case_when(
        Var3 == "A" ~ "R",
        Var3 == "B" ~ "G",
        Var3 == "C" ~ "B"
      ),
      base256 = ifelse(as.integer(Freq) == 256, 255L, as.integer(Freq))
    ) %>%
    select(-Var1, -Var2, -Var3, -Freq) %>%
    pivot_wider(names_from = channel, values_from = base256) %>%
    # Honour the caller's bin width instead of always passing 1.
    mutate(color = RGBtohex(R, G, B, round.by = round.by))
  return(Simp_df)
}
#' Stack the tidy pixel frames of several images into one data frame.
#'
#' @param image_list Vector or list of image paths; each element is passed to
#'   `image.as.tidy.frame()` as its first argument.
#' @return The per-image data frames combined with `bind_rows()`.
images.as.tidy.frame = function(image_list) {
  per_image = lapply(image_list, image.as.tidy.frame)
  bind_rows(per_image)
}
## Example usage: tidy one image and preview its first three pixels
my_dat = image.as.tidy.frame("source-images/Simpsons.jpg")
kable(head(my_dat, 3), caption = "first three rows of data")
x | y | R | G | B | color |
---|---|---|---|---|---|
1 | 1 | 149 | 80 | 93 | #95505d |
2 | 1 | 150 | 81 | 94 | #96515e |
3 | 1 | 152 | 83 | 96 | #985360 |
color_cluster
Performs \(k\)-means clustering on an image stored as a tidy dataset. This function takes a substantial amount of time – I suggest running it once, saving the results in an `.rda` file, and commenting out the code used to generate it. I set the default number of clusters to 50 to ensure the colors weren’t oversimplified, but you may want to raise this to, say, 100 if using a photo or complicated painting. The number of starts, `nstart`, and the maximum number of search iterations, `iter.max`, are set higher than the defaults to help the algorithm converge on a solution.
#' Cluster pixel colours with k-means and summarise each cluster.
#'
#' @param color_data Data frame with numeric columns `R`, `G` and `B`.
#' @param num_clusters Number of centres passed to `kmeans()`.
#' @param nstart Number of random starts passed to `kmeans()`.
#' @param iter.max Iteration cap passed to `kmeans()`.
#' @return Data frame with one row per cluster, ordered by increasing `size`:
#'   integer `R`, `G`, `B` centres, their hex `color` (a factor whose levels
#'   follow row order) and the cluster `size`.
color_cluster = function(color_data,
                         num_clusters = 50,
                         nstart = 20,
                         iter.max = 30) {
  kmeans_out = kmeans(
    x = color_data %>% select(R, G, B),
    centers = num_clusters,
    nstart = nstart,
    iter.max = iter.max
  )
  kmeans_df = kmeans_out$centers %>%
    as.data.frame() %>%
    mutate(
      # as.integer() truncates the fractional part of each centre.
      across(c(R, G, B), as.integer),
      color = RGBtohex(R, G, B, round.by = 1),
      size = kmeans_out$size
    ) %>%
    arrange(size)
  # Two centres can truncate to the same hex code, and factor() rejects
  # duplicated levels; keep only the first occurrence of each colour as a
  # level. Without duplicates this is identical to using the column itself.
  kmeans_df$color = factor(
    x = kmeans_df$color,
    levels = unique(kmeans_df$color)
  )
  return(kmeans_df)
}
## Example usage
# Simp_clust = color_cluster(my_dat)
# save(Simp_clust, file = "cluster-data/Simp_clust.rda")
# Load the previously saved clustering and preview its first three rows.
load(file = "cluster-data/Simp_clust.rda")
kable(head(Simp_clust, 3), caption = "first three clusters", row.names = FALSE)
R | G | B | color | size |
---|---|---|---|---|
20 | 179 | 155 | #14b39b | 1382 |
85 | 109 | 54 | #556d36 | 1616 |
180 | 152 | 36 | #b49824 | 1846 |
blank_theme and color_strip
`blank_theme` eliminates all markings on a ggplot by modifying `theme`. `color_strip` plots the color data frame (e.g., as produced by `color_cluster`). `proportional` determines whether the colors in the strip are presented proportionally to the data. `rand.color.order` randomizes the color order. `margins` is a four-element vector for determining the margins of the strip (in centimeters). `rand.size.order = TRUE` re-orders the sizes at random, and `rand.size.order = "exponential"` generates exponential random variables to produce highly variable color widths. `rand.size.order = "proportional"` draws colors at random proportional to their original appearance probability (in the clustering); it also uses exponential random variables to randomize color widths. `rand.size.order` is only effective if `proportional = TRUE`.
# Theme layered on theme_minimal() that blanks axis titles, axis text, ticks,
# the panel border and grid, and hides the legend.
blank_theme = theme_minimal() +
  theme(
    # axes
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    # panel
    panel.border = element_blank(),
    panel.grid = element_blank(),
    # legend
    legend.position = "none"
  )
#' Draw a single strip of stacked colour bands.
#'
#' @param cluster_data Data frame with a `color` column and, when
#'   `proportional = TRUE`, a `size` column (e.g. from `color_cluster()`).
#' @param proportional If TRUE, band widths follow `size`; otherwise every
#'   band has width 1.
#' @param rand.color.order If TRUE, the palette is shuffled before use.
#' @param margins Four plot margins, in centimetres.
#' @param rand.size.order `FALSE` keeps the sizes, `TRUE` permutes them,
#'   `"exponential"` replaces them with `rexp()` draws, and `"proportional"`
#'   resamples `n` rows with probability proportional to `size` before
#'   assigning `rexp()` widths. Only consulted when `proportional = TRUE`.
#' @param n Number of rows drawn when `rand.size.order = "proportional"`.
#' @return A ggplot object.
color_strip = function(cluster_data,
                       proportional = TRUE,
                       rand.color.order = FALSE,
                       margins = c(-0.3, -0.3, -0.3, -0.3),
                       rand.size.order = FALSE,
                       n = nrow(cluster_data)) {
  color_pie = cluster_data %>% pull(color) %>% as.character()
  if (rand.color.order) {
    color_pie = color_pie %>%
      sample(nrow(cluster_data), replace = FALSE)
  }

  if (proportional) {
    if (rand.size.order == "exponential") {
      cluster_data$size = rexp(n = nrow(cluster_data))
    } else if (rand.size.order == "proportional") {
      # Resampling can drop some colours entirely. An unnamed manual palette
      # is matched to the fill levels by position, so pin every palette entry
      # to the level it would have been paired with before rows are drawn;
      # surviving bands then keep their fill.
      fill_keys = if (is.factor(cluster_data$color)) {
        levels(droplevels(cluster_data$color))
      } else {
        sort(unique(as.character(cluster_data$color)))
      }
      color_pie = setNames(color_pie[seq_along(fill_keys)], fill_keys)

      # Draw n rows with probability proportional to cluster size. The
      # previous inverse-CDF loop took max(which(...)), which always selected
      # the last row, and then discarded the subset it computed.
      drawn = sample.int(
        nrow(cluster_data),
        size = n,
        replace = TRUE,
        prob = cluster_data$size
      )
      cluster_data = cluster_data[drawn, , drop = FALSE]
      cluster_data$size = rexp(n = nrow(cluster_data))
    } else if (rand.size.order) {
      cluster_data$size = cluster_data %>%
        pull(size) %>%
        sample(nrow(cluster_data), replace = FALSE)
    }
    strip = ggplot(cluster_data, aes(x = "", y = size, fill = color))
  } else {
    strip = ggplot(cluster_data, aes(x = "", y = 1, fill = color))
  }

  strip = strip +
    geom_bar(width = 1, stat = "identity") +
    scale_fill_manual(values = color_pie) +
    blank_theme + theme_nothing() + labs(x = NULL, y = NULL) +
    theme(plot.margin = unit(margins, "cm")) +
    coord_flip()
  return(strip)
}
colorstrip_cowplot
Creates a `cowplot` (a grid) of color strips.
#' Arrange several randomly generated colour strips in a three-column grid.
#'
#' @param cluster_data Data frame of colours; a subset of its rows is passed
#'   to `color_strip()` for every strip.
#' @param numplots Number of strips to draw.
#' @param margins Margins forwarded to `color_strip()`.
#' @param rand.subset Number of rows used per strip: a value >= 3 fixes the
#'   count (capped at the number of rows), any other truthy value draws a
#'   fresh count between 3 and the number of rows for each strip, and `FALSE`
#'   uses every row.
#' @param rand.position `"last"` uses rows 1..k, `"any"` samples k rows without
#'   replacement, and anything else (the default `"first"`) uses the final k
#'   rows.
#' @param rand.size.order,rand.color.order,n Forwarded to `color_strip()`.
#' @return The grid returned by `cowplot::plot_grid()`.
colorstrip_cowplot = function(cluster_data,
                              numplots = 30,
                              margins = rep(-.278, 4),
                              rand.subset = FALSE,
                              rand.position = "first",
                              rand.size.order = "exponential",
                              rand.color.order = TRUE,
                              n = nrow(cluster_data)) {
  total = nrow(cluster_data)

  # Builds one strip; every call consumes random numbers, so strips are made
  # strictly in sequence.
  make_strip = function(i) {
    if (rand.subset >= 3) {
      upper = min(total, as.integer(rand.subset))
    } else if (rand.subset) {
      # Base-R stand-in for the deprecated purrr::rdunif(1, a = 3, b = total),
      # clamped so it can never ask for more rows than exist.
      lo = min(3, total)
      hi = max(3, total)
      upper = min(total, sample.int(hi - lo + 1, 1, replace = TRUE) + lo - 1)
    } else {
      upper = total
    }

    # NOTE(review): "last" selects the leading rows while the default selects
    # the trailing ones; the names look inverted - confirm the intent.
    if (rand.position == "last") {
      index = seq_len(upper)
    } else if (rand.position == "any") {
      index = sample.int(total, size = upper)
    } else {
      index = (total - upper + 1):total
    }

    color_strip(cluster_data[index, ],
                proportional = TRUE,
                rand.color.order = rand.color.order,
                rand.size.order = rand.size.order,
                margins = margins,
                n = n)
  }

  # seq_len() yields an empty sequence for numplots = 0, unlike 1:numplots.
  Simp_list = lapply(seq_len(numplots), make_strip)
  cowplotted = cowplot::plot_grid(plotlist = Simp_list, ncol = 3)
  return(cowplotted)
}
# Seed the RNG, then draw the strip grid (15 colours per strip) and overlay
# the `shadow` grob on it.
set.seed(60540)
colorstrip_cowplot(Simp_clust, rand.subset = 15) +
  annotation_custom(shadow)
[1] Here, I call it generative very loosely: there are elements of randomness which produce the output, but it is contained randomness using standard `ggplot2` and `cowplot` output types, rather than free-flowing and wild as other generative art is. ↩︎