Stimuli Selection

library(tidyverse)
library(ggside)
library(ggimage)
# Local path
filepaths <- list.files("C:/Users/domma/Box/Databases/VAPS/VAPS/", 
                        pattern=".jpg$", full.names = TRUE, recursive = TRUE, ignore.case = TRUE) 

df <- haven::read_sav("VAPS_ValidationData.sav") |> # File provided in the database
  full_join(readxl::read_xlsx("VAPS_Information.xlsx"), by = "Picture_Number") |> 
  mutate(
    Picture_Number = as.numeric(Picture_Number),
    Folder = case_when(
      Category == 1 ~ "1 Scenes",
      Category == 2 ~ "2 Portraits",
      Category == 3 ~ "3 Landscapes",
      Category == 4 ~ "4 Still Lifes",
      .default = "5 Toward Abstraction"),
    Category = case_when(
      Category == 1 ~ "Scene",
      Category == 2 ~ "Portrait",
      Category == 3 ~ "Landscape",
      Category == 4 ~ "Still Life",
      .default = "Abstract"),
    File = paste0(Picture_Number, ".jpg"),
    Date = sub("-.*", "", Date),
    Date = sub("/.*", "", Date),
    Date = sub("ca. ", "", Date),
    Date = sub("about ", "", Date),
    Date = sub("around ", "", Date),
    Date = sub("before ", "", Date),
    Date = sub("after ", "", Date),
    Date = sub("er", "", Date),
    Date = sub("s", "", Date),
    Date = sub("um ", "", Date),
    Date = ifelse(Date %in% c("unknown", "unkown","undated", "Unknown"), NA, Date),
    Date = as.numeric(Date))

df$File <- unlist(sapply(df$File, function(f) filepaths[grepl(f, filepaths, ignore.case = T)], USE.NAMES = F))

df |> 
  summarize(n = n(), .by = c("Category", "Style")) |> 
  pivot_wider(names_from = Category, values_from = n, values_fill = 0) |> 
  full_join(summarize(df, Date = paste0(min(Date, na.rm = TRUE), "-", max(Date, na.rm = TRUE)), .by = "Style"), by = "Style") |>
  gt::gt()
Style Scene Portrait Landscape Still Life Abstract Date
Renaissance and Mannerism 45 33 0 0 0 1434-1610
Baroque and Rococo 51 33 27 34 0 1504-1789
Idealistic tendencies 28 14 11 13 0 1781-1876
Realistic tendencies I. (19th century) 12 5 18 7 0 1821-1888
Impressionistic tendencies 15 9 13 9 0 1869-1927
Postimpressionistic tendencies 50 26 39 20 0 1852-1998
Expressionistic tendencies 31 22 28 16 8 1905-1988
Cubistic tendencies 15 10 8 15 33 1907-1937
Realistic tendencies II. (20th century) 31 31 20 26 16 1848-2012
Surrealistic tendencies 27 7 10 10 15 1911-1983
Constructivist tendencies 0 0 0 0 44 1915-1984
Informel tendencies 0 0 0 0 25 1946-1991
Abstract expressionistic tendencies 0 0 0 0 39 1930-1993

Summary

To ensure a diverse and balanced set of emotional stimuli across artistic styles, we first reclassified the original painting styles into four broader and more evenly distributed categories - “Abstract and Avant-garde”, “Impressionist and Expressionist”, “Classical”, and “Romantic and Realism” - based on shared historical and aesthetic characteristics. We then restricted selection to unfamiliar items (i.e., those rated below the median familiarity score of 1.65/7). To maximize orthogonal variability across arousal and valence, we stratified the paintings within each style into four quadrants defined by tertiles of arousal and valence, and selected the four paintings most distant from the median point (using Manhattan distance) within each quadrant. This resulted in a final set of 64 paintings (16 per style).

Style Reclassification

df$Subcategory <- df$Style

df$Style <- case_when(
  df$Style %in% c("Surrealistic tendencies", "Cubistic tendencies") | df$Category == "Abstract" ~ "Abstract and Avant-garde",
  df$Style %in% c("Impressionistic tendencies", "Postimpressionistic tendencies",
                  "Expressionistic tendencies") ~ "Impressionist and Expressionist",
  df$Style %in% c("Renaissance and Mannerism", "Baroque and Rococo") ~ "Classical",
  df$Style %in% c("Idealistic tendencies", "Realistic tendencies I. (19th century)",
                  "Realistic tendencies II. (20th century)") ~ "Romantic and Realism",
  .default = df$Style
)

df |> 
  summarize(n = n(), .by = c("Category", "Style")) |> 
  pivot_wider(names_from = Category, values_from = n, values_fill = 0) |> 
  full_join(summarize(df, N = n(), Date = paste0(min(Date, na.rm = TRUE), "-", max(Date, na.rm = TRUE)), .by = "Style"), by = "Style") |>
  gt::gt()
Style Scene Portrait Landscape Still Life Abstract N Date
Classical 96 66 27 34 0 223 1434-1789
Romantic and Realism 71 50 49 46 0 216 1781-2012
Impressionist and Expressionist 96 57 80 45 0 278 1852-1998
Abstract and Avant-garde 42 17 18 25 180 282 1907-2000

Familiarity

df |> 
  ggplot(aes(x = Familarity_Mean_All)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = median(df$Familarity_Mean_All), linetype = "dashed", color = "red") +
  theme_minimal()

df$Familiar <- ifelse(df$Familarity_Mean_All > median(df$Familarity_Mean_All), TRUE, FALSE)

Appraisals

df |> 
  select(Picture_Number, Category, Style, Familiar,
         Liking_Mean_All, Valence_Mean_All, Arousal_Mean_All,
         Liking_Mean_Female, Valence_Mean_Female, Arousal_Mean_Female,
         Liking_Mean_Male, Valence_Mean_Male, Arousal_Mean_Male) |> 
  pivot_longer(-c(Picture_Number, Category, Style, Familiar)) |> 
  separate(name, c("Dimension", "Index", "Sex")) |> 
  pivot_wider(names_from = Dimension, values_from = value) |>
  filter(Sex == "All") |> 
  ggplot(aes(x=Valence, y=Arousal)) +
  geom_point(aes(shape = Familiar, color = Style, size = Liking), alpha = 0.8) +
  # facet_wrap(~Sex) +
  scale_shape_manual(values = c(16, 4)) +
  ggside::geom_ysidedensity(aes(linetype = Familiar)) +
  ggside::geom_xsidedensity(aes(linetype = Familiar)) +
  theme_minimal() 
Warning: `is.ggproto()` was deprecated in ggplot2 3.5.2.
ℹ Please use `is_ggproto()` instead.

Filtering

Selection

N_per_quadrant <- 4

dffinal <- data.frame()  # Initialize selected items
dffiltered <- filter(df, Familiar == FALSE)
for(s in unique(dffiltered$Style)) {
  dat <- filter(dffiltered, Style == s)
  
  # Compute medians for quadrant splitting
  med_arousal <- median(dat$Arousal_Mean_All, na.rm = TRUE)
  med_valence <- median(dat$Valence_Mean_All, na.rm = TRUE)

  # Assign quadrant
  dat <- dat |> 
    mutate(
      Quadrant = case_when(
        Arousal_Mean_All >= quantile(Arousal_Mean_All, 2/3) & Valence_Mean_All >= quantile(Valence_Mean_All, 2/3) ~ "HA_HV",
        Arousal_Mean_All >= quantile(Arousal_Mean_All, 2/3) & Valence_Mean_All <=  quantile(Valence_Mean_All, 1/3) ~ "HA_LV",
        Arousal_Mean_All <=  quantile(Arousal_Mean_All, 1/3) & Valence_Mean_All >= quantile(Valence_Mean_All, 2/3) ~ "LA_HV",
        Arousal_Mean_All <=  quantile(Arousal_Mean_All, 1/3) & Valence_Mean_All <=  quantile(Valence_Mean_All, 1/3) ~ "LA_LV",
        .default = "Middle"
      )
    )
  
  # For each quadrant, select N items farthest from the median point
  dffinal <- dat |> 
    filter(Quadrant != "Middle") |>
    group_by(Quadrant) |> 
    mutate(
      # Euclidean distance
      # distance = sqrt((Arousal_Mean_All - med_arousal)^2 + (Valence_Mean_All - med_valence)^2)
      # Manhattan distance
      distance = abs(Arousal_Mean_All - med_arousal) + abs(Valence_Mean_All - med_valence)
    )  |> 
    slice_max(order_by = distance, n = N_per_quadrant, with_ties = FALSE)  |> 
    ungroup() |> 
    rbind(dffinal) 
}
Code
# q <- 0.78  # Quantile (adjust so to meet target N of stimuli)
# 
# dffiltered <- df |>
#   filter(Familiar == FALSE) |>
#   mutate(Liking_Up = quantile(Liking_Mean_All, q, na.rm = TRUE),
#          Liking_Down = quantile(Liking_Mean_All, 1 - q, na.rm = TRUE),
#          Arousal_Up = quantile(Arousal_Mean_All, q, na.rm = TRUE),
#          Arousal_Down = quantile(Arousal_Mean_All, 1 - q, na.rm = TRUE),
#          Valence_Up = quantile(Valence_Mean_All, q, na.rm = TRUE),
#          Valence_Down = quantile(Valence_Mean_All, 1 - q, na.rm = TRUE),
#          .by = "Style") |>
#   mutate(Liking_Extreme = ifelse(Liking_Mean_All >= Liking_Up |Liking_Mean_All <= Liking_Down, TRUE, FALSE),
#          Arousal_Extreme = ifelse(Arousal_Mean_All >= Arousal_Up | Arousal_Mean_All <= Arousal_Down, TRUE, FALSE),
#          Valence_Extreme = ifelse(Valence_Mean_All >= Valence_Up | Valence_Mean_All <= Valence_Down, TRUE, FALSE),
#          Total_Extreme = ifelse(Liking_Extreme & Arousal_Extreme & Valence_Extreme, TRUE, FALSE))
# 
# dffinal <- filter(dffiltered, Total_Extreme == TRUE)

Results

dffiltered |>
  select(Picture_Number, Category, Style,
         Liking_Mean_All, Valence_Mean_All, Arousal_Mean_All,
         Liking_Mean_Female, Valence_Mean_Female, Arousal_Mean_Female,
         Liking_Mean_Male, Valence_Mean_Male, Arousal_Mean_Male) |>
  pivot_longer(-c(Picture_Number, Category, Style)) |>
  separate(name, c("Dimension", "Index", "Sex")) |>
  pivot_wider(names_from = Dimension, values_from = value) |>
  filter(Sex == "All") |>
  mutate(Selected = as.factor(Picture_Number %in% dffinal$Picture_Number)) |> 
  ggplot(aes(x=Valence, y=Arousal)) +
  geom_point(aes(shape = Selected, color = Style, size = Liking), alpha = 0.8) +
  # facet_wrap(~Sex) +
  scale_shape_manual(values = c(4, 16)) +
  ggside::geom_ysidedensity(aes(linetype = Selected)) +
  ggside::geom_xsidedensity(aes(linetype = Selected)) +
  theme_minimal()

dffinal |>
  summarize(n = n(), .by = c("Style")) |> 
  rbind(data.frame(Style = "Total", n = nrow(dffinal))) |>  
  gt::gt() 
Style n
Abstract and Avant-garde 16
Impressionist and Expressionist 16
Romantic and Realism 16
Classical 16
Total 64
dat <- dffinal |> 
  select(Picture_Number, Style, File,  
         Quadrant,
         # Arousal_Up, Arousal_Down, Valence_Up, Valence_Down,
         Liking_Mean_All, Valence_Mean_All, Arousal_Mean_All,
         Liking_Mean_Female, Valence_Mean_Female, Arousal_Mean_Female,
         Liking_Mean_Male, Valence_Mean_Male, Arousal_Mean_Male) |> 
  pivot_longer(-c(Picture_Number, Style, File,
                  Quadrant)) |> 
                  # Arousal_Up, Arousal_Down, Valence_Up, Valence_Down)) |> 
  separate(name, c("Dimension", "Index", "Sex")) |> 
  pivot_wider(names_from = Dimension, values_from = value) |>
  filter(Sex == "All") |> 
  mutate(
    x_repelled = case_when(
      Quadrant == "LA_LV" ~ 0.75, 
      Quadrant == "LA_HV" ~ Valence,
      Quadrant == "HA_HV" ~ 6.5,
      Quadrant == "HA_LV"~ Valence,
      .default = Valence),
    y_repelled = case_when(
      Quadrant == "LA_LV" ~ Arousal, 
      Quadrant == "LA_HV" ~ 0.5,
      Quadrant == "HA_HV" ~ Arousal,
      Quadrant == "HA_LV" ~ 7.5,
      .default = Arousal)) |> 
  # mutate(
  #   x_repelled = case_when(
  #     Valence <= Valence_Down & Arousal <= Arousal_Down ~ 0.75, 
  #     Valence <= Valence_Down & Arousal >= Arousal_Up ~ Valence,
  #     Valence >= Valence_Up & Arousal >= Arousal_Up ~ 6.5,
  #     Valence >= Valence_Up & Arousal <= Arousal_Down ~ Valence,
  #     .default = Valence),
  #   y_repelled = case_when(
  #     Valence <= Valence_Down & Arousal <= Arousal_Down ~ Arousal, 
  #     Valence <= Valence_Down & Arousal >= Arousal_Up ~ 7,
  #     Valence >= Valence_Up & Arousal >= Arousal_Up ~ Arousal,
  #     Valence >= Valence_Up & Arousal <= Arousal_Down ~ 0.5,
  #     .default = Arousal)) |> 
  arrange(Style)



# Jitter positions
idx <- dat$Quadrant == "LA_LV"  # Left
dat$y_repelled[idx] <- seq(1.5, 6, length.out = sum(idx))
dat$x_repelled[idx] <- dat$x_repelled[idx] + rep_len(c(0, 0.5), length = sum(idx))

idx <- dat$Quadrant == "HA_LV"  # Top
dat$x_repelled[idx] <- seq(6.5, 0.75, length.out = sum(idx))
dat$y_repelled[idx] <- dat$y_repelled[idx] + rep_len(c(0, -0.5), length = sum(idx))

idx <- dat$Quadrant == "HA_HV"  # Right
dat$y_repelled[idx] <- seq(6, 1.5, length.out = sum(idx))
dat$x_repelled[idx] <- dat$x_repelled[idx] + rep_len(c(0, -0.5), length = sum(idx))

idx <- dat$Quadrant == "LA_HV"  # Bottom
dat$x_repelled[idx] <- seq(0.75, 6.5, length.out = sum(idx))
dat$y_repelled[idx] <- dat$y_repelled[idx] + rep_len(c(0, 0.5), length = sum(idx))

gc()
p <- dat |> 
  ggplot(aes(x=Valence, y=Arousal)) +
  geom_segment(aes(xend = x_repelled, yend = y_repelled, color = Style), alpha = 0.5) +
  geom_image(aes(x=x_repelled, y = y_repelled, image=File), size=0.08) +
  geom_point(aes(color = Style, size = Liking), alpha = 0.8) +
  ggside::geom_ysidedensity(aes(color = Style), linewidth = 1) +
  ggside::geom_xsidedensity(aes(color = Style), linewidth = 1) +
  theme_minimal() +
  ggside::theme_ggside_void() +
  scale_color_manual(values = c("#E41A1C", "#377EB8", "#4DAF4A", "#FF7F00")) +
  scale_size(range = c(0.5, 5), guide = "none") +
  theme(axis.title = element_text(face = "bold", size = rel(1.2)),
        legend.title = element_text(face = "bold", size = rel(1.2)))
p

Final Selection

files <- str_split(dffinal$File, "/", simplify = TRUE)[,10]  # Change index to take last element

selection <- dffinal |> 
  mutate(Item = files) |> 
  select(Item, Category, Subcategory, Style, Artist, Title, 
         Date, Width=Width_unified, Height=Height_unified, ends_with("_All"),
         File) |> 
  arrange(Style, Item)

selection |> 
  select(-File) |> 
  gt::gt() |> 
  gt::opt_interactive() |> 
  gt::data_color(
    columns = c("Style"))|> 
  gt::data_color(
    columns = c("Liking_Mean_All", "Valence_Mean_All", "Arousal_Mean_All",
                "Complexity_Mean_All", "Familarity_Mean_All"),
    palette = c("#E41A1C", "#FF7F00", "#4DAF4A"))

Save

Code
write.csv(select(selection, -File), "../stimuli_data.csv", row.names = FALSE)

json <- selection |>
  select(Item, Style, Width, Height) |>
  jsonlite::toJSON()

write(paste("var stimuli_list = ", json), "../stimuli_list.js")
Code
# Remove all current files
unlink("../stimuli/*")

# Copy each file
for(file in selection$File) {
  file.copy(file, "../stimuli/")
}