FictionEro - Data Cleaning

Data Preparation

Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
df <- read.csv("../data/rawdata_participants.csv") |> 
  mutate(across(everything(), ~ifelse(.x == "", NA, .x))) |>
  mutate(Sample = case_when(
    Language=="English" & Experimenter %in% c("2discord1", "Experimenter2", "Experimenter1") ~ "Students (Non-incentivized)",
    Language=="English" & Experimenter %in% c("Unknown", "Readme (GitHub)", "website", "reddit") ~ "General Population",
    Experimenter %in% c("enter", "macre", "piaca", "unime", "classroom1") ~ "Students (Non-incentivized)",
    Experimenter %in% c("SONA") ~ "Students (Incentivized)",
    Experimenter %in% c("gacon", "mafas", "marmar", "mavio", "sobon", "RISC", "Snowball") ~ "General Population",
    str_detect(Experimenter, "reddit") ~ "General Population",
    .default = Experimenter
  ))

dftask <- read.csv("../data/rawdata_task.csv") |> 
  full_join(
    df[c("Participant", "Sex", "SexualOrientation")],
    by = join_by(Participant)
    )

The initial sample consisted of 1007 participants (Mean age = 28.4, SD = 11.2, range: [18, 80], 9.9e-02% missing; Sex: 45.3% females, 53.3% males, 1.4% other; Education: Bachelor, 30.09%; Doctorate, 4.87%; High School, 45.38%; Master, 17.18%; Other, 2.09%; Primary School, 0.40%; Country: 36.25% UK, 17.48% Italy, 12.21% USA, 10.23% Colombia, 23.83% other).

Compute Scores

# Create Sexual "relevance" variable (Relevant, irrelevant, non-erotic)
dftask <- dftask |> 
  mutate(Relevance = case_when(
    Type == "Non-erotic" ~ "Non-erotic",
    Sex == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
    Sex == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
    Sex == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
    Sex == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
    # TODO: what to do with "Other"? 
    SexualOrientation %in% c("Bisexual", "Other") & Category %in% c("Male", "Female") ~ "Relevant",
    .default = "Irrelevant"
  )) 

Recruitment History

Code
plot_recruitement <- function(df) {
  # Consecutive count of participants per day (as area)
  df |>
    mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |> 
    group_by(Date, Language, Sample) |> 
    summarize(N = n()) |> 
    ungroup() |>
    # https://bocoup.com/blog/padding-time-series-with-r
    complete(Date, Language, Sample, fill = list(N = 0)) |> 
    group_by(Language, Sample) |>
    mutate(N = cumsum(N)) |>
    ggplot(aes(x = Date, y = N)) +
    geom_area(aes(fill=Sample)) +
    scale_y_continuous(expand = c(0, 0)) +
    labs(
      title = "Recruitment History",
      x = "Date",
      y = "Total Number of Participants"
    ) +
    see::theme_modern() 
}

plot_recruitement(df) +
  facet_wrap(~Language, nrow=5, scales = "free_y")

Code
# Table
table_recruitment <- function(df) {
  summarize(df, N = n(), .by=c("Language", "Experimenter")) |> 
    arrange(desc(N)) |> 
    gt::gt() |> 
    gt::opt_stylize() |> 
    gt::opt_interactive(use_compact_mode = TRUE) |> 
    gt::tab_header("Number of participants per recruitment source")
}
table_recruitment(df)
Number of participants per recruitment source
Code
plot_recruitement(filter(df, Language == "English"))

Code
table_recruitment(filter(df, Language == "English"))
Number of participants per recruitment source
Code
plot_recruitement(filter(df, Language == "French"))

Code
table_recruitment(filter(df, Language == "French"))
Number of participants per recruitment source
Code
plot_recruitement(filter(df, Language == "Italian"))

Code
table_recruitment(filter(df, Language == "Italian"))
Number of participants per recruitment source
Code
plot_recruitement(filter(df, Language == "Colombian"))

Code
table_recruitment(filter(df, Language == "Colombian"))
Number of participants per recruitment source
Code
plot_recruitement(filter(df, Language == "Spanish"))

Code
table_recruitment(filter(df, Language == "Spanish"))
Number of participants per recruitment source

Feedback

Evaluation

The majority of participants found the study to be a “fun” experience. Interestingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.

Code
df |> 
  select(starts_with("Feedback"), -Feedback_Comments) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
  group_by(Question, Answer) |> 
  summarise(prop = n()/nrow(df), .groups = 'drop') |> 
  complete(Question, Answer, fill = list(prop = 0)) |> 
  filter(Answer == "True") |> 
  mutate(Question = str_remove(Question, "Feedback_"),
         Question = str_replace(Question, "AILessArousing", "AI = Less arousing"),
         Question = str_replace(Question, "AIMoreArousing", "AI = More arousing"),
         Question = str_replace(Question, "CouldNotDiscriminate", "Hard to discriminate"),
         Question = str_replace(Question, "LabelsIncorrect", "Labels were incorrect"),
         Question = str_replace(Question, "NoFeels", "Didn't feel anything"),
         Question = str_replace(Question, "CouldDiscriminate", "Easy to discriminate"),
         Question = str_replace(Question, "LabelsReversed", "Labels were reversed")) |>
  mutate(Question = fct_reorder(Question, desc(prop))) |> 
  ggplot(aes(x = Question, y = prop)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
  labs(x="Feedback", y = "Participants", title = "Feedback") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
    axis.title.x = element_blank()
  )

Code
cor <- df |> 
  select(starts_with("Feedback"), -Feedback_Comments) |> 
  mutate_all(~ifelse(.=="True", 1, 0)) |> 
  correlation(method="tetrachoric", redundant = TRUE) |> 
  correlation::cor_sort() |> 
  correlation::cor_lower()

cor |> 
  mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
  mutate(Parameter2 = fct_rev(Parameter2)) |>
  mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
         Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
  ggplot(aes(x=Parameter1, y=Parameter2)) +
  geom_tile(aes(fill = rho), color = "white") +
  geom_text(aes(label = val), size = 3) +
  labs(title = "Feedback Co-occurence Matrix") +
  scale_fill_gradient2(
    low = "#2196F3",
    mid = "white",
    high = "#F44336",
    breaks = c(-1, 0, 1),
    guide = guide_colourbar(ticks=FALSE),
    midpoint = 0,
    na.value = "grey85",
    limit = c(-1, 1))  + 
  theme_minimal() +
  theme(legend.title = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

Comments

Code
data.frame(Language = df$Language,
           Source = df$Experimenter,
           Comments = trimws(df$Feedback_Comments)) |> 
  filter(!tolower(Comments) %in% c(NA, "no", "nope", "none", "na", "n/a", "non", ".", "..", "ninguno")) |> 
  arrange(Language, Source) |>
  gt::gt() |> 
  gt::opt_stylize() |> 
  gt::opt_interactive(use_compact_mode = TRUE) 

Exclusion

outliers <- list()

Mobile

Code
df |>
  ggplot(aes(x=Mobile, fill=Language)) +
  geom_bar() +
  geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
  theme_modern()

There were 296 participants that used a mobile device.

Code
# df <- filter(df, Mobile == "False")
# dftask <- filter(dftask, Participant %in% df$Participant)

Experiment Duration

The experiment’s median duration is 24.72 min (50% HDI [20.12, 27.82]).

Code
df |>
  mutate(Participant = fct_reorder(Participant, Experiment_Duration),
         Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
         Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
         Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
  ggplot(aes(y = Participant, x = Duration)) +
  geom_point(aes(color = Group, shape = Category)) +
  geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
  geom_vline(xintercept = 15, color = "orange", linetype = "dotted") +
  scale_shape_manual(values = c("extra" = 3, ok = 19)) +
  scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
  guides(color = "none", shape = "none") +
  ggside::geom_xsidedensity(fill = "#4CAF50", color=NA) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  labs(
    title = "Experiment Completion Time",
    x = "Duration (in minutes)",
    y = "Participants"
  )  +
  theme_modern() +
  ggside::theme_ggside_void() +
  theme(ggside.panel.scale = .3,
        panel.border = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

Code
outliers$expe_duration <- filter(df, Experiment_Duration < 15)$Participant

Task Duration

Code
plot_hist <- function(dat) {
  dens <- rbind(
    mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT1), 
           Phase="Emotional ratings",
           y = y / max(y)),
    mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT2), 
           Phase="Reality rating",
           y = y / max(y))
  )
  
  dat |> 
    filter(RT1 < 40 & RT2 < 40) |>  # Remove very long RTs
    # mutate(Participant = fct_reorder(Participant, RT1)) |> 
    pivot_longer(cols = c(RT1, RT2), names_to = "Phase", values_to = "RT") |>
    mutate(Phase = ifelse(Phase == "RT1", "Emotional ratings", "Reality rating")) |>
    ggplot(aes(x=RT)) +
    geom_area(data=dens, aes(x=x, y=y, fill=Phase), alpha=0.33, position="identity") +
    geom_density(aes(color=Phase, y=after_stat(scaled)), linewidth=1.5) + 
    scale_x_sqrt(breaks=c(0, 2, 5, 10, 20)) +
    theme_minimal() +
    theme(axis.title.y = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank(),
          axis.line.y = element_blank()) +
    labs(title = "Distribution of Response Time for each Participant", x="Response time per stimuli (s)") +
    facet_wrap(~Participant, ncol=5, scales="free_y") +
    coord_cartesian(xlim = c(0, 25))
}

BAIT Questionnaire Duration

Code
df |>
  mutate(Participant = fct_reorder(Participant, BAIT_Duration),
         Category = ifelse(BAIT_Duration > 5, "extra", "ok"),
         Duration = ifelse(BAIT_Duration > 5, 5, BAIT_Duration),
         Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
  ggplot(aes(y = Participant, x = Duration)) +
  geom_point(aes(color = Group, shape = Category)) +
  geom_vline(xintercept = median(df$BAIT_Duration), color = "red", linetype = "dashed") +
  geom_vline(xintercept = 0.5, color = "orange", linetype = "dotted") +
  scale_shape_manual(values = c("extra" = 3, ok = 19)) +
  scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
  guides(color = "none", shape = "none") +
  ggside::geom_xsidedensity(fill = "#9C27B0", color=NA) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  labs(
    title = "Questionnaire Completion Time",
    x = "Duration (in minutes)",
    y = "Participant"
  )  +
  theme_modern() +
  ggside::theme_ggside_void() +
  theme(ggside.panel.scale = .3,
        panel.border = element_blank(),
        axis.ticks.y = element_blank(),
          axis.text.y = element_blank()) 

Code
outliers$bait_duration <- filter(df, BAIT_Duration < 0.5)$Participant
df[df$Participant %in% outliers$bait_duration, str_detect(names(df), "BAIT_\\d+")] <- NA

We excluded the BAIT data of 7 participants who completed the questionnaire in less than 30 seconds.

Response to Erotic Stimuli

Code
# Single arousal response (0)
outliers$novariability <- summarize(dftask, n = length(unique(Arousal)), .by="Participant") |> 
  filter(n == 1) |> 
  select(Participant) |> 
  pull()

df <- filter(df, !Participant %in% outliers$novariability)
dftask <- filter(dftask, !Participant %in% outliers$novariability)

We removed 7 that showed no variation in their arousal response (did not move the scales).

Code
dat <- dftask |> 
  filter(Relevance %in% c("Relevant", "Non-erotic")) |> 
  group_by(Participant, Type) |> 
  summarise(Arousal = mean(Arousal), 
            Valence = mean(Valence),
            Enticement = mean(Enticement),
            .groups = "drop") |>
  pivot_wider(names_from = Type, values_from = all_of(c("Arousal", "Valence", "Enticement"))) |>
  transmute(Participant = Participant,
            Arousal = Arousal_Erotic - `Arousal_Non-erotic`,
            Valence = Valence_Erotic - `Valence_Non-erotic`,
            Enticement = Enticement_Erotic - `Enticement_Non-erotic`) |>
  filter(!is.na(Arousal)) |> 
  mutate(Participant = fct_reorder(Participant, Arousal)) 

dat |> 
  pivot_longer(-Participant) |> 
  mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |> 
  ggplot(aes(x=value, y=Participant, fill=Group)) +
  geom_bar(aes(fill=value), stat = "identity") +
  scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
  # scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
  theme_bw() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
  facet_wrap(~name, ncol=3, scales="free_x")

Code
outliers$emo_diff <- sort(as.character(dat[dat$Arousal < 0, ]$Participant))

Response Coherence

Code
dat <- dftask |> 
  filter(!Participant %in% outliers) |> 
  summarize(cor_ArVal = cor(Arousal, Valence),
            cor_ArEnt = cor(Arousal, Enticement),
            .by="Participant") 
  
dat |>
  left_join(df[c("Participant", "Language")], by="Participant") |>
  mutate(Participant = fct_reorder(Participant, cor_ArVal))  |> 
  pivot_longer(starts_with("cor_")) |> 
  mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |> 
  mutate(name = fct_relevel(name, "cor_ArVal", "cor_ArEnt"),
         name = fct_recode(name, "Arousal - Valence" = "cor_ArVal", "Arousal - Enticement" = "cor_ArEnt")) |>
  ggplot(aes(y = Participant, x = value)) +
  geom_bar(aes(fill = Language), stat = "identity") +
  # scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
  # scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
  theme_bw() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
  facet_wrap(~name, ncol=3, scales="free_x")

Code
outliers$emo_cor <- sort(as.character(dat[dat$cor_ArEnt < 0, ]$Participant))

outliers$arousal <- intersect(outliers$emo_diff, outliers$emo_cor)

We removed 3 participants that had a negative correlation between arousal and enticement, and had a negative arousal difference between erotic and neutral (suggesting a misunderstanding of the scale direction).

Code
df <- filter(df, !Participant %in% outliers$arousal)
dftask <- filter(dftask, !Participant %in% outliers$arousal)

Sexual Profile

Sample

Code
df |>
  ggplot(aes(x = Sex)) +
  geom_bar(aes(fill = SexualOrientation)) +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
  scale_fill_metro_d() +
  labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Sexual Orientation", fill = "Sexual Orientation") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )

We removed 57 participants that were incompatible with further analysis.

df <- filter(df, !(Sex == "Other" | SexualOrientation == "Other"))
dftask <- filter(dftask, Participant %in% df$Participant)

Task Behaviour

Code
show_distribution <- function(dftask, target="Arousal") {
  dftask |> 
    filter(SexualOrientation %in% c("Heterosexual", "Bisexual", "Homosexual")) |>
    bayestestR::estimate_density(select=target, 
                                 at=c("Relevance", "Category", "Sex", "SexualOrientation"), 
                                 method="KernSmooth") |>
    ggplot(aes(x = x, y = y)) +
    geom_line(aes(color = Relevance, linetype = Category), linewidth=1) +
    facet_grid(SexualOrientation~Sex, scales="free_y")  +
    scale_color_manual(values = c("Relevant" = "red", "Non-erotic" = "blue", "Irrelevant"="darkorange")) +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_continuous(expand = c(0, 0)) +
    theme_minimal()  +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.y = element_blank(),
          plot.title = element_text(face="bold")) +
    labs(title = target) 
}

(show_distribution(dftask, "Arousal") | show_distribution(dftask, "Valence")) /
  (show_distribution(dftask, "Enticement") | show_distribution(dftask, "Realness")) +
  patchwork::plot_layout(guides = "collect") +
  patchwork::plot_annotation(title = "Distribution of Appraisals depending on the Sexual Profile",
                             theme = theme(plot.title = element_text(hjust = 0.5, face="bold"))) 

We kept only heterosexual participants (70.64%).

df <- filter(df, SexualOrientation == "Heterosexual")
dftask <- filter(dftask, Participant %in% df$Participant)

Final Sample

The final sample includes 664 participants (Mean age = 29.9, SD = 11.6, range: [18, 80], 0.2% missing; Sex: 37.0% females, 63.0% males, 0.0% other; Education: Bachelor, 29.97%; Doctorate, 5.72%; High School, 41.57%; Master, 20.63%; Other, 1.66%; Primary School, 0.45%; Country: 29.37% UK, 19.73% Italy, 13.25% USA, 11.30% Colombia, 26.36% other).

Code
p_country <- dplyr::select(df, region = Country) |>
  group_by(region) |>
  summarize(n = n()) |>
  right_join(map_data("world"), by = "region") |>
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(aes(fill = n)) +
  scale_fill_gradientn(colors = c("#FFEB3B", "red", "purple")) +
  labs(fill = "N") +
  theme_void() +
  labs(title = "A Geographically Diverse Sample", subtitle = "Number of participants by country")  +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2))
  )
p_country

Code
ggwaffle::waffle_iron(df, ggwaffle::aes_d(group = Ethnicity), rows=10) |> 
  ggplot(aes(x, y, fill = group)) + 
  ggwaffle::geom_waffle() + 
  coord_equal() + 
  scale_fill_flat_d() + 
  ggwaffle::theme_waffle() +
  labs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="")  +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.

Code
p_age <- estimate_density(df$Age) |>
  normalize(select = y) |> 
  mutate(y = y * 86) |>  # To match the binwidth
  ggplot(aes(x = x)) +
  geom_histogram(data=df, aes(x = Age), fill = "#616161", bins=28) +
  # geom_line(aes(y = y), color = "orange", linewidth=2) +
  geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
  # geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
  theme_modern(axis.title.space = 10) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )
p_age
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_vline()`).

Code
p_edu <- df |>
  mutate(Education = fct_relevel(Education, "Other", "Primary School", "High School", "Bachelor", "Master", "Doctorate")) |> 
  ggplot(aes(x = Education)) +
  geom_bar(aes(fill = Education)) +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  scale_fill_viridis_d(guide = "none") +
  labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )
p_edu

Birth Control

Code
colors <- c(
  "NA" = "#2196F3", "None" = "#E91E63", "Condoms (for partner)" = "#9C27B0",
  "Combined pills" = "#FF9800", "Intrauterine Device (IUD)" = "#FF5722", 
  "Intrauterine System (IUS)" = "#795548", "Progestogen pills" = "#4CAF50",
  "Other" = "#FFC107", "Condoms (female)" = "#607D8B"
)
colors <- colors[names(colors) %in% c("NA", df$BirthControl)]

p_sex <- df |>
  mutate(BirthControl = ifelse(Sex == "Male", "NA", BirthControl),
         BirthControl = fct_relevel(BirthControl, names(colors))) |>
  ggplot(aes(x = Sex)) +
  geom_bar(aes(fill = BirthControl)) +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
  scale_fill_manual(
    values = colors,
    breaks = names(colors)[2:length(colors)]
  ) +
  labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Birth Control Method", fill = "Birth Control") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )
p_sex

Sexual Profile

Code
p_sexprofile <- df |>
  select(Participant, Sex, SexualOrientation, SexualActivity, COPS_Duration_1, COPS_Frequency_2) |> 
  pivot_longer(-all_of(c("Participant", "Sex"))) |> 
  mutate(name = str_replace_all(name, "SexualOrientation", "Sexual Orientation"),
         name = str_replace_all(name, "SexualActivity", "Sexual Activity"),
         name = str_replace_all(name, "COPS_Duration_1", "Pornography Usage (Duration)"),
         name = str_replace_all(name, "COPS_Frequency_2", "Pornography Usage (Frequency)")) |> 
  ggplot(aes(x = value, fill=Sex)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292")) +
  labs(title = "Sexual Profile of Participants") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  facet_wrap(~name, scales = "free")
p_sexprofile

Code
dat <- select(df, COPS_Duration_1, COPS_Frequency_2, SexualActivity) |> 
    mutate(across(where(is.character), as.factor)) |> 
    mutate(across(where(is.factor), as.numeric))

r <- correlation::correlation(
  dat) |> 
  as.matrix()  

r <- parameters::factor_analysis(dat, n = 1, rotation = "oblimin", cor = r)
df$Porn <- get_scores(r)[, 1]
Code
p_language <- df |>
  ggplot(aes(x = Language_Level)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  labs(x = "Level", y = "Number of Participants", title = "Language Level") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1))
  )

p_expertise <- df |>
  ggplot(aes(x = AI_Knowledge)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  labs(x = "Level", y = "Number of Participants", title = "AI-Expertise") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1))
  )

p_language | p_expertise

Code
df$Screen_Size <- sqrt(df$Screen_Width * df$Screen_Height)

df |> 
  ggplot(aes(x = Screen_Size, fill=Mobile)) +
  geom_histogram(bins = 50, color="black") +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  labs(x =  expression("Screen Size ("~sqrt(Number~of~Pixels)~")"), y = "Number of Participants", title = "Screen Size") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1))
  )

Code
p_country / 
  (p_age + p_edu)
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_vline()`).

Beliefs about Artificial Information Technology (BAIT)

This section pertains to the validation of the BAIT scale measuring beliefs and expectations about artificial creations.

Exploratory Factor Analysis

Code
bait <- df |> 
  select(starts_with("BAIT_"), -BAIT_Duration) |> 
  rename_with(function(x) gsub("BAIT_\\d_", "", x)) |> 
  filter(!is.na(TextRealistic ))


cor <- correlation::correlation(bait, redundant = TRUE) |> 
  correlation::cor_sort() |> 
  correlation::cor_lower()

clean_labels <- function(x) {
  x <- str_remove_all(x, "BAIT_") |> 
    str_replace_all("_", " - ")
}

cor |> 
  mutate(val = paste0(insight::format_value(r), format_p(p, stars_only=TRUE))) |>
  mutate(Parameter2 = fct_rev(Parameter2)) |>
  mutate(Parameter1 = fct_relabel(Parameter1, clean_labels),
         Parameter2 = fct_relabel(Parameter2, clean_labels)) |> 
  ggplot(aes(x=Parameter1, y=Parameter2)) +
  geom_tile(aes(fill = r), color = "white") +
  geom_text(aes(label = val), size = 3) +
  labs(title = "Correlation Matrix",
       subtitle = "Beliefs about Artificial Information Technology (BAIT)") +
  scale_fill_gradient2(
    low = "#2196F3",
    mid = "white",
    high = "#F44336",
    breaks = c(-1, 0, 1),
    guide = guide_colourbar(ticks=FALSE),
    midpoint = 0,
    na.value = "grey85",
    limit = c(-1, 1))  + 
  theme_minimal() +
  theme(legend.title = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

Code
n <- parameters::n_factors(bait, package = "nFactors")
plot(n)

Code
efa <- parameters::factor_analysis(bait, cor=cor(bait, use="pairwise.complete.obs"), n=3, rotation = "oblimin", 
                                   sort=TRUE, scores="tenBerge", fm="ml")
plot(efa)

Code
display(efa)
Rotated loadings from Factor Analysis (oblimin-rotation)
Variable ML3 ML1 ML2 Complexity Uniqueness
EnvironmentReal 0.62 0.01 5.89e-03 1.00 0.62
ImitatingReality 0.60 0.01 -0.07 1.03 0.61
ImagesRealistic 0.59 0.05 -0.04 1.02 0.65
VideosIssues 0.46 -0.28 0.12 1.78 0.67
VideosRealistic 2.21e-03 1.00 0.01 1.00 5.00e-03
ImagesIssues -0.13 0.20 0.19 2.75 0.86
TextIssues 0.05 0.04 0.80 1.01 0.37
TextRealistic 0.20 0.05 -0.56 1.27 0.59

The 3 latent factors (oblimin rotation) accounted for 45.37% of the total variance of the original data (ML3 = 17.86%, ML1 = 14.35%, ML2 = 13.16%).

Confirmatory Factor Analysis

Code
m1 <- lavaan::cfa(
  "G =~ ImitatingReality + EnvironmentReal + VideosIssues + TextIssues + VideosRealistic + ImagesRealistic + ImagesIssues + TextRealistic", 
  data=bait)
m2 <- lavaan::cfa(
  "Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues + VideosIssues + VideosRealistic
  Text =~ TextIssues + TextRealistic", 
  data=bait)
m3 <- lavaan::cfa(
  "Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues
  Videos =~ VideosIssues + VideosRealistic
  Text =~ TextIssues + TextRealistic", 
  data=bait)
m4 <- lavaan::cfa(
  "Environment =~ ImitatingReality + EnvironmentReal 
  Images =~ ImagesRealistic + ImagesIssues
  Videos =~ VideosIssues + VideosRealistic
  Text =~ TextIssues + TextRealistic", 
  data=bait)
m5 <- lavaan::cfa(efa_to_cfa(efa, threshold="max"), data=bait)


# bayestestR::bayesfactor_models(m1, m2)
lavaan::anova(m1, m2, m3, m4, m5)
Warning in lavTestLRT(object = object, ..., model.names = NAMES): lavaan
WARNING: some models have the same degrees of freedom

Chi-Squared Difference Test

   Df     AIC     BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
m4 14 -457.35 -358.62  94.954                                          
m3 17 -450.05 -364.78 108.251     13.297 0.07228       3   0.004037 ** 
m5 17 -422.82 -337.55 135.481     27.230 0.00000       0               
m2 19 -402.29 -326.00 160.014     24.532 0.13095       2  4.708e-06 ***
m1 20 -288.47 -216.67 275.830    115.816 0.41804       1  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
display(parameters::parameters(m4, standardize = TRUE))
# Loading
Link Coefficient SE 95% CI z p
Environment =~ ImitatingReality 0.66 0.04 (0.59, 0.73) 17.55 < .001
Environment =~ EnvironmentReal 0.63 0.04 (0.55, 0.70) 16.73 < .001
Images =~ ImagesRealistic 0.71 0.06 (0.60, 0.83) 11.71 < .001
Images =~ ImagesIssues -0.36 0.04 (-0.44, -0.27) -7.99 < .001
Videos =~ VideosIssues 0.80 0.06 (0.69, 0.92) 13.69 < .001
Videos =~ VideosRealistic -0.49 0.05 (-0.58, -0.40) -10.71 < .001
Text =~ TextIssues 0.57 0.05 (0.48, 0.67) 11.71 < .001
Text =~ TextRealistic -0.85 0.06 (-0.97, -0.72) -13.66 < .001
# Correlation
Link Coefficient SE 95% CI z p
Environment ~~ Images 0.73 0.07 (0.58, 0.87) 9.98 < .001
Environment ~~ Videos 0.58 0.06 (0.47, 0.70) 9.77 < .001
Environment ~~ Text -0.44 0.06 (-0.55, -0.33) -7.68 < .001
Images ~~ Videos 0.52 0.07 (0.38, 0.65) 7.34 < .001
Images ~~ Text -0.46 0.07 (-0.59, -0.33) -6.95 < .001
Videos ~~ Text -0.20 0.06 (-0.31, -0.09) -3.61 < .001
Code
m4b <- lavaan::sem(
  "Environment =~ ImitatingReality + EnvironmentReal 
  Images =~ ImagesRealistic + ImagesIssues
  Videos =~ VideosIssues + VideosRealistic
  Text =~ TextIssues + TextRealistic
  G =~ Environment + Images + Videos + Text", 
  data=bait)

m4c <- lavaan::sem(
  "Environment =~ ImitatingReality + EnvironmentReal 
  Images =~ ImagesRealistic + ImagesIssues
  Videos =~ VideosIssues + VideosRealistic
  Text =~ TextIssues + TextRealistic
  Visual =~ Environment + Images", 
  data=bait)

# bayestestR::bayesfactor_models(m1, m2)
lavaan::anova(m4, m4b, m4c)

Chi-Squared Difference Test

    Df     AIC     BIC   Chisq Chisq diff    RMSEA Df diff Pr(>Chisq)  
m4  14 -457.35 -358.62  94.954                                         
m4c 15 -458.48 -364.24  95.824     0.8695 0.000000       1    0.35110  
m4b 16 -454.02 -364.26 102.287     6.4625 0.091183       1    0.01102 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
display(parameters::parameters(m4c, standardize = TRUE))
# Loading
Link Coefficient SE 95% CI z p
Environment =~ ImitatingReality 0.66 0.04 (0.59, 0.73) 17.57 < .001
Environment =~ EnvironmentReal 0.62 0.04 (0.55, 0.70) 16.68 < .001
Images =~ ImagesRealistic 0.72 0.06 (0.60, 0.84) 11.66 < .001
Images =~ ImagesIssues -0.36 0.04 (-0.44, -0.27) -7.94 < .001
Videos =~ VideosIssues 0.80 0.06 (0.69, 0.92) 13.67 < .001
Videos =~ VideosRealistic -0.49 0.05 (-0.58, -0.40) -10.70 < .001
Text =~ TextIssues 0.57 0.05 (0.48, 0.67) 11.76 < .001
Text =~ TextRealistic -0.84 0.06 (-0.96, -0.72) -13.68 < .001
Visual =~ Environment 0.88 0.05 (0.78, 0.98) 17.03 < .001
Visual =~ Images 0.83 0.07 (0.68, 0.97) 11.20 < .001
# Correlation
Link Coefficient SE 95% CI z p
Videos ~~ Text -0.20 0.06 (-0.31, -0.09) -3.62 < .001
Videos ~~ Visual 0.65 0.06 (0.53, 0.76) 10.80 < .001
Text ~~ Visual -0.52 0.06 (-0.63, -0.41) -9.22 < .001
Code
library(ggraph)
library(tidySEM)

edges <- tidySEM::get_edges(m4c) |> 
  mutate(sign = as.factor(sign(as.numeric(est_std))))

tidygraph::tbl_graph(nodes = tidySEM::get_nodes(m4c), edges = edges) |> 
  ggraph(layout = 'kk') + 
  geom_edge_link(aes(filter=op=="=~", label=est_std, color=sign),
                 angle_calc="along",
                 label_dodge=unit(-0.015, "npc"),
                 edge_width=1) + 
  geom_edge_arc(aes(filter=op=="~~", label=est_std, color=sign), 
                linetype="dashed", strength=0.1,
                angle_calc="along",
                label_dodge=unit(-0.015, "npc"),
                edge_width=1) + 
  geom_node_point(aes(shape=shape, color=shape), size=14, alpha=0.3) +
  geom_node_text(aes(label = name)) + 
  scale_edge_color_manual(values=c("1"="darkgreen", "-1"="red"), guide="none") +
  scale_shape_manual(values=c("oval"="circle", "rect"="square"), guide="none") +
  scale_color_manual(values=c("oval"="orange", "rect"="black"), guide="none") +
  theme_void()

Exploratory Graph Analysis (EGA) is a recently developed framework for psychometric assessment, that can be used to estimate the number of dimensions in questionnaire data using network estimation methods and community detection algorithms, and assess the stability of dimensions and items using bootstrapping.

Unique Variable Analysis (UVA)

Unique Variable Analysis (Christensen, Garrido, & Golino, 2023) uses the weighted topological overlap measure (Nowick et al., 2009) on an estimated network. Values greater than 0.25 are determined to have considerable local dependence (i.e., redundancy) that should be handled (variables with the highest maximum weighted topological overlap to all other variables (other than the one it is redundant with) should be removed).

Code
uva <- EGAnet::UVA(data = bait, cut.off = 0.3)
uva
Variable pairs with wTO > 0.30 (large-to-very large redundancy)

        node_i     node_j   wto
 TextRealistic TextIssues 0.331

----

Variable pairs with wTO > 0.25 (moderate-to-large redundancy)

----

Variable pairs with wTO > 0.20 (small-to-moderate redundancy)

           node_i          node_j   wto
     VideosIssues VideosRealistic 0.234
 ImitatingReality EnvironmentReal 0.225
Code
uva$keep_remove
$keep
[1] "TextIssues"

$remove
[1] "TextRealistic"

Networks

Code
ega <- list()
for(model in c("glasso", "TMFG")) {
  for(algo in c("walktrap", "louvain")) {
    for(type in c("ega", "ega.fit", "riEGA")) {  # "hierega"
      if(type=="ega.fit" & algo=="louvain") next  # Too slow
      ega[[paste0(model, "_", algo, "_", type)]] <- EGAnet::bootEGA(
        data = bait,
        seed=123,
        model=model,
        algorithm=algo,
        EGA.type=type,
        type="resampling",
        plot.typicalStructure=FALSE,
        verbose=FALSE)
      }
   }
}

The random-intercept model converged. Wording effects likely. Results are only valid if data are unrecoded.

The random-intercept model converged. Wording effects likely. Results are only valid if data are unrecoded.

The random-intercept model converged. Wording effects likely. Results are only valid if data are unrecoded.

The random-intercept model converged. Wording effects likely. Results are only valid if data are unrecoded.

Code
EGAnet::compare.EGA.plots(
  ega$glasso_walktrap_ega, ega$glasso_walktrap_ega.fit,
  ega$glasso_louvain_ega, ega$TMFG_louvain_ega,
  ega$glasso_louvain_riEGA, ega$glasso_walktrap_riEGA,
  ega$TMFG_walktrap_ega, ega$TMFG_walktrap_ega.fit,
  ega$TMFG_louvain_riEGA, ega$TMFG_walktrap_riEGA, 
  labels=c("glasso_walktrap_ega", "glasso_walktrap_ega.fit",
           "glasso_louvain_ega", "TMFG_louvain_ega",
           "glasso_louvain_riEGA", "glasso_walktrap_riEGA",
           "TMFG_walktrap_ega", "TMFG_walktrap_ega.fit",
           "TMFG_louvain_riEGA", "TMFG_walktrap_riEGA"),
  rows=5,
  plot.all = FALSE)$all

Structure Stability

Figures shows how often each variable is replicating in their empirical structure across bootstraps.

Code
patchwork::wrap_plots(lapply(ega, plot), nrow = 4)

Final Model

Code
ega_final <- ega$glasso_walktrap_riEGA$EGA
plot(ega_final)

# Merge with data
ega_scores <-  EGAnet::net.scores(bait, ega_final)$scores$std.scores |> 
  as.data.frame() |> 
  setNames(c("BAIT_Text_EGA", "BAIT_Visual_EGA", "BAIT_Videos_EGA")) 

sem_scores <- lavaan::predict(m4c) |> 
  as.data.frame() |> 
  datawizard::data_addprefix("BAIT_") |> 
  datawizard::data_addsuffix("_SEM")

raw_scores <- data.frame(
  Participant = df[!is.na(df$BAIT_1_ImagesRealistic), ]$Participant,
  BAIT_Videos = (bait$VideosRealistic + (1 - bait$VideosIssues)) / 2,
  BAIT_Visual = (bait$ImagesRealistic + (1 - bait$ImagesIssues) + bait$ImitatingReality + bait$EnvironmentReal) / 4,
  BAIT_Text = (bait$TextRealistic + (1 - bait$TextIssues)) / 2
  )

scores <- cbind(sem_scores, ega_scores, raw_scores) 
df <- merge(df, raw_scores, by="Participant")

We computed two type of general scores for the BAIT scale, an empirical score based on the average of observed data (of the most loading items) and a model-based score as predicted by the structural model. The first one gives equal weight to all items (and keeps the same [0-1] range), while the second one is based on the factor loadings and the covariance structure.

Code
correlation::cor_test(scores, "BAIT_Visual", "BAIT_Visual_SEM") |> 
  plot() +
  ggside::geom_xsidedensity(aes(x=BAIT_Visual), color="grey", linewidth=1) +
  ggside::geom_ysidedensity(aes(y=BAIT_Visual_SEM), color="grey", linewidth=1) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  ggside::scale_ysidex_continuous(expand = c(0, 0)) +
  ggside::theme_ggside_void() +
  theme(ggside.panel.scale = .1) 

While the two correlate substantially, they have different benefits. The empirical score has a more straightforward meaning and is more reproducible (as it is not based on a model fitted on a specific sample), the model-based score takes into account the relative importance of the contribution of each item to their factor.

Code
table <- correlation::correlation(scores) |> 
  summary()

format(table) |> 
  datawizard::data_rename("Parameter", "Variables") |> 
  gt::gt() |> 
  gt::cols_align(align="center") |> 
  gt::tab_options(column_labels.font.weight="bold")
Variables BAIT_Text BAIT_Visual BAIT_Videos BAIT_Videos_EGA BAIT_Visual_EGA BAIT_Text_EGA BAIT_Visual_SEM BAIT_Text_SEM BAIT_Videos_SEM BAIT_Images_SEM
BAIT_Environment_SEM 0.46*** 0.92*** -0.54*** 0.18*** 0.93*** 0.13** 0.97*** -0.57*** 0.72*** 0.89***
BAIT_Images_SEM 0.44*** 0.87*** -0.51*** 0.17*** 0.92*** 0.15** 0.95*** -0.57*** 0.69***
BAIT_Videos_SEM 0.20*** 0.56*** -0.92*** 0.34*** 0.57*** 0.04 0.78*** -0.27***
BAIT_Text_SEM -0.95*** -0.45*** 0.20*** 7.42e-03 -0.46*** -0.30*** -0.64***
BAIT_Visual_SEM 0.52*** 0.89*** -0.61*** 0.20*** 0.91*** 0.16***
BAIT_Text_EGA 8.46e-03 0.06 0.02 0.09 0.08
BAIT_Visual_EGA 0.34*** 0.99*** -0.39*** 0.09
BAIT_Videos_EGA -0.06 0.06 1.68e-03
BAIT_Videos -0.18*** -0.39***
BAIT_Visual 0.34***

Corrrelation with GAAIS

Code
n <- n_factors(select(df, starts_with("GAAIS")))
n
# Method Agreement Procedure:

The choice of 2 dimensions is supported by 8 (50.00%) methods out of 16 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), VSS complexity 2, BIC, BIC (adjusted)).
Code
parameters::factor_analysis(
  select(df, starts_with("GAAIS")), 
  n=2, 
  rotation="oblimin")
# Rotated loadings from Factor Analysis (oblimin-rotation)

Variable          |   MR1 |   MR2 | Complexity | Uniqueness
-----------------------------------------------------------
GAAIS_Positive_7  | -0.02 |  0.78 |       1.00 |       0.38
GAAIS_Negative_15 |  0.78 | -0.02 |       1.00 |       0.37
GAAIS_Negative_9  |  0.51 |  0.10 |       1.07 |       0.77
GAAIS_Negative_10 |  0.79 | -0.01 |       1.00 |       0.37
GAAIS_Positive_12 |  0.10 |  0.68 |       1.05 |       0.59
GAAIS_Positive_17 | -0.21 |  0.52 |       1.33 |       0.60

The 2 latent factors (oblimin rotation) accounted for 48.70% of the total variance of the original data (MR1 = 26.10%, MR2 = 22.60%).
Code
df$GAAIS_Negative <- rowMeans(select(df, starts_with("GAAIS_Negative")))
df$GAAIS_Positive <- rowMeans(select(df, starts_with("GAAIS_Positive")))
Code
table <- correlation::correlation(
  select(df, all_of(names(raw_scores))), 
  select(df, starts_with("GAAIS")),
  bayesian=TRUE) |> 
  summary()

format(table) |> 
  datawizard::data_rename("Parameter", "Variables") |> 
  gt::gt() |> 
  gt::cols_align(align="center") |> 
  gt::tab_options(column_labels.font.weight="bold")
Variables GAAIS_Positive_7 GAAIS_Negative_15 GAAIS_Negative_9 GAAIS_Negative_10 GAAIS_Positive_12 GAAIS_Positive_17 GAAIS_Negative GAAIS_Positive
BAIT_Videos 0.12** -0.19*** -0.10** -0.13*** 0.11*** 0.06 -0.17*** 0.13***
BAIT_Visual 0.17*** 0.02 0.13*** -0.01 0.24*** 0.16*** 0.06 0.24***
BAIT_Text 0.25*** -0.07* 0.05 -0.09* 0.17*** 0.18*** -0.05 0.25***
Code
dat <- df |> 
  select(starts_with("GAAIS"), starts_with("BAIT")) |> 
  select(matches("[[:digit:]]"))

ega <- EGAnet::EGA(
        data = dat,
        seed=123,
        model="glasso",
        algorithm="leiden",
        plot.EGA=FALSE,
        verbose=FALSE)

plot(ega)

AI-Expertise

Code
df |> 
  ggplot(aes(x=as.factor(AI_Knowledge), y=BAIT_Visual)) +
  geom_boxplot()

Code
# m <- betareg::betareg(BAIT ~ AI_Knowledge, data=df)
m <- lm(BAIT_Visual ~ poly(AI_Knowledge, 2), data=df)
# m <- brms::brm(BAIT ~ mo(AI_Knowledge), data=df, algorithm = "meanfield")
# m <- brms::brm(BAIT ~ AI_Knowledge, data=dfsub, algorithm = "meanfield")
display(parameters::parameters(m))
Parameter Coefficient SE 95% CI t(654) p
(Intercept) 0.68 6.18e-03 (0.67, 0.70) 110.56 < .001
AI Knowledge (1st degree) -0.06 0.16 (-0.37, 0.25) -0.39 0.699
AI Knowledge (2nd degree) 0.50 0.16 (0.19, 0.81) 3.18 0.002
Code
marginaleffects::predictions(m, by=c("AI_Knowledge"), newdata = "marginalmeans") |> 
  as.data.frame() |> 
  ggplot(aes(x=AI_Knowledge, y=estimate)) +
  ggdist::stat_halfeye(data=df, aes(y=BAIT_Visual), geom="slab") +
  # ggdist::stat_dots(data=df, aes(y=BAIT_Visual), side="left") +
  # geom_boxplot(data=df, aes(y=BAIT_Visual, group=AI_Knowledge)) +
  geom_point2(data=df, aes(x=AI_Knowledge-0.08, y=BAIT_Visual), alpha=0.2, position=position_jitter(width=0.06), size=2) +
  geom_line(aes(group=1), position = position_dodge(width=0.2)) +
  geom_pointrange(aes(ymin = conf.low, ymax=conf.high), position = position_dodge(width=0.2)) +
  theme_minimal() +
  labs(x = "AI-Knowledge", y="BAIT - Visual")

Gender and Age

Code
# m <- betareg::betareg(BAIT ~ Sex / Age, data=df, na.action=na.omit)
m <- lm(BAIT_Visual ~ Sex / Age, data=df)
display(parameters::parameters(m))
Parameter Coefficient SE 95% CI t(652) p
(Intercept) 0.66 0.03 (0.60, 0.72) 22.07 < .001
Sex (Male) 0.04 0.04 (-0.03, 0.11) 1.05 0.294
Sex (Female) × Age 1.35e-03 1.14e-03 (-8.90e-04, 3.59e-03) 1.18 0.237
Sex (Male) × Age -5.78e-04 6.64e-04 (-1.88e-03, 7.26e-04) -0.87 0.385

Belief in the Instructions

Code
glm(Feedback_LabelsIncorrect ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_LabelsIncorrect = ifelse(Feedback_LabelsIncorrect=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'Labels are Incorrect'")
Predicting ‘Labels are Incorrect’
Parameter Log-Odds SE 95% CI z p
(Intercept) 0.22 1.06 (-1.87, 2.31) 0.20 0.838
BAIT Visual -1.56 1.48 (-4.50, 1.33) -1.05 0.293
AI Knowledge 0.09 0.30 (-0.50, 0.67) 0.29 0.774
BAIT Visual × AI Knowledge 0.11 0.41 (-0.70, 0.92) 0.27 0.790
Code
glm(Feedback_LabelsReversed ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_LabelsReversed = ifelse(Feedback_LabelsReversed=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'Labels are reversed'")
Predicting ‘Labels are reversed’
Parameter Log-Odds SE 95% CI z p
(Intercept) -3.12 2.15 (-7.44, 0.95) -1.45 0.146
BAIT Visual 0.55 2.95 (-5.24, 6.31) 0.19 0.851
AI Knowledge 0.17 0.60 (-1.00, 1.35) 0.29 0.774
BAIT Visual × AI Knowledge -0.26 0.83 (-1.88, 1.37) -0.32 0.751
Code
glm(Feedback_CouldDiscriminate ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_CouldDiscriminate = ifelse(Feedback_CouldDiscriminate=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'Easy to discriminate'")
Predicting ‘Easy to discriminate’
Parameter Log-Odds SE 95% CI z p
(Intercept) -1.62 1.88 (-5.40, 1.95) -0.86 0.389
BAIT Visual -1.01 2.59 (-6.07, 4.09) -0.39 0.698
AI Knowledge -0.27 0.54 (-1.32, 0.79) -0.50 0.614
BAIT Visual × AI Knowledge 0.31 0.74 (-1.14, 1.76) 0.42 0.672
Code
m <- glm(Feedback_CouldNotDiscriminate ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_CouldNotDiscriminate = ifelse(Feedback_CouldNotDiscriminate=="True", 1, 0)), 
    family="binomial") 

parameters::parameters(m) |> 
  display(title="Predicting 'Hard to discriminate'")
Predicting ‘Hard to discriminate’
Parameter Log-Odds SE 95% CI z p
(Intercept) -4.25 1.20 (-6.66, -1.96) -3.55 < .001
BAIT Visual 6.22 1.64 (3.08, 9.53) 3.78 < .001
AI Knowledge 0.42 0.33 (-0.23, 1.08) 1.26 0.206
BAIT Visual × AI Knowledge -0.77 0.45 (-1.68, 0.10) -1.71 0.087
Code
modelbased::estimate_relation(m, length=6) |> 
  plot()

Code
glm(Feedback_Fun ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_Fun = ifelse(Feedback_Fun=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'Fun'")
Predicting ‘Fun’
Parameter Log-Odds SE 95% CI z p
(Intercept) 0.16 1.05 (-1.90, 2.21) 0.15 0.882
BAIT Visual 0.16 1.44 (-2.67, 3.00) 0.11 0.909
AI Knowledge -0.02 0.30 (-0.60, 0.56) -0.07 0.946
BAIT Visual × AI Knowledge 0.12 0.41 (-0.68, 0.93) 0.30 0.763
Code
glm(Feedback_Boring ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_Boring = ifelse(Feedback_Boring=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'Boring'")
Predicting ‘Boring’
Parameter Log-Odds SE 95% CI z p
(Intercept) -1.87 1.39 (-4.65, 0.83) -1.34 0.181
BAIT Visual -0.14 1.94 (-3.96, 3.67) -0.07 0.943
AI Knowledge 0.20 0.38 (-0.54, 0.96) 0.54 0.592
BAIT Visual × AI Knowledge -0.13 0.53 (-1.18, 0.91) -0.25 0.802
Code
glm(Feedback_AILessArousing ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_AILessArousing = ifelse(Feedback_AILessArousing=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'AI was less arousing'")
Predicting ‘AI was less arousing’
Parameter Log-Odds SE 95% CI z p
(Intercept) 1.62 1.61 (-1.56, 4.78) 1.01 0.314
BAIT Visual -5.66 2.43 (-10.51, -0.96) -2.32 0.020
AI Knowledge -0.64 0.46 (-1.55, 0.26) -1.40 0.161
BAIT Visual × AI Knowledge 0.93 0.68 (-0.39, 2.26) 1.38 0.169
Code
glm(Feedback_AIMoreArousing ~ BAIT_Visual * AI_Knowledge, 
    data=mutate(df, Feedback_AIMoreArousing = ifelse(Feedback_AIMoreArousing=="True", 1, 0)), 
    family="binomial") |> 
  parameters::parameters() |> 
  display(title="Predicting 'AI was more arousing'")
Predicting ‘AI was more arousing’
Parameter Log-Odds SE 95% CI z p
(Intercept) -3.11 1.74 (-6.63, 0.20) -1.78 0.075
BAIT Visual 1.41 2.29 (-3.02, 5.97) 0.62 0.537
AI Knowledge -0.16 0.49 (-1.12, 0.80) -0.34 0.736
BAIT Visual × AI Knowledge 0.27 0.64 (-0.99, 1.51) 0.42 0.678

Save

setdiff(unique(df$Participant), unique(dftask$Participant))
character(0)
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data.csv", row.names = FALSE)