FakeArt - Data Cleaning

Data Preparation

library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(ggdist)

df <- read.csv("../data/rawdata_participants.csv")
dftask <- read.csv("../data/rawdata_task.csv")

Recruitment History

Code
# Cumulative count of participants per day (as area)
df |>
  mutate(Date = as.Date(Experiment_StartDate, format = "%Y-%m-%d %H:%M:%S")) |> 
  summarize(N = n(), .by=c("Date", "Recruitment")) |> 
  complete(Date, Recruitment, fill = list(N = 0)) |> 
  mutate(N = cumsum(N), .by="Recruitment") |>
  ggplot(aes(x = Date, y = N)) +
  geom_area(aes(fill=Recruitment)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = "Recruitment History",
    x = "Date",
    y = "Total Number of Participants"
  ) +
  see::theme_modern()

Code
# Table
summarize(df, N = n(), .by=c("Recruitment")) |> 
  arrange(desc(N)) |> 
  gt::gt() |> 
  gt::grand_summary_rows(columns = "N", fns = Total ~ sum(.)) |> 
  gt::opt_stylize(style = 2, color = "gray") |> 
  gt::tab_header("Number of participants per recruitment source") 
Number of participants per recruitment source
Recruitment N
prolific 329
os 16
fw 13
dm 1
Total 359

Experiment Feedback

Experiment Enjoyment

Code
df |> 
  summarise(n = n(), .by=c("Experiment_Enjoyment")) |>
  filter(!is.na(Experiment_Enjoyment)) |> 
  mutate(n = n / sum(n),
         Experiment_Enjoyment = fct_rev(as.factor(Experiment_Enjoyment))) |>
  ggplot(aes(y = n, x = 1, fill = Experiment_Enjoyment)) +
  geom_bar(stat="identity", position="stack") +
  scale_fill_manual(values=c("green", "yellowgreen",  "yellow", "orange", "red")) +
  coord_flip() +
  scale_x_continuous(expand=c(0, 0))  +
  scale_y_continuous(labels = scales::percent)  +
  labs(title="Experiment Enjoyment",
       subtitle="Proportion of participants by condition") + 
  guides(fill = guide_legend(reverse=TRUE)) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "top",
    legend.title = element_blank()) 

Comments

Code
data.frame(Comments = df$Experiment_Feedback) |> 
  filter(!tolower(Comments) %in% c(NA, "no", "none", "thank you", "nope", "n/a")) |> 
  gt::gt() |> 
  gt::opt_stylize() |> 
  gt::opt_interactive(use_compact_mode = TRUE) 

Task Feedback

Code
dat <- df |> 
  select(starts_with("Feedback"), -contains("Confidence")) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
  group_by(Question, Answer) |> 
  summarise(prop = n()/nrow(df), .groups = 'drop') |> 
  complete(Question, Answer, fill = list(prop = 0)) |> 
  filter(Answer == TRUE) |> 
  mutate(Question = str_remove(Question, "Feedback_"),
         Question = str_replace(Question, "LabelsNotMatched", "Labels did not always match the images"),
         Question = str_replace(Question, "LabelsReversed", "Labels were Reversed"),
         # Question = str_replace(Question, "DiffNone", "No Difference Real/AI"),
         # Question = str_replace(Question, "DiffObvious", "Obvious Difference Real/AI"),
         # Question = str_replace(Question, "DiffSubtle", "Subtle Difference Real/AI"),
         # Question = str_replace(Question, "AILessAttractive", "AI = less attractive"),
         # Question = str_replace(Question, "AIMoreAttractive", "AI = more attractive"),
         # Question = str_replace(Question, "SomeFacesAttractive", "Some Faces Attractive"),
         Question = str_replace(Question, "GoodForgeries", "Forgeries very convincing and hard to distinguish"),
         Question = str_replace(Question, "BadForgeries", "Forgeries were less well executed")) |>
  mutate(Question = fct_reorder(Question, desc(prop)))

dat |> 
  ggplot(aes(x = Question, y = prop)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
  labs(x="Feedback", y = "Participants", title = "Feedback") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1), angle = 90, hjust = 1),
    axis.title.x = element_blank()
  )

Code
dat |> 
  select(Question, prop) |> 
  arrange(desc(prop)) |> 
  mutate(prop = format_percent(prop)) |> 
  gt::gt() |> 
  gt::opt_stylize() |> 
  gt::tab_header("Proportion of participants who answered 'yes' to each question") |> 
  gt::cols_label(prop = "Proportion of participants who answered 'Yes'")
Proportion of participants who answered 'yes' to each question
Question Proportion of participants who answered 'Yes'
Forgeries very convincing and hard to distinguish 84.96%
LabelsPaidAttention 64.90%
SmallDifferenceRealAI 52.09%
Labels did not always match the images 50.14%
NoDifferenceRealAI 33.98%
LessBeautifulAI 26.46%
LabelsAllReal 15.60%
Forgeries were less well executed 13.65%
BigDifferenceRealAI 12.81%
MoreBeautifulAI 11.98%
LabelsAllAI 7.80%
Labels were Reversed 7.80%
LabelsNoAttention 4.74%
Code
dat <- df |> 
  select(starts_with("Feedback"), -contains("Confidence"))  |> 
  mutate_all(~ifelse(.==TRUE, 1, 0)) |> 
  correlation(method="tetrachoric", redundant = TRUE) |> 
  correlation::cor_sort() |> 
  correlation::cor_lower() |> 
  mutate(val = paste0(insight::format_value(rho, zap_small=TRUE), format_p(p, stars_only=TRUE))) |>
  mutate(Parameter2 = fct_rev(Parameter2)) |>
  mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
         Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_")))

dat |>
  ggplot(aes(x=Parameter1, y=Parameter2)) +
  geom_tile(aes(fill = rho), color = "white") +
  geom_tile(data=filter(dat, Parameter1 == "LessBeautifulAI" & Parameter2 == "BigDifferenceRealAI"), color = "red", alpha = 0, linewidth=1) +
  geom_tile(data=filter(dat, Parameter1 == "LabelsPaidAttention" & Parameter2 == "LessBeautifulAI"), color = "red", alpha = 0, linewidth=1) +
  geom_tile(data=filter(dat, Parameter1 == "LabelsPaidAttention" & Parameter2 == "LabelsNotMatched"), color = "red", alpha = 0, linewidth=1) +
  geom_tile(data=filter(dat, Parameter1 == "LabelsNoAttention" & Parameter2 == "LessBeautifulAI"), color = "red", alpha = 0, linewidth=1) +
  geom_text(aes(label = val), size = 3) +
  labs(title = "Feedback Co-occurence Matrix") +
  scale_fill_gradient2(
    low = "#2196F3",
    mid = "white",
    high = "#F44336",
    breaks = c(-1, 0, 1),
    guide = guide_colourbar(ticks=FALSE),
    midpoint = 0,
    na.value = "grey85",
    limit = c(-1, 1))  + 
  theme_minimal() +
  theme(legend.title = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

  • The more people paid attention to the labels, the stronger the self-reported anti-fake bias.
  • The more people paid attention, the less likely they were to report that the labels did not match the images.

Dimension Computation

Code
# Should we re-map the Worth values? -----------------------------------
# Re-map to values in dollars
# alldata_task$Worth2 <- ifelse(alldata_task$Worth == 1, 10, alldata_task$Worth)
# alldata_task$Worth2 <- ifelse(alldata_task$Worth2 == 2, 100, alldata_task$Worth2)
# alldata_task$Worth2 <- ifelse(alldata_task$Worth2 == 3, 1000, alldata_task$Worth2)
# alldata_task$Worth2 <- ifelse(alldata_task$Worth2 == 4, 10000, alldata_task$Worth2)
# alldata_task$Worth2 <- ifelse(alldata_task$Worth2 == 5, 100000, alldata_task$Worth2)
#
# library(tidyverse)
#
# t <- as.data.frame(table(alldata_task$Worth))
# cor.test(as.numeric(as.character(t$Var1)), t$Freq)
# t <- as.data.frame(table(log1p(alldata_task$Worth2)))
# cor.test(as.numeric(as.character(t$Var1)), t$Freq)
#
# ggplot(alldata_task, aes(x=Worth)) +
#   geom_bar() +
#   geom_abline()
# ggplot(alldata_task, aes(x=log1p(Worth2))) +
#   geom_bar()
# ggplot(alldata_task, aes(x=Worth2)) +
#   geom_bar() +
#   scale_x_continuous(transform = "log1p", breaks = c(0, 10, 100, 1000, 10000, 100000))
# ggplot(alldata_task, aes(x=Worth, y=Beauty)) +
#   geom_smooth(method = "lm") +
#   geom_jitter(height=0)
# ggplot(alldata_task, aes(x=Worth2, y=Beauty)) +
#   geom_smooth(method = "lm") +
#   geom_jitter(height=0) +
#   scale_x_continuous(transform = "log1p", breaks = c(0, 10, 100, 1000, 10000, 100000))
# summary(lm(Beauty ~ Worth, data = alldata_task))
# summary(lm(Beauty ~ log1p(Worth2), data = alldata_task))

MINT

Code
# Compute a composite score (mean or sum) from all items whose names match `pattern`
# (attention-check items excluded), store it in column `name`, and drop the item columns
compute_and_remove <- function(df, name="BodyAwareness", pattern=name, method="mean") {
  items <- select(df, starts_with(pattern), -contains("AttentionCheck"))
  df <- df[!names(df) %in% names(items)]
  if(method == "mean") {
    df[[name]] <- rowMeans(items, na.rm=TRUE)
  } else {
    df[[name]] <- rowSums(items, na.rm=TRUE)
  }
  df
}
df <- compute_and_remove(df, name="MINT_Card", method="mean")
df <- compute_and_remove(df, name="MINT_Urin", method="mean")
df <- compute_and_remove(df, name="MINT_SexS", method="mean")
df <- compute_and_remove(df, name="MINT_Gast", method="mean")
df <- compute_and_remove(df, name="MINT_Olfa", method="mean")
df <- compute_and_remove(df, name="MINT_Derm", method="mean")
df <- compute_and_remove(df, name="MINT_ExAc", method="mean")
df <- compute_and_remove(df, name="MINT_RelA", method="mean")
df <- compute_and_remove(df, name="MINT_Resp", method="mean")
df <- compute_and_remove(df, name="MINT_Sati", method="mean")
df <- compute_and_remove(df, name="MINT_CaCo", method="mean")

# df[grepl("^MAIA_.*_R$", names(df))] <- 6 - df[grepl("^MAIA_.*_R$", names(df))]  # Reverse
# df <- compute_and_remove(df, name="MAIA_AttentionRegulation", method="mean")

VVIQ

Code
df <- compute_and_remove(df, name="VVIQ_Friend", method="mean")
df <- compute_and_remove(df, name="VVIQ_Sun", method="mean")
df <- compute_and_remove(df, name="VVIQ_Shop", method="mean")
df <- compute_and_remove(df, name="VVIQ_Country", method="mean")

Cronbach’s alpha: 0.86
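
A minimal sketch of how this value could be reproduced, assuming it was computed on the four facet scores created above (rather than on the raw items):

Code
# Hedged sketch: Cronbach's alpha over the four VVIQ facet composites
# (assumption: the reported 0.86 refers to these columns)
performance::cronbachs_alpha(df[c("VVIQ_Friend", "VVIQ_Sun", "VVIQ_Shop", "VVIQ_Country")])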

Code
df$VVIQ_Total = rowMeans(df[c("VVIQ_Friend", "VVIQ_Sun", "VVIQ_Shop", "VVIQ_Country")], na.rm = TRUE) |> 
  datawizard::reverse_scale()  # Reverse the scale (e.g., 0–4 -> 4–0) so that higher = more vivid
df <- select(df, -VVIQ_Friend, -VVIQ_Sun, -VVIQ_Shop, -VVIQ_Country)

BAIT

Code
df$BAIT_Negative <- rowMeans(select(df, BAIT_8_TextIssues, BAIT_2_ImagesIssues, BAIT_3_VideosIssues, BAIT_9_Dangerous, BAIT_13_ArtIssues), na.rm=TRUE)
df$BAIT_Positive <- rowMeans(select(df, BAIT_5_ImitatingReality, BAIT_12_Benefit, BAIT_4_VideosRealistic, BAIT_1_ImagesRealistic, BAIT_14_ArtRealistic, BAIT_6_EnvironmentReal, BAIT_7_TextRealistic), na.rm=TRUE)
df$BAIT_ArtRealistic <- rowMeans(select(mutate(df, BAIT_13_ArtIssues = abs(BAIT_13_ArtIssues - 6)), BAIT_13_ArtIssues, BAIT_14_ArtRealistic), na.rm=TRUE)
df$BAIT_Total <- rowMeans(select(mutate(df, BAIT_Negative = abs(BAIT_Negative - 6)), BAIT_Negative, BAIT_Positive), na.rm=TRUE)

df |> 
  select(starts_with("BAIT")) |>
  mutate(BAIT_AI_Use = case_when(
    BAIT_AI_Use == "Never" ~ 0,
    BAIT_AI_Use == "A few times per month" ~ 1,
    BAIT_AI_Use == "A few times per week" ~ 2,
    BAIT_AI_Use == "Once a day" ~ 3,
    BAIT_AI_Use == "A few times per day" ~ 4,
    .default = NA
  )) |> 
  correlation::correlation(redundant = TRUE) |> 
  correlation::cor_sort() |> 
  correlation::cor_lower() |> 
  summary() |> 
  plot(text = list(size = 2.5)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
df <- select(df, -matches("BAIT_[0-9]+_"))

PHQ-4

Code
df <- compute_and_remove(df, name="PHQ4_Depression", method="mean")
df <- compute_and_remove(df, name="PHQ4_Anxiety", method="mean")

Exclusions

Code
exclude <- list()

Questionnaires

Code
m <- mgcv::gam(MINT_AttentionCheck ~ s(Duration_MINT),
         data = mutate(df, MINT_AttentionCheck = ifelse(MINT_AttentionCheck != 0, 1, 0)), family = "binomial")

estimate_relation(m, length=50) |>
  ggplot(aes(x = Duration_MINT, y = Predicted)) +
  geom_ribbon(aes(ymin = CI_low, ymax = CI_high), alpha = 0.2) +
  geom_line() +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "MINT Attention Check",
       subtitle = "Predicted probability of failing attention check by duration",
       x = "Duration (minutes)",
       y = "Probability of failing attention check")

Code
# Contingency table for attention checks
# table(ifelse(df$MINT_AttentionCheck != 0, 1, 0), ifelse(df$BAIT_AttentionCheck != 6, 1, 0))


# Set questionnaire scores to NA for participants who failed the corresponding attention check
for(ppt in df$Participant) {
  if(df[df$Participant == ppt, "MINT_AttentionCheck"] != 0) {
    df[df$Participant == ppt, names(select(df, starts_with("MINT"), -MINT_AttentionCheck))] <- NA
    df[df$Participant == ppt, names(select(df, starts_with("VVIQ")))] <- NA
  }
  
  if(df[df$Participant == ppt, "BAIT_AttentionCheck"] != 6) {
    df[df$Participant == ppt, names(select(df, starts_with("BAIT"), -BAIT_AttentionCheck))] <- NA
  }
}

Attention Checks

Code
dfchecks <- data.frame(
  Participant = df$Participant,
  A_MINT = ifelse(df$MINT_AttentionCheck == 0, 0, 1),
  A_BAIT = ifelse(df$BAIT_AttentionCheck == 6, 0, 1),
  A_TASK = 1 - df$Task_AttentionCheck
)

summary(correlation(dfchecks))
# Correlation Matrix (pearson-method)

Parameter | A_TASK |  A_BAIT
----------------------------
A_MINT    |   0.12 | 0.29***
A_BAIT    |   0.10 |        

p-value adjustment method: Holm (1979)
Code
dfchecks |>
  mutate(Total = round(A_TASK, 2)) |> 
  ggplot(aes(x = Total)) +
  geom_bar(aes(fill = as.factor(Total))) +
  scale_fill_viridis_d(guide = "none") +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Failed Attention Checks", y = "Number of Participants", subtitle = "Number of failed attention checks per participant") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.title.x = element_blank(),
  )
Warning: Removed 30 rows containing non-finite outside the scale range
(`stat_count()`).

Code
exclude$checks <- as.character(na.omit(dfchecks[dfchecks$A_TASK >= 0.5, "Participant"]))

We removed 36 (10.03%) participants for having failed at least 50% of attention checks.
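
A minimal sketch of how the figures in this sentence could be re-derived from exclude$checks (assuming the full pre-exclusion sample as the denominator):

Code
# Hedged sketch: number and share of participants failing at least 50% of attention checks
n_failed <- length(exclude$checks)
paste0(n_failed, " (", format_percent(n_failed / nrow(df)), ")")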

Experiment Duration

Code
dfchecks$Duration_Experiment <- df |> 
  mutate(Version = ifelse(Recruitment == "prolific", "long", "short")) |> 
  mutate(Experiment_Duration = as.numeric(standardize(log(Experiment_Duration))), .by = "Version") |>
  pull(Experiment_Duration) 


dfchecks$Duration_Instructions1 <- standardize(log(df$Duration_TaskInstructions1))
dfchecks$Duration_Instructions2 <- standardize(log(df$Duration_TaskInstructions2))

m <- mgcv::gam(A_TASK ~ s(Duration_Experiment), 
             data = dfchecks[!is.na(dfchecks$A_TASK),], family = "quasibinomial")

estimate_relation(m, length=50) |> 
  ggplot(aes(x = Duration_Experiment, y = Predicted)) +
  geom_ribbon(aes(ymin = CI_low, ymax = CI_high), alpha = 0.2) +
  geom_line() +
  geom_hline(yintercept=1/3, linetype="dashed", color="darkgrey") +
  geom_vline(xintercept=3, linetype="dashed", color="red") +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) +
  ggside::geom_xsidedensity(
    data=mutate(dfchecks[!is.na(dfchecks$A_TASK),], AttentionCheckLabel = ifelse(A_TASK >= 0.5, "Failed 50% attention checks", "Valid")), 
    aes(fill=AttentionCheckLabel), alpha=0.3) +
  ggside::theme_ggside_void() +
  labs(title = "Experiment Duration",
       subtitle = "Predicted probability of failing attention checks by duration",
       x = "Experiment Duration (Z(log))",
       y = "Probability of failing attention checks") 
Warning: `is.ggproto()` was deprecated in ggplot2 3.5.2.
ℹ Please use `is_ggproto()` instead.

Code
exclude$duration <- as.character(dfchecks[dfchecks$Duration_Experiment > 3, "Participant"])
exclude$duration <- exclude$duration[!exclude$duration %in% exclude$checks]

Response Consistency

Code
dftask |> 
  select(-starts_with("Screen"), -starts_with("Trial")) |> 
  correlation() |> 
  summary() |> 
  plot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
dfchecks <- dftask |> 
  summarize(across(c("Beauty", "Valence", "Meaning", "Worth", "Reality"), 
                   list(M = ~ mean(.x, na.rm = TRUE), SD = ~ sd(.x, na.rm = TRUE))),
            cor_BeautyValence = cor(Beauty, Valence, method = "spearman"),
            cor_MeaningWorth = cor(Meaning, Worth, method = "spearman"),
            cor_BeautyNorm = cor(Beauty, Norms_Liking, method = "spearman"),
            cor_ValenceNorm = cor(Valence, Norms_Valence, method = "spearman"),
            .by = c("Participant")) |> 
  mutate(cor_BeautyValence = ifelse(is.na(cor_BeautyValence), -1, cor_BeautyValence),
         cor_MeaningWorth = ifelse(is.na(cor_MeaningWorth), -1, cor_MeaningWorth),
         cor_BeautyNorm = ifelse(is.na(cor_BeautyNorm), -1, cor_BeautyNorm)) |>
  full_join(dfchecks, by = "Participant", keep = FALSE)

Anomaly Score

Code
features <- dfchecks |> 
  mutate(A_QUESTIONNAIRES = (A_MINT + A_BAIT) / 2) |> 
  select(-A_MINT, -A_BAIT, -A_TASK) |> 
  select(matches("_M|_SD|cor_|A_|SubjectiveQuality|Duration")) |> 
  standardize()

model <- solitude::isolationForest$new(sample_size = nrow(features), num_trees = 500)
model$fit(features)
dfchecks$AnomalyScore <- model$predict(features)$anomaly_score
 
# features <- mice::mice(features, method="pmm", m = 20, seed = 123, printFlag = FALSE) |> 
#   suppressWarnings() |> 
#   mice::complete(1)
Code
m <- mgcv::gam(A_TASK ~ s(AnomalyScore), 
               data = dfchecks[!is.na(dfchecks$A_TASK),], 
               family = "quasibinomial")
# m <- mgcv::gam(A_TASK ~ s(log(AnomalyScore), k=20), data = mutate(dfchecks, A_TASK = ifelse(A_TASK >= 0.5, 1, 0)), family = "binomial")

estimate_relation(m, length=100) |> 
  ggplot(aes(x = AnomalyScore, y = Predicted)) +
  geom_ribbon(aes(ymin = CI_low, ymax = CI_high), alpha = 0.2) +
  geom_line() +
  geom_vline(xintercept=quantile(dfchecks$AnomalyScore, 0.95), linetype="dashed", color="red") +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Experiment Duration",
       subtitle = "Predicted probability of failing attention checks by duration",
       x = "Anomaly Score",
       y = "Probability of failing attention checks") +
  ggside::geom_xsidehistogram(data=dfchecks, alpha=0.3, bins = 60) +
  ggside::geom_ysidedensity(data=dfchecks, aes(y=A_TASK), alpha=0.3) +
  ggside::theme_ggside_void() 

Code
exclude$anomaly <- as.character(dfchecks[dfchecks$AnomalyScore > quantile(dfchecks$AnomalyScore, 0.95), "Participant"])
exclude$anomaly <- exclude$anomaly[!exclude$anomaly %in% exclude$duration]
exclude$anomaly <- exclude$anomaly[!exclude$anomaly %in% exclude$checks]

length(c(exclude$anomaly, exclude$duration, exclude$checks))
[1] 54
Code
selected_features <- features |>
  mutate(Cluster = ifelse(dfchecks$Participant %in% c(exclude$anomaly, exclude$duration, exclude$checks), "Excluded", "Valid")) |>
  select(Cluster, starts_with("Duration"), starts_with("A_"), starts_with("cor_"), starts_with("Beauty_"))


var_names <- names(select(selected_features, -Cluster))
feature_pairs <- expand_grid(x_var = var_names, y_var = var_names)
feature_pairs <- feature_pairs |>
  mutate(
    triangle = case_when(
      match(x_var, var_names) > match(y_var, var_names) ~ "lower",
      match(x_var, var_names) == match(y_var, var_names) ~ "diag",
      TRUE ~ "upper"
    )
  )
plot_data <- pmap_dfr(feature_pairs, function(x_var, y_var, triangle) {
  selected_features |>
    select(Cluster, x = all_of(x_var), y = all_of(y_var)) |>
    mutate(x_var = x_var, y_var = y_var, triangle = triangle)
}) |>
  mutate(
    x_var = factor(x_var, levels = rev(var_names)),
    y_var = factor(y_var, levels = rev(var_names))
  )

diag_histograms <- map_dfr(var_names, function(var) {
  # histogram bins computed globally for consistent breaks
  breaks <- hist(selected_features[[var]], breaks = 20, plot = FALSE)$breaks
  all_counts <- selected_features |>
    group_by(Cluster) |>
    summarise(counts = list(hist(!!sym(var), breaks = breaks, plot = FALSE)$counts),
              .groups = "drop")

  # max count across all clusters for this variable
  max_count <- max(unlist(all_counts$counts), na.rm = TRUE)
  y_range <- range(selected_features[[var]], na.rm = TRUE)

  all_counts |>
    unnest_wider(counts, names_sep = "_") |>
    pivot_longer(starts_with("counts_"), names_to = "bin", values_to = "count") |>
    mutate(
      bin_id = as.integer(gsub("counts_", "", bin)),
      x = breaks[-length(breaks)][bin_id],
      xmax = breaks[-1][bin_id],
      ymin = y_range[1],
      ymax = ymin + (count / max_count) * diff(y_range),
      var = var
    ) |>
    select(x, xmax, ymin, ymax, var, Cluster)
}) |> mutate(
  x_var = factor(var, levels = rev(var_names)),
  y_var = factor(var, levels = rev(var_names))
)

ggplot(plot_data, aes(x = x)) +
  # stat_density_2d_filled(data = filter(plot_data, triangle == "lower", x_var != "AttentionCheck", y_var != "AttentionCheck"),
  #                        aes(y = y, fill = after_stat(nlevel)), show.legend = FALSE) +
  # scale_fill_gradient(low = "white", high = "darkgreen") +
  # ggnewscale::new_scale_fill() +
  geom_point2(data = filter(plot_data, triangle == "lower"),
              aes(y = y, color = Cluster), alpha = 0.7, size = 3, shape = "+") +
  geom_rect(data = diag_histograms,
          aes(xmin = x, xmax = xmax, ymin = ymin, ymax = ymax, fill = Cluster),
          colour = NA, alpha = 0.5) +
  geom_blank(data = filter(plot_data, triangle != "lower")) +
  facet_grid(y_var ~ x_var, scales = "free", switch = "both") +
  # scale_color_gradient(low = "black", high = "red") +
  labs(color = "Participants", fill = "Participants") +
  theme_minimal() +
  theme(strip.placement = "outside",
        strip.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        axis.title = element_blank())

Self-Reported Quality

Code
dfchecks$SubjectiveQuality <- df$Experiment_Quality

dfchecks |> 
  ggplot(aes(x = SubjectiveQuality, y = A_TASK)) +
  geom_jitter(width = 0) +
  geom_smooth(method = "lm") 
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 30 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 30 rows containing missing values or values outside the scale range
(`geom_point()`).

Effect of Exclusion

Code
dftask |> 
  select(Participant, Beauty, Valence, Meaning, Worth, Reality, Authenticity) |> 
  pivot_longer(-Participant, names_to = "Variable", values_to = "Value") |>
  ggplot(aes(x = Value)) +
  geom_histogram(aes(fill = Variable, y = after_stat(density)), bins = 30, position = "identity") +
  facet_wrap(~Variable, scales = "free") 

Code
library(glmmTMB)

dftask <- mutate(dftask, Condition = fct_relevel(Condition, "Human Original", "Human Forgery", "AI-Generated"))

quick_test <- function(what = "Beauty", family = "gaussian", exclude = c()){
  f <- as.formula(paste0(what, " ~ Condition + (1 + Condition|Participant)"))
  if(family == "ordbeta") {
    dat <- normalize(dftask, select = what)
  } else {
    dat <- dftask
  }
  
  m <- glmmTMB::glmmTMB(f, data = dat, family = family)
  params <-  parameters::parameters(m, effects = "fixed")
  
  m2 <- glmmTMB::glmmTMB(f, data = filter(dat, !Participant %in% exclude), family = family)
  params2 <-  parameters::parameters(m2, effects = "fixed")
  
  params$Coefficient_Excluded <- params2$Coefficient
  params$p_Excluded <- format_p(params2$p, name = NULL)
  params$Result <- ifelse(params2$p < params$p, "Exclude", "Original")  # compare raw p-values, not the formatted strings
  
  p <- estimate_means(m, by = "Condition") |> 
    mutate(Exclusion = "Whole sample") |> 
    rbind(estimate_means(m2, by = "Condition") |> 
            mutate(Exclusion = "Excluded")) 
  
  if("Proportion" %in% names(p)) p <- datawizard::data_rename(p, "Proportion", "Mean")
  
  p <- p |>
    ggplot(aes(x = Condition, y = Mean, color = Exclusion, group = Exclusion)) +
    geom_line(position = position_dodge(width = 0.1)) +
    geom_pointrange(aes(ymin = CI_low, ymax = CI_high), position = position_dodge(width = 0.1), size = 1) 

  list(params = display(select(params, Parameter, Coefficient, Coefficient_Excluded, p, p_Excluded, Result)),
       plot = p)
}
Code
rez <- quick_test(what = "Beauty", family = "gaussian", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.23 0.22 < .001 < .001 Exclude
ConditionHuman Forgery -0.21 -0.22 < .001 < .001 Exclude
ConditionAI-Generated -0.36 -0.38 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Beauty", family = "ordbeta", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.14 0.13 < .001 < .001 Exclude
ConditionHuman Forgery -0.14 -0.15 < .001 < .001 Exclude
ConditionAI-Generated -0.25 -0.26 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Valence", family = "gaussian", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.17 0.14 < .001 < .001 Exclude
ConditionHuman Forgery -0.22 -0.22 < .001 < .001 Exclude
ConditionAI-Generated -0.38 -0.39 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Valence", family = "ordbeta", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.16 0.14 < .001 < .001 Exclude
ConditionHuman Forgery -0.12 -0.12 < .001 < .001 Exclude
ConditionAI-Generated -0.20 -0.21 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Meaning", family = "gaussian", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 2.81 2.76 < .001 < .001 Exclude
ConditionHuman Forgery -0.27 -0.28 < .001 < .001 Exclude
ConditionAI-Generated -0.60 -0.63 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Meaning", family = "ordbeta", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.02 -2.58e-03 0.475 0.938 Original
ConditionHuman Forgery -0.15 -0.16 < .001 < .001 Exclude
ConditionAI-Generated -0.33 -0.36 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Worth", family = "poisson", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.18 0.15 < .001 < .001 Exclude
ConditionHuman Forgery -0.34 -0.35 < .001 < .001 Exclude
ConditionAI-Generated -0.62 -0.67 < .001 < .001 Exclude
Code
rez$plot  

Code
rez <- quick_test(what = "Reality", family = "gaussian", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
Code
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 10.68 11.38 < .001 < .001 Exclude
ConditionHuman Forgery -3.61 -3.12 0.002 0.014 Original
ConditionAI-Generated -11.29 -11.00 < .001 < .001 Exclude
Code
rez$plot

Code
rez <- quick_test(what = "Reality", family = "ordbeta", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; singular convergence (7). See vignette('troubleshooting'),
help('diagnose')
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; singular convergence (7). See vignette('troubleshooting'),
help('diagnose')
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
Code
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.21 0.22 < .001 < .001 Exclude
ConditionHuman Forgery -0.06 -0.05 0.004 0.017 Original
ConditionAI-Generated -0.20 -0.19 < .001 < .001 Exclude
Code
rez$plot

Code
rez <- quick_test(what = "Authenticity", family = "gaussian", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
Code
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 19.73 19.83 < .001 < .001 Exclude
ConditionHuman Forgery -4.25 -4.52 < .001 < .001 Exclude
ConditionAI-Generated -3.88 -4.14 < .001 < .001 Exclude
Code
rez$plot

Code
rez <- quick_test(what = "Authenticity", family = "ordbeta", exclude = c(exclude$anomaly, exclude$duration, exclude$checks))
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
The variance-covariance matrix is not positive definite. Returning the
  nearest positive definite matrix now.
  This ensures that eigenvalues are all positive real numbers, and
  thereby, for instance, it is possible to calculate standard errors for
  all relevant parameters.
Code
rez$params
Fixed Effects
Parameter Coefficient Coefficient_Excluded p p_Excluded Result
(Intercept) 0.42 0.40 < .001 < .001 Exclude
ConditionHuman Forgery -0.06 -0.07 0.001 < .001 Exclude
ConditionAI-Generated -0.05 -0.07 0.012 0.002 Exclude
Code
rez$plot

Exclude

Code
df <- filter(df, !Participant %in% c(exclude$anomaly, exclude$duration, exclude$checks))
dftask <- filter(dftask, !Participant %in% c(exclude$anomaly, exclude$duration, exclude$checks))

Manipulation Check

Did participants doubt the cover story?

Post-task Check

Code
manipcheck <- df |> 
  select(Participant, Feedback_ConfidenceReal, Feedback_ConfidenceAI) |> 
  pivot_longer(-Participant, values_to = "Confidence", names_to = "Answer") |>
  filter(!is.na(Confidence)) |> 
  mutate(Answer = ifelse(str_detect(Answer, "Real"), "All images are real", "All images are fake"))

manipcheck |> 
  ggplot(aes(x=Confidence, fill=Answer)) +
  geom_bar() +
  facet_grid(~Answer) +
  theme_minimal() +
  theme(legend.position = "none") 

Code
manipcheck |> 
  summarize(N = n(), 
            Prop = n() / nrow(df), 
            Mean_Confidence = mean(Confidence, na.rm = TRUE),
            SD_Confidence = sd(Confidence, na.rm = TRUE),
            .by = "Answer") |> 
  gt::gt() |> 
  gt::fmt_auto() |> 
  gt::opt_stylize()
Answer N Prop Mean_Confidence SD_Confidence
All images are real 44  0.144 2.568 1.336
All images are fake 20  0.066 2.8   1.105
Code
df <- manipcheck |> 
  mutate(Confidence = Confidence + 1,
         Confidence = ifelse(Answer == "All images are fake", -Confidence, Confidence)) |> 
  select(Participant, ManipulationDistrust=Confidence) |> 
  right_join(df, by = "Participant") |> 
  arrange(Participant) |>
  mutate(ManipulationDistrust = ifelse(is.na(ManipulationDistrust), 0, ManipulationDistrust))

We added the manipulation distrust as a variable (“all images are fake”: -5 to -1, “all images are real”: 1 to 5).
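
A small worked illustration of that recoding (assuming the raw confidence ratings range from 0 to 4):

Code
# Hedged sketch: raw confidence 0-4 is shifted to 1-5 and sign-flipped for the
# "all images are fake" answer; participants who reported no doubts get 0
data.frame(Answer = c("All images are real", "All images are fake", NA),
           Confidence = c(0, 4, NA)) |>
  mutate(ManipulationDistrust = case_when(
    is.na(Answer) ~ 0,
    Answer == "All images are fake" ~ -(Confidence + 1),
    .default = Confidence + 1
  ))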

Code
df_belief <- dftask |> 
  summarize(
    # Reality = sum(Reality > 0) / n(),
    # Authenticity = sum(Authenticity > 0)  / n(),
    Reality = mean(Reality, na.rm=TRUE),
    Authenticity = mean(Authenticity, na.rm=TRUE),
    .by=c("Participant")) |> 
  full_join(manipcheck, by = "Participant") |> 
  mutate(Confidence = ifelse(is.na(Answer), -1, Confidence),
         Answer = ifelse(is.na(Answer), "No doubts", Answer))

df_belief |> 
  ggplot(aes(x = Reality, y = Authenticity, color = Answer)) +
  geom_jitter(aes(size=Confidence), shape = "+", width = 0.01, height=0.01, alpha = 0.9) +
  ggrepel::geom_label_repel(data = filter(df_belief, abs(Reality) > 95 | abs(Authenticity) > 95),
                            aes(label = Participant), size = 3) +
  ggside::geom_xsidedensity(aes(fill=Answer), alpha=0.3) +
  ggside::geom_ysidedensity(aes(fill=Answer), alpha=0.3) +
  ggside::theme_ggside_void() +
  theme_minimal() +
  scale_size_continuous(range = c(4, 8)) 

Code
exclude$phase2 <- filter(df_belief, Reality > 90 | Authenticity > 90)$Participant

Expressing cover-story doubts in the post-task check was not associated with answers consistent with those doubts in the phase 2 assessments. In other words, participants who ticked “I think all images were real/fake” did not actually rate all images as being real or fake, except for 2 participants, whose phase 2 data were dropped due to a lack of variability.

Phase 2 Instruction Misunderstanding

Did they misunderstand phase 2 and think it was a recognition task?

Code
phase2congruence <- dftask |> 
  summarize(Reality = sum(Reality > 0) / n(), Authenticity = sum(Authenticity > 0)  / n(),
            # Reality = mean(Reality), Authenticity = mean(Authenticity), 
            .by=c("Participant", "Condition")) |> 
  pivot_wider(names_from = Condition, values_from = c(Reality, Authenticity)) |> 
  # mutate(CongruentReality = (Reality_Human + Reality_AI) / 2,
  #        CongruentAuthenticity = (Authenticity_Human + Authenticity_Forgery) / 2) |> 
  full_join(select(df, Participant, Duration_TaskInstructions2), by = "Participant") |> 
  full_join(manipcheck, by = "Participant") |> 
  mutate(Confidence = ifelse(is.na(Answer), -1, Confidence),
         Answer = ifelse(is.na(Answer), "No doubts", Answer)) |> 
  mutate(Mean_Congruence = (`Reality_Human Original` + (1 - `Reality_AI-Generated`) + (1 - `Authenticity_Human Forgery`) + `Authenticity_Human Original`) / 4) |> 
  arrange(desc(Mean_Congruence))

phase2congruence |> 
  ggplot(aes(x=Mean_Congruence)) +
  geom_histogram(bins = 20)

Code
phase2congruence |> 
  ggplot(aes(x = `Reality_Human Original`, y = `Authenticity_Human Original`, color = Answer)) +
  geom_jitter(aes(size=Confidence), shape = "+", width = 0.01, height=0.01, alpha = 0.9) +
  ggrepel::geom_label_repel(data = filter(phase2congruence, `Reality_Human Original` > 0.95 | `Authenticity_Human Original` > 0.95), 
                            aes(label = Participant), size = 3) +
  ggside::geom_xsidedensity(aes(fill=Answer), alpha=0.3) +
  ggside::geom_ysidedensity(aes(fill=Answer), alpha=0.3) +
  ggside::theme_ggside_void() +
  theme_minimal() +
  scale_size_continuous(range = c(4, 8)) 

Code
# Note: we know for sure S024 (fghgugdaz0) is an outlier and thought it was recognition
Code
exclude$phase2
[1] "S217" "S253"
Code
# exclude$phase2_cong <- filter(phase2congruence, Reality_Human == 1 | Authenticity_Human == 1)$Participant
exclude$phase2_cong <- filter(phase2congruence, Mean_Congruence > 0.7 & !Participant %in% exclude$phase2)$Participant

dftask[dftask$Participant %in% c(exclude$phase2, exclude$phase2_cong), "Reality"] <- NA
dftask[dftask$Participant %in% c(exclude$phase2, exclude$phase2_cong), "Authenticity"] <- NA

Based on the distribution of congruency scores (i.e., the proportion of items presented as “Human Originals” rated as “Real” and of items presented as “Forgeries” rated as “Copies”), we excluded the phase 2 data of 4 outlying participants.

Final Sample

The final sample includes 305 participants (Mean age = 42.0, SD = 12.9, range: [20, 82]; Gender: 51.5% women, 47.9% men, 0.66% non-binary; Education: Bachelor, 47.87%; Doctorate, 5.25%; High school, 30.16%; Master, 16.72%; Country: 75.08% United Kingdom, 20.00% United States, 4.92% other).
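
A minimal sketch of how the headline descriptives in this paragraph could be recomputed from the cleaned data (column names as used elsewhere in this script; Country proportions are analogous):

Code
# Hedged sketch: recompute the sample descriptives reported above
df |>
  summarize(N = n(),
            Age_Mean = mean(Age, na.rm = TRUE),
            Age_SD = sd(Age, na.rm = TRUE),
            Age_Min = min(Age, na.rm = TRUE),
            Age_Max = max(Age, na.rm = TRUE))
round(100 * prop.table(table(df$Gender)), 2)     # Gender percentages
round(100 * prop.table(table(df$Education)), 2)  # Education percentages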

Code
p_age <- df |>
  ggplot(aes(x = Age, fill = Gender)) + 
  geom_histogram(binwidth = 3) +
  geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1, linetype="dashed") +
  scale_fill_viridis_d() +
  scale_x_continuous(expand = c(0, 0), breaks = seq(20, max(df$Age), by = 10 )) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
  theme_modern(axis.title.space = 10) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )

p_age

Code
p_edu <- df |>
  mutate(Student = ifelse(is.na(Student) | (Student == FALSE), "Not studying", "Currently studying"),
         Education = fct_relevel(Education, "High school", "Bachelor", "Master", "Doctorate")) |>
  ggplot(aes(x = Education)) +
  geom_bar(aes(fill = Student)) +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level", fill = "Is currently a student") +
  see::scale_fill_bluebrown_d() +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2)),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(0.5), angle = 45, hjust =1),
    axis.title.x = element_blank()
  )

p_edu

Code
p_eth <- df |>
  mutate(Ethnicity = ifelse(is.na(Ethnicity), "Prefer not to answer", Ethnicity)) |>
  mutate(Freq = format_percent(n() / nrow(df), digits=1), .by=Ethnicity) |> 
  mutate(Ethnicity = fct_infreq(paste0(Ethnicity, " (", Freq, ")"))) |> 
  ggplot(aes(x = "", fill = Ethnicity)) +
  geom_bar() +
  coord_polar("y") +
  see::scale_fill_social_d() +
  labs(title = "Ethnicity", subtitle = "Self-declared ethnicity") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2))
  ) 
    
p_eth

Code
p_map <- df |>
  mutate(Country = case_when(
    Country=="United States"~ "USA",
    Country=="United Kingdom" ~ "UK",
    TRUE ~ Country
  ))|>
  dplyr::select(region = Country) |>
  group_by(region) |>
  summarize(n = n()) |>
  right_join(map_data("world"), by = "region") |>
  # mutate(n = replace_na(n, 0)) |>
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(aes(fill = n)) +
  scale_fill_gradientn(colors = c("#E66101", "#ca0020", "#cc66cc")) +
  labs(fill = "N") +
  theme_void() +
  labs(title = "Country of Residence", subtitle = "Number of participants by country")  +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2))
  ) + 
  coord_fixed()

p_map

Code
sort(table(df$Country)) |> 
  as.data.frame() |>
  rename(Country = Var1) |> 
  arrange(desc(Freq)) |> 
  gt::gt()
Country Freq
United Kingdom 229
United States 61
Ireland 4
Australia 3
Belgium 1
Germany 1
India 1
Nicaragua 1
Norway 1
Saudi Arabia 1
Code
(p_age / p_edu) | (p_map / p_eth) 

Save

Code
dftask <- dftask |> 
  left_join(select(df, Participant, ManipulationDistrust), by = c("Participant")) |> 
  left_join(select(read.csv("../data/rawdata_memory_task.csv"), Participant, Item, SelfRelevance, Beauty2), by = c("Participant", "Item")) |> 
  mutate(Emotion = paste0(ifelse(Norms_Valence < median(Norms_Valence), "Negative", "Positive"), ifelse(Norms_Arousal < median(Norms_Arousal), " - Low intensity", " - High intensity")),
         Beauty = datawizard::rescale(Beauty, to = c(0, 1), range = c(-3, 3)),
         Valence = datawizard::rescale(Valence, to = c(0, 1), range = c(-3, 3)),
         Meaning = datawizard::rescale(Meaning, to = c(0, 1), range = c(0, 6)),
         Worth = datawizard::rescale(Worth, to = c(0, 1), range = c(0, 5)),
         Reality = datawizard::rescale(Reality, to = c(0, 1), range = c(-100, 100)),
         Authenticity = datawizard::rescale(Authenticity, to = c(0, 1), range = c(-100, 100)),
         Beauty2 = datawizard::rescale(Beauty2, to = c(0, 1), range = c(-3, 3)),
         SelfRelevance = datawizard::rescale(SelfRelevance, to = c(0, 1), range = c(0, 6))
         ) |>
  datawizard::normalize(select = c("Norms_Liking", "Norms_Valence", "Norms_Arousal", "Norms_Complexity", "Norms_Familiarity")) 

write.csv(dftask, "../data/data_task.csv", row.names = FALSE)




dfmem <- read.csv("../data/rawdata_memory_task.csv") 
# sum(!unique(dfmem$Participant) %in% df$Participant)
dfmem <- dfmem |> 
  filter(Participant %in% df$Participant) |> 
  select(-SelfRelevance, -Beauty2) |> 
  left_join(dftask, by = c("Participant", "Item")) |>
  rename(AnswerCondition = SourceCondition, AnswerBelief = SourceBelief) |> 
  mutate(Type = ifelse(is.na(Condition), "New", "Old"),
         Condition = ifelse(is.na(Condition), "New Items", as.character(Condition)),
         AnswerCondition = ifelse(is.na(AnswerCondition), "Not recognized", AnswerCondition),
         AnswerBelief = ifelse(is.na(AnswerBelief), "Not recognized", AnswerBelief),
         Belief = case_when(
           Reality >= 0.5 & Authenticity >= 0.5 ~ "Human Original",
           Reality < 0.5 & Authenticity >= 0.5 ~ "AI Original",
           Reality >= 0.5 & Authenticity < 0.5 ~ "Human Forgery",
           Reality < 0.5 & Authenticity < 0.5 ~ "AI Copy",
           .default = "None"
         ))
write.csv(dfmem, "../data/data_memory_task.csv", row.names = FALSE)

df |> 
  left_join(select(read.csv("../data/rawdata_memory_participants.csv"), Participant, Experiment_StartDate2 = Experiment_StartDate), 
            by = "Participant") |> 
  mutate(Delay = as.numeric(as.Date(Experiment_StartDate2) - as.Date(Experiment_StartDate))) |> 
  write.csv("../data/data_participants.csv", row.names = FALSE)