Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
<- read.csv("../data/rawdata_participants.csv") |>
df mutate(across(everything(), ~ifelse(.x == "", NA, .x))) |>
mutate(Sample = case_when(
=="English" & Experimenter %in% c("2discord1", "Experimenter2", "Experimenter1") ~ "Students (Non-incentivized)",
Language=="English" & Experimenter %in% c("Unknown", "Readme (GitHub)", "website", "reddit") ~ "General Population",
Language%in% c("enter", "macre", "piaca", "unime", "classroom1") ~ "Students (Non-incentivized)",
Experimenter %in% c("SONA") ~ "Students (Incentivized)",
Experimenter %in% c("gacon", "mafas", "marmar", "mavio", "sobon", "RISC", "Snowball") ~ "General Population",
Experimenter str_detect(Experimenter, "reddit") ~ "General Population",
.default = Experimenter
))
<- read.csv("../data/rawdata_task.csv") |>
dftask full_join(
c("Participant", "Sex", "SexualOrientation")],
df[by = join_by(Participant)
)
The initial sample consisted of 1007 participants (Mean age = 28.4, SD = 11.2, range: [18, 80], 9.9e-02% missing; Sex: 45.3% females, 53.3% males, 1.4% other; Education: Bachelor, 30.09%; Doctorate, 4.87%; High School, 45.38%; Master, 17.18%; Other, 2.09%; Primary School, 0.40%; Country: 36.25% UK, 17.48% Italy, 12.21% USA, 10.23% Colombia, 23.83% other).
# Create Sexual "relevance" variable (Relevant, irrelevant, non-erotic)
<- dftask |>
dftask mutate(Relevance = case_when(
== "Non-erotic" ~ "Non-erotic",
Type == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
Sex == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
Sex # TODO: what to do with "Other"?
%in% c("Bisexual", "Other") & Category %in% c("Male", "Female") ~ "Relevant",
SexualOrientation .default = "Irrelevant"
))
<- function(df) {
plot_recruitement # Consecutive count of participants per day (as area)
|>
df mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
group_by(Date, Language, Sample) |>
summarize(N = n()) |>
ungroup() |>
# https://bocoup.com/blog/padding-time-series-with-r
complete(Date, Language, Sample, fill = list(N = 0)) |>
group_by(Language, Sample) |>
mutate(N = cumsum(N)) |>
ggplot(aes(x = Date, y = N)) +
geom_area(aes(fill=Sample)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Recruitment History",
x = "Date",
y = "Total Number of Participants"
+
) ::theme_modern()
see
}
plot_recruitement(df) +
facet_wrap(~Language, nrow=5, scales = "free_y")
# Table
<- function(df) {
table_recruitment summarize(df, N = n(), .by=c("Language", "Experimenter")) |>
arrange(desc(N)) |>
::gt() |>
gt::opt_stylize() |>
gt::opt_interactive(use_compact_mode = TRUE) |>
gt::tab_header("Number of participants per recruitment source")
gt
}table_recruitment(df)
plot_recruitement(filter(df, Language == "English"))
table_recruitment(filter(df, Language == "English"))
plot_recruitement(filter(df, Language == "French"))
table_recruitment(filter(df, Language == "French"))
plot_recruitement(filter(df, Language == "Italian"))
table_recruitment(filter(df, Language == "Italian"))
plot_recruitement(filter(df, Language == "Colombian"))
table_recruitment(filter(df, Language == "Colombian"))
plot_recruitement(filter(df, Language == "Spanish"))
table_recruitment(filter(df, Language == "Spanish"))
The majority of participants found the study to be a “fun” experience. Interestingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.
|>
df select(starts_with("Feedback"), -Feedback_Comments) |>
pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
group_by(Question, Answer) |>
summarise(prop = n()/nrow(df), .groups = 'drop') |>
complete(Question, Answer, fill = list(prop = 0)) |>
filter(Answer == "True") |>
mutate(Question = str_remove(Question, "Feedback_"),
Question = str_replace(Question, "AILessArousing", "AI = Less arousing"),
Question = str_replace(Question, "AIMoreArousing", "AI = More arousing"),
Question = str_replace(Question, "CouldNotDiscriminate", "Hard to discriminate"),
Question = str_replace(Question, "LabelsIncorrect", "Labels were incorrect"),
Question = str_replace(Question, "NoFeels", "Didn't feel anything"),
Question = str_replace(Question, "CouldDiscriminate", "Easy to discriminate"),
Question = str_replace(Question, "LabelsReversed", "Labels were reversed")) |>
mutate(Question = fct_reorder(Question, desc(prop))) |>
ggplot(aes(x = Question, y = prop)) +
geom_bar(stat = "identity") +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
labs(x="Feedback", y = "Participants", title = "Feedback") +
theme_minimal() +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank()
)
<- df |>
cor select(starts_with("Feedback"), -Feedback_Comments) |>
mutate_all(~ifelse(.=="True", 1, 0)) |>
correlation(method="tetrachoric", redundant = TRUE) |>
::cor_sort() |>
correlation::cor_lower()
correlation
|>
cor mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = rho), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Feedback Co-occurence Matrix") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
<- list() outliers
|>
df ggplot(aes(x=Mobile, fill=Language)) +
geom_bar() +
geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
theme_modern()
There were 296 participants that used a mobile device.
# df <- filter(df, Mobile == "False")
# dftask <- filter(dftask, Participant %in% df$Participant)
The experiment’s median duration is 24.72 min (50% HDI [20.12, 27.82]).
|>
df mutate(Participant = fct_reorder(Participant, Experiment_Duration),
Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
geom_vline(xintercept = 15, color = "orange", linetype = "dotted") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
::geom_xsidedensity(fill = "#4CAF50", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggsidelabs(
title = "Experiment Completion Time",
x = "Duration (in minutes)",
y = "Participants"
+
) theme_modern() +
::theme_ggside_void() +
ggsidetheme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
$expe_duration <- filter(df, Experiment_Duration < 15)$Participant outliers
<- function(dat) {
plot_hist <- rbind(
dens mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT1),
Phase="Emotional ratings",
y = y / max(y)),
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT2),
Phase="Reality rating",
y = y / max(y))
)
|>
dat filter(RT1 < 40 & RT2 < 40) |> # Remove very long RTs
# mutate(Participant = fct_reorder(Participant, RT1)) |>
pivot_longer(cols = c(RT1, RT2), names_to = "Phase", values_to = "RT") |>
mutate(Phase = ifelse(Phase == "RT1", "Emotional ratings", "Reality rating")) |>
ggplot(aes(x=RT)) +
geom_area(data=dens, aes(x=x, y=y, fill=Phase), alpha=0.33, position="identity") +
geom_density(aes(color=Phase, y=after_stat(scaled)), linewidth=1.5) +
scale_x_sqrt(breaks=c(0, 2, 5, 10, 20)) +
theme_minimal() +
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.line.y = element_blank()) +
labs(title = "Distribution of Response Time for each Participant", x="Response time per stimuli (s)") +
facet_wrap(~Participant, ncol=5, scales="free_y") +
coord_cartesian(xlim = c(0, 25))
}
|>
df mutate(Participant = fct_reorder(Participant, BAIT_Duration),
Category = ifelse(BAIT_Duration > 5, "extra", "ok"),
Duration = ifelse(BAIT_Duration > 5, 5, BAIT_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$BAIT_Duration), color = "red", linetype = "dashed") +
geom_vline(xintercept = 0.5, color = "orange", linetype = "dotted") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
::geom_xsidedensity(fill = "#9C27B0", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggsidelabs(
title = "Questionnaire Completion Time",
x = "Duration (in minutes)",
y = "Participant"
+
) theme_modern() +
::theme_ggside_void() +
ggsidetheme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())
$bait_duration <- filter(df, BAIT_Duration < 0.5)$Participant
outliers$Participant %in% outliers$bait_duration, str_detect(names(df), "BAIT_\\d+")] <- NA df[df
We excluded the BAIT data of 7 participants who completed the questionnaire in less than 30 seconds.
# Single arousal response (0)
$novariability <- summarize(dftask, n = length(unique(Arousal)), .by="Participant") |>
outliersfilter(n == 1) |>
select(Participant) |>
pull()
<- filter(df, !Participant %in% outliers$novariability)
df <- filter(dftask, !Participant %in% outliers$novariability) dftask
We removed 7 that showed no variation in their arousal response (did not move the scales).
<- dftask |>
dat filter(Relevance %in% c("Relevant", "Non-erotic")) |>
group_by(Participant, Type) |>
summarise(Arousal = mean(Arousal),
Valence = mean(Valence),
Enticement = mean(Enticement),
.groups = "drop") |>
pivot_wider(names_from = Type, values_from = all_of(c("Arousal", "Valence", "Enticement"))) |>
transmute(Participant = Participant,
Arousal = Arousal_Erotic - `Arousal_Non-erotic`,
Valence = Valence_Erotic - `Valence_Non-erotic`,
Enticement = Enticement_Erotic - `Enticement_Non-erotic`) |>
filter(!is.na(Arousal)) |>
mutate(Participant = fct_reorder(Participant, Arousal))
|>
dat pivot_longer(-Participant) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(x=value, y=Participant, fill=Group)) +
geom_bar(aes(fill=value), stat = "identity") +
scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")
$emo_diff <- sort(as.character(dat[dat$Arousal < 0, ]$Participant)) outliers
<- dftask |>
dat filter(!Participant %in% outliers) |>
summarize(cor_ArVal = cor(Arousal, Valence),
cor_ArEnt = cor(Arousal, Enticement),
.by="Participant")
|>
dat left_join(df[c("Participant", "Language")], by="Participant") |>
mutate(Participant = fct_reorder(Participant, cor_ArVal)) |>
pivot_longer(starts_with("cor_")) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
mutate(name = fct_relevel(name, "cor_ArVal", "cor_ArEnt"),
name = fct_recode(name, "Arousal - Valence" = "cor_ArVal", "Arousal - Enticement" = "cor_ArEnt")) |>
ggplot(aes(y = Participant, x = value)) +
geom_bar(aes(fill = Language), stat = "identity") +
# scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")
$emo_cor <- sort(as.character(dat[dat$cor_ArEnt < 0, ]$Participant))
outliers
$arousal <- intersect(outliers$emo_diff, outliers$emo_cor) outliers
We removed 3 participants that had a negative correlation between arousal and enticement, and had a negative arousal difference between erotic and neutral (suggesting a misunderstanding of the scale direction).
<- filter(df, !Participant %in% outliers$arousal)
df <- filter(dftask, !Participant %in% outliers$arousal) dftask
|>
df ggplot(aes(x = Sex)) +
geom_bar(aes(fill = SexualOrientation)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_metro_d() +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Sexual Orientation", fill = "Sexual Orientation") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
We removed 57 participants that were incompatible with further analysis.
<- filter(df, !(Sex == "Other" | SexualOrientation == "Other"))
df <- filter(dftask, Participant %in% df$Participant) dftask
<- function(dftask, target="Arousal") {
show_distribution |>
dftask filter(SexualOrientation %in% c("Heterosexual", "Bisexual", "Homosexual")) |>
::estimate_density(select=target,
bayestestRat=c("Relevance", "Category", "Sex", "SexualOrientation"),
method="KernSmooth") |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Relevance, linetype = Category), linewidth=1) +
facet_grid(SexualOrientation~Sex, scales="free_y") +
scale_color_manual(values = c("Relevant" = "red", "Non-erotic" = "blue", "Irrelevant"="darkorange")) +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0)) +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(face="bold")) +
labs(title = target)
}
show_distribution(dftask, "Arousal") | show_distribution(dftask, "Valence")) /
(show_distribution(dftask, "Enticement") | show_distribution(dftask, "Realness")) +
(::plot_layout(guides = "collect") +
patchwork::plot_annotation(title = "Distribution of Appraisals depending on the Sexual Profile",
patchworktheme = theme(plot.title = element_text(hjust = 0.5, face="bold")))
We kept only heterosexual participants (70.64%).
<- filter(df, SexualOrientation == "Heterosexual")
df <- filter(dftask, Participant %in% df$Participant) dftask
The final sample includes 664 participants (Mean age = 29.9, SD = 11.6, range: [18, 80], 0.2% missing; Sex: 37.0% females, 63.0% males, 0.0% other; Education: Bachelor, 29.97%; Doctorate, 5.72%; High School, 41.57%; Master, 20.63%; Other, 1.66%; Primary School, 0.45%; Country: 29.37% UK, 19.73% Italy, 13.25% USA, 11.30% Colombia, 26.36% other).
<- dplyr::select(df, region = Country) |>
p_country group_by(region) |>
summarize(n = n()) |>
right_join(map_data("world"), by = "region") |>
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = n)) +
scale_fill_gradientn(colors = c("#FFEB3B", "red", "purple")) +
labs(fill = "N") +
theme_void() +
labs(title = "A Geographically Diverse Sample", subtitle = "Number of participants by country") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2))
) p_country
::waffle_iron(df, ggwaffle::aes_d(group = Ethnicity), rows=10) |>
ggwaffleggplot(aes(x, y, fill = group)) +
::geom_waffle() +
ggwafflecoord_equal() +
scale_fill_flat_d() +
::theme_waffle() +
ggwafflelabs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2)),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
<- estimate_density(df$Age) |>
p_age normalize(select = y) |>
mutate(y = y * 86) |> # To match the binwidth
ggplot(aes(x = x)) +
geom_histogram(data=df, aes(x = Age), fill = "#616161", bins=28) +
# geom_line(aes(y = y), color = "orange", linewidth=2) +
geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
# geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
theme_modern(axis.title.space = 10) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
) p_age
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_vline()`).
<- df |>
p_edu mutate(Education = fct_relevel(Education, "Other", "Primary School", "High School", "Bachelor", "Master", "Doctorate")) |>
ggplot(aes(x = Education)) +
geom_bar(aes(fill = Education)) +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_viridis_d(guide = "none") +
labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
) p_edu
<- c(
colors "NA" = "#2196F3", "None" = "#E91E63", "Condoms (for partner)" = "#9C27B0",
"Combined pills" = "#FF9800", "Intrauterine Device (IUD)" = "#FF5722",
"Intrauterine System (IUS)" = "#795548", "Progestogen pills" = "#4CAF50",
"Other" = "#FFC107", "Condoms (female)" = "#607D8B"
)<- colors[names(colors) %in% c("NA", df$BirthControl)]
colors
<- df |>
p_sex mutate(BirthControl = ifelse(Sex == "Male", "NA", BirthControl),
BirthControl = fct_relevel(BirthControl, names(colors))) |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = BirthControl)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_manual(
values = colors,
breaks = names(colors)[2:length(colors)]
+
) labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Birth Control Method", fill = "Birth Control") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
) p_sex
<- df |>
p_sexprofile select(Participant, Sex, SexualOrientation, SexualActivity, COPS_Duration_1, COPS_Frequency_2) |>
pivot_longer(-all_of(c("Participant", "Sex"))) |>
mutate(name = str_replace_all(name, "SexualOrientation", "Sexual Orientation"),
name = str_replace_all(name, "SexualActivity", "Sexual Activity"),
name = str_replace_all(name, "COPS_Duration_1", "Pornography Usage (Duration)"),
name = str_replace_all(name, "COPS_Frequency_2", "Pornography Usage (Frequency)")) |>
ggplot(aes(x = value, fill=Sex)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292")) +
labs(title = "Sexual Profile of Participants") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank(),
axis.title.y = element_blank()
+
) facet_wrap(~name, scales = "free")
p_sexprofile
<- select(df, COPS_Duration_1, COPS_Frequency_2, SexualActivity) |>
dat mutate(across(where(is.character), as.factor)) |>
mutate(across(where(is.factor), as.numeric))
<- correlation::correlation(
r |>
dat) as.matrix()
<- parameters::factor_analysis(dat, n = 1, rotation = "oblimin", cor = r)
r $Porn <- get_scores(r)[, 1] df
<- df |>
p_language ggplot(aes(x = Language_Level)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "Language Level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
<- df |>
p_expertise ggplot(aes(x = AI_Knowledge)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "AI-Expertise") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
| p_expertise p_language
$Screen_Size <- sqrt(df$Screen_Width * df$Screen_Height)
df
|>
df ggplot(aes(x = Screen_Size, fill=Mobile)) +
geom_histogram(bins = 50, color="black") +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = expression("Screen Size ("~sqrt(Number~of~Pixels)~")"), y = "Number of Participants", title = "Screen Size") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
/
p_country + p_edu) (p_age
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_vline()`).
This section pertains to the validation of the BAIT scale measuring beliefs and expectations about artificial creations.
<- df |>
bait select(starts_with("BAIT_"), -BAIT_Duration) |>
rename_with(function(x) gsub("BAIT_\\d_", "", x)) |>
filter(!is.na(TextRealistic ))
<- correlation::correlation(bait, redundant = TRUE) |>
cor ::cor_sort() |>
correlation::cor_lower()
correlation
<- function(x) {
clean_labels <- str_remove_all(x, "BAIT_") |>
x str_replace_all("_", " - ")
}
|>
cor mutate(val = paste0(insight::format_value(r), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, clean_labels),
Parameter2 = fct_relabel(Parameter2, clean_labels)) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = r), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Correlation Matrix",
subtitle = "Beliefs about Artificial Information Technology (BAIT)") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
<- parameters::n_factors(bait, package = "nFactors")
n plot(n)
<- parameters::factor_analysis(bait, cor=cor(bait, use="pairwise.complete.obs"), n=3, rotation = "oblimin",
efa sort=TRUE, scores="tenBerge", fm="ml")
plot(efa)
display(efa)
Variable | ML3 | ML1 | ML2 | Complexity | Uniqueness |
---|---|---|---|---|---|
EnvironmentReal | 0.62 | 0.01 | 5.89e-03 | 1.00 | 0.62 |
ImitatingReality | 0.60 | 0.01 | -0.07 | 1.03 | 0.61 |
ImagesRealistic | 0.59 | 0.05 | -0.04 | 1.02 | 0.65 |
VideosIssues | 0.46 | -0.28 | 0.12 | 1.78 | 0.67 |
VideosRealistic | 2.21e-03 | 1.00 | 0.01 | 1.00 | 5.00e-03 |
ImagesIssues | -0.13 | 0.20 | 0.19 | 2.75 | 0.86 |
TextIssues | 0.05 | 0.04 | 0.80 | 1.01 | 0.37 |
TextRealistic | 0.20 | 0.05 | -0.56 | 1.27 | 0.59 |
The 3 latent factors (oblimin rotation) accounted for 45.37% of the total variance of the original data (ML3 = 17.86%, ML1 = 14.35%, ML2 = 13.16%).
<- lavaan::cfa(
m1 "G =~ ImitatingReality + EnvironmentReal + VideosIssues + TextIssues + VideosRealistic + ImagesRealistic + ImagesIssues + TextRealistic",
data=bait)
<- lavaan::cfa(
m2 "Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues + VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
<- lavaan::cfa(
m3 "Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
<- lavaan::cfa(
m4 "Environment =~ ImitatingReality + EnvironmentReal
Images =~ ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
<- lavaan::cfa(efa_to_cfa(efa, threshold="max"), data=bait)
m5
# bayestestR::bayesfactor_models(m1, m2)
::anova(m1, m2, m3, m4, m5) lavaan
Warning in lavTestLRT(object = object, ..., model.names = NAMES): lavaan
WARNING: some models have the same degrees of freedom
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
m4 14 -457.35 -358.62 94.954
m3 17 -450.05 -364.78 108.251 13.297 0.07228 3 0.004037 **
m5 17 -422.82 -337.55 135.481 27.230 0.00000 0
m2 19 -402.29 -326.00 160.014 24.532 0.13095 2 4.708e-06 ***
m1 20 -288.47 -216.67 275.830 115.816 0.41804 1 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(parameters::parameters(m4, standardize = TRUE))
Link | Coefficient | SE | 95% CI | z | p |
---|---|---|---|---|---|
Environment =~ ImitatingReality | 0.66 | 0.04 | (0.59, 0.73) | 17.55 | < .001 |
Environment =~ EnvironmentReal | 0.63 | 0.04 | (0.55, 0.70) | 16.73 | < .001 |
Images =~ ImagesRealistic | 0.71 | 0.06 | (0.60, 0.83) | 11.71 | < .001 |
Images =~ ImagesIssues | -0.36 | 0.04 | (-0.44, -0.27) | -7.99 | < .001 |
Videos =~ VideosIssues | 0.80 | 0.06 | (0.69, 0.92) | 13.69 | < .001 |
Videos =~ VideosRealistic | -0.49 | 0.05 | (-0.58, -0.40) | -10.71 | < .001 |
Text =~ TextIssues | 0.57 | 0.05 | (0.48, 0.67) | 11.71 | < .001 |
Text =~ TextRealistic | -0.85 | 0.06 | (-0.97, -0.72) | -13.66 | < .001 |
Link | Coefficient | SE | 95% CI | z | p |
---|---|---|---|---|---|
Environment ~~ Images | 0.73 | 0.07 | (0.58, 0.87) | 9.98 | < .001 |
Environment ~~ Videos | 0.58 | 0.06 | (0.47, 0.70) | 9.77 | < .001 |
Environment ~~ Text | -0.44 | 0.06 | (-0.55, -0.33) | -7.68 | < .001 |
Images ~~ Videos | 0.52 | 0.07 | (0.38, 0.65) | 7.34 | < .001 |
Images ~~ Text | -0.46 | 0.07 | (-0.59, -0.33) | -6.95 | < .001 |
Videos ~~ Text | -0.20 | 0.06 | (-0.31, -0.09) | -3.61 | < .001 |
<- lavaan::sem(
m4b "Environment =~ ImitatingReality + EnvironmentReal
Images =~ ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic
G =~ Environment + Images + Videos + Text",
data=bait)
<- lavaan::sem(
m4c "Environment =~ ImitatingReality + EnvironmentReal
Images =~ ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic
Visual =~ Environment + Images",
data=bait)
# bayestestR::bayesfactor_models(m1, m2)
::anova(m4, m4b, m4c) lavaan
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
m4 14 -457.35 -358.62 94.954
m4c 15 -458.48 -364.24 95.824 0.8695 0.000000 1 0.35110
m4b 16 -454.02 -364.26 102.287 6.4625 0.091183 1 0.01102 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(parameters::parameters(m4c, standardize = TRUE))
Link | Coefficient | SE | 95% CI | z | p |
---|---|---|---|---|---|
Environment =~ ImitatingReality | 0.66 | 0.04 | (0.59, 0.73) | 17.57 | < .001 |
Environment =~ EnvironmentReal | 0.62 | 0.04 | (0.55, 0.70) | 16.68 | < .001 |
Images =~ ImagesRealistic | 0.72 | 0.06 | (0.60, 0.84) | 11.66 | < .001 |
Images =~ ImagesIssues | -0.36 | 0.04 | (-0.44, -0.27) | -7.94 | < .001 |
Videos =~ VideosIssues | 0.80 | 0.06 | (0.69, 0.92) | 13.67 | < .001 |
Videos =~ VideosRealistic | -0.49 | 0.05 | (-0.58, -0.40) | -10.70 | < .001 |
Text =~ TextIssues | 0.57 | 0.05 | (0.48, 0.67) | 11.76 | < .001 |
Text =~ TextRealistic | -0.84 | 0.06 | (-0.96, -0.72) | -13.68 | < .001 |
Visual =~ Environment | 0.88 | 0.05 | (0.78, 0.98) | 17.03 | < .001 |
Visual =~ Images | 0.83 | 0.07 | (0.68, 0.97) | 11.20 | < .001 |
Link | Coefficient | SE | 95% CI | z | p |
---|---|---|---|---|---|
Videos ~~ Text | -0.20 | 0.06 | (-0.31, -0.09) | -3.62 | < .001 |
Videos ~~ Visual | 0.65 | 0.06 | (0.53, 0.76) | 10.80 | < .001 |
Text ~~ Visual | -0.52 | 0.06 | (-0.63, -0.41) | -9.22 | < .001 |
library(ggraph)
library(tidySEM)
<- tidySEM::get_edges(m4c) |>
edges mutate(sign = as.factor(sign(as.numeric(est_std))))
::tbl_graph(nodes = tidySEM::get_nodes(m4c), edges = edges) |>
tidygraphggraph(layout = 'kk') +
geom_edge_link(aes(filter=op=="=~", label=est_std, color=sign),
angle_calc="along",
label_dodge=unit(-0.015, "npc"),
edge_width=1) +
geom_edge_arc(aes(filter=op=="~~", label=est_std, color=sign),
linetype="dashed", strength=0.1,
angle_calc="along",
label_dodge=unit(-0.015, "npc"),
edge_width=1) +
geom_node_point(aes(shape=shape, color=shape), size=14, alpha=0.3) +
geom_node_text(aes(label = name)) +
scale_edge_color_manual(values=c("1"="darkgreen", "-1"="red"), guide="none") +
scale_shape_manual(values=c("oval"="circle", "rect"="square"), guide="none") +
scale_color_manual(values=c("oval"="orange", "rect"="black"), guide="none") +
theme_void()
Exploratory Graph Analysis (EGA) is a recently developed framework for psychometric assessment, that can be used to estimate the number of dimensions in questionnaire data using network estimation methods and community detection algorithms, and assess the stability of dimensions and items using bootstrapping.
Unique Variable Analysis (Christensen, Garrido, & Golino, 2023) uses the weighted topological overlap measure (Nowick et al., 2009) on an estimated network. Values greater than 0.25 are determined to have considerable local dependence (i.e., redundancy) that should be handled (variables with the highest maximum weighted topological overlap to all other variables (other than the one it is redundant with) should be removed).
<- EGAnet::UVA(data = bait, cut.off = 0.3)
uva uva
Variable pairs with wTO > 0.30 (large-to-very large redundancy)
node_i node_j wto
TextRealistic TextIssues 0.331
----
Variable pairs with wTO > 0.25 (moderate-to-large redundancy)
----
Variable pairs with wTO > 0.20 (small-to-moderate redundancy)
node_i node_j wto
VideosIssues VideosRealistic 0.234
ImitatingReality EnvironmentReal 0.225
$keep_remove uva
$keep
[1] "TextIssues"
$remove
[1] "TextRealistic"
<- list()
ega for(model in c("glasso", "TMFG")) {
for(algo in c("walktrap", "louvain")) {
for(type in c("ega", "ega.fit", "riEGA")) { # "hierega"
if(type=="ega.fit" & algo=="louvain") next # Too slow
paste0(model, "_", algo, "_", type)]] <- EGAnet::bootEGA(
ega[[data = bait,
seed=123,
model=model,
algorithm=algo,
EGA.type=type,
type="resampling",
plot.typicalStructure=FALSE,
verbose=FALSE)
}
} }
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
::compare.EGA.plots(
EGAnet$glasso_walktrap_ega, ega$glasso_walktrap_ega.fit,
ega$glasso_louvain_ega, ega$TMFG_louvain_ega,
ega$glasso_louvain_riEGA, ega$glasso_walktrap_riEGA,
ega$TMFG_walktrap_ega, ega$TMFG_walktrap_ega.fit,
ega$TMFG_louvain_riEGA, ega$TMFG_walktrap_riEGA,
egalabels=c("glasso_walktrap_ega", "glasso_walktrap_ega.fit",
"glasso_louvain_ega", "TMFG_louvain_ega",
"glasso_louvain_riEGA", "glasso_walktrap_riEGA",
"TMFG_walktrap_ega", "TMFG_walktrap_ega.fit",
"TMFG_louvain_riEGA", "TMFG_walktrap_riEGA"),
rows=5,
plot.all = FALSE)$all
Figures shows how often each variable is replicating in their empirical structure across bootstraps.
::wrap_plots(lapply(ega, plot), nrow = 4) patchwork
<- ega$glasso_walktrap_riEGA$EGA
ega_final plot(ega_final)
# Merge with data
<- EGAnet::net.scores(bait, ega_final)$scores$std.scores |>
ega_scores as.data.frame() |>
setNames(c("BAIT_Text_EGA", "BAIT_Visual_EGA", "BAIT_Videos_EGA"))
<- lavaan::predict(m4c) |>
sem_scores as.data.frame() |>
::data_addprefix("BAIT_") |>
datawizard::data_addsuffix("_SEM")
datawizard
<- data.frame(
raw_scores Participant = df[!is.na(df$BAIT_1_ImagesRealistic), ]$Participant,
BAIT_Videos = (bait$VideosRealistic + (1 - bait$VideosIssues)) / 2,
BAIT_Visual = (bait$ImagesRealistic + (1 - bait$ImagesIssues) + bait$ImitatingReality + bait$EnvironmentReal) / 4,
BAIT_Text = (bait$TextRealistic + (1 - bait$TextIssues)) / 2
)
<- cbind(sem_scores, ega_scores, raw_scores)
scores <- merge(df, raw_scores, by="Participant") df
We computed two type of general scores for the BAIT scale, an empirical score based on the average of observed data (of the most loading items) and a model-based score as predicted by the structural model. The first one gives equal weight to all items (and keeps the same [0-1] range), while the second one is based on the factor loadings and the covariance structure.
::cor_test(scores, "BAIT_Visual", "BAIT_Visual_SEM") |>
correlationplot() +
::geom_xsidedensity(aes(x=BAIT_Visual), color="grey", linewidth=1) +
ggside::geom_ysidedensity(aes(y=BAIT_Visual_SEM), color="grey", linewidth=1) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggside::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
ggsidetheme(ggside.panel.scale = .1)
While the two correlate substantially, they have different benefits. The empirical score has a more straightforward meaning and is more reproducible (as it is not based on a model fitted on a specific sample), the model-based score takes into account the relative importance of the contribution of each item to their factor.
<- correlation::correlation(scores) |>
table summary()
format(table) |>
::data_rename("Parameter", "Variables") |>
datawizard::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold") gt
Variables | BAIT_Text | BAIT_Visual | BAIT_Videos | BAIT_Videos_EGA | BAIT_Visual_EGA | BAIT_Text_EGA | BAIT_Visual_SEM | BAIT_Text_SEM | BAIT_Videos_SEM | BAIT_Images_SEM |
---|---|---|---|---|---|---|---|---|---|---|
BAIT_Environment_SEM | 0.46*** | 0.92*** | -0.54*** | 0.18*** | 0.93*** | 0.13** | 0.97*** | -0.57*** | 0.72*** | 0.89*** |
BAIT_Images_SEM | 0.44*** | 0.87*** | -0.51*** | 0.17*** | 0.92*** | 0.15** | 0.95*** | -0.57*** | 0.69*** | |
BAIT_Videos_SEM | 0.20*** | 0.56*** | -0.92*** | 0.34*** | 0.57*** | 0.04 | 0.78*** | -0.27*** | ||
BAIT_Text_SEM | -0.95*** | -0.45*** | 0.20*** | 7.42e-03 | -0.46*** | -0.30*** | -0.64*** | |||
BAIT_Visual_SEM | 0.52*** | 0.89*** | -0.61*** | 0.20*** | 0.91*** | 0.16*** | ||||
BAIT_Text_EGA | 8.46e-03 | 0.06 | 0.02 | 0.09 | 0.08 | |||||
BAIT_Visual_EGA | 0.34*** | 0.99*** | -0.39*** | 0.09 | ||||||
BAIT_Videos_EGA | -0.06 | 0.06 | 1.68e-03 | |||||||
BAIT_Videos | -0.18*** | -0.39*** | ||||||||
BAIT_Visual | 0.34*** |
<- n_factors(select(df, starts_with("GAAIS")))
n n
# Method Agreement Procedure:
The choice of 2 dimensions is supported by 8 (50.00%) methods out of 16 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), VSS complexity 2, BIC, BIC (adjusted)).
::factor_analysis(
parametersselect(df, starts_with("GAAIS")),
n=2,
rotation="oblimin")
# Rotated loadings from Factor Analysis (oblimin-rotation)
Variable | MR1 | MR2 | Complexity | Uniqueness
-----------------------------------------------------------
GAAIS_Positive_7 | -0.02 | 0.78 | 1.00 | 0.38
GAAIS_Negative_15 | 0.78 | -0.02 | 1.00 | 0.37
GAAIS_Negative_9 | 0.51 | 0.10 | 1.07 | 0.77
GAAIS_Negative_10 | 0.79 | -0.01 | 1.00 | 0.37
GAAIS_Positive_12 | 0.10 | 0.68 | 1.05 | 0.59
GAAIS_Positive_17 | -0.21 | 0.52 | 1.33 | 0.60
The 2 latent factors (oblimin rotation) accounted for 48.70% of the total variance of the original data (MR1 = 26.10%, MR2 = 22.60%).
$GAAIS_Negative <- rowMeans(select(df, starts_with("GAAIS_Negative")))
df$GAAIS_Positive <- rowMeans(select(df, starts_with("GAAIS_Positive"))) df
<- correlation::correlation(
table select(df, all_of(names(raw_scores))),
select(df, starts_with("GAAIS")),
bayesian=TRUE) |>
summary()
format(table) |>
::data_rename("Parameter", "Variables") |>
datawizard::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold") gt
Variables | GAAIS_Positive_7 | GAAIS_Negative_15 | GAAIS_Negative_9 | GAAIS_Negative_10 | GAAIS_Positive_12 | GAAIS_Positive_17 | GAAIS_Negative | GAAIS_Positive |
---|---|---|---|---|---|---|---|---|
BAIT_Videos | 0.12** | -0.19*** | -0.10** | -0.13*** | 0.11*** | 0.06 | -0.17*** | 0.13*** |
BAIT_Visual | 0.17*** | 0.02 | 0.13*** | -0.01 | 0.24*** | 0.16*** | 0.06 | 0.24*** |
BAIT_Text | 0.25*** | -0.07* | 0.05 | -0.09* | 0.17*** | 0.18*** | -0.05 | 0.25*** |
<- df |>
dat select(starts_with("GAAIS"), starts_with("BAIT")) |>
select(matches("[[:digit:]]"))
<- EGAnet::EGA(
ega data = dat,
seed=123,
model="glasso",
algorithm="leiden",
plot.EGA=FALSE,
verbose=FALSE)
plot(ega)
|>
df ggplot(aes(x=as.factor(AI_Knowledge), y=BAIT_Visual)) +
geom_boxplot()
# m <- betareg::betareg(BAIT ~ AI_Knowledge, data=df)
<- lm(BAIT_Visual ~ poly(AI_Knowledge, 2), data=df)
m # m <- brms::brm(BAIT ~ mo(AI_Knowledge), data=df, algorithm = "meanfield")
# m <- brms::brm(BAIT ~ AI_Knowledge, data=dfsub, algorithm = "meanfield")
display(parameters::parameters(m))
Parameter | Coefficient | SE | 95% CI | t(654) | p |
---|---|---|---|---|---|
(Intercept) | 0.68 | 6.18e-03 | (0.67, 0.70) | 110.56 | < .001 |
AI Knowledge (1st degree) | -0.06 | 0.16 | (-0.37, 0.25) | -0.39 | 0.699 |
AI Knowledge (2nd degree) | 0.50 | 0.16 | (0.19, 0.81) | 3.18 | 0.002 |
::predictions(m, by=c("AI_Knowledge"), newdata = "marginalmeans") |>
marginaleffectsas.data.frame() |>
ggplot(aes(x=AI_Knowledge, y=estimate)) +
::stat_halfeye(data=df, aes(y=BAIT_Visual), geom="slab") +
ggdist# ggdist::stat_dots(data=df, aes(y=BAIT_Visual), side="left") +
# geom_boxplot(data=df, aes(y=BAIT_Visual, group=AI_Knowledge)) +
geom_point2(data=df, aes(x=AI_Knowledge-0.08, y=BAIT_Visual), alpha=0.2, position=position_jitter(width=0.06), size=2) +
geom_line(aes(group=1), position = position_dodge(width=0.2)) +
geom_pointrange(aes(ymin = conf.low, ymax=conf.high), position = position_dodge(width=0.2)) +
theme_minimal() +
labs(x = "AI-Knowledge", y="BAIT - Visual")
# m <- betareg::betareg(BAIT ~ Sex / Age, data=df, na.action=na.omit)
<- lm(BAIT_Visual ~ Sex / Age, data=df)
m display(parameters::parameters(m))
Parameter | Coefficient | SE | 95% CI | t(652) | p |
---|---|---|---|---|---|
(Intercept) | 0.66 | 0.03 | (0.60, 0.72) | 22.07 | < .001 |
Sex (Male) | 0.04 | 0.04 | (-0.03, 0.11) | 1.05 | 0.294 |
Sex (Female) × Age | 1.35e-03 | 1.14e-03 | (-8.90e-04, 3.59e-03) | 1.18 | 0.237 |
Sex (Male) × Age | -5.78e-04 | 6.64e-04 | (-1.88e-03, 7.26e-04) | -0.87 | 0.385 |
glm(Feedback_LabelsIncorrect ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_LabelsIncorrect = ifelse(Feedback_LabelsIncorrect=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'Labels are Incorrect'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | 0.22 | 1.06 | (-1.87, 2.31) | 0.20 | 0.838 |
BAIT Visual | -1.56 | 1.48 | (-4.50, 1.33) | -1.05 | 0.293 |
AI Knowledge | 0.09 | 0.30 | (-0.50, 0.67) | 0.29 | 0.774 |
BAIT Visual × AI Knowledge | 0.11 | 0.41 | (-0.70, 0.92) | 0.27 | 0.790 |
glm(Feedback_LabelsReversed ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_LabelsReversed = ifelse(Feedback_LabelsReversed=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'Labels are reversed'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -3.12 | 2.15 | (-7.44, 0.95) | -1.45 | 0.146 |
BAIT Visual | 0.55 | 2.95 | (-5.24, 6.31) | 0.19 | 0.851 |
AI Knowledge | 0.17 | 0.60 | (-1.00, 1.35) | 0.29 | 0.774 |
BAIT Visual × AI Knowledge | -0.26 | 0.83 | (-1.88, 1.37) | -0.32 | 0.751 |
glm(Feedback_CouldDiscriminate ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_CouldDiscriminate = ifelse(Feedback_CouldDiscriminate=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'Easy to discriminate'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -1.62 | 1.88 | (-5.40, 1.95) | -0.86 | 0.389 |
BAIT Visual | -1.01 | 2.59 | (-6.07, 4.09) | -0.39 | 0.698 |
AI Knowledge | -0.27 | 0.54 | (-1.32, 0.79) | -0.50 | 0.614 |
BAIT Visual × AI Knowledge | 0.31 | 0.74 | (-1.14, 1.76) | 0.42 | 0.672 |
<- glm(Feedback_CouldNotDiscriminate ~ BAIT_Visual * AI_Knowledge,
m data=mutate(df, Feedback_CouldNotDiscriminate = ifelse(Feedback_CouldNotDiscriminate=="True", 1, 0)),
family="binomial")
::parameters(m) |>
parametersdisplay(title="Predicting 'Hard to discriminate'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -4.25 | 1.20 | (-6.66, -1.96) | -3.55 | < .001 |
BAIT Visual | 6.22 | 1.64 | (3.08, 9.53) | 3.78 | < .001 |
AI Knowledge | 0.42 | 0.33 | (-0.23, 1.08) | 1.26 | 0.206 |
BAIT Visual × AI Knowledge | -0.77 | 0.45 | (-1.68, 0.10) | -1.71 | 0.087 |
::estimate_relation(m, length=6) |>
modelbasedplot()
glm(Feedback_Fun ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_Fun = ifelse(Feedback_Fun=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'Fun'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | 0.16 | 1.05 | (-1.90, 2.21) | 0.15 | 0.882 |
BAIT Visual | 0.16 | 1.44 | (-2.67, 3.00) | 0.11 | 0.909 |
AI Knowledge | -0.02 | 0.30 | (-0.60, 0.56) | -0.07 | 0.946 |
BAIT Visual × AI Knowledge | 0.12 | 0.41 | (-0.68, 0.93) | 0.30 | 0.763 |
glm(Feedback_Boring ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_Boring = ifelse(Feedback_Boring=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'Boring'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -1.87 | 1.39 | (-4.65, 0.83) | -1.34 | 0.181 |
BAIT Visual | -0.14 | 1.94 | (-3.96, 3.67) | -0.07 | 0.943 |
AI Knowledge | 0.20 | 0.38 | (-0.54, 0.96) | 0.54 | 0.592 |
BAIT Visual × AI Knowledge | -0.13 | 0.53 | (-1.18, 0.91) | -0.25 | 0.802 |
glm(Feedback_AILessArousing ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_AILessArousing = ifelse(Feedback_AILessArousing=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'AI was less arousing'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | 1.62 | 1.61 | (-1.56, 4.78) | 1.01 | 0.314 |
BAIT Visual | -5.66 | 2.43 | (-10.51, -0.96) | -2.32 | 0.020 |
AI Knowledge | -0.64 | 0.46 | (-1.55, 0.26) | -1.40 | 0.161 |
BAIT Visual × AI Knowledge | 0.93 | 0.68 | (-0.39, 2.26) | 1.38 | 0.169 |
glm(Feedback_AIMoreArousing ~ BAIT_Visual * AI_Knowledge,
data=mutate(df, Feedback_AIMoreArousing = ifelse(Feedback_AIMoreArousing=="True", 1, 0)),
family="binomial") |>
::parameters() |>
parametersdisplay(title="Predicting 'AI was more arousing'")
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -3.11 | 1.74 | (-6.63, 0.20) | -1.78 | 0.075 |
BAIT Visual | 1.41 | 2.29 | (-3.02, 5.97) | 0.62 | 0.537 |
AI Knowledge | -0.16 | 0.49 | (-1.12, 0.80) | -0.34 | 0.736 |
BAIT Visual × AI Knowledge | 0.27 | 0.64 | (-0.99, 1.51) | 0.42 | 0.678 |
setdiff(unique(df$Participant), unique(dftask$Participant))
character(0)
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data.csv", row.names = FALSE)
Comments
Code