FaceFake2 - Data Cleaning

Data Preparation

Code
# Load analysis toolkits: tidyverse (wrangling/plots), easystats (stats helpers),
# patchwork (plot composition), ggside (marginal side-plots).
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
# Participant-level raw data; keep adult participants only (Age >= 18).
df <- read.csv("../data/rawdata_participants.csv") |> 
  filter(Age >= 18)


# Trial-level task data, merged with participant demographics.
# NOTE(review): full_join keeps unmatched rows from BOTH sides; task rows from
# participants excluded from `df` are removed by the filter below, but df-only
# participants (no task rows) would remain as NA trials — confirm intended.
dftask <- read.csv("../data/rawdata_task.csv") |> 
  full_join(
    df[c("Participant", "Gender", "SexualOrientation")],
    by = join_by(Participant)
    ) |> 
  # Stimulus sex: file names containing "NF" are female faces.
  mutate(Category = ifelse(str_detect(Stimulus, "NF"), "Female", "Male")) |> 
  # Rename conditions for display.
  mutate(Condition = ifelse(Condition == "Reality", "Photograph", "AI-Generated")) |> 
  # A stimulus is "Relevant" when its sex matches the participant's reported
  # sexual attraction; for bisexual participants every stimulus is relevant.
  mutate(Relevance = case_when(
    Gender == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
    Gender == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
    Gender == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
    Gender == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
    SexualOrientation == "Bisexual" ~ "Relevant",
    SexualOrientation %in% c("Other") ~ "Irrelevant",  # What to do with "Other"? (redundant with .default below)
    .default = "Irrelevant"
  )) 

# Keep only trials from participants retained in `df` (adults).
dftask <- filter(dftask, Participant %in% unique(df$Participant))

The initial sample consisted of 248 participants (Mean age = 27.6, SD = 13.4, range: [18, 69]; Gender: 75.0% women, 23.0% men, 2.02% non-binary; Education: Bachelor, 35.08%; Doctorate, 4.03%; High school, 43.95%; Master, 16.53%; Other, 0.40%; Country: 62.10% United Kingdom, 12.50% Spain, 25.40% other), for a total trial number of 27032.

Reliability

HEXACO

Code
# Internal consistency (Cronbach's alpha) of the Honesty-Humility items;
# check.keys = TRUE auto-reverses items loading negatively on the first PC.
df |>
  dplyr::select(starts_with("HEXACO18_HonestyHumility_")) |>
  psych::alpha(check.keys = TRUE) # 0.57
Warning in psych::alpha(dplyr::select(df, starts_with("HEXACO18_HonestyHumility_")), : Some items were negatively correlated with the first principal component and were automatically reversed.
 This is indicated by a negative sign for the variable name.

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_HonestyHumility_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.57      0.57    0.59      0.31 1.3 0.048  2.1 1.3     0.13

    95% confidence boundaries 
         lower alpha upper
Feldt     0.47  0.57  0.66
Duhachek  0.48  0.57  0.67

 Reliability if an item is dropped:
                                            raw_alpha std.alpha G6(smc)
HEXACO18_HonestyHumility_Sincerity_1_NR-         0.82      0.82   0.700
HEXACO18_HonestyHumility_GreedAvoidance_2_R      0.18      0.18   0.097
HEXACO18_HonestyHumility_Modesty_3_R             0.22      0.22   0.126
                                            average_r  S/N alpha se var.r med.r
HEXACO18_HonestyHumility_Sincerity_1_NR-        0.700 4.66    0.023    NA 0.700
HEXACO18_HonestyHumility_GreedAvoidance_2_R     0.097 0.21    0.105    NA 0.097
HEXACO18_HonestyHumility_Modesty_3_R            0.126 0.29    0.098    NA 0.126

 Item statistics 
                                              n raw.r std.r r.cor r.drop mean
HEXACO18_HonestyHumility_Sincerity_1_NR-    248  0.55  0.56  0.14   0.12  2.1
HEXACO18_HonestyHumility_GreedAvoidance_2_R 248  0.84  0.83  0.78   0.55  2.2
HEXACO18_HonestyHumility_Modesty_3_R        248  0.81  0.82  0.76   0.54  2.1
                                             sd
HEXACO18_HonestyHumility_Sincerity_1_NR-    1.8
HEXACO18_HonestyHumility_GreedAvoidance_2_R 1.9
HEXACO18_HonestyHumility_Modesty_3_R        1.7

Non missing response frequency for each item
                                               0    1    2    3    4    5    6
HEXACO18_HonestyHumility_Sincerity_1_NR     0.04 0.09 0.16 0.09 0.14 0.26 0.23
HEXACO18_HonestyHumility_GreedAvoidance_2_R 0.29 0.15 0.16 0.13 0.13 0.08 0.07
HEXACO18_HonestyHumility_Modesty_3_R        0.23 0.22 0.13 0.19 0.13 0.06 0.04
                                            miss
HEXACO18_HonestyHumility_Sincerity_1_NR        0
HEXACO18_HonestyHumility_GreedAvoidance_2_R    0
HEXACO18_HonestyHumility_Modesty_3_R           0
Code
# Internal consistency of the Emotionality items.
df |>
  dplyr::select(starts_with("HEXACO18_Emotionality_")) |>
  psych::alpha(check.keys = TRUE) # 0.67
Warning in psych::alpha(dplyr::select(df, starts_with("HEXACO18_Emotionality_")), : Some items were negatively correlated with the first principal component and were automatically reversed.
 This is indicated by a negative sign for the variable name.

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_Emotionality_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.67      0.67    0.58       0.4   2 0.036  3.9 1.3      0.4

    95% confidence boundaries 
         lower alpha upper
Feldt     0.59  0.67  0.73
Duhachek  0.60  0.67  0.74

 Reliability if an item is dropped:
                                       raw_alpha std.alpha G6(smc) average_r
HEXACO18_Emotionality_Fearfulness_4_R-      0.57      0.57    0.40      0.40
HEXACO18_Emotionality_Dependence_5_NR       0.59      0.60    0.43      0.43
HEXACO18_Emotionality_Anxiety_6_NR          0.56      0.56    0.39      0.39
                                       S/N alpha se var.r med.r
HEXACO18_Emotionality_Fearfulness_4_R- 1.3    0.055    NA  0.40
HEXACO18_Emotionality_Dependence_5_NR  1.5    0.051    NA  0.43
HEXACO18_Emotionality_Anxiety_6_NR     1.3    0.056    NA  0.39

 Item statistics 
                                         n raw.r std.r r.cor r.drop mean  sd
HEXACO18_Emotionality_Fearfulness_4_R- 248  0.79  0.78  0.60   0.49  3.7 1.8
HEXACO18_Emotionality_Dependence_5_NR  248  0.78  0.77  0.57   0.46  3.5 1.8
HEXACO18_Emotionality_Anxiety_6_NR     248  0.76  0.78  0.60   0.49  4.5 1.6

Non missing response frequency for each item
                                         0    1    2    3    4    5    6 miss
HEXACO18_Emotionality_Fearfulness_4_R 0.22 0.16 0.19 0.18 0.10 0.12 0.04    0
HEXACO18_Emotionality_Dependence_5_NR 0.04 0.13 0.16 0.08 0.22 0.21 0.15    0
HEXACO18_Emotionality_Anxiety_6_NR    0.03 0.04 0.05 0.09 0.15 0.31 0.33    0
Code
# Internal consistency of the Extraversion items.
df |>
  dplyr::select(starts_with("HEXACO18_Extraversion_")) |>
  psych::alpha(check.keys = TRUE) # 0.74

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_Extraversion_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.74      0.74    0.65      0.48 2.8 0.029  2.6 1.4     0.48

    95% confidence boundaries 
         lower alpha upper
Feldt     0.67  0.74  0.79
Duhachek  0.68  0.74  0.79

 Reliability if an item is dropped:
                                           raw_alpha std.alpha G6(smc)
HEXACO18_Extraversion_SocialSelfEsteem_7_R      0.61      0.61    0.44
HEXACO18_Extraversion_SocialBoldness_8_R        0.68      0.68    0.52
HEXACO18_Extraversion_Liveliness_9_R            0.65      0.65    0.48
                                           average_r S/N alpha se var.r med.r
HEXACO18_Extraversion_SocialSelfEsteem_7_R      0.44 1.6    0.049    NA  0.44
HEXACO18_Extraversion_SocialBoldness_8_R        0.52 2.2    0.040    NA  0.52
HEXACO18_Extraversion_Liveliness_9_R            0.48 1.9    0.044    NA  0.48

 Item statistics 
                                             n raw.r std.r r.cor r.drop mean
HEXACO18_Extraversion_SocialSelfEsteem_7_R 248  0.82  0.83  0.69   0.59  2.5
HEXACO18_Extraversion_SocialBoldness_8_R   248  0.80  0.79  0.62   0.53  2.6
HEXACO18_Extraversion_Liveliness_9_R       248  0.81  0.81  0.65   0.56  2.7
                                            sd
HEXACO18_Extraversion_SocialSelfEsteem_7_R 1.7
HEXACO18_Extraversion_SocialBoldness_8_R   1.8
HEXACO18_Extraversion_Liveliness_9_R       1.7

Non missing response frequency for each item
                                              0    1    2    3    4    5    6
HEXACO18_Extraversion_SocialSelfEsteem_7_R 0.14 0.18 0.21 0.17 0.16 0.08 0.05
HEXACO18_Extraversion_SocialBoldness_8_R   0.12 0.25 0.19 0.14 0.12 0.10 0.09
HEXACO18_Extraversion_Liveliness_9_R       0.11 0.17 0.25 0.12 0.18 0.12 0.06
                                           miss
HEXACO18_Extraversion_SocialSelfEsteem_7_R    0
HEXACO18_Extraversion_SocialBoldness_8_R      0
HEXACO18_Extraversion_Liveliness_9_R          0
Code
# Internal consistency of the Agreeableness items.
df |>
  dplyr::select(starts_with("HEXACO18_Agreeableness_")) |>
  psych::alpha(check.keys = TRUE) # 0.53
Warning in psych::alpha(dplyr::select(df, starts_with("HEXACO18_Agreeableness_")), : Some items were negatively correlated with the first principal component and were automatically reversed.
 This is indicated by a negative sign for the variable name.

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_Agreeableness_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.53      0.54    0.47      0.28 1.2 0.052    3 1.2      0.2

    95% confidence boundaries 
         lower alpha upper
Feldt     0.42  0.53  0.62
Duhachek  0.43  0.53  0.63

 Reliability if an item is dropped:
                                         raw_alpha std.alpha G6(smc) average_r
HEXACO18_Agreeableness_Forgiveness_10_NR      0.30      0.31    0.18      0.18
HEXACO18_Agreeableness_Gentleness_11_NR       0.33      0.33    0.20      0.20
HEXACO18_Agreeableness_Patience_12_R-         0.63      0.64    0.47      0.47
                                          S/N alpha se var.r med.r
HEXACO18_Agreeableness_Forgiveness_10_NR 0.45    0.087    NA  0.18
HEXACO18_Agreeableness_Gentleness_11_NR  0.49    0.085    NA  0.20
HEXACO18_Agreeableness_Patience_12_R-    1.75    0.046    NA  0.47

 Item statistics 
                                           n raw.r std.r r.cor r.drop mean  sd
HEXACO18_Agreeableness_Forgiveness_10_NR 248  0.78  0.77  0.60   0.41  2.8 1.7
HEXACO18_Agreeableness_Gentleness_11_NR  248  0.72  0.76  0.59   0.42  3.0 1.4
HEXACO18_Agreeableness_Patience_12_R-    248  0.66  0.64  0.29   0.22  3.0 1.7

Non missing response frequency for each item
                                            0    1    2    3    4    5    6
HEXACO18_Agreeableness_Forgiveness_10_NR 0.10 0.14 0.24 0.17 0.15 0.15 0.06
HEXACO18_Agreeableness_Gentleness_11_NR  0.04 0.09 0.24 0.23 0.24 0.13 0.03
HEXACO18_Agreeableness_Patience_12_R     0.08 0.15 0.15 0.17 0.23 0.14 0.06
                                         miss
HEXACO18_Agreeableness_Forgiveness_10_NR    0
HEXACO18_Agreeableness_Gentleness_11_NR     0
HEXACO18_Agreeableness_Patience_12_R        0
Code
# Internal consistency of the Conscientiousness items
# (column prefix is spelled "Conscientiousnes" in the data).
df |>
  dplyr::select(starts_with("HEXACO18_Conscientiousnes_")) |>
  psych::alpha(check.keys = TRUE) # 0.79

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_Conscientiousnes_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.79      0.79    0.71      0.56 3.8 0.023  2.6 1.5     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.74  0.79  0.83
Duhachek  0.74  0.79  0.83

 Reliability if an item is dropped:
                                            raw_alpha std.alpha G6(smc)
HEXACO18_Conscientiousnes_Diligence_13_R         0.71      0.71    0.55
HEXACO18_Conscientiousnes_Prudence_14_R          0.71      0.71    0.55
HEXACO18_Conscientiousnes_Organization_15_R      0.72      0.72    0.56
                                            average_r S/N alpha se var.r med.r
HEXACO18_Conscientiousnes_Diligence_13_R         0.55 2.5    0.037    NA  0.55
HEXACO18_Conscientiousnes_Prudence_14_R          0.55 2.5    0.037    NA  0.55
HEXACO18_Conscientiousnes_Organization_15_R      0.56 2.6    0.036    NA  0.56

 Item statistics 
                                              n raw.r std.r r.cor r.drop mean
HEXACO18_Conscientiousnes_Diligence_13_R    248  0.83  0.84  0.71   0.63  2.4
HEXACO18_Conscientiousnes_Prudence_14_R     248  0.83  0.84  0.71   0.63  2.4
HEXACO18_Conscientiousnes_Organization_15_R 248  0.85  0.84  0.70   0.63  3.0
                                             sd
HEXACO18_Conscientiousnes_Diligence_13_R    1.7
HEXACO18_Conscientiousnes_Prudence_14_R     1.7
HEXACO18_Conscientiousnes_Organization_15_R 2.0

Non missing response frequency for each item
                                               0    1    2    3    4    5    6
HEXACO18_Conscientiousnes_Diligence_13_R    0.15 0.21 0.19 0.17 0.17 0.06 0.06
HEXACO18_Conscientiousnes_Prudence_14_R     0.15 0.19 0.20 0.16 0.17 0.08 0.05
HEXACO18_Conscientiousnes_Organization_15_R 0.12 0.17 0.15 0.11 0.16 0.17 0.11
                                            miss
HEXACO18_Conscientiousnes_Diligence_13_R       0
HEXACO18_Conscientiousnes_Prudence_14_R        0
HEXACO18_Conscientiousnes_Organization_15_R    0
Code
# Internal consistency of the Openness items.
df |>
  dplyr::select(starts_with("HEXACO18_Openness_")) |>
  psych::alpha(check.keys = TRUE) # 0.5
Warning in psych::alpha(dplyr::select(df, starts_with("HEXACO18_Openness_")), : Some items were negatively correlated with the first principal component and were automatically reversed.
 This is indicated by a negative sign for the variable name.

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, starts_with("HEXACO18_Openness_")), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r  S/N   ase mean  sd median_r
       0.5      0.49    0.42      0.24 0.94 0.053  4.1 1.3     0.18

    95% confidence boundaries 
         lower alpha upper
Feldt     0.38   0.5   0.6
Duhachek  0.39   0.5   0.6

 Reliability if an item is dropped:
                                              raw_alpha std.alpha G6(smc)
HEXACO18_Openness_Unconventionality_16_R-          0.61      0.62   0.444
HEXACO18_Openness_AestheticAppreciation_17_NR      0.31      0.31   0.183
HEXACO18_Openness_Creativity_18_NR                 0.16      0.17   0.091
                                              average_r  S/N alpha se var.r
HEXACO18_Openness_Unconventionality_16_R-         0.444 1.60    0.049    NA
HEXACO18_Openness_AestheticAppreciation_17_NR     0.183 0.45    0.087    NA
HEXACO18_Openness_Creativity_18_NR                0.091 0.20    0.102    NA
                                              med.r
HEXACO18_Openness_Unconventionality_16_R-     0.444
HEXACO18_Openness_AestheticAppreciation_17_NR 0.183
HEXACO18_Openness_Creativity_18_NR            0.091

 Item statistics 
                                                n raw.r std.r r.cor r.drop mean
HEXACO18_Openness_Unconventionality_16_R-     248  0.53  0.61  0.22   0.16  3.9
HEXACO18_Openness_AestheticAppreciation_17_NR 248  0.78  0.73  0.53   0.37  3.9
HEXACO18_Openness_Creativity_18_NR            248  0.78  0.77  0.62   0.45  4.5
                                               sd
HEXACO18_Openness_Unconventionality_16_R-     1.5
HEXACO18_Openness_AestheticAppreciation_17_NR 2.1
HEXACO18_Openness_Creativity_18_NR            1.8

Non missing response frequency for each item
                                                 0    1    2    3    4    5
HEXACO18_Openness_Unconventionality_16_R      0.17 0.21 0.25 0.20 0.08 0.05
HEXACO18_Openness_AestheticAppreciation_17_NR 0.11 0.07 0.08 0.08 0.15 0.20
HEXACO18_Openness_Creativity_18_NR            0.05 0.06 0.06 0.07 0.14 0.19
                                                 6 miss
HEXACO18_Openness_Unconventionality_16_R      0.03    0
HEXACO18_Openness_AestheticAppreciation_17_NR 0.31    0
HEXACO18_Openness_Creativity_18_NR            0.43    0

BAIT

Code
# Internal consistency of the 8 BAIT items on expectations about AI.
df |>
  dplyr::select(
    BAIT_1_ImagesRealistic, BAIT_2_ImagesIssues,
    BAIT_3_VideosRealistic, BAIT_4_VideosIssues,
    BAIT_5_ImitatingReality, BAIT_6_EnvironmentReal,
    BAIT_7_TextRealistic, BAIT_8_TextIssues
  ) |>
  psych::alpha(check.keys = TRUE) # 0.75
Warning in psych::alpha(dplyr::select(df, BAIT_1_ImagesRealistic, BAIT_2_ImagesIssues, : Some items were negatively correlated with the first principal component and were automatically reversed.
 This is indicated by a negative sign for the variable name.

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, BAIT_1_ImagesRealistic, BAIT_2_ImagesIssues, 
    BAIT_3_VideosRealistic, BAIT_4_VideosIssues, BAIT_5_ImitatingReality, 
    BAIT_6_EnvironmentReal, BAIT_7_TextRealistic, BAIT_8_TextIssues), 
    check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.75      0.75    0.78      0.27   3 0.025  3.3 0.86     0.27

    95% confidence boundaries 
         lower alpha upper
Feldt      0.7  0.75  0.79
Duhachek   0.7  0.75  0.80

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
BAIT_1_ImagesRealistic       0.70      0.70    0.72      0.25 2.3    0.030
BAIT_2_ImagesIssues-         0.74      0.74    0.76      0.29 2.9    0.026
BAIT_3_VideosRealistic-      0.72      0.73    0.74      0.28 2.7    0.027
BAIT_4_VideosIssues          0.70      0.71    0.72      0.25 2.4    0.029
BAIT_5_ImitatingReality      0.71      0.71    0.74      0.26 2.5    0.029
BAIT_6_EnvironmentReal       0.72      0.73    0.75      0.28 2.7    0.027
BAIT_7_TextRealistic         0.73      0.73    0.76      0.28 2.8    0.027
BAIT_8_TextIssues-           0.75      0.75    0.77      0.30 3.0    0.025
                        var.r med.r
BAIT_1_ImagesRealistic  0.020  0.20
BAIT_2_ImagesIssues-    0.025  0.29
BAIT_3_VideosRealistic- 0.024  0.24
BAIT_4_VideosIssues     0.021  0.24
BAIT_5_ImitatingReality 0.022  0.24
BAIT_6_EnvironmentReal  0.020  0.24
BAIT_7_TextRealistic    0.029  0.29
BAIT_8_TextIssues-      0.025  0.29

 Item statistics 
                          n raw.r std.r r.cor r.drop mean  sd
BAIT_1_ImagesRealistic  248  0.71  0.73  0.71   0.60  4.4 1.2
BAIT_2_ImagesIssues-    248  0.54  0.54  0.44   0.36  2.7 1.4
BAIT_3_VideosRealistic- 248  0.59  0.58  0.52   0.42  2.5 1.5
BAIT_4_VideosIssues     248  0.68  0.69  0.66   0.54  4.0 1.4
BAIT_5_ImitatingReality 248  0.66  0.65  0.60   0.50  3.4 1.6
BAIT_6_EnvironmentReal  248  0.57  0.59  0.51   0.42  3.8 1.3
BAIT_7_TextRealistic    248  0.58  0.57  0.47   0.41  3.3 1.5
BAIT_8_TextIssues-      248  0.50  0.49  0.37   0.32  2.2 1.5

Non missing response frequency for each item
                           0    1    2    3    4    5    6 miss
BAIT_1_ImagesRealistic  0.01 0.01 0.05 0.12 0.30 0.33 0.17    0
BAIT_2_ImagesIssues     0.03 0.09 0.15 0.28 0.21 0.19 0.05    0
BAIT_3_VideosRealistic  0.02 0.07 0.15 0.21 0.28 0.18 0.08    0
BAIT_4_VideosIssues     0.01 0.05 0.09 0.17 0.27 0.25 0.16    0
BAIT_5_ImitatingReality 0.04 0.08 0.19 0.21 0.19 0.21 0.09    0
BAIT_6_EnvironmentReal  0.01 0.04 0.13 0.19 0.33 0.21 0.08    0
BAIT_7_TextRealistic    0.02 0.09 0.21 0.21 0.23 0.17 0.06    0
BAIT_8_TextIssues       0.03 0.06 0.10 0.13 0.33 0.24 0.10    0
Code
# Positive attitudes toward AI (2 items; column names carry the
# "Attitutes" spelling from the raw data).
df |>
  dplyr::select(BAIT_11_PositiveAttitutes, BAIT_12_PositiveAttitutes) |>
  psych::alpha(check.keys = TRUE) # 0.71

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, BAIT_11_PositiveAttitutes, 
    BAIT_12_PositiveAttitutes), check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.71      0.71    0.55      0.55 2.5 0.037  3.5 1.3     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.63  0.71  0.77
Duhachek  0.64  0.71  0.78

 Reliability if an item is dropped:
                          raw_alpha std.alpha G6(smc) average_r S/N alpha se
BAIT_11_PositiveAttitutes      0.53      0.55     0.3      0.55 1.2       NA
BAIT_12_PositiveAttitutes      0.57      0.55     0.3      0.55 1.2       NA
                          var.r med.r
BAIT_11_PositiveAttitutes     0  0.55
BAIT_12_PositiveAttitutes     0  0.55

 Item statistics 
                            n raw.r std.r r.cor r.drop mean  sd
BAIT_11_PositiveAttitutes 248  0.88  0.88  0.65   0.55  3.9 1.5
BAIT_12_PositiveAttitutes 248  0.88  0.88  0.65   0.55  3.1 1.5

Non missing response frequency for each item
                             0    1    2    3    4    5    6 miss
BAIT_11_PositiveAttitutes 0.03 0.05 0.08 0.19 0.29 0.21 0.15    0
BAIT_12_PositiveAttitutes 0.06 0.11 0.14 0.27 0.25 0.10 0.07    0
Code
# Negative attitudes toward AI (GAAIS-style, 2 items).
df |>
  dplyr::select(BAIT_9_NegativeAttitutes, BAIT_10_NegativeAttitutes) |>
  psych::alpha(check.keys = TRUE) # 0.79

Reliability analysis   
Call: psych::alpha(x = dplyr::select(df, BAIT_9_NegativeAttitutes, 
    BAIT_10_NegativeAttitutes), check.keys = TRUE)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.79      0.79    0.65      0.65 3.7 0.027  4.4 1.4     0.65

    95% confidence boundaries 
         lower alpha upper
Feldt     0.73  0.79  0.83
Duhachek  0.73  0.79  0.84

 Reliability if an item is dropped:
                          raw_alpha std.alpha G6(smc) average_r S/N alpha se
BAIT_9_NegativeAttitutes       0.67      0.65    0.42      0.65 1.8       NA
BAIT_10_NegativeAttitutes      0.63      0.65    0.42      0.65 1.8       NA
                          var.r med.r
BAIT_9_NegativeAttitutes      0  0.65
BAIT_10_NegativeAttitutes     0  0.65

 Item statistics 
                            n raw.r std.r r.cor r.drop mean  sd
BAIT_9_NegativeAttitutes  248  0.91  0.91  0.73   0.65  4.2 1.5
BAIT_10_NegativeAttitutes 248  0.90  0.91  0.73   0.65  4.6 1.5

Non missing response frequency for each item
                             0    1    2    3    4    5    6 miss
BAIT_9_NegativeAttitutes  0.03 0.04 0.07 0.16 0.25 0.23 0.23    0
BAIT_10_NegativeAttitutes 0.03 0.02 0.04 0.09 0.18 0.30 0.33    0

Compute Dimensions

# Reverse
# Reverse-score the negatively keyed items (0-6 scale flipped to 6-0).
# FIX: anchor the pattern to the end of the name ("_R$"). The original
# unanchored "_R" would also match any future column merely *containing*
# "_R", silently reversing it; only items explicitly suffixed "_R" should flip.
df <- datawizard::rescale(df, select=names(df)[grepl("_R$", names(df))], range=c(0, 6), to=c(6, 0)) 

# Compute scores
# Each HEXACO dimension score = mean of its items (matched by dimension name;
# "Conscientiousnes" mirrors the item-column spelling in the raw data).
df$HEXACO18_HonestyHumility <- rowMeans(df[grepl("HonestyHumility", names(df))])
df$HEXACO18_Emotionality <- rowMeans(df[grepl("Emotionality", names(df))])
df$HEXACO18_Extraversion <- rowMeans(df[grepl("Extraversion", names(df))])
df$HEXACO18_Agreeableness <- rowMeans(df[grepl("Agreeableness", names(df))])
df$HEXACO18_Conscientiousness <- rowMeans(df[grepl("Conscientiousnes", names(df))])
df$HEXACO18_Openness <- rowMeans(df[grepl("Openness", names(df))])

Recruitment History

Code
# Cumulative number of recruited participants over time, stacked by source.
df |>
  mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |> 
  summarize(N = n(), .by=c("Date", "Source")) |>
  # Fill the Date x Source grid so the cumulative sum is continuous per source.
  complete(Date, Source, fill = list(N = 0)) |> 
  group_by(Source) |>
  mutate(N = cumsum(N)) |>
  ggplot(aes(x = Date, y = N)) +
  geom_area(aes(fill=Source)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = "Recruitment History",
    x = NULL,
    y = "Total Number of Participants"
  ) +
  theme_minimal()

Code
# Table
# Participant counts per recruitment source, largest first, rendered as an
# interactive gt table.
df |>
  summarize(N = n(), .by = "Source") |>
  arrange(desc(N)) |>
  gt::gt() |>
  gt::opt_stylize() |>
  gt::opt_interactive(use_compact_mode = TRUE) |>
  gt::tab_header("Number of participants per recruitment source")
Number of participants per recruitment source

Feedback

Evaluation

The majority of participants found the study to be a “fun” experience. Interestingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.

Code
# Single stacked horizontal bar of enjoyment answers, colored on a
# green-to-red palette.
df |> 
  summarize(n = n(), .by="Feedback_Enjoyment") |> 
  arrange(desc(Feedback_Enjoyment)) |>
  # fct_rev() flips the level order so the stack matches the color palette.
  mutate(Feedback_Enjoyment = fct_rev(as.factor(Feedback_Enjoyment))) |> 
  ggplot(aes(x=0, y=n)) +
  geom_bar(aes(fill=Feedback_Enjoyment), stat="identity") +
  scale_fill_manual(values=c("#4CAF50", "#8BC34A", "#CDDC39", "#FF9800", "#F44336")) +
  scale_x_continuous(expand=c(0, 0)) +
  coord_flip() +
  theme_minimal() +
  theme(axis.title.y = element_blank(), 
        axis.ticks.y = element_blank(), 
        axis.text.y = element_blank()) +
  labs(x="Answer", y = "Participants", title = "Did you enjoy doing this experiment?")

Code
# Proportion of participants endorsing ("True") each binary feedback item,
# relabeled for readability and sorted by endorsement rate.
df |> 
  select(starts_with("Feedback"), -Feedback_Text, -ends_with("Confidence"), -Feedback_Enjoyment) |>
  pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
  group_by(Question, Answer) |> 
  summarise(prop = n()/nrow(df), .groups = 'drop') |> 
  # Make sure items nobody endorsed still appear with prop = 0.
  complete(Question, Answer, fill = list(prop = 0)) |> 
  filter(Answer == "True") |> 
  # Human-readable labels for the plot axis.
  mutate(Question = str_remove(Question, "Feedback_"),
         Question = str_replace(Question, "NoFacesAttractive", "No Faces Attractive"),
         Question = str_replace(Question, "LabelsIncorrect", "Labels were Incorrect"),
         Question = str_replace(Question, "LabelsReversed", "Labels were Reversed"),
         Question = str_replace(Question, "DiffNone", "No Difference Real/AI"),
         Question = str_replace(Question, "DiffObvious", "Obvious Difference Real/AI"),
         Question = str_replace(Question, "DiffSubtle", "Subtle Difference Real/AI"),
         Question = str_replace(Question, "AILessAttractive", "AI = less attractive"),
         Question = str_replace(Question, "AIMoreAttractive", "AI = more attractive"),
         Question = str_replace(Question, "SomeFacesAttractive", "Some Faces Attractive"),
         Question = str_replace(Question, "AllReal", "All Faces were Real"),
         Question = str_replace(Question, "AllFake", "All Faces were Fake")) |>
  # Order bars by decreasing endorsement.
  mutate(Question = fct_reorder(Question, desc(prop))) |> 
  ggplot(aes(x = Question, y = prop)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
  labs(x="Feedback", y = "Participants", title = "Feedback") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
    axis.title.x = element_blank()
  )

Code
# Tetrachoric correlations between the binary feedback items, visualized as a
# lower-triangle heatmap with significance stars.
cor <- df |> 
  select(starts_with("Feedback"), -Feedback_Text, -ends_with("Confidence"), -Feedback_Enjoyment)  |> 
  select(-Feedback_DiffSubtle) |>  # No variance
  # Recode "True"/"False" to 1/0 (across() replaces superseded mutate_all()).
  mutate(across(everything(), \(x) ifelse(x == "True", 1, 0))) |> 
  correlation(method="tetrachoric", redundant = TRUE) |> 
  correlation::cor_sort() |> 
  correlation::cor_lower()

# Heatmap: cell text = rho + significance stars; blue = negative, red = positive.
cor |> 
  mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
  mutate(Parameter2 = fct_rev(Parameter2)) |>
  mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
         Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
  ggplot(aes(x=Parameter1, y=Parameter2)) +
  geom_tile(aes(fill = rho), color = "white") +
  geom_text(aes(label = val), size = 3) +
  # Typo fix in the displayed title: "Co-occurence" -> "Co-occurrence".
  labs(title = "Feedback Co-occurrence Matrix") +
  scale_fill_gradient2(
    low = "#2196F3",
    mid = "white",
    high = "#F44336",
    breaks = c(-1, 0, 1),
    guide = guide_colourbar(ticks=FALSE),
    midpoint = 0,
    na.value = "grey85",
    limit = c(-1, 1))  + 
  theme_minimal() +
  theme(legend.title = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

Comments

In summary:

  • The term “trustworthiness” seemed to cause some confusion, as some participants lowered their trustworthiness ratings when asked about it. The image presentation format (a few seconds, with no background whatsoever) also seemed to affect trustworthiness ratings. Some participants may have found the term misleading or inherently suspicious, so consider clarifying it or changing the wording next time.
  • Many people said the faces from the database were “too ugly to be real”, and one participant suggested that 1–10 scales for attractiveness, trustworthiness and beauty might work better than “Agree–Disagree” scales.
  • Most importantly, the experiment is too long and tiring. For future iterations, either add incentives and more breaks in between, or cut the number of faces in half: people seemed to become fatigued near the end and were definitely distracted (some didn’t really read the tags at the start). Participants proposed either a controlled experimental setting such as the lab, or a shorter completion time, to avoid fatigue and distraction.
  • Some thought the eye-tracking was a very cool idea.
Code
# Show non-empty free-text feedback alongside each participant's source.
df |> 
  select(Source, Comments = Feedback_Text) |> 
  mutate(Comments = trimws(Comments)) |> 
  # Drop missing and placeholder answers ("n/a", "no", "none", empty).
  filter(!tolower(Comments) %in% c(NA, "", "n/a", "no", "none")) |> 
  arrange(Source) |>
  gt::gt() |> 
  gt::opt_stylize() |> 
  gt::opt_interactive(use_compact_mode = TRUE) 

Exclusion

# Collect participant IDs flagged for exclusion, one list element per criterion.
outliers <- list()

Mobile

Code
# Count of mobile vs non-mobile participants, stacked by recruitment source.
df |>
  ggplot(aes(x=Mobile, fill=Source)) +
  geom_bar() +
  # Dashed line marks half the sample, for reference.
  geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
  theme_modern() +
  scale_y_continuous(expand=c(0, 0))

There were 50 (20.16%) participants that used a mobile device.

Invalid Responses

Code
# Flag participants with zero variance on any of the three rating scales —
# they gave the same answer on every trial, which suggests invalid responding.
outliers$invalid <- dftask |>
  group_by(Participant) |>
  summarize(Mean_Attractiveness = mean(Attractiveness),
            SD_Attractiveness = sd(Attractiveness),
            N_Attractiveness = length(unique(Attractiveness)),
            SD_Trustworthiness = sd(Trustworthiness),
            SD_Beauty = sd(Beauty)) |> 
  arrange(N_Attractiveness) |>
  # NOTE(review): an NA SD (e.g. a participant with a single trial) makes that
  # term NA; the row is only kept if another SD is exactly 0 — confirm intended.
  filter(SD_Attractiveness == 0 | SD_Trustworthiness == 0 | SD_Beauty == 0) 

gt::gt(outliers$invalid)
Participant Mean_Attractiveness SD_Attractiveness N_Attractiveness SD_Trustworthiness SD_Beauty
S103 4.0000000 0.00000000 1 0.09578263 0.0000000
S188 0.0000000 0.00000000 1 0.00000000 0.0000000
S193 0.0000000 0.00000000 1 0.27650063 0.3203379
S247 0.0000000 0.00000000 1 1.10784433 1.3785035
S254 0.0000000 0.00000000 1 1.76845158 0.0000000
S082 3.0091743 0.09578263 2 0.00000000 0.0000000
S106 0.3944954 0.77002103 4 0.00000000 0.8115920
S224 0.8623853 1.54241207 7 0.00000000 2.3022541
Code
# Keep only the participant IDs (drop the diagnostic columns) for filtering.
outliers$invalid <- outliers$invalid$Participant

We removed 8 (3.23%) participants with no variation in their responses.

Code
# Remove the invalid responders from both the participant and trial data.
df <- df |> filter(!Participant %in% outliers$invalid)
dftask <- dftask |> filter(!Participant %in% outliers$invalid)

Experiment Duration

The experiment’s median duration is 38.88 min (50% HDI [32.38, 43.14]).

Code
# Per-participant completion time (capped at 60 min for display), with the
# sample median (red dashed) and a 15-min reference line (orange dotted).
df |>
  mutate(Participant = fct_reorder(Participant, Experiment_Duration),
         # Durations above 60 min are drawn at 60 with a distinct shape ("+").
         Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
         Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
         # BUG FIX: `outliers` is a list, and `%in%` against a list coerces each
         # element to a single string, so no participant was ever flagged.
         # Flatten to a character vector first.
         Group = ifelse(Participant %in% unlist(outliers), "Outlier", "ok")) |>
  ggplot(aes(y = Participant, x = Duration)) +
  geom_point(aes(color = Group, shape = Category)) +
  geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
  geom_vline(xintercept = 15, color = "orange", linetype = "dotted") +
  scale_shape_manual(values = c("extra" = 3, ok = 19)) +
  scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
  guides(color = "none", shape = "none") +
  # Marginal density of durations along the x axis.
  ggside::geom_xsidedensity(fill = "#4CAF50", color=NA) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  labs(
    title = "Experiment Completion Time",
    x = "Duration (in minutes)",
    y = "Participants"
  )  +
  theme_minimal() +
  ggside::theme_ggside_void() +
  theme(ggside.panel.scale = .3,
        panel.border = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

Code
# outliers$expe_duration <- filter(df, Experiment_Duration < 15)$Participant

Instruction Reading Time

Code
# Instruction reading time per participant, one panel per phase.
# Durations are converted to minutes; readings above 4 min are truncated to 4
# and flagged as "extra" (cross marker) so extreme readers don't stretch the axis.
p_time1 <- df |>
  select(Participant, Instruction_Duration1, Instruction_Duration2) |> 
  mutate(Participant = fct_reorder(Participant, Instruction_Duration1)) |> 
  pivot_longer(cols = -Participant) |>
  mutate(name = ifelse(name == "Instruction_Duration1", "Phase 1", "Phase 2"),
         value = value / 60,  # seconds -> minutes
         Category = ifelse(value > 4, "extra", "ok"),
         value = ifelse(value > 4, 4, value)) |> 
  ggplot(aes(y = Participant, x = value)) +
  geom_point(aes(shape = Category), alpha = 0.5) +
  facet_wrap(~name, scales = "free") +
  guides(color = "none", shape = "none") +
  scale_shape_manual(values = c("extra" = 3, "ok" = 19)) +
  ggside::geom_xsidedensity(fill = "#4CAF50", color = NA) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  labs(
    # Fixed: title was "Experiment Completion Time", copy-pasted from the
    # previous figure; this one shows instruction reading times.
    title = "Instruction Reading Time",
    x = "Duration (in minutes)",
    y = "Participants"
  )  +
  theme_minimal() +
  ggside::theme_ggside_void() +
  theme(ggside.panel.scale = .3,
        panel.border = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank())

# Phase 1 vs Phase 2 reading times (extreme durations excluded),
# with a linear trend line to eyeball their association.
p_time2 <- df |>
  filter(Instruction_Duration1 < 200 & Instruction_Duration2 < 100) |>
  ggplot(aes(x = Instruction_Duration1, y = Instruction_Duration2)) +
  geom_point(alpha = 0.5, size = 3) +
  geom_smooth(formula = "y ~ x", method = "lm") +
  theme_minimal()

# Stack the two figures vertically (patchwork operator)
p_time1 / p_time2

Code
# outliers$expe_duration <- filter(df, Experiment_Duration < 15)$Participant

Response Coherence

Code
# Per-participant response coherence: rank correlation between the
# Attractiveness and Beauty ratings (the two scales should broadly agree
# within a participant).
dfcoherence <- dftask |>
  group_by(Participant) |>
  summarize(r = cor(Attractiveness, Beauty, method = "spearman")) |>
  arrange(r) |>
  # Freeze the sorted order as factor levels so the bars plot low -> high
  mutate(Participant = factor(Participant, levels = Participant))

# One bar per participant; the dashed line marks the exclusion threshold (r < 0.1)
dfcoherence |>
  ggplot(aes(x = Participant, y = r)) +
  geom_col() +
  geom_hline(yintercept = 0.1, color = "red", linetype = "dashed") +
  ggside::geom_ysidedensity(fill = "darkgrey", color = NA) +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank())

Code
# Flag participants whose Attractiveness-Beauty correlation falls below 0.1
outliers$coherence <- dfcoherence |> filter(r < 0.1) |> pull(Participant)

We removed 7 (2.92%) participants who did not show coherence in their responses (Spearman correlation between Attractiveness and Beauty ratings < 0.1).

Code
# Drop incoherent responders from both the participant- and trial-level data
df <- df |> filter(!Participant %in% outliers$coherence)
dftask <- dftask |> filter(!Participant %in% outliers$coherence)
Code
# Within-participant variability on the HEXACO: for each of the six
# dimensions, compute the SD of that dimension's items across each row
# (participant). A near-zero mean SD suggests straight-lining.
items <- select(df, starts_with("HEXACO18")) |> 
  select(ends_with("_NR"), ends_with("_R")) 

for(d in c("HonestyHumility", "Emotionality", "Extraversion", "Agreeableness",
           "Conscientiousnes", "Openness")) {
  # NOTE(review): "Conscientiousnes" (single final s) still grepl-matches
  # "Conscientiousness" columns; confirm the spelling used in the raw data.
  # Row-wise SD of this dimension's items. Replaces the former
  # as.data.frame(t(...)) + sapply construction, which double-coerced the
  # data, with a direct apply over rows (same values, one coercion).
  items[paste0(d, "_SD")] <- apply(items[grepl(d, names(items))], 1, sd, na.rm = TRUE)
}

sds <- select(items, ends_with("_SD"))
sds$Mean <- rowMeans(sds)  # average SD across the six dimensions
sds$Participant <- df$Participant

# Participants ordered by their mean HEXACO item-level SD: very low values
# indicate identical answers across items, very high values erratic ones.
sds |> 
  mutate(Participant = fct_reorder(Participant, Mean)) |> 
  ggplot(aes(x = Participant, y = Mean)) +
  geom_col() +
  # Candidate exclusion bounds, kept for reference:
  # geom_hline(yintercept = c(0.25, 2.25), color = "red", linetype = "dashed") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  ggside::geom_ysidedensity(fill = "darkgrey", color = NA) +
  ggside::scale_ysidex_continuous(expand = c(0, 0)) +
  ggside::theme_ggside_void() +
  labs(y = "Mean SD of each HEXACO dimension", x = "Participants")

Code
# Record (but, per below, do not exclude) participants with extreme mean SDs
outliers$coherence2 <- sds |> filter(Mean < 0.25 | Mean > 2.25) |> pull(Participant)

We did not remove participants based on the HEXACO scores, but we provide the information for transparency.

Code
# df <- filter(df, !Participant %in% outliers$coherence2)
# dftask <- filter(dftask, !Participant %in% outliers$coherence2)

Manipulation

Code
# Manipulation check: distribution of participants' confidence that ALL
# images were real (or all fake). High confidence means the cover story
# (a mixture of real and AI-generated faces) was not believed.
df |> 
  select(Participant, Feedback_AllRealConfidence, Feedback_AllFakeConfidence) |> 
  pivot_longer(-Participant, values_to = "Confidence") |> 
  drop_na(Confidence) |> 
  mutate(name = ifelse(grepl("Real", name), "All images are real", "All images are fake")) |> 
  ggplot(aes(x = Confidence, fill = name)) +
  geom_bar() +
  facet_grid(cols = vars(name)) +
  theme_minimal() +
  theme(legend.position = "none")

Code
# Participants highly confident (>= 4) that the stimulus set was all real or all fake
outliers$manipulation <- df |>
  filter(Feedback_AllRealConfidence >= 4 | Feedback_AllFakeConfidence >= 4) |>
  pull(Participant)

We removed 16 (6.87%) participants who did not believe the manipulation, being fully confident that all the images were real (or that all of them were fake).

Code
# Exclude participants who did not believe the manipulation
df <- df |> filter(!Participant %in% outliers$manipulation)
dftask <- dftask |> filter(!Participant %in% outliers$manipulation)

Gender and Sex

Code
# Participants answering "Other" for gender or sexual orientation
# (the Relevance coding above is undefined for them)
outliers$valid <- df |>
  filter(Gender == "Other" | SexualOrientation == "Other") |>
  pull(Participant)

We removed 11 (5.07%) participants who answered “Other” for gender or sexual orientation, as the stimulus-relevance coding is undefined for these responses.

Code
# Remove those participants from both datasets
df <- df |> filter(!Participant %in% outliers$valid)
dftask <- dftask |> filter(!Participant %in% outliers$valid)

Final Sample

Age

Code
p_age <- estimate_density(df$Age) |>
  normalize(select = y) |>
  mutate(y = y * 86) |>  # To match the binwidth
  ggplot(aes(x = x)) +
  geom_histogram(data=df, aes(x = Age, fill=Gender), bins=28) +
  # geom_line(aes(y = y), color = "orange", linewidth=2) +
  geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
  # geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
  scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292", "Other"="orange")) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
  theme_modern(axis.title.space = 10) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.y = element_text(size = rel(1.1)),
    axis.text.x = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )
p_age

Education

Code
# Bar chart of highest achieved education level, ordered from
# high school up to doctorate.
p_edu <- df |>
  mutate(Education = fct_relevel(Education, "High school", "Bachelor", "Master", "Doctorate")) |>
  ggplot(aes(x = Education, fill = Education)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
  scale_fill_viridis_d(guide = "none") +
  labs(title = "Education", y = "Number of Participants",
       subtitle = "Participants per achieved education level") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.x = element_text(size = rel(1.1)),
    axis.text.y = element_text(size = rel(1.1)),
    axis.title.x = element_blank()
  )
p_edu

Sexual Profile

Code
# Counts of sexual orientation, stacked by gender
df |>
  ggplot(aes(x = SexualOrientation, fill = Gender)) +
  geom_bar() +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
  scale_fill_manual(values = c("Female" = "#F06292", "Male" = "#64B5F6")) +
  labs(title = "Sexual Orientation of Participants") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
    plot.subtitle = element_text(size = rel(1.2), vjust = 7),
    axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
    axis.text.y = element_text(size = rel(1.1)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

Summary

The final sample includes 206 participants (Mean age = 27.8, SD = 13.6, range: [18, 69]; Gender: 76.7% women, 23.3% men, 0.00% non-binary; Education: Bachelor, 33.98%; Doctorate, 3.88%; High school, 43.20%; Master, 18.45%; Other, 0.49%; Country: 60.68% United Kingdom, 14.08% Spain, 25.24% other).

Save

# Persist the cleaned participant- and trial-level datasets
# (paths are relative to this document's folder)
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data_task.csv", row.names = FALSE)