Exercise 05

Computational Analysis of Communication — Spring Term 2024

CCS
Exercise
Data Science
Author

Felix Dietrich

Gold Standard

library(tidyverse)
library(here)
library(readxl)
library(report)

# list directory
files <- list.files(here("exercises/gs"), pattern = ".xlsx", full.names = TRUE)

# read files
gs <- 
  files %>% 
  map(read_xlsx) %>% 
  map(~ .x %>% mutate(sentiment_overall = as.numeric(sentiment_overall))) %>% 
  list_rbind()

gs %>% 
  report_table() %>% 
  display()
Variable n_Obs percentage_Missing Mean SD Median MAD Min Max Skewness Kurtosis n_Entries n_Missing
coder_id 1200 0.00 6 0
comment_id 1200 0.00 800.50 346.55 800.50 444.78 201 1400 0.00 -1.20
excl_other 1200 0.00 0.07 0.25 0.00 0.00 0 1 3.48 10.12
ref_persona 1200 0.92 0.76 0.43 0.00 0 2 -1.18 -0.50
sentiment_overall 1200 15.75 0.56 0.58 0.00 -1 1 -0.91 -0.17
ambiguity 1200 15.75 0.18 0.39 0.00 -1 1 1.53 0.69
comprehension 1200 15.75 0.18 0.39 0.00 0 1 1.66 0.76
memory_relate 1200 15.75 0.06 0.24 0.00 0 1 3.70 11.70
self_relate 1200 15.83 0.13 0.34 0.00 0 1 2.19 2.82
evaluation 1200 15.75 0.55 0.52 0.00 -1 1 -0.44 -1.31
future 1200 15.83 0.23 0.42 0.00 0 1 1.31 -0.27
sympathy_antipathy 1200 15.75 0.43 0.49 0.00 0 1 0.30 -1.91
empathy_counterempathy 1200 15.92 0.07 0.26 0.00 0 1 3.28 8.76
emotion_release 1200 15.75 0.07 0.25 0.00 0 1 3.43 9.78

CFA

library(lavaan)
This is lavaan 0.6-16
lavaan is FREE software! Please report any bugs.
library(parameters)

# define cfa
cfa <- 'cognitive =~ comprehension + memory_relate + self_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lavaan::lavaan(model = cfa, data = gs, estimator = "MLM", model.type = "cfa", : lavaan WARNING:
    the optimizer warns that a solution has NOT been found!
# remove comprehension (very few cases)
cfa <- 'cognitive =~ self_relate + memory_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
variances are negative
model_parameters(cfa_fit) %>% display()
# Loading
Link Coefficient SE 95% CI z p
cognitive =~ self_relate 1.00 0.00 (1.00, 1.00) < .001
cognitive =~ memory_relate -0.21 0.14 (-0.48, 0.06) -1.52 0.128
cognitive =~ evaluation 1.32 0.58 (0.19, 2.46) 2.29 0.022
cognitive =~ future 0.42 0.24 (-0.06, 0.90) 1.70 0.089
affective =~ sympathy_antipathy 1.00 0.00 (1.00, 1.00) < .001
affective =~ empathy_counterempathy 1.14 0.28 (0.59, 1.70) 4.04 < .001
affective =~ emotion_release 0.92 0.24 (0.46, 1.38) 3.91 < .001
# Correlation
Link Coefficient SE 95% CI z p
cognitive ~~ affective 7.40e-03 2.69e-03 (2.12e-03, 0.01) 2.75 0.006

IRT

See also https://philippmasur.de/2022/05/13/how-to-run-irt-analyses-in-r/

library(mirt)

irt_data <- 
  gs %>%
  select(7:last_col()) %>% 
  mutate(evaluation = if_else(evaluation == -1, NA, evaluation))

model <- '
  cognitive = 1-5
  affective = 6-8
  COV = F1*F2'

fit3PL <- mirt(data = irt_data, 
               model = model,
               # model = 2,
               itemtype = "3PL", 
               verbose = FALSE,
               technical = list(NCYCLES = 5000))
fit3PL

Call:
mirt(data = irt_data, model = model, itemtype = "3PL", verbose = FALSE, 
    technical = list(NCYCLES = 5000))

Full-information item factor analysis with 2 factor(s).
Converged within 1e-04 tolerance after 3849 EM iterations.
mirt version: 1.41 
M-step optimizer: BFGS 
EM acceleration: Ramsay 
Number of rectangular quadrature: 31
Latent density type: Gaussian 

Log-likelihood = -3484.755
Estimated parameters: 24 
AIC = 7017.509
BIC = 7135.558; SABIC = 7059.332
summary(fit3PL)
                       cognitive affective     h2
comprehension              0.880     0.000 0.7737
memory_relate              0.992     0.000 0.9848
self_relate                0.994     0.000 0.9882
evaluation                 0.202     0.000 0.0407
future                     0.839     0.000 0.7033
sympathy_antipathy         0.000     0.477 0.2274
empathy_counterempathy     0.000     0.897 0.8042
emotion_release            0.000     0.903 0.8150

SS loadings:  3.491 1.847 
Proportion Var:  0.436 0.231 

Factor correlations: 

          cognitive affective
cognitive         1          
affective         0         1

Centering

See also https://philippmasur.de/2018/05/23/how-to-center-in-multilevel-models/

... %>% 
  mutate(
        across(all_of(vars),
               ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_grand_mean_centered")
      ) %>% 
      group_by(author_channel_id) %>%
      mutate(
        across(all_of(vars),
               ~ mean(.x, na.rm = TRUE), .names = "{col}_person_mean"),
        across(all_of(vars),
               ~ .x - get(glue::glue("{cur_column()}_person_mean")), .names = "{col}_centered_within_person")
      ) %>%
      ungroup() %>%
      mutate(
        across(all_of(str_c(vars, "_person_mean")),
               ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_centered")
      ) %>% 
  ...

Already done in scored comments

Modeling

library(here)
library(arrow)
options(arrow.skip_nul = TRUE)
library(lme4)
library(lmerTest)
library(parameters)

d <- read_parquet(here("scored_comments.parquet.gzip"))

# example:
# verbal immediacy
if(!file.exists(here("models/growthmodel_vi.rds"))) {
  lmer(verbal_immediacy ~
         weeks_since_first_comment + 
         weeks_since_last_comment_centered_within_person + 
         weeks_since_last_comment_person_mean_centered + 
         weeks_since_last_video_centered_within_channel +
         weeks_since_last_video_channel_mean_centered +
         (1 | author_channel_id) +
         (1 | channel_name),
       data = d) %>% 
    write_rds(here("models/growthmodel_vi.rds"))
}

read_rds(here("models/growthmodel_vi.rds")) %>%
  parameters() %>%
  display(digits = 4, zap_small = TRUE)
# Fixed Effects
Parameter Coefficient SE 95% CI t(244218) p
(Intercept) 0.6950 0.1964 (0.31, 1.08) 3.5389 < .001
weeks since first comment -0.0021 0.0001 (0.00, 0.00) -14.4931 < .001
weeks since last comment centered within person 0.0010 0.0004 (0.00, 0.00) 2.9251 0.003
weeks since last comment person mean centered -0.0020 0.0008 (0.00, 0.00) -2.5941 0.009
weeks since last video centered within channel 0.0052 0.0155 (-0.03, 0.04) 0.3369 0.736
weeks since last video channel mean centered -0.0774 0.1857 (-0.44, 0.29) -0.4167 0.677
# Random Effects
Parameter Coefficient
SD (Intercept: author_channel_id) 0.9038
SD (Intercept: channel_name) 0.6514
SD (Residual) 2.3956