Gold Standard
library(tidyverse)
library(here)
library(readxl)
library(report)
# list the gold standard coding files
files <- list.files(here("exercises/gs"), pattern = ".xlsx", full.names = TRUE)

# read the files and stack them into one data frame
gs <-
  files %>%
  map(read_xlsx) %>%
  # ensure sentiment_overall is numeric in every file so the rows can be stacked
  map(~ .x %>% mutate(sentiment_overall = as.numeric(sentiment_overall))) %>%
  list_rbind()
gs %>%
  report_table() %>%
  display()
Variable               | n_Obs | % Missing |   Mean |     SD | Median |    MAD | Min |  Max | Skewness | Kurtosis
------------------------------------------------------------------------------------------------------------------
coder_id               |  1200 |      0.00 |        |        |        |        |     |      |          |
comment_id             |  1200 |      0.00 | 800.50 | 346.55 | 800.50 | 444.78 | 201 | 1400 |     0.00 |    -1.20
excl_other             |  1200 |      0.00 |   0.07 |   0.25 |   0.00 |   0.00 |   0 |    1 |     3.48 |    10.12
ref_persona            |  1200 |      0.92 |   0.76 |   0.43 |        |   0.00 |   0 |    2 |    -1.18 |    -0.50
sentiment_overall      |  1200 |     15.75 |   0.56 |   0.58 |        |   0.00 |  -1 |    1 |    -0.91 |    -0.17
ambiguity              |  1200 |     15.75 |   0.18 |   0.39 |        |   0.00 |  -1 |    1 |     1.53 |     0.69
comprehension          |  1200 |     15.75 |   0.18 |   0.39 |        |   0.00 |   0 |    1 |     1.66 |     0.76
memory_relate          |  1200 |     15.75 |   0.06 |   0.24 |        |   0.00 |   0 |    1 |     3.70 |    11.70
self_relate            |  1200 |     15.83 |   0.13 |   0.34 |        |   0.00 |   0 |    1 |     2.19 |     2.82
evaluation             |  1200 |     15.75 |   0.55 |   0.52 |        |   0.00 |  -1 |    1 |    -0.44 |    -1.31
future                 |  1200 |     15.83 |   0.23 |   0.42 |        |   0.00 |   0 |    1 |     1.31 |    -0.27
sympathy_antipathy     |  1200 |     15.75 |   0.43 |   0.49 |        |   0.00 |   0 |    1 |     0.30 |    -1.91
empathy_counterempathy |  1200 |     15.92 |   0.07 |   0.26 |        |   0.00 |   0 |    1 |     3.28 |     8.76
emotion_release        |  1200 |     15.75 |   0.07 |   0.25 |        |   0.00 |   0 |    1 |     3.43 |     9.78
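A quick way to sanity-check the stacked gold standard file is to tabulate codings per coder and per comment; a minimal sketch using the coder_id and comment_id columns shown above:

# number of codings contributed by each coder
gs %>% count(coder_id)

# how often each comment appears in the gold standard
gs %>% count(comment_id) %>% count(n)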
CFA
library(lavaan)

This is lavaan 0.6-16
lavaan is FREE software! Please report any bugs.

library(parameters)
# define the two-factor CFA model
cfa <- 'cognitive =~ comprehension + memory_relate + self_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lavaan::lavaan(model = cfa, data = gs, estimator = "MLM", model.type = "cfa", : lavaan WARNING:
the optimizer warns that a solution has NOT been found!
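Before deciding which indicator to drop, it can help to check how often each code was actually assigned; a minimal sketch, assuming the item columns are contiguous and ordered as in the descriptives above:

# mean of each code among non-missing codings (low values flag rarely used codes;
# evaluation also takes the value -1, so its entry is a mean rather than a share)
gs %>%
  summarise(across(comprehension:emotion_release, ~ mean(.x, na.rm = TRUE)))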
# remove comprehension (very few cases)
cfa <- 'cognitive =~ self_relate + memory_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
variances are negative
model_parameters(cfa_fit) %>% display()
# Loading

Link                                | Coefficient |       SE |           95% CI |     z |      p
--------------------------------------------------------------------------------------------------
cognitive =~ self_relate            |        1.00 |     0.00 |     (1.00, 1.00) |       | < .001
cognitive =~ memory_relate          |       -0.21 |     0.14 |    (-0.48, 0.06) | -1.52 | 0.128
cognitive =~ evaluation             |        1.32 |     0.58 |     (0.19, 2.46) |  2.29 | 0.022
cognitive =~ future                 |        0.42 |     0.24 |    (-0.06, 0.90) |  1.70 | 0.089
affective =~ sympathy_antipathy     |        1.00 |     0.00 |     (1.00, 1.00) |       | < .001
affective =~ empathy_counterempathy |        1.14 |     0.28 |     (0.59, 1.70) |  4.04 | < .001
affective =~ emotion_release        |        0.92 |     0.24 |     (0.46, 1.38) |  3.91 | < .001

# Correlation

Link                                | Coefficient |       SE |           95% CI |     z |      p
--------------------------------------------------------------------------------------------------
cognitive ~~ affective              |    7.40e-03 | 2.69e-03 | (2.12e-03, 0.01) |  2.75 | 0.006
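Given the warning about negative latent variable variances, it is worth looking at the estimated latent (co)variances and at the global fit of the retained model; a hedged sketch using standard lavaan helpers:

# estimated latent variances/covariances (a negative diagonal element reproduces the warning above)
lavInspect(cfa_fit, "cov.lv")

# global fit indices (scaled/robust variants are also available for the MLM estimator)
fitMeasures(cfa_fit, c("cfi", "tli", "rmsea", "srmr"))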
IRT
See also https://philippmasur.de/2022/05/13/how-to-run-irt-analyses-in-r/
library(mirt)
irt_data <-
  gs %>%
  # keep the eight coding items (columns 7 onwards: comprehension ... emotion_release)
  select(7:last_col()) %>%
  # recode -1 to NA so that evaluation is dichotomous (0/1), as required for 3PL items
  mutate(evaluation = if_else(evaluation == -1, NA, evaluation))

model <- '
cognitive = 1-5
affective = 6-8
COV = F1*F2'
# note: F1/F2 do not match the factor names defined above (cognitive, affective), so the factor
# covariance is not actually estimated here (the factor correlation in the output below is 0);
# COV = cognitive*affective would request it

fit3PL <- mirt(data = irt_data,
               model = model,
               # model = 2,
               itemtype = "3PL",
               verbose = FALSE,
               technical = list(NCYCLES = 5000))
fit3PL
Call:
mirt(data = irt_data, model = model, itemtype = "3PL", verbose = FALSE,
technical = list(NCYCLES = 5000))
Full-information item factor analysis with 2 factor(s).
Converged within 1e-04 tolerance after 3849 EM iterations.
mirt version: 1.41
M-step optimizer: BFGS
EM acceleration: Ramsay
Number of rectangular quadrature: 31
Latent density type: Gaussian
Log-likelihood = -3484.755
Estimated parameters: 24
AIC = 7017.509
BIC = 7135.558; SABIC = 7059.332

summary(fit3PL)

cognitive affective h2
comprehension 0.880 0.000 0.7737
memory_relate 0.992 0.000 0.9848
self_relate 0.994 0.000 0.9882
evaluation 0.202 0.000 0.0407
future 0.839 0.000 0.7033
sympathy_antipathy 0.000 0.477 0.2274
empathy_counterempathy 0.000 0.897 0.8042
emotion_release 0.000 0.903 0.8150
SS loadings: 3.491 1.847
Proportion Var: 0.436 0.231
Factor correlations:
cognitive affective
cognitive 1
affective 0 1
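Beyond the factor loadings, the estimated item parameters and person scores can be pulled out of the fitted object; a hedged sketch using standard mirt helpers:

# item parameters in slope-intercept form (a1, a2 = discriminations, d = intercept, g = lower asymptote)
coef(fit3PL, simplify = TRUE)

# EAP person scores on the cognitive and affective dimensions
head(fscores(fit3PL))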
Centering
See also https://philippmasur.de/2018/05/23/how-to-center-in-multilevel-models/
# vars is assumed to hold the names of the variables to be centered
... %>%
  # grand mean centering: deviation from the overall mean
  mutate(
    across(all_of(vars),
           ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_grand_mean_centered")
  ) %>%
  group_by(author_channel_id) %>%
  mutate(
    # person means: each commenter's own average
    across(all_of(vars),
           ~ mean(.x, na.rm = TRUE), .names = "{col}_person_mean"),
    # within-person centering: deviation from the commenter's own mean
    across(all_of(vars),
           ~ .x - get(glue::glue("{cur_column()}_person_mean")), .names = "{col}_centered_within_person")
  ) %>%
  ungroup() %>%
  # between-person part: person means centered at the grand mean
  mutate(
    across(all_of(str_c(vars, "_person_mean")),
           ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_centered")
  ) %>%
  ...
This has already been done in the scored comments data used below.
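For readers unfamiliar with the three variants, a minimal toy example (hypothetical data; a single variable x stands in for vars) shows what the centered columns contain:

library(tidyverse)

toy <- tibble(
  author_channel_id = c("a", "a", "b", "b"),
  x = c(1, 3, 5, 7)
)

toy %>%
  mutate(x_grand_mean_centered = x - mean(x)) %>%                        # grand mean = 4
  group_by(author_channel_id) %>%
  mutate(
    x_person_mean = mean(x),                                             # 2 for "a", 6 for "b"
    x_centered_within_person = x - x_person_mean                         # deviations from own mean: -1, 1
  ) %>%
  ungroup() %>%
  mutate(x_person_mean_centered = x_person_mean - mean(x_person_mean))   # between-person part: -2, -2, 2, 2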
Modeling
library(here)
library(arrow)
options(arrow.skip_nul = TRUE)
library(lme4)
library(lmerTest)
library(parameters)

d <- read_parquet(here("scored_comments.parquet.gzip"))
# example outcome: verbal immediacy
# fit the growth model only if it has not been cached yet
if (!file.exists(here("models/growthmodel_vi.rds"))) {
  lmer(verbal_immediacy ~
         weeks_since_first_comment +
         weeks_since_last_comment_centered_within_person +
         weeks_since_last_comment_person_mean_centered +
         weeks_since_last_video_centered_within_channel +
         weeks_since_last_video_channel_mean_centered +
         (1 | author_channel_id) +
         (1 | channel_name),
       data = d) %>%
    write_rds(here("models/growthmodel_vi.rds"))
}

read_rds(here("models/growthmodel_vi.rds")) %>%
  parameters() %>%
  display(digits = 4, zap_small = TRUE)
# Fixed Effects

Parameter                                       | Coefficient |     SE |        95% CI |        t |      p
------------------------------------------------------------------------------------------------------------
(Intercept)                                     |      0.6950 | 0.1964 |  (0.31, 1.08) |   3.5389 | < .001
weeks since first comment                       |     -0.0021 | 0.0001 |  (0.00, 0.00) | -14.4931 | < .001
weeks since last comment centered within person |      0.0010 | 0.0004 |  (0.00, 0.00) |   2.9251 | 0.003
weeks since last comment person mean centered   |     -0.0020 | 0.0008 |  (0.00, 0.00) |  -2.5941 | 0.009
weeks since last video centered within channel  |      0.0052 | 0.0155 | (-0.03, 0.04) |   0.3369 | 0.736
weeks since last video channel mean centered    |     -0.0774 | 0.1857 | (-0.44, 0.29) |  -0.4167 | 0.677

# Random Effects

Parameter                         | Coefficient
------------------------------------------------
SD (Intercept: author_channel_id) |      0.9038
SD (Intercept: channel_name)      |      0.6514
SD (Residual)                     |      2.3956
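When several outcomes are modelled, the cache-then-fit pattern above can be wrapped in a small helper; a hedged sketch (the helper name is hypothetical, the predictors and the verbal immediacy example are the ones used above):

# hypothetical helper: fit a growth model for a given outcome, or load it from the cache
fit_growth_model <- function(outcome, data, path) {
  if (!file.exists(path)) {
    f <- reformulate(
      c("weeks_since_first_comment",
        "weeks_since_last_comment_centered_within_person",
        "weeks_since_last_comment_person_mean_centered",
        "weeks_since_last_video_centered_within_channel",
        "weeks_since_last_video_channel_mean_centered",
        "(1 | author_channel_id)",
        "(1 | channel_name)"),
      response = outcome
    )
    lmer(f, data = data) %>% write_rds(path)
  }
  read_rds(path)
}

fit_growth_model("verbal_immediacy", d, here("models/growthmodel_vi.rds")) %>%
  parameters() %>%
  display(digits = 4, zap_small = TRUE)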