Gold Standard
library(tidyverse)
library(here)
library(readxl)
library(report)
# list the gold standard coding files
files <- list.files(here("exercises/gs"), pattern = ".xlsx", full.names = TRUE)

# read the files and stack them into one data frame
gs <-
  files %>%
  map(read_xlsx) %>%
  # ensure sentiment_overall is numeric in every file so the rows can be stacked
  map(~ .x %>% mutate(sentiment_overall = as.numeric(sentiment_overall))) %>%
  list_rbind()
gs %>%
  report_table() %>%
  display()
Variable               | n_Obs | % Missing |   Mean |     SD | Median |    MAD | Min |  Max | Skewness | Kurtosis
------------------------------------------------------------------------------------------------------------------
coder_id               |  1200 |      0.00 |        |        |        |        |     |      |          |
comment_id             |  1200 |      0.00 | 800.50 | 346.55 | 800.50 | 444.78 | 201 | 1400 |     0.00 |    -1.20
excl_other             |  1200 |      0.00 |   0.07 |   0.25 |   0.00 |   0.00 |   0 |    1 |     3.48 |    10.12
ref_persona            |  1200 |      0.92 |   0.76 |   0.43 |        |   0.00 |   0 |    2 |    -1.18 |    -0.50
sentiment_overall      |  1200 |     15.75 |   0.56 |   0.58 |        |   0.00 |  -1 |    1 |    -0.91 |    -0.17
ambiguity              |  1200 |     15.75 |   0.18 |   0.39 |        |   0.00 |  -1 |    1 |     1.53 |     0.69
comprehension          |  1200 |     15.75 |   0.18 |   0.39 |        |   0.00 |   0 |    1 |     1.66 |     0.76
memory_relate          |  1200 |     15.75 |   0.06 |   0.24 |        |   0.00 |   0 |    1 |     3.70 |    11.70
self_relate            |  1200 |     15.83 |   0.13 |   0.34 |        |   0.00 |   0 |    1 |     2.19 |     2.82
evaluation             |  1200 |     15.75 |   0.55 |   0.52 |        |   0.00 |  -1 |    1 |    -0.44 |    -1.31
future                 |  1200 |     15.83 |   0.23 |   0.42 |        |   0.00 |   0 |    1 |     1.31 |    -0.27
sympathy_antipathy     |  1200 |     15.75 |   0.43 |   0.49 |        |   0.00 |   0 |    1 |     0.30 |    -1.91
empathy_counterempathy |  1200 |     15.92 |   0.07 |   0.26 |        |   0.00 |   0 |    1 |     3.28 |     8.76
emotion_release        |  1200 |     15.75 |   0.07 |   0.25 |        |   0.00 |   0 |    1 |     3.43 |     9.78
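A quick way to sanity-check the stacked gold standard file is to tabulate codings per coder and per comment; a minimal sketch using the coder_id and comment_id columns shown above:

# number of codings contributed by each coder
gs %>% count(coder_id)

# how often each comment appears in the gold standard
gs %>% count(comment_id) %>% count(n)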
CFA
library(lavaan)

This is lavaan 0.6-16
lavaan is FREE software! Please report any bugs.

library(parameters)
# define the two-factor CFA model
cfa <- 'cognitive =~ comprehension + memory_relate + self_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lavaan::lavaan(model = cfa, data = gs, estimator = "MLM", model.type = "cfa", : lavaan WARNING:
the optimizer warns that a solution has NOT been found!
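Before deciding which indicator to drop, it can help to check how often each code was actually assigned; a minimal sketch, assuming the item columns are contiguous and ordered as in the descriptives above:

# mean of each code among non-missing codings (low values flag rarely used codes;
# evaluation also takes the value -1, so its entry is a mean rather than a share)
gs %>%
  summarise(across(comprehension:emotion_release, ~ mean(.x, na.rm = TRUE)))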
# remove comprehension (very few cases)
cfa <- 'cognitive =~ self_relate + memory_relate + evaluation + future
        affective =~ sympathy_antipathy + empathy_counterempathy + emotion_release'
cfa_fit <- cfa(cfa, estimator = "MLM", data = gs)
Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
variances are negative
model_parameters(cfa_fit) %>% display()
# Loading

Link                                | Coefficient |       SE |           95% CI |     z |      p
--------------------------------------------------------------------------------------------------
cognitive =~ self_relate            |        1.00 |     0.00 |     (1.00, 1.00) |       | < .001
cognitive =~ memory_relate          |       -0.21 |     0.14 |    (-0.48, 0.06) | -1.52 | 0.128
cognitive =~ evaluation             |        1.32 |     0.58 |     (0.19, 2.46) |  2.29 | 0.022
cognitive =~ future                 |        0.42 |     0.24 |    (-0.06, 0.90) |  1.70 | 0.089
affective =~ sympathy_antipathy     |        1.00 |     0.00 |     (1.00, 1.00) |       | < .001
affective =~ empathy_counterempathy |        1.14 |     0.28 |     (0.59, 1.70) |  4.04 | < .001
affective =~ emotion_release        |        0.92 |     0.24 |     (0.46, 1.38) |  3.91 | < .001

# Correlation

Link                                | Coefficient |       SE |           95% CI |     z |      p
--------------------------------------------------------------------------------------------------
cognitive ~~ affective              |    7.40e-03 | 2.69e-03 | (2.12e-03, 0.01) |  2.75 | 0.006
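Given the warning about negative latent variable variances, it is worth looking at the estimated latent (co)variances and at the global fit of the retained model; a hedged sketch using standard lavaan helpers:

# estimated latent variances/covariances (a negative diagonal element reproduces the warning above)
lavInspect(cfa_fit, "cov.lv")

# global fit indices (scaled/robust variants are also available for the MLM estimator)
fitMeasures(cfa_fit, c("cfi", "tli", "rmsea", "srmr"))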
IRT
See also https://philippmasur.de/2022/05/13/how-to-run-irt-analyses-in-r/
library(mirt)
irt_data <-
  gs %>%
  # keep the eight coding items (columns 7 onwards: comprehension ... emotion_release)
  select(7:last_col()) %>%
  # recode -1 to NA so that evaluation is dichotomous (0/1), as required for 3PL items
  mutate(evaluation = if_else(evaluation == -1, NA, evaluation))

model <- '
cognitive = 1-5
affective = 6-8
COV = F1*F2'
# note: F1/F2 do not match the factor names defined above (cognitive, affective), so the factor
# covariance is not actually estimated here (the factor correlation in the output below is 0);
# COV = cognitive*affective would request it

fit3PL <- mirt(data = irt_data,
               model = model,
               # model = 2,
               itemtype = "3PL",
               verbose = FALSE,
               technical = list(NCYCLES = 5000))
fit3PL
Call:
mirt(data = irt_data, model = model, itemtype = "3PL", verbose = FALSE,
technical = list(NCYCLES = 5000))
Full-information item factor analysis with 2 factor(s).
Converged within 1e-04 tolerance after 3849 EM iterations.
mirt version: 1.41
M-step optimizer: BFGS
EM acceleration: Ramsay
Number of rectangular quadrature: 31
Latent density type: Gaussian
Log-likelihood = -3484.755
Estimated parameters: 24
AIC = 7017.509
BIC = 7135.558; SABIC = 7059.332

summary(fit3PL)

cognitive affective h2
comprehension 0.880 0.000 0.7737
memory_relate 0.992 0.000 0.9848
self_relate 0.994 0.000 0.9882
evaluation 0.202 0.000 0.0407
future 0.839 0.000 0.7033
sympathy_antipathy 0.000 0.477 0.2274
empathy_counterempathy 0.000 0.897 0.8042
emotion_release 0.000 0.903 0.8150
SS loadings: 3.491 1.847
Proportion Var: 0.436 0.231
Factor correlations:
cognitive affective
cognitive 1
affective 0 1
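Beyond the factor loadings, the estimated item parameters and person scores can be pulled out of the fitted object; a hedged sketch using standard mirt helpers:

# item parameters in slope-intercept form (a1, a2 = discriminations, d = intercept, g = lower asymptote)
coef(fit3PL, simplify = TRUE)

# EAP person scores on the cognitive and affective dimensions
head(fscores(fit3PL))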
Centering
See also https://philippmasur.de/2018/05/23/how-to-center-in-multilevel-models/
# vars is assumed to hold the names of the variables to be centered
... %>%
  # grand mean centering: deviation from the overall mean
  mutate(
    across(all_of(vars),
           ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_grand_mean_centered")
  ) %>%
  group_by(author_channel_id) %>%
  mutate(
    # person means: each commenter's own average
    across(all_of(vars),
           ~ mean(.x, na.rm = TRUE), .names = "{col}_person_mean"),
    # within-person centering: deviation from the commenter's own mean
    across(all_of(vars),
           ~ .x - get(glue::glue("{cur_column()}_person_mean")), .names = "{col}_centered_within_person")
  ) %>%
  ungroup() %>%
  # between-person part: person means centered at the grand mean
  mutate(
    across(all_of(str_c(vars, "_person_mean")),
           ~ .x - mean(.x, na.rm = TRUE), .names = "{col}_centered")
  ) %>%
  ...
This has already been done in the scored comments data used below.
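For readers unfamiliar with the three variants, a minimal toy example (hypothetical data; a single variable x stands in for vars) shows what the centered columns contain:

library(tidyverse)

toy <- tibble(
  author_channel_id = c("a", "a", "b", "b"),
  x = c(1, 3, 5, 7)
)

toy %>%
  mutate(x_grand_mean_centered = x - mean(x)) %>%                        # grand mean = 4
  group_by(author_channel_id) %>%
  mutate(
    x_person_mean = mean(x),                                             # 2 for "a", 6 for "b"
    x_centered_within_person = x - x_person_mean                         # deviations from own mean: -1, 1
  ) %>%
  ungroup() %>%
  mutate(x_person_mean_centered = x_person_mean - mean(x_person_mean))   # between-person part: -2, -2, 2, 2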
Modeling
library(here)
library(arrow)
options(arrow.skip_nul = TRUE)
library(lme4)
library(lmerTest)
library(parameters)

d <- read_parquet(here("scored_comments.parquet.gzip"))
# example outcome: verbal immediacy
# fit the growth model only if it has not been cached yet
if (!file.exists(here("models/growthmodel_vi.rds"))) {
  lmer(verbal_immediacy ~
         weeks_since_first_comment +
         weeks_since_last_comment_centered_within_person +
         weeks_since_last_comment_person_mean_centered +
         weeks_since_last_video_centered_within_channel +
         weeks_since_last_video_channel_mean_centered +
         (1 | author_channel_id) +
         (1 | channel_name),
       data = d) %>%
    write_rds(here("models/growthmodel_vi.rds"))
}

read_rds(here("models/growthmodel_vi.rds")) %>%
  parameters() %>%
  display(digits = 4, zap_small = TRUE)
# Fixed Effects

Parameter                                       | Coefficient |     SE |        95% CI |        t |      p
------------------------------------------------------------------------------------------------------------
(Intercept)                                     |      0.6950 | 0.1964 |  (0.31, 1.08) |   3.5389 | < .001
weeks since first comment                       |     -0.0021 | 0.0001 |  (0.00, 0.00) | -14.4931 | < .001
weeks since last comment centered within person |      0.0010 | 0.0004 |  (0.00, 0.00) |   2.9251 | 0.003
weeks since last comment person mean centered   |     -0.0020 | 0.0008 |  (0.00, 0.00) |  -2.5941 | 0.009
weeks since last video centered within channel  |      0.0052 | 0.0155 | (-0.03, 0.04) |   0.3369 | 0.736
weeks since last video channel mean centered    |     -0.0774 | 0.1857 | (-0.44, 0.29) |  -0.4167 | 0.677

# Random Effects

Parameter                         | Coefficient
------------------------------------------------
SD (Intercept: author_channel_id) |      0.9038
SD (Intercept: channel_name)      |      0.6514
SD (Residual)                     |      2.3956
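When several outcomes are modelled, the cache-then-fit pattern above can be wrapped in a small helper; a hedged sketch (the helper name is hypothetical, the predictors and the verbal immediacy example are the ones used above):

# hypothetical helper: fit a growth model for a given outcome, or load it from the cache
fit_growth_model <- function(outcome, data, path) {
  if (!file.exists(path)) {
    f <- reformulate(
      c("weeks_since_first_comment",
        "weeks_since_last_comment_centered_within_person",
        "weeks_since_last_comment_person_mean_centered",
        "weeks_since_last_video_centered_within_channel",
        "weeks_since_last_video_channel_mean_centered",
        "(1 | author_channel_id)",
        "(1 | channel_name)"),
      response = outcome
    )
    lmer(f, data = data) %>% write_rds(path)
  }
  read_rds(path)
}

fit_growth_model("verbal_immediacy", d, here("models/growthmodel_vi.rds")) %>%
  parameters() %>%
  display(digits = 4, zap_small = TRUE)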