align_topics.Rd
This function takes a list of LDA models and returns an object of class
alignment
. Each element in the models list must itself be a named
list containing the mixed memberships ($gamma
) and topics
($beta
). The resulting alignment object can be plotted using `plot`
and its weights can be extracted using the `weights` accessor function. See
the documentation for class alignment
for further details.
align_topics(models, method = "product", ...)
(required) A list of LDA model objects. Each list component
must be a list with two named entries, $gamma (containing mixed memberships)
and $beta (containing topic parameters in log space). See
run_lda_models
for details.
(optional) Either product
or transport
, giving
two types of alignment strategies, using inner products between gamma vectors
or optimal transport between gamma-beta pairs, respectively. Defaults to
product
.
(optional) Further keyword arguments passed to the weight
function. For example, passing reg = 10
when using the
transport
method will set the regularization level to 10 in the Sinkhorn
optimal transport algorithm.
An object of class alignment
providing the weights between
every pair of topics for each pair of models in the input edgelist
(comparisons
).
After topics are aligned, they are re-ordered such that topics connected by high weights are ranked similarly within their respective models.
Topic paths (sets of topics connected by high weights across
models) are then identified and alignment diagnostics (topic refinement and
coherence scores) are computed. These variables are included in the
topics
container of the returned alignment
.
alignment
library(purrr)
data <- rmultinom(10, 20, rep(0.1, 20))
lda_params <- setNames(map(1:5, ~ list(k = .)), 1:5)
lda_models <- run_lda_models(data, lda_params)
#> Using default value 'VEM' for 'method' LDA parameter.
#> Using default value 'VEM' for 'method' LDA parameter.
#> Using default value 'VEM' for 'method' LDA parameter.
#> Using default value 'VEM' for 'method' LDA parameter.
#> Using default value 'VEM' for 'method' LDA parameter.
alignment <- align_topics(lda_models)
alignment
#> # An alignment: 5 models, 15 topics:
#> # A tibble: 6 × 8
#> m m_next k k_next weight document_mass bw_weight fw_weight
#> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 1 1 0.500 10.0 1 0.500
#> 2 1 2 1 2 0.500 10.0 1 0.500
#> 3 1 3 1 1 0.333 6.67 1 0.333
#> 4 1 3 1 2 0.333 6.67 1 0.333
#> 5 1 3 1 3 0.333 6.67 1 0.333
#> 6 1 4 1 1 0.250 5.00 1 0.250
#> # ... with 79 more rows
plot(alignment)
plot(alignment, color_by = "refinement")
alignment <- align_topics(lda_models, method = "transport")
plot(alignment)
plot_beta(alignment)
topics(alignment)
#> # A tibble: 15 × 8
#> m k k_label mass prop path coherence refinement
#> <fct> <int> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
#> 1 1 1 1 20 1 4 0.321 1
#> 2 2 1 1 10.0 0.500 1 0.346 1.06
#> 3 2 2 2 10.0 0.500 4 0.353 1.06
#> 4 3 1 1 6.67 0.333 1 0.430 1.28
#> 5 3 2 2 6.67 0.333 4 0.338 1.17
#> 6 3 3 3 6.67 0.333 5 0.478 1.12
#> 7 4 1 1 5.00 0.250 1 0.406 1.23
#> 8 4 2 4 5.00 0.250 4 0.296 1.17
#> 9 4 3 3 5.00 0.250 5 0.342 1.04
#> 10 4 4 2 5.00 0.250 5 0.281 1.07
#> 11 5 1 5 4.00 0.200 1 0.328 NA
#> 12 5 2 2 4.00 0.200 2 0 NA
#> 13 5 3 3 4.00 0.200 3 0 NA
#> 14 5 4 4 4.00 0.200 4 0.258 NA
#> 15 5 5 1 4.00 0.200 5 0.396 NA
weights(alignment)
#> # A tibble: 85 × 8
#> m m_next k k_next weight document_mass bw_weight fw_weight
#> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 1 1 0.500 10.0 1 0.500
#> 2 1 2 1 2 0.500 10.0 1 0.500
#> 3 1 3 1 1 0.333 6.67 1 0.333
#> 4 1 3 1 2 0.333 6.67 1 0.333
#> 5 1 3 1 3 0.333 6.67 1 0.333
#> 6 1 4 1 1 0.250 5.00 1 0.250
#> 7 1 4 1 2 0.250 5.00 1 0.250
#> 8 1 4 1 3 0.250 5.00 1 0.250
#> 9 1 4 1 4 0.250 5.00 1 0.250
#> 10 1 5 1 1 0.200 4.00 1 0.200
#> # … with 75 more rows
models(alignment)
#> $`1`
#> $`1`$beta
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -2.302585 -2.302585 -2.302585 -2.302585 -2.302585 -2.302585 -2.302585
#> [,8] [,9] [,10]
#> [1,] -2.302585 -2.302585 -2.302585
#>
#> $`1`$gamma
#> [,1]
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 1
#> 6 1
#> 7 1
#> 8 1
#> 9 1
#> 10 1
#> 11 1
#> 12 1
#> 13 1
#> 14 1
#> 15 1
#> 16 1
#> 17 1
#> 18 1
#> 19 1
#> 20 1
#>
#>
#> $`2`
#> $`2`$beta
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -2.221426 -2.410574 -1.720834 -2.184779 -2.177212 -3.137572 -3.09902
#> [2,] -2.390900 -2.205147 -3.858581 -2.436120 -2.445931 -1.854052 -1.86500
#> [,8] [,9] [,10]
#> [1,] -2.060149 -3.086652 -1.999268
#> [2,] -2.623200 -1.868628 -2.739969
#>
#> $`2`$gamma
#> [,1] [,2]
#> 1 0.4991603 0.5008397
#> 2 0.4994551 0.5005449
#> 3 0.5058616 0.4941384
#> 4 0.5014040 0.4985960
#> 5 0.4992529 0.5007471
#> 6 0.5061223 0.4938777
#> 7 0.5018619 0.4981381
#> 8 0.4951765 0.5048235
#> 9 0.5025168 0.4974832
#> 10 0.4948057 0.5051943
#> 11 0.4951915 0.5048085
#> 12 0.5098842 0.4901158
#> 13 0.4965243 0.5034757
#> 14 0.4965605 0.5034395
#> 15 0.5031980 0.4968020
#> 16 0.5017252 0.4982748
#> 17 0.5040604 0.4959396
#> 18 0.4880635 0.5119365
#> 19 0.4985139 0.5014861
#> 20 0.5006574 0.4993426
#>
#>
#> $`3`
#> $`3`$beta
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -3.332004 -3.606467 -1.649381 -2.038331 -2.105024 -2.007271 -2.815123
#> [2,] -2.075926 -1.981425 -2.693847 -2.585517 -2.029138 -2.628254 -1.905330
#> [3,] -1.974458 -2.002635 -3.213539 -2.360258 -3.063681 -2.370441 -2.393242
#> [,8] [,9] [,10]
#> [1,] -2.888347 -2.328944 -1.927204
#> [2,] -2.004245 -2.465614 -3.836720
#> [3,] -2.211180 -2.140108 -2.018361
#>
#> $`3`$gamma
#> [,1] [,2] [,3]
#> 1 0.3325723 0.3352247 0.3322030
#> 2 0.3298021 0.3351823 0.3350156
#> 3 0.3362974 0.3294615 0.3342411
#> 4 0.3325966 0.3345156 0.3328878
#> 5 0.3334656 0.3323240 0.3342105
#> 6 0.3322283 0.3299747 0.3377970
#> 7 0.3340599 0.3333628 0.3325772
#> 8 0.3327255 0.3326514 0.3346231
#> 9 0.3364484 0.3316156 0.3319361
#> 10 0.3325973 0.3347754 0.3326272
#> 11 0.3320970 0.3336542 0.3342488
#> 12 0.3330936 0.3343542 0.3325522
#> 13 0.3330419 0.3313348 0.3356233
#> 14 0.3292177 0.3352328 0.3355494
#> 15 0.3334240 0.3337756 0.3328004
#> 16 0.3367173 0.3354961 0.3277867
#> 17 0.3356388 0.3319756 0.3323855
#> 18 0.3347148 0.3334221 0.3318631
#> 19 0.3324965 0.3340851 0.3334185
#> 20 0.3334168 0.3342319 0.3323513
#>
#>
#> $`4`
#> $`4`$beta
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -3.405719 -3.460417 -1.795135 -1.782505 -3.082655 -1.820099 -2.955589
#> [2,] -2.106813 -3.061481 -1.978934 -4.158973 -1.258122 -3.204228 -2.003133
#> [3,] -2.103817 -1.887045 -3.393542 -2.169454 -4.013279 -2.228504 -2.583270
#> [4,] -2.094642 -1.770273 -2.779361 -2.281660 -2.965194 -2.409534 -1.984677
#> [,8] [,9] [,10]
#> [1,] -2.844493 -2.430800 -1.634480
#> [2,] -2.545233 -2.076160 -4.257208
#> [3,] -2.213089 -2.263457 -1.807773
#> [4,] -1.870613 -2.494022 -3.613127
#>
#> $`4`$gamma
#> [,1] [,2] [,3] [,4]
#> 1 0.2485807 0.2524373 0.2484634 0.2505187
#> 2 0.2480394 0.2488276 0.2510311 0.2521020
#> 3 0.2538777 0.2465875 0.2515378 0.2479970
#> 4 0.2491370 0.2513116 0.2492277 0.2503237
#> 5 0.2506782 0.2480253 0.2514041 0.2498924
#> 6 0.2509556 0.2466908 0.2537923 0.2485613
#> 7 0.2501283 0.2511701 0.2492748 0.2494268
#> 8 0.2511021 0.2469523 0.2516092 0.2503364
#> 9 0.2517569 0.2507996 0.2491987 0.2482448
#> 10 0.2483532 0.2532372 0.2486794 0.2497301
#> 11 0.2498553 0.2476656 0.2512300 0.2512491
#> 12 0.2492752 0.2511833 0.2489430 0.2505986
#> 13 0.2512803 0.2466180 0.2524094 0.2496923
#> 14 0.2484132 0.2468431 0.2518889 0.2528547
#> 15 0.2496929 0.2500362 0.2491688 0.2511021
#> 16 0.2478059 0.2598803 0.2448340 0.2474798
#> 17 0.2512663 0.2505830 0.2494057 0.2487449
#> 18 0.2510349 0.2497210 0.2492582 0.2499859
#> 19 0.2502406 0.2477903 0.2503221 0.2516470
#> 20 0.2483267 0.2539404 0.2483024 0.2494305
#>
#>
#> $`5`
#> $`5`$beta
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] -2.261893 -2.473429 -1.742864 -2.323325 -2.306845 -1.855270 -3.301645
#> [2,] -3.169762 -2.294757 -1.873941 -3.606212 -1.915103 -2.324456 -2.357774
#> [3,] -2.479596 -2.410263 -2.558437 -1.433365 -2.445166 -2.987307 -2.336730
#> [4,] -2.903589 -1.972317 -2.974791 -3.250614 -2.232442 -3.226686 -1.839713
#> [5,] -1.535926 -2.453693 -3.146832 -2.326179 -2.828222 -1.860064 -2.180139
#> [,8] [,9] [,10]
#> [1,] -3.170051 -3.365868 -1.776413
#> [2,] -1.783501 -2.724226 -2.272163
#> [3,] -2.232565 -2.363111 -2.585708
#> [4,] -2.819434 -1.332827 -2.441353
#> [5,] -2.094758 -3.171599 -2.729238
#>
#> $`5`$gamma
#> [,1] [,2] [,3] [,4] [,5]
#> 1 0.1988354 0.1999509 0.2005477 0.2006266 0.2000394
#> 2 0.2003111 0.1986580 0.1999507 0.1988605 0.2022197
#> 3 0.2014888 0.1990114 0.2010836 0.1986758 0.1997404
#> 4 0.1989111 0.2004324 0.2004298 0.2002658 0.1999608
#> 5 0.2006196 0.1999597 0.1997539 0.2010203 0.1986465
#> 6 0.2018181 0.1990455 0.1986763 0.2000360 0.2004242
#> 7 0.2005605 0.2007199 0.1991728 0.1990451 0.2005016
#> 8 0.2003444 0.2004047 0.1989344 0.1997965 0.2005200
#> 9 0.2013548 0.2006844 0.1992566 0.1992264 0.1994778
#> 10 0.1986928 0.1996971 0.2002557 0.2013088 0.2000457
#> 11 0.2005502 0.1999800 0.1991934 0.1997135 0.2005629
#> 12 0.2002723 0.1997958 0.2012373 0.1971296 0.2015651
#> 13 0.2002065 0.1989658 0.2004991 0.2003939 0.1999348
#> 14 0.1973402 0.1998782 0.2015896 0.2017980 0.1993940
#> 15 0.1997424 0.2002182 0.1998508 0.1999061 0.2002825
#> 16 0.2003941 0.2020528 0.1987750 0.2009385 0.1978396
#> 17 0.2001219 0.2004371 0.2005367 0.2000490 0.1988554
#> 18 0.2005129 0.2002672 0.1990028 0.1991482 0.2010689
#> 19 0.1995374 0.1994838 0.2007590 0.2002648 0.1999550
#> 20 0.1984139 0.2003351 0.2004687 0.2018189 0.1989635
#>
#>