Adapted version of distme function from the semdistflow package

This function just reads in a single document structured as a dataframe with no lemmatization

autoseg_vfz1 <- function(targetdf) {
    {
        message("Loading lookup databases and joining your data to SemDist15 and Glowca")
        # load lookup databases
        load(here("data", "glowca_lite.rda"))  #rounded 5,subtlex matched 60k
        load(here("data", "semdist15_new.rda"))
        dat <- targetdf
        joindf_semdist15 <- left_join(dat, semdist15_new, by = c("word")) %>%
            data.frame()
        joindf_glowca <- left_join(dat, glowca_lite, by = c("word")) %>%
            data.frame()
        # Select numeric columns for cosine calculations, eliminate columns
        # with string data
        dat_sd15 <- joindf_semdist15 %>%
            select_if(is.numeric)
        datglo <- joindf_glowca %>%
            select_if(is.numeric)
        # convert join dataframes containing hyperparameter values (glowca and
        # sd15) to matrices
        mat_sd15 <- as.matrix(dat_sd15)
        mat_glo <- as.matrix(datglo)
        # compute cosine distance for each running pair of words in sd15 and
        # glowca
        vals_sd15 <- unlist(lapply(2:nrow(mat_sd15), function(i) {
            lsa::cosine(mat_sd15[i - 1, ], mat_sd15[i, ])
        }))
        vals_glo <- unlist(lapply(2:nrow(mat_glo), function(i) {
            lsa::cosine(mat_glo[i - 1, ], mat_glo[i, ])
        }))
        # Convert matrices back to dataframes
        vals_sd15 <- data.frame(vals_sd15)
        vals_glo <- data.frame(vals_glo)
        # Rename first column of cosine distance values
        names(vals_sd15)[1] <- "Sd15_Cosine"
        names(vals_glo)[1] <- "Glo_Cosine"
        # Add NA to final row to match length, there is no pairwise distance
        # for final observation
        vals_sd15[nrow(vals_sd15) + 1, ] <- NA
        vals_glo[nrow(vals_glo) + 1, ] <- NA
        # Reverse scale the cosine values for gloca and sd15, subtract each obs
        # from max (1)
        sd15vals <- vals_sd15 %>%
            mutate(Sd15_CosRev0Score = 1 - Sd15_Cosine)
        glowcavals <- vals_glo %>%
            mutate(Glowca_CosRev0Score = 1 - Glo_Cosine)
        # Rebuild the dataframe creating bigram columns, cbind words to their
        # cosine values
        dat2 <- dat %>%
            ungroup() %>%
            mutate(word2 = lead(word, 1))  #rebuild dataframe, cast lead (+1) bigram
        # Bind the sd15 cosine data to the original dataframe
        mydists <- cbind(dat2, sd15vals, glowcavals)
        mydists <- mydists %>%
            mutate(zGlowcaRev = scale(Glowca_CosRev0Score))
        # convert to z-score beased on norming study in theory paper where
        # random bigram distance is .72 (normalized from 0 to 2) with SD=.22.
        # GLOWca reverse scores where 0 is zero distancescores where 0 is zero
        # distance
        mydists <- mydists %>%
            mutate(zSemdistRev = scale(Sd15_CosRev0Score))
        # convert to z-score beased on norming study in theory paper where
        # random bigram distance is .92 (normalized from 0 to 2) with SD=.42.
        # SemDist15 reverse scores where 0 is zero distance
        mydists <- mydists %>%
            mutate(Switch_GlowcaZ1 = ifelse(zGlowcaRev >= 1, 1, 0))  #add cluster break markers based on semantic distance z>1 where 1 is a cluster break and 0 is within category
        mydists <- mydists %>%
            mutate(Switch_SemDistZ1 = ifelse(zSemdistRev >= 1, 1, 0))  #add cluster break markers based on SemDist15 semantic distance z>1 where 1 is a cluster break and 0 is within category
        # output a formatted dataframe
        return(as_tibble(mydists))
    }
}

Simulate some data

animals, musical instruments, fruits alternating from fixed lists

# animals alternating with tools every ten words
music <- read.csv(here("data", "MusicalList.csv"))
animals <- read.csv(here("data", "AnimalList.csv"))
fruitveg <- read.csv(here("data", "FruitVegList.csv"))
set.seed(222)
a <- seq(1, 7500)
b <- as.vector(replicate(250, c(sample(animals$word, 10, replace = TRUE), sample(music$word,
    10, replace = T), sample(fruitveg$word, 10, replace = T))))
c <- lead(b, 1)  #lead 1 for bigram pair
d <- as.vector(replicate(250, c(rep("animals", 10), rep("music", 10), rep("fruitveg",
    10))))
e <- as.vector(replicate(250, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1)))
# lag vector for switch pair
all <- data.frame(cbind(a, b, c, d, e))
names(all)[1] <- "order"
names(all)[2] <- "word"
names(all)[3] <- "word2"
names(all)[4] <- "categ_cond"
names(all)[5] <- "switch_real"
head(all, n = 20)
order word word2 categ_cond switch_real
1 crow duck animals 0
2 duck eagle animals 0
3 eagle donkey animals 0
4 donkey porcupine animals 0
5 porcupine buffalo animals 0
6 buffalo camel animals 0
7 camel leopard animals 0
8 leopard panther animals 0
9 panther mouse animals 0
10 mouse trumpet animals 1
11 trumpet cello music 0
12 cello cymbal music 0
13 cymbal flute music 0
14 flute guitar music 0
15 guitar trumpet music 0
16 trumpet lute music 0
17 lute bell music 0
18 bell cymbal music 0
19 cymbal accordion music 0
20 accordion banana music 1
colnames(all)
## [1] "order"       "word"        "word2"       "categ_cond"  "switch_real"

Compute semantic distances z>1, rebuild dataframe

justwords <- all %>%
    dplyr::select("word")
vfdistsz1 <- autoseg_vfz1(justwords)
justswitch <- all %>%
    dplyr::select(switch_real, categ_cond)
together <- cbind(all$order, vfdistsz1, justswitch)
together$Switch_GlowcaZ1 <- as.factor(together$Switch_GlowcaZ1)
together$Switch_SemDistZ1 <- as.factor(together$Switch_SemDistZ1)
together$switch_real <- as.factor(together$switch_real)
head(together, n = 200)
all$order word word2 Sd15_Cosine Sd15_CosRev0Score Glo_Cosine Glowca_CosRev0Score zGlowcaRev zSemdistRev Switch_GlowcaZ1 Switch_SemDistZ1 switch_real categ_cond
1 crow duck 0.84 0.16 0.23 0.77 0.160589862 0.302903065 0 0 0 animals
2 duck eagle 0.91 0.09 0.24 0.76 0.098886135 -0.329085861 0 0 0 animals
3 eagle donkey 0.84 0.16 0.08 0.92 0.975452401 0.265856952 0 0 0 animals
4 donkey porcupine 0.91 0.09 0.29 0.71 -0.151001951 -0.299321898 0 0 0 animals
5 porcupine buffalo 0.89 0.11 0.19 0.81 0.402263075 -0.164206137 0 0 0 animals
6 buffalo camel 0.92 0.08 0.17 0.83 0.517067672 -0.358297069 0 0 0 animals
7 camel leopard 0.86 0.14 0.20 0.80 0.321143247 0.143028887 0 0 0 animals
8 leopard panther 0.98 0.02 0.30 0.70 -0.213181138 -0.887924732 0 0 0 animals
9 panther mouse 0.87 0.13 0.09 0.91 0.921122890 -0.003956326 0 0 0 animals
10 mouse trumpet 0.87 0.13 0.08 0.92 0.985499025 0.027828111 0 0 1 animals
11 trumpet cello 0.96 0.04 0.33 0.67 -0.376138446 -0.753103814 0 0 0 music
12 cello cymbal 0.98 0.02 0.06 0.94 1.101610729 -0.846789478 1 0 0 music
13 cymbal flute 0.96 0.04 0.13 0.87 0.708801065 -0.741848033 0 0 0 music
14 flute guitar 0.81 0.19 0.48 0.52 -1.170586356 0.539969569 0 0 0 music
15 guitar trumpet 0.81 0.19 0.45 0.55 -1.000467078 0.503315409 0 0 0 music
16 trumpet lute NA NA 0.24 0.76 0.121030387 NA 0 NA 0 music
17 lute bell NA NA 0.05 0.95 1.130413467 NA 1 NA 0 music
18 bell cymbal 0.92 0.08 0.04 0.96 1.164128647 -0.389257853 1 0 0 music
19 cymbal accordion 0.98 0.02 0.14 0.86 0.671122360 -0.883919863 0 0 0 music
20 accordion banana 0.57 0.43 0.06 0.94 1.085647597 2.505651317 1 1 1 music
21 banana mango 0.92 0.08 0.48 0.52 -1.161804901 -0.419701176 0 0 0 fruitveg
22 mango zucchini 0.96 0.04 0.38 0.62 -0.637211566 -0.682714830 0 0 0 fruitveg
23 zucchini cauliflower 0.98 0.02 0.56 0.44 -1.590016096 -0.922937298 0 0 0 fruitveg
24 cauliflower garlic 0.96 0.04 0.47 0.53 -1.104267540 -0.692970537 0 0 0 fruitveg
25 garlic banana 0.93 0.07 0.32 0.68 -0.290215109 -0.430893731 0 0 0 fruitveg
26 banana tangerine 0.94 0.06 0.27 0.73 -0.049554728 -0.532324793 0 0 0 fruitveg
27 tangerine avocado 0.86 0.14 0.34 0.66 -0.398473872 0.152313593 0 0 0 fruitveg
28 avocado eggplant 0.98 0.02 0.42 0.58 -0.845247654 -0.850902604 0 0 0 fruitveg
29 eggplant tomato 0.99 0.01 0.59 0.41 -1.752360608 -0.935968023 0 0 0 fruitveg
30 tomato iguana 0.80 0.20 0.06 0.94 1.105997357 0.589954690 1 0 1 fruitveg
31 iguana horse 0.86 0.14 0.13 0.87 0.703315480 0.084196897 0 0 0 animals
32 horse mule 0.90 0.10 0.54 0.46 -1.494719004 -0.242277860 0 0 0 animals
33 mule alligator 0.93 0.07 0.23 0.77 0.156115161 -0.432959645 0 0 0 animals
34 alligator swan 0.90 0.10 0.14 0.86 0.632400658 -0.186444859 0 0 0 animals
35 swan tiger 0.91 0.09 0.18 0.82 0.452830858 -0.300548887 0 0 0 animals
36 tiger porcupine 0.94 0.06 0.08 0.92 0.991958175 -0.591190335 0 0 0 animals
37 porcupine fox 0.93 0.07 0.13 0.87 0.718537058 -0.470746929 0 0 0 animals
38 fox badger 0.91 0.09 0.17 0.83 0.493068209 -0.326935482 0 0 0 animals
39 badger cheetah 0.84 0.16 -0.01 1.01 1.435290556 0.297709251 1 0 0 animals
40 cheetah trumpet 0.86 0.14 0.04 0.96 1.209229632 0.086307108 1 0 1 animals
41 trumpet organ 0.92 0.08 0.20 0.80 0.340873792 -0.400493249 0 0 0 music
42 organ sitar NA NA 0.06 0.94 1.074235333 NA 1 NA 0 music
43 sitar whistle NA NA 0.09 0.91 0.913247949 NA 0 NA 0 music
44 whistle organ 0.72 0.28 0.09 0.91 0.924854868 1.242324552 0 1 0 music
45 organ drum 0.93 0.07 0.35 0.65 -0.477276405 -0.459127967 0 0 0 music
46 drum flute 0.95 0.05 0.39 0.61 -0.687042400 -0.664624826 0 0 0 music
47 flute marimba NA NA 0.14 0.86 0.639533917 NA 0 NA 0 music
48 marimba cymbal NA NA 0.09 0.91 0.905231176 NA 0 NA 0 music
49 cymbal viola 0.84 0.16 0.01 0.99 1.355951829 0.297138885 1 0 0 music
50 viola pear 0.50 0.50 -0.01 1.01 1.430829336 3.127543915 1 1 1 music
51 pear tomato 0.97 0.03 0.53 0.47 -1.431951825 -0.822508636 0 0 0 fruitveg
52 tomato cucumber 0.99 0.01 0.61 0.39 -1.858838598 -0.987715367 0 0 0 fruitveg
53 cucumber watermelon 0.98 0.02 0.49 0.51 -1.208193218 -0.864007814 0 0 0 fruitveg
54 watermelon kale 0.95 0.05 0.29 0.71 -0.169328305 -0.597316587 0 0 0 fruitveg
55 kale lettuce 0.97 0.03 0.53 0.47 -1.440684661 -0.835616349 0 0 0 fruitveg
56 lettuce cabbage 0.97 0.03 0.60 0.40 -1.823921044 -0.764689047 0 0 0 fruitveg
57 cabbage strawberry 0.85 0.15 0.29 0.71 -0.139111110 0.193268933 0 0 0 fruitveg
58 strawberry squash 0.84 0.16 0.25 0.75 0.040394801 0.289905876 0 0 0 fruitveg
59 squash carrot 0.94 0.06 0.47 0.53 -1.145420151 -0.562043851 0 0 0 fruitveg
60 carrot deer 0.66 0.34 0.11 0.89 0.800776688 1.787016927 0 1 1 fruitveg
61 deer beagle 0.86 0.14 0.03 0.97 1.267642600 0.145149454 1 0 0 animals
62 beagle elephant 0.94 0.06 0.20 0.80 0.340929391 -0.549872305 0 0 0 animals
63 elephant turtle 0.83 0.17 0.36 0.64 -0.544582426 0.376269316 0 0 0 animals
64 turtle bug 0.84 0.16 0.25 0.75 0.087084060 0.258965621 0 0 0 animals
65 bug spider 0.89 0.11 0.34 0.66 -0.419189020 -0.111324752 0 0 0 animals
66 spider cheetah 0.86 0.14 0.11 0.89 0.837310739 0.135463190 0 0 0 animals
67 cheetah tiger 0.95 0.05 0.20 0.80 0.336889602 -0.601853336 0 0 0 animals
68 tiger monkey 0.94 0.06 0.28 0.72 -0.081089974 -0.514994774 0 0 0 animals
69 monkey leopard 0.94 0.06 0.26 0.74 0.031908788 -0.519567901 0 0 0 animals
70 leopard viola 0.84 0.16 0.03 0.97 1.243474346 0.318590706 1 0 1 animals
71 viola tuba 0.90 0.10 0.25 0.75 0.048435991 -0.223220772 0 0 0 music
72 tuba bell 0.94 0.06 0.09 0.91 0.930555595 -0.517623856 0 0 0 music
73 bell tambourine 0.90 0.10 0.02 0.98 1.270053514 -0.213181107 1 0 0 music
74 tambourine trumpet 0.97 0.03 0.17 0.83 0.488253589 -0.843193215 0 0 0 music
75 trumpet mandolin 0.98 0.02 0.24 0.76 0.120865215 -0.874815362 0 0 0 music
76 mandolin saxophone 0.98 0.02 0.32 0.68 -0.338832051 -0.874342100 0 0 0 music
77 saxophone mandolin 0.98 0.02 0.32 0.68 -0.338832051 -0.874342100 0 0 0 music
78 mandolin clarinet 0.96 0.04 0.34 0.66 -0.424952532 -0.752316514 0 0 0 music
79 clarinet tambourine 0.98 0.02 0.24 0.76 0.097244283 -0.916578133 0 0 0 music
80 tambourine tomato 0.62 0.38 0.00 1.00 1.412977505 2.118827300 1 1 1 music
81 tomato grape 0.97 0.03 0.51 0.49 -1.325930881 -0.825014557 0 0 0 fruitveg
82 grape cucumber 0.97 0.03 0.39 0.61 -0.714801363 -0.763141645 0 0 0 fruitveg
83 cucumber peach 0.94 0.06 0.33 0.67 -0.366271016 -0.577702066 0 0 0 fruitveg
84 peach spinach 0.89 0.11 0.28 0.72 -0.120391846 -0.173494038 0 0 0 fruitveg
85 spinach garlic 0.96 0.04 0.60 0.40 -1.798961894 -0.725015435 0 0 0 fruitveg
86 garlic strawberry 0.81 0.19 0.22 0.78 0.205908321 0.497437104 0 0 0 fruitveg
87 strawberry garlic 0.81 0.19 0.22 0.78 0.205908321 0.497437104 0 0 0 fruitveg
88 garlic pear 0.93 0.07 0.32 0.68 -0.319777393 -0.433646218 0 0 0 fruitveg
89 pear kale 0.97 0.03 0.29 0.71 -0.168540132 -0.833000382 0 0 0 fruitveg
90 kale rat 0.55 0.45 0.10 0.90 0.866670431 2.723131661 0 1 1 fruitveg
91 rat frog 0.72 0.28 0.32 0.68 -0.334901252 1.271517011 0 1 0 animals
92 frog rabbit 0.83 0.17 0.37 0.63 -0.605632062 0.356966733 0 0 0 animals
93 rabbit zebra 0.89 0.11 0.22 0.78 0.244480677 -0.116399011 0 0 0 animals
94 zebra squirrel 0.78 0.22 0.23 0.77 0.195106245 0.742541910 0 0 0 animals
95 squirrel horse 0.84 0.16 0.27 0.73 -0.067139405 0.307413567 0 0 0 animals
96 horse hyena 0.73 0.27 0.12 0.88 0.752176454 1.171031133 0 1 0 animals
97 hyena horse 0.73 0.27 0.12 0.88 0.752176454 1.171031133 0 1 0 animals
98 horse zebra 0.91 0.09 0.16 0.84 0.525245754 -0.315998589 0 0 0 animals
99 zebra caterpillar 0.79 0.21 0.07 0.93 1.043642688 0.738051597 1 0 0 animals
100 caterpillar xylophone 0.68 0.32 -0.05 1.05 1.645567135 1.650780632 1 1 1 animals
101 xylophone cello 0.99 0.01 0.16 0.84 0.550201141 -0.972913669 0 0 0 music
102 cello xylophone 0.99 0.01 0.16 0.84 0.550201141 -0.972913669 0 0 0 music
103 xylophone flute 0.97 0.03 0.21 0.79 0.260162496 -0.820890147 0 0 0 music
104 flute cello 0.96 0.04 0.40 0.60 -0.736690007 -0.742570559 0 0 0 music
105 cello bassoon 0.97 0.03 0.19 0.81 0.401208970 -0.772820399 0 0 0 music
106 bassoon cello 0.97 0.03 0.19 0.81 0.401208970 -0.772820399 0 0 0 music
107 cello sitar NA NA 0.10 0.90 0.889149954 NA 0 NA 0 music
108 sitar marimba NA NA 0.15 0.85 0.612421361 NA 0 NA 0 music
109 marimba bassoon NA NA 0.08 0.92 0.974699141 NA 0 NA 0 music
110 bassoon pear 0.49 0.51 0.02 0.98 1.280023320 3.222766695 1 1 1 music
111 pear apple 0.86 0.14 0.47 0.53 -1.104178353 0.134529105 0 0 0 fruitveg
112 apple fig 0.83 0.17 0.22 0.78 0.216775890 0.357243492 0 0 0 fruitveg
113 fig pineapple 0.91 0.09 0.20 0.80 0.332996299 -0.273714509 0 0 0 fruitveg
114 pineapple pea 0.94 0.06 0.21 0.79 0.265846642 -0.524722940 0 0 0 fruitveg
115 pea pear 0.97 0.03 0.24 0.76 0.101909932 -0.830557706 0 0 0 fruitveg
116 pear grape 0.97 0.03 0.40 0.60 -0.730514892 -0.837139126 0 0 0 fruitveg
117 grape pea 0.97 0.03 0.26 0.74 0.014666070 -0.829766861 0 0 0 fruitveg
118 pea pear 0.97 0.03 0.24 0.76 0.101909932 -0.830557706 0 0 0 fruitveg
119 pear mushroom 0.93 0.07 0.34 0.66 -0.404771555 -0.505774334 0 0 0 fruitveg
120 mushroom lion 0.67 0.33 0.22 0.78 0.219056184 1.737959111 0 1 1 fruitveg
121 lion giraffe 0.84 0.16 0.31 0.69 -0.236132045 0.309341638 0 0 0 animals
122 giraffe lion 0.84 0.16 0.31 0.69 -0.236132045 0.309341638 0 0 0 animals
123 lion turtle 0.83 0.17 0.34 0.66 -0.443859060 0.358486537 0 0 0 animals
124 turtle mule 0.86 0.14 0.11 0.89 0.834677418 0.085918157 0 0 0 animals
125 mule bear 0.79 0.21 0.35 0.65 -0.456612802 0.665963252 0 0 0 animals
126 bear caterpillar 0.75 0.25 0.18 0.82 0.434787440 1.057697321 0 1 0 animals
127 caterpillar wolf 0.83 0.17 0.18 0.82 0.411725119 0.336542475 0 0 0 animals
128 wolf porcupine 0.91 0.09 0.19 0.81 0.399850481 -0.277077659 0 0 0 animals
129 porcupine hamster 0.90 0.10 0.14 0.86 0.671439692 -0.181561122 0 0 0 animals
130 hamster viola 0.76 0.24 0.11 0.89 0.834089340 0.921605620 0 0 1 animals
131 viola sitar NA NA 0.09 0.91 0.910439449 NA 0 NA 0 music
132 sitar cymbal NA NA 0.06 0.94 1.059757426 NA 1 NA 0 music
133 cymbal drum 0.98 0.02 0.19 0.81 0.402322679 -0.864143947 0 0 0 music
134 drum saxophone 0.97 0.03 0.31 0.69 -0.258698386 -0.777389303 0 0 0 music
135 saxophone xylophone 0.97 0.03 0.21 0.79 0.266414749 -0.776363970 0 0 0 music
136 xylophone marimba NA NA 0.07 0.93 1.024042942 NA 1 NA 0 music
137 marimba harmonica NA NA 0.20 0.80 0.351130536 NA 0 NA 0 music
138 harmonica clarinet 0.83 0.17 0.37 0.63 -0.594248917 0.391345608 0 0 0 music
139 clarinet viola 0.82 0.18 0.35 0.65 -0.488154370 0.423758791 0 0 0 music
140 viola corn 0.55 0.45 0.07 0.93 1.039965775 2.683435675 1 1 1 music
141 corn strawberry 0.87 0.13 0.30 0.70 -0.219153983 0.056475409 0 0 0 fruitveg
142 strawberry broccoli 0.74 0.26 0.21 0.79 0.270829499 1.150418280 0 1 0 fruitveg
143 broccoli tomato 0.97 0.03 0.47 0.53 -1.145203551 -0.813192845 0 0 0 fruitveg
144 tomato mushroom 0.96 0.04 0.49 0.51 -1.221611424 -0.736415474 0 0 0 fruitveg
145 mushroom broccoli 0.95 0.05 0.36 0.64 -0.541256533 -0.613769828 0 0 0 fruitveg
146 broccoli lettuce 0.98 0.02 0.51 0.49 -1.332271694 -0.870457295 0 0 0 fruitveg
147 lettuce watermelon 0.95 0.05 0.46 0.54 -1.061947409 -0.674663163 0 0 0 fruitveg
148 watermelon spinach 0.94 0.06 0.44 0.56 -0.942107961 -0.589138209 0 0 0 fruitveg
149 spinach potato 0.96 0.04 0.52 0.48 -1.369238751 -0.691142783 0 0 0 fruitveg
150 potato hamster 0.70 0.30 0.10 0.90 0.881136615 1.408956229 0 1 1 fruitveg
151 hamster donkey 0.93 0.07 0.19 0.81 0.378124847 -0.432482246 0 0 0 animals
152 donkey gorilla 0.82 0.18 0.20 0.80 0.312418699 0.486751971 0 0 0 animals
153 gorilla racoon 0.88 0.12 0.08 0.92 0.960581435 -0.078039020 0 0 0 animals
154 racoon bug 0.87 0.13 -0.05 1.05 1.692863713 0.058074244 1 0 0 animals
155 bug horse 0.81 0.19 0.20 0.80 0.341179433 0.490780248 0 0 0 animals
156 horse leopard 0.86 0.14 0.19 0.81 0.362283636 0.088176769 0 0 0 animals
157 leopard parrot 0.94 0.06 0.26 0.74 0.006247129 -0.541827314 0 0 0 animals
158 parrot panther 0.96 0.04 0.27 0.73 -0.032615281 -0.705931321 0 0 0 animals
159 panther mouse 0.87 0.13 0.09 0.91 0.921122890 -0.003956326 0 0 0 animals
160 mouse trombone 0.85 0.15 0.08 0.92 0.992744944 0.217895458 0 0 1 animals
161 trombone horn 0.94 0.06 0.21 0.79 0.253448778 -0.570638324 0 0 0 music
162 horn horn 1.00 0.00 1.00 0.00 -3.963345162 -1.052125397 0 0 0 music
163 horn trombone 0.94 0.06 0.21 0.79 0.253448778 -0.570638324 0 0 0 music
164 trombone mandolin 0.97 0.03 0.24 0.76 0.124858953 -0.806316588 0 0 0 music
165 mandolin trombone 0.97 0.03 0.24 0.76 0.124858953 -0.806316588 0 0 0 music
166 trombone banjo 0.96 0.04 0.33 0.67 -0.348765977 -0.743171701 0 0 0 music
167 banjo sitar NA NA 0.13 0.87 0.717246727 NA 0 NA 0 music
168 sitar harp NA NA 0.10 0.90 0.892333360 NA 0 NA 0 music
169 harp guitar 0.77 0.23 0.42 0.58 -0.871855941 0.860375757 0 0 0 music
170 guitar strawberry 0.69 0.31 0.09 0.91 0.902802336 1.536585736 0 1 1 music
171 strawberry asparagus 0.83 0.17 0.27 0.73 -0.061680866 0.331915979 0 0 0 fruitveg
172 asparagus apple 0.85 0.15 0.17 0.83 0.486909505 0.216058708 0 0 0 fruitveg
173 apple corn 0.89 0.11 0.36 0.64 -0.510967219 -0.161548556 0 0 0 fruitveg
174 corn grape 0.97 0.03 0.39 0.61 -0.710352339 -0.826073112 0 0 0 fruitveg
175 grape onion 0.91 0.09 0.36 0.64 -0.513016616 -0.330581280 0 0 0 fruitveg
176 onion grape 0.91 0.09 0.36 0.64 -0.513016616 -0.330581280 0 0 0 fruitveg
177 grape pear 0.97 0.03 0.40 0.60 -0.730514892 -0.837139126 0 0 0 fruitveg
178 pear celery 0.94 0.06 0.33 0.67 -0.353789829 -0.591559966 0 0 0 fruitveg
179 celery banana 0.90 0.10 0.24 0.76 0.092912781 -0.250771201 0 0 0 fruitveg
180 banana rat 0.49 0.51 0.08 0.92 0.998098075 3.166311153 0 1 1 fruitveg
181 rat iguana 0.81 0.19 0.22 0.78 0.240959644 0.528737631 0 0 0 animals
182 iguana caterpillar 0.83 0.17 0.10 0.90 0.886781602 0.329020829 0 0 0 animals
183 caterpillar wolf 0.83 0.17 0.18 0.82 0.411725119 0.336542475 0 0 0 animals
184 wolf swan 0.83 0.17 0.31 0.69 -0.253223969 0.401479862 0 0 0 animals
185 swan cougar 0.93 0.07 0.16 0.84 0.520063453 -0.484847391 0 0 0 animals
186 cougar rhinoceros 0.90 0.10 0.08 0.92 0.963573264 -0.207205542 0 0 0 animals
187 rhinoceros deer 0.93 0.07 0.16 0.84 0.528652890 -0.474899376 0 0 0 animals
188 deer goose 0.90 0.10 0.39 0.61 -0.699453840 -0.178462759 0 0 0 animals
189 goose penguin 0.80 0.20 0.10 0.90 0.872164775 0.599182245 0 0 0 animals
190 penguin organ 0.86 0.14 -0.02 1.02 1.493723040 0.136674113 1 0 1 animals
191 organ marimba NA NA 0.06 0.94 1.099048725 NA 1 NA 0 music
192 marimba tambourine NA NA 0.07 0.93 1.009098556 NA 1 NA 0 music
193 tambourine fiddle 0.98 0.02 0.16 0.84 0.543765929 -0.868807883 0 0 0 music
194 fiddle bassoon 0.96 0.04 0.16 0.84 0.521532488 -0.705143113 0 0 0 music
195 bassoon clarinet 0.98 0.02 0.31 0.69 -0.249723555 -0.915061300 0 0 0 music
196 clarinet harp 0.94 0.06 0.28 0.72 -0.118794710 -0.547869951 0 0 0 music
197 harp bell 0.94 0.06 0.14 0.86 0.678112891 -0.543391387 0 0 0 music
198 bell guitar 0.72 0.28 0.25 0.75 0.087363660 1.301894116 0 1 0 music
199 guitar sitar NA NA 0.20 0.80 0.347350788 NA 0 NA 0 music
200 sitar cauliflower NA NA -0.01 1.01 1.460393502 NA 1 NA 1 music
write.csv(together, "VF_distresults.csv")

total switches vs in cluster observations in the original set

table(together$switch_real)
## 
##    0    1 
## 6750  750
together$Switch_GlowcaZ1 <- as.numeric(together$Switch_GlowcaZ1)
together$Switch_SemDistZ1 <- as.numeric(together$Switch_SemDistZ1)
together$switch_real <- as.numeric(together$switch_real)

Stats on GlowcaZ1 (Z>=1) by Switch_real

tab.out Contingency table
TS Threat score a.k.a. Critical success index (CSI) or d’
TS.se Standard Error for TS
POD Hit Rate aka probability of detection
POD.se Standard Error for POD
M Miss rate
F False Alarm RATE
F.se Standard Error for F
FAR False Alarm RATIO
FAR.se Standard Error for FAR
HSS Heidke Skill Score
HSS.se Standard Error for HSS
PSS Peirce Skill Score
PSS.se Standard Error for PSS
KSS Kuiper’s Skill Score
PC Percent correct - events along the diagonal
PC.se Standard Error for PC
BIAS Bias
ETS Equitable Threat Score
ETS.se Standard Error for ETS
theta Odds Ratio
log.theta Log Odds Ratio

Stats on SemDist15Z1 (Z>=1) by Switch_real

conting_semdistz1 <- table(together$Switch_SemDistZ1, together$switch_real)
pdf(file = "SemDistMosaic.pdf")
mosaicplot(conting_semdistz1)
verification::table.stats(conting_semdistz1)
## [1] " Assume data entered as c(n11, n01, n10, n00) Obs*Forecast"
## [1] " Assume contingency table has observed values in columns, forecasts in rows"
## $tab
##      [,1] [,2]
## [1,] 5839  181
## [2,]  411  518
## 
## $TS
## [1] 0.9079445
## 
## $TS.se
## [1] 0.003458024
## 
## $POD
## [1] 0.9342385
## 
## $POD.se
## [1] 0.003135266
## 
## $M
## [1] 0.06575989
## 
## $F
## [1] 0.2589376
## 
## $F.se
## [1] 0.0165685
## 
## $FAR
## [1] 0.0300664
## 
## $FAR.se
## [1] 0.001868509
## 
## $HSS
## [1] 0.5892038
## 
## $HSS.se
## [1] 0.01404901
## 
## $PSS
## [1] 0.6753025
## 
## $PSS.se
## [1] 0.01686254
## 
## $KSS
## [1] 0.6752987
## 
## $PC
## [1] 0.9148066
## 
## $PC.se
## [1] 0.003275587
## 
## $BIAS
## [1] 0.9632
## 
## $ETS
## [1] 0.4176392
## 
## $ETS.se
## [1] 0.01411716
## 
## $theta
## [1] 40.65817
## 
## $log.theta
## [1] 3.7052
## 
## $LOR.se
## [1] 0.1002981
## 
## $n.h
## [1] 99.40638
## 
## $orss
## [1] 0.9519902
## 
## $orss.se
## [1] 0.004699702
## 
## $eds
## [1] 0.2183106
## 
## $eds.se
## [1] 0.02349213
## 
## $seds
## [1] 0.4337472
## 
## $seds.se
## [1] 0.02764629
## 
## $EDI
## [1] 0.9041377
## 
## $EDI.se
## [1] 0.007723115
## 
## $SEDI
## [1] 0.8343938
## 
## $SEDI.se
## [1] 0.01182411
roc1 <- rocit(together$Switch_SemDistZ1, together$switch_real)
plot(roc1, values = F)
# psycho::dprime(n_hit, n_fa,n_miss = NULL,n_cr = NULL, adjusted = TRUE)
psycho::dprime(518, 181, 411, 5839, adjusted = TRUE)
## $dprime
## [1] 2.023376
## 
## $beta
## [1] 5.779049
## 
## $aprime
## [1] 0.8724881
## 
## $bppd
## [1] 0.7885599
## 
## $c
## [1] 0.8669862
nmisssemdist <- sum(is.na(together$Switch_GlowcaZ1))
print(paste("Semdist15 has", 7500 - nmisssemdist, "complete observations"))
## [1] "Semdist15 has 7499 complete observations"

Stats on GlowcaZ1 (Z>=1) by Switch_real

conting_glowcaz1 <- table(together$Switch_GlowcaZ1, together$switch_real)
mosaicplot(conting_glowcaz1)

verification::table.stats(conting_glowcaz1)
## [1] " Assume data entered as c(n11, n01, n10, n00) Obs*Forecast"
## [1] " Assume contingency table has observed values in columns, forecasts in rows"
## $tab
##      [,1] [,2]
## [1,] 6332  259
## [2,]  418  490
## 
## $TS
## [1] 0.9034086
## 
## $TS.se
## [1] 0.003285867
## 
## $POD
## [1] 0.9380727
## 
## $POD.se
## [1] 0.002933642
## 
## $M
## [1] 0.06192583
## 
## $F
## [1] 0.3457898
## 
## $F.se
## [1] 0.01737885
## 
## $FAR
## [1] 0.03929595
## 
## $FAR.se
## [1] 0.001900974
## 
## $HSS
## [1] 0.5412092
## 
## $HSS.se
## [1] 0.01533783
## 
## $PSS
## [1] 0.5922844
## 
## $PSS.se
## [1] 0.01762471
## 
## $KSS
## [1] 0.5922797
## 
## $PC
## [1] 0.9097201
## 
## $PC.se
## [1] 0.003160059
## 
## $BIAS
## [1] 0.9764444
## 
## $ETS
## [1] 0.3709985
## 
## $ETS.se
## [1] 0.01441478
## 
## $theta
## [1] 28.65899
## 
## $log.theta
## [1] 3.355467
## 
## $LOR.se
## [1] 0.09193526
## 
## $n.h
## [1] 118.3139
## 
## $orss
## [1] 0.9325668
## 
## $orss.se
## [1] 0.005990461
## 
## $eds
## [1] 0.2441617
## 
## $eds.se
## [1] 0.02300166
## 
## $seds
## [1] 0.3850835
## 
## $seds.se
## [1] 0.02560697
## 
## $EDI
## [1] 0.8864365
## 
## $EDI.se
## [1] 0.01001841
## 
## $SEDI
## [1] 0.774581
## 
## $SEDI.se
## [1] 0.01646847
roc2 <- rocit(together$Switch_GlowcaZ1, together$switch_real)
plot(roc2, values = F)

# psycho::dprime(n_hit, n_fa,n_miss = NULL,n_cr = NULL, adjusted = TRUE)
psycho::dprime(490, 259, 418, 6332, adjusted = TRUE)
## $dprime
## [1] 1.857528
## 
## $beta
## [1] 4.666926
## 
## $aprime
## [1] 0.8619999
## 
## $bppd
## [1] 0.7361671
## 
## $c
## [1] 0.8293282
nmissglow <- sum(is.na(together$Switch_GlowcaZ1))
print(paste("Glowca has", 7500 - nmissglow, "complete observations"))
## [1] "Glowca has 7499 complete observations"

Derive measures of association between glowca and semdist15 (phi)

both <- table(together$Switch_SemDistZ1, together$Switch_GlowcaZ1)
psych::phi(both)
## [1] 0.36