library(tm)
library(wordcloud)
library(RColorBrewer)

1 Prep the data to generate a word cloud

Read in the raw word list vector. Convert the word list to a text corpus object. Convert all terms to lowercase. Then convert the corpus to a term-document matrix, sum the counts for each term, and sort the terms from most to least frequent.

# Read the raw word list; the code below uses its WORD column.
# stringsAsFactors = FALSE keeps WORD as character on R < 4.0 as well.
cog <- read.csv("cloud.csv", header = TRUE, stringsAsFactors = FALSE)
words <- Corpus(VectorSource(cog$WORD))  # Convert to a corpus object
words <- tm_map(words, content_transformer(tolower))  # Lowercase every term
# NOTE(review): tm's default term-frequency settings appear to drop terms
# shorter than three characters (e.g. "no", "go"). If those should be counted,
# pass control = list(wordLengths = c(1, Inf)) -- confirm against the tm docs.
wtm <- TermDocumentMatrix(words)  # Convert to a term document matrix
m <- as.matrix(wtm)  # Unspool to a regular matrix (terms in rows)
v <- sort(rowSums(m), decreasing = TRUE)  # Total count per term, highest first
d <- data.frame(word = names(v), freq = v)  # One row per term, sorted by freq

2 Word cloud

# Fix the RNG seed before drawing so repeated runs start from the same state.
set.seed(9988)
# Draw the cloud from the word/frequency columns of d, keeping every word that
# occurs at least once, capped at 200 words, coloured with a 10-colour
# "Spectral" palette.
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.25,
  colors = brewer.pal(10, "Spectral")
)

## quartz_off_screen 
##                 2

3 Barplot by frequency

25 most frequent words

# Bar chart of the 25 highest-frequency words. d is sorted by descending
# frequency, so head() takes the top entries; unlike d[1:25, ], it does not
# pad with NA rows when fewer than 25 words are available.
# las = 2 draws the axis labels perpendicular to the axis so words stay legible.
barplot(head(d$freq, 25), las = 2, names.arg = head(d$word, 25),
        col = "green", main = "Most frequent",
        ylab = "Word frequencies")

4 Frequency tables

20 MOST frequent

# Print the first 20 rows of the frequency table.
head(d, n = 20)

##          word freq
## help     help   24
## yes       yes   22
## you       you   20
## food     food   19
## good     good   17
## water   water   16
## need     need   13
## stop     stop   12
## want     want   11
## bad       bad   11
## what     what   11
## there   there   11
## danger danger   11
## make     make   10
## here     here   10
## find     find   10
## eat       eat    9
## more     more    9
## love     love    9
## look     look    9

25 LEAST frequent

# Print the last 25 rows of the frequency table.
tail(d, n = 25)

##            word freq
## person   person    1
## big         big    1
## small     small    1
## thirsty thirsty    1
## are         are    1
## family   family    1
## funny     funny    1
## say         say    1
## roger     roger    1
## wilco     wilco    1
## repeat   repeat    1
## error     error    1
## rescue   rescue    1
## two         two    1
## three     three    1
## four       four    1
## five       five    1
## six         six    1
## seven     seven    1
## eight     eight    1
## nine       nine    1
## zero       zero    1
## sos         sos    1
## this       this    1
## laugh     laugh    1

Creating a word cloud for desert island exercise

Jamie Reilly

2020-04-11

1 Prep the data to generate a word cloud

2 Word cloud

3 Barplot by frequency

4 Frequency tables