library(tm)
library(wordcloud)
library(RColorBrewer)
Read in the raw word list. Convert the word list to a text corpus object, convert all terms to lowercase, and then convert the corpus to a term-document matrix.
# Read the raw word list; the file must provide a WORD column.
cog <- read.csv("cloud.csv", header = TRUE)

# Build a corpus with one document per entry of WORD. as.character() keeps the
# input as text even if WORD was read in as a factor (the read.csv default
# before R 4.0).
words <- Corpus(VectorSource(as.character(cog$WORD)))

# Lowercase every term so that counts are case-insensitive.
words <- tm_map(words, content_transformer(tolower))

# Term-document matrix: one row per term, one column per document.
# NOTE(review): this relies on tm's default control settings, which appear to
# drop terms shorter than three characters -- confirm that is intended.
wtm <- TermDocumentMatrix(words)
m <- as.matrix(wtm) # dense copy of the term-document matrix

# Total occurrences of each term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Fix the RNG seed so the word cloud layout is reproducible.
set.seed(9988)
wordcloud(
  words = d$word, freq = d$freq, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.25,
  colors = brewer.pal(10, "Spectral")
)
## quartz_off_screen
## 2
25 most frequent words
# Bar chart of the first 25 rows of d (d is expected to be ordered by
# decreasing frequency). head() is used instead of d[1:25, ] so that a
# vocabulary with fewer than 25 terms does not introduce all-NA rows.
top_words <- head(d, 25)
barplot(top_words$freq,
  las = 2, # axis labels perpendicular to the axes
  names.arg = top_words$word,
  col = "green", main = "Most frequent",
  ylab = "Word frequencies"
)
20 MOST frequent
# Print the first 20 rows of the frequency table.
head(d, n = 20L)
## word freq
## help help 24
## yes yes 22
## you you 20
## food food 19
## good good 17
## water water 16
## need need 13
## stop stop 12
## want want 11
## bad bad 11
## what what 11
## there there 11
## danger danger 11
## make make 10
## here here 10
## find find 10
## eat eat 9
## more more 9
## love love 9
## look look 9
25 LEAST frequent
# Print the last 25 rows of the frequency table.
tail(d, n = 25L)
## word freq
## person person 1
## big big 1
## small small 1
## thirsty thirsty 1
## are are 1
## family family 1
## funny funny 1
## say say 1
## roger roger 1
## wilco wilco 1
## repeat repeat 1
## error error 1
## rescue rescue 1
## two two 1
## three three 1
## four four 1
## five five 1
## six six 1
## seven seven 1
## eight eight 1
## nine nine 1
## zero zero 1
## sos sos 1
## this this 1
## laugh laugh 1