# Load the required packages
library(wordcloud)
library(tm)
library(tidyverse)
library(tidytext)
# Load the sample text dataset (provides the object "text")
load("text.RData")
# Create a corpus from the text dataset
corpus <- Corpus(VectorSource(text))
# Clean the corpus by removing stopwords and punctuation
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
# Create a term-document matrix from the corpus
tdm <- TermDocumentMatrix(corpus)
# Convert the term-document matrix to a matrix
m <- as.matrix(tdm)
# Get the frequency of each term in the matrix
v <- sort(rowSums(m), decreasing = TRUE)
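# Optional check (not in the original script): preview the ten most frequent terms
head(v, 10)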
# Create a data frame of terms and frequencies, join the Bing sentiment
# lexicon, assign a color to each sentiment, and sample 300 words
set.seed(1111) # for reproducible sampling
df <- data.frame(word = names(v), freq = v) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  mutate(color = ifelse(sentiment == "positive",
                        "#250c5f", "#cf5f26")) %>%
  sample_n(300)
# summary(df$freq)
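# Optional check (not in the original script): balance of positive and
# negative words in the sampled data frame
count(df, sentiment)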
png("wordcloud.png", res = 180)
wordcloud(df$word,
          freq = df$freq,
          ordered.colors = TRUE, # color i is applied to word i
          random.color = FALSE,
          min.freq = 1,
          scale = c(5, 0.5),
          colors = df$color)
dev.off()
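# Note: the cloud is written to "wordcloud.png" in the working directory. With
# res = 180 and the default device size, some words may not fit and are skipped
# with a warning; if that happens, pass larger width and height values to png().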
Gazzelloni, F. (2023). Data Visualization: Wordcloud Good/Bad.