0% found this document useful (0 votes)
11 views5 pages

Sentiment

gg

Uploaded by

Ajay
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views5 pages

Sentiment

gg

Uploaded by

Ajay
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

library(tm)

library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(syuzhet)
library(ggplot2)

# ---- Load the messages and build a cleaned corpus ----

# Read the CSV, keeping text columns as plain character vectors.
data <- read.csv("spam.csv", stringsAsFactors = FALSE)

# Print the structure of the loaded data frame for a quick visual check.
str(data)

# Fail early if the text column is missing: `data$v2` would otherwise be NULL
# and the steps below would fail later or run on empty input.
if (!"v2" %in% names(data)) {
  stop("Column 'v2' not found in 'spam.csv'.", call. = FALSE)
}

# Column `v2` supplies the raw message text used by the rest of the script.
spam_text <- data$v2

# One corpus document per element of `spam_text`.
spam_corpus <- Corpus(VectorSource(spam_text))

# Normalisation pipeline; each step takes the output of the previous one.
# tolower() is a plain base function, so it is wrapped in
# content_transformer() before being handed to tm_map().
clean_corpus <- tm_map(spam_corpus, content_transformer(tolower))
clean_corpus <- tm_map(clean_corpus, removePunctuation)
clean_corpus <- tm_map(clean_corpus, removeNumbers)
clean_corpus <- tm_map(clean_corpus, removeWords, stopwords("english"))

# Extra stop words removed on top of tm's English list.
custom_stopwords <- c("u", "so")
clean_corpus <- tm_map(clean_corpus, removeWords, custom_stopwords)

# Collapse the runs of whitespace left behind by the removals above.
clean_corpus <- tm_map(clean_corpus, stripWhitespace)

# ---- Term frequencies ----

# Term-document matrix of the cleaned corpus, converted to a dense matrix
# so that rowSums() can total each term's count across all documents.
term_doc_matrix <- TermDocumentMatrix(clean_corpus)
term_counts <- as.matrix(term_doc_matrix)

# Total occurrences per term, most frequent first.
term_totals <- rowSums(term_counts)
word_freq <- sort(term_totals, decreasing = TRUE)

# Two-column frame (term, count) used by the plots further down.
word_freq_df <- data.frame(
  word = names(word_freq),
  freq = word_freq
)

# Keep and display the five most frequent terms.
top_5_words <- head(word_freq_df, n = 5)
print(top_5_words)

# ---- Word-frequency plots ----

# Bar chart of the five most frequent terms, tallest bar first.
# dev.new() opens the platform's default graphics device, so the script is
# not tied to an X11 display being available.
dev.new()
top_words_plot <- ggplot(
  top_5_words,
  aes(x = reorder(word, -freq), y = freq)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 5 Most Frequent Words in Spam Text",
    x = "Words",
    y = "Frequency"
  ) +
  theme_minimal()
# Print explicitly: ggplot objects are only auto-drawn at the interactive
# prompt, not when the script is run through source().
print(top_words_plot)

# Word cloud of up to 100 terms on its own device. The seed is set
# immediately before drawing so repeated runs start from the same RNG state.
set.seed(1234)
dev.new()
wordcloud(
  words = word_freq_df$word,
  freq = word_freq_df$freq,
  min.freq = 1,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# ---- NRC sentiment totals ----

# Score every message against the NRC lexicon: one row per message, one
# column per category.
sentiment_scores <- get_nrc_sentiment(spam_text)

# Sum the first eight columns over all messages. Per the syuzhet
# documentation these are the eight emotion categories, with the
# negative/positive valence columns coming after them.
emotion_totals <- colSums(sentiment_scores[, 1:8])

# colSums() returns a named numeric vector, so the category names are pulled
# out into their own column to get a proper (sentiment, score) table.
sentiment_totals <- data.frame(
  sentiment = names(emotion_totals),
  score = unname(emotion_totals),
  stringsAsFactors = FALSE
)

# Highest-scoring emotion first; reset row names so they follow the new order.
sentiment_totals <- sentiment_totals[
  order(sentiment_totals$score, decreasing = TRUE),
]
rownames(sentiment_totals) <- NULL

print(sentiment_totals)

# ---- Sentiment frequency plot ----

# Total score for every column of `sentiment_scores`, one row per category.
sentiment_freq <- data.frame(
  sentiment = colnames(sentiment_scores),
  frequency = colSums(sentiment_scores)
)

# Drop categories that never occur so they do not appear as empty bars.
sentiment_freq <- sentiment_freq[sentiment_freq$frequency > 0, ]

# dev.new() opens the platform's default graphics device, so the script is
# not tied to an X11 display being available.
dev.new()
sentiment_plot <- ggplot(
  sentiment_freq,
  aes(x = reorder(sentiment, -frequency), y = frequency)
) +
  geom_col(fill = "coral") +
  labs(
    title = "Frequency of Sentiments in Spam Text",
    x = "Sentiment",
    y = "Frequency"
  ) +
  theme_minimal()
# Print explicitly: ggplot objects are only auto-drawn at the interactive
# prompt, not when the script is run through source().
print(sentiment_plot)

You might also like