In [ ]:
# Attach the tokenisation and data-manipulation packages used below.
library(tidytext)
library(dplyr)

# Fetch the saved workspace; the code below expects it to define `trumptweets`.
load(url("https://cbail.github.io/Trump_Tweets.Rdata"))

# Keep only the timestamp and text columns, then split the text into one
# token per row, stored in a column named "word".
tidy_trump_tweets <- unnest_tokens(
  select(trumptweets, created_at, text),
  "word",
  text
)
In [ ]:
# Clean the token table: drop stop words, drop numeric tokens, strip stray
# whitespace, then reduce every remaining token to its English stem.

# Load tidytext's stop-word lexicon and remove those tokens. The join key is
# spelled out so the join does not depend on whichever columns happen to share
# a name.
data("stop_words")
tidy_trump_tweets <- tidy_trump_tweets %>%
  anti_join(stop_words, by = "word")

# Drop tokens that contain a standalone run of digits. A logical mask is used
# rather than `-grep()`: when nothing matches, grep() returns integer(0) and
# `df[-integer(0), ]` selects zero rows, silently emptying the table.
tidy_trump_tweets <- tidy_trump_tweets[
  !grepl("\\b\\d+\\b", tidy_trump_tweets$word),
]

# Remove any whitespace left inside tokens.
tidy_trump_tweets$word <- gsub("\\s+", "", tidy_trump_tweets$word)

# Stem each token. Plain mutate() replaces mutate_at() + funs(); funs() is
# deprecated in dplyr.
library(SnowballC)
tidy_trump_tweets <- tidy_trump_tweets %>%
  mutate(word = wordStem(word, language = "en"))
In [ ]:
# Count how often each token occurs and chart the 20 most frequent ones.

# Frequency table, most common token first.
trump_tweet_top_words <- tidy_trump_tweets %>%
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  arrange(desc(n))

# Remove Twitter artefacts (URL fragments, the "amp" left over from "&amp;",
# and the retweet marker). The pattern is anchored and the dot escaped so that
# only these exact tokens are dropped; an unanchored "rt" or "amp" would also
# delete ordinary words such as "report" or "campaign". A logical mask avoids
# the `-grep()` pitfall of returning zero rows when nothing matches.
trump_tweet_top_words <- trump_tweet_top_words[
  !grepl("^(https|t\\.co|amp|rt)$", trump_tweet_top_words$word),
]

# Create a factor whose levels follow descending frequency. This must happen
# before the top rows are copied out; otherwise the plotted subset still holds
# plain character values and ggplot sorts the bars alphabetically.
trump_tweet_top_words$word <- factor(
  trump_tweet_top_words$word,
  levels = trump_tweet_top_words$word[
    order(trump_tweet_top_words$n, decreasing = TRUE)
  ]
)

# Select only the top words. head() returns fewer rows when the table is
# short, whereas `[1:20, ]` would pad the result with NA rows.
top_20 <- head(trump_tweet_top_words, 20)

library(ggplot2)
ggplot(top_20, aes(x = word, y = n, fill = word)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab("Number of Times Word Appears in Trump's Tweets") +
  xlab("") +
  # "none" replaces the deprecated `fill = FALSE` for hiding the legend.
  guides(fill = "none")
In [ ]:
# Select the tweets that mention any term from a small economic dictionary and
# summarise them.
economic_dictionary <- c("economy", "unemployment", "trade", "tariffs")
library(stringr)

# Collapse the terms into a single alternation pattern. Passing the vector
# itself to str_detect() pairs patterns with tweets element-wise (tweet 1 vs
# "economy", tweet 2 vs "unemployment", ...) instead of checking every tweet
# against every term.
economic_pattern <- paste(economic_dictionary, collapse = "|")
economic_tweets <- trumptweets[
  str_detect(trumptweets$text, economic_pattern),
]

# Inspect the matches: first few texts, table dimensions, and a tally of the
# `source` column.
head(economic_tweets$text)
dim(economic_tweets)
table(economic_tweets$source)
In [ ]:
# Show the first six rows of the Bing sentiment lexicon.
head(get_sentiments("bing"), n = 6L)
In [ ]:
# Keep only tokens present in the Bing lexicon, then tally how many tokens of
# each sentiment value occur per `created_at` value.
trump_tweet_sentiment <- count(
  inner_join(tidy_trump_tweets, get_sentiments("bing")),
  created_at,
  sentiment
)
head(trump_tweet_sentiment)
In [ ]:
# Plot how many negative-sentiment tokens appear on each calendar day.

# Derive a Date column from the tweet timestamp. The format names only the
# date part: the previous "%Y-%m-%d %x" used %x (a date specifier) where the
# time of day sits, which makes character timestamps parse to NA.
# NOTE(review): if `created_at` is POSIXct the format is not used and the date
# is taken in UTC -- confirm that is the intended day boundary.
tidy_trump_tweets$date <- as.Date(
  tidy_trump_tweets$created_at,
  format = "%Y-%m-%d"
)

# Keep tokens the Bing lexicon labels "negative" and count them per day.
trump_sentiment_plot <- tidy_trump_tweets %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  filter(sentiment == "negative") %>%
  count(date, sentiment)

library(ggplot2)
ggplot(trump_sentiment_plot, aes(x = date, y = n)) +
  geom_line(color = "red") +
  theme_minimal() +
  ylab("Frequency of Negative Words in Trump's Tweets") +
  xlab("Date")
In [ ]:
library(stm)
In [ ]:
# Show the first six rows of the raw tweet table.
head(trumptweets, n = 6L)
In [ ]:
# Pre-process the tweet texts for stm, passing the full tweet table along as
# document metadata.
processed <- textProcessor(
  documents = trumptweets$text,
  metadata = trumptweets
)
In [ ]:
# Assemble the documents, vocabulary and metadata into the structure that the
# model-fitting call reads from.
out <- prepDocuments(
  documents = processed$documents,
  vocab = processed$vocab,
  meta = processed$meta
)
In [ ]:
# Fit a structural topic model on the prepared corpus: K is passed as 0, the
# initialisation is spectral, EM is capped at 75 iterations and progress
# output is switched off.
model <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  K = 0,
  init.type = "Spectral",
  max.em.its = 75,
  verbose = FALSE
)
In [ ]:
# Draw the default plot for the fitted topic model.
plot(model)
In [ ]:
# Regress topic 1 (the left-hand side of the formula) on the tweet `source`
# covariate, using the "Global" uncertainty setting.
predict_topics <- estimateEffect(
  formula = 1 ~ source,
  stmobj = model,
  metadata = out$meta,
  uncertainty = "Global"
)
In [ ]:
# Print the summary of the estimated covariate effects.
summary(predict_topics)
In [ ]: