In [ ]:

    
# Attach the packages used in this cell: tidytext supplies unnest_tokens(),
# dplyr supplies select().
library(tidytext)
library(dplyr)

# Pull the saved workspace straight from the web. The code below expects it to
# define `trumptweets` (inferred from how the object is used -- confirm against
# the .Rdata file).
load(url("https://cbail.github.io/Trump_Tweets.Rdata"))

# Keep only the timestamp and the tweet text, then split the text so that each
# row of the result holds a single token in the `word` column.
tidy_trump_tweets <- unnest_tokens(
  select(trumptweets, created_at, text),
  "word",
  text
)



In [ ]:

    
# Clean the token table: drop stop words, drop numeric tokens, strip stray
# whitespace, and reduce every remaining token to its English stem.
library(SnowballC)

data("stop_words")

tidy_trump_tweets <- tidy_trump_tweets %>%
  # Name the join key explicitly instead of relying on a natural join.
  anti_join(stop_words, by = "word") %>%
  # A logical mask stays correct when nothing matches; `x[-grep(...), ]` would
  # drop every row in that case because `-integer(0)` selects no rows at all.
  filter(!grepl("\\b\\d+\\b", word)) %>%
  mutate(
    word = gsub("\\s+", "", word),
    # Direct call replaces the deprecated mutate_at() / funs() combination.
    word = wordStem(word, language = "en")
  )

What are the most common words?



In [ ]:

    
library(ggplot2)

# Count how often each token occurs, most frequent first. Stop words were
# already removed before stemming; this second pass removes stems that are
# themselves stop words.
trump_tweet_top_words <- tidy_trump_tweets %>%
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  arrange(desc(n))

# Drop URL fragments and Twitter artefacts. The pattern is anchored so that
# only those exact tokens are removed: unanchored, "rt" and "amp" would also
# discard every word that merely contains them, and the unescaped "." in
# "t.co" would match any character. A logical mask is used instead of
# `x[-grep(...), ]`, which drops every row when nothing matches.
trump_tweet_top_words <- trump_tweet_top_words %>%
  filter(!grepl("^(https|t\\.co|amp|rt)$", word))

# Create a factor variable whose levels follow descending frequency, so that
# ggplot orders the bars by count rather than alphabetically. This has to
# happen BEFORE the top words are extracted, otherwise the plotted subset
# still carries plain character words.
trump_tweet_top_words$word <- factor(
  trump_tweet_top_words$word,
  levels = trump_tweet_top_words$word[
    order(trump_tweet_top_words$n, decreasing = TRUE)
  ]
)

# Select only the top words. head() copes with tables shorter than 20 rows
# (`[1:20, ]` would pad with NA rows); droplevels() discards the levels of
# words that did not make the cut.
top_20 <- droplevels(head(trump_tweet_top_words, 20))

ggplot(top_20, aes(x = word, y = n, fill = word)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab("Number of Times Word Appears in Trump's Tweets") +
  xlab("") +
  # "none" is the supported way to hide a legend; fill = FALSE is deprecated.
  guides(fill = "none")

Dictionary-based approach



In [ ]:

    
library(stringr)

# Terms that mark a tweet as being about the economy.
economic_dictionary <- c("economy", "unemployment", "trade", "tariffs")

# Collapse the dictionary into one alternation ("economy|unemployment|...") so
# that every tweet is tested against every term. Passing the vector itself as
# the pattern pairs terms with tweets element-wise (or fails outright in recent
# stringr releases), so most tweets would only be checked against one term.
# Matching is case-sensitive and also hits substrings of longer words.
economic_tweets <- trumptweets[
  str_detect(trumptweets$text, paste(economic_dictionary, collapse = "|")),
]
head(economic_tweets$text)

# Number of matching tweets (rows) and columns.
dim(economic_tweets)

# Break the matching tweets down by the `source` column.
table(economic_tweets$source)

Sentiment analysis



In [ ]:

    
# Preview the first rows of the "bing" sentiment lexicon.
head(get_sentiments(lexicon = "bing"))



In [ ]:

    
# Keep only the tokens that appear in the "bing" lexicon, then tally how many
# tokens of each sentiment occur per tweet timestamp.
trump_tweet_sentiment <- count(
  inner_join(tidy_trump_tweets, get_sentiments("bing")),
  created_at,
  sentiment
)

head(trump_tweet_sentiment)



In [ ]:

    
library(ggplot2)

# Reduce each timestamp to its calendar date. No `format` is supplied: the
# previous "%Y-%m-%d %x" was not a usable timestamp format ("%x" stands for a
# locale date, not a time). It is ignored for date-time input and produces NA
# for ordinary "YYYY-MM-DD HH:MM:SS" strings, which the default parser handles.
# NOTE(review): created_at is presumably POSIXct or an ISO-style string --
# confirm against the loaded data.
tidy_trump_tweets$date <- as.Date(tidy_trump_tweets$created_at)

# Count negative lexicon words per day.
trump_sentiment_plot <- tidy_trump_tweets %>%
  # Explicit key instead of a natural join on whatever columns happen to match.
  inner_join(get_sentiments("bing"), by = "word") %>%
  filter(sentiment == "negative") %>%
  count(date, sentiment)

ggplot(trump_sentiment_plot, aes(x = date, y = n)) +
  geom_line(color = "red") +
  theme_minimal() +
  ylab("Frequency of Negative Words in Trump's Tweets") +
  xlab("Date")



In [ ]:

    
library(stm)



In [ ]:

    
# Show the first six rows of the raw tweet table.
head(trumptweets, n = 6L)



In [ ]:

    
# Run stm's textProcessor() on the tweet text, passing the full tweet table
# along as document-level metadata.
processed <- textProcessor(
  documents = trumptweets$text,
  metadata = trumptweets
)



In [ ]:

    
# Hand the processed documents, vocabulary and metadata to prepDocuments();
# the result supplies `out$documents`, `out$vocab` and `out$meta` to the model
# fitting code further down.
out <- prepDocuments(
  documents = processed$documents,
  vocab = processed$vocab,
  meta = processed$meta
)



In [ ]:

    
# Fit the structural topic model on the prepared corpus, capped at 75 EM
# iterations and with progress output switched off.
# NOTE(review): K = 0 together with spectral initialisation appears to rely on
# stm choosing the number of topics itself -- confirm in ?stm.
model <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  K = 0,
  init.type = "Spectral",
  max.em.its = 75,
  verbose = FALSE
)



In [ ]:

    
# Draw the default plot for the fitted topic model.
plot(x = model)



In [ ]:

    
# Regress topic 1 (the left-hand side of the formula) on the tweet `source`
# covariate taken from the prepared metadata.
predict_topics <- estimateEffect(
  formula = 1 ~ source,
  stmobj = model,
  metadata = out$meta,
  uncertainty = "Global"
)



In [ ]:

    
# Print the summary of the estimated covariate effects.
summary(object = predict_topics)



In [ ]: