notebook.community

Edit and run



In [3]:

    
import numpy as np
import pandas as pd



In [4]:

    
# Load the exported tweet timeline into a DataFrame.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks
# like a saved index; consider index_col=0 when regenerating outputs.
df = pd.read_csv(filepath_or_buffer='mytwtl.csv')



In [5]:

    
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)









    Out[5]:






  
    
      
      Unnamed: 0
      timestamp
      tweet
    
  
  
    
      0
       0
       2015-08-03 21:45:12
       RT @aficionado: Loving @webscriptio by @smarx....
    
    
      1
       1
       2015-08-03 00:15:37
       RT @indizen_insight: When Data Cleaning Meets ...
    
    
      2
       2
       2015-06-03 17:49:09
       US economy adds 295,000 jobs in February http:...
    
    
      3
       3
       2015-06-03 16:44:37
       RT @DiegoKuonen: MT @larrykim: Great stuff @ro...
    
    
      4
       4
       2015-06-03 13:04:45
       RT @ndiakopoulos: Folks asking for my slides o...



In [6]:

    
%matplotlib inline
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS



In [19]:

    
# Build one whitespace-separated corpus from every tweet. Missing values are
# dropped and the rest coerced to str, because str.join raises TypeError on
# any non-string item (e.g. NaN from an empty CSV field).
words = ' '.join(df['tweet'].dropna().astype(str))

# Remove tokens containing a URL, Twitter handles (tokens starting with '@'),
# and the bare retweet marker 'RT'.
no_urls_no_tags = ' '.join(
    word for word in words.split()
    if 'http' not in word
    and not word.startswith('@')
    and word != 'RT'
)

# Render the cloud on a white 1800x800 canvas, filtering with the stop-word
# set shipped with the wordcloud package.
wordcloud = WordCloud(
    # font_path='CabinSketch-Bold.ttf',  # optional custom font
    stopwords=STOPWORDS,
    background_color='white',
    width=1800,
    height=800,
).generate(no_urls_no_tags)

# Display the cloud without axes and keep a copy on disk.
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('./my_twitter_wordcloud_3.png', dpi=300)
plt.show()



In [20]:

    
# Load the mask with matplotlib: scipy.misc.imread (used here previously) was
# removed in SciPy 1.2, so the old import fails on a current environment.
# plt.imread returns PNG data as floats in [0, 1], whereas WordCloud treats
# pixels equal to 255 as masked out, so rescale to unsigned bytes.
twitter_mask = (plt.imread('./twitter_mask.png') * 255).round().astype(np.uint8)

# Render the cloud inside the mask shape. width/height are not passed because
# WordCloud takes the canvas size from the mask when one is supplied.
wordcloud = WordCloud(
    # font_path='CabinSketch-Bold.ttf',  # optional custom font
    stopwords=STOPWORDS,
    background_color='white',
    mask=twitter_mask,
).generate(no_urls_no_tags)

# Display the cloud without axes and keep a copy on disk.
plt.imshow(wordcloud)
plt.axis("off")
plt.savefig('./my_twitter_wordcloud_4.png', dpi=300)
plt.show()



In [ ]:

	Unnamed: 0	timestamp	tweet
0	0	2015-08-03 21:45:12	RT @aficionado: Loving @webscriptio by @smarx....
1	1	2015-08-03 00:15:37	RT @indizen_insight: When Data Cleaning Meets ...
2	2	2015-06-03 17:49:09	US economy adds 295,000 jobs in February http:...
3	3	2015-06-03 16:44:37	RT @DiegoKuonen: MT @larrykim: Great stuff @ro...
4	4	2015-06-03 13:04:45	RT @ndiakopoulos: Folks asking for my slides o...