In [3]:
import numpy as np
import pandas as pd

In [4]:
# Read the exported tweet timeline into a DataFrame; later cells use its 'tweet' column.
df = pd.read_csv(filepath_or_buffer='mytwtl.csv')

In [5]:
# Preview the first five rows to confirm the columns that were loaded.
df.head(n=5)


Out[5]:
Unnamed: 0 timestamp tweet
0 0 2015-08-03 21:45:12 RT @aficionado: Loving @webscriptio by @smarx....
1 1 2015-08-03 00:15:37 RT @indizen_insight: When Data Cleaning Meets ...
2 2 2015-06-03 17:49:09 US economy adds 295,000 jobs in February http:...
3 3 2015-06-03 16:44:37 RT @DiegoKuonen: MT @larrykim: Great stuff @ro...
4 4 2015-06-03 13:04:45 RT @ndiakopoulos: Folks asking for my slides o...

In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

In [19]:
# Build a rectangular word cloud from the text of every tweet and save it as a PNG.

# Concatenate all tweets into one string. Rows whose tweet is missing are
# dropped first, because str.join() raises TypeError on a NaN entry.
words = ' '.join(df['tweet'].dropna().astype(str))

# remove URLs, RTs, and twitter handles
no_urls_no_tags = " ".join(
    word for word in words.split()
    if 'http' not in word
    and not word.startswith('@')
    and word != 'RT'
)

wordcloud = WordCloud(
    # font_path='CabinSketch-Bold.ttf',  # optional custom font, disabled
    stopwords=STOPWORDS,
    background_color='white',
    width=1800,
    height=800,
).generate(no_urls_no_tags)

# Save before plt.show(): the figure is written to disk while it is still current.
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('./my_twitter_wordcloud_3.png', dpi=300)
plt.show()



In [20]:
# Build a word cloud shaped by the mask image and save it as a PNG.
# Reuses `no_urls_no_tags` from the previous cell.

# Load the mask with matplotlib: scipy.misc.imread was deprecated in SciPy 1.0
# and removed in SciPy 1.2, so importing it fails on current installations.
mask_pixels = plt.imread('./twitter_mask.png')

# plt.imread returns PNG data as floats in [0, 1]; rescale to the 0-255 range
# so that pure white ends up as exactly 255 (the value WordCloud masks out).
if mask_pixels.dtype.kind == 'f':
    mask_pixels = mask_pixels * 255.0

# Collapse colour images to one luminance channel (ITU-R 601 weights, the same
# transform the old flatten=True applied); any alpha channel is ignored.
if mask_pixels.ndim == 3:
    mask_pixels = np.dot(mask_pixels[..., :3], np.array([0.299, 0.587, 0.114]))

# WordCloud expects an unsigned-byte mask; rounding absorbs float error.
twitter_mask = np.round(mask_pixels).astype(np.uint8)

# NOTE: per the wordcloud documentation, width/height are ignored when a mask
# is given (the mask's shape is used); they are kept here to match the
# original call.
wordcloud = WordCloud(
    # font_path='CabinSketch-Bold.ttf',  # optional custom font, disabled
    stopwords=STOPWORDS,
    background_color='white',
    width=1800,
    height=1400,
    mask=twitter_mask,
).generate(no_urls_no_tags)

# Save before plt.show(): the figure is written to disk while it is still current.
plt.imshow(wordcloud)
plt.axis("off")
plt.savefig('./my_twitter_wordcloud_4.png', dpi=300)
plt.show()



In [ ]: