In [3]:
import numpy as np
import pandas as pd
In [4]:
# Load the tweet export 'mytwtl.csv' (path is relative to the working directory)
# into a DataFrame; a later cell reads its 'tweet' column.
df = pd.read_csv('mytwtl.csv')
In [5]:
# Preview the first rows to sanity-check what was parsed from the CSV.
df.head()
Out[5]:
In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
In [19]:
# Concatenate every tweet into one text blob. Missing values are dropped and
# the rest coerced to str first: str.join raises TypeError on a NaN (float)
# entry, which read_csv produces for empty fields.
words = ' '.join(df['tweet'].dropna().astype(str))

# Remove URLs, retweet markers ("RT"), and twitter handles (@user).
no_urls_no_tags = " ".join(
    word for word in words.split()
    if 'http' not in word
    and not word.startswith('@')
    and word != 'RT'
)

# Build the cloud on a white 1800x800 canvas, skipping wordcloud's built-in
# STOPWORDS set.
wordcloud = WordCloud(
    #font_path='CabinSketch-Bold.ttf',
    stopwords=STOPWORDS,
    background_color='white',
    width=1800,
    height=800
).generate(no_urls_no_tags)

plt.imshow(wordcloud)
plt.axis('off')
# Save before plt.show() so the figure is written while it is still current.
plt.savefig('./my_twitter_wordcloud_3.png', dpi=300)
plt.show()
In [20]:
# Load the mask with matplotlib instead of scipy.misc.imread: that function
# was deprecated in SciPy 1.0 and removed in SciPy 1.2, so the old import
# fails on current installations.
mask_pixels = plt.imread('./twitter_mask.png')
if mask_pixels.dtype.kind == 'f':
    # matplotlib returns PNG data as floats in [0, 1]; rescale to 0-255.
    mask_pixels = mask_pixels * 255.0
if mask_pixels.ndim == 3:
    # Collapse RGB(A) to a single grayscale channel using ITU-R 601 luma
    # weights (the alpha channel, if any, is ignored). This stands in for the
    # flatten=True option of the removed scipy reader.
    mask_pixels = np.dot(mask_pixels[..., :3], [0.299, 0.587, 0.114])
# WordCloud treats pixels equal to 255 as masked out and warns on float
# masks, so round to exact unsigned bytes.
twitter_mask = np.rint(mask_pixels).astype(np.uint8)

# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is supplied (the mask's shape is used); they are kept to leave the call
# unchanged.
wordcloud = WordCloud(
    #font_path='CabinSketch-Bold.ttf',
    stopwords=STOPWORDS,
    background_color='white',
    width=1800,
    height=1400,
    mask=twitter_mask
).generate(no_urls_no_tags)

plt.imshow(wordcloud)
plt.axis("off")
# Save before plt.show() so the figure is written while it is still current.
plt.savefig('./my_twitter_wordcloud_4.png', dpi=300)
plt.show()
In [ ]: