In [1]:
%load_ext watermark
%watermark -a 'Vahid Mirjalili' -d -p numpy,numexpr,pandas,matplotlib,plotly -v


Vahid Mirjalili 24/12/2014 

CPython 2.7.3
IPython 2.3.1

numpy 1.9.1
numexpr 2.2.2
pandas 0.15.1
matplotlib 1.4.2
plotly 1.4.7

In [2]:
from matplotlib import pyplot as plt

import pandas as pd
import numpy as np
import scipy

%matplotlib inline

In [3]:
df = pd.read_table('../data/labeledTrainData.tsv')
df.head()


Out[3]:
id sentiment review
0 5814_8 1 With all this stuff going down at the moment w...
1 2381_9 1 \The Classic War of the Worlds\" by Timothy Hi...
2 7759_3 0 The film starts with a manager (Nicholas Bell)...
3 3630_4 0 It must be assumed that those who praised this...
4 9495_8 1 Superbly trashy and wondrously unpretentious 8...

Counting te number of examples in each class


In [4]:
print(sum(df.iloc[:,1] == 0))

print(sum(df.iloc[:,1] == 1))


12500
12500

Wordclouds of + and - Reviews


In [10]:
import wordcloud

pos_reviews = ' '.join(df.loc[df.iloc[:,1]==1, 'review']).decode("utf-8", "replace")


pos_wordcloud = wordcloud.WordCloud( 
                      stopwords=wordcloud.STOPWORDS,
                      background_color='gray',
                      width=1000,
                      height=600
            ).generate(pos_reviews)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-10-5820ad2dfff7> in <module>()
      9                       width=1000,
     10                       height=600
---> 11             ).generate(pos_reviews)
     12 

/usr/local/lib/python2.7/dist-packages/wordcloud/wordcloud.pyc in generate(self, text)
    311         """
    312         self.process_text(text)
--> 313         self.fit_words(self.words_)
    314         return self
    315 

/usr/local/lib/python2.7/dist-packages/wordcloud/wordcloud.pyc in fit_words(self, words)
    178             while True:
    179                 # try to find a position
--> 180                 font = ImageFont.truetype(self.font_path, font_size)
    181                 # transpose font optionally
    182                 if random_state.random() < self.prefer_horizontal:

/usr/local/lib/python2.7/dist-packages/PIL/ImageFont.pyc in truetype(font, size, index, encoding, filename)
    226 
    227     try:
--> 228         return FreeTypeFont(font, size, index, encoding)
    229     except IOError:
    230         if sys.platform == "win32":

/usr/local/lib/python2.7/dist-packages/PIL/ImageFont.pyc in __init__(self, font, size, index, encoding, file)
    129 
    130         if isPath(font):
--> 131             self.font = core.getfont(font, size, index, encoding)
    132         else:
    133             self.font_bytes = font.read()

/usr/local/lib/python2.7/dist-packages/PIL/ImageFont.pyc in __getattr__(self, id)
     40     # module placeholder
     41     def __getattr__(self, id):
---> 42         raise ImportError("The _imagingft C module is not installed")
     43 
     44 try:

ImportError: The _imagingft C module is not installed

In [ ]: