In [1]:
import pandas as pd
import os,sys,re,csv
import cPickle as pickle
import numpy as np
import matplotlib.pyplot as plt
import operator
%matplotlib inline

In [2]:
# Make the directory that holds the local gp_colours helper importable.
# NOTE(review): this is a machine-specific absolute path; adjust it when the
# notebook is run on another host.
TOOLS_DIR = '/mnt/home/ubuntu/projects/tools/'
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if TOOLS_DIR not in sys.path:
    sys.path.append(TOOLS_DIR)
import gp_colours

In [3]:
# Call the style hook of the local gp_colours module. Its implementation is
# not visible here; presumably it configures the matplotlib look used by the
# plots below (TODO confirm). The recorded output shows it printing
# "SETTING GP STYLE".
gp_colours.setStyle()


SETTING GP STYLE

In [19]:
# Load the pre-computed counters into `data`.
# The file is opened in binary mode (pickles are byte streams) and inside a
# context manager so the handle is closed as soon as loading finishes.
# SECURITY NOTE: unpickling can execute arbitrary code; only load files that
# come from a trusted source.
with open('../data/counters_en.dat', 'rb') as counters_file:
    data = pickle.load(counters_file)
# Alternative input file, kept for reference:
#data=pickle.load(open('../data/counters.dat','r'))

In [17]:
# Print the name of every counter stored under the 'tw' source, one per line.
# (To list the top-level sources instead: print data.keys())
for counter_name in data['tw']:
    print(counter_name)
# Disabled debug aid: dump the 'languages' entries whose key has 'pt' at index 2.
#for k,v in data['tw']['languages'].items():
#    if k[2]=='pt':print k,v
# Finally show the topic labels available under data['tw']['topics'].
print(data['tw']['topics'].keys())


topic_sums
rawdomains
users
links
country
topics
hashtags
topic_coloc
pos
ids
languages
domains
unigrams
time
neg
topicCountry
trigrams
mentions
bigrams
genderTopic
[u'discrimination', u'prevention', u'campaign']

In [20]:
# Display the 'topic_sums' counter of the 'tw' source; the recorded output
# shows a defaultdict mapping each topic label to a numeric total.
data['tw']['topic_sums']


Out[20]:
defaultdict(<type 'int'>, {u'discrimination': 2367.0, u'prevention': 456.0, u'campaign': 300.0})

In [12]:
# Plot the 'time' counter of the 'tw' source using the object's own .plot()
# method (presumably a pandas time series — TODO confirm). The returned axes
# object is what the cell displays as its output.
data['tw']['time'].plot()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb84eefe750>

In [12]:
# Plot the series stored for the 'discrimination' topic of the 'tw' source.
# NOTE(review): this cell carries the same execution count (12) as the
# previous one, so the notebook was not run top to bottom.
data['tw']['topics']['discrimination'].plot()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9909a20b50>

In [6]:
# Plot the 'time' counters of the 'fb' and 'tw' sources one after the other,
# each with its own legend label. No axes are passed explicitly, so both calls
# rely on the plotting library's default target (presumably the same current
# axes, giving an overlay — TODO confirm).
data['fb']['time'].plot(label='FB',legend=True)
data['tw']['time'].plot(label='TW',legend=True)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fddcba19a10>

In [37]:
# Rank the 'fb' unigram (token, count) pairs from most to least frequent.
# The list is sorted ascending by count and then flipped, which keeps the
# exact ordering of equal counts that a sort-then-reverse produces.
v = sorted(data['fb']['unigrams'].items(), key=lambda pair: pair[1])[::-1]

In [38]:
# Show the 30 highest-count (token, count) pairs from the ranked list `v`
# built in the previous cell.
v[0:30]


Out[38]:
[('', 9196),
 ('de', 3234),
 ('e', 2522),
 ('que', 2502),
 ('o', 2079),
 ('do', 1218),
 ('n\xc3\xa3o', 959),
 ('da', 932),
 ('com', 904),
 ('\xc3\xa9', 859),
 ('em', 791),
 ('um', 768),
 ('para', 761),
 ('os', 733),
 ('se', 647),
 ('no', 611),
 ('uma', 576),
 ('por', 479),
 ('na', 467),
 ('mais', 432),
 ('as', 422),
 ('como', 397),
 ('w', 357),
 ('eu', 328),
 ('dos', 312),
 ('ou', 299),
 ('ao', 298),
 ('ser', 292),
 ('locked="false"', 274),
 ('lsdexception', 274)]

In [36]:
# Peek at ten unsorted (token, count) pairs of the 'fb' unigram counter.
# Slicing the result of .items() only works on Python 2, where it returns a
# list rather than a view.
data['fb']['unigrams'].items()[0:10]


Out[36]:
[('', 9196),
 ('sucateamento', 2),
 ('duro', 14),
 ('acima,', 3),
 ('instalado', 1),
 ('mantiveram-se', 1),
 ("hop,\nit's", 1),
 ('vivemos', 5),
 ('1,4', 1),
 ('\nprezados', 1)]

In [1]:
from IPython.core.display import HTML

# Read the notebook's custom stylesheet inside a context manager so the file
# handle is closed right after reading instead of being left open.
with open("../css/custom.css", "r") as css_file:
    styles = css_file.read()
# Leaving the HTML object as the last expression makes the notebook render it,
# which injects the stylesheet contents into the page.
HTML(styles)


Out[1]:

In [ ]: