In [1]:
%load_ext cypher
%matplotlib inline
import pandas as pd


/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.
  "You should import from traitlets.config instead.", ShimWarning)
/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.
  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")

Number of Tweets


In [3]:
# Total number of nodes labelled tweet in the graph.
%cypher MATCH (n:tweet) RETURN count(n)


1 rows affected.
Out[3]:
count(n)
4306752

Number of users


In [4]:
# Total number of nodes labelled user in the graph.
%cypher MATCH (u:user) RETURN count(u)


1 rows affected.
Out[4]:
count(u)
2531874

Top Tweets


In [5]:
%%cypher
MATCH (n:tweet)-[r]-()
// deg counts every relationship touching the tweet, whatever its type or direction
WITH n, count(r) AS deg
ORDER BY deg DESC
LIMIT 10
// resolve the author only for the ten tweets kept above
MATCH (n)<-[:TWEETS]-(u:user)
// sort again at the end because the ordering applied before the second MATCH is not guaranteed to carry through to the returned rows
RETURN u.screen_name AS user, n.tid AS tid, substring(n.text, 0, 20) AS tweet, deg
ORDER BY deg DESC


10 rows affected.
Out[5]:
user tid tweet deg
Louis_Tomlinson 665496205854687232 Thoughts go out to e 38200
NiallOfficial 665322019291013120 thoughts and prayers 27828
Harry_Styles 665284325542371329 Thinking of everyone 22159
justinbieber 665334709996523520 Had a great first sh 17658
Michael5SOS 665336782418919425 all of my love and p 16861
RFCdan 665302723592519680 To people blaming re 14071
jean_jullien 665305363500011521 Peace for Paris http 13472
KingSalman 665642882125209601 Paris massacre is an 12739
Calum5SOS 665313435383672832 Please, pray for Par 12263
taylorswift13 665442458872979460 Praying for Paris. O 11637

Top Users


In [4]:
%%cypher
MATCH (n:user)-[r]-()
// deg counts every relationship attached to the user, whatever its type or direction
RETURN n.screen_name AS user, n.uid, count(r) AS deg
ORDER BY deg DESC
LIMIT 10


10 rows affected.
Out[4]:
user n.uid deg
Louis_Tomlinson 84279963 38335
NiallOfficial 105119490 28089
RecherchesP 4185722537 23333
Harry_Styles 181561712 22686
infos140 1356382759 21227
justinbieber 27260086 21020
nytimes 807095 18266
AP 51241574 17747
jean_jullien 1851229334 17530
Michael5SOS 403246803 16901

Top Hashtags


In [32]:
%%cypher
MATCH (n:hashtag)-[r]-()
// deg counts every relationship attached to the hashtag node, whatever its type or direction
RETURN n.hashtag AS hashtags, count(r) AS deg
ORDER BY deg DESC
LIMIT 10


10 rows affected.
Out[32]:
hashtags deg
paris 719571
prayforparis 368931
parisattacks 206159
prayers4paris 52805
bataclan 51524
rechercheparis 46333
prayersforparis 31736
france 31605
porteouverte 30484
fusillade 27833

Language data


In [6]:
# Tweet counts per language code, most frequent first; tweets without a lang property are skipped.
# count(*) already groups by n.lang and yields one row per language, so DISTINCT is not needed.
langs = %cypher MATCH (n:tweet) WHERE n.lang IS NOT NULL RETURN n.lang, count(*) AS num_tweets ORDER BY num_tweets DESC


59 rows affected.

In [21]:
# Turn the language query result into a DataFrame (presumably pandas, via ipython-cypher's
# get_dataframe) and chart its first ten rows. The query sorts by num_tweets descending,
# so those rows are the ten most common languages.
lang_df = langs.get_dataframe()
# A title lets the figure stand on its own; plot() still returns the Axes, so the cell output is unchanged.
lang_df.set_index("n.lang")[:10].plot(kind="bar", title="Tweets per language (top 10)")


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3701e70f60>

% of tweets with geotags


In [41]:
# Total tweet count, repeated here as the denominator for the geotag share.
%cypher MATCH (n:tweet) RETURN count(n)


1 rows affected.
Out[41]:
count(n)
4306752

In [42]:
# Tweets that carry a coordinates property, i.e. the geotagged ones.
%cypher MATCH (n:tweet) WHERE n.coordinates IS NOT NULL RETURN count(n)


1 rows affected.
Out[42]:
count(n)
55881

In [44]:
# Share of tweets with coordinates: the geotagged count over the total count,
# both copied by hand from the two query results above.
55881 / 4306752.0  # ~1.3%


Out[44]:
0.012975207302394008

Tweets by country


In [37]:
# Geotagged tweets (coordinates present) counted per country, largest first.
# count(*) already groups by n.country and yields one row per country, so DISTINCT is not needed.
countries = %cypher MATCH (n:tweet) WHERE n.coordinates IS NOT NULL RETURN n.country, count(*) AS num_tweets ORDER BY num_tweets DESC


193 rows affected.

In [48]:
# Convert the per-country query result into a DataFrame (presumably pandas) for plotting.
countries_df = countries.get_dataframe()

In [51]:
# Bar chart of the first twenty rows. The query sorts by num_tweets descending,
# so these are the twenty countries with the most geotagged tweets.
# A title lets the figure stand on its own; plot() still returns the Axes, so the cell output is unchanged.
countries_df.set_index("n.country")[:20].plot(kind="bar", title="Geotagged tweets per country (top 20)")


Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3701c30d68>

Average sentiment in English


In [56]:
# Mean of the polarity property over tweets whose lang is "en".
%cypher MATCH (n:tweet) WHERE n.lang = "en" RETURN avg(n.polarity) AS average_en_polarity


1 rows affected.
Out[56]:
average_en_polarity
0.013533890799815687

In [57]:
# Mean of the subjectivity property over tweets whose lang is "en".
%cypher MATCH (n:tweet) WHERE n.lang = "en" RETURN avg(n.subjectivity) AS average_en_subjectivity


1 rows affected.
Out[57]:
average_en_subjectivity
0.2964692099365155

Average sentiment in French


In [58]:
# Mean of the polarity property over tweets whose lang is "fr".
%cypher MATCH (n:tweet) WHERE n.lang = "fr" RETURN avg(n.polarity) AS average_fr_polarity


1 rows affected.
Out[58]:
average_fr_polarity
0.033514273832053354

In [59]:
# Mean of the subjectivity property over tweets whose lang is "fr".
%cypher MATCH (n:tweet) WHERE n.lang = "fr" RETURN avg(n.subjectivity) AS average_fr_subjectivity


1 rows affected.
Out[59]:
average_fr_subjectivity
0.3372897579084551

In [ ]: