In [1]:
# Load the `cypher` IPython extension; it provides the %cypher / %%cypher
# magics that every query cell in this notebook relies on.
%load_ext cypher
# Draw matplotlib figures inline, below the cell that produces them.
%matplotlib inline
import pandas as pd
In [3]:
%cypher match (n:tweet) return count(n)
Out[3]:
In [4]:
%cypher match (u:user) return count(u)
Out[4]:
In [5]:
%%cypher
match (n:tweet)-[r]-()
with n, count(r) as deg
order by deg desc
limit 10
match (n)<-[:TWEETS]-(u:user)
return u.screen_name as user, n.tid as tid, substring(n.text, 0, 20) as tweet, deg
Out[5]:
In [4]:
%%cypher
match (n:user)-[r]-()
return n.screen_name as user, n.uid, count(r) as deg
order by deg desc
limit 10
Out[4]:
In [32]:
%%cypher
match (n:hashtag)-[r]-()
return n.hashtag as hashtags, count(r) as deg
order by deg desc
limit 10
Out[32]:
In [6]:
langs = %cypher match (n:tweet) where n.lang is not null return distinct n.lang, count(*) as num_tweets order by num_tweets desc
In [21]:
# Turn the per-language query result into a table (presumably a pandas
# DataFrame, given the calls below) and chart its first ten rows as bars,
# labelled by language code. The rows arrive already sorted by the query.
lang_df = langs.get_dataframe()
lang_df.set_index("n.lang").head(10).plot(kind="bar")
Out[21]:
In [41]:
%cypher match (n:tweet) return count(n)
Out[41]:
In [42]:
%cypher match (n:tweet) where n.coordinates is not null return count(n)
Out[42]:
In [44]:
# Fraction of tweets that carry coordinates.
# The two counts are hard-coded; presumably they were copied from the outputs
# of the two count queries run just before this cell (TODO confirm and
# refresh them if the graph changes).
tweets_with_coordinates = 55881
total_tweets = 4306752.0  # float literal keeps this a true division on Python 2 as well
tweets_with_coordinates / total_tweets  # ~0.01298, i.e. about 1.3% (the earlier "1.2%" note was truncated, not rounded)
Out[44]:
In [37]:
countries = %cypher match (n:tweet) where n.coordinates is not null return distinct n.country, count(*) as num_tweets order by num_tweets desc
In [48]:
# Convert the per-country query result into a table for plotting
# (presumably a pandas DataFrame; the next cell calls set_index/plot on it).
countries_df = countries.get_dataframe()
In [51]:
# Bar chart of the first twenty rows of the country table, labelled by
# country. The rows arrive already sorted by tweet count from the query.
countries_df.set_index("n.country").head(20).plot(kind="bar")
Out[51]:
In [56]:
%cypher match (n:tweet) where n.lang = "en" return avg(n.polarity) as average_en_polarity
Out[56]:
In [57]:
%cypher match (n:tweet) where n.lang = "en" return avg(n.subjectivity) as average_en_subjectivity
Out[57]:
In [58]:
%cypher match (n:tweet) where n.lang = "fr" return avg(n.polarity) as average_fr_polarity
Out[58]:
In [59]:
%cypher match (n:tweet) where n.lang = "fr" return avg(n.subjectivity) as average_fr_subjectivity
Out[59]:
In [ ]: