In [1]:
%load_ext cypher
%matplotlib inline
import pandas as pd
In [2]:
%cypher match (n:tweet) return count(n)
Out[2]:
In [3]:
%cypher match (u:user) return count(u)
Out[3]:
In [4]:
%%cypher
MATCH (n:tweet)-[r]-()
// deg = relationships attached to the tweet, of any type and in either direction.
WITH n, count(r) AS deg
ORDER BY deg DESC
LIMIT 10
// For each of the ten highest-degree tweets, find the user with a TWEETS relationship to it.
MATCH (n)<-[:TWEETS]-(u:user)
RETURN u.screen_name AS user, n.tid AS tid, substring(n.text, 0, 20) AS tweet, deg
// Sort again: row order from the earlier ORDER BY is not guaranteed to survive the second MATCH.
ORDER BY deg DESC
Out[4]:
In [5]:
%%cypher
MATCH (n:user)-[r]-()
// deg = relationships attached to the user, of any type and in either direction.
RETURN n.screen_name AS user, n.uid, count(r) AS deg
ORDER BY deg DESC
LIMIT 10
Out[5]:
In [6]:
%%cypher
MATCH (n:hashtag)-[r]-()
// deg = relationships attached to the hashtag node, of any type and in either direction.
RETURN n.hashtag AS hashtags, count(r) AS deg
ORDER BY deg DESC
LIMIT 10
Out[6]:
In [7]:
langs = %cypher match (n:tweet) where n.lang is not null return distinct n.lang, count(*) as num_tweets order by num_tweets desc
In [8]:
# Materialise the per-language counts as a DataFrame so pandas can plot them.
lang_df = langs.get_dataframe()

# The query already sorts by num_tweets descending, so the first 10 rows are the
# 10 most frequent languages. Title and axis labels let the figure stand alone.
ax = lang_df.set_index("n.lang")[:10].plot(kind="bar", title="Top 10 tweet languages")
ax.set_xlabel("Language (n.lang)")
# The trailing semicolon suppresses the text repr of the label object.
ax.set_ylabel("Number of tweets");
Out[8]:
In [9]:
%cypher match (n:tweet) return count(n)
Out[9]:
In [10]:
%cypher match (n:tweet) where n.coordinates is not null return count(n)
Out[10]:
In [11]:
5213 / 337174.0 # 1.5%
Out[11]:
In [12]:
countries = %cypher match (n:tweet) where n.coordinates is not null return distinct n.country, count(*) as num_tweets order by num_tweets desc
In [13]:
# Convert the per-country query result into a DataFrame so it can be plotted.
countries_df = countries.get_dataframe()
In [14]:
# Bar chart of the first 20 rows, indexed by country. The rows come from a query
# that keeps only tweets with coordinates and sorts by num_tweets descending.
# Title and axis labels let the figure stand alone.
ax = countries_df.set_index("n.country")[:20].plot(
    kind="bar", title="Top 20 countries by number of geotagged tweets"
)
ax.set_xlabel("Country (n.country)")
# The trailing semicolon suppresses the text repr of the label object.
ax.set_ylabel("Number of tweets");
Out[14]:
In [15]:
colombia_cities = %cypher match (t:tweet) where t.country = "Colombia" return distinct t.full_name, count(*) as num_tweets order by num_tweets desc
In [16]:
# Tweets whose country property is "Colombia", grouped by place name (t.full_name).
# The query sorts by count descending, so the first 20 rows are the 20 most frequent places.
colombia_top20 = colombia_cities.get_dataframe().set_index("t.full_name")[:20]
# Title and axis labels let the figure stand alone.
ax = colombia_top20.plot(kind="bar", title="Colombia: top 20 places by number of tweets")
ax.set_xlabel("Place (t.full_name)")
# The trailing semicolon suppresses the text repr of the label object.
ax.set_ylabel("Number of tweets");
Out[16]:
In [ ]: