In [1]:
    
%load_ext cypher
%matplotlib inline
import pandas as pd
    
In [2]:
    
%cypher match (n:tweet) return count(n)
    
    
    Out[2]:
In [3]:
    
%cypher match (u:user) return count(u)
    
    
    Out[3]:
In [4]:
    
%%cypher
match (n:tweet)-[r]-()
with n, count(r) as deg
order by deg desc
limit 10
match (n)<-[:TWEETS]-(u:user)
return u.screen_name as user, n.tid as tid, substring(n.text, 0, 20) as tweet, deg
    
    
    Out[4]:
In [5]:
    
%%cypher
match (n:user)-[r]-()
return n.screen_name as user, n.uid, count(r) as deg
order by deg desc
limit 10
    
    
    Out[5]:
In [6]:
    
%%cypher
match (n:hashtag)-[r]-()
return n.hashtag as hashtags, count(r) as deg
order by deg desc
limit 10
    
    
    Out[6]:
In [7]:
    
langs = %cypher match (n:tweet) where n.lang is not null return distinct n.lang, count(*) as num_tweets order by num_tweets desc
    
    
In [8]:
    
# Convert the per-language query result to a DataFrame and chart its first ten rows.
# The query behind `langs` orders by num_tweets descending, so these are the ten
# languages with the most tweets.
lang_df = langs.get_dataframe()
# A title is set so the figure can be read without the surrounding cells.
lang_df.set_index("n.lang")[:10].plot(kind="bar", title="Tweets per language (top 10)")
    
    Out[8]:
    
In [9]:
    
%cypher match (n:tweet) return count(n)
    
    
    Out[9]:
In [10]:
    
%cypher match (n:tweet) where n.coordinates is not null return count(n)
    
    
    Out[10]:
In [11]:
    
5213 / 337174.0  # 1.5%
    
    Out[11]:
In [12]:
    
countries = %cypher match (n:tweet) where n.coordinates is not null return distinct n.country, count(*) as num_tweets order by num_tweets desc
    
    
In [13]:
    
# Convert the per-country query result via get_dataframe(); the next cell indexes it
# by the "n.country" column and plots it.
countries_df = countries.get_dataframe()
    
In [14]:
    
# Bar chart of the first 20 rows of the per-country counts (the source query sorts by
# num_tweets descending and only considers tweets that have coordinates).
# A title is set so the figure can be read without the surrounding cells.
countries_df.set_index("n.country")[:20].plot(kind="bar", title="Tweets with coordinates per country (top 20)")
    
    Out[14]:
    
In [15]:
    
colombia_cities = %cypher match (t:tweet) where t.country = "Colombia" return distinct t.full_name, count(*) as num_tweets order by num_tweets desc
    
    
In [16]:
    
# Bar chart of the first 20 rows of the Colombia counts, indexed by `t.full_name`
# (the source query sorts by num_tweets descending).
# A title is set so the figure can be read without the surrounding cells.
colombia_cities.get_dataframe().set_index("t.full_name")[:20].plot(kind="bar", title="Tweets in Colombia by full_name (top 20)")
    
    Out[16]:
    
In [ ]: