In [4]:
%load_ext cypher
%matplotlib inline
import pandas as pd


The cypher extension is already loaded. To reload it, use:
  %reload_ext cypher

Number of Nodes


In [30]:
%cypher match (n) return count(*) as num_nodes


1 rows affected.
Out[30]:
num_nodes
10991160

In [38]:
%cypher match (n:tweet) return count (*) as num_tweets


1 rows affected.
Out[38]:
num_tweets
7872765

In [39]:
%cypher match (n:user) return count (*) as num_users


1 rows affected.
Out[39]:
num_users
2812265

In [40]:
%cypher match (n:hashtag) return count (*) as num_hashtags


1 rows affected.
Out[40]:
num_hashtags
306130

Number of Edges


In [36]:
%cypher match (n)-[r]->() return count(*) as num_edges


1 rows affected.
Out[36]:
num_edges
28317710

Top Tweets (by number of relationships to other tweets)


In [10]:
top_tweets = %cypher match (n:tweet)-[r]-(m:tweet) return n.tid, n.text, count(r) as deg order by deg desc limit 10


10 rows affected.

In [11]:
# Display the top-tweets query result as a table via get_dataframe().
top_tweets.get_dataframe()


Out[11]:
n.tid n.text deg
0 678082461973245952 art exhibit and chill https://t.co/qp1Q4zJIx2 20575
1 679696820457594882 Are you an artist? Register on https://t.co/bE... 16002
2 679728153414230016 Art exhibits and chill? https://t.co/69FwPf8G3r 14075
3 677598064933236736 You all created a beautiful work of art I am s... 13661
4 678603783434276864 This girl ain't no stripper she an artist this... 10642
5 674032794289446912 A beautiful photo exhibiting the art that is t... 9474
6 667196563760934912 art museum date??? coffee shop date???? librar... 8943
7 682044628061806592 i don't get the art of cooking. i wouldn't eve... 8873
8 683025663402610688 btw my 5 fav albums of 2015\n\n1 - To Pimp A B... 8849
9 681200600558645248 If Gucci and future ever collab this gotta be ... 8073

Top Hashtags (by number of relationships)


In [12]:
top_tags = %cypher match (n:hashtag)-[r]-(m) return n.hashtag, count(r) as deg order by deg desc limit 10


10 rows affected.

In [13]:
# Display the top-hashtags query result as a table via get_dataframe().
top_tags.get_dataframe()


Out[13]:
n.hashtag deg
0 art 1900909
1 photography 242065
2 painting 226498
3 photo 148847
4 streetart 126865
5 artist 117023
6 drawing 96294
7 nature 89511
8 graffiti 87137
9 travel 81343

Top Users (by number of relationships)


In [14]:
top_users =  %cypher match (n:user)-[r]-(m) return n.uid, n.screen_name, count(r) as deg order by deg desc limit 10


10 rows affected.

In [15]:
# Display the top-users query result as a table via get_dataframe().
top_users.get_dataframe()


Out[15]:
n.uid n.screen_name deg
0 10228272 YouTube 68751
1 11522502 Etsy 63921
2 458084183 FleblancArt 61787
3 3058517419 irelandarthouse 58391
4 2236416063 mjesusgz 37383
5 136250147 fleblancphoto 34679
6 23636912 alisonjardine 31468
7 36958530 AuthorAkansha 24133
8 2569637460 FREECAMERA1 23870
9 2274864469 QueGraffiti 23519

Top Languages


In [41]:
top_langs = %cypher match (n:tweet) where n.lang is not null return distinct n.lang, count(n.lang) as num_tweets order by num_tweets desc


60 rows affected.

In [42]:
# Display only the first 20 rows of the per-language counts.
top_langs.get_dataframe().head(20)


Out[42]:
n.lang num_tweets
0 en 6334635
1 und 329079
2 es 183748
3 fr 128929
4 de 78042
5 in 70440
6 ja 69331
7 it 57027
8 pt 56875
9 tl 39326
10 nl 28506
11 et 28114
12 tr 27431
13 ar 20223
14 pl 17358
15 th 15124
16 ro 13401
17 ht 11964
18 ru 10457
19 cs 10330

Top Places (by tweet `full_name`; includes cities, regions, and countries)


In [24]:
top_locs = %cypher match (n:tweet) where n.full_name is not null return distinct n.full_name, count(n.full_name) as num_tweets order by num_tweets desc


14674 rows affected.

In [25]:
# Display only the first 20 rows of the per-place counts.
top_locs.get_dataframe().head(20)


Out[25]:
n.full_name num_tweets
0 Manhattan, NY 5691
1 Los Angeles, CA 4114
2 Ciudad Autónoma de Buenos Aires, Argentina 4069
3 Chicago, IL 1847
4 Miami, FL 1306
5 Toronto, Ontario 1215
6 Paris, Ile-de-France 1161
7 Philadelphia, PA 1145
8 London, England 1096
9 San Francisco, CA 1064
10 İstanbul, Türkiye 1060
11 Brooklyn, NY 981
12 Washington, DC 967
13 Austria 900
14 George Town, Pinang 897
15 New York, USA 829
16 Houston, TX 827
17 Florida, USA 797
18 Quezon City, National Capital Region 769
19 Seattle, WA 746

Top Countries


In [26]:
top_countries = %cypher match (n:tweet) where n.country is not null return distinct n.country, count(n.country) as num_tweets order by num_tweets desc


194 rows affected.

In [28]:
# Display only the first 20 rows of the per-country counts.
top_countries.get_dataframe().head(20)


Out[28]:
n.country num_tweets
0 United States 71288
1 United Kingdom 12869
2 Argentina 4756
3 Canada 4292
4 Indonesia 4235
5 Republika ng Pilipinas 4005
6 Brasil 3712
7 Malaysia 3456
8 France 3420
9 Italia 3012
10 Australia 2355
11 Türkiye 2316
12 ประเทศไทย 2189
13 España 2153
14 日本 1738
15 India 1656
16 Deutschland 1527
17 Rossiya 1412
18 Österreich 1296
19 México 1244