In [1]:
# Load the IPython extension that provides the %cypher line magic used by the
# query cells in this notebook.
# NOTE(review): no connection string is passed to %cypher in any visible cell;
# presumably the extension's default/configured endpoint is used — confirm.
%load_ext cypher
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import pandas as pd
In [5]:
# Ten most-connected tweets: for each tweet text, count the relationships (any
# type, either direction) linking it to another :tweet node, highest first.
# NOTE(review): the grouping key is n.text, so distinct tweets that share the
# same text are counted together — confirm that is intended.
top_tweets = %cypher match (n:tweet)-[r]-(m:tweet) return n.text, count(r) as deg order by deg desc limit 10
In [6]:
# Convert the query result to a DataFrame; as the cell's last expression it is
# what the notebook displays.
top_tweets.get_dataframe()
Out[6]:
In [35]:
# Ten most-connected hashtags: relationships of any type and direction between
# a :hashtag node and any other node, counted per n.hashtag value.
top_tags = %cypher match (n:hashtag)-[r]-(m) return n.hashtag, count(r) as deg order by deg desc limit 10
In [36]:
# Convert the query result to a DataFrame; as the cell's last expression it is
# what the notebook displays.
top_tags.get_dataframe()
Out[36]:
In [10]:
# Ten most-connected users: relationships of any type and direction between a
# :user node and any other node, counted per n.screen_name value.
top_users = %cypher match (n:user)-[r]-(m) return n.screen_name, count(r) as deg order by deg desc limit 10
In [11]:
# Convert the query result to a DataFrame; as the cell's last expression it is
# what the notebook displays.
top_users.get_dataframe()
Out[11]:
In [3]:
# Fetch the tid and lang properties of every :tweet node whose lang is set.
tweets = %cypher match (n:tweet) where n.lang is not null return n.tid, n.lang
In [6]:
# Export the (tid, lang) rows to CSV. The DataFrame is written straight out
# rather than being bound back onto `tweets`, so `tweets` stays the query
# result and this cell can be re-run without an AttributeError.
# Default to_csv options are used, so the row index is written as the first
# column of the file.
tweets.get_dataframe().to_csv("data/tweets_w_lang.csv")
In [7]:
# Unbind `tweets` — presumably to free memory before the edge query; the name
# is not used again in the cells visible here.
del tweets
In [9]:
tweet_edges = %cypher match (n:tweet)--(m:tweet) where n.lang is not null and m.lang is not null return n.tid, m.tid
In [10]:
tweet_edges = tweet_edges.get_dataframe()
tweet_edges.head()
Out[10]:
In [11]:
# Write the edge list to CSV (default options, so the row index is included as
# the first column).
tweet_edges.to_csv("data/tweets_w_lang_edge.csv")
In [ ]:
In [ ]:
In [13]:
# Load the tab-separated tweet table from disk.
# NOTE(review): data/clean_tweets.csv is not written by any cell visible in
# this notebook — presumably produced by an external cleaning step; confirm.
tweets_df = pd.read_csv("data/clean_tweets.csv", sep="\t")
In [15]:
# Preview the first five rows to check the columns that were loaded.
tweets_df.head(n=5)
Out[15]:
In [ ]:
# Group all tweets by the value of their "lang" column.
lang_groups = tweets_df.groupby(by="lang")
In [ ]:
In [ ]:
# Keep only the rows that have a value in the "coordinates" column.
geotweets = tweets_df.dropna(axis=0, subset=["coordinates"])
In [ ]:
# Group the tweets that have coordinates by their "country" column.
country_groups = geotweets.groupby(by="country")
In [ ]:
# Group the tweets that have coordinates by their "name" column.
# NOTE(review): the variable name suggests "name" holds a city/place name —
# confirm against the CSV schema.
city_groups = geotweets.groupby(by="name")
In [ ]:
# Subset of tweets whose "lang" value is exactly "fr".
french_tweets_df = tweets_df.loc[tweets_df["lang"].eq("fr")]
In [ ]:
# Subset of tweets whose "lang" value is exactly "en".
english_tweets_df = tweets_df.loc[tweets_df["lang"].eq("en")]