notebook.community

Edit and run



In [1]:

    
# Notebook setup.
# Load the IPython extension named `cypher` (presumably the provider of the
# `%cypher` line magic used by the query cells below — confirm).
%load_ext cypher
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import pandas as pd









    



/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.
  "You should import from traitlets.config instead.", ShimWarning)
/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.
  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")



In [5]:

    
# Top 10 tweets by degree: count the relationships (in either direction)
# between a tweet and other tweets, sorted by that count descending.
# NOTE(review): the saved Out[6] table shows a column `n` holding whole node
# dicts rather than `n.text`, so that output appears to come from an earlier
# version of this query — re-run to refresh.
# NOTE(review): returning n.text presumably groups the count by text, which
# would merge distinct tweets with identical text — confirm this is intended.
top_tweets = %cypher match (n:tweet)-[r]-(m:tweet) return n.text, count(r) as deg order by deg desc limit 10









    



10 rows affected.



In [6]:

    
# Convert the query result to a DataFrame and show it as the cell output.
top_tweets.get_dataframe()









    Out[6]:






  
    
      
      n
      deg
    
  
  
    
      0
      {'text': 'Thoughts go out to everyone in Paris...
      38199
    
    
      1
      {'text': 'thoughts and prayers for those peopl...
      27827
    
    
      2
      {'text': 'Thinking of everyone in Paris.', 'ti...
      22158
    
    
      3
      {'text': 'Had a great first show but just hear...
      17657
    
    
      4
      {'text': 'all of my love and prayers go out to...
      16860
    
    
      5
      {'text': 'To people blaming refugees for attac...
      14070
    
    
      6
      {'text': 'Peace for Paris https://t.co/ryf6XB2...
      13471
    
    
      7
      {'text': 'Paris massacre is an act of terroris...
      12738
    
    
      8
      {'text': 'Please, pray for Paris.', 'tid': '66...
      12262
    
    
      9
      {'text': 'Praying for Paris. Our hearts are br...
      11636



In [35]:

    
# Top 10 hashtags by degree: count the relationships (in either direction)
# between each hashtag node and any other node, sorted descending.
top_tags = %cypher match (n:hashtag)-[r]-(m) return n.hashtag, count(r) as deg order by deg desc limit 10









    



10 rows affected.



In [36]:

    
# Convert the query result to a DataFrame and show it as the cell output.
top_tags.get_dataframe()









    Out[36]:






  
    
      
      n.hashtag
      deg
    
  
  
    
      0
      paris
      719571
    
    
      1
      prayforparis
      368931
    
    
      2
      parisattacks
      206159
    
    
      3
      prayers4paris
      52805
    
    
      4
      bataclan
      51524
    
    
      5
      rechercheparis
      46333
    
    
      6
      prayersforparis
      31736
    
    
      7
      france
      31605
    
    
      8
      porteouverte
      30484
    
    
      9
      fusillade
      27833



In [10]:

    
# Top 10 users by degree: count the relationships (in either direction)
# between each user node and any other node, sorted descending.
# NOTE(review): the saved Out[11] table shows a column `n` holding whole node
# dicts (screen_name + uid) rather than `n.screen_name`, so that output
# appears to come from an earlier version of this query — re-run to refresh.
top_users =  %cypher match (n:user)-[r]-(m) return n.screen_name, count(r) as deg order by deg desc limit 10









    



10 rows affected.



In [11]:

    
# Convert the query result to a DataFrame and show it as the cell output.
top_users.get_dataframe()









    Out[11]:






  
    
      
      n
      deg
    
  
  
    
      0
      {'screen_name': 'Louis_Tomlinson', 'uid': '842...
      76472
    
    
      1
      {'screen_name': 'NiallOfficial', 'uid': '10511...
      55876
    
    
      2
      {'screen_name': 'RecherchesP', 'uid': '4185722...
      45967
    
    
      3
      {'screen_name': 'Harry_Styles', 'uid': '181561...
      44814
    
    
      4
      {'screen_name': 'infos140', 'uid': '1356382759'}
      41809
    
    
      5
      {'screen_name': 'justinbieber', 'uid': '272600...
      38659
    
    
      6
      {'screen_name': 'nytimes', 'uid': '807095'}
      34432
    
    
      7
      {'screen_name': 'Michael5SOS', 'uid': '4032468...
      33745
    
    
      8
      {'screen_name': 'jean_jullien', 'uid': '185122...
      30970
    
    
      9
      {'screen_name': 'AP', 'uid': '51241574'}
      29869



In [3]:

    
# Fetch the id (`tid`) and language (`lang`) of every tweet node that has a
# language set. The saved run reported 4,263,562 rows, so the result is large.
tweets = %cypher match (n:tweet) where n.lang is not null return n.tid, n.lang









    



4263562 rows affected.



In [6]:

    
# Materialise the tweet id/language result as a DataFrame and write it to CSV.
# `tweets` is rebound from the query result to the DataFrame. The isinstance
# guard keeps the cell re-runnable: without it a second run would call
# `.get_dataframe()` on a DataFrame, which has no such method, and raise
# AttributeError.
if not isinstance(tweets, pd.DataFrame):
    tweets = tweets.get_dataframe()
tweets.to_csv("data/tweets_w_lang.csv")



In [7]:

    
# Drop the name bound to the multi-million-row tweet table now that it has
# been written to disk (presumably to release kernel memory — confirm).
# Note: re-running this cell on its own raises NameError.
del tweets



In [9]:

    
# Fetch the id pairs of directly connected tweets where both endpoints have a
# language set; this is the edge list matching the tweet export above.
# NOTE(review): the pattern `--` is undirected, so presumably each
# relationship is returned once per direction, as both (a, b) and (b, a).
# Confirm whether a doubled edge list is intended.
tweet_edges = %cypher match (n:tweet)--(m:tweet) where n.lang is not null and m.lang is not null return n.tid, m.tid









    



6090786 rows affected.



In [10]:

    
# Materialise the edge query result as a DataFrame and preview the first rows.
# `tweet_edges` is rebound from the query result to the DataFrame. The
# isinstance guard keeps the cell re-runnable: without it a second run would
# call `.get_dataframe()` on a DataFrame, which has no such method, and raise
# AttributeError.
if not isinstance(tweet_edges, pd.DataFrame):
    tweet_edges = tweet_edges.get_dataframe()
tweet_edges.head()









    Out[10]:






  
    
      
      n.tid
      m.tid
    
  
  
    
      0
      665438816551964672
      665322019291013120
    
    
      1
      665563618776326144
      665563677429403650
    
    
      2
      665496407525208064
      665496689357254656
    
    
      3
      665502309351321602
      665485168120045568
    
    
      4
      665549978979442688
      665549060665909248



In [11]:

    
# Persist the tweet-to-tweet edge list as CSV.
tweet_edges.to_csv(path_or_buf="data/tweets_w_lang_edge.csv")



In [ ]:



In [ ]:



In [13]:

    
# Load the cleaned tweet table; the file is tab-separated.
tweets_df = pd.read_csv(
    "data/clean_tweets.csv",
    sep="\t",
)



In [15]:

    
# Preview the first five rows of the cleaned tweet table.
tweets_df.head(n=5)









    Out[15]:






  
    
      
      tid:ID
      lang
      name
      text
      clean_text
      polarity:float
      subjectivity:float
      created_at
      full_name
      country
      country_code
      coordinates
      :LABEL



In [ ]:

    
# Group all tweets by their `lang` column.
lang_groups = tweets_df.groupby(by="lang")



In [ ]:



In [ ]:

    
# Keep only the tweets (rows) that have a value in the `coordinates` column.
geotweets = tweets_df.dropna(
    axis=0,
    subset=["coordinates"],
)



In [ ]:

    
# Group the tweets that have coordinates by their `country` column.
country_groups = geotweets.groupby(by="country")



In [ ]:

    
# Group the tweets that have coordinates by their `name` column
# (presumably the place/city name — confirm against the data dictionary).
city_groups = geotweets.groupby(by="name")



In [ ]:

    
# Subset of tweets whose `lang` value is "fr".
french_tweets_df = tweets_df.loc[tweets_df["lang"] == "fr"]



In [ ]:

    
# Subset of tweets whose `lang` value is "en".
english_tweets_df = tweets_df.loc[tweets_df["lang"] == "en"]

	n	deg
0	{'text': 'Thoughts go out to everyone in Paris...	38199
1	{'text': 'thoughts and prayers for those peopl...	27827
2	{'text': 'Thinking of everyone in Paris.', 'ti...	22158
3	{'text': 'Had a great first show but just hear...	17657
4	{'text': 'all of my love and prayers go out to...	16860
5	{'text': 'To people blaming refugees for attac...	14070
6	{'text': 'Peace for Paris https://t.co/ryf6XB2...	13471
7	{'text': 'Paris massacre is an act of terroris...	12738
8	{'text': 'Please, pray for Paris.', 'tid': '66...	12262
9	{'text': 'Praying for Paris. Our hearts are br...	11636

	n.hashtag	deg
0	paris	719571
1	prayforparis	368931
2	parisattacks	206159
3	prayers4paris	52805
4	bataclan	51524
5	rechercheparis	46333
6	prayersforparis	31736
7	france	31605
8	porteouverte	30484
9	fusillade	27833

	n	deg
0	{'screen_name': 'Louis_Tomlinson', 'uid': '842...	76472
1	{'screen_name': 'NiallOfficial', 'uid': '10511...	55876
2	{'screen_name': 'RecherchesP', 'uid': '4185722...	45967
3	{'screen_name': 'Harry_Styles', 'uid': '181561...	44814
4	{'screen_name': 'infos140', 'uid': '1356382759'}	41809
5	{'screen_name': 'justinbieber', 'uid': '272600...	38659
6	{'screen_name': 'nytimes', 'uid': '807095'}	34432
7	{'screen_name': 'Michael5SOS', 'uid': '4032468...	33745
8	{'screen_name': 'jean_jullien', 'uid': '185122...	30970
9	{'screen_name': 'AP', 'uid': '51241574'}	29869

	n.tid	m.tid
0	665438816551964672	665322019291013120
1	665563618776326144	665563677429403650
2	665496407525208064	665496689357254656
3	665502309351321602	665485168120045568
4	665549978979442688	665549060665909248