notebook.community

Edit and run



In [1]:

    
%load_ext cypher
import operator
import numpy as np









    



/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.
  "You should import from traitlets.config instead.", ShimWarning)
/home/davebshow/.virtualenvs/scientific3/lib/python3.4/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.
  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")



In [5]:

    
# Query the graph through the %cypher line magic: for every pair of `tweet`
# nodes joined by any relationship, return both language codes. Pairs where
# either language is missing or equal to "und" are filtered out.
# NOTE(review): the pattern `--` is undirected, so each connected pair is
# presumably returned twice (once per orientation) — confirm before treating
# the row count as an edge count.
tweet_langs = %cypher match (n:tweet)--(m:tweet) where n.lang is not null and m.lang is not null and m.lang <> "und" and n.lang <> "und" return n.lang, m.lang









    



6113716 rows affected.



In [6]:

    
# Convert the %cypher result into a DataFrame. The name `tweet_langs` is
# rebound to the converted object, so only convert while it still exposes
# get_dataframe(); otherwise re-running this cell in the same kernel session
# would fail with AttributeError on the already-converted frame.
if hasattr(tweet_langs, "get_dataframe"):
    tweet_langs = tweet_langs.get_dataframe()
tweet_langs.head()



In [7]:

    
# Group the rows by their (n.lang, m.lang) language pair.
lang_groups = tweet_langs.groupby(by=["n.lang", "m.lang"])



In [8]:

    
# Map each (n.lang, m.lang) pair to the number of rows in its group.
lang_freq = {
    pair: len(row_labels)
    for pair, row_labels in lang_groups.groups.items()
}



In [9]:

    
# Rank the (pair, count) items from most to least frequent.
langs = sorted(
    lang_freq.items(),
    key=lambda pair_and_count: pair_and_count[1],
    reverse=True,
)



In [10]:

    
# Show the most frequent pair and the last pair that still makes the top 25.
(langs[0], langs[:25][-1])









    Out[10]:





((('en', 'en'), 4264830), (('ro', 'ro'), 1112))



In [15]:

    
# Among the 50 most frequent pairs, list those joining two different languages.
[
    pair
    for pair, _count in langs[:50]
    if pair[0] != pair[1]
]









    Out[15]:





[('fr', 'en'),
 ('en', 'fr'),
 ('und', 'en'),
 ('en', 'und'),
 ('es', 'en'),
 ('en', 'es'),
 ('fr', 'und'),
 ('und', 'fr')]



In [33]:

    
# Keep only the 25 most frequent pairs; this rebinds `lang_freq` to the
# reduced mapping.
lang_freq = dict(langs[:25])



In [37]:

    
# Peek at the first five (pair, count) entries of the mapping.
list(lang_freq.items())[:5]









    Out[37]:





[(('hr', 'hr'), 106),
 (('es', 'en'), 76),
 (('tl', 'tl'), 9650),
 (('pt', 'pt'), 216686),
 (('ja', 'ja'), 10866)]



In [38]:

    
# Build a square language-by-language matrix from the pair counts held in
# `lang_freq` (keys are 2-tuples of language codes, values are counts).
keys = np.array(list(lang_freq.keys()))    # one row per pair key
vals = np.array(list(lang_freq.values()))  # counts, in the same order as `keys`

# np.unique works on the flattened keys: it returns the sorted distinct
# language codes and, for every element of `keys`, its position in that list.
unq_keys, key_idx = np.unique(keys, return_inverse=True)

# Lay the positions out as (row index, column index), one row per pair.
key_idx = key_idx.reshape(-1, 2)

n = len(unq_keys)
adj = np.zeros((n, n), dtype=vals.dtype)
adj[key_idx[:, 0], key_idx[:, 1]] = vals

# Add the transpose out of place. `adj.T` is a view onto `adj`, and the
# in-place form `adj += adj.T` reads cells it has already overwritten on NumPy
# releases that do not guard against overlapping operands.
# NOTE(review): adding the transpose also doubles the diagonal (same-language
# pairs) — confirm that is intended.
adj = adj + adj.T



In [3]:

    
# Display the matrix. This only works after the cell that assigns `adj` has
# run in the current kernel session; otherwise it raises NameError.
adj









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-00da27f6971b> in <module>()
----> 1 adj

NameError: name 'adj' is not defined



In [24]:

    
# List every language pair currently stored in the mapping.
list(lang_freq)









    Out[24]:





[('fr', 'pt'),
 ('tr', 'lt'),
 ('ne', 'ne'),
 ('pt', 'fr'),
 ('es', 'es'),
 ('tl', 'es'),
 ('sk', 'en'),
 ('ja', 'ja'),
 ('tl', 'in'),
 ('it', 'pt'),
 ('en', 'hi'),
 ('it', 'fr'),
 ('es', 'en'),
 ('ro', 'fr'),
 ('fr', 'in'),
 ('es', 'et'),
 ('pt', 'und'),
 ('ru', 'ru'),
 ('es', 'ja'),
 ('ru', 'und'),
 ('de', 'und'),
 ('fr', 'es'),
 ('und', 'und'),
 ('pl', 'und'),
 ('et', 'et'),
 ('ja', 'es'),
 ('und', 'in'),
 ('sk', 'nl'),
 ('no', 'no'),
 ('in', 'und'),
 ('und', 'pt'),
 ('fr', 'et'),
 ('pt', 'tr'),
 ('no', 'tr'),
 ('in', 'hr'),
 ('en', 'en'),
 ('hu', 'hu'),
 ('de', 'nl'),
 ('tr', 'fr'),
 ('no', 'en'),
 ('ta', 'ta'),
 ('en', 'lv'),
 ('in', 'ar'),
 ('lv', 'en'),
 ('fr', 'tl'),
 ('es', 'pl'),
 ('tr', 'en'),
 ('es', 'tl'),
 ('ru', 'es'),
 ('en', 'sl'),
 ('de', 'tr'),
 ('ar', 'ar'),
 ('ka', 'ka'),
 ('und', 'de'),
 ('ru', 'fr'),
 ('ko', 'in'),
 ('en', 'ja'),
 ('fr', 'de'),
 ('th', 'en'),
 ('fr', 'ja'),
 ('tr', 'tr'),
 ('tl', 'fr'),
 ('en', 'sv'),
 ('nl', 'fr'),
 ('en', 'da'),
 ('tr', 'pt'),
 ('bs', 'bs'),
 ('et', 'und'),
 ('no', 'sv'),
 ('ur', 'ur'),
 ('el', 'fr'),
 ('de', 'en'),
 ('pt', 'it'),
 ('lt', 'en'),
 ('ru', 'en'),
 ('und', 'fr'),
 ('en', 'ro'),
 ('sv', 'fr'),
 ('de', 'fr'),
 ('en', 'et'),
 ('in', 'in'),
 ('lt', 'tr'),
 ('und', 'it'),
 ('te', 'te'),
 ('en', 'es'),
 ('en', 'ru'),
 ('en', 'in'),
 ('nl', 'de'),
 ('en', 'pl'),
 ('fr', 'ar'),
 ('iw', 'iw'),
 ('tr', 'no'),
 ('hi', 'tl'),
 ('hi', 'en'),
 ('en', 'ko'),
 ('ro', 'ro'),
 ('it', 'und'),
 ('es', 'ru'),
 ('pl', 'pl'),
 ('fa', 'fa'),
 ('et', 'es'),
 ('und', 'en'),
 ('sl', 'sl'),
 ('de', 'es'),
 ('en', 'lt'),
 ('fr', 'it'),
 ('uk', 'uk'),
 ('et', 'in'),
 ('pt', 'es'),
 ('fr', 'sv'),
 ('in', 'hi'),
 ('iw', 'en'),
 ('th', 'th'),
 ('pt', 'pt'),
 ('sv', 'no'),
 ('is', 'is'),
 ('sv', 'en'),
 ('fr', 'nl'),
 ('fr', 'tr'),
 ('it', 'it'),
 ('et', 'en'),
 ('lt', 'lt'),
 ('en', 'hr'),
 ('pt', 'tl'),
 ('nl', 'en'),
 ('hi', 'et'),
 ('zh', 'en'),
 ('en', 'fr'),
 ('da', 'da'),
 ('bn', 'bn'),
 ('und', 'tr'),
 ('nl', 'sk'),
 ('en', 'pt'),
 ('en', 'tl'),
 ('tr', 'und'),
 ('pt', 'en'),
 ('pa', 'pa'),
 ('en', 'it'),
 ('pl', 'es'),
 ('in', 'tl'),
 ('en', 'iw'),
 ('in', 'pl'),
 ('lv', 'lv'),
 ('en', 'sk'),
 ('mr', 'mr'),
 ('in', 'fr'),
 ('tl', 'en'),
 ('tl', 'tl'),
 ('ml', 'ml'),
 ('in', 'et'),
 ('et', 'hi'),
 ('sr', 'sr'),
 ('pl', 'fr'),
 ('ar', 'fr'),
 ('fr', 'ru'),
 ('hr', 'hr'),
 ('el', 'el'),
 ('ja', 'en'),
 ('et', 'fr'),
 ('en', 'de'),
 ('de', 'de'),
 ('bg', 'bg'),
 ('tl', 'pt'),
 ('tl', 'hi'),
 ('sk', 'sk'),
 ('pl', 'en'),
 ('nl', 'nl'),
 ('ko', 'en'),
 ('it', 'en'),
 ('fr', 'ro'),
 ('hr', 'in'),
 ('fr', 'fr'),
 ('es', 'fr'),
 ('ko', 'ko'),
 ('fr', 'pl'),
 ('es', 'und'),
 ('en', 'th'),
 ('fi', 'fi'),
 ('ro', 'en'),
 ('ja', 'fr'),
 ('sl', 'en'),
 ('hi', 'in'),
 ('si', 'si'),
 ('da', 'en'),
 ('tr', 'de'),
 ('und', 'et'),
 ('en', 'und'),
 ('vi', 'vi'),
 ('in', 'ko'),
 ('en', 'tr'),
 ('in', 'en'),
 ('ar', 'in'),
 ('und', 'ru'),
 ('fr', 'el'),
 ('und', 'pl'),
 ('da', 'tr'),
 ('es', 'de'),
 ('zh', 'zh'),
 ('sv', 'sv'),
 ('fr', 'und'),
 ('und', 'es'),
 ('pl', 'in'),
 ('en', 'no'),
 ('hi', 'hi'),
 ('hr', 'en'),
 ('tr', 'da'),
 ('en', 'nl'),
 ('en', 'fi'),
 ('fi', 'en'),
 ('es', 'pt'),
 ('en', 'zh'),
 ('fr', 'en')]



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [2]:

    
# Fetch pairs of `tweet` nodes joined by a REPLIES_TO relationship (matched in
# either direction) where both tweets have a non-null language, returning the
# full nodes.
replies = %cypher match (t1:tweet)-[:REPLIES_TO]-(t2:tweet) where t1.lang is not null and t2.lang is not null return t1, t2









    



24432 rows affected.



In [4]:

    
# Convert the reply pairs into a DataFrame and preview the first five rows.
reply_df = replies.get_dataframe()
reply_df.head(5)









    Out[4]:






  
    
      
      t1
      t2
    
  
  
    
      0
      {'text': '@MaluVaccaro Praying for Paris... 🙏🏼...
      {'text': 'Pray for Paris! 🔵🔴⚪', 'subjectivity'...
    
    
      1
      {'text': 'And what steps will you be taking to...
      {'text': 'Sydney strongly supports the people ...
    
    
      2
      {'clean_text': 'concordo...mas n sei pq fizera...
      {'text': 'I just can't seem to fathom what hap...
    
    
      3
      {'text': 'Paris shows IS getting stronger: Abb...
      {'text': '@SBSNews Seriously, why would anyone...
    
    
      4
      {'text': 'Attentats de #Paris : plus de 200 bl...
      {'text': '@LP_LaPresse Tennez bon Paris !!! Le...



In [8]:

    
# Same REPLIES_TO query, restricted to pairs whose two tweets have different
# languages. This rebinds `replies`, replacing any earlier result held under
# that name.
replies = %cypher match (t1:tweet)-[:REPLIES_TO]-(t2:tweet) where t1.lang is not null and t2.lang is not null and t1.lang <> t2.lang return t1, t2









    



2390 rows affected.



In [9]:

    
# Convert the current `replies` result into a DataFrame and preview the first
# five rows. Despite the `rt_` prefix, the frame is built from whatever
# `replies` holds, i.e. reply pairs.
rt_df = replies.get_dataframe()
rt_df.head(5)









    Out[9]:






  
    
      
      t1
      t2
    
  
  
    
      0
      {'clean_text': 'concordo...mas n sei pq fizera...
      {'text': 'I just can't seem to fathom what hap...
    
    
      1
      {'text': 'Vols de Dallas à #Paris sont à nouve...
      {'text': 'Flights from DFW to #Paris are openi...
    
    
      2
      {'text': '@welt 
@thedailybeast column: Welcom...
      {'clean_text': 'Mehr als 120 Tote: Was Sie übe...
    
    
      3
      {'country': 'United States', 'text': '@katemoe...
      {'text': 'Sending all my love to Paris. 🇫🇷🇫🇷🇫🇷...
    
    
      4
      {'text': 'Our city hall lit up in solidarity w...
      {'lang': 'und', 'text': '@TelAvivNonstop @Isra...



In [ ]:

	t1	t2
0	{'text': '@MaluVaccaro Praying for Paris... 🙏🏼...	{'text': 'Pray for Paris! 🔵🔴⚪', 'subjectivity'...
1	{'text': 'And what steps will you be taking to...	{'text': 'Sydney strongly supports the people ...
2	{'clean_text': 'concordo...mas n sei pq fizera...	{'text': 'I just can't seem to fathom what hap...
3	{'text': 'Paris shows IS getting stronger: Abb...	{'text': '@SBSNews Seriously, why would anyone...
4	{'text': 'Attentats de #Paris : plus de 200 bl...	{'text': '@LP_LaPresse Tennez bon Paris !!! Le...

	t1	t2
0	{'clean_text': 'concordo...mas n sei pq fizera...	{'text': 'I just can't seem to fathom what hap...
1	{'text': 'Vols de Dallas à #Paris sont à nouve...	{'text': 'Flights from DFW to #Paris are openi...
2	{'text': '@welt @thedailybeast column: Welcom...	{'clean_text': 'Mehr als 120 Tote: Was Sie übe...
3	{'country': 'United States', 'text': '@katemoe...	{'text': 'Sending all my love to Paris. 🇫🇷🇫🇷🇫🇷...
4	{'text': 'Our city hall lit up in solidarity w...	{'lang': 'und', 'text': '@TelAvivNonstop @Isra...