In [1]:
%matplotlib inline
import tweepy as tw
import json
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
import os
from IPython.display import clear_output
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from pydotplus import graph_from_dot_data
import matplotlib.image as mpimg
In [2]:
# Global seaborn styling for every figure in this notebook.
sns.set_context("poster")
sns.set_style("ticks")
In [3]:
# All data files, including the saved Twitter credentials, live under ../data.
DATA_DIR="../data"
TWITTER_CONFIG_FILE=os.path.join(DATA_DIR, "twitter_config.json")
Before proceeding, you should have created your Twitter application by following the steps on the Twitter App Creation page.
Make sure you have the following details of your Twitter application readily available:
When prompted by the code below, enter the value of each item exactly as shown in your Twitter application.
In [4]:
# One-time interactive setup: if no saved credential file exists, load the
# sample config to learn the expected keys, prompt for each value, and
# persist the completed config to TWITTER_CONFIG_FILE.
if not os.path.isfile(TWITTER_CONFIG_FILE):
    with open(os.path.join(DATA_DIR, "twitter_config.sample.json")) as fp:
        creds = json.load(fp)
    for k in sorted(creds.keys()):
        # NOTE(review): input() echoes secrets on screen; getpass.getpass()
        # would be safer — confirm before sharing this notebook.
        v = input("Enter %s:\t" % k)
        creds[k] = v
    print(creds)
    with open(TWITTER_CONFIG_FILE, "w+") as fp:
        json.dump(creds, fp, indent=4, sort_keys=True)
    # Scrub the echoed credentials from the cell output before saving/sharing.
    clear_output()
    print("Printed credentials to file %s" % TWITTER_CONFIG_FILE)
In [5]:
# Reload credentials from disk so later runs skip the interactive prompt.
with open(TWITTER_CONFIG_FILE) as fp:
    creds = json.load(fp)
# Show only the key names — never print the secret values here.
print(creds.keys())
In [6]:
# Authenticate via OAuth 1a and build the API client; retry/backoff settings
# keep long crawls alive through transient failures and rate limits.
# NOTE(review): OAuthHandler plus wait_on_rate_limit_notify is the tweepy v3
# API surface; tweepy v4 removed/renamed these — confirm the pinned version.
auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])
api = tw.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True,
             retry_count=5, retry_delay=100,
             )
print("Tweepy ready for search")
In [7]:
# Fetch a small sample of tweets matching an interactively entered query.
statuses = api.search(q=input("What is your search term?"), count=10)
In [8]:
len(statuses)
Out[8]:
In [9]:
# Show the text of each returned tweet.
for status in statuses:
    print(status.text)
In [10]:
def dict2df(data):
    """Turn a {item: count} mapping into a DataFrame sorted by count, descending.

    Parameters: data — any mapping from item to numeric count.
    Returns: DataFrame with columns ["item", "counts"].
    """
    rows = [(item, count) for item, count in data.items()]
    frame = pd.DataFrame(rows, columns=["item", "counts"])
    return frame.sort_values("counts", ascending=False)
def get_entities(statuses):
    """Tally hashtag and user-mention frequencies across a list of tweets.

    Parameters: statuses — tweepy Status objects exposing an .entities dict
    with optional "hashtags" (each with a "text" key) and "user_mentions"
    (each with a "screen_name" key) lists.
    Returns: (hashtags_df, mentions_df) — two DataFrames from dict2df,
    sorted by counts descending.
    """
    # Fix: the original built an unused `keys` tuple; dropped here.
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    for s in statuses:
        entities = s.entities
        # dict.get with a default replaces the explicit "in" checks.
        for tag in entities.get("hashtags", []):
            hashtags[tag["text"]] += 1
        for mention in entities.get("user_mentions", []):
            mentions[mention["screen_name"]] += 1
    return dict2df(hashtags), dict2df(mentions)
In [11]:
hashtags, mentions = get_entities(statuses)
In [12]:
len(statuses)
Out[12]:
In [13]:
hashtags
Out[13]:
In [14]:
mentions
Out[14]:
In [15]:
# Fetch and display the authenticated account's own profile object.
current_user = api.me()
current_user
Out[15]:
In [16]:
status
Out[16]:
In [17]:
# Print a short profile summary for the authenticated account.
profile_template = (
    "Username: {}\n"
    "Full Name: {}\n"
    "# Followers: {}\n"
    "# Friends: {}\n"
    "# Statuses: {}"
)
print(profile_template.format(
    current_user.screen_name,
    current_user.name,
    current_user.followers_count,
    current_user.friends_count,
    current_user.statuses_count
))
In [18]:
# Page through the authenticated user's friends (accounts they follow),
# 100 per API call; tweepy's Cursor handles pagination transparently.
friends = [friend for friend in tw.Cursor(api.friends, count=100).items()]
print("{} friends found for {}".format(len(friends), current_user.name))
In [19]:
# Summarize each friend as one row, ranked by follower count.
friend_records = [
    (f.id, f.name, f.friends_count, f.followers_count, f.statuses_count)
    for f in friends
]
df_friends = (
    pd.DataFrame(friend_records,
                 columns=["id", "name", "friends", "followers", "statuses"])
    .sort_values("followers", ascending=False)
    .reset_index(drop=True)
)
df_friends.head(15)
Out[19]:
In [20]:
# Preallocate an empty adjacency matrix: entry (i, j) will record whether
# friend i follows friend j.
network = np.zeros([df_friends.shape[0], df_friends.shape[0]])
network.shape
Out[20]:
In [21]:
def get_friendship(id1, id2, verbose=False):
    """Look up the follow relationship between two user ids via the Twitter API.

    Uses the module-level `api` client.  tweepy's show_friendship returns a
    (source, target) pair of relationship objects; this returns
    (response[0].following, response[1].following) — presumably
    (id1 follows id2, id2 follows id1).  TODO confirm against the tweepy
    version in use.

    Set verbose=True to print the raw response objects.
    """
    response = api.show_friendship(source_id=id1, target_id=id2)
    if verbose:
        print(response)
    return response[0].following, response[1].following
In [22]:
# Spot-check one friendship lookup between the two most-followed friends.
get_friendship(df_friends["id"].values[0], df_friends["id"].values[1], verbose=True)
Out[22]:
In [23]:
# Demo writes into the matrix: numpy stores the booleans as 0.0 / 1.0 floats.
network[0, 0] = False
network[1, 0] = True
network[0:3, 0]
Out[23]:
In [24]:
def generate_ego_network(df_friends, lookup=None):
    """Build the follow-tie adjacency matrix among the user's friends.

    Parameters
    ----------
    df_friends : DataFrame with an "id" column of user ids.
    lookup : optional callable (id1, id2) -> (bool, bool) reporting
        (id1 follows id2, id2 follows id1).  Defaults to get_friendship,
        which queries the Twitter API — inject a stub for testing.

    Returns the (n, n) float matrix; entry (i, j) is 1.0 when friend i
    follows friend j.  If any lookup fails, the partially filled matrix
    is returned immediately (the original best-effort behavior).
    """
    if lookup is None:
        lookup = get_friendship
    ids = df_friends["id"].values
    network = np.zeros([len(ids), len(ids)])
    processed_friendships = 0
    for i, fid1 in enumerate(ids):
        # Only pairs above the diagonal are queried; one call fills both
        # directions of the tie.
        for j, fid2 in enumerate(ids[i+1:], start=i+1):
            try:
                tie_labels = lookup(fid1, fid2)
                processed_friendships += 1
            except Exception as exc:
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit and hid the actual error.
                print("Processed friendships = {}".format(processed_friendships))
                print("Error occurred: {}".format(exc))
                return network
            network[i, j] = tie_labels[0]
            network[j, i] = tie_labels[1]
    return network
In [25]:
df_friends.tail()
Out[25]:
In [26]:
# Collect up to 1000 tweets for an interactively entered search term.
# NOTE(review): the standard search API caps `count` at 100 per request —
# values above that are presumably clamped server-side, with Cursor paging
# until .items(1000) is satisfied.  TODO confirm for this API tier.
statuses = [status for status in tw.Cursor(
    api.search, q=input("What is your search term?"), count=1000).items(1000)]
In [27]:
len(statuses)
Out[27]:
In [28]:
# Grab the first collected tweet that carries at least one hashtag.
status = next(filter(lambda x: len(x.entities["hashtags"]), statuses))
In [29]:
status.entities
Out[29]:
In [30]:
def get_entities(statuses, entity_type, text_property):
    """Count entities and co-occurring entity pairs across tweets.

    NOTE(review): this definition shadows the earlier one-argument
    get_entities defined in a previous cell; any call after this cell
    resolves to this three-argument version.

    Parameters
    ----------
    statuses : objects exposing an .entities dict of entity lists.
    entity_type : key into .entities, e.g. "hashtags" or "user_mentions".
    text_property : key holding each entity's text, e.g. "text" or
        "screen_name".

    Returns (entity_counts, entity_network): occurrence counts per
    lowercased entity, and counts per ordered co-occurrence pair within
    the same tweet.
    """
    entity_counts = defaultdict(int)
    entity_network = defaultdict(int)
    for status in statuses:
        entity_list = status.entities[entity_type]
        for i, entity in enumerate(entity_list):
            # Hoist the lowercased name; it was recomputed per inner pair.
            name = entity[text_property].lower()
            entity_counts[name] += 1
            # Pair with every later entity in the same tweet.
            # Fix: the inner enumerate index `j` was never used; dropped.
            for entity_2 in entity_list[i+1:]:
                entity_network[(name, entity_2[text_property].lower())] += 1
    return entity_counts, entity_network
In [31]:
# Count who gets mentioned, and which pairs of accounts are mentioned
# together in the same tweet.
entity_type="user_mentions"
text_property="screen_name"
entity_counts, entity_network = get_entities(statuses, entity_type, text_property)
In [32]:
# Rank mentioned accounts by how often they appear.
df_entities = (
    pd.DataFrame(list(entity_counts.items()), columns=["entity", "counts"])
    .sort_values("counts", ascending=False)
    .reset_index(drop=True)
)
df_entities.head()
Out[32]:
In [33]:
df_entities.head(20)
Out[33]:
In [34]:
# Flatten the pair-count mapping into one row per co-mentioned pair,
# ranked by co-occurrence count.
pair_rows = [(first, second, count)
             for (first, second), count in entity_network.items()]
pair_columns = ["{}_1".format(entity_type),
                "{}_2".format(entity_type),
                "counts"]
df_entity_pairs = (
    pd.DataFrame(pair_rows, columns=pair_columns)
    .sort_values("counts", ascending=False)
    .reset_index(drop=True)
)
df_entity_pairs.head()
Out[34]:
In [35]:
df_entity_pairs.head(20)
Out[35]:
In [36]:
# Undirected graph: nodes are mentioned accounts, edges are co-mentions.
G = nx.Graph()
In [37]:
# One node per mentioned account (iterating the dict yields its keys).
G.add_nodes_from(entity_counts)
In [38]:
# Attach co-mention counts as edge weights.
weighted_edges = [
    (source, target, {"weight": count})
    for (source, target), count in entity_network.items()
]
G.add_edges_from(weighted_edges)
In [39]:
# Draw the full mention graph with a force-directed (spring) layout,
# sizing each node at 3x its degree.
# NOTE(review): degree_iter() exists only in networkx 1.x; it was removed in
# networkx 2.0 (use `dict(G.degree())` there) — confirm the pinned version.
fig, ax = plt.subplots(1,1)
nx.draw_networkx(
    G, with_labels=True,
    node_size=[x[1]*3 for x in G.degree_iter()],
    pos=nx.spring_layout(G),
    ax=ax
)
ax.axis("off")
Out[39]:
In [40]:
# Split the mention graph into connected components, largest first.
# NOTE(review): connected_component_subgraphs() was removed in networkx 2.4;
# use (G.subgraph(c).copy() for c in nx.connected_components(G)) on newer
# versions — confirm the pinned version.
connected_components = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)
print("{} connected components found.".format(len(connected_components)))
In [41]:
# Zoom in on the largest component only, with bigger node scaling (5x degree).
# NOTE(review): degree_iter() is networkx 1.x-only (removed in 2.0).
fig, ax = plt.subplots(1,1)
nx.draw_networkx(
    connected_components[0], with_labels=True,
    node_size=[x[1]*5 for x in connected_components[0].degree_iter()],
    pos=nx.spring_layout(connected_components[0]),
    ax=ax
)
ax.axis("off")
Out[41]:
In [42]:
# Side-by-side log-frequency histograms: node degree vs. raw mention counts.
# NOTE(review): G.degree() returning a dict is networkx 1.x behavior (it is a
# DegreeView in 2.x).  Also, with bins given as range(max), values at the
# maximum lie beyond the last bin edge and are dropped — consider
# range(max + 2).  Confirm intent.
fig, ax = plt.subplots(1,2, figsize=(16,8))
ax[0].hist(list(G.degree().values()), bins=list(range(max(G.degree().values()))), log=True)
ax[0].set_xlabel("Degree")
ax[0].set_ylabel("Frequency")
ax[1].hist(list(entity_counts.values()), bins=list(range(max(entity_counts.values()))), log=True)
ax[1].set_xlabel("Counts")
ax[1].set_ylabel("Frequency")
sns.despine(offset=10)
In [ ]: