Experiments with Social Media Libraries for Python

Anshuman Kanetkar


In [ ]:
import twitter

In [ ]:
import json

# Load the API credentials from a private JSON file kept outside the notebook.
# The context manager closes the file even when the JSON fails to parse.
with open('/home/anshuman/.twitter_api_keys.json', 'r') as fp:
    kwargs = json.load(fp)

# Second reference to the same dict; the hand-written OAuth signing code
# later in this notebook reads the individual keys through this name.
twitter_keys = kwargs

# Every entry of the JSON object is forwarded as a keyword argument.
api = twitter.Api(**kwargs)

In [ ]:
# Call form of print: identical output on Python 2, and valid on Python 3,
# matching every other print in this notebook.
print(api.VerifyCredentials())

In [ ]:
# %timeit statuses = api.GetUserTimeline(screen_name="BarackObama")
# print [s.text for s in statuses]

In [ ]:
from twitter import TwitterError

In [ ]:
# Centre point (Northampton) and search radius for the geo-restricted query.
north_lat = 52.240477000000000000
north_long = -0.902655999999979000
north_radius = 50
radius_units= 'km'

# The geocode is passed as a (latitude, longitude, "<radius><units>") triple,
# here ending in "50km"; at most 100 statuses are requested.
results = api.GetSearch(
    geocode=(north_lat, north_long, '%s%s' % (north_radius, radius_units)),
    count=100
)

In [ ]:
# Keep only the text of each status returned by the search.
statuses = [status.text for status in results]

# Print every status framed by a dashed line above and below it.
for s in statuses:
    print('----\n%s\n----' % (s,))

In [ ]:
results

Twitter provides a streaming API for downloading timelines or search results. To use it, we make a persistent HTTP request, and Twitter returns the results in real time, as if they were an infinite stream of data. References:


In [ ]:
import urllib
from hashlib import sha1
import hmac
import random
import time
import base64
import re, string 

# Seed the shared generator of the `random` module. No seed value is passed,
# so Python chooses the seed source itself.
random.seed()


# One or more consecutive characters that are either non-word characters
# (\W) or underscores.
re_nonword = re.compile(r'[\W_]+')
# The literal percent-escapes "%28", "%2C" and "%29" (the URL escapes for
# "(", "," and ")"), each wrapped in a capture group.
re_28 = re.compile(r'(\%28)')
re_2C = re.compile(r'(\%2C)')
re_29 = re.compile(r'(\%29)')

def get_nonword(orig):
    '''Return `orig` with all non-word characters and underscores removed.

    Despite the name, the result is what is left of `orig` after every match
    of the module-level `re_nonword` pattern has been deleted.
    '''
    cleaned = re_nonword.sub('', orig)
    return cleaned

def get_nonce():
    '''Return a fresh random alphanumeric string to use as the OAuth nonce.

    32 bytes are read from the operating system's random source and
    base64-encoded; get_nonword() then strips the non-alphanumeric base64
    characters ("+", "/" and the "=" padding).

    Returns:
        A text string made only of ASCII letters and digits.
    '''
    # Local import: `os` is not part of this file's top-level imports.
    import os

    token = base64.b64encode(os.urandom(32))
    if not isinstance(token, str):
        # On Python 3 b64encode returns bytes; the callers work with text.
        token = token.decode('ascii')
    return get_nonword(token)

def get_timestamp():
    '''Return the current Unix time in whole seconds (fraction truncated).'''
    now = time.time()
    return int(now)

def sub_escchars(orig):
    '''Re-escape the percent-encoded brackets and commas found in `orig`.

    Rewrites "%28" -> "%2528", "%2C" -> "%252C" and "%29" -> "%2529", i.e.
    the "%" of those three escapes is itself percent-encoded.  An OAuth 1.0a
    signature base string percent-encodes every parameter value and then
    percent-encodes the joined parameter string a second time, so a "," in a
    value has to show up as "%252C".  This helper patches a string that went
    through only one round of encoding.

    Only these three escapes are rewritten, and only in their upper-case
    spelling; everything else in `orig` is returned untouched.
    '''
    # Plain substring replacement in the order "(", ",", ")".  None of the
    # replacement texts contains one of the later search strings, so the
    # passes cannot interfere with each other.
    for escape in ('%28', '%2C', '%29'):
        orig = orig.replace(escape, '%25' + escape[1:])
    return orig

def sign_request(consumer_secret,
                token_secret,
                req_type,
                url,
                argmap):
    '''Return the percent-encoded OAuth 1.0a HMAC-SHA1 signature of a request.

    Steps for generating the signature:
    1. %-encode every key and every value in argmap, sort the encoded pairs
       (by key, then by value) and join them in the format:
        param_string <- key1=value1&key2=value2...&keyN=valueN
    2. %-encode that whole string once more, %-encode the URL, and join both
       with the upper-cased request type using '&' as separator:
        base_string <- REQ_TYPE&enc_url&enc_param_string
    3. Generate the signing key as:
        key <- enc_consumer_secret&enc_token_secret
       where enc_consumer_secret and enc_token_secret are the %-encoded OAuth
       consumer & token secrets.  Without a token secret the key is just
       enc_consumer_secret followed by '&'.
    4. Sign the base string with the key:
        signature <- %-encode(base64(hmac_sha1(key, base_string)))

    %-encoding leaves only letters, digits, '-', '.', '_' and '~' unescaped
    (a space becomes %20, never '+').

    Parameters:
        consumer_secret: OAuth consumer secret (string).
        token_secret: OAuth token secret (string), or None when no token has
            been obtained yet.
        req_type: HTTP method, e.g. "GET" or "POST".
        url: request URL without a query string.
        argmap: dict mapping parameter names to string values; it must hold
            the oauth_* parameters as well as the query/body parameters that
            are sent with the request.

    Returns:
        The signature, ready to be placed in the Authorization header.

    The parameter map, base string and signature are printed as a debugging
    aid.  The signing key is not printed because it consists of the secrets.
    '''
    # Imported locally so the function runs on Python 2 and on Python 3
    # regardless of how the file-level `urllib` import resolves.
    try:
        from urllib import quote            # Python 2
    except ImportError:
        from urllib.parse import quote      # Python 3

    def percent_encode(text):
        # Encode text as UTF-8 first so non-ASCII characters are escaped
        # byte by byte; '~' is added to the characters quote() keeps.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return quote(text, safe='~')

    print(argmap)

    # Encode first, sort afterwards: the ordering is defined on the encoded
    # names and values.
    encoded_pairs = sorted(
        (percent_encode(name), percent_encode(value))
        for name, value in argmap.items()
    )
    param_string = '&'.join(name + '=' + value for name, value in encoded_pairs)

    # Encoding the joined string again is what turns an already escaped
    # "%2C" in a value into "%252C" in the base string.
    base_string = '&'.join([
        req_type.upper(),
        percent_encode(url),
        percent_encode(param_string),
    ])

    # The '&' is always present, even when there is no token secret.
    key = percent_encode(consumer_secret) + '&'
    if token_secret is not None:
        key += percent_encode(token_secret)

    print("Base string : %s" % base_string)

    # Both inputs are pure ASCII after percent-encoding.
    hashed = hmac.new(key.encode('ascii'), base_string.encode('ascii'), sha1)
    hashed_b64 = base64.b64encode(hashed.digest()).decode('ascii')
    print("Signature : %s" % hashed_b64)
    # The signature
    return percent_encode(hashed_b64)



# The URL to query
url = 'https://stream.twitter.com/1.1/statuses/filter.json'

#include_entities	true
#geocode = '(52.240477000000000000,-0.902655999999979000,50km)'

# Candidate filters.  NOTE(review): the boxes look like
# "west_lon,south_lat,east_lon,north_lat" - confirm against the streaming
# API's `locations` parameter documentation.
northants_box = '-1.386293,51.985165,-0.282167,52.650010'
london_box = '-0.567680,51.277729,0.289254,51.701847'
track='india'

# Query parameters actually sent with the request.  They are defined once and
# merged into the signed parameter map below, because the signature is only
# valid when exactly the parameters that are sent were also signed.
req_args = {
    'locations' : northants_box,
    #'track' : track,
}

# Consumer key
oauth_consumer_key = twitter_keys["consumer_key"]
# Nonce
#   The oauth_nonce parameter is a unique token the application generates for
#   each request; Twitter uses it to detect requests submitted more than
#   once.  Any reasonably random alphanumeric string is acceptable.
oauth_nonce =  get_nonce()
oauth_signature_method = "HMAC-SHA1"
# Timestamp
#   The oauth_timestamp parameter is the number of seconds since the Unix
#   epoch at the time the request is created.  Twitter rejects requests that
#   were created too far in the past, so the local clock should be kept in
#   sync with NTP.
oauth_timestamp = get_timestamp()
oauth_token = twitter_keys["access_token_key"]
oauth_version = "1.0"


# The parameters used in the base string for signing: the OAuth protocol
# fields plus every query parameter of the request.
argmap = {
    'oauth_consumer_key' : oauth_consumer_key,
    'oauth_nonce' : oauth_nonce,
    'oauth_signature_method' : oauth_signature_method,
    'oauth_timestamp' : str(oauth_timestamp),
    'oauth_token' : oauth_token,
    'oauth_version' : oauth_version,
}
argmap.update(req_args)

# Parameters that constitute the signing key: the percent-encoded consumer
# secret, followed by an ampersand character '&', followed by the
# percent-encoded token secret.
oauth_consumer_secret = twitter_keys["consumer_secret"]
oauth_token_secret = twitter_keys["access_token_secret"]

# Generate the oauth request signature
oauth_signature = sign_request(oauth_consumer_secret, oauth_token_secret, "GET", url, argmap)


twurl = url

# Generate the %-encoded request args
twdata = urllib.urlencode(req_args)

# Set up the request header.  Every field is taken from the variable that was
# signed above, so the header cannot drift away from the signature.
twheader = ('Authorization: OAuth oauth_consumer_key="%s", oauth_nonce="%s", oauth_signature="%s", '
            'oauth_signature_method="%s", oauth_timestamp="%d", oauth_token="%s", oauth_version="%s"'
            % (oauth_consumer_key, oauth_nonce, oauth_signature, oauth_signature_method,
               oauth_timestamp, oauth_token, oauth_version))

# Generate the cURL command line
twcurl_cmd = "curl --get " + twurl + " --data '" + twdata +  "' --header '" + twheader + "' --verbose"

# Run this command to stream the search results.
print("cURL command line:")
print(twcurl_cmd)

The following generated command can be used to download filtered Twitter statuses using the streaming API:

$ curl --get https://stream.twitter.com/1.1/statuses/filter.json --data 'locations=-1.386293%2C51.985165%2C-0.282167%2C52.650010' --header 'Authorization: OAuth oauth_consumer_key="XXXXXXXX", oauth_nonce="MzA1OTUzNjg3MQ", oauth_signature="PooL0huM2TirKvTZCroKjeRtI5Q%3D", oauth_signature_method="HMAC-SHA1", oauth_timestamp="1423089806", oauth_token="YYYYYYYY", oauth_version="1.0"' --verbose


In [1]:
import json
import pandas as pd
import numpy as np

def process_results(fp, max=1000):
    '''Yield the decoded JSON object of each line read from `fp`.

    Reading stops after `max` lines or at end of file, whichever comes
    first.  Lines that cannot be read or that do not contain valid JSON
    (including blank lines) are skipped and counted; the count is printed
    once the generator has run to completion.

    Parameters:
        fp: open text file object with one JSON document per line.
        max: upper bound on the number of lines to read.

    Yields:
        The object produced by json.loads() for every valid line.
    '''
    lines = 0
    errors = 0
    while lines < max:
        # Count the attempt up front so a file object that keeps failing
        # cannot keep the loop alive forever.
        lines += 1

        try:
            nextLine = fp.readline()
        except (IOError, OSError, UnicodeError):
            errors += 1
            continue

        if not nextLine:
            # readline() returns an empty string only at end of file.
            break

        try:
            result = json.loads(nextLine)
        except ValueError:
            errors += 1
            continue

        # The yield sits outside any try block so that closing the generator
        # early (GeneratorExit) is never mistaken for a bad line.
        yield result

    print(errors)
    
def make_map(fp):
    '''Yield one flat dict per status read from `fp`, ready for a DataFrame.

    Every record produced by process_results(fp) is reduced to the keys
    "id", "status", "user", "created_at", "latitude" and "longitude".
    A record whose "geo" entry is missing or None gets NaN for both
    coordinates.

    Parameters:
        fp: open file object, handed to process_results().

    Yields:
        dict with the six keys listed above.

    Raises:
        KeyError: if a record lacks "text", "created_at", "id" or
            "user"/"screen_name".
    '''
    for r in process_results(fp):
        text = r["text"]
        # .get() covers both an explicit null and a record with no "geo"
        # key at all.
        geo = r.get("geo")
        if geo is None:
            (lat, lon) = (np.nan, np.nan)
        else:
            # The first coordinate is stored as latitude, the second as
            # longitude.
            lat = geo["coordinates"][0]
            lon = geo["coordinates"][1]
        created_at = r["created_at"]
        tid = r["id"]
        user = "@" + r["user"]["screen_name"]
        d_row = {
            "id" : tid,
            "status"  : text,
            "user" : user,
            "created_at" : created_at,
            "latitude" : lat,
            "longitude" : lon,
        }
        yield d_row
    
# Build the DataFrame from the saved results file.  The context manager
# closes the file whether or not building the frame succeeds.
with open("/home/anshuman/northants/twitter_search_results.json", "r") as fp:
    df = pd.DataFrame(make_map(fp))
    

df


21
Out[1]:
created_at id latitude longitude status user
0 Wed Feb 04 22:43:56 +0000 2015 563105703460872193 52.277570 -0.649445 @leecarty why. We deserved to win @LiamMo25LFC
1 Wed Feb 04 22:43:56 +0000 2015 563105705440595969 52.318541 0.928298 Attitude towards everythin😂 http://t.co/PIl3tu... @KatieluluKatie
2 Wed Feb 04 22:43:58 +0000 2015 563105713787248641 52.254427 -0.869346 No idea what I'm going to do with myself though @evesusannah
3 Wed Feb 04 22:43:58 +0000 2015 563105714236063746 NaN NaN Thinking Molly might like something from Honey... @MrsMollyWobbles
4 Wed Feb 04 22:44:02 +0000 2015 563105730044370944 52.291203 -0.702349 @MATTHARDYBRAND @JEFFHARDYBRAND @AmyDumas my f... @Th3Whit3Shad0w
5 Wed Feb 04 22:44:05 +0000 2015 563105742853767168 52.610973 -1.121534 “@RavinaSudra: @__Payal @SianDV were gonna go ... @SianDV
6 Wed Feb 04 22:44:06 +0000 2015 563105747324907522 51.844064 -1.350696 Three-car crash on A34 at Kidlington http://t.... @DailyOXFORD
7 Wed Feb 04 22:44:07 +0000 2015 563105752056074240 NaN NaN Hey #newsnight there's a massive Elephant in t... @dukesy12
8 Wed Feb 04 22:44:08 +0000 2015 563105752861384705 51.844064 -1.350696 Academics vote on disputed flats http://t.co/h... @DailyOXFORD
9 Wed Feb 04 22:44:08 +0000 2015 563105754904010753 NaN NaN @SimonCowell Simon,Would you kindly RT/Follow ... @CaptCub2012
10 Wed Feb 04 22:44:16 +0000 2015 563105788722683904 53.031900 -1.547498 @cathorio @neurocrispy "sorry, just seen you'v... @calidus007
11 Wed Feb 04 22:44:16 +0000 2015 563105788924026885 51.397183 -0.904905 Drakes smile💖💖💖💖 @Molly_Smith982
12 Wed Feb 04 22:44:17 +0000 2015 563105791327371264 NaN NaN @BBCTwo What a pity there is so much bad langu... @AsterTony
13 Wed Feb 04 22:44:19 +0000 2015 563105799464292353 52.269139 -0.836737 Life's bullshit @TheBayleyLeslie
14 Wed Feb 04 22:44:19 +0000 2015 563105800823275520 51.870373 -1.200183 Roadworks (Severe delay) M40 J9 northbound acc... @BeepBeepTraffic
15 Wed Feb 04 22:44:20 +0000 2015 563105805604757505 52.457643 -0.608575 @Matey30 @aspirationalbob At least nobody is w... @simonwbrown
16 Wed Feb 04 22:44:25 +0000 2015 563105824370089985 NaN NaN @katathome1999 Lucky you! I know someone who s... @MattRMBlake
17 Wed Feb 04 22:44:25 +0000 2015 563105825812934656 52.544281 0.815952 @Joel__Henry Thanks matey. #LegoForTheWin @uberspoons
18 Wed Feb 04 22:44:26 +0000 2015 563105828228845570 NaN NaN @SaraThornton1 I assume you mean that view fro... @ianptyers
19 Wed Feb 04 22:44:26 +0000 2015 563105828090413057 52.492724 -0.289812 “@BBCNews: Thursday's Mirror: "Luckiest cabbie... @smilingfreak
20 Wed Feb 04 22:44:28 +0000 2015 563105839511109632 51.997915 -0.297661 @RealSparklePony I've seen plenty I'd willingl... @thatblighterian
21 Wed Feb 04 22:44:31 +0000 2015 563105851284926464 52.658055 -1.060212 #MianiteFanArt @ProSyndicate http://t.co/lU57D... @wwejordanrock
22 Wed Feb 04 22:44:32 +0000 2015 563105856641064960 52.408162 -0.714605 @DaanGrimes I just remembered out of no where ... @LiamDavidson2
23 Wed Feb 04 22:44:32 +0000 2015 563105856880136192 NaN NaN crazy day! @LukeJamesParker
24 Wed Feb 04 22:44:33 +0000 2015 563105860503994368 NaN NaN @AndieDelicacy ych! Their* not there!! *shamef... @lundofthemanor
25 Wed Feb 04 22:44:37 +0000 2015 563105874869489664 52.005167 -0.800439 this is the point where everyone starts talkin... @sibeIIahallward
26 Wed Feb 04 22:44:39 +0000 2015 563105883425886208 52.563774 -1.174531 @spanishfijian @SuePeace1 @susanhorgan1 @JILLC... @BigAl_lfc
27 Wed Feb 04 22:44:45 +0000 2015 563105910059716610 52.288105 -0.701322 @Drew2304 at least I tried to rep it once fam🌚... @JamieLB97
28 Wed Feb 04 22:44:50 +0000 2015 563105931341602820 52.527671 -1.364108 👍👍👍 love that mate sick one @lucykirkh4m
29 Wed Feb 04 22:44:51 +0000 2015 563105932713152512 NaN NaN @katathome1999 this very petite woman walking ... @MattRMBlake
... ... ... ... ... ... ...
949 Wed Feb 04 23:15:48 +0000 2015 563113722781372416 NaN NaN @Lozpilicueta Sick in the head. Not that I wan... @WhyNeverCharlie
950 Wed Feb 04 23:15:48 +0000 2015 563113724589146112 52.119923 -0.297078 🔥🔥🔥🔥🔥 @GemAitchison
951 Wed Feb 04 23:15:49 +0000 2015 563113729383215105 51.363566 -0.323074 @MillsIzzy don't even dis. I've had an iPhone ... @indraherrmann
952 Wed Feb 04 23:15:53 +0000 2015 563113744738557952 52.387881 -0.742308 Slight addiction to extremely dark chocolate, ... @LouisePolitano
953 Wed Feb 04 23:15:58 +0000 2015 563113766309871616 52.537255 1.723434 @Alison18860349 thanks Hun lots of good energy... @FifitFiona
954 Wed Feb 04 23:16:00 +0000 2015 563113774249304066 NaN NaN @TommyTwoCan94 @Rebeccakinloch @OfficialBWFC W... @Nidgster
955 Wed Feb 04 23:16:04 +0000 2015 563113790431297536 51.885611 0.008062 @JessieMace @ZaidaGrace93 @GemBobSquare @paige... @AntonyjMiles
956 Wed Feb 04 23:16:04 +0000 2015 563113792025149440 NaN NaN @marinadwk aeeee valeu, 25 anos agora, um quar... @vitinmenezello
957 Wed Feb 04 23:16:06 +0000 2015 563113799063207936 NaN NaN #Trivia4u. #USA 1846 – The #1stMormonPioneers ... @traveltrivia4u
958 Wed Feb 04 23:16:08 +0000 2015 563113806092845057 53.031899 -1.547397 @missynjb "some ppl dough ring it's true😡Denma... @calidus007
959 Wed Feb 04 23:16:15 +0000 2015 563113836539305986 NaN NaN @Cynical_Ackky yeah man GGs tonight see you to... @Cynical__Lee
960 Wed Feb 04 23:16:15 +0000 2015 563113838091186176 NaN NaN @deniselarge congatulations Denise thoroughly ... @edward_whitaker
961 Wed Feb 04 23:16:16 +0000 2015 563113839978618882 52.595741 -0.236500 I don't think I will ever have a full night of... @LindaMeldere
962 Wed Feb 04 23:16:17 +0000 2015 563113843325698050 52.657074 -1.075789 Not gonna drink through feb.. Was gonna do Jan... @Rawrjmama
963 Wed Feb 04 23:16:18 +0000 2015 563113847750672385 NaN NaN Literally so excited for the next two days off! @JessOsully
964 Wed Feb 04 23:16:18 +0000 2015 563113848530808832 NaN NaN En serio que he llegado justo para el arroyo? ... @Bertagg
965 Wed Feb 04 23:16:23 +0000 2015 563113868592160770 NaN NaN Mariah Carey Blesses Fifth Harmony With Her Ap... @LeaderEnter
966 Wed Feb 04 23:16:23 +0000 2015 563113869577842688 51.907710 -0.376769 @TanyaBrick #love and #thameslink don't appear... @wtp1962
967 Wed Feb 04 23:16:24 +0000 2015 563113876116746240 NaN NaN @DezzaW6 @EllaDecember we are collectively poo... @BSsmoove
968 Wed Feb 04 23:16:25 +0000 2015 563113876653621248 51.158439 0.864446 @BigRodVTX no matter what we do, we'll never a... @safetymanuk
969 Wed Feb 04 23:16:30 +0000 2015 563113899726479360 52.632044 -1.135656 Two flatmates I dislike the one with the boyfr... @WeirMelissa
970 Wed Feb 04 23:16:32 +0000 2015 563113909456871424 52.630888 -1.115594 @LiibanDirir: They just want my count, heard I... @LiibanDirir
971 Wed Feb 04 23:16:34 +0000 2015 563113916604375040 52.659697 -1.067000 @_xamnaaa SHUTUP blud 😂😭 bah @jvnnvt
972 Wed Feb 04 23:16:35 +0000 2015 563113918621831170 52.787919 0.441150 i NEED a juice cube @MissSharuni_JLS
973 Wed Feb 04 23:16:36 +0000 2015 563113923181031425 52.261576 -1.166197 @jimmybullard and @TheLuluLife going innnnnnnn... @NathanCockerton
974 Wed Feb 04 23:16:38 +0000 2015 563113934677639169 52.640341 -1.157413 PRhyme - U Looz @uncleatu
975 Wed Feb 04 23:16:39 +0000 2015 563113936120053760 NaN NaN @TommyTwoCan94 @Rebeccakinloch @OfficialBWFC W... @Nidgster
976 Wed Feb 04 23:16:41 +0000 2015 563113944316125184 51.313403 -0.345554 Aqui en mi trabajo creen que molestarme con mi... @emicalderonraue
977 Wed Feb 04 23:16:41 +0000 2015 563113947289882624 51.029670 -0.270811 @natalierushmcn http://t.co/tyvzSjDSdo @NATALlERUSHMAN
978 Wed Feb 04 23:16:44 +0000 2015 563113958341881856 NaN NaN Yes, it's Rowland from Grange Hill. A video by... @CliveAMartin

979 rows × 6 columns


In [ ]: