In [1]:
import json
path='/home/sriram/Downloads/'
tweetFile='Twitter_data1.txt'
import pandas as pd

In [2]:
tweets_data = []
tweets_data_path=path+tweetFile
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except:
        continue

In [3]:
print len(tweets_data)


9998

In [33]:
from textblob import TextBlob
count=0
tweets = pd.DataFrame(index=range(len(tweets_data)), columns=['text','lang','retweeted','location','state','sentiment','country_code','lat','lon'])
#tweets['text'] = map(lambda tweet: tweet['text'] if tweet['text']!=None else None, tweets_data)
#tweets['lang'] = map(lambda tweet: tweet['lang'], tweets_data)
#tweets['retweeted']= map(lambda tweet: tweet['retweeted'], tweets_data)
#tweets['location'] = map(lambda tweet: tweet['user']['location'] if tweet['user']['location'] != None else None, tweets_data)

for i in range(len(tweets_data)):
    try:
        tweets['text'][i] = tweets_data[i]['text']
    except:
        tweets['text'][i] = ""
    try:
        tweets['lang'][i]=tweets_data[i]['lang']
    except:
        tweets['lang'][i]='NA'
    try:
        tweets['retweeted'][i]=tweets_data[i]['retweeted']
    except:
        tweets['lang'][i]='NA'
    try:
        tweets['location'][i]=tweets_data[i]['user']['location']
    except:
        tweets['location'][i]='NA'
    try:
        tweets['country_code'][i]=tweets_data[i]['place']['country_code']
    except:
        tweets['country_code'][i]=''
    try:
        tweets['lon'][i]=tweets_data[i]['place']['bounding_box']['coordinates'][0][0][0]
    except:
        tweets['lon'][i]='NA'
    try:
        tweets['lat'][i]=tweets_data[i]['place']['bounding_box']['coordinates'][0][0][1]
    except:
        tweets['lat'][i]='NA'

In [67]:
import time
import zipcode
start_time = time.time()
count=0
for i in range(len(tweets)):
    blob = TextBlob(tweets['text'][i])
    try:
        sentence=blob.sentences[0]
        tweets['sentiment'][i]=sentence.sentiment.polarity
    except:
        tweets['sentiment'][i]=0
    try:
        stateFromData=tweets['location'][i].split(',')[1]
    except:
        stateFromData=''
    if len(stateFromData)==2:
        tweets['state'][i]=stateFromData
    else:
        if tweets['lat'][i] !='NA':
            radius=10
            incre=10
            zips=zipcode.isinradius((tweets['lat'][i],tweets['lon'][i]),radius)
            while len(zips)==0:
                radius=radius+incre
                zips=zipcode.isinradius((tweets['lat'][i],tweets['lon'][i]),radius)
                incre=incre+10
            myzip = zipcode.isequal(str(zips[0].zip))
            tweets['state'][i]=myzip.state
        else:
            tweets['state'][i]='NA'
    count+=1
    if count%1000==0:
        print (count," Tweets processed")
print("--- %s seconds ---" % (time.time() - start_time))


(1000, ' Tweets processed')
(2000, ' Tweets processed')
(3000, ' Tweets processed')
(4000, ' Tweets processed')
(5000, ' Tweets processed')
(6000, ' Tweets processed')
(7000, ' Tweets processed')
(8000, ' Tweets processed')
(9000, ' Tweets processed')
--- 166.130603075 seconds ---

In [63]:
len(u'IL')


Out[63]:
2

In [18]:
import time
start_time = time.time()

for i in range(len(tweets)):
    zipcode.isinradius((39.98,-87.29),20)


Out[18]:
[-89.294485, 37.670875]

In [53]:
tweets.head()


Out[53]:
text lang retweeted location state sentiment country_code lat lon
0 Graduation is the only time when I can call my... en False Chicago, IL MO -0.3 US 37.6709 -89.2945
1 Headache 🤦🏽‍♂️ et False Augusta, GA GA 0 US 33.3389 -82.212
2 Your girl can be up and getting ready 30min be... en False Arizona, USA AZ 0.1 US 31.3322 -114.818
3 Yo seriously I totally get the beers at angels... en False Bakersfield, CA CA -0.05 US 35.2558 -119.172
4 #NeverForget #WestHaven #Connecticut @ West Ha... en False Lower East Side, Manhattan CT 0 US 41.2331 -72.9947

In [13]:
zip=zipcode.isinradius((39.98,-87.29),20)


Out[13]:
{u'attributes': {},
 u'bounding_box': {u'coordinates': [[[-89.294485, 37.670875],
    [-89.294485, 37.764433],
    [-89.151934, 37.764433],
    [-89.151934, 37.670875]]],
  u'type': u'Polygon'},
 u'country': u'United States',
 u'country_code': u'US',
 u'full_name': u'Carbondale, IL',
 u'id': u'20d0abc87d11501a',
 u'name': u'Carbondale',
 u'place_type': u'city',
 u'url': u'https://api.twitter.com/1.1/geo/id/20d0abc87d11501a.json'}

In [ ]: