In [1]:
# Tags to search: every pairing of happy/happiness with surprised/surprise,
# in three variants (both terms hashtagged, only the first, only the second).
# Each query appears exactly once; repeated entries would only trigger
# redundant requests and overwrite the same key in a per-tag results dict.
listOfHashtags = [
    # both terms as hashtags
    '#happy #surprised',
    '#happy #surprise',
    '#happiness #surprised',
    '#happiness #surprise',
    # only the first term as a hashtag
    '#happy surprised',
    '#happy surprise',
    '#happiness surprised',
    '#happiness surprise',
    # only the second term as a hashtag
    'happy #surprised',
    'happy #surprise',
    'happiness #surprised',
    'happiness #surprise',
]
In [2]:
import os

import tweepy

# Credentials are read from the environment instead of being embedded in the
# notebook, so they never end up in version control or shared exports.
# A missing variable raises KeyError naming the variable that must be set.
# NOTE(review): any key/token that was ever committed in plain text should be
# revoked and regenerated in the Twitter developer console.
CONSUMER_KEY = os.environ["TWITTER_CONSUMER_KEY"]
CONSUMER_SECRET = os.environ["TWITTER_CONSUMER_SECRET"]
OAUTH_TOKEN = os.environ["TWITTER_OAUTH_TOKEN"]
OAUTH_TOKEN_SECRET = os.environ["TWITTER_OAUTH_TOKEN_SECRET"]

# Lower-case aliases kept so later cells that use either spelling still work.
consumer_key = CONSUMER_KEY
consumer_secret = CONSUMER_SECRET
access_token = OAUTH_TOKEN
access_token_secret = OAUTH_TOKEN_SECRET

# OAuth handler: consumer credentials first, then the user access token.
auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
In [3]:
# Build the tweepy API client from the OAuth handler `auth` configured earlier.
api = tweepy.API(auth)
In [8]:
# First page of search results for the #happiness hashtag (default page size).
results = api.search(q="#happiness")
In [14]:
# Gather the id_str of every tweet in `results`: once as an ordered list
# (duplicates kept) and once as a set (duplicates collapsed).
IDs = [tweet.id_str for tweet in results]
uniqueIds = set(IDs)
In [16]:
# Display the collected id_str values in descending order.
# NOTE(review): this is a string sort; it matches numeric order only while all
# IDs have the same number of digits — confirm that holds for this data.
sorted(IDs, reverse = True)
Out[16]:
In [33]:
# Fetch up to 100 #happiness tweets newer than the given tweet ID.
results = api.search(q="#happiness", since_id="457652495343681536", count=100)
In [34]:
# Fold the id_str values of the latest result page into both collections.
page_ids = [tweet.id_str for tweet in results]
uniqueIds.update(page_ids)
IDs.extend(page_ids)
In [35]:
# Display all id_str values gathered so far (duplicates included), descending.
sorted(IDs, reverse = True)
Out[35]:
In [36]:
# Number of distinct tweet IDs collected so far.
len(uniqueIds)
Out[36]:
In [ ]:
In [37]:
import requests
In [41]:
# Attempt the same search as a raw HTTP GET, bypassing tweepy.
# NOTE(review): the second positional argument of requests.get is `params`,
# so `auth` is not applied as authentication here — confirm intent; the
# request needs to be OAuth-signed some other way.
# NOTE(review): the unescaped '#' in the URL starts the fragment, so the text
# after it is presumably never sent as part of `q`; the search term should be
# percent-encoded or passed through `params`. TODO confirm and fix.
results = requests.get("https://api.twitter.com/1.1/search/tweets.json?q='#happiness'&count=100",auth)
In [40]:
# Display the current value of `results` as the cell output.
results
Out[40]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [66]:
import time
import sys
results = {}
for tag in listOfHashtags:
time.sleep(2)
print "fetching tag:",tag
sys.stdout.flush()
results[tag] = tweepy.API.search(api, q=tag, count=100)
In [81]:
for obj in results['#happiness #surprised']:
print obj.id
In [99]:
# Search for the phrase 'انا سعيد' restricted to lang="ar" (up to 100 hits),
# print a 100-dash separator followed by the text of each hit, and count the
# hits in `cnt`.
# NOTE(review): the loop variable is named `re`; it would shadow the standard
# library `re` module if that is ever imported in this notebook — consider
# renaming it.
# NOTE(review): indentation was lost in this export, so it is unclear whether
# the final `print cnt` runs once after the loop or on every iteration.
cnt = 0
for re in tweepy.API.search(api, q='انا سعيد', count=100, lang="ar"):
print "-"*100
print re.text
cnt+=1
print cnt
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [1]:
import io
import sys
import time
from urllib2 import URLError
from httplib import BadStatusLine
import json
import twitter
from functools import partial
from sys import maxint
from functools import partial
from sys import maxint
def oauth_login():
    """Build an authenticated client for the Twitter API.

    The four OAuth credentials are read from the environment variables
    TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_OAUTH_TOKEN and
    TWITTER_OAUTH_TOKEN_SECRET, so that no secret is stored in the notebook.

    Returns:
        The object returned by ``twitter.Twitter(auth=...)``.

    Raises:
        RuntimeError: if any of the four variables is unset or empty; the
            message lists the missing names.
    """
    import os  # local import keeps this cell independent of the import cell

    names = ('TWITTER_CONSUMER_KEY', 'TWITTER_CONSUMER_SECRET',
             'TWITTER_OAUTH_TOKEN', 'TWITTER_OAUTH_TOKEN_SECRET')

    # Check everything up front so the error names all missing variables.
    missing = [name for name in names if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            'Missing Twitter credentials; set environment variable(s): '
            + ', '.join(missing))

    consumer_key, consumer_secret, oauth_token, oauth_token_secret = [
        os.environ[name] for name in names]

    # twitter.oauth.OAuth takes the token pair first, then the consumer pair.
    auth = twitter.oauth.OAuth(oauth_token, oauth_token_secret,
                               consumer_key, consumer_secret)
    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api
In [2]:
def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call ``twitter_api_func(*args, **kw)``, retrying on selected failures.

    Args:
        twitter_api_func: callable to invoke.
        max_errors: how many URLError/BadStatusLine failures are tolerated
            before the last one is re-raised. The counter is reset whenever a
            TwitterHTTPError is handled.
        *args, **kw: forwarded to ``twitter_api_func``.
            Gotcha: ``max_errors`` precedes ``*args``, so the first extra
            positional argument is bound to ``max_errors``. Pass API
            arguments by keyword, or pass ``max_errors`` explicitly.

    Returns:
        Whatever ``twitter_api_func`` returns, or None after a 401 or 404
        response.

    Raises:
        twitter.api.TwitterHTTPError: for unhandled status codes, or when the
            back-off period has grown beyond 3600 seconds.
        URLError, BadStatusLine: once more than ``max_errors`` of them have
            occurred.
    """

    def log(message):
        # One line on stderr, same output as ``print >> sys.stderr, message``.
        sys.stderr.write(message + '\n')

    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        """Decide how to react to an HTTP error from the API.

        Returns the wait period to use for the next failure, or None when the
        request should be abandoned (401/404). Re-raises ``e`` otherwise.
        """
        if wait_period > 3600:  # Seconds
            log('Too many retries. Quitting.')
            raise e

        status = e.e.code

        if status == 401:
            log('Encountered 401 Error (Not Authorized)')
            return None

        if status == 404:
            log('Encountered 404 Error (Not Found)')
            return None

        if status == 429:
            log('Encountered 429 Error (Rate Limit Exceeded)')
            if not sleep_when_rate_limited:
                raise e  # Caller must handle the rate limiting issue
            log("Retrying in 15 minutes...ZzZ...")
            sys.stderr.flush()
            time.sleep(60*15 + 5)
            log('...ZzZ...Awake now and trying again.')
            return 2  # start the back-off over after the long sleep

        if status in (500, 502, 503, 504):
            log('Encountered %i Error. Retrying in %i seconds' %
                (status, wait_period))
            time.sleep(wait_period)
            return wait_period * 1.5  # grow the back-off for the next failure

        raise e

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as e:
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return None
        except (URLError, BadStatusLine) as e:
            # Both low-level failures are handled identically apart from the
            # name used in the log line.
            error_count += 1
            label = 'URLError' if isinstance(e, URLError) else 'BadStatusLine'
            log('%s encountered. Continuing.' % label)
            if error_count > max_errors:
                log('Too many consecutive errors...bailing out.')
                raise
In [3]:
# Create the API client used by the search cells that follow.
twitter_api = oauth_login()
In [7]:
def twitter_search(twitter_api, q, max_results, **kw):
    """Collect up to ``max_results`` statuses for query ``q``, following pages.

    Args:
        twitter_api: client exposing ``search.tweets(**params)`` that returns
            a dict with ``'statuses'`` and ``'search_metadata'`` keys.
        q: the search query string.
        max_results: upper bound on the number of statuses returned
            (clamped to 10000).
        **kw: extra parameters for the first ``search.tweets`` call.

    Returns:
        A list of at most ``max_results`` statuses, in the order received.
    """
    try:
        from urlparse import parse_qsl  # Python 2
    except ImportError:
        from urllib.parse import parse_qsl  # Python 3

    # NOTE(review): `count` is sent as given; the API presumably applies its
    # own per-page maximum — confirm against the search/tweets documentation.
    search_results = twitter_api.search.tweets(q=q, count=max_results, **kw)
    # Copy so the caller-visible response dict is not mutated while extending.
    statuses = list(search_results['statuses'])

    # Enforce a reasonable limit
    max_results = min(10000, max_results)

    max_follow_up_requests = 10000  # hard stop so the loop cannot run forever
    for _ in range(max_follow_up_requests):
        if len(statuses) >= max_results:
            break

        try:
            next_results = search_results['search_metadata']['next_results']
        except KeyError:  # No more results when next_results doesn't exist
            break
        print(next_results)

        # next_results is a URL query string of the form
        #   ?max_id=313519052523986943&q=NCAA&include_entities=1
        # Its values are percent-encoded, so they must be decoded before being
        # handed back to the client (which encodes them again); a plain
        # split('=') would turn '#happy' into a search for '%23happy'.
        kwargs = dict(parse_qsl(next_results[1:], keep_blank_values=True))
        # Reuse the caller's original query rather than the echoed copy, which
        # sidesteps any decoding differences for non-ASCII queries.
        kwargs['q'] = q

        search_results = twitter_api.search.tweets(**kwargs)
        statuses.extend(search_results['statuses'])

    # The last page may overshoot the requested amount.
    return statuses[:max_results]
In [5]:
# Run the paginating search helper for tweets matching both hashtags,
# passing 400 as max_results.
q = "#happy #surprised"
results = twitter_search(twitter_api, q, max_results=400)
In [8]:
if __name__=="__main__":
searchPhrase = "#happy #surprised"
maxNumberOfResults = 400
results = twitter_search(twitter_api, searchPhrase, max_results=400)
print "Number of results fetched: ",len(results)
In [24]:
# Display the 'statuses' list from a one-off search for #freebandnames.
twitter_api.search.tweets(q="#freebandnames")['statuses']
Out[24]:
In [37]:
import time
results = twitter_api.search.tweets(q="#happy #surprised")
max_id = results['search_metadata']['max_id']
set_ids = set()
for eachid in results['statuses']:
set_ids.add(eachid['id'])
print eachid['text']
while True:
print max_id
results = twitter_api.search.tweets(q="#happy #surprised", since_id=max_id)
max_id = results['search_metadata']['max_id']
print results['statuses']
sys.stdout.flush()
for eachid in results['statuses']:
set_ids.add(eachid['id'])
print eachid['text']
if len(set_ids) > 1500:
break
print "-"*50
sys.stdout.flush()
time.sleep(10)
In [31]:
# Number of distinct status IDs collected by the polling cell.
len(set_ids)
Out[31]:
In [ ]: