Stream Api

Extract using timframe and related keyword Encode and then output in json


In [5]:
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import io

In [3]:
ckey = 'vIESIYA65Zx1BDQqgyzGUdUXP'
consumer_secret = 'S76tJwQpFw9yg4GrMMr9qQmXQ9QiWnTEMvwhnTluHgd0x2PmTa'
access_token_key = '280025454-QCIfZsD40uw0VHlBeNBi6FFVhHkJFK6CkKT4FW7c'
access_token_secret = '#########333'
 
 
start_time = time.time() #grabs the system time
keyword_list = ['twitter'] #track list

In [7]:
#Listener Class Override
class listener(StreamListener):
    def __init__(self, start_time, time_limit=6000):
        self.time = start_time
        self.limit = time_limit
        self.tweet_data = []
 
    def on_data(self, data):
 
        saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
 
        while (time.time() - self.time) < self.limit:
 
            try:
 
                self.tweet_data.append(data)
 
                return True
 
 
            except BaseException, e:
                print 'failed ondata,', str(e)
                time.sleep(5)
                pass
 
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
        exit()

        def on_error(self, status):
            print statuses
        
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
auth = OAuthHandler(ckey, consumer_secret) #OAuth object
auth.set_access_token(access_token_key, access_token_secret)


twitterStream = Stream(auth, listener(start_time, time_limit=200)) #initialize Stream object with a time out limit
twitterStream.filter(track=keyword_list, languages=['en'])  #call the filter method to run the Stream Object


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-d88ca3043539> in <module>()
     44 
     45 twitterStream = Stream(auth, listener(start_time, time_limit=200)) #initialize Stream object with a time out limit
---> 46 twitterStream.filter(track=keyword_list, languages=['en'])  #call the filter method to run the Stream Object

C:\Users\Vamps\Anaconda2\lib\site-packages\tweepy\streaming.pyc in filter(self, follow, track, async, locations, stall_warnings, languages, encoding, filter_level)
    443         self.session.params = {'delimited': 'length'}
    444         self.host = 'stream.twitter.com'
--> 445         self._start(async)
    446 
    447     def sitestream(self, follow, stall_warnings=False,

C:\Users\Vamps\Anaconda2\lib\site-packages\tweepy\streaming.pyc in _start(self, async)
    359             self._thread.start()
    360         else:
--> 361             self._run()
    362 
    363     def on_closed(self, resp):

C:\Users\Vamps\Anaconda2\lib\site-packages\tweepy\streaming.pyc in _run(self)
    292             # call a handler first so that the exception can be logged.
    293             self.listener.on_exception(exception)
--> 294             raise exception
    295 
    296     def _data(self, data):

TypeError: write() argument 1 must be unicode, not str