El módulo a utilizar es tweepy.
Creamos un entorno (environment) de conda con Python 2, llamado py2, e instalamos tweepy sobre él:
conda create -n py2 python=2 anaconda
source activate py2
/home/guadatech/anaconda3/envs/py2/bin/easy_install tweepy
Ejecutamos el notebook en el entorno creado
/home/guadatech/anaconda3/envs/py2/bin/ipython notebook
In [1]:
import tweepy
import ConfigParser
# Read the four Twitter credentials from the [DatosTwitter] section of the
# local .ini file.
config = ConfigParser.ConfigParser()
config.read('datosCuentaTwitter.ini')
APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET = (
    config.get('DatosTwitter', opcion)
    for opcion in ('APP_KEY', 'APP_SECRET', 'OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET')
)

# Build the OAuth handler and the tweepy API client (`api`) that the
# following cells call.
auth = tweepy.OAuthHandler(APP_KEY, APP_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
api = tweepy.API(auth)
In [2]:
# Print author, timestamp and text of two tweets from the home timeline.
public_tweets = api.home_timeline(count=2)
for tweet in public_tweets:
    print(tweet.user.screen_name)
    print(tweet.created_at)
    # Call form of print, like every other print in this file; this one line
    # used the Python 2-only statement form.
    print(tweet.text)
    print(' *'*40)
In [ ]:
# Search: request 4 results of type 'recent' for the query 'GeekyTheory' and
# print timestamp, author and text of each, followed by a separator line.
resultados = api.search(q='GeekyTheory', count=4, result_type='recent')
for tweet in resultados:
    for campo in (tweet.created_at, tweet.user.screen_name, tweet.text, ' *'*40):
        print(campo)
In [36]:
# Collect every tweet of the user's home timeline between two given dates.
import datetime

startDate = datetime.datetime(2015, 12, 7, 0, 30, 0)  # format: 2015-12-07 12:15:12
endDate = datetime.datetime(2015, 12, 7, 1, 0, 0)

tweets = []
tmpTweets = api.home_timeline()
# Walk the timeline page by page, keeping what falls strictly inside
# (startDate, endDate).  Stop when a page comes back empty or when its last
# tweet is not newer than startDate.
while tmpTweets:
    for tweet in tmpTweets:
        if startDate < tweet.created_at < endDate:
            tweets.append(tweet)
    ultimo = tmpTweets[-1]
    if ultimo.created_at <= startDate:
        break
    # Formatted with % so Python 2 prints a message instead of a tuple repr.
    print("Last Tweet @ %s - fetching some more" % ultimo.created_at)
    # max_id is inclusive: subtract 1 so the last tweet of this page is not
    # returned (and appended) again by the next request.
    tmpTweets = api.home_timeline(max_id=ultimo.id - 1, count=20)

print("Numero de Tweets: %d" % len(tweets))
for tweet in tweets:
    print(tweet.created_at)
    print(tweet.user.screen_name)
    print(tweet.text)
    print(' *'*40)
In [64]:
# Alternative approach: iterate the search results through tweepy.Cursor,
# taking at most 100 items and numbering them from 1.
cont = 0
busqueda = tweepy.Cursor(api.search, q='ciudadanos,elmundoes',
                         since="2015-12-06", until="2015-12-07")
for cont, tweet in enumerate(busqueda.items(100), 1):
    print(cont)
    print(tweet.created_at)
    print(tweet.user.screen_name)
    print(tweet.text)
    print(' *'*40)
In [54]:
# Alternative approach: iterate the timeline of user "elmundoes" through
# tweepy.Cursor, taking at most 10 items and numbering them from 1.
cont = 0
linea_tiempo = tweepy.Cursor(api.user_timeline, id="elmundoes")
for cont, tweet in enumerate(linea_tiempo.items(10), 1):
    print(cont)
    print(tweet.created_at)
    print(tweet.user.screen_name)
    print(tweet.text)
    print(' *'*40)
In [19]:
import datetime
def buscaTweetsPorTerminosYFechas(usuario, terminos, desde, hasta, limite=100):
    """Return the tweets written by ``usuario`` that a term search finds.

    The terms are joined with commas into a single query, which is run with
    ``tweepy.Cursor`` over the module-level ``api.search``.  A result is kept
    only when its author's screen name equals ``usuario`` and its text has
    not been seen before.

    Args:
        usuario: screen name the author of a tweet must match exactly.
        terminos: iterable of search terms (strings).
        desde: start date as a string (e.g. "2015-12-07"), sent as ``since``.
        hasta: end date as a string (e.g. "2015-12-08"), sent as ``until``.
        limite: maximum number of search results to inspect.  Defaults to
            100, the value that used to be hard-coded.

    Returns:
        A list of ``(created_at, text)`` tuples, in the order the search
        returned them.
    """
    busqueda = ','.join(terminos)
    tweets = []
    tweetsUnicos = set()  # texts already collected, to drop duplicates
    resultados = tweepy.Cursor(api.search, q=busqueda, since=desde, until=hasta)
    for tweet in resultados.items(limite):
        if usuario == tweet.user.screen_name and tweet.text not in tweetsUnicos:
            tweets.append((tweet.created_at, tweet.text))
            tweetsUnicos.add(tweet.text)
    return tweets
In [20]:
# Search the tweets by 'elmundoes' for the term 'ciudadanos' between
# 2015-12-07 and 2015-12-08, and print how many were found.
tweets = buscaTweetsPorTerminosYFechas(
    usuario='elmundoes',
    terminos=['ciudadanos'],
    desde="2015-12-07",
    hasta="2015-12-08",
)
print(len(tweets))
In [15]:
# Display the current contents of `tweets`.
print(tweets)
In [42]:
# How many tweets do the three newspapers (abc, el pais and el mundo) that
# raspbebot follows publish in one day?
import datetime

# since/until are search parameters; the home timeline endpoint does not
# take them, so the date window is applied here on created_at instead.
desde = datetime.datetime(2015, 12, 6)
hasta = datetime.datetime(2015, 12, 7)
tweets = []
for tweet in tweepy.Cursor(api.home_timeline).items():
    if tweet.created_at < desde:
        # Assumes the timeline is returned newest-first, so everything that
        # follows is older than the window -- TODO confirm.
        break
    if tweet.created_at < hasta:
        tweets.append(tweet)
print(len(tweets))
In [43]:
# Notebook expression: the number of collected tweets is shown as cell output.
len(tweets)
Out[43]:
In [47]:
# Show (timestamp, text) for the last ten collected tweets.
ultimos = tweets[-10:]
print([(str(tweet.created_at), tweet.text) for tweet in ultimos])
In [ ]:
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import ConfigParser
# Read the four Twitter credentials from the [DatosTwitter] section of the
# local .ini file.
config = ConfigParser.ConfigParser()
config.read('datosCuentaTwitter.ini')
APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET = (
    config.get('DatosTwitter', opcion)
    for opcion in ('APP_KEY', 'APP_SECRET', 'OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET')
)
class MyListener(StreamListener):
    """Stream listener that appends every raw message to ``python.json``.

    WARNING: outdated approach.  See the ``on_status``-based listener further
    down in this file for a better version.
    """

    def on_data(self, data):
        """Print the raw payload and append it to ``python.json``.

        Always returns True, also when printing or writing fails, so one bad
        message does not end the stream.
        """
        try:
            with open('python.json', 'a') as f:
                print(data)
                f.write(data)
        except Exception as e:
            # Best effort: report and carry on.  Exception rather than
            # BaseException, so KeyboardInterrupt / SystemExit still propagate.
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status):
        """Print the error status; return False on 420, True otherwise."""
        print(status)
        if status == 420:
            # Same policy as the later listener in this file: give up when
            # rate limited (tweepy disconnects when on_error returns False).
            return False
        return True
# Authenticate and start a filtered stream tracking the term 'kevin'.
auth = OAuthHandler(APP_KEY, APP_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
listener = MyListener()
twitter_stream = Stream(auth, listener)
twitter_stream.filter(track=['kevin'])
In [29]:
import thread
import json
#Import the necessary methods from tweepy library
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
# Variables that contain the user credentials to access the Twitter API.
# They are read from the same local .ini file the other cells use instead of
# being hard-coded: secrets written in source are exposed to every reader.
# SECURITY: the keys that used to be written here must be treated as
# compromised -- revoke and regenerate them.
import ConfigParser
config = ConfigParser.ConfigParser()
config.read('datosCuentaTwitter.ini')
APP_KEY = config.get('DatosTwitter', 'APP_KEY')
APP_SECRET = config.get('DatosTwitter', 'APP_SECRET')
OAUTH_TOKEN = config.get('DatosTwitter', 'OAUTH_TOKEN')
OAUTH_TOKEN_SECRET = config.get('DatosTwitter', 'OAUTH_TOKEN_SECRET')
# Account label -> id string; these values are what the stream cell passes
# to filter(follow=...).
periodicos = dict([
    ('elmundo', '14436030'),
    ('elpais', '7996082'),
    ('abc', '19923515'),
    ('larazon', '112694236'),
    ('eldiarioes', '535707261'),
    ('LaVanguardia', '74453123'),
    ('publico', '17676713'),
    ('20minutos', '31090827'),
    ('raspbebot', '1635231295'),
])

# Collected tweets; starts empty.
tweets = {}
class MyListener(StreamListener):
    """Stream listener that prints and stores the tweets of a few accounts."""

    def on_status(self, tweet):
        """Print and record a status written by one of the selected accounts.

        Recorded entries go into the module-level ``tweets`` dict, as
        ``(str(created_at), text)`` tuples appended to the list stored under
        the author's screen name.
        """
        autor = tweet.user.screen_name
        # NOTE(review): this export lost its indentation; recording is assumed
        # to happen only for the accounts listed here -- confirm.
        if autor not in ['el_pais', 'elmundoes', 'abc_es', 'raspbebot']:
            return
        for linea in (tweet.created_at, autor, tweet.text, ' *'*40):
            print(linea)
        tweets.setdefault(autor, []).append((str(tweet.created_at), tweet.text))

    def on_error(self, status_code):
        """Return False for status 420; any other status returns None."""
        return False if status_code == 420 else None
# Authenticate and open the filtered stream.
auth = tweepy.OAuthHandler(APP_KEY, APP_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
twitter_stream = Stream(auth, MyListener())

# track must be a LIST of terms: tweepy builds the request with
# ','.join(track), so a single string is split into one-character terms.
# The former string literal also embedded line-continuation whitespace and an
# empty term.
terminos = ['PP', 'Psoe', 'Podemos', 'Ciudadanos', 'Rajoy', 'Pedro', 'Pablo', 'Albert',
            'Mariano', 'Sanchez', 'Iglesias', 'Rivera',
            'CiudadanosCs', 'PPopular', 'PSOE', 'ahorapodemos']
# Accounts to follow, looked up by label in `periodicos`.
cuentas = ['elmundo', 'elpais', 'abc', 'larazon', 'eldiarioes',
           'LaVanguardia', 'publico', '20minutos', 'raspbebot']
twitter_stream.filter(track=terminos,
                      follow=[periodicos[nombre] for nombre in cuentas])
# Pass async=True to filter() to run the stream in parallel
# Spanish only: languages=['es']
# To follow the newspapers
# [elmundo, elpais, abc, larazon,eldiarioes,LaVanguardia,publico,20minutos ,raspbebot]
# follow=['14436030', '7996082', '19923515', '112694236','535707261','74453123','17676713','31090827','1635231295']
# Search term: track=['dwwww']
In [22]:
# Display the current contents of `tweets`.
print(tweets)
In [23]:
# Serialise `tweets` as indented JSON with sorted keys, then append the text
# to periodicos.json.
datos = json.dumps(tweets, indent=4, sort_keys=True, separators=(',', ':'))
with open('periodicos.json', 'a') as f:
    f.write(datos)
In [ ]: