Twitter

Para esta parte os recomiendo seguir primero la información del libro Mining the Social Web, en concreto el capítulo referente a Twitter (enlace). Después os adjunto algunas cosas interesantes.


In [9]:
#Es bueno crearse un módulo en el que estén las funciones que más vais a utilizar
#en el trabajo con el API de Twitter, como puede ser la conexión o la búsqueda
#de datos
#
#Todo el código de esta parte podría ir en un módulo aparte llamado, por ejemplo, twitterAPI
__author__ = 'miguel'
import io
import json
import os

try:
    from urllib.parse import parse_qsl  # Python 3
except ImportError:
    from urlparse import parse_qsl  # Python 2

import twitter

# Connection helper.
def oauth_login(consumer_key=None, consumer_secret=None,
                oauth_token=None, oauth_token_secret=None):
    """Build an authenticated Twitter API client.

    Credentials are never stored in the notebook. Each value is taken from
    the matching argument or, when the argument is None, from an environment
    variable:

        consumer_key        -> TWITTER_CONSUMER_KEY
        consumer_secret     -> TWITTER_CONSUMER_SECRET
        oauth_token         -> TWITTER_OAUTH_TOKEN
        oauth_token_secret  -> TWITTER_OAUTH_TOKEN_SECRET

    Returns:
        The object created by ``twitter.Twitter(auth=...)``.

    Raises:
        RuntimeError: if any of the four credentials is missing or empty.
            The message lists the environment variables that must be set.
    """
    # NOTE: the keys that used to be hard-coded here were published together
    # with the notebook, so they must be considered compromised and should be
    # regenerated in the Twitter application settings.
    supplied = {
        'TWITTER_CONSUMER_KEY': consumer_key,
        'TWITTER_CONSUMER_SECRET': consumer_secret,
        'TWITTER_OAUTH_TOKEN': oauth_token,
        'TWITTER_OAUTH_TOKEN_SECRET': oauth_token_secret,
    }
    credentials = dict(
        (env_name, value if value is not None else os.environ.get(env_name))
        for env_name, value in supplied.items()
    )

    # Fail early, before any library call, with a message that says what to set.
    missing = sorted(name for name, value in credentials.items() if not value)
    if missing:
        raise RuntimeError(
            'Missing Twitter credentials; set: ' + ', '.join(missing))

    auth = twitter.oauth.OAuth(
        credentials['TWITTER_OAUTH_TOKEN'],
        credentials['TWITTER_OAUTH_TOKEN_SECRET'],
        credentials['TWITTER_CONSUMER_KEY'],
        credentials['TWITTER_CONSUMER_SECRET'],
    )

    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api

# Store data on disk in JSON format.
def save_json(filename, data):
    """Serialize ``data`` as JSON into ``<filename>.json`` (UTF-8 encoded).

    Args:
        filename: path of the output file without the ``.json`` extension.
        data: any object accepted by ``json.dumps``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) instead
    of as ``\\uXXXX`` escapes.
    """
    payload = json.dumps(data, ensure_ascii=False)
    # On Python 2, json.dumps may return a byte string; decode it explicitly
    # as UTF-8 because io.open in text mode only accepts unicode text. On
    # Python 3 the result is already text and this branch is skipped.
    if isinstance(payload, bytes):
        payload = payload.decode('utf-8')
    with io.open('{0}.json'.format(filename), 'w', encoding='utf-8') as f:
        f.write(payload)

# Read back a file stored as '<filename>.json'.
def load_json(filename):
    """Return the text content of ``<filename>.json`` decoded as UTF-8.

    The content is returned as a string exactly as read from the file; it is
    not parsed, so callers that need the data structure must pass the result
    to ``json.loads``.
    """
    path = '{0}.json'.format(filename)
    source = io.open(path, encoding='utf-8')
    try:
        contents = source.read()
    finally:
        source.close()
    return contents

In [10]:
# This part could live in a separate file that imports the helper functions
# defined above.
twitter_api = oauth_login()

# Location whose trending topics are requested. This id identifies Spain
# (the response echoes it back as woeid 23424950, name "Spain").
SPAIN_WOE_ID = 23424950
spain_trends = twitter_api.trends.place(_id=SPAIN_WOE_ID)
# Parenthesized single-argument print behaves the same on Python 2 and 3.
print(json.dumps(spain_trends, indent=1))


[
 {
  "created_at": "2016-03-14T18:26:16Z", 
  "trends": [
   {
    "url": "http://twitter.com/search?q=%23EnUnaSociedadSana", 
    "query": "%23EnUnaSociedadSana", 
    "tweet_volume": 12300, 
    "name": "#EnUnaSociedadSana", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23TorosMVT", 
    "query": "%23TorosMVT", 
    "tweet_volume": null, 
    "name": "#TorosMVT", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23CapitalismoVsMiseria", 
    "query": "%23CapitalismoVsMiseria", 
    "tweet_volume": 36868, 
    "name": "#CapitalismoVsMiseria", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23VideoclipGoodLovin", 
    "query": "%23VideoclipGoodLovin", 
    "tweet_volume": null, 
    "name": "#VideoclipGoodLovin", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23eurovipll%C3%B3n", 
    "query": "%23eurovipll%C3%B3n", 
    "tweet_volume": null, 
    "name": "#eurovipll\u00f3n", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Karl+Marx%22", 
    "query": "%22Karl+Marx%22", 
    "tweet_volume": 19201, 
    "name": "Karl Marx", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Leopoldo+L%C3%B3pez%22", 
    "query": "%22Leopoldo+L%C3%B3pez%22", 
    "tweet_volume": 13983, 
    "name": "Leopoldo L\u00f3pez", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Gin%C3%A9s+Mar%C3%ADn%22", 
    "query": "%22Gin%C3%A9s+Mar%C3%ADn%22", 
    "tweet_volume": null, 
    "name": "Gin\u00e9s Mar\u00edn", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=Macedonia", 
    "query": "Macedonia", 
    "tweet_volume": 19499, 
    "name": "Macedonia", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22PP+de+Valencia%22", 
    "query": "%22PP+de+Valencia%22", 
    "tweet_volume": null, 
    "name": "PP de Valencia", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Rita+Barber%C3%A1%22", 
    "query": "%22Rita+Barber%C3%A1%22", 
    "tweet_volume": null, 
    "name": "Rita Barber\u00e1", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=Ondarroa", 
    "query": "Ondarroa", 
    "tweet_volume": null, 
    "name": "Ondarroa", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=Puigdemont", 
    "query": "Puigdemont", 
    "tweet_volume": null, 
    "name": "Puigdemont", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=Soro", 
    "query": "Soro", 
    "tweet_volume": null, 
    "name": "Soro", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Van+Avermaet%22", 
    "query": "%22Van+Avermaet%22", 
    "tweet_volume": null, 
    "name": "Van Avermaet", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Eva+Borox%22", 
    "query": "%22Eva+Borox%22", 
    "tweet_volume": null, 
    "name": "Eva Borox", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%22Renfe+y+Adif%22", 
    "query": "%22Renfe+y+Adif%22", 
    "tweet_volume": null, 
    "name": "Renfe y Adif", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=Togashi", 
    "query": "Togashi", 
    "tweet_volume": null, 
    "name": "Togashi", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23YoungLove", 
    "query": "%23YoungLove", 
    "tweet_volume": 30594, 
    "name": "#YoungLove", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23TeJuroQue", 
    "query": "%23TeJuroQue", 
    "tweet_volume": 10557, 
    "name": "#TeJuroQue", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23rushdegira", 
    "query": "%23rushdegira", 
    "tweet_volume": null, 
    "name": "#rushdegira", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23ShadowhuntersSeason2", 
    "query": "%23ShadowhuntersSeason2", 
    "tweet_volume": 55932, 
    "name": "#ShadowhuntersSeason2", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23PiDay", 
    "query": "%23PiDay", 
    "tweet_volume": 161886, 
    "name": "#PiDay", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23EnGAMEtengoFE", 
    "query": "%23EnGAMEtengoFE", 
    "tweet_volume": null, 
    "name": "#EnGAMEtengoFE", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23BastaDeSilencio", 
    "query": "%23BastaDeSilencio", 
    "tweet_volume": null, 
    "name": "#BastaDeSilencio", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23masvaletarde", 
    "query": "%23masvaletarde", 
    "tweet_volume": null, 
    "name": "#masvaletarde", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23AtletiPSV", 
    "query": "%23AtletiPSV", 
    "tweet_volume": null, 
    "name": "#AtletiPSV", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23zapeando575", 
    "query": "%23zapeando575", 
    "tweet_volume": null, 
    "name": "#zapeando575", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23plenoZGZ", 
    "query": "%23plenoZGZ", 
    "tweet_volume": null, 
    "name": "#plenoZGZ", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23VCTE39", 
    "query": "%23VCTE39", 
    "tweet_volume": null, 
    "name": "#VCTE39", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23Putin", 
    "query": "%23Putin", 
    "tweet_volume": null, 
    "name": "#Putin", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23DedicatoriasMusicalesQMCFM", 
    "query": "%23DedicatoriasMusicalesQMCFM", 
    "tweet_volume": null, 
    "name": "#DedicatoriasMusicalesQMCFM", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23AureliersSomosCandela", 
    "query": "%23AureliersSomosCandela", 
    "tweet_volume": null, 
    "name": "#AureliersSomosCandela", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23EsperanzaMiaEnNova66", 
    "query": "%23EsperanzaMiaEnNova66", 
    "tweet_volume": null, 
    "name": "#EsperanzaMiaEnNova66", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23Preacher", 
    "query": "%23Preacher", 
    "tweet_volume": null, 
    "name": "#Preacher", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23edirecto", 
    "query": "%23edirecto", 
    "tweet_volume": null, 
    "name": "#edirecto", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23LaHoradelasamigas", 
    "query": "%23LaHoradelasamigas", 
    "tweet_volume": null, 
    "name": "#LaHoradelasamigas", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23TADciudadanoapp", 
    "query": "%23TADciudadanoapp", 
    "tweet_volume": null, 
    "name": "#TADciudadanoapp", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23cambiame192", 
    "query": "%23cambiame192", 
    "tweet_volume": null, 
    "name": "#cambiame192", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%2325PremiosUA", 
    "query": "%2325PremiosUA", 
    "tweet_volume": null, 
    "name": "#25PremiosUA", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23EnMaximaArturoGrao", 
    "query": "%23EnMaximaArturoGrao", 
    "tweet_volume": null, 
    "name": "#EnMaximaArturoGrao", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23STOPacuerdo", 
    "query": "%23STOPacuerdo", 
    "tweet_volume": null, 
    "name": "#STOPacuerdo", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23TerritorioUD", 
    "query": "%23TerritorioUD", 
    "tweet_volume": null, 
    "name": "#TerritorioUD", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23AnabelaNewRichard", 
    "query": "%23AnabelaNewRichard", 
    "tweet_volume": null, 
    "name": "#AnabelaNewRichard", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23Reto150SinPersonalidad", 
    "query": "%23Reto150SinPersonalidad", 
    "tweet_volume": null, 
    "name": "#Reto150SinPersonalidad", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23TodosConFranNicolas", 
    "query": "%23TodosConFranNicolas", 
    "tweet_volume": null, 
    "name": "#TodosConFranNicolas", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23Reto100RaquelASalvame", 
    "query": "%23Reto100RaquelASalvame", 
    "tweet_volume": null, 
    "name": "#Reto100RaquelASalvame", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23eShowBCN16", 
    "query": "%23eShowBCN16", 
    "tweet_volume": null, 
    "name": "#eShowBCN16", 
    "promoted_content": null
   }, 
   {
    "url": "http://twitter.com/search?q=%23MestreMateo2016", 
    "query": "%23MestreMateo2016", 
    "tweet_volume": null, 
    "name": "#MestreMateo2016", 
    "promoted_content": null
   }
  ], 
  "as_of": "2016-03-14T18:35:06Z", 
  "locations": [
   {
    "woeid": 23424950, 
    "name": "Spain"
   }
  ]
 }
]

In [16]:
# The Twitter API limits how many calls can be made, so it is advisable to
# keep the response in a file instead of requesting it again.
save_json(filename='spainmore', data=spain_trends)

In [ ]:
# Later the stored data can be recovered to keep working with it.
# load_json returns the raw text of the file, so it has to be parsed to get
# the original structure back.
recover_data = json.loads(load_json('spainmore'))

# Check that it was recovered correctly
print(json.dumps(recover_data, indent=1))

In [17]:
# Extract only the search query of each trending topic.
# The file is opened in a with-block so the handle is always closed, and it
# is decoded as UTF-8, the encoding save_json writes.
with io.open('spainmore.json', encoding='utf-8') as trends_file:
    datos = json.load(trends_file)

# The stored document is a one-element list whose 'trends' entry holds one
# dict per trending topic.
consultas = [trend['query'] for trend in datos[0]['trends']]

print(json.dumps(consultas, indent=1))


[
 "%23FelizLunes", 
 "%23paro", 
 "%23RajoyRNE", 
 "%22Seguridad+Social%22", 
 "%23DMAF2015", 
 "%23QuieroUnHTCOneM9", 
 "%22David+Lynch%22", 
 "%22PP+la+Comunidad+Valenciana%22", 
 "%22El+PIB%22", 
 "%22Mario+Camus%22"
]

In [34]:
# Search for tweets matching a hashtag and page through the results.
q = '#paro'
count = 100
MAX_EXTRA_BATCHES = 5  # how many additional result pages to request at most

search_results = twitter_api.search.tweets(q=q, count=count)
statuses = search_results['statuses']

# Iterate through up to MAX_EXTRA_BATCHES more batches of results by
# following the cursor
for _ in range(MAX_EXTRA_BATCHES):
    print("Length of statuses %d" % len(statuses))
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError:  # No more results when next_results doesn't exist
        break

    # next_results is a URL query string of the following form:
    #   ?max_id=313519052523986943&q=NCAA&include_entities=1
    # parse_qsl splits it into pairs and URL-decodes the values (for example
    # %23paro -> #paro). Without decoding, the already-encoded value would be
    # passed on as the query text of the next request.
    kwargs = dict(parse_qsl(next_results[1:]))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Store everything that was collected so it can be reused without new calls
save_json('lostrends', statuses)

# Show the place of one sample search result (guarding against no results)
if statuses:
    untweet = statuses[0]
    print(untweet['place'])
else:
    untweet = None
    print("No tweets found for %s" % q)


Length of statuses 100
Length of statuses 200
Length of statuses 200
None

In [23]:



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-23-de54d3319029> in <module>()
----> 1 dict_statuses = json.loads(statuses)

/usr/lib/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    336             parse_int is None and parse_float is None and
    337             parse_constant is None and object_pairs_hook is None and not kw):
--> 338         return _default_decoder.decode(s)
    339     if cls is None:
    340         cls = JSONDecoder

/usr/lib/python2.7/json/decoder.pyc in decode(self, s, _w)
    364 
    365         """
--> 366         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    367         end = _w(s, end).end()
    368         if end != len(s):

TypeError: expected string or buffer

In [18]:
# NOTE(review): %env without arguments returns every environment variable of
# the kernel process, so the saved output exposes local paths, the user name
# and anything else exported in the session. Consider removing this cell or
# clearing its output before sharing the notebook.
%env


Out[18]:
{'CLICOLOR': '1',
 'CLUTTER_IM_MODULE': 'xim',
 'COMPIZ_CONFIG_PROFILE': 'ubuntu',
 'DBUS_SESSION_BUS_ADDRESS': 'unix:abstract=/tmp/dbus-PY34tE7CVt',
 'DEFAULTS_PATH': '/usr/share/gconf/ubuntu.default.path',
 'DESKTOP_SESSION': 'ubuntu',
 'DISPLAY': ':0',
 'GDMSESSION': 'ubuntu',
 'GDM_LANG': 'es_ES',
 'GIT_PAGER': 'cat',
 'GNOME_DESKTOP_SESSION_ID': 'this-is-deprecated',
 'GNOME_KEYRING_CONTROL': '',
 'GNOME_KEYRING_PID': '',
 'GTK2_MODULES': 'overlay-scrollbar',
 'GTK_IM_MODULE': 'ibus',
 'GTK_MODULES': 'unity-gtk-module',
 'HOME': '/home/miguel',
 'IM_CONFIG_PHASE': '1',
 'INSTANCE': '',
 'JOB': 'dbus',
 'LANG': 'es_ES.UTF-8',
 'LANGUAGE': 'es_ES',
 'LESSCLOSE': '/usr/bin/lesspipe %s %s',
 'LESSOPEN': '| /usr/bin/lesspipe %s',
 'LIBVIRT_DEFAULT_URI': 'qemu:///system',
 'LOGNAME': 'miguel',
 'LS_COLORS': 'rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.jpg=01;35:*.jpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.axv=01;35:*.anx=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.axa=00;36:*.oga=00;36:*.spx=00;36:*.xspf=00;36:',
 'MANDATORY_PATH': '/usr/share/gconf/ubuntu.mandatory.path',
 'MANPATH': '/home/miguel/.npm-packages/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man',
 'NODE_PATH': '/home/miguel/.npm-packages/lib/node_modules:/usr/lib/nodejs:/usr/lib/node_modules:/usr/share/javascript',
 'NPM_PACKAGES': '/home/miguel/.npm-packages',
 'PAGER': 'cat',
 'PATH': '/home/miguel/.npm-packages/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games;~/Escritorio/AR/TESISBLOCKS/android-studio/bin',
 'PWD': '/home/miguel/Dropbox/UCA/Lenguaje y Sistemas LSI/10_Segundo_Semestre_2015_16/SD/enotebook/python_introduccion',
 'QT4_IM_MODULE': 'xim',
 'QT_IM_MODULE': 'ibus',
 'QT_QPA_PLATFORMTHEME': 'appmenu-qt5',
 'SESSION': 'ubuntu',
 'SESSIONTYPE': 'gnome-session',
 'SESSION_MANAGER': 'local/miguel-XPS-v14:@/tmp/.ICE-unix/2061,unix/miguel-XPS-v14:/tmp/.ICE-unix/2061',
 'SHELL': '/bin/bash',
 'SHLVL': '1',
 'SSH_AUTH_SOCK': '/run/user/1000/keyring/ssh',
 'TERM': 'xterm-color',
 'UPSTART_SESSION': 'unix:abstract=/com/ubuntu/upstart-session/1000/1831',
 'USER': 'miguel',
 'VTE_VERSION': '4002',
 'WINDOWID': '77595728',
 'XAUTHORITY': '/home/miguel/.Xauthority',
 'XDG_CONFIG_DIRS': '/etc/xdg/xdg-ubuntu:/usr/share/upstart/xdg:/etc/xdg',
 'XDG_CURRENT_DESKTOP': 'Unity',
 'XDG_DATA_DIRS': '/usr/share/ubuntu:/usr/share/gnome:/usr/local/share/:/usr/share/',
 'XDG_GREETER_DATA_DIR': '/var/lib/lightdm-data/miguel',
 'XDG_MENU_PREFIX': 'gnome-',
 'XDG_RUNTIME_DIR': '/run/user/1000',
 'XDG_SEAT': 'seat0',
 'XDG_SEAT_PATH': '/org/freedesktop/DisplayManager/Seat0',
 'XDG_SESSION_DESKTOP': 'ubuntu',
 'XDG_SESSION_ID': 'c2',
 'XDG_SESSION_PATH': '/org/freedesktop/DisplayManager/Session0',
 'XDG_SESSION_TYPE': 'x11',
 'XDG_VTNR': '7',
 'XMODIFIERS': '@im=ibus',
 '_': '/usr/bin/ipython'}

In [ ]: