In [4]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

# Standard library
import json

# Module specific
#import ads
import ads.sandbox as ads
import pandas as pd
import markovify

In [5]:
# Which metadata fields do we want to retrieve from the ADS API?
# (basically everything apart from 'aff' and 'body' to reduce data volume)
FIELDS = ['pub', 'citation_count', 'year', 'first_author_norm',
          'title', 'property'
         ]

In [7]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

# Standard library
import json
import re
import argparse

# Module specific
import ads
#import ads.sandbox as ads
import pandas as pd

# Which metadata fields do we want to retrieve from the ADS API?
FIELDS = ['pub', 'citation_count', 'year', 'first_author_norm',
          'title', 'property'
         ]

def getPapers(year=1991,rows=200000, mincite=2):

    query = ads.SearchQuery(rows=rows, 
        year=year, 
        fl=FIELDS, 
        database = "astronomy", sort='citation_count desc',
           fq=['database:astronomy', 'property:refereed', 
           'property:article', 'citation_count:[{} TO *]'.format(mincite)])

    return query

def makeDataframe(year=1991,rows=200000, mincite=2):
    papers = []
    for x in getPapers(year=year, rows=rows, mincite=mincite):
        papers.append(x)

    df = pd.DataFrame(
        columns=['lastname', 'title'],
        data=[returnLastnameTitle(q) for q in papers])

    return df

def returnLastnameTitle(q):
    # last name
    try:
        lastname = q.first_author_norm.split(',')[0]
        lastname = re.sub(r'([^\s\w]|_)+', '', lastname)
    except AttributeError:
        return ['none','none']

    # paper title
    try:
        title = q.title[0]
        title = re.sub(r'([^\s\w]|_)+', '', title)
    except TypeError:
        return ['none','none']

    return [lastname,title]

def toJson(year=1991,rows=200000, mincite=2):
    df = makeDataframe(year=year,rows=rows, mincite=mincite)
    df.to_json('data/{}.json'.format(year))

In [51]:
def printReference(year):
    df = pd.read_json('data/{}.json'.format(year), )
    df.sort_index(inplace=True)
    textstr = '. '.join([df.title[i] for i in range(df.shape[0])])
    text_model = markovify.Text(textstr, state_size=1, )
    outtitle = text_model.make_short_sentence(90)

    author = df['lastname'].value_counts()[df['lastname'].value_counts() > 1].sample().index[0]

    print('{} et al., {} ({})'.format(author,outtitle,year))

In [61]:
printReference(1990)


Briggs et al., Cosmogenic nuclides in SU UMa Stars. (1990)

In [88]:
import markovify
import pandas

from twython import Twython
from secrets import *

def printReference(year):
    df = pd.read_json('data/{}.json'.format(year), )
    df.sort_index(inplace=True)
    textstr = '. '.join([df.title[i] for i in range(df.shape[0])])
    text_model = markovify.Text(textstr, state_size=1, )
    outtitle = text_model.make_short_sentence(140)

    author = df['lastname'].value_counts()[df['lastname'].value_counts() > 1].sample().index[0]

    return '{} et al., {} ({})'.format(author,outtitle,year)

def post_tweet(status,):
    """Post an animated gif and associated status message to Twitter."""
    twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
    response = twitter.update_status(status=status)
    print(response)
    return twitter, response

In [102]:
#status = printReference(1985)
#q = post_tweet(status)

In [69]:
# twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
# response = twitter.update_status(status='hello', )

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [62]:
df = pd.read_json('data/2015.json', )
df.sort(inplace=True)


/Users/tom/.virtualenvs/p2/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(....) is deprecated, use sort_index(.....)
  from ipykernel import kernelapp as app

In [98]:
print(printReference(1973))


Wilson et al., Observations of auroral arcs and Stability of Mexico. (1973)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [63]:
df.shape


Out[63]:
(2000, 2)

In [ ]:


In [101]:
re.sub('-',' ', 'tom-tom')


Out[101]:
u'tom tom'

In [ ]:


In [ ]: