In [4]:
from __future__ import (absolute_import, division, print_function,
unicode_literals)
# Standard library
import json
# Module specific
#import ads
import ads.sandbox as ads
import pandas as pd
import markovify
In [5]:
# Metadata fields requested from the ADS API for each paper.
# Only this short list is asked for; bulky fields such as 'aff' and 'body'
# are left out to reduce data volume.
FIELDS = [
    'pub',
    'citation_count',
    'year',
    'first_author_norm',
    'title',
    'property',
]
In [7]:
from __future__ import (absolute_import, division, print_function,
unicode_literals)
# Standard library
import json
import re
import argparse
# Module specific
import ads
#import ads.sandbox as ads
import pandas as pd
# Metadata fields requested from the ADS API for each paper.
FIELDS = [
    'pub',
    'citation_count',
    'year',
    'first_author_norm',
    'title',
    'property',
]
def getPapers(year=1991, rows=200000, mincite=2):
    """Build an ADS search query for cited, refereed astronomy articles.

    Parameters
    ----------
    year : int
        Publication year passed to the query.
    rows : int
        Value passed as the ``rows`` argument of the query.
    mincite : int
        Minimum citation count, inserted into the
        ``citation_count:[<mincite> TO *]`` filter.

    Returns
    -------
    ads.SearchQuery
        The query object, sorted by descending citation count. It is
        returned as-is; nothing is iterated here.
    """
    citation_filter = 'citation_count:[{} TO *]'.format(mincite)
    filters = [
        'database:astronomy',
        'property:refereed',
        'property:article',
        citation_filter,
    ]
    return ads.SearchQuery(
        rows=rows,
        year=year,
        fl=FIELDS,
        database="astronomy",
        sort='citation_count desc',
        fq=filters,
    )
def makeDataframe(year=1991, rows=200000, mincite=2):
    """Collect the papers from ``getPapers`` into a two-column DataFrame.

    Parameters
    ----------
    year, rows, mincite
        Forwarded unchanged to ``getPapers``.

    Returns
    -------
    pandas.DataFrame
        One row per paper with columns ``lastname`` and ``title``, as
        produced by ``returnLastnameTitle``.
    """
    # Exhaust the query first so every paper is in hand before cleaning.
    papers = list(getPapers(year=year, rows=rows, mincite=mincite))
    records = [returnLastnameTitle(paper) for paper in papers]
    return pd.DataFrame(columns=['lastname', 'title'], data=records)
def returnLastnameTitle(q):
    """Extract a cleaned first-author last name and title from one paper.

    Parameters
    ----------
    q : object
        Paper record exposing ``first_author_norm`` (a string; the part
        before the first comma is used as the last name) and ``title``
        (a sequence whose first element is used as the title).

    Returns
    -------
    list
        ``[lastname, title]`` with every run of punctuation or underscores
        removed, or ``['none', 'none']`` when either field is unusable.
    """
    placeholder = ['none', 'none']
    # Matches runs of characters that are neither whitespace nor word
    # characters, plus underscores.
    strip_pattern = r'([^\s\w]|_)+'

    # last name
    try:
        lastname = q.first_author_norm.split(',')[0]
        lastname = re.sub(strip_pattern, '', lastname)
    except AttributeError:
        # first_author_norm is absent or None.
        return placeholder

    # paper title
    try:
        title = q.title[0]
        title = re.sub(strip_pattern, '', title)
    except (TypeError, IndexError):
        # title is None / not a string (TypeError) or an empty list
        # (IndexError); both get the placeholder instead of a crash.
        return placeholder

    return [lastname, title]
def toJson(year=1991, rows=200000, mincite=2):
    """Fetch the papers for ``year`` and write them to ``data/<year>.json``.

    Parameters
    ----------
    year, rows, mincite
        Forwarded unchanged to ``makeDataframe``; ``year`` also names the
        output file.
    """
    frame = makeDataframe(year=year, rows=rows, mincite=mincite)
    outfile = 'data/{}.json'.format(year)
    frame.to_json(outfile)
In [51]:
def printReference(year):
    """Print a randomly generated fake reference for ``year``.

    Reads ``data/<year>.json``, builds a Markov-chain text model (state
    size 1) from all titles in the file, generates a title limited to 90
    characters, and pairs it with a randomly sampled last name that occurs
    more than once in the data.

    Parameters
    ----------
    year : int
        Year whose data file is read and which is shown in the reference.

    Raises
    ------
    RuntimeError
        If no title could be generated after several attempts.
    """
    max_attempts = 10

    df = pd.read_json('data/{}.json'.format(year))
    df = df.sort_index()
    textstr = '. '.join(df['title'].tolist())
    text_model = markovify.Text(textstr, state_size=1)

    # make_short_sentence returns None when it cannot build a sentence;
    # retry instead of printing a literal "None" as the title.
    outtitle = None
    for _ in range(max_attempts):
        outtitle = text_model.make_short_sentence(90)
        if outtitle is not None:
            break
    if outtitle is None:
        raise RuntimeError(
            'could not generate a title for {}'.format(year))

    # Only last names appearing more than once are candidates.
    counts = df['lastname'].value_counts()
    author = counts[counts > 1].sample().index[0]
    print('{} et al., {} ({})'.format(author, outtitle, year))
In [61]:
# Generate a fake reference from the 1990 data file (data/1990.json).
printReference(1990)
In [88]:
import markovify
import pandas
from twython import Twython
from secrets import *
def printReference(year):
    """Return a randomly generated fake reference string for ``year``.

    Reads ``data/<year>.json``, builds a Markov-chain text model (state
    size 1) from all titles in the file, generates a title limited to 140
    characters, and pairs it with a randomly sampled last name that occurs
    more than once in the data.

    Parameters
    ----------
    year : int
        Year whose data file is read and which is shown in the reference.

    Returns
    -------
    str
        Text of the form ``'<author> et al., <title> (<year>)'``.

    Raises
    ------
    RuntimeError
        If no title could be generated after several attempts.
    """
    max_attempts = 10

    df = pd.read_json('data/{}.json'.format(year))
    df = df.sort_index()
    textstr = '. '.join(df['title'].tolist())
    text_model = markovify.Text(textstr, state_size=1)

    # make_short_sentence returns None when it cannot build a sentence;
    # retry so the returned reference never carries "None" as its title.
    outtitle = None
    for _ in range(max_attempts):
        outtitle = text_model.make_short_sentence(140)
        if outtitle is not None:
            break
    if outtitle is None:
        raise RuntimeError(
            'could not generate a title for {}'.format(year))

    # Only last names appearing more than once are candidates.
    counts = df['lastname'].value_counts()
    author = counts[counts > 1].sample().index[0]
    return '{} et al., {} ({})'.format(author, outtitle, year)
def post_tweet(status):
    """Post a text status update to Twitter and print the API response.

    Parameters
    ----------
    status : str
        Text passed as the ``status`` of the update.

    Returns
    -------
    tuple
        ``(twitter, response)``: the Twython client that was created and
        the value returned by ``update_status``.
    """
    client = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
    result = client.update_status(status=status)
    print(result)
    return client, result
In [102]:
#status = printReference(1985)
#q = post_tweet(status)
In [69]:
# twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
# response = twitter.update_status(status='hello', )
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [62]:
# Load the 2015 paper data and order the rows by index.
# DataFrame.sort() has been removed from pandas; sort_index() is the
# index-ordering call that printReference already uses.
df = pd.read_json('data/2015.json').sort_index()
In [98]:
# Show one generated reference built from the 1973 data file.
print(printReference(1973))
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [63]:
# Dimensions (rows, columns) of the loaded 2015 frame.
df.shape
Out[63]:
In [ ]:
In [101]:
# Scratch check: replacing hyphens with spaces turns 'tom-tom' into 'tom tom'.
re.sub('-',' ', 'tom-tom')
Out[101]:
In [ ]:
In [ ]: