In [1]:
import pandas as pd, json
Affiliate list
In [12]:
members=[{'f':'Denes','l':'Csala','u':'Lancaster University'},\
{'f':'Harry','l':'Hoster','u':'Lancaster University'},\
{'f':'Gregory','l':'Offer','u':'Imperial College London'},\
{'f':'Monica','l':'Marinescu','u':'Imperial College London'},\
{'f':'Billy','l':'Wu','u':'Imperial College London'},\
{'f':'Aron','l':'Walsh','u':'Imperial College London'},\
{'f':'Sam','l':'Cooper','u':'Imperial College London'},\
{'f':'Dhammika','l':'Widanage','u':'University of Warwick'},\
{'f':'Emma','l':'Kendrick','u':'University of Birmingham'},\
{'f':'James','l':'Marco','u':'University of Warwick'},\
{'f':'Charles','l':'Monroe','u':'University of Oxford'},\
{'f':'David','l':'Howey','u':'University of Oxford'},\
{'f':'Jon','l':'Chapman','u':'University of Oxford'},\
{'f':'Colin','l':'Please','u':'University of Oxford'},\
{'f':'Denis','l':'Kramer','u':'University of Southampton'},\
{'f':'Chris-Kriton','l':'Skylaris','u':'University of Southampton'},\
{'f':'Giles','l':'Richardson','u':'University of Southampton'},\
{'f':'Dan','l':'Brett','u':'University College London'},\
{'f':'David','l':'Scanlon','u':'University College London'},\
{'f':'Paul','l':'Shearing','u':'University College London'},\
{'f':'Saiful','l':'Islam','u':'University of Bath'},\
{'f':'Benjamin','l':'Morgan','u':'University of Bath'}]
Initialize SCOPUS API key
In [13]:
key_file = open("key", "r") #IP-based, at uni
#key_file = open("key2", "r") #at home
key = key_file.read()
from pyscopus.scopus import Scopus
pyscopus = Scopus(key)
Retrieve SCOPUS author IDs
In [14]:
authors=[]
for i in members:
    print i
    query_dict = {'affil': i['u'], 'authfirst': i['f'], 'authlast': i['l']}
    author_results = pyscopus.search_author(query_dict)
    if len(author_results)>0:
        scopusid=author_results[0]['author_id']
    else:
        scopusid='0000'   # placeholder when no Scopus match is found
    i['s']=scopusid
    authors.append(i)
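A quick check (a hedged sketch, not part of the original run) lists any members whose lookup fell back to the '0000' placeholder, so misspelled names or affiliations can be corrected before pulling publications:
In [ ]:
# Sketch: report members for whom no Scopus author ID was found
unmatched = [a for a in authors if a['s'] == '0000']
for a in unmatched:
    print '%s %s (%s): no Scopus match' % (a['f'], a['l'], a['u'])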
In [15]:
pd.DataFrame(authors)
Out[15]: (table of the 22 members with first name, last name, affiliation, and retrieved Scopus author ID)
Retrieve SCOPUS publication IDs for each author
In [16]:
pubs=[]
for i in authors:
    i['pubs']=pyscopus.search_author_publication(i['s'])
    pubs.append(i)
Retrieve abstracts for each publication ID for each author
In [17]:
import json
In [15]:
# Cache the raw publication data so the API calls need not be repeated
with open('f1.json','w') as f:
    f.write(json.dumps(pubs))
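In a later session the cached file can be read back instead of repeating the API calls; a minimal sketch, assuming the same f1.json file is present:
In [ ]:
# Sketch: reload the cached publication data from disk
with open('f1.json', 'r') as f:
    pubs = json.loads(f.read())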
In [18]:
abstracts=[]
minyear=2011   # keep publications with a cover date after this year
for i in pubs:
    print i['f'],i['l']
    p=[]
    for j in i['pubs']:
        if int(j['cover_date'][:4])>minyear:
            try:
                p.append({'title':j['title'],'date':j['cover_date'],'journal':j['publication_name'],
                          'abstract':pyscopus.retrieve_abstract(j['scopus_id'],show=False)['text']})
            except:
                # fall back to an empty abstract if retrieval fails
                p.append({'title':j['title'],'date':j['cover_date'],'journal':j['publication_name'],
                          'abstract':''})
    i['abs']=p
    abstracts.append(i)
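To verify the retrieval, a short sketch counts how many post-2011 abstracts were collected per author:
In [ ]:
# Sketch: number of abstracts collected per author
for i in abstracts:
    print '%s %s: %d abstracts' % (i['f'], i['l'], len(i['abs']))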
In [19]:
abs2=[]
for i in abstracts:
    j=dict(i)
    j.pop('pubs')   # drop the bulky raw publication records, keep only the parsed abstracts
    abs2.append(j)
In [83]:
abs3={}
for i in abs2:
    if i['l'] not in abs3:   # index by last name, keeping the first entry for each
        print i['l']
        abs3[i['l']]=i
In [89]:
# Simple WordCloud
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import random
from wordcloud import WordCloud, STOPWORDS
In [96]:
abs4={}
for i in abs3:
    abs4[i]=''
    for j in abs3[i]['abs']:
        abs4[i]=abs4[i]+' '+j['abstract']
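Before generating the word clouds it is worth checking how much text each author contributes; a quick sketch:
In [ ]:
# Sketch: amount of abstract text available per author (in words)
for name in abs4:
    print '%s: %d words' % (name, len(abs4[name].split()))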
In [131]:
#set the stopwords list
sw2={'inf'}   # extra stopwords to exclude beyond the built-in STOPWORDS
for i in abs4:
    print i
    text=abs4[i]
    wordcloud = WordCloud(relative_scaling = 0.1, background_color="white",
                          stopwords = set(STOPWORDS.union(sw2))
                          ).generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
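To keep the figures, each word cloud can also be written to disk; a minimal sketch using WordCloud's to_file method (the filenames are illustrative):
In [ ]:
# Sketch: save each author's word cloud as a PNG (hypothetical filenames)
for i in abs4:
    wc = WordCloud(relative_scaling=0.1, background_color="white",
                   stopwords=set(STOPWORDS.union(sw2))).generate(abs4[i])
    wc.to_file('wordcloud_' + i + '.png')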