In [1]:
    
import pandas as pd, json
    
Affiliate list
In [12]:
    
# Consortium roster. Each entry becomes a dict with the keys
#   'f' -> first name, 'l' -> last name, 'u' -> university affiliation.
members = [
    {'f': first, 'l': last, 'u': university}
    for first, last, university in [
        ('Denes', 'Csala', 'Lancaster University'),
        ('Harry', 'Hoster', 'Lancaster University'),
        ('Gregory', 'Offer', 'Imperial College London'),
        ('Monica', 'Marinescu', 'Imperial College London'),
        ('Billy', 'Wu', 'Imperial College London'),
        ('Aron', 'Walsh', 'Imperial College London'),
        ('Sam', 'Cooper', 'Imperial College London'),
        ('Dhammika', 'Widanage', 'University of Warwick'),
        ('Emma', 'Kendrick', 'University of Birmingham'),
        ('James', 'Marco', 'University of Warwick'),
        ('Charles', 'Monroe', 'University of Oxford'),
        ('David', 'Howey', 'University of Oxford'),
        ('Jon', 'Chapman', 'University of Oxford'),
        ('Colin', 'Please', 'University of Oxford'),
        ('Denis', 'Kramer', 'University of Southampton'),
        ('Chris-Kriton', 'Skylaris', 'University of Southampton'),
        ('Giles', 'Richardson', 'University of Southampton'),
        ('Dan', 'Brett', 'University College London'),
        ('David', 'Scanlon', 'University College London'),
        ('Paul', 'Shearing', 'University College London'),
        ('Saiful', 'Islam', 'University of Bath'),
        ('Benjamin', 'Morgan', 'University of Bath'),
    ]
]
    
Initialize SCOPUS API key
In [13]:
    
from pyscopus.scopus import Scopus

# The API key is read from a local file so it never appears in the notebook.
# "key" is the IP-based key used at the university; point this at "key2"
# when working from home.
key_path = "key"

# Read inside a context manager so the handle is closed, and strip
# surrounding whitespace so a trailing newline in the file does not end up
# inside the key string.
with open(key_path, "r") as key_file:
    key = key_file.read().strip()

pyscopus = Scopus(key)
    
Retrieve SCOPUS author IDs
In [14]:
    
# Look up every member in the author search and record the ID of the first
# hit under the key 's'. Members without any hit get the placeholder '0000'.
authors = []
for member in members:
    print(member)  # progress indicator; single-argument form works on Python 2 and 3
    query_dict = {'affil': member['u'],
                  'authfirst': member['f'],
                  'authlast': member['l']}
    author_results = pyscopus.search_author(query_dict)
    if len(author_results) > 0:
        scopusid = author_results[0]['author_id']
    else:
        scopusid = '0000'
    # Work on a copy so `members` stays the pristine input list and this cell
    # can be re-run without side effects on earlier variables.
    author = dict(member)
    author['s'] = scopusid
    authors.append(author)
    
    
In [15]:
    
# Tabulate the author records (one row per entry of `authors`) so the
# retrieved IDs can be checked by eye; as the last expression of the cell,
# the frame is rendered as the cell output.
pd.DataFrame(authors)
    
    Out[15]:
Retrieve SCOPUS publication IDs for each author
In [16]:
    
# Attach the publication list of every author under the key 'pubs'.
pubs = []
for author in authors:
    record = dict(author)  # copy: leave the entries of `authors` untouched
    if record['s'] == '0000':
        # '0000' is the placeholder stored when the author search found
        # nobody; there is nothing meaningful to query, so skip the API call.
        record['pubs'] = []
    else:
        record['pubs'] = pyscopus.search_author_publication(record['s'])
    pubs.append(record)
    
    
Retrieve abstracts for each publication ID for each author
In [17]:
    
import json
    
In [15]:
    
# Save the publication lists to disk as JSON. `open` replaces the
# Python 2-only `file` builtin, and the context manager guarantees the data
# is flushed and the handle closed.
with open('f1.json', 'w') as out_file:
    out_file.write(json.dumps(pubs))
    
In [18]:
    
# For every author, collect title / date / journal / abstract of each
# publication whose cover year is strictly greater than `minyear`, and store
# the resulting list under the key 'abs'.
abstracts = []
minyear = 2011  # exclusive bound: publications from `minyear` itself are dropped
for author in pubs:
    print('%s %s' % (author['f'], author['l']))  # progress indicator
    papers = []
    for pub in author['pubs']:
        # The first four characters of 'cover_date' are parsed as the year.
        if int(pub['cover_date'][:4]) > minyear:
            paper = {'title': pub['title'],
                     'date': pub['cover_date'],
                     'journal': pub['publication_name'],
                     'abstract': ''}
            try:
                paper['abstract'] = pyscopus.retrieve_abstract(
                    pub['scopus_id'], show=False)['text']
            except Exception:
                # Best effort: keep the publication with an empty abstract
                # when retrieval fails. `Exception` (rather than a bare
                # `except`) lets Ctrl-C still interrupt this long loop.
                pass
            papers.append(paper)
    entry = dict(author)  # copy: leave the entries of `pubs` untouched
    entry['abs'] = papers
    abstracts.append(entry)
    
    
In [19]:
    
# Slimmed-down copies of the author records: everything except the raw
# publication list stored under 'pubs'. The originals in `abstracts` are
# left unchanged.
abs2 = []
for record in abstracts:
    slim = dict(record)
    del slim['pubs']
    abs2.append(slim)
    
In [83]:
    
# Index the author records by last name. Only the first record seen for a
# given last name is kept; any later record with the same last name is
# ignored.
abs3 = {}
for author in abs2:
    surname = author['l']
    if surname not in abs3:
        print(surname)  # single-argument form works on Python 2 and 3
        abs3[surname] = author
    
    
In [89]:
    
# Simple WordCloud
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import random
from wordcloud import WordCloud, STOPWORDS
    
In [96]:
    
# One text blob per last name: every abstract prefixed with a single space
# and concatenated in order (so a non-empty blob starts with a space, and an
# author without abstracts maps to the empty string).
abs4 = {
    surname: ''.join(' ' + paper['abstract'] for paper in author['abs'])
    for surname, author in abs3.items()
}
    
In [131]:
    
# Extra stopwords on top of the defaults shipped with wordcloud. The union is
# computed once because it does not change between authors.
sw2 = {'inf'}
stopwords = STOPWORDS.union(sw2)

# Draw one word cloud per author, labelled by the printed last name.
for surname in abs4:
    print(surname)
    text = abs4[surname]
    if not text.strip():
        # WordCloud.generate raises ValueError when it is given no words,
        # which happens for authors without any retrieved abstract.
        print('  (no abstract text available - skipped)')
        continue
    wordcloud = WordCloud(relative_scaling=0.1,
                          background_color="white",
                          stopwords=stopwords).generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()