In [1]:
import sys
sys.path.append("../scripts/")
In [2]:
import codecs
import json
import os

from data_collection_util import *
In [ ]:
# Read the original profile JSON from disk into `orig_profile`.
with open("../profile/profile.json") as profile_file:
    orig_profile = json.load(profile_file)
In [ ]:
# Ad-hoc call to the project's mathscinet helper for author id "602264";
# the result is only displayed by the notebook, it is not stored in a variable.
import mathscinet
mathscinet.find_papers_by_author_id("602264")
In [ ]:
import codecs

# Dump the loaded profile to profile2.json as pretty-printed UTF-8 JSON
# (4-space indent, non-ASCII characters written as-is rather than escaped).
with codecs.open("../profile/profile2.json", "w", "utf-8") as out_file:
    json.dump(
        orig_profile,
        out_file,
        indent=4,
        separators=(",", ": "),
        ensure_ascii=False
    )
In [ ]:
# Load the original profile again from ../profile/profile.json, rebinding
# `orig_profile` to a fresh copy read from disk.
with open("../profile/profile.json") as profile_file:
    orig_profile = json.load(profile_file)
In the original profile, each gear member has the following attributes:
A sample member profile looks like this:
In [ ]:
# Illustrative member record: one entry of the profile, written out by hand.
sample_profile = {
    u"cluster_id": 0,
    u"gear_collaborators": [],
    u"mathsci_id": u"MR304864",
    u"member_id": 12,
    u"member_type": u"member",
    u"name": u"Steven",
    u"organization": u"University of Illinois at Urbana-Champaign",
    u"other_collaborators": u"Indranil Biswas, Jim Glazebrook, Tomas Gomez, Adam Jacob, Franz Kamber, Vincent Mercat, Vicente Munoz, Peter Newstead, Mathias Stemmler",
    u"photo": u"BradlowSteven.jpg",
    u"pos_x": 0,
    u"pos_y": 0,
    u"research_interests": u"Higgs Bundles",
    u"short_bio": u"I'm interested in moduli spaces associated with holomorphic vector bundles. In particular, I'm a big fan of applications of Higgs bundle technology to the study of surface group representation varieties. Before I die, I'd like to be able to compute the surface group representation corresponding to any given Higgs bundle, and vice versa.",
    u"surname": u"Bradlow",
    u"title": u"GEAR Member",
    u"website": u"",
}
In this step, we build the mapping between gear_id and mathscinet_id. For example, given a gear member id, gear_mathsci_mapper returns the corresponding mathscinet id.
In [ ]:
# make_mappers returns an indexable pair: element 0 is the gear-id ->
# mathscinet-id lookup; element 1 is presumably the reverse direction
# (judging by its name -- confirm against make_mappers).
mappers = make_mappers(orig_profile)
gear_mathsci_mapper, mathsci_gear_mapper = mappers[0], mappers[1]
In this step, the program iterates through all members. If a member has a valid mathscinet id, we retrieve the paper list of that member.
In [ ]:
# Build paper_set from the loaded profile. The result is indexed by
# MathSciNet id further down (e.g. paper_set['MR304864']).
paper_set = download_full_paper_set(orig_profile)
paper_set has the following structure:
- 'member 0': paper a, paper b
- 'member 1': paper b, paper c, paper d
- 'member 2': paper c
For each paper, the structure is as follows:
A sample paper looks like this:
In [ ]:
# Illustrative paper record: author ids, year, citation text, paper id and URL.
sp = {
    "authors": ["MR367870", "MR1001390"],
    "date": 2012,
    "description": u"Conner, Gregory R. ; Kent, Curtis Inverse limits of finite rank free groups. J. Group Theory 15 (2012), no. 6, 823\u2013829. (Reviewer: David Meier) 20E05 (20E18)",
    "id": u"MR2997025",
    "url": u"http://www.ams.org/mathscinet/search/publdoc.html?pg1=MR&s1=MR2997025",
}
In our paper_set, Professor Bradlow has the following papers:
In [ ]:
paper_set['MR304864']
In [ ]:
# filter_2011 returns an indexable pair; keep the pair itself and give its
# two elements their own names for the cells that follow.
paper_2011_meta = filter_2011(paper_set)
paper_set_2011, count_2011 = paper_2011_meta[0], paper_2011_meta[1]
In [ ]:
# download papers citing gear member papers
# (the two arguments are the values unpacked from filter_2011's result above)
download_gear_papers(paper_set_2011, count_2011)
We have two functions: update_collaborators, which updates the co-authorship relation, and update_citations, which updates the co-citation relation.
In [ ]:
# Refresh co-authorship and co-citation data once per ending year.
# range(2011, 2017) yields 2011 through 2016; the start year is always 2011.
# Both accumulators are shared across every iteration and read by later cells.
full_paper_list, useful_paper = [], set()
for ending_year in range(2011, 2017):
    update_collaborators(
        orig_profile,
        paper_set_2011,
        2011,
        ending_year,
        mathsci_gear_mapper,
        useful_paper
    )
    update_citations(
        orig_profile,
        paper_set_2011,
        2011,
        ending_year,
        mathsci_gear_mapper,
        full_paper_list,
        useful_paper
    )
In [ ]:
len(useful_paper)
These two functions will add additional data fields to authors.
Let's look at Professor Bradlow's profile again:
In [ ]:
orig_profile['items'][12]
This means that Professor Bradlow (member id 12) has co-authored 2 papers (with paper ids 'MR3323627' and 'MR2999985') with Member 43, and 2 papers (with paper ids 'MR3323627' and 'MR2999985') with Member 49.
In [ ]:
# Call matrix_maker once per ending year, 2011 through 2016, always with
# 2011 as the starting year.
for ending_year in range(2011, 2017):
    matrix_maker(
        orig_profile,
        2011,
        ending_year
    )
In [ ]:
import codecs
def export_paper(the_paper_list):
    """Write the given papers to ``../website_input/papers.json``.

    The papers are re-keyed by their ``'id'`` entry before being dumped, so
    when the same id occurs more than once only the last occurrence is kept.
    The output is pretty-printed UTF-8 JSON (4-space indent).

    Args:
        the_paper_list: iterable of paper mappings, each with an ``'id'`` key.

    Raises:
        KeyError: if a paper has no ``'id'`` key.
        IOError: if the output file cannot be opened for writing (for
            example when ``../website_input`` does not exist).
    """
    # Parenthesised form: same output on Python 2, and valid on Python 3
    # (the bare `print "..."` statement is a SyntaxError there).
    print("Exporting papers ...")
    output_path = os.path.join('..', 'website_input', 'papers.json')
    export = {p['id']: p for p in the_paper_list}
    # codecs.open + ensure_ascii=False writes non-ASCII text as real UTF-8
    # instead of \uXXXX escapes.
    with codecs.open(output_path, "w", 'utf-8') as f:
        json.dump(export, f, indent=4, separators=(',', ': '), ensure_ascii=False)
In [ ]:
len(full_paper_list)
In [ ]:
In [ ]:
export_paper(full_paper_list)
In [ ]:
export_profile(orig_profile)
In [ ]:
def export_profile(profile):
    """Write ``profile`` to ``../website_input/profile.json`` as UTF-8 JSON.

    The profile object itself is serialised, so reading the file back with
    ``json.load`` returns the same structure. Dumping ``unicode(profile)``
    instead would store the Python repr of the object as one JSON string,
    and the loaded value would be a string with no ``keys()``.

    Args:
        profile: JSON-serialisable profile object (the notebook passes the
            dict loaded from ``../profile/profile.json``).

    Raises:
        IOError: if the output file cannot be opened for writing.
        TypeError: if ``profile`` contains values json cannot serialise.
    """
    output_path = os.path.join('..', 'website_input', 'profile.json')
    # codecs.open is needed because ensure_ascii=False emits non-ASCII text;
    # a plain Python 2 file object would raise UnicodeEncodeError on it.
    with codecs.open(output_path, "w", 'utf-8') as f:
        json.dump(profile, f, ensure_ascii=False)
In [ ]:
# Write the loaded profile to ../website_input/profile.json as UTF-8 JSON.
# The object itself is dumped (not its unicode() repr) so that json.load on
# this file returns a dict again. `orig_profile` is the name the notebook
# actually defines; a bare `profile` is not bound at notebook level.
output_path = os.path.join('..', 'website_input', 'profile.json')
with codecs.open(output_path, "w", 'utf-8') as f:
    json.dump(orig_profile, f, ensure_ascii=False)
In [ ]:
# Read the exported profile back from ../website_input/profile.json into `p`
# so the written result can be inspected.
output_path = os.path.join('..', 'website_input', 'profile.json')
with open(output_path) as exported_file:
    p = json.load(exported_file)
In [ ]:
p.keys()
In [3]:
p={2:22, 4:4}
In [4]:
p
Out[4]:
In [5]:
p.pop(2)
Out[5]:
In [6]:
p
Out[6]:
In [ ]: