In [17]:
from numpy import ndarray
# making authors list
def make_authors_list_from_csv(filename):
    """Collect the distinct author names found in ``filename + '.csv'``.

    The file is read as pipe-delimited text. The first line is skipped as a
    header; the third field of every remaining line is split on commas into
    author names. Only leading whitespace is removed from each name.

    Args:
        filename: Path of the CSV file without its ``.csv`` extension.

    Returns:
        A tuple ``(authors_list, authors_dict)``: the sorted list of distinct
        author names, and a dict mapping each name to its index in that list.

    Raises:
        IndexError: If a data line has fewer than three ``|``-separated
            fields.
    """
    # A set gives O(1) de-duplication; the final order comes from sorted().
    unique_authors = set()
    with open(filename + '.csv') as f:
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(f, None)
        for line in f:
            fields = line.split('|')
            for author in fields[2].split(','):
                unique_authors.add(author.lstrip())
    authors_list = sorted(unique_authors)
    authors_dict = {author: i for i, author in enumerate(authors_list)}
    return authors_list, authors_dict
# making dictionary for indexing authors
# constructing the poster dictionary:
# for each poster, the list of collaborators
def make_poster_dict_from_csv(filename, author_index=None):
    """Map each poster to its authors' indices and count posters per author.

    The file ``filename + '.csv'`` is read as pipe-delimited text. The first
    line is skipped as a header; on every remaining line the first field is
    parsed as the integer poster id and the third field is split on commas
    into author names (leading whitespace removed).

    Args:
        filename: Path of the CSV file without its ``.csv`` extension.
        author_index: Optional dict mapping author name to integer index.
            When omitted, the module-level ``authors_dict`` is used, which
            keeps existing one-argument calls working.

    Returns:
        A tuple ``(poster_dict, poster_count)``: ``poster_dict`` maps poster
        id to the list of author indices in file order, and ``poster_count``
        maps author name to the number of posters listing that author.

    Raises:
        KeyError: If an author name is missing from the index.
        ValueError: If a poster id is not an integer.
        IndexError: If a data line has fewer than three ``|``-separated
            fields.
    """
    if author_index is None:
        # Backward-compatible fallback to the global assigned by the caller.
        author_index = authors_dict
    poster_count = {}
    poster_dict = {}
    with open(filename + '.csv') as f:
        # Skip the header row without failing on an empty file.
        next(f, None)
        for line in f:
            fields = line.split('|')
            poster_id = int(fields[0])
            author_ids = []
            for author in fields[2].split(','):
                author = author.lstrip()
                author_ids.append(author_index[author])
                poster_count[author] = poster_count.get(author, 0) + 1
            poster_dict[poster_id] = author_ids
    return poster_dict, poster_count
import os
# poster count per author (intended for authors with multiple posters), JSON export
def make_json_poster_count(poster_count, filename, number_min=1):
    """Write the 100 highest poster counts to ``filename + '.json'``.

    Authors are ranked by descending count; ties keep the iteration order of
    ``poster_count``. The file holds one JSON object with two keys:
    ``"name"``, a single string containing the Python list representation of
    the ranked author names, and ``"count"``, the matching list of counts.

    Args:
        poster_count: Dict mapping author name to number of posters.
        filename: Output path without the ``.json`` extension.
        number_min: Accepted for interface compatibility.
            NOTE(review): currently unused; no minimum-count filter is
            applied.

    Returns:
        None.
    """
    import json  # local import: only needed to escape the "name" string

    ranked = sorted(poster_count.items(), key=lambda item: item[1],
                    reverse=True)[:100]
    authors = [author for author, _ in ranked]
    counts = [count for _, count in ranked]
    # json.dumps escapes quotes and backslashes inside the list text, so a
    # name such as O'Brien (whose repr uses double quotes) no longer breaks
    # the JSON. ensure_ascii=False leaves non-ASCII characters untouched.
    name_value = json.dumps(str(authors), ensure_ascii=False)
    with open(filename + ".json", "w") as outfile:
        outfile.write('{"name":' + name_value
                      + ',\n"count":' + str(counts) + '}\n')
# poster count per author (intended for authors with multiple posters), CSV export
def make_csv_poster_count(poster_count, filename, number_min=1):
    """Write the 100 highest poster counts to ``filename + '.csv'``.

    The file starts with the header line ``Authors, Counts`` followed by one
    ``author, count`` line per author, ranked by descending count; ties keep
    the iteration order of ``poster_count``.

    Args:
        poster_count: Dict mapping author name to number of posters.
        filename: Output path without the ``.csv`` extension.
        number_min: Accepted for interface compatibility.
            NOTE(review): currently unused; no minimum-count filter is
            applied.

    Returns:
        None.
    """
    ranked = sorted(poster_count.items(), key=lambda item: item[1],
                    reverse=True)[:100]
    with open(filename + ".csv", "w") as outfile:
        outfile.write("Authors, Counts\n")
        # Write the ranked (author, count) pairs. Iterating the dict itself
        # yields only the names, so indexing them wrote single characters.
        for author, count in ranked:
            outfile.write(author + ', ' + str(count) + "\n")
In [18]:
# Cosyne 2012: index the authors, tally posters per author, export the tallies.
filename = 'AbstractsCosyne2012'
# authors_dict has to be assigned before the next call:
# make_poster_dict_from_csv reads it as a module-level global.
authors_list, authors_dict = make_authors_list_from_csv(filename)
poster_dict, poster_count = make_poster_dict_from_csv(filename)
make_json_poster_count(poster_count, 'poster_count12', number_min=2)
make_csv_poster_count(poster_count, 'poster_count12', number_min=2)
In [19]:
# Cosyne 2013: index the authors, tally posters per author, export the tallies.
filename = 'AbstractsCosyne2013'
# authors_dict has to be assigned before the next call:
# make_poster_dict_from_csv reads it as a module-level global.
authors_list, authors_dict = make_authors_list_from_csv(filename)
poster_dict, poster_count = make_poster_dict_from_csv(filename)
make_json_poster_count(poster_count, 'poster_count13', number_min=2)
make_csv_poster_count(poster_count, 'poster_count13', number_min=2)
In [25]:
import csv
# Print every row of cosyne2011.csv with its fields re-joined by ", ".
# NOTE(review): 'rb' is the mode the Python 2 csv module expects; on
# Python 3 csv.reader needs a text-mode file (open(..., newline='')).
with open('cosyne2011.csv', 'rb') as csvfile:
    # QUOTE_NONE turns quote handling off; passing quotechar="" instead makes
    # csv.reader raise a TypeError.
    spamreader = csv.reader(csvfile, delimiter="|", quoting=csv.QUOTE_NONE)
    for row in spamreader:
        # Parenthesised form parses on both Python 2 and Python 3.
        print(', '.join(row))
In [9]:
# Show the first five elements of `lt`.
# NOTE(review): `lt` is not defined in any visible cell; presumably left over
# from another notebook session. Confirm before re-running.
lt[0:5]
Out[9]:
In [ ]: