In [1]:
from __future__ import division

"""gendering"""
from genderComputer.genderComputer import GenderComputer

"""bibtex parsing"""
import os
import bibtexparser as b #module for bibtexin'
from bibtexparser.bparser import BibTexParser #import to add customization
from bibtexparser.customization import *

"""plotting functions"""
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

"""date time functions """
from datetime import datetime #idk bring in the system date or whatever

"""csv"""
import csv

In [70]:
today = datetime.today()
gc = GenderComputer(os.path.abspath('genderComputer/nameLists')) #make gendercomputer


Loaded dictionary from /home/feline/Documents/div3/bibliometrics/genderComputer/nameLists/gender.dict
Finished initialization

In [56]:
bib = 'CriticalOpenNeuro.bib' #bring that bib file in

In [4]:
def customizations(record):
    """Use some functions delivered by the library
    :param record: a record
    :returns: -- customized record
    """
    record = type(record)
    record = doi(record)
    record = convert_to_unicode(record)
    record = author(record)
    return record

In [5]:
def parseFile(bib_file):
    """parse the bib file
    
    :param bib_file: bibtex file to be parsed
    :returns: -- a bibtex file object
    """
    with open(bib_file) as bibtex_file: 
        parser = BibTexParser()
        parser.homogenize = True        
        parser.customization = customizations
        data = b.load(bibtex_file, parser = parser)
        return data

In [63]:
auCount = 0
notav = 0
uni = 0
men = 0
women = 0
unavailable = []

In [58]:
def countGender(ts=True):
    """take the bib database and count genders of authors
    """ 
    global auCount
    global notav 
    global uni 
    global men 
    global women 
    global unavailable 
    no_author = []
    no_title = []
    no_gender = []
    for entry in data.entries:
        if "title" in entry:
            title = entry["title"]
        else:
            no_title.append(entry)
        if "author" in entry:
            authors = entry["author"] 
        else:
            no_author.append(title)
        for j in authors:
            auCount += 1
            gender = gc.resolveGender(j, None) #resolve gender, yay
            if gender == 'male':
                men += 1
            elif gender == 'female':
                women += 1
            elif gender == 'unisex':
                uni += 1
            else:
                notav += 1 
                no_gender.append(j)
    if ts==True:
        print "No Author in these Papers:\n\n" + '\n'.join(no_author)
        print "\nNo Gender on these Names:\n" + '\n'.join(no_gender)
        print "\n\nNo title on these entries:\n\n" + '\n'.join(no_title)

In [64]:
data = parseFile(bib) #run the parse file
countGender(ts=True)


No Author in these Papers:

Statistics' Crisis of Reproducibility
Measuring Scholarly Impact
Issues in Open Research Data
{CSP} - 'A Brief History of the Native American Church'
Erowid Experience Vaults: 25I-{NBOMe} \& Alprazolam (Xanax) - Years of Therapy in One Day - 101324
Hallucinogenic Plants and Their Use in Traditional Societies - An Overview | Cultural Survival
Hebrew University researcher: Moses was tripping at Mount Sinai
Shake-up of centuries-old system of credit in scholarly communication
Reproducible research: Notes from the field
Sharing knowledge and saving lives: one doctor's story
Gates Foundation announces world's strongest policy on open access research : Nature News Blog
The Thomson Reuters Journal Selection Process - {IP} \& Science - Thomson Reuters
Software Carpentry: Orwell, Dickens, and How We'll Know We're Done
The perils of mixing open source and money ({DHH})
Setting the Default to Reproducible: Reproducibility in Computational and Experimental Mathematics
Neuroscience drug development slows, requires funding | {CenterWatch} News Online
Feminist Epistemologies

No Gender on these Names:
Gorelick, Root
Release, CERN Press
{OseiTutu}, J. Janewa
Braun, Lundy
Wagenmakers, Eric-Jan
Miranda, J. Jaime
Zaman, M. Justin
Editors, Scientific Data
Publishing, 2012|Academic
Comments, Impact|3
Stoskopf, M. K.
Simoneau, J.
Cohen, M. S.
Henson, C. M.
Rice, Kenner C.
Woods, J. H.
{McAfee}, R. Preston
Rohlfing, T.
Poline, J.-B.
Nosek, B. A.
Spies, J. R.
Motyl, M.
Begley, C. Glenn
Ray, Rinki
Wise, M. Norton
contributors, Wikipedia
Ally, Ariff
Nakamura, Naosuke
Chan, C.-H.
Tan, Seong-Seng
Parnavelas, J. G.
Rajeevan, Mangalathu S.
Abbas, Atheir
Chang, Yuchiao
Standards, Task Group on Data Citation
Practices, {CODATA}-{ICSTI}
Zhang, L
Ma, W
Barker, J. L
Rubinow, D. R
Torper, Olef
Lau, Shong
Chiba, Shigetoshi
Khor, Hwei Ling
Visiers, Irache
Panicker, Mitradas M.
Salazar, F.H. Rick
Manji, Husseini
Puig, M. Victoria
Lee, Hyeong-Min
Chan, Pokman
Ge, Yongchao
{McKenney}, J. D.
Abbas, Atheir I.
Yadav, P. N.
Arbuckle, M. I.
Caron, M. G.
Olaghere da Silva, Uade B.
Thomas, Tarita O.
Ma, Jinming
Zhou, X. Edward
Zhang, Chenghai
Yang, Huaiyu
Jiang, Hualiang
Xu, H. Eric
Kanagarajadurai, Karuppiah
Panicker, Mitradas M.
De Almeida, J.
Mengod, G.
Huang, Shan-Fu
Gangadhar, Beechanahalli P.
Rebois, R. Victor
Johansen, Pål-Ørjan
History.com, 
Workgroup}, {VA Mid-Atlantic MIRECC Registry
Maudsley, S.
A. Patel, S.
Park, S.-S.
M. Luttrell, L.
Martin, B.
Strickland, Seligman, L.
Holbrook, J. Britt
Millman, K. Jarrod
Guo, Quanmin
Arunachalam, Subbiah
Ibanez, L.
Avila, R.
Aylward, S.
Soares, Thannya Nascimento
Reichman, O. J.
Research}, {F100
Editorial, Nature
Neuroskeptic, 
Steen, R. Grant
Moonesinghe, Ramal
Janssens, A. Cecile J. W
Fang, Ferric C.
Begley, C. Glenn
Asadullah, Khusru
Zhang, Minghua
Song, Xiaomu
Chen, Nan-Kuei
Gopalan, R
Chan, An-Wen
{MOeLLER}, {ANDERS} P.
van der Worp, H. Bart
DA, Kronick
JC, Burnham
Comments, 2014|Academic communication|5
Khan, Razib
Nsamenang, A. Bame
Björk, Bo-Christer
Kim, Heekyung Hellen
Pillay, Pundy
Pillay, Pundy
Comments, 2012|Uncategorized|5
Taylor, 
Evans, J. A.
Reimer, J.
Miranda, J. Jaime
Greysen, S Ryan
Olapade-Olaopa, E Oluwabunmi
Mullan, Fitzhugh
Henry, Trotter
Dennis, Alasia Datonye
Horty, J. F.
Soo, Khee Chee
Editors, The PLoS Medicine
Fang, Ferric C.
Holbrook, J. Britt
deevybee, 
Butlin, R.
Alsheikh-Ali, Alawi A.
Al-Mallah, Mouaz H.
Dallmeier-Tiessen, Suenje
Manifesto, Science Code
Igor, 
Writer, Guest
06, National Geographic {PUBLISHED} March
{2014}, 
{p.f.wouters@cwts.leidenuniv.nl}, 
Fernandes, G. Wilson
(Organization), Global Forum for Health Research
Workshop, The London
ICSU, 
ICSU, 
Sumathipala, Athula
Clark, Jocalyn P.
Obuaya, Chi-Chi
Newby, L. Kristin
Review, Editors of the Harvard Educational
Shonkoff, J. P.
Brotherton, P. Sean
Nguyen, Vinh-Kim
{Syamsidik}, 
Towghi, Fouzieyha
History), Arctic Studies Center (National Museum of Natural
Neuroscience, Society for
Roy, Deboleena
Roy, Deboleena
Boulware, L. Ebony
Strenta, A. Christopher


No title on these entries:



In [20]:
"""writing names unassigned to a file for troubleshooting"""
with open('unavailable_gender.csv', 'wb') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(unavailable)

In [65]:
stats = {'Women':women, 'Men':men, 'Unisex':uni, 'Not Available':notav}
percents = {'Women':women, 'Men':men, 'Unisex':uni, 'Not Available':notav}

In [66]:
for key in stats:
    value = stats[key]
    percent = value/auCount*100 #probably should fix so it can't break if dividing by zero
    percents[key] = percent

In [67]:
print stats
print percents
print auCount


{'Unisex': 29, 'Not Available': 162, 'Men': 882, 'Women': 483}
{'Unisex': 1.8637532133676094, 'Not Available': 10.411311053984576, 'Men': 56.68380462724936, 'Women': 31.041131105398456}
1556

In [71]:
plt.bar(range(len(stats)), percents.values(), align='center', color="#2aa198")
plt.xticks(range(len(percents)), percents.keys(), color="#657b83")
plt.xlabel('Gender Assigned (generated ' + str(today) +')', color="#073642")
plt.ylabel('Percents', color="#073642")


Out[71]:
<matplotlib.text.Text at 0x8a435d0>

In [72]:
plt.savefig('gender_distr.png', bbox_inches='tight',transparent=True)

In [ ]: