In [1]:
import json
from pprint import pprint
from dateutil import parser
from bs4 import BeautifulSoup
from pysurvey.plot import setup, line, legend, box, dateticks
from matplotlib.dates import date2num

Faculty sort


In [3]:
# Load the faculty rumor-mill history (a JSON document) into `data`.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving it open until garbage collection.
# (The postdoc history lives in a sibling file, 'postdoc_2014.json'.)
with open('/Users/ajmendez/data/rumormill/faculty_2014.json') as faculty_file:
    data = json.load(faculty_file)

In [4]:
# Quick look at what was loaded: number of entries, container type and the
# keys of the first entry.  list(...) makes the key array come out the same
# on Python 3, where dict.keys() is a view rather than a list.
len(data), type(data), np.array(list(data[0].keys()))


Out[4]:
(64,
 list,
 array([u'comment', u'newver', u'oldver', u'source', u'version', u'user',
        u'date', u'preview', u'changes'], 
       dtype='<U7'))

In [5]:
# 'newver' of every entry, in file order (0 where the key is missing).
np.array([revision.get('newver', 0)
          for revision in data])


Out[5]:
array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 15, 16, 17, 18, 19,
       20, 21, 22, 24, 25, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40,
       41, 42, 44, 45, 46, 47, 48, 49, 51, 50, 26, 52, 53, 28, 56, 55, 54,
       31, 57, 58, 60, 59, 61, 62, 65, 64, 68, 67, 66, 43])

In [6]:
# Timestamp of every entry, parsed with dateutil.
dates = np.array([parser.parse(rev['date']) for rev in data])
# Length of each entry's 'changes' field.  An earlier assignment based on
# len(rev['comment']) was overwritten immediately and has been removed as a
# dead store.
changes = np.array([len(rev['changes']) for rev in data])

In [7]:
# Scatter the length of each entry's changes against its date.
setup(figsize=(6, 6), xlabel='Date', ylabel='Length of Changes')
# 's' selects square markers; matplotlib wants the dates as float day numbers.
plot(date2num(dates), changes, 's')
dateticks()



In [9]:
def parse_preview_table(s):
    """Flatten one HTML table into a header list and a list of text rows.

    Parameters
    ----------
    s : table element
        Object exposing ``find_all`` (the caller passes the table tags found
        by BeautifulSoup).  The element is modified in place: links and
        ``<strong>`` tags in the body rows are replaced by plain text.

    Returns
    -------
    (header, rows) : tuple of two lists
        ``header`` holds the stripped text of the ``<th>`` cells of the first
        ``<tr>``.  ``rows`` holds one list per later ``<tr>`` that has at
        least one ``<td>``: the whitespace-normalised text of every cell,
        preceded by a short tag (the first cell's text up to its first
        ``'('``).
    """
    header = []
    rows = []
    for i, tr in enumerate(s.find_all('tr')):
        if i == 0:
            # Only the first row contributes header labels.
            header.extend(th.text.strip() for th in tr.find_all('th'))
            continue

        # Rewrite links as "[href text]" so the URL survives the text dump.
        for a in tr.find_all('a'):
            href = a.get('href')
            if href is None:
                # Anchor without a target: leave it alone so its text is
                # kept, rather than failing with a KeyError.
                continue
            # Unicode template (like the one used for <strong> below) so
            # non-ASCII link text cannot raise UnicodeEncodeError on Python 2.
            a.insert_after(u'[{} {}]'.format(href, a.text.strip())
                                     .replace('a ] d','ad]'))
            a.extract()

        # Mark bolded names as *name*.
        for strong in tr.find_all('strong'):
            strong.insert_after(u'*{}*'.format(strong.text.strip()))
            strong.extract()

        # Text of every cell with empty link parentheses removed and runs of
        # whitespace collapsed to single spaces.
        cells = [' '.join(td.text.strip()
                            .replace('(          )','')
                            .split())
                 for td in tr.find_all('td')]
        if not cells:
            # Row without <td> cells: there is nothing to tag, and indexing
            # cells[0] would raise IndexError.
            continue

        # Short tag for the row: the first cell up to its first parenthesis.
        cells.insert(0, cells[0].split('(')[0].strip())
        rows.append(cells)
    return header, rows

def get_title(soup):
    """Return the nearest non-blank text that precedes ``soup``.

    Walks backwards through ``previous_element`` for at most ten steps.
    Text nodes are stripped and the first non-empty one is returned; any
    other kind of node is stepped over so the walk can continue past it.

    Parameters
    ----------
    soup : element
        Starting node (the caller passes a table tag).

    Returns
    -------
    The stripped text, or ``'No Title found'`` when the start of the
    document is reached or ten steps pass without finding any text.
    """
    # unicode on Python 2, str on Python 3.
    # presumably bs4 text nodes subclass this type -- confirm against bs4.
    text_type = type(u'')
    node = soup
    # Look back at most ten elements.
    for _ in range(10):
        node = getattr(node, 'previous_element', None)
        if node is None:
            # Nothing precedes this node.
            break
        if isinstance(node, text_type):
            title = node.strip()
            if title:
                return title
    return 'No Title found'

    
def print_title(title):
    """Print ``title`` between two 40-character dashed rules.

    Uses the single-argument ``print(...)`` form, which prints the same text
    as the Python 2 print statement and is also valid on Python 3.
    """
    rule = '-'*40
    for line in (rule, title, rule):
        print(line)

def print_row(header, rows):
    """Pretty-print a shallow copy of ``rows``.

    ``header`` is accepted so the function can be called with the unpacked
    result of ``parse_preview_table``; it is not printed.
    """
    rows_copy = rows[:]
    pprint(rows_copy)

def parse_preview_faculty(soup):
    """Print every ``wikitable`` table found in ``soup``.

    For each table the title located by ``get_title`` is printed first,
    followed by the rows extracted by ``parse_preview_table``.
    """
    wikitables = soup.find_all('table', {'class': 'wikitable'})
    for table in wikitables:
        title = get_title(table)
        print_title(title)
        header, rows = parse_preview_table(table)
        print_row(header, rows)

# Parse the preview HTML of the last entry in the faculty history and print
# every wikitable it contains.  The parser is named explicitly so the parse
# tree does not depend on which optional parsers happen to be installed.
# NOTE(review): Out[5] shows the entries are not ordered by 'newver', so
# data[-1] is not necessarily the most recent revision -- confirm.
soup = BeautifulSoup(data[-1]['preview'], 'html.parser')
parse_preview_faculty(soup)

# TODO: Need to sort and filter


----------------------------------------
Tenure Track Faculty Positions
----------------------------------------
[[u'Institution', u'Institution (ad link)', ''],
 [u'Brigham Young',
  u'Brigham Young ( [http://jobregister.aas.org/job_view?JobID=48442 ad] )',
  u'Due October 1, 2014'],
 [u'Brown University',
  u'Brown University ( [http://www.brown.edu/academics/physics/sites/brown.edu.academics.physics/files/uploads/Ad%20-%20Astro-Cosmo%20Faculty%20Search.pdf ad] )',
  u'Due December 5, 2014'],
 [u'Universidad de Chile',
  u'Universidad de Chile',
  u'Offers made to *Valentino Gonzalez* and *Cesar Fuentes*'],
 [u'U. of Chicago',
  u'U. of Chicago ( [http://careers.aps.org/jobs/6344193/faculty-positions-in-astronomy-astrophysics ad] )',
  u'Due October 16, 2014'],
 [u'Cal State Fullerton',
  u'Cal State Fullerton (Physics and/or Astronomy Education Research, [http://careers.aps.org/jobs/6366984/physics-or-astronomy-education-research-tenure-track ad] )',
  u'Due October 1, 2014'],
 [u'Columbia University',
  u'Columbia University ( [https://jobregister.aas.org/node/49148 ad] )',
  u'Due December 15, 2014'],
 [u'Florida Institute of Technology',
  u'Florida Institute of Technology (Planetary Science, [http://cos.fit.edu/pss/position-pss802.php ad] )',
  u'Due January 5, 2015'],
 [u'Humboldt State',
  u'Humboldt State ( [http://sciencefacultyjobs.com/index.php?option=com_noah&Itemid=35&v=g&method=showdetails&list=advertisement&rollid=5944&fromfromlist=classifiedscategory&fromfrommethod=showhtmllist&fromfromid=10 ad] )',
  u'Due September 29, 2014'],
 [u'Leuven University',
  u'Leuven University ( [http://eas.unige.ch/jobs.jsp?type=job&id=474 ad] )',
  u'Due September 30, 2014'],
 [u'U.Hull',
  u'U.Hull ( [https://jobregister.aas.org/job_view?JobID=48388 ad] )',
  u'Interviews taking place September 10, 2014, approx. 5 applicants shortlisted.'],
 [u'IA-UNAM Mexico City',
  u'IA-UNAM Mexico City ( [https://jobregister.aas.org/job_view?JobID=48527 ad] )',
  u'Due October 7, 2014'],
 [u'IA-UNAM Ensenada',
  u'IA-UNAM Ensenada ( [https://jobregister.aas.org/job_view?JobID=48527 ad] )',
  u'Due October 7, 2014'],
 [u'NC State University',
  u'NC State University (Astrophysics, [https://jobregister.aas.org/job_view?JobID=48719 ad] )',
  u'Due November 1, 2014'],
 [u'Princeton',
  u'Princeton ( [http://www.princeton.edu/astro/resources/job/jo/index.xml ad] )',
  u'Due Nov. 15, 2014'],
 [u'San Francisco State University',
  u'San Francisco State University ( [http://www.physics.sfsu.edu/astrosearch.html ad] )',
  u'Due December 1, 2014'],
 [u'UC Santa Cruz',
  u'UC Santa Cruz ( [https://recruit.ucsc.edu/apply/JPF00176 ad] )',
  u'Due December 1, 2014'],
 [u'University of Washington',
  u'University of Washington ( [https://sharepoint.washington.edu/phys/research/DUSC/Pages/default.aspx physics] , [https://sharepoint.washington.edu/phys/admin/Pages/View-Position.aspx?pid=38 ad] )',
  u'Review of applications will begin on October 20, 2014'],
 [u'University of Washington',
  u'University of Washington (astronomy, 2 positions, [https://jobregister.aas.org/job_view?JobID=49023 ad] )',
  u'Due November 30, 2014']]
----------------------------------------
Science Staff and Management Positions
----------------------------------------
[[u'Operations staff astronomer at ESO/Paranal',
  u'Operations staff astronomer at ESO/Paranal ( [https://jobregister.aas.org/job_view?JobID=48283 ad] )',
  u'Due July 31st, short list made, including:'],
 [u'JPL weak lensing http://jobregister.aas.org/node/48271',
  u'JPL weak lensing http://jobregister.aas.org/node/48271',
  ''],
 [u'Adaptive Optics Scientist / Operations staff astronomer at ESO/Paranal',
  u'Adaptive Optics Scientist / Operations staff astronomer at ESO/Paranal ( [https://jobregister.aas.org/job_view?JobID=48748 ad] )',
  u'Due October 31st'],
 [u'Director, High Altitude Observatory',
  u'Director, High Altitude Observatory',
  u'Shortlist made, including: Sarah Gibson, Scott McIntosh, Stan Solomon'],
 [u'Director, W. M. Keck Observatory',
  u'Director, W. M. Keck Observatory',
  u'Hilton Lewis'],
 [u'Department Head',
  u'Department Head (Physics and Space Sciences, Florida Institute of Technology, [http://cos.fit.edu/pss/position-pss800.php ad] )',
  u'Due January 5, 2015']]
----------------------------------------
Faculty Shuffle
----------------------------------------
[[u'Wendy Freedman',
  u'Wendy Freedman',
  u'Carnegie Observatories',
  u'University of Chicago'],
 [u'Anna Scaife',
  u'Anna Scaife',
  u'University of Southampton',
  u'University of Manchester'],
 [u'Rene Breton',
  u'Rene Breton',
  u'University of Southampton',
  u'University of Manchester']]

Postdocs


In [10]:
# Load the postdoc rumor-mill history; the context manager closes the file
# handle once the JSON has been parsed.
with open('/Users/ajmendez/data/rumormill/postdoc_2014.json') as postdoc_file:
    postdoc = json.load(postdoc_file)

In [13]:
# Same plot as for the faculty history, now for the postdoc entries.
# NOTE: this rebinds the notebook-level names `dates` and `changes`.
dates = np.array([parser.parse(rev['date']) for rev in postdoc])
# Length of each entry's 'changes' field.  An earlier assignment based on
# len(rev['comment']) was overwritten immediately and has been removed as a
# dead store.
changes = np.array([len(rev['changes']) for rev in postdoc])

setup(figsize=(6,6),
      xlabel='Date',
      ylabel='Length of Changes')
# 's' selects square markers; matplotlib wants the dates as float day numbers.
plot(date2num(dates), changes, 's')
dateticks()



In [11]:
# Parse the preview HTML of the last entry in the postdoc history and print
# every wikitable it contains.  The parser is named explicitly so the parse
# tree does not depend on which optional parsers happen to be installed.
psoup = BeautifulSoup(postdoc[-1]['preview'], 'html.parser')
parse_preview_faculty(psoup)


----------------------------------------
Postdoc or term positions
----------------------------------------
[[u'Submillimeter Array Fellowship',
  u'Submillimeter Array Fellowship ( [http://jobregister.aas.org/job_view?JobID=48424 ad] )',
  u'Due Oct 1, 2014. Short list made.'],
 [u'Berkeley',
  u'Berkeley (21cm cosmology w/ Parsons [http://www.higheredjobs.com/faculty/details.cfm?JobCode=175960269&Title=Postdoctoral%20Scholar%20-%20Radio%20Astronomy%20Lab ad] )',
  u'Due Dec. 1, 2014'],
 [u'Caltech',
  u'Caltech (theoretical astrophysics w/ Hopkins [http://www.tapir.caltech.edu/~phopkins/Site/Opportunities.html ad] )',
  u'Due Nov. 15, 2014'],
 [u'Dartmouth College Society of Fellows',
  u'Dartmouth College Society of Fellows ( [http://www.dartmouth.edu/sof/fellowships/postdoc.html ad] )',
  u'Due Oct. 15, 2014'],
 [u'ETH Zurich',
  u'ETH Zurich (Extragalactic Astrophysics [http://jobregister.aas.org/job_view?JobID=49205 ad] )',
  u'Due Dec 12, 2014'],
 [u'Fermilab Theoretical Astrophysics',
  u'Fermilab Theoretical Astrophysics (2 positions, [https://academicjobsonline.org/ajo/jobs/4778 ad] )',
  u'Due Dec. 1, 2014'],
 [u'Harvard-Smithsonian Center for Astrophysics, stars/exoplanets',
  u'Harvard-Smithsonian Center for Astrophysics, stars/exoplanets ( [http://eas.unige.ch/jobs.jsp?type=job&id=531 ad] )',
  u'Due Nov. 30 2014, or until position filled.'],
 [u'Harvard-Smithsonian Center for Astrophysics, GRBs / time-domain astrophysics',
  u'Harvard-Smithsonian Center for Astrophysics, GRBs / time-domain astrophysics ( [http://jobregister.aas.org/job_view?JobID=48943 ad] )',
  u'Due Jan. 15, 2015'],
 [u'Harvard-Smithsonian Center for Astrophysics, SMA Fellowship',
  u'Harvard-Smithsonian Center for Astrophysics, SMA Fellowship',
  u'shortlist made, including Erika Nesvold'],
 [u'IMPS Fellowship, UC Santa Cruz',
  u'IMPS Fellowship, UC Santa Cruz ( [https://jobregister.aas.org/job_view?JobID=48961 ad)]',
  u'Due Jan. 5, 2015'],
 [u'The Jim Buckee Fellow in Astrophysics, International Centre for Radio Astronomy Research & The University of Western Australia',
  u'The Jim Buckee Fellow in Astrophysics, International Centre for Radio Astronomy Research & The University of Western Australia ( [http://jobregister.aas.org/job_view?JobID=49172 ad] )',
  u'Due November 30, 2014'],
 [u'JPL/Caltech, Fellowship-level position in Exoplanet and Comparative Planetary Science',
  u'JPL/Caltech, Fellowship-level position in Exoplanet and Comparative Planetary Science ( [http://postdocs.jpl.nasa.gov/researchapplicants/jobpostings/index.cfm?FuseAction=ShowJobPosting&JobPostingID=524 ad] )',
  u'Due Nov. 1, 2014. There will be an option of a near-immediate start after selection and mutual agreement.'],
 [u'KICP Postdoctoral Research Fellow http://kicp-fellowship.uchicago.edu/',
  u'KICP Postdoctoral Research Fellow http://kicp-fellowship.uchicago.edu/',
  u'Due, October 31, 2014'],
 [u'Keele University',
  u'Keele University (nucleosynthesis w/ Hirschi [https://atsv7.wcn.co.uk/search_engine/jobs.cgi?owner=5062976&ownertype=fair&jcode=1433699&vt_template=1434&adminview=1 ad] )',
  u'Due Oct. 1st 2014'],
 [u'Ludwig-Maximilians University, Munich, 3 to 5 Year Postdoctoral Position in Cosmology /w Weller',
  u'Ludwig-Maximilians University, Munich, 3 to 5 Year Postdoctoral Position in Cosmology /w Weller ( [http://www.usm.uni-muenchen.de/Jobs.php ad)]',
  u'Due Nov. 30st 2014'],
 [u'Michigan State University',
  u'Michigan State University (radio transients, w/Chomiuk [https://jobregister.aas.org/job_view?JobID=48763 ad] )',
  u'Due Nov. 15 2014'],
 [u'National Observatory of Athens',
  u'National Observatory of Athens (IAASARS; IR ex-gal w/ Xilouris-Charmandaris [http://www.astro.noa.gr/en/jobopenings/documents/ir_postdoc_noa.pdf ad] )',
  u'Due Oct. 1st 2014, or until position filled. Short list made.'],
 [u'National Center for Supercomputing Applications',
  u'National Center for Supercomputing Applications (NCSA) Astro Group postdoc (3+ positions, [http://www.ncsa.illinois.edu/about/jobs/postdoc_astro ad] )',
  u'Due Feb. 1st, 2015.'],
 [u'National Science Foundation Astronomy & Astrophysics Postdoctoral Fellowship',
  u'National Science Foundation Astronomy & Astrophysics Postdoctoral Fellowship',
  u'Due Oct. 8th, 2014'],
 [u'Niels Bohr Institute - Postdoctoral Fellowships in Theoretical Astrophysics and Planetary Sciences',
  u'Niels Bohr Institute - Postdoctoral Fellowships in Theoretical Astrophysics and Planetary Sciences ( [http://jobregister.aas.org/job_view?JobID=49153 ad] )',
  u'Due Nov. 15th, 2014'],
 [u'Niels Bohr Institute - 5yr Assistant Professor in Theoretical Astrophysics',
  u'Niels Bohr Institute - 5yr Assistant Professor in Theoretical Astrophysics ( [http://jobregister.aas.org/job_view?JobID=49155 ad] )',
  u'Due Nov. 15th, 2014'],
 [u'Niels Bohr Institute - 5yr Associate Professor in Theoretical Astrophysics',
  u'Niels Bohr Institute - 5yr Associate Professor in Theoretical Astrophysics ( [http://jobregister.aas.org/job_view?JobID=49157 ad] )',
  u'Due Nov. 15th, 2014'],
 [u'Northwestern University, CIERA Fellowships',
  u'Northwestern University, CIERA Fellowships ( [http://ciera.northwestern.edu/Research/fellowships.php ad] )',
  u'Due Dec. 1st, 2014'],
 [u'Northwestern University, Lindheimer Fellowship',
  u'Northwestern University, Lindheimer Fellowship ( [http://ciera.northwestern.edu/Research/fellowships.php ad] )',
  u'Due Dec. 1st, 2014'],
 [u'Northwestern University w/ Faucher-Gigu\xe8re',
  u'Northwestern University w/ Faucher-Gigu\xe8re ( [http://ciera.northwestern.edu/Research/fellowships.php ad] )',
  u'Due Dec. 1st, 2014'],
 [u'Observat\xf3rio Nacional/Leiden Observatory Joint Fellowship',
  u'Observat\xf3rio Nacional/Leiden Observatory Joint Fellowship ( [http://staff.on.br/overzier/rioleiden/ ad] )',
  u'Review of applications begins Nov. 1, 2014'],
 [u'Ohio State CCAPP Fellowships',
  u'Ohio State CCAPP Fellowships ( [http://ccapp.osu.edu/jobs.html ad] )',
  u'Review of applications begins Nov. 15, 2014'],
 [u"Queen's University Belfast Research Fellowship",
  u"Queen's University Belfast Research Fellowship ( [https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=ind1409&L=RASJOBS&F=&S=&X=E4D7518FA6D6D1C21D&Y=b.riaz%40herts.ac.uk&P=5733 ad] )",
  u'Due 17th October'],
 [u'Royal Astronomical Society Research Fellowships',
  u'Royal Astronomical Society Research Fellowships (2 positions, [http://www.ras.org.uk/news-and-press/2504-call-for-applications-ras-research-fellowships-2015 ad] )',
  u'Applications due 17 Oct 2014, interviews Feb 2015, offers March 2015.'],
 [u'University College London',
  u'University College London (weak lensing [https://jobregister.aas.org/node/48464 ad] )',
  u'Due Sep. 30th 2014'],
 [u'University of Arizona Prize Fellow in Theoretical and Computational Astrophysics',
  u'University of Arizona Prize Fellow in Theoretical and Computational Astrophysics ( [https://www.as.arizona.edu/sites/default/files/TheoryAdvert_0.pdf ad] )',
  u'Due Dec 15th 2014'],
 [u'University of Arizona',
  u'University of Arizona (cosmology w/ Rozo, [https://www.uacareertrack.com/ ad] ). (click on search postings, job number 56476)',
  u'Due Dec. 15th 2014'],
 [u'University of California Los Angeles',
  u'University of California Los Angeles (dynamics w/ Naoz [http://www.astro.ucla.edu/~snaoz/postdoc14.html ad] )',
  u'Due Dec 15th 2014'],
 [u'University of Chicago',
  u'University of Chicago (planetary w/Kite, [https://jobregister.aas.org/job_view?JobID=49033 ad] )',
  u'Due Dec 20th 2014'],
 [u'University of Helsinki',
  u'University of Helsinki (computational extragalactic w/ Johansson, [http://jobregister.aas.org/job_view?JobID=48758 ad] )',
  u'Due Oct. 15th 2014'],
 [u'University of Illinois at Urbana-Champaign',
  u'University of Illinois at Urbana-Champaign ( Fortner Fellowships; [https://my.physics.illinois.edu/join/ ad] )',
  u'Due Jan 16, 2015'],
 [u'University of Pennsylvania',
  u'University of Pennsylvania (exoplanets w/ Blake [http://sites.sas.upenn.edu/chblake/chblake/files/postdoc_ad.pdf ad] )',
  u'Due Nov. 21, 2014'],
 [u'University of Michigan, w/Rauscher',
  u'University of Michigan, w/Rauscher (exoplanets, theoretical; [https://jobregister.aas.org/job_view?JobID=49217 ad)]',
  u'Due Nov 30, 2014'],
 [u'University of Michigan, w/ Sharon',
  u'University of Michigan, w/ Sharon (extragalactic, lensing; [https://jobregister.aas.org/job_view?JobID=48963 ad] )',
  u'Due Nov 15, 2014'],
 [u'University of Michigan, McLaughlin Prise Fellowship',
  u'University of Michigan, McLaughlin Prise Fellowship ( [https://jobregister.aas.org/job_view?JobID=48918 ad)]',
  u'Due Dec 1, 2014'],
 [u'University of Texas at Austin/McDonald Observatory, Harlan J. Smith Fellowship',
  u'University of Texas at Austin/McDonald Observatory, Harlan J. Smith Fellowship ( [https://jobregister.aas.org/job_view?JobID=48837 ad] )',
  u'Due Nov 24, 2014'],
 [u'University of Warwick',
  u'University of Warwick (2 positions, [http://www2.warwick.ac.uk/fac/sci/physics/research/astro/vacancies/ ad] )',
  u'Due Sep. 14th, short list made'],
 [u'Uppsala University Postdoctoral Fellowships in Physics & Astronomy',
  u'Uppsala University Postdoctoral Fellowships in Physics & Astronomy (3 positions; [http://www.uu.se/en/join-us/jobs-detail-page/?positionId=44467 ad] )',
  u'Due Nov. 01, 2014, short list made'],
 [u'Lawrence Berkeley National Laboratory - Physics',
  u'Lawrence Berkeley National Laboratory - Physics (Fellowship) ( [https://academicjobsonline.org/ajo/jobs/4336 ad] )',
  u'Due Oct. 15th, 2014']]

In [ ]: