In [97]:

    
import requests
import json
import prettytable
import csv
import codecs



In [ ]:



In [108]:

    
from bs4 import BeautifulSoup
import requests



In [111]:

    
url = 'http://www.nobelprize.org/nobel_prizes/lists/universities.html'
r = requests.get(url)
soup = BeautifulSoup(r.text, from_encoding=r.encoding)
place_acquired = soup.find_all(name="div", attrs={"class": "by_year"})



In [113]:

    
place_acquired
#soup = unicode(soup)
#soup = soup.encode('ascii', 'ignore')









    



---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-113-3e8953726478> in <module>()
----> 1 place_acquired
      2 #soup = unicode(soup)
      3 #soup = soup.encode('ascii', 'ignore')

c:\Anaconda\lib\site-packages\IPython\core\displayhook.pyc in __call__(self, result)
    236                 self.write_format_data(format_dict, md_dict)
    237                 self.log_output(format_dict)
--> 238             self.finish_displayhook()
    239 
    240     def cull_cache(self):

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\displayhook.pyc in finish_displayhook(self)
     70         sys.stderr.flush()
     71         if self.msg['content']['data']:
---> 72             self.session.send(self.pub_socket, self.msg, ident=self.topic)
     73         self.msg = None
     74 

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in send(self, stream, msg_or_type, content, parent, ident, buffers, track, header, metadata)
    647         if self.adapt_version:
    648             msg = adapt(msg, self.adapt_version)
--> 649         to_send = self.serialize(msg, ident)
    650         to_send.extend(buffers)
    651         longest = max([ len(s) for s in to_send ])

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in serialize(self, msg, ident)
    551             content = self.none
    552         elif isinstance(content, dict):
--> 553             content = self.pack(content)
    554         elif isinstance(content, bytes):
    555             # content is already packed, as in a relayed message

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in <lambda>(obj)
     83 # disallow nan, because it's not actually valid JSON
     84 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default,
---> 85     ensure_ascii=False, allow_nan=False,
     86 )
     87 json_unpacker = lambda s: jsonapi.loads(s)

c:\Anaconda\lib\site-packages\zmq\utils\jsonapi.pyc in dumps(o, **kwargs)
     38         kwargs['separators'] = (',', ':')
     39 
---> 40     s = jsonmod.dumps(o, **kwargs)
     41 
     42     if isinstance(s, unicode):

c:\Anaconda\lib\json\__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw)
    248         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    249         separators=separators, encoding=encoding, default=default,
--> 250         sort_keys=sort_keys, **kw).encode(obj)
    251 
    252 

c:\Anaconda\lib\json\encoder.pyc in encode(self, o)
    208         if not isinstance(chunks, (list, tuple)):
    209             chunks = list(chunks)
--> 210         return ''.join(chunks)
    211 
    212     def iterencode(self, o, _one_shot=False):

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 8957: ordinal not in range(128)



In [6]:

    
def grab_field_and_number(string):
    '''
    >>>grab_field_and_number("The Nobel Prize in Physics 2000")
    ('2000', 'Physics')
    
    >>>grab_field_and_number("The Prize in Economic Sciences 2010")
    ('2010', 'Economic Sciences')
    
    >>>grab_field_and_number("The Nobel Prize in Physiology or Medicine 2000")
    ('2000', 'Physiology or Medicine')
    
    >>>grab_field_and_number("The Nobel in Peace Prize 2010")
    ('2010', 'Peace')
    '''
    
    if "Economic" in string:
        temp_string = string.split()
        year = temp_string.pop()
        field = temp_string[-2] + " " + temp_string[-1]
    elif "Physiology or Medicine" in string:
        temp_string = string.split()
        year = temp_string.pop()
        field = temp_string[-3] + " " + temp_string[-2] + " " + temp_string[-1]
    elif "Peace" in string:
        temp_string = string.split()
        year = temp_string.pop()
        field = temp_string[-2]
    else:
        temp_string = string.split()
        year = temp_string.pop()
        field = temp_string[-1]
    return year, field



In [7]:

    
grab_field_and_number("The Nobel in Peace Prize 2010")









    Out[7]:





('2010', 'Peace')



In [8]:

    
#grab_field_and_number("The Nobel Prize in Physics 2000")



In [9]:

    
#grab_field_and_number("The Prize in Economic Sciences 2010")



In [10]:

    
#grab_field_and_number("The Nobel Prize in Physiology or Medicine 2000")



In [11]:

    
def grab_inst_country_citystate(string):
    '''
    >>>grab_inst_citystate_country("Edinburgh University, Edinburgh, United Kingdom")
    ('Edinburgh University', ' United Kingdom', ' Edinburgh', '', '')
    
    >>>grab_inst_country_citystate("Fred Hutchinson Cancer Research Center, Seattle, WA, USA")
    ('Fred Hutchinson Cancer Research Center', ' USA', ' WA', ' Seattle', '')
    
    >>>grab_inst_country_citystate("Columbia University Division, Cardio-Pulmonary Laboratory, Bellevue Hospital, New York, NY, USA")
    ('Columbia University Division',
 ' USA',
 ' NY',
 ' New York',
 ' Cardio-Pulmonary Laboratory,  Bellevue Hospital')
    '''
    pieces = string.split(",")
    institution = pieces[0]
    country = pieces[-1]
    city_state = pieces[1:-1]
    city, state, extra_loc = grab_city_state(city_state)
    return institution, country, city, state, extra_loc



In [12]:

    
#grab_inst_citystate_country("Edinburgh University, Edinburgh, United Kingdom")



In [13]:

    
#grab_inst_country_citystate("Fred Hutchinson Cancer Research Center, Seattle, WA, USA")



In [14]:

    
#grab_inst_country_citystate("Columbia University Division, Cardio-Pulmonary Laboratory, Bellevue Hospital, New York, NY, USA")



In [15]:

    
def grab_city_state(a_list):
    '''
    >>>grab_city_state(["Cardio-Pulmonary Laboratory", "Bellevue Hospital", "New York", "NY"])
    ('NY', 'New York', 'Cardio-Pulmonary Laboratory, Bellevue Hospital')
    
    >>>grab_city_state(["Bellevue Hospital", "New York", "NY"])
    ('NY', 'New York', 'Bellevue Hospital')
    
    >>>grab_city_state(['New York', 'NY'])
    grab_city_state(['New York', 'NY'])
    
    >>>grab_city_state(['New York'])
    ('New York', '', '')    
    '''
    city = a_list.pop()
    state = ""    
    other = ""
    if len(a_list) >= 1:
        state = a_list.pop()
        other = ", ".join(a_list)
    return city, state, other



In [16]:

    
#grab_city_state(["Cardio-Pulmonary Laboratory", "Bellevue Hospital", "New York", "NY"])



In [17]:

    
#grab_city_state(["Bellevue Hospital", "New York", "NY"])



In [18]:

    
#grab_city_state(['New York', 'NY'])



In [19]:

    
#grab_city_state(['New York'])



In [ ]:



In [20]:

    
def separate_old_country_names(country):
    old = ""
    new = ""
    if " (now " in country:
        old_and_new = country.split(' (now ')
        old = old_and_new[0]
        new = old_and_new[1][:-1]
    else:
        old = country
        new = country
    return old, new



In [114]:

    
def find_country_acq(bs4_html):
    all_names = [["name", "institution",
                  "old_country_name_acquired","current_country_name_acquired",
                  "city","state","year","field"]]
    place_acq = ""
    for i in bs4_html:
        #pprint.pprint(i) 
        #print "*"*80
        #print i
        if i.find_all('h3'):
            #print "i.TEXT: ", i.text
            place_acq = i.h3.text
        if i.find_all('a'):
            #print ""
            #print "i.a.TEXT: ", i.a.text
            #print "i.h6.TEXT: ", i.h6.text
            #print "PLACE_ACQ: ", place_acq
            #print "field_year: ", field_year
            field_year = i.a.text
            name = i.h6.text
            year, field = grab_field_and_number(field_year)
            institution, country, city, state, extra_loc = grab_inst_country_citystate(place_acq)
            
            old_country_name, new_country_name = separate_old_country_names(country)
            
            all_names.append([name.encode('utf-8'),
                              institution.encode('utf-8'),
                              old_country_name.encode('utf-8'),
                              new_country_name.encode('utf-8'),
                              city.encode('utf-8'), 
                              state.encode('utf-8'),
                              year.encode('utf-8'),
                              field.encode('utf-8')])
            
            #print ""
            #print "*"*80
    return all_names



In [117]:

    
len(find_country_acq(place_acquired))









    Out[117]:





698



In [ ]:



In [23]:

    
url = 'http://www.nobelprize.org/nobel_prizes/lists/countries.html'
r = requests.get(url)
soup = BeautifulSoup(r.text)
birth_html = soup.find_all(name="div", attrs={"class": "by_year"})



In [ ]:



In [24]:

    
def find_country_birth(bs4_html):
    all_names = [["name","birth_country_old_name",
                  "birth_country_current_name",
                  "year","field"]]
    place_acq = ""
    for i in bs4_html:
        # Only place acquired entries have an 'h3' sub-class
        if i.find_all('h3'):
            place_acq = i.h3.text
        # Only field_year/name entries have an 'h6' sub-class.
        if i.find_all('h6'):
            field_year = i.a.text
            name = i.h6.text
            year, field = grab_field_and_number(field_year)
            old_country_name, new_country_name = separate_old_country_names(place_acq)
            
            all_names.append([name.encode('utf-8'), 
                              old_country_name.encode('utf-8'),
                              new_country_name.encode('utf-8'),
                              year.encode('utf-8'),
                              field.encode('utf-8')])
            
    return all_names



In [42]:

    
len(find_country_birth(birth_html))









    Out[42]:





865



In [ ]:



In [ ]:



In [26]:

    
url = 'http://www.nobelprize.org/nobel_prizes/lists/age.html'
r = requests.get(url)
soup = BeautifulSoup(r.text)
age_html = soup.find_all(name="div", attrs={"class": "large-12 columns"})



In [27]:

    
def find_age(bs4_html):
    all_names = [["name", "age"]]
    place_acq = ""
    for i in age_html[6].find_all(['h3', 'h6']):
        
        if "Age" in i.string:
            age = i.string.split()[-1]
        if "Age" not in i.string:
            name = i.string
            all_names.append([name.encode('utf-8'), age.encode('utf-8')])
    return all_names



In [28]:

    
len(find_age(age_html))









    Out[28]:





865



In [29]:

    
nobel_ages = find_age(age_html)



In [30]:

    
with open('nobel_ages.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerows(nobel_ages)



In [118]:

    
country_acquired = find_country_acq(place_acquired)



In [35]:

    
#country_acquired



In [120]:

    
import pandas as pd

headers = country_acquired.pop(0)
df = pd.DataFrame(country_acquired, columns=headers)
df.head()









    Out[120]:






  
    
      
      name
      institution
      old_country_name_acquired
      current_country_name_acquired
      city
      state
      year
      field
    
  
  
    
      0
       Zhores I. Alferov
       A.F. Ioffe Physico-Technical Institute
         Russia
         Russia
        St. Petersburg
       
       2000
                 Physics
    
    
      1
            Jens C. Skou
                            Aarhus University
        Denmark
        Denmark
                Aarhus
       
       1997
               Chemistry
    
    
      2
       Dale T. Mortensen
                            Aarhus University
        Denmark
        Denmark
                Aarhus
       
       2010
       Economic Sciences
    
    
      3
              Lev Landau
                          Academy of Sciences
           USSR
           USSR
                Moscow
       
       1962
                 Physics
    
    
      4
           Pyotr Kapitsa
                          Academy of Sciences
           USSR
           USSR
                Moscow
       
       1978
                 Physics



In [38]:

    
#country_birth



In [44]:

    
import pandas as pd

country_birth = find_country_birth(birth_html)
headers = country_birth.pop(0)
df = pd.DataFrame(country_birth, columns=headers)
df.head()









    



---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-44-85da9c4b1a68> in <module>()
      4 headers = country_birth.pop(0)
      5 df = pd.DataFrame(country_birth, columns=headers)
----> 6 df.head()

c:\Anaconda\lib\site-packages\IPython\core\displayhook.pyc in __call__(self, result)
    236                 self.write_format_data(format_dict, md_dict)
    237                 self.log_output(format_dict)
--> 238             self.finish_displayhook()
    239 
    240     def cull_cache(self):

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\displayhook.pyc in finish_displayhook(self)
     70         sys.stderr.flush()
     71         if self.msg['content']['data']:
---> 72             self.session.send(self.pub_socket, self.msg, ident=self.topic)
     73         self.msg = None
     74 

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in send(self, stream, msg_or_type, content, parent, ident, buffers, track, header, metadata)
    647         if self.adapt_version:
    648             msg = adapt(msg, self.adapt_version)
--> 649         to_send = self.serialize(msg, ident)
    650         to_send.extend(buffers)
    651         longest = max([ len(s) for s in to_send ])

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in serialize(self, msg, ident)
    551             content = self.none
    552         elif isinstance(content, dict):
--> 553             content = self.pack(content)
    554         elif isinstance(content, bytes):
    555             # content is already packed, as in a relayed message

c:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in <lambda>(obj)
     83 # disallow nan, because it's not actually valid JSON
     84 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default,
---> 85     ensure_ascii=False, allow_nan=False,
     86 )
     87 json_unpacker = lambda s: jsonapi.loads(s)

c:\Anaconda\lib\site-packages\zmq\utils\jsonapi.pyc in dumps(o, **kwargs)
     38         kwargs['separators'] = (',', ':')
     39 
---> 40     s = jsonmod.dumps(o, **kwargs)
     41 
     42     if isinstance(s, unicode):

c:\Anaconda\lib\json\__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw)
    248         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    249         separators=separators, encoding=encoding, default=default,
--> 250         sort_keys=sort_keys, **kw).encode(obj)
    251 
    252 

c:\Anaconda\lib\json\encoder.pyc in encode(self, o)
    208         if not isinstance(chunks, (list, tuple)):
    209             chunks = list(chunks)
--> 210         return ''.join(chunks)
    211 
    212     def iterencode(self, o, _one_shot=False):

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 171: ordinal not in range(128)



In [48]:

    
countries = list(set(df.birth_country_new_name))









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-48-a7a78aa7a2c0> in <module>()
----> 1 countries = list(set(df.birth_country_new_name))

c:\Anaconda\lib\site-packages\pandas\core\generic.pyc in __getattr__(self, name)
   1841                 return self[name]
   1842             raise AttributeError("'%s' object has no attribute '%s'" %
-> 1843                                  (type(self).__name__, name))
   1844 
   1845     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'birth_country_new_name'



In [238]:

    
#google_api_key = "AIzaSyDAJxRxTE-ZC5M7qGN5Bg_FXwgc5e_TqdU"



In [239]:

    
def lookup_lat_lon(city="", state="", country="", key=""):
    return "https://maps.googleapis.com/maps/api/geocode/json?"+"address="+country+"&key="+key



In [288]:

    
lookup_lat_lon(country=countries[38], key=google_api_key)









    Out[288]:





"https://maps.googleapis.com/maps/api/geocode/json?address=People's Republic of China&key=AIzaSyDAJxRxTE-ZC5M7qGN5Bg_FXwgc5e_TqdU"



In [289]:

    
url2 = lookup_lat_lon(country=countries[38], key=google_api_key)



In [290]:

    
r2 = requests.get(url2)



In [291]:

    
country_json = r2.json()



In [292]:

    
birth_lat = country_json['results'][0]['geometry']['location']['lat']
birth_lon = country_json['results'][0]['geometry']['location']['lng']
birth_country_long_name = country_json['results'][0]['address_components'][0]['long_name']
birth_country_short_name = country_json['results'][0]['address_components'][0]['short_name']



In [45]:

    
print birth_lat
print birth_lon
#birth_country_long_name









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-45-0d2c4c7e0c91> in <module>()
----> 1 print birth_lat
      2 print birth_lon
      3 #birth_country_long_name

NameError: name 'birth_lat' is not defined



In [295]:

    
#country_json



In [46]:

    
def get_long_lat(country_list, birth_countries=True):
    
    output = [['birth_lat', 
               'birth_lon', 
               'birth_country_current_name',
               'birth_country_short_name']]
    if birth_countries == False:
        output = [['acquired_lat', 
                   'acquired_lon', 
                   'current_country_name_acquired',
                   'acquired_country_short_name']]
    # https://console.developers.google.com
    # https://developers.google.com/maps/documentation/geocoding/?csw=1
    google_api_key = "AIzaSyDAJxRxTE-ZC5M7qGN5Bg_FXwgc5e_TqdU"
    
    for each_country in country_list:
        url = lookup_lat_lon(country=each_country, key=google_api_key)
        r = requests.get(url)
        country_json = r.json()
        lat = country_json['results'][0]['geometry']['location']['lat']
        lon = country_json['results'][0]['geometry']['location']['lng']
        #country_long_name = country_json['results'][0]['address_components'][0]['long_name']
        country_long_name = each_country
        country_short_name = country_json['results'][0]['address_components'][0]['short_name']
        
        output.append([lat,
                       lon,
                       country_long_name,
                       country_short_name])
    return output



In [47]:

    
# Get the lat/lon from the Google API!
lat_lon_birth_countries = get_long_lat(countries, birth_countries=True)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-47-a641c1a16df4> in <module>()
      1 # Get the lat/lon from the Google API!
----> 2 lat_lon_birth_countries = get_long_lat(countries, birth_countries=True)

NameError: name 'countries' is not defined



In [308]:

    
headers = lat_lon_birth_countries.pop(0)
birth_countries_df = pd.DataFrame(lat_lon_birth_countries, columns=headers)



In [313]:

    
birth_countries_df.head()









    Out[313]:






  
    
      
      birth_lat
      birth_lon
      birth_country_current_name
      birth_country_short_name
    
  
  
    
      0
      -14.235004
       -51.925280
                      Brazil
       BR
    
    
      1
       56.130366
      -106.346771
                      Canada
       CA
    
    
      2
      -18.766947
        46.869107
                  Madagascar
       MG
    
    
      3
       41.608635
        21.745275
       Republic of Macedonia
       MK
    
    
      4
       41.871940
        12.567380
                       Italy
       IT



In [397]:

    
df = pd.merge(df, birth_countries_df)
df.tail()









    Out[397]:






  
    
      
      name
      birth_country_old_name
      birth_country_current_name
      year
      field
      birth_lat
      birth_lon
      birth_country_short_name
    
  
  
    
      859
       Robert J. Shiller
             USA
             USA
       2013
            Economic Sciences
       37.090240
       -95.712891
       US
    
    
      860
       Lars Peter Hansen
             USA
             USA
       2013
            Economic Sciences
       37.090240
       -95.712891
       US
    
    
      861
        Baruj Benacerraf
       Venezuela
       Venezuela
       1980
       Physiology or Medicine
        6.423750
       -66.589730
       VE
    
    
      862
             Le Duc Tho 
         Vietnam
         Vietnam
       1973
                        Peace
       14.058324
       108.277199
       VN
    
    
      863
         Tawakkol Karman
           Yemen
           Yemen
       2011
                        Peace
       15.552727
        48.516388
       YE



In [398]:

    
# df.to_csv('data/temp.csv')



In [319]:

    
headers = nobel_ages.pop(0)
nobel_ages_df = pd.DataFrame(nobel_ages, columns=headers)



In [384]:

    
#pd.merge(df, nobel_ages_df).tail(20)

Since 4 people won Nobel Prizes twice (!) at different ages, these dataframes can't just be merged on the 'name' column. Instead, we can sort/reorder each dataframe by the names and year/age, resetting the index to get them aligned.

Now, we can merge(or join()) them in pandas on the indices of each dataframe. Now, we can see Marie Curie was age 36 when recieving the nobel when recieving the Nobel Prize in 1903 in Physics, then 44 in 1911 when recieving the Nobel Prize in Chemistry.



In [399]:

    
sorted1 = df.sort(columns=['name', 'year']).reset_index(drop=True)
sorted2 = nobel_ages_df.sort(columns=['name', 'age']).reset_index(drop=True)
merged = pd.merge(sorted1, sorted2, left_index=True, right_index=True, how='outer', on='name')
merged[merged.name=="Marie Curie"]









    Out[399]:






  
    
      
      name
      birth_country_old_name
      birth_country_current_name
      year
      field
      birth_lat
      birth_lon
      birth_country_short_name
      age
    
  
  
    
      521
       Marie Curie
       Russian Empire
       Poland
       1903
         Physics
       51.919438
       19.145136
       PL
       36
    
    
      522
       Marie Curie
       Russian Empire
       Poland
       1911
       Chemistry
       51.919438
       19.145136
       PL
       44



In [401]:

    
merged.to_csv('data/temp.csv', encoding='utf-8')



In [402]:

    
merged.head()









    Out[402]:






  
    
      
      name
      birth_country_old_name
      birth_country_current_name
      year
      field
      birth_lat
      birth_lon
      birth_country_short_name
      age
    
  
  
    
      0
       A. Michael Spence
                                     USA
             USA
       2001
       Economic Sciences
       37.090240
      -95.712891
       US
       58
    
    
      1
            Aage N. Bohr
                                 Denmark
         Denmark
       1975
                 Physics
       56.263920
        9.501785
       DK
       53
    
    
      2
       Aaron Ciechanover
       British Protectorate of Palestine
          Israel
       2004
               Chemistry
       31.046051
       34.851612
       IL
       57
    
    
      3
              Aaron Klug
                               Lithuania
       Lithuania
       1982
               Chemistry
       55.169438
       23.881275
       LT
       56
    
    
      4
             Abdus Salam
                                   India
        Pakistan
       1979
                 Physics
       30.375321
       69.345116
       PK
       53



In [403]:









    Out[403]:





[[0, -1, -2, -3], [1, 0, -1, -2], [2, 1, 0, -1]]



In [404]:









    Out[404]:





[[0, 1, 2], [-1, 0, 1], [-2, -1, 0], [-3, -2, -1]]



In [405]:









    



The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!



In [ ]:



In [ ]:

	name	institution	old_country_name_acquired	current_country_name_acquired	city	year	field
0	Zhores I. Alferov	A.F. Ioffe Physico-Technical Institute	Russia	Russia	St. Petersburg	2000	Physics
1	Jens C. Skou	Aarhus University	Denmark	Denmark	Aarhus	1997	Chemistry
2	Dale T. Mortensen	Aarhus University	Denmark	Denmark	Aarhus	2010	Economic Sciences
3	Lev Landau	Academy of Sciences	USSR	USSR	Moscow	1962	Physics
4	Pyotr Kapitsa	Academy of Sciences	USSR	USSR	Moscow	1978	Physics

	birth_lat	birth_lon	birth_country_current_name	birth_country_short_name
0	-14.235004	-51.925280	Brazil	BR
1	56.130366	-106.346771	Canada	CA
2	-18.766947	46.869107	Madagascar	MG
3	41.608635	21.745275	Republic of Macedonia	MK
4	41.871940	12.567380	Italy	IT

	name	birth_country_old_name	birth_country_current_name	year	field	birth_lat	birth_lon	birth_country_short_name
859	Robert J. Shiller	USA	USA	2013	Economic Sciences	37.090240	-95.712891	US
860	Lars Peter Hansen	USA	USA	2013	Economic Sciences	37.090240	-95.712891	US
861	Baruj Benacerraf	Venezuela	Venezuela	1980	Physiology or Medicine	6.423750	-66.589730	VE
862	Le Duc Tho	Vietnam	Vietnam	1973	Peace	14.058324	108.277199	VN
863	Tawakkol Karman	Yemen	Yemen	2011	Peace	15.552727	48.516388	YE

	name	birth_country_old_name	birth_country_current_name	year	field	birth_lat	birth_lon	birth_country_short_name	age
521	Marie Curie	Russian Empire	Poland	1903	Physics	51.919438	19.145136	PL	36
522	Marie Curie	Russian Empire	Poland	1911	Chemistry	51.919438	19.145136	PL	44

	name	birth_country_old_name	birth_country_current_name	year	field	birth_lat	birth_lon	birth_country_short_name	age
0	A. Michael Spence	USA	USA	2001	Economic Sciences	37.090240	-95.712891	US	58
1	Aage N. Bohr	Denmark	Denmark	1975	Physics	56.263920	9.501785	DK	53
2	Aaron Ciechanover	British Protectorate of Palestine	Israel	2004	Chemistry	31.046051	34.851612	IL	57
3	Aaron Klug	Lithuania	Lithuania	1982	Chemistry	55.169438	23.881275	LT	56
4	Abdus Salam	India	Pakistan	1979	Physics	30.375321	69.345116	PK	53