In [1]:
from pygbif import species # http://pygbif.readthedocs.org/en/latest/
from pygbif import occurrences
import copy

In [4]:
# taxonKey 2382397 resolves to Etheostoma blennioides (see the record dump
# below); fetch up to 1000 occurrence records for it.
res = occurrences.search(taxonKey=2382397, limit=1000)

In [26]:
# Inspect the records that came back without coordinates.
for result in res['results']:
    if 'decimalLatitude' not in result:
        print(result, '\n')


{'issues': [], 'phylum': 'Chordata', 'month': 11, 'speciesKey': 2382397, 'occurrenceRemarks': 'number 26379; lot count 2', 'familyKey': 4481, 'scientificName': 'Etheostoma blennioides Rafinesque, 1819', 'phylumKey': 44, 'lastCrawled': '2016-04-14T20:40:34.381+0000', 'occurrenceID': 'urn:uuid:eff02618-43b0-4604-86f0-b944c58568d6', 'individualCount': 2, 'specificEpithet': 'blennioides', 'publishingOrgKey': '2e167bb0-4441-11db-9ba2-b8a03c50a862', 'key': 1039513466, 'species': 'Etheostoma blennioides', 'taxonRemarks': 'Animals and Plants: Vertebrates - Fish', 'classKey': 204, 'nomenclaturalCode': 'ICZN', 'kingdomKey': 1, 'catalogNumber': 'YPM ICH 026379', 'order': 'Perciformes', 'associatedReferences': 'Det. by: Chantal E. Parker, Gregory J. Watkins-Colwell', 'extensions': {}, 'taxonRank': 'SPECIES', 'stateProvince': 'Tennessee', 'lastInterpreted': '2016-03-17T15:56:12.601+0000', 'rightsHolder': 'Yale Peabody Museum of Natural History', 'identifiers': [], 'year': 2012, 'orderKey': 587, 'lastParsed': '2016-03-17T15:53:47.755+0000', 'accessRights': 'Open Access, http://creativecommons.org/publicdomain/zero/1.0/; see Yale Peabody policies at: http://hdl.handle.net/10079/8931zqj', 'day': 18, 'language': 'eng', 'genericName': 'Etheostoma', 'references': 'http://collections.peabody.yale.edu/search/Record/YPM-ICH-026379', 'protocol': 'DWC_ARCHIVE', 'county': 'Lincoln County', 'eventDate': '2012-11-17T23:00:00.000+0000', 'genusKey': 2382199, 'otherCatalogNumbers': 'IPTCNUMS', 'datasetKey': '96419bea-f762-11e1-a439-00145eb45e9a', 'locality': 'Elk River Drainage at Harms Mill at TN273 ~7km W of Fayetteville (TJN12-01)', 'family': 'Percidae', 'basisOfRecord': 'PRESERVED_SPECIMEN', 'license': 'http://creativecommons.org/publicdomain/zero/1.0/', 'relations': [], 'gbifID': '1039513466', 'vernacularName': 'perches; perch-like fishes; ray-finned fishes; vertebrates; chordates; animals', 'facts': [], 'collectionCode': 'VZ', 'kingdom': 'Animalia', 'preparations': '10% form.->70% alc.', 
'taxonKey': 2382397, 'previousIdentifications': 'Etheostoma blennioides', 'identifier': 'urn:uuid:eff02618-43b0-4604-86f0-b944c58568d6', 'higherClassification': 'Animalia; Chordata; Vertebrata; Gnathostomata [vertebrate]; Actinopterygii; Neopterygii-Teleostei-Euteleostei; Acanthopterygii-Percomorpha; Perciformes; Percoidei; Percoidea; Percidae', 'higherGeography': 'North America; USA; Tennessee; Lincoln County', 'class': 'Actinopterygii', 'countryCode': 'US', 'recordedBy': 'Thomas J. Near, Benjamin P. Keck, Sharon F. Clemmensen, C. D. Hulsey', 'publishingCountry': 'US', 'type': 'PhysicalObject', 'country': 'United States', 'genus': 'Etheostoma', 'bibliographicCitation': 'Etheostoma blennioides (YPM ICH 026379)', 'ownerInstitutionCode': 'YPM', 'identifiedBy': 'Chantal E. Parker, Gregory J. Watkins-Colwell', 'continent': 'NORTH_AMERICA', 'institutionCode': 'YPM'} 

{'issues': [], 'phylum': 'Chordata', 'month': 4, 'speciesKey': 2382397, 'familyKey': 4481, 'scientificName': 'Etheostoma blennioides Rafinesque, 1819', 'phylumKey': 44, 'lastCrawled': '2015-05-19T12:19:27.874+0000', 'specificEpithet': 'blennioides', 'publishingOrgKey': 'aa40a1e0-818b-11d9-b6d0-b8a03c50a862', 'key': 1087221806, 'fieldNumber': '20', 'species': 'Etheostoma blennioides', 'classKey': 204, 'kingdomKey': 1, 'catalogNumber': '31-7292', 'order': 'Perciformes', 'extensions': {}, 'taxonRank': 'SPECIES', 'stateProvince': 'Kansas', 'lastInterpreted': '2015-05-19T12:24:55.449+0000', 'rightsHolder': 'NatureServe', 'identifiers': [], 'year': 2010, 'orderKey': 587, 'lastParsed': '2015-05-19T12:19:27.887+0000', 'day': 6, 'genericName': 'Etheostoma', 'protocol': 'DWC_ARCHIVE', 'rights': "These data are made available by NatureServe with permission of the relevant natural heritage programs and conservation data centers (http://www.natureserve.org/visitLocal/index.jsp), and use of these data is governed by the terms of NatureServe's data sharing agreements with these independent data custodians. These data are available for non-commercial conservation, educational, and research use, but may not be repackaged or redistributed in any form without written permission. Appropriate acknowledgment of NatureServe and its natural heritage program members should be made in any reports or other products derived from these data. 
NatureServe makes no warranty as to the currency, completeness, or accuracy of these data, and shall have no liability or responsibility to the data users, or any other person or entity with respect to any liability, loss, or damage caused or alleged to be caused directly or indirectly by the dataset.", 'eventDate': '2010-04-05T22:00:00.000+0000', 'genusKey': 2382199, 'datasetKey': '7fd12114-9010-4c13-8f46-990fe04ca882', 'locality': 'Crawford', 'family': 'Percidae', 'basisOfRecord': 'LITERATURE', 'eventRemarks': 'http://www.natureserve.org/explorer/servlet/NatureServe?searchSpeciesUid=ELEMENT_GLOBAL.2.790349', 'relations': [], 'gbifID': '1087221806', 'institutionCode': 'NTSRV', 'collectionCode': 'KS-NHP', 'facts': [], 'datasetName': 'NatureServe Network Species Occurrence Data', 'kingdom': 'Animalia', 'taxonKey': 2382397, 'identifier': '31-7292', 'class': 'Actinopterygii', 'countryCode': 'US', 'publishingCountry': 'US', 'country': 'United States', 'genus': 'Etheostoma', 'scientificNameID': '168375', 'continent': 'NORTH_AMERICA'} 


In [32]:
from geopy.geocoders import Nominatim
from geopy.distance import vincenty  # imported but unused in the cells shown here
# NOTE(review): newer geopy versions require Nominatim(user_agent=...) — confirm version.
geolocator = Nominatim()
# "Crawford" is ambiguous (no state/country given); geocode() returns a single match.
location = geolocator.geocode("Crawford")

In [34]:
# Longitude of the geocoded "Crawford" — presumably not the Kansas locality
# from the GBIF record; verify before using it as a coordinate fallback.
location.longitude


Out[34]:
-87.7291

In [62]:
def load_species_occurrence(name_species): 
    """Fetch all GBIF occurrence records for one or more species names.

    Parameters
    ----------
    name_species : str or list of str
        Scientific name(s), resolved via the GBIF backbone taxonomy.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame of occurrence records per input name.
    """
    # `basestring` is Python 2 only and raises NameError under the
    # Python 3 interpreter this notebook uses (print() everywhere).
    if isinstance(name_species, str):
        name_species = [name_species]

    dfs_full = []

    for name in name_species:
        # Resolve the *current* name, not the whole input list
        # (the original passed name=name_species, a list).
        usageKey = species.name_backbone(name=name, verbose=False)['usageKey']
        first_res = occurrences.search(taxonKey=usageKey, limit=100000)
        full_results = copy.copy(first_res)

        # Results are paginated, so loop until the API reports the end.
        while first_res['endOfRecords'] is False:
            # Offset by the number of records fetched so far instead of
            # assuming a fixed 300-record page (the original used 300*counter
            # while requesting a different limit, which can skip or duplicate
            # records if the server page size differs).
            first_res = occurrences.search(taxonKey=usageKey,
                                           offset=len(full_results['results']),
                                           limit=10000)
            full_results['results'] = full_results['results'] + first_res['results']

        print(full_results['count'], len(full_results['results'])) # match?

        df_full = pd.DataFrame(full_results['results'])  # records -> DataFrame
        dfs_full.append(df_full)
    return dfs_full

def plot_species_occurrence(dfs_full):
    """Plot GBIF occurrence records on a Mercator basemap, one figure per frame.

    Parameters
    ----------
    dfs_full : pandas.DataFrame or list of pandas.DataFrame
        Occurrence records (e.g. from ``load_species_occurrence``); rows
        lacking 'decimalLatitude'/'decimalLongitude' are dropped first.
    """
    if not isinstance(dfs_full, list):
        dfs_full = [dfs_full]
    # One distinct color per frame (was computed but never used: the plot
    # call hardcoded color="#b01a1a", overriding it).
    colors = plt.cm.rainbow(np.linspace(0, 1, len(dfs_full)))
    for idx, df_full in enumerate(dfs_full):
        # drop rows without latitude/longitude
        df_clean = df_full.dropna(how='any', subset=['decimalLatitude', 'decimalLongitude'])

        # latitude/longitude series
        df_full_latitude = df_clean.decimalLatitude
        df_full_longitude = df_clean.decimalLongitude

        # changed projection from 'ortho' to 'robin' to 'merc' finally
        # Make this plot larger.
        plt.figure(figsize=(16, 12))

        # Guard against a frame whose 'species' column is entirely NaN
        # (the original indexed [0] unconditionally -> IndexError).
        species_names = df_clean['species'].dropna().tolist()
        title_species = species_names[0] if species_names else 'unknown species'
        plt.title("%s occurrence records from GBIF" % title_species)  # typo 'occurence' fixed

        # Map extent follows the data's bounding box.
        my_map = Basemap(projection='merc', lat_0=50, lon_0=-100,
                         resolution='l', area_thresh=1000.0,
                         llcrnrlon=df_full_longitude.min(),  # lower left corner longitude
                         llcrnrlat=df_full_latitude.min(),   # lower left corner latitude
                         urcrnrlon=df_full_longitude.max(),  # upper right longitude
                         urcrnrlat=df_full_latitude.max()    # upper right latitude
                         )

        # project longitude/latitude lists into map coordinates
        df_x, df_y = my_map(df_full_longitude.tolist(), df_full_latitude.tolist())

        my_map.drawcoastlines()
        my_map.drawcountries()
        my_map.drawmapboundary(fill_color='#649eff')
        my_map.fillcontinents(color='#cc9955')
        # draw latitude and longitude grid lines
        my_map.drawmeridians(np.arange(0, 360, 30))
        my_map.drawparallels(np.arange(-90, 90, 30))
        # Per-frame color so multiple species remain distinguishable.
        my_map.plot(df_x, df_y, 'bo', markersize=5, color=colors[idx])

    plt.show()

In [15]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap

In [16]:
import pandas as pd

In [17]:
import time

In [18]:
# Time the fetch and the plot separately for a small taxon.
load_start = time.time()
df_species = load_species_occurrence("Alvarezsauridae Bonaparte")
print("Elapsed time loading species in pandas: %s seconds" % (time.time() - load_start))

plot_start = time.time()
plot_species_occurrence(df_species)
print("Elapsed time plotting species: %s seconds" % (time.time() - plot_start))


24 24
Elapsed time loading species in pandas: 0.7711710929870605 seconds
Elapsed time plotting species: 1.568812608718872 seconds

In [19]:
# NOTE(review): load_species_occurrence returns a *list* of DataFrames, so
# string-indexing it should raise TypeError on a fresh run — this output
# likely came from an earlier version that returned one DataFrame; confirm.
df_species['species'].unique()


Out[19]:
array(['Mononykus olecranus', nan, 'Shuvuuia deserti',
       'Ceratonykus oculatus', 'Albertonykus borealis',
       'Alvarezsaurus calvoi', 'Parvicursor remotus',
       'Patagonykus puertai', 'Achillesaurus manazzonei'], dtype=object)

In [20]:
# Same timing experiment for a larger taxon (~41k records per the output
# below) — loading time is dominated by the paginated API fetches.
start = time.time()
df_species = load_species_occurrence("Urocolius Bonaparte") 
stop = time.time()
print("Elapsed time loading species in pandas: %s seconds" % (stop-start))
start = time.time()
plot_species_occurrence(df_species)
stop = time.time()
print("Elapsed time plotting species: %s seconds" % (stop-start))


41144 41144
Elapsed time loading species in pandas: 80.68101739883423 seconds
Elapsed time plotting species: 2.6647543907165527 seconds

In [21]:
# NOTE(review): same caveat as above — df_species is a list of DataFrames,
# so this string indexing should fail on a fresh run; confirm.
df_species['species'].unique()


Out[21]:
array(['Urocolius macrourus', 'Urocolius indicus', nan], dtype=object)

In [24]:
# Largest test (~125k records per the output below): loading takes minutes.
start = time.time()
df_species = load_species_occurrence("Geomyidae Bonaparte") 
stop = time.time()
print("Elapsed time loading species in pandas: %s seconds" % (stop-start))
start = time.time()
plot_species_occurrence(df_species)
stop = time.time()
print("Elapsed time plotting species: %s seconds" % (stop-start))


125022 125022
Elapsed time loading species in pandas: 402.251097202301 seconds
Elapsed time plotting species: 5.072403192520142 seconds

In [25]:
import xlrd # conda install xlrd

In [26]:
# NOTE(review): hardcoded absolute local path — not reproducible on other
# machines; move to a configurable DATA_DIR.
Ohio_dataset = "/home/daniela/Documents/NLeSC/Biodiversity/Aafke/2Daniela.xlsx"
book = xlrd.open_workbook(Ohio_dataset)
fish_selection = book.sheet_by_name('fish_selection')

# Collect species names (column 0) flagged for exploration.
fish_explore = []
for rownum in range(fish_selection.nrows):
    # Column 4 is presumably a y/n "explore this species" flag — TODO confirm
    # against the spreadsheet.
    if fish_selection.row_values(rownum)[4]=='y':
        fish_explore.append(fish_selection.row_values(rownum)[0])

In [167]:
len(fish_explore)


Out[167]:
66

In [235]:
# Let's generalize to list of species
def load_species_occurrences(names_species, **kwargs): 
    if isinstance(names_species, str):
        names_species = [names_species]

    dfs_full = []
    
    for name in names_species:
        usageKey = species.name_backbone(name=name, verbose=False)['usageKey']
        first_res = occurrences.search(taxonKey=usageKey, limit=100000, **kwargs)
        full_results = copy.copy(first_res)

        # results are paginated so we need a loop to fetch them all
        counter = 1
        while first_res['endOfRecords'] is False:
            first_res = occurrences.search(taxonKey=usageKey, offset=300*counter, limit=10000)
            full_results['results'] = copy.copy(full_results['results']) + copy.copy(first_res['results'])
            counter+=1
        
        print("Loading species %s ... " % name)
        print("Number of occurrences: %s " % full_results['count'])
        print(full_results['count'] == len(full_results['results'])) # match?

        df_full = pd.DataFrame(full_results['results']) # load results in pandas dataframes
        dfs_full.append(df_full)
    return dfs_full

def plot_species_occurrences(dfs_full):
    """Plot occurrence records for several species on one shared Mercator basemap.

    Parameters
    ----------
    dfs_full : pandas.DataFrame or list of pandas.DataFrame
        Occurrence records (e.g. from ``load_species_occurrences``); rows
        lacking 'decimalLatitude'/'decimalLongitude' are dropped first.
    """
    if not isinstance(dfs_full, list):
        dfs_full = [dfs_full]
    # One distinct color per species/frame.
    colors = plt.cm.rainbow(np.linspace(0, 1, len(dfs_full)))

    # Drop rows without coordinates once and reuse the cleaned frames
    # (the original recomputed the same dropna three times).
    dfs_clean = [df_full.dropna(how='any', subset=['decimalLatitude', 'decimalLongitude'])
                 for df_full in dfs_full]

    # Combined coordinates, used only to size the map extent.
    dfs_full_latitude = pd.concat([df.decimalLatitude for df in dfs_clean],
                                  ignore_index=True)
    dfs_full_longitude = pd.concat([df.decimalLongitude for df in dfs_clean],
                                   ignore_index=True)

    # One label per frame; guard against an all-NaN 'species' column
    # (the original indexed [0] unconditionally -> IndexError).
    labels = []
    for df_clean in dfs_clean:
        names = df_clean['species'].dropna().tolist()
        labels.append(names[0] if names else 'unknown species')

    plt.figure(figsize=(20, 20))

    # changed projection from 'ortho' to 'robin' to 'merc' finally
    my_map = Basemap(projection='merc', lat_0=50, lon_0=-100,
                     resolution='l', area_thresh=1000.0,
                     llcrnrlon=dfs_full_longitude.min(),  # lower left corner longitude
                     llcrnrlat=dfs_full_latitude.min(),   # lower left corner latitude
                     urcrnrlon=dfs_full_longitude.max(),  # upper right longitude
                     urcrnrlat=dfs_full_latitude.max()    # upper right latitude
                     )

    # Real species names in the title (was the placeholder "asdasf");
    # typo 'occurence' also fixed.
    plt.title("%s occurrence records from GBIF" % ", ".join(labels))
    my_map.drawcoastlines()
    my_map.drawcountries()
    my_map.drawmapboundary(fill_color='#649eff')
    my_map.fillcontinents(color='#cc9955')
    # draw latitude and longitude grid lines
    my_map.drawmeridians(np.arange(0, 360, 30))
    my_map.drawparallels(np.arange(-90, 90, 30))

    for idx, df_clean in enumerate(dfs_clean):
        # project lon/lat into map coordinates
        df_x, df_y = my_map(df_clean.decimalLongitude.tolist(),
                            df_clean.decimalLatitude.tolist())
        my_map.plot(df_x, df_y, 'bo', markersize=10, color=colors[idx],
                    label=labels[idx])
    plt.legend()
    plt.show()

In [234]:
# Load the first two flagged fish species, restricted to US records.
# NOTE(review): exclude_fields is passed straight through to
# occurrences.search — confirm the GBIF API actually honors it.
data_frames = load_species_occurrences(names_species=fish_explore[:2], exclude_fields=['one','two'], country='US')


Loading species Etheostoma_blennioides ... 
Number of occurrences: 6828 
False
Loading species Hypentelium_nigricans ... 
Number of occurrences: 9263 
False

In [189]:
# NOTE(review): In[234] loaded only two frames, so index 2 is out of range on
# a fresh run — this Out[189] predates it (execution counts are out of order).
data_frames[2]['species'].unique()


Out[189]:
array(['Rhinichthys obtusus'], dtype=object)

In [190]:
# Render all loaded species on one shared map.
plot_species_occurrences(data_frames)



In [224]:
# Single request for taxonKey 2341077; only 300 rows come back (see the
# 300-row output below), i.e. the server caps the page size well below 10000.
first_res = occurrences.search(taxonKey="2341077", limit=10000, country='US')

In [228]:
# Keep only the accessRights column; records lacking it show as NaN.
frame = pd.DataFrame(first_res['results'], columns=['accessRights'])

In [229]:
# Rich display of the 300-row accessRights frame (pandas truncates the middle).
frame


Out[229]:
accessRights
0 NaN
1 NaN
2 NaN
3 http://vertnet.org/resources/norms.html
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 NaN
11 NaN
12 http://vertnet.org/resources/norms.html
13 http://vertnet.org/resources/norms.html
14 NaN
15 NaN
16 NaN
17 http://vertnet.org/resources/norms.html
18 NaN
19 http://vertnet.org/resources/norms.html
20 NaN
21 NaN
22 NaN
23 NaN
24 NaN
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
... ...
270 NaN
271 NaN
272 NaN
273 NaN
274 NaN
275 NaN
276 NaN
277 NaN
278 NaN
279 NaN
280 NaN
281 NaN
282 http://biodiversity.ku.edu/research/university...
283 http://vertnet.org/resources/norms.html
284 http://vertnet.org/resources/norms.html
285 http://vertnet.org/resources/norms.html
286 http://vertnet.org/resources/norms.html
287 http://vertnet.org/resources/norms.html
288 NaN
289 http://biodiversity.ku.edu/research/university...
290 http://vertnet.org/resources/norms.html
291 NaN
292 NaN
293 http://vertnet.org/resources/norms.html
294 NaN
295 http://vertnet.org/resources/norms.html
296 http://vertnet.org/resources/norms.html
297 NaN
298 http://biodiversity.ku.edu/research/university...
299 http://vertnet.org/resources/norms.html

300 rows × 1 columns


In [ ]: