In [1]:
from pygbif import species # http://pygbif.readthedocs.org/en/latest/
from pygbif import occurrences
import copy

In [4]:
# taxonKey 2382397 resolves to Etheostoma blennioides (see the record dump
# below); fetch up to 1000 occurrence records for it.
res = occurrences.search(taxonKey=2382397, limit=1000)

In [26]:
# Inspect the records that came back without coordinates.
for result in res['results']:
    if 'decimalLatitude' not in result:
        print(result, '\n')


{'issues': [], 'phylum': 'Chordata', 'month': 11, 'speciesKey': 2382397, 'occurrenceRemarks': 'number 26379; lot count 2', 'familyKey': 4481, 'scientificName': 'Etheostoma blennioides Rafinesque, 1819', 'phylumKey': 44, 'lastCrawled': '2016-04-14T20:40:34.381+0000', 'occurrenceID': 'urn:uuid:eff02618-43b0-4604-86f0-b944c58568d6', 'individualCount': 2, 'specificEpithet': 'blennioides', 'publishingOrgKey': '2e167bb0-4441-11db-9ba2-b8a03c50a862', 'key': 1039513466, 'species': 'Etheostoma blennioides', 'taxonRemarks': 'Animals and Plants: Vertebrates - Fish', 'classKey': 204, 'nomenclaturalCode': 'ICZN', 'kingdomKey': 1, 'catalogNumber': 'YPM ICH 026379', 'order': 'Perciformes', 'associatedReferences': 'Det. by: Chantal E. Parker, Gregory J. Watkins-Colwell', 'extensions': {}, 'taxonRank': 'SPECIES', 'stateProvince': 'Tennessee', 'lastInterpreted': '2016-03-17T15:56:12.601+0000', 'rightsHolder': 'Yale Peabody Museum of Natural History', 'identifiers': [], 'year': 2012, 'orderKey': 587, 'lastParsed': '2016-03-17T15:53:47.755+0000', 'accessRights': 'Open Access, http://creativecommons.org/publicdomain/zero/1.0/; see Yale Peabody policies at: http://hdl.handle.net/10079/8931zqj', 'day': 18, 'language': 'eng', 'genericName': 'Etheostoma', 'references': 'http://collections.peabody.yale.edu/search/Record/YPM-ICH-026379', 'protocol': 'DWC_ARCHIVE', 'county': 'Lincoln County', 'eventDate': '2012-11-17T23:00:00.000+0000', 'genusKey': 2382199, 'otherCatalogNumbers': 'IPTCNUMS', 'datasetKey': '96419bea-f762-11e1-a439-00145eb45e9a', 'locality': 'Elk River Drainage at Harms Mill at TN273 ~7km W of Fayetteville (TJN12-01)', 'family': 'Percidae', 'basisOfRecord': 'PRESERVED_SPECIMEN', 'license': 'http://creativecommons.org/publicdomain/zero/1.0/', 'relations': [], 'gbifID': '1039513466', 'vernacularName': 'perches; perch-like fishes; ray-finned fishes; vertebrates; chordates; animals', 'facts': [], 'collectionCode': 'VZ', 'kingdom': 'Animalia', 'preparations': '10% form.->70% alc.', 
'taxonKey': 2382397, 'previousIdentifications': 'Etheostoma blennioides', 'identifier': 'urn:uuid:eff02618-43b0-4604-86f0-b944c58568d6', 'higherClassification': 'Animalia; Chordata; Vertebrata; Gnathostomata [vertebrate]; Actinopterygii; Neopterygii-Teleostei-Euteleostei; Acanthopterygii-Percomorpha; Perciformes; Percoidei; Percoidea; Percidae', 'higherGeography': 'North America; USA; Tennessee; Lincoln County', 'class': 'Actinopterygii', 'countryCode': 'US', 'recordedBy': 'Thomas J. Near, Benjamin P. Keck, Sharon F. Clemmensen, C. D. Hulsey', 'publishingCountry': 'US', 'type': 'PhysicalObject', 'country': 'United States', 'genus': 'Etheostoma', 'bibliographicCitation': 'Etheostoma blennioides (YPM ICH 026379)', 'ownerInstitutionCode': 'YPM', 'identifiedBy': 'Chantal E. Parker, Gregory J. Watkins-Colwell', 'continent': 'NORTH_AMERICA', 'institutionCode': 'YPM'} 

{'issues': [], 'phylum': 'Chordata', 'month': 4, 'speciesKey': 2382397, 'familyKey': 4481, 'scientificName': 'Etheostoma blennioides Rafinesque, 1819', 'phylumKey': 44, 'lastCrawled': '2015-05-19T12:19:27.874+0000', 'specificEpithet': 'blennioides', 'publishingOrgKey': 'aa40a1e0-818b-11d9-b6d0-b8a03c50a862', 'key': 1087221806, 'fieldNumber': '20', 'species': 'Etheostoma blennioides', 'classKey': 204, 'kingdomKey': 1, 'catalogNumber': '31-7292', 'order': 'Perciformes', 'extensions': {}, 'taxonRank': 'SPECIES', 'stateProvince': 'Kansas', 'lastInterpreted': '2015-05-19T12:24:55.449+0000', 'rightsHolder': 'NatureServe', 'identifiers': [], 'year': 2010, 'orderKey': 587, 'lastParsed': '2015-05-19T12:19:27.887+0000', 'day': 6, 'genericName': 'Etheostoma', 'protocol': 'DWC_ARCHIVE', 'rights': "These data are made available by NatureServe with permission of the relevant natural heritage programs and conservation data centers (http://www.natureserve.org/visitLocal/index.jsp), and use of these data is governed by the terms of NatureServe's data sharing agreements with these independent data custodians. These data are available for non-commercial conservation, educational, and research use, but may not be repackaged or redistributed in any form without written permission. Appropriate acknowledgment of NatureServe and its natural heritage program members should be made in any reports or other products derived from these data. 
NatureServe makes no warranty as to the currency, completeness, or accuracy of these data, and shall have no liability or responsibility to the data users, or any other person or entity with respect to any liability, loss, or damage caused or alleged to be caused directly or indirectly by the dataset.", 'eventDate': '2010-04-05T22:00:00.000+0000', 'genusKey': 2382199, 'datasetKey': '7fd12114-9010-4c13-8f46-990fe04ca882', 'locality': 'Crawford', 'family': 'Percidae', 'basisOfRecord': 'LITERATURE', 'eventRemarks': 'http://www.natureserve.org/explorer/servlet/NatureServe?searchSpeciesUid=ELEMENT_GLOBAL.2.790349', 'relations': [], 'gbifID': '1087221806', 'institutionCode': 'NTSRV', 'collectionCode': 'KS-NHP', 'facts': [], 'datasetName': 'NatureServe Network Species Occurrence Data', 'kingdom': 'Animalia', 'taxonKey': 2382397, 'identifier': '31-7292', 'class': 'Actinopterygii', 'countryCode': 'US', 'publishingCountry': 'US', 'country': 'United States', 'genus': 'Etheostoma', 'scientificNameID': '168375', 'continent': 'NORTH_AMERICA'} 


In [32]:
from geopy.geocoders import Nominatim
from geopy.distance import vincenty  # imported but unused in the cells shown here
# NOTE(review): newer geopy versions require Nominatim(user_agent=...) — confirm version.
geolocator = Nominatim()
# "Crawford" is ambiguous (no state/country given); geocode() returns a single match.
location = geolocator.geocode("Crawford")

In [34]:
# Longitude of the geocoded "Crawford" — presumably not the Kansas locality
# from the GBIF record; verify before using it as a coordinate fallback.
location.longitude


Out[34]:
-87.7291

In [62]:
def load_species_occurrence(name_species): 
    """Fetch all GBIF occurrence records for one or more species names.

    Parameters
    ----------
    name_species : str or list of str
        Scientific name(s), resolved via the GBIF backbone taxonomy.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame of occurrence records per input name.
    """
    # `basestring` is Python 2 only and raises NameError under the
    # Python 3 interpreter this notebook uses (print() everywhere).
    if isinstance(name_species, str):
        name_species = [name_species]

    dfs_full = []

    for name in name_species:
        # Resolve the *current* name, not the whole input list
        # (the original passed name=name_species, a list).
        usageKey = species.name_backbone(name=name, verbose=False)['usageKey']
        first_res = occurrences.search(taxonKey=usageKey, limit=100000)
        full_results = copy.copy(first_res)

        # Results are paginated, so loop until the API reports the end.
        while first_res['endOfRecords'] is False:
            # Offset by the number of records fetched so far instead of
            # assuming a fixed 300-record page (the original used 300*counter
            # while requesting a different limit, which can skip or duplicate
            # records if the server page size differs).
            first_res = occurrences.search(taxonKey=usageKey,
                                           offset=len(full_results['results']),
                                           limit=10000)
            full_results['results'] = full_results['results'] + first_res['results']

        print(full_results['count'], len(full_results['results'])) # match?

        df_full = pd.DataFrame(full_results['results'])  # records -> DataFrame
        dfs_full.append(df_full)
    return dfs_full

def plot_species_occurrence(dfs_full):
    """Plot GBIF occurrence records on a Mercator basemap, one figure per frame.

    Parameters
    ----------
    dfs_full : pandas.DataFrame or list of pandas.DataFrame
        Occurrence records (e.g. from ``load_species_occurrence``); rows
        lacking 'decimalLatitude'/'decimalLongitude' are dropped first.
    """
    if not isinstance(dfs_full, list):
        dfs_full = [dfs_full]
    # One distinct color per frame (was computed but never used: the plot
    # call hardcoded color="#b01a1a", overriding it).
    colors = plt.cm.rainbow(np.linspace(0, 1, len(dfs_full)))
    for idx, df_full in enumerate(dfs_full):
        # drop rows without latitude/longitude
        df_clean = df_full.dropna(how='any', subset=['decimalLatitude', 'decimalLongitude'])

        # latitude/longitude series
        df_full_latitude = df_clean.decimalLatitude
        df_full_longitude = df_clean.decimalLongitude

        # changed projection from 'ortho' to 'robin' to 'merc' finally
        # Make this plot larger.
        plt.figure(figsize=(16, 12))

        # Guard against a frame whose 'species' column is entirely NaN
        # (the original indexed [0] unconditionally -> IndexError).
        species_names = df_clean['species'].dropna().tolist()
        title_species = species_names[0] if species_names else 'unknown species'
        plt.title("%s occurrence records from GBIF" % title_species)  # typo 'occurence' fixed

        # Map extent follows the data's bounding box.
        my_map = Basemap(projection='merc', lat_0=50, lon_0=-100,
                         resolution='l', area_thresh=1000.0,
                         llcrnrlon=df_full_longitude.min(),  # lower left corner longitude
                         llcrnrlat=df_full_latitude.min(),   # lower left corner latitude
                         urcrnrlon=df_full_longitude.max(),  # upper right longitude
                         urcrnrlat=df_full_latitude.max()    # upper right latitude
                         )

        # project longitude/latitude lists into map coordinates
        df_x, df_y = my_map(df_full_longitude.tolist(), df_full_latitude.tolist())

        my_map.drawcoastlines()
        my_map.drawcountries()
        my_map.drawmapboundary(fill_color='#649eff')
        my_map.fillcontinents(color='#cc9955')
        # draw latitude and longitude grid lines
        my_map.drawmeridians(np.arange(0, 360, 30))
        my_map.drawparallels(np.arange(-90, 90, 30))
        # Per-frame color so multiple species remain distinguishable.
        my_map.plot(df_x, df_y, 'bo', markersize=5, color=colors[idx])

    plt.show()

In [15]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap

In [16]:
import pandas as pd

In [17]:
import time

In [18]:
# Time the fetch and the plot separately for a small taxon.
load_start = time.time()
df_species = load_species_occurrence("Alvarezsauridae Bonaparte")
print("Elapsed time loading species in pandas: %s seconds" % (time.time() - load_start))

plot_start = time.time()
plot_species_occurrence(df_species)
print("Elapsed time plotting species: %s seconds" % (time.time() - plot_start))


24 24
Elapsed time loading species in pandas: 0.7711710929870605 seconds
Elapsed time plotting species: 1.568812608718872 seconds

In [19]:
# NOTE(review): load_species_occurrence returns a *list* of DataFrames, so
# string-indexing it should raise TypeError on a fresh run — this output
# likely came from an earlier version that returned one DataFrame; confirm.
df_species['species'].unique()


Out[19]:
array(['Mononykus olecranus', nan, 'Shuvuuia deserti',
       'Ceratonykus oculatus', 'Albertonykus borealis',
       'Alvarezsaurus calvoi', 'Parvicursor remotus',
       'Patagonykus puertai', 'Achillesaurus manazzonei'], dtype=object)

In [20]:
# Same timing experiment for a larger taxon (~41k records per the output
# below) — loading time is dominated by the paginated API fetches.
start = time.time()
df_species = load_species_occurrence("Urocolius Bonaparte") 
stop = time.time()
print("Elapsed time loading species in pandas: %s seconds" % (stop-start))
start = time.time()
plot_species_occurrence(df_species)
stop = time.time()
print("Elapsed time plotting species: %s seconds" % (stop-start))


41144 41144
Elapsed time loading species in pandas: 80.68101739883423 seconds
Elapsed time plotting species: 2.6647543907165527 seconds

In [21]:
# NOTE(review): same caveat as above — df_species is a list of DataFrames,
# so this string indexing should fail on a fresh run; confirm.
df_species['species'].unique()


Out[21]:
array(['Urocolius macrourus', 'Urocolius indicus', nan], dtype=object)

In [24]:
# Largest test (~125k records per the output below): loading takes minutes.
start = time.time()
df_species = load_species_occurrence("Geomyidae Bonaparte") 
stop = time.time()
print("Elapsed time loading species in pandas: %s seconds" % (stop-start))
start = time.time()
plot_species_occurrence(df_species)
stop = time.time()
print("Elapsed time plotting species: %s seconds" % (stop-start))


125022 125022
Elapsed time loading species in pandas: 402.251097202301 seconds
Elapsed time plotting species: 5.072403192520142 seconds

In [25]:
import xlrd # conda install xlrd

In [26]:
# NOTE(review): hardcoded absolute local path — not reproducible on other
# machines; move to a configurable DATA_DIR.
Ohio_dataset = "/home/daniela/Documents/NLeSC/Biodiversity/Aafke/2Daniela.xlsx"
book = xlrd.open_workbook(Ohio_dataset)
fish_selection = book.sheet_by_name('fish_selection')

# Collect species names (column 0) flagged for exploration.
fish_explore = []
for rownum in range(fish_selection.nrows):
    # Column 4 is presumably a y/n "explore this species" flag — TODO confirm
    # against the spreadsheet.
    if fish_selection.row_values(rownum)[4]=='y':
        fish_explore.append(fish_selection.row_values(rownum)[0])

In [167]:
len(fish_explore)


Out[167]:
66

In [235]:
# Let's generalize to list of species
def load_species_occurrences(names_species, **kwargs): 
    if isinstance(names_species, str):
        names_species = [names_species]

    dfs_full = []
    
    for name in names_species:
        usageKey = species.name_backbone(name=name, verbose=False)['usageKey']
        first_res = occurrences.search(taxonKey=usageKey, limit=100000, **kwargs)
        full_results = copy.copy(first_res)

        # results are paginated so we need a loop to fetch them all
        counter = 1
        while first_res['endOfRecords'] is False:
            first_res = occurrences.search(taxonKey=usageKey, offset=300*counter, limit=10000)
            full_results['results'] = copy.copy(full_results['results']) + copy.copy(first_res['results'])
            counter+=1
        
        print("Loading species %s ... " % name)
        print("Number of occurrences: %s " % full_results['count'])
        print(full_results['count'] == len(full_results['results'])) # match?

        df_full = pd.DataFrame(full_results['results']) # load results in pandas dataframes
        dfs_full.append(df_full)
    return dfs_full

def plot_species_occurrences(dfs_full):
    """Plot occurrence records for several species on one shared Mercator basemap.

    Parameters
    ----------
    dfs_full : pandas.DataFrame or list of pandas.DataFrame
        Occurrence records (e.g. from ``load_species_occurrences``); rows
        lacking 'decimalLatitude'/'decimalLongitude' are dropped first.
    """
    if not isinstance(dfs_full, list):
        dfs_full = [dfs_full]
    # One distinct color per species/frame.
    colors = plt.cm.rainbow(np.linspace(0, 1, len(dfs_full)))

    # Drop rows without coordinates once and reuse the cleaned frames
    # (the original recomputed the same dropna three times).
    dfs_clean = [df_full.dropna(how='any', subset=['decimalLatitude', 'decimalLongitude'])
                 for df_full in dfs_full]

    # Combined coordinates, used only to size the map extent.
    dfs_full_latitude = pd.concat([df.decimalLatitude for df in dfs_clean],
                                  ignore_index=True)
    dfs_full_longitude = pd.concat([df.decimalLongitude for df in dfs_clean],
                                   ignore_index=True)

    # One label per frame; guard against an all-NaN 'species' column
    # (the original indexed [0] unconditionally -> IndexError).
    labels = []
    for df_clean in dfs_clean:
        names = df_clean['species'].dropna().tolist()
        labels.append(names[0] if names else 'unknown species')

    plt.figure(figsize=(20, 20))

    # changed projection from 'ortho' to 'robin' to 'merc' finally
    my_map = Basemap(projection='merc', lat_0=50, lon_0=-100,
                     resolution='l', area_thresh=1000.0,
                     llcrnrlon=dfs_full_longitude.min(),  # lower left corner longitude
                     llcrnrlat=dfs_full_latitude.min(),   # lower left corner latitude
                     urcrnrlon=dfs_full_longitude.max(),  # upper right longitude
                     urcrnrlat=dfs_full_latitude.max()    # upper right latitude
                     )

    # Real species names in the title (was the placeholder "asdasf");
    # typo 'occurence' also fixed.
    plt.title("%s occurrence records from GBIF" % ", ".join(labels))
    my_map.drawcoastlines()
    my_map.drawcountries()
    my_map.drawmapboundary(fill_color='#649eff')
    my_map.fillcontinents(color='#cc9955')
    # draw latitude and longitude grid lines
    my_map.drawmeridians(np.arange(0, 360, 30))
    my_map.drawparallels(np.arange(-90, 90, 30))

    for idx, df_clean in enumerate(dfs_clean):
        # project lon/lat into map coordinates
        df_x, df_y = my_map(df_clean.decimalLongitude.tolist(),
                            df_clean.decimalLatitude.tolist())
        my_map.plot(df_x, df_y, 'bo', markersize=10, color=colors[idx],
                    label=labels[idx])
    plt.legend()
    plt.show()

In [234]:
# Load the first two flagged fish species, restricted to US records.
# NOTE(review): exclude_fields is passed straight through to
# occurrences.search — confirm the GBIF API actually honors it.
data_frames = load_species_occurrences(names_species=fish_explore[:2], exclude_fields=['one','two'], country='US')


Loading species Etheostoma_blennioides ... 
Number of occurrences: 6828 
False
Loading species Hypentelium_nigricans ... 
Number of occurrences: 9263 
False

In [189]:
# NOTE(review): In[234] loaded only two frames, so index 2 is out of range on
# a fresh run — this Out[189] predates it (execution counts are out of order).
data_frames[2]['species'].unique()


Out[189]:
array(['Rhinichthys obtusus'], dtype=object)

In [190]:
# Render all loaded species on one shared map.
plot_species_occurrences(data_frames)



In [224]:
# Single request for taxonKey 2341077; only 300 rows come back (see the
# 300-row output below), i.e. the server caps the page size well below 10000.
first_res = occurrences.search(taxonKey="2341077", limit=10000, country='US')

In [228]:
# Keep only the accessRights column; records lacking it show as NaN.
frame = pd.DataFrame(first_res['results'], columns=['accessRights'])

In [229]:
# Rich display of the 300-row accessRights frame (pandas truncates the middle).
frame


Out[229]:
accessRights
0 NaN
1 NaN
2 NaN
3 http://vertnet.org/resources/norms.html
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 NaN
11 NaN
12 http://vertnet.org/resources/norms.html
13 http://vertnet.org/resources/norms.html
14 NaN
15 NaN
16 NaN
17 http://vertnet.org/resources/norms.html
18 NaN
19 http://vertnet.org/resources/norms.html
20 NaN
21 NaN
22 NaN
23 NaN
24 NaN
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
... ...
270 NaN
271 NaN
272 NaN
273 NaN
274 NaN
275 NaN
276 NaN
277 NaN
278 NaN
279 NaN
280 NaN
281 NaN
282 http://biodiversity.ku.edu/research/university...
283 http://vertnet.org/resources/norms.html
284 http://vertnet.org/resources/norms.html
285 http://vertnet.org/resources/norms.html
286 http://vertnet.org/resources/norms.html
287 http://vertnet.org/resources/norms.html
288 NaN
289 http://biodiversity.ku.edu/research/university...
290 http://vertnet.org/resources/norms.html
291 NaN
292 NaN
293 http://vertnet.org/resources/norms.html
294 NaN
295 http://vertnet.org/resources/norms.html
296 http://vertnet.org/resources/norms.html
297 NaN
298 http://biodiversity.ku.edu/research/university...
299 http://vertnet.org/resources/norms.html

300 rows × 1 columns


In [ ]: