In [3]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [4]:
#Lets provide some insight into PO.DAAC impact metrics 
#in Elseviers SCOPUS service

In [5]:
#First lets import the libraries we require
from pprint import pprint
import podaac.podaac as podaac
import podaac.podaac_utils as utils

In [6]:
#Then we can create instances of the classes we will use
p = podaac.Podaac()
u = utils.PodaacUtils()

In [7]:
#Print a list of dataset short names
print('\nHeres list_all_available_granule_search_dataset_short_names()')
result = u.list_all_available_granule_search_dataset_short_names()
dsetShortName = [i for i in result]
print(len(result))


Heres list_all_available_granule_search_dataset_short_names()
607

In [8]:
#dataset_landing_pages = []
#Perform a landing page search on each dataset, note this can take some time as we are querying >600 times
#for ds in dsetShortName:
#    result = p.dataset_search(short_name=ds)
#    #Cache the dataset landing page URL
#    searchStr = 'http://podaac.jpl.nasa.gov/dataset/'
#    for i in result.strip().split():
#        if searchStr in i:
#            dataset_landing_pages.append(str(i))
#print(dataset_landing_pages)

In [9]:
from bs4 import BeautifulSoup
poddac_dois = []
#Perform a doi search on each dataset, note this can take some time as we are querying >600 times
for ds in dsetShortName:
    try:
        result = p.dataset_search(short_name=ds, full='True')
    except Exception:
        print('Error retrieving record for ' + ds)
    soup = BeautifulSoup(result, 'html.parser')
    doi_element = soup.find('podaac:datasetdoi', text=True)
    if doi_element is not None:
        doi = doi_element.text #do not include 'https://doi.org/' prefix as it eventually makes search less accurate and increases runtime
        poddac_dois.append(doi)


404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=WAF_DEALIASED_SASS_L2
Error retrieving record for WAF_DEALIASED_SASS_L2
404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=WENTZ_SASS_SIGMA0_L2
Error retrieving record for WENTZ_SASS_SIGMA0_L2
404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=CHELTON_SEASAT_SASS_L3
Error retrieving record for CHELTON_SEASAT_SASS_L3
404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=SEASAT_BYU_L3_OW_SIGMA0_ENHANCED
Error retrieving record for SEASAT_BYU_L3_OW_SIGMA0_ENHANCED
404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=UCLA_DEALIASED_SASS_L3
Error retrieving record for UCLA_DEALIASED_SASS_L3
404 Client Error: Not Found for url: https://podaac.jpl.nasa.gov/ws/search/dataset/?itemsPerPage=7&pretty=True&format=atom&full=True&shortName=ATLAS_DEALIASED_SASS_L2
Error retrieving record for ATLAS_DEALIASED_SASS_L2

In [10]:
print(len(poddac_dois))


570

In [11]:
import json
import requests
data = []
full_dois = []
full_titles = []
doi_count = 0
#Using Elsevier's Scopus Search, lets see if we can 
#retreieve any information from the above dataset dois
for podaac_doi in poddac_dois:
    url = 'https://api.elsevier.com/content/search/scopus?query=ALL:' + podaac_doi + '&date=2017-2019&APIKey=715b412c00f0b95e918a3e7abe6e6ee4'
    try:
        metadata = requests.get(url)
        status_codes = [404, 400, 503, 408]
        if metadata.status_code in status_codes:
            metadata.raise_for_status()
    except requests.exceptions.HTTPError as error:
        print(error)
        raise
    json_data = json.loads(metadata.text)
    doi_count = doi_count + int(json_data['search-results']['opensearch:totalResults'])
    entries = json_data['search-results']['entry']
    for entry in entries:
        if 'error' not in entry:
            try:
                prism_doi = entry['prism:doi']
            except KeyError:
                prism_doi = '.'
            full_dois.append(prism_doi)
            try:
                prism_publication_name = entry['prism:publicationName']
            except KeyError:
                prism_publication_name = '.'
            try:
                dc_title = entry['dc:title']
            except KeyError:
                dc_title = '.'
            full_titles.append(dc_title.lower())
            try:
                citedby_count = entry['citedby-count']
            except KeyError:
                citedby_count = '.'
            data.append(['.', '.', dc_title, prism_publication_name, citedby_count, entry['prism:url'], prism_doi, '.', '.', '.', '.', '.', '.', '.', '.'])
print('Located {} PO.DAAC DOI hits from Scopus database.'.format(doi_count))


Located 76 PO.DAAC DOI hits from Scopus database.

In [12]:
#Again, using Elsevier's Scopus Search, lets see other 'podaac' resources we can retreive.
orig_url = 'https://api.elsevier.com/content/search/scopus?query=ALL:podaac&date=2017-2019&APIKey=715b412c00f0b95e918a3e7abe6e6ee4'
def process_url(url):
    try:
        metadata = requests.get(url)
        status_codes = [404, 400, 503, 408]
        if metadata.status_code in status_codes:
            metadata.raise_for_status()
    except requests.exceptions.HTTPError as error:
        print(error)
        raise
    json_data = json.loads(metadata.text)
    if (json_data['search-results']['link'][2]['@ref'] or json_data['search-results']['link'][3]['@ref'] == 'next') or (json_data['search-results']['link'][0]['@hef'] == json_data['search-results']['link'][3]['@hef']):
        switch = ''
        if json_data['search-results']['link'][2]['@ref'] == 'next':
            switch = 'first'
        elif json_data['search-results']['link'][3]['@ref'] == 'next':
            switch = 'pagination'
        else:
            switch = 'last'
        entries = json_data['search-results']['entry']
        for entry in entries:
            try:
                prism_doi = entry['prism:doi']
                full_dois.append(prism_doi)
            except KeyError:
                prism_doi = '.'
            try:
                prism_publication_name = entry['prism:publicationName']
            except KeyError:
                prism_publication_name = '.'
            try:
                dc_title = entry['dc:title']
                full_titles.append(dc_title.lower())
            except KeyError:
                dc_title = '.'
            try:
                citedby_count = entry['citedby-count']
            except KeyError:
                citedby_count = '.'
            data.append(['.', '.', dc_title, prism_publication_name, citedby_count, entry['prism:url'], prism_doi, '.', '.', '.', '.', '.', '.', '.', '.'])
        if switch == 'first':
            print('Processing: ' + json_data['search-results']['link'][2]['@href'])
            process_url(json_data['search-results']['link'][2]['@href'])
        elif switch == 'pagination':
            print('Processing: ' + json_data['search-results']['link'][3]['@href'])
            process_url(json_data['search-results']['link'][3]['@href'])
        elif switch == 'last':
            return
process_url(orig_url)
#doi_count = doi_count + process_url(url, doi_count)
#print('Revised DOI hits to {} after "podaac" keyword search in Scopus database.'.format(doi_count))
#print(full_dois)


Processing: https://api.elsevier.com/content/search/scopus?start=25&count=25&query=ALL%3Apodaac&date=2017-2019&apiKey=715b412c00f0b95e918a3e7abe6e6ee4
Processing: https://api.elsevier.com/content/search/scopus?start=50&count=25&query=ALL%3Apodaac&date=2017-2019&apiKey=715b412c00f0b95e918a3e7abe6e6ee4
Processing: https://api.elsevier.com/content/search/scopus?start=75&count=25&query=ALL%3Apodaac&date=2017-2019&apiKey=715b412c00f0b95e918a3e7abe6e6ee4
Processing: https://api.elsevier.com/content/search/scopus?start=100&count=25&query=ALL%3Apodaac&date=2017-2019&apiKey=715b412c00f0b95e918a3e7abe6e6ee4

In [13]:
print(len(full_dois))


177

In [14]:
import pandas as pd
#Simply print the data as a pandas data frame
pd.DataFrame(data, columns=["Dataset Name as Cited in Article", "PERSISTENT_ID", "Manuscript Title", "Journal Source", "Impact factor of the Manuscript", "Article URL", "DOI", "PDF Name", "In-Text", "In-Text 2", "Referenced", "Referenced 2", "In-Text", "Referenced", "Confirmed"])


Out[14]:
Dataset Name as Cited in Article PERSISTENT_ID Manuscript Title Journal Source Impact factor of the Manuscript Article URL DOI PDF Name In-Text In-Text 2 Referenced Referenced 2 In-Text Referenced Confirmed
0 . . Water masses and oceanic eddy regulation of la... Journal of Marine Systems 0 https://api.elsevier.com/content/abstract/scop... 10.1016/j.jmarsys.2018.03.004 . . . . . . . .
1 . . Evaluation of satellite-derived SST products i... Marine Technology Society Journal 2 https://api.elsevier.com/content/abstract/scop... 10.4031/MTSJ.52.3.7 . . . . . . . .
2 . . Does upwelling intensity determine larval fish... Fisheries Oceanography 2 https://api.elsevier.com/content/abstract/scop... 10.1111/fog.12224 . . . . . . . .
3 . . Synergistic use of remote sensing and modeling... Remote Sensing 2 https://api.elsevier.com/content/abstract/scop... 10.3390/rs9080778 . . . . . . . .
4 . . Larval fish assemblages across an upwelling fr... Estuarine, Coastal and Shelf Science 10 https://api.elsevier.com/content/abstract/scop... 10.1016/j.ecss.2016.12.015 . . . . . . . .
5 . . Impact of the elevation angle on CYGNSS GNSS-R... Remote Sensing 1 https://api.elsevier.com/content/abstract/scop... 10.3390/rs10111749 . . . . . . . .
6 . . Intercomparison of Antarctic ice-shelf, ocean,... Geoscientific Model Development 1 https://api.elsevier.com/content/abstract/scop... 10.5194/gmd-11-1257-2018 . . . . . . . .
7 . . An economist’s guide to climate change Science Journal of Economic Perspectives 0 https://api.elsevier.com/content/abstract/scop... 10.1257/jep.32.4.3 . . . . . . . .
8 . . Eddy-Resolving In Situ Ocean Climatologies of ... Journal of Geophysical Research: Oceans 0 https://api.elsevier.com/content/abstract/scop... 10.1029/2018JC014548 . . . . . . . .
9 . . Evaluation of GRACE Mascon Gravity Solution in... IEEE Transactions on Geoscience and Remote Sen... 1 https://api.elsevier.com/content/abstract/scop... 10.1109/TGRS.2016.2616760 . . . . . . . .
10 . . Covariability of near-surface wind speed stati... Journal of Physical Oceanography 0 https://api.elsevier.com/content/abstract/scop... 10.1175/JPO-D-17-0177.1 . . . . . . . .
11 . . Physical modulation to the biological producti... Ocean Science 0 https://api.elsevier.com/content/abstract/scop... 10.5194/os-14-1303-2018 . . . . . . . .
12 . . Review of energy systems deployment and develo... Energy 2 https://api.elsevier.com/content/abstract/scop... 10.1016/j.energy.2018.07.185 . . . . . . . .
13 . . Anchovy (Engraulis encrasicolus) early life st... Hydrobiologia 3 https://api.elsevier.com/content/abstract/scop... 10.1007/s10750-017-3253-9 . . . . . . . .
14 . . Uncertainty in the global oceanic CO2 uptake i... Biogeosciences 0 https://api.elsevier.com/content/abstract/scop... 10.5194/bg-15-1701-2018 . . . . . . . .
15 . . Assessment and validation of the offshore wind... International Journal of Energy Technology and... 0 https://api.elsevier.com/content/abstract/scop... 10.1504/IJETP.2018.095606 . . . . . . . .
16 . . Using 3DVAR data assimilation to measure offsh... Applied Energy 1 https://api.elsevier.com/content/abstract/scop... 10.1016/j.apenergy.2017.09.030 . . . . . . . .
17 . . Modeling and sensitivity of the seasonal ocean... Sustainable Energy Technologies and Assessments 2 https://api.elsevier.com/content/abstract/scop... 10.1016/j.seta.2016.11.002 . . . . . . . .
18 . . Assessment of the offshore wind speed distribu... International Journal of Renewable Energy Rese... 3 https://api.elsevier.com/content/abstract/scop... . . . . . . . . .
19 . . The efficacy of aerosol-cloud radiative pertur... Atmospheric Chemistry and Physics 0 https://api.elsevier.com/content/abstract/scop... 10.5194/acp-18-17475-2018 . . . . . . . .
20 . . Understanding terrestrial water storage variat... Hydrology and Earth System Sciences 1 https://api.elsevier.com/content/abstract/scop... 10.5194/hess-22-4061-2018 . . . . . . . .
21 . . Emerging trends in global freshwater availability Nature 31 https://api.elsevier.com/content/abstract/scop... 10.1038/s41586-018-0123-1 . . . . . . . .
22 . . Sustained Water Loss in California's Mountain ... Journal of Geophysical Research: Solid Earth 7 https://api.elsevier.com/content/abstract/scop... 10.1002/2017JB014424 . . . . . . . .
23 . . Land cover, land use, and climate change impac... Remote Sensing 2 https://api.elsevier.com/content/abstract/scop... 10.3390/rs9060623 . . . . . . . .
24 . . Refined Estimates of Net Community Production ... Global Biogeochemical Cycles 2 https://api.elsevier.com/content/abstract/scop... 10.1002/2017GB005792 . . . . . . . .
25 . . Diurnal Convection-Wind Coupling in the Bay of... Journal of Geophysical Research: Atmospheres 2 https://api.elsevier.com/content/abstract/scop... 10.1002/2017JD027271 . . . . . . . .
26 . . Refined Estimates of Net Community Production ... Global Biogeochemical Cycles 2 https://api.elsevier.com/content/abstract/scop... 10.1002/2017GB005792 . . . . . . . .
27 . . Array DBMS in environmental science: Satellite... Proceedings of the 2017 IEEE 9th International... 1 https://api.elsevier.com/content/abstract/scop... 10.1109/IDAACS.2017.8095248 . . . . . . . .
28 . . Time Variations in Ocean Bottom Pressure from ... Journal of Geophysical Research: Oceans 1 https://api.elsevier.com/content/abstract/scop... 10.1029/2018JC014108 . . . . . . . .
29 . . A Meteoric Water Budget for the Arctic Ocean Journal of Geophysical Research: Oceans 2 https://api.elsevier.com/content/abstract/scop... 10.1002/2017JC012807 . . . . . . . .
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
149 . . The gulf stream convergence zone in the time-m... Journal of the Atmospheric Sciences 9 https://api.elsevier.com/content/abstract/scop... 10.1175/JAS-D-16-0213.1 . . . . . . . .
150 . . Hyperspectral IASI L1C data compression Sensors (Switzerland) 0 https://api.elsevier.com/content/abstract/scop... 10.3390/s17061404 . . . . . . . .
151 . . Impact of tracking loop settings of the Swarm ... Advances in Space Research 7 https://api.elsevier.com/content/abstract/scop... 10.1016/j.asr.2017.03.003 . . . . . . . .
152 . . Interannual and decadal variability in tropica... Water (Switzerland) 1 https://api.elsevier.com/content/abstract/scop... 10.3390/w9060402 . . . . . . . .
153 . . Predicting the dispersal of wild pacific oyste... Aquatic Invasions 2 https://api.elsevier.com/content/abstract/scop... 10.3391/ai.2017.12.2.01 . . . . . . . .
154 . . Employing predictive spatial models to inform ... Frontiers in Marine Science 3 https://api.elsevier.com/content/abstract/scop... 10.3389/fmars.2017.00149 . . . . . . . .
155 . . Implementing Salinity Gradient Energy at River... Pressure Retarded Osmosis: Renewable Energy Ge... 0 https://api.elsevier.com/content/abstract/scop... 10.1016/B978-0-12-812103-0.00005-2 . . . . . . . .
156 . . Enabling the extraction of climate-scale tempo... IEEE Transactions on Geoscience and Remote Sen... 2 https://api.elsevier.com/content/abstract/scop... 10.1109/TGRS.2017.2656081 . . . . . . . .
157 . . Multi-scale quantitative precipitation forecas... Journal of Hydrology 2 https://api.elsevier.com/content/abstract/scop... 10.1016/j.jhydrol.2017.03.003 . . . . . . . .
158 . . Evaluation of Different Wind Fields for Storm ... Journal of Coastal Research 3 https://api.elsevier.com/content/abstract/scop... 10.2112/JCOASTRES-D-15-00202.1 . . . . . . . .
159 . . Interannual surface salinity on Northwest Atla... Journal of Geophysical Research: Oceans 7 https://api.elsevier.com/content/abstract/scop... 10.1002/2016JC012580 . . . . . . . .
160 . . Element analysis: A wavelet-based method for a... Proceedings of the Royal Society A: Mathematic... 4 https://api.elsevier.com/content/abstract/scop... 10.1098/rspa.2016.0776 . . . . . . . .
161 . . SMAP L-Band Microwave Radiometer: Instrument D... IEEE Transactions on Geoscience and Remote Sen... 22 https://api.elsevier.com/content/abstract/scop... 10.1109/TGRS.2016.2631978 . . . . . . . .
162 . . Quantifying the anthropogenic impact on ground... Journal of Applied Remote Sensing 1 https://api.elsevier.com/content/abstract/scop... 10.1117/1.JRS.11.026029 . . . . . . . .
163 . . The effect of ENSO to the variability of sea s... IOP Conference Series: Earth and Environmental... 2 https://api.elsevier.com/content/abstract/scop... 10.1088/1755-1315/55/1/012066 . . . . . . . .
164 . . Improved Sea Ice Fraction Characterization for... IEEE Transactions on Geoscience and Remote Sen... 2 https://api.elsevier.com/content/abstract/scop... 10.1109/TGRS.2016.2622011 . . . . . . . .
165 . . Ka-Band Dual Copolarized Empirical Model for t... IEEE Transactions on Geoscience and Remote Sen... 13 https://api.elsevier.com/content/abstract/scop... 10.1109/TGRS.2016.2628640 . . . . . . . .
166 . . Long-term and seasonal Caspian Sea level chang... Journal of Geophysical Research: Solid Earth 12 https://api.elsevier.com/content/abstract/scop... 10.1002/2016JB013595 . . . . . . . .
167 . . Spatial-temporal analysis of sea level changes... IOP Conference Series: Earth and Environmental... 1 https://api.elsevier.com/content/abstract/scop... 10.1088/1742-6596/52/1/012065 . . . . . . . .
168 . . Thermospheric density estimation and responses... Journal of Atmospheric and Solar-Terrestrial P... 3 https://api.elsevier.com/content/abstract/scop... 10.1016/j.jastp.2016.12.011 . . . . . . . .
169 . . Homogenization of scatterometer wind retrievals International Journal of Climatology 6 https://api.elsevier.com/content/abstract/scop... 10.1002/joc.4746 . . . . . . . .
170 . . Annual cyclicity in export efficiency in the i... Global Biogeochemical Cycles 4 https://api.elsevier.com/content/abstract/scop... 10.1002/2016GB005561 . . . . . . . .
171 . . Modeling and sensitivity of the seasonal ocean... Sustainable Energy Technologies and Assessments 2 https://api.elsevier.com/content/abstract/scop... 10.1016/j.seta.2016.11.002 . . . . . . . .
172 . . Evaluating the hydrological consistency of eva... Hydrology and Earth System Sciences 2 https://api.elsevier.com/content/abstract/scop... 10.5194/hess-21-323-2017 . . . . . . . .
173 . . Global variability and changes in ocean total ... Geophysical Research Letters 0 https://api.elsevier.com/content/abstract/scop... 10.1002/2016GL071712 . . . . . . . .
174 . . Water resources: Sustaining quality and quantity Engineering for Sustainable Communities: Princ... 0 https://api.elsevier.com/content/abstract/scop... 10.1061/9780784414811.ch16 . . . . . . . .
175 . . Improvements of storm surge forecasting in the... European Journal of Remote Sensing 1 https://api.elsevier.com/content/abstract/scop... 10.1080/22797254.2017.1350558 . . . . . . . .
176 . . Development of water level estimation algorith... Journal of Applied Remote Sensing 1 https://api.elsevier.com/content/abstract/scop... 10.1117/1.JRS.11.016012 . . . . . . . .
177 . . Greenland and Antarctica Ice Sheet Mass Change... Surveys in Geophysics 14 https://api.elsevier.com/content/abstract/scop... 10.1007/s10712-016-9398-7 . . . . . . . .
178 . . Renewable Energy: Physics, Engineering, Enviro... Renewable Energy: Physics, Engineering, Enviro... 9 https://api.elsevier.com/content/abstract/scop... . . . . . . . . .

179 rows × 15 columns


In [15]:
#Lets find the overlap/match between what's been retreived from SCOPUS and what the JPL Librarians discovered
#We can do this by attemtping matches on DOI
from openpyxl import load_workbook
jpl_doi_list = []
wb = load_workbook(filename = '2017_PO.DAAC_Citation_cleaned_and_verified.xlsx', read_only=True)
ws = wb['Cleaned']
for doi_cell in ws['G2':'G169']:
    for doi in doi_cell:
        doi_str = str(doi.value)
        jpl_doi_list.append(doi_str.replace('https://doi.org/',''))
print('Matched DOI Hits: ' + str(len([i for i in jpl_doi_list if i in full_dois])))


Matched DOI Hits: 46

In [16]:
#Trying the same with Manuscript titles is not reliable... the JPL versions are not 'clean' enough.
jpl_title_list = []
wb = load_workbook(filename = '2017_PO.DAAC_Citation_cleaned_and_verified.xlsx', read_only=True)
ws = wb['Cleaned']
for title_cell in ws['C2':'C169']:
    for title in title_cell:
        title_str = str(title.value)
        jpl_title_list.append(title_str.lower())
print('Matched Manuscript Name Hits: ' + str(len([i for i in jpl_title_list if i in full_titles])))


Matched Manuscript Name Hits: 44

In [ ]: