http://jupyter.readthedocs.io/en/latest
https://github.com/ipython/ipython/wiki/A-gallery-of-interesting-IPython-Notebooks
In [ ]:
import json
import requests

# Get the current location of the International Space Station
iss_url = 'http://api.open-notify.org/iss-now.json'
data = requests.get(iss_url).json()
print(json.dumps(data, indent=4))
In [ ]:
# Latitude and longitude of Charlottesville, VA
LAT = 38.0293
LON = -78.4767
iss_url = 'http://api.open-notify.org/iss-pass.json?lat={}&lon={}'.format(LAT, LON)
print(iss_url)
In [ ]:
data = requests.get(iss_url).json()
print(json.dumps(data, indent=4))
In [ ]:
import arrow
In [ ]:
from arrow.arrow import Arrow

for item in data['response']:
    risetime = Arrow.fromtimestamp(item['risetime'])
    print(
        'The ISS will be visible over Charlottesville on {} at {} for {} seconds.'.format(
            risetime.date(),
            risetime.time(),
            item['duration']
        )
    )
In [ ]:
pokeapi = 'http://pokeapi.co/api/v2/generation/1/'
pokedata = requests.get(pokeapi).json()
# Take that data, print out a nicely formatted version of the first 5 results
print(json.dumps(pokedata['pokemon_species'][:5], indent=4))
In [ ]:
# Let's get more info about the first pokemon on the list
# By following the chain of linked data
# Narrow down the url we'd like to get
bulbasaur_url = pokedata['pokemon_species'][0]['url']
# request data from that URL
bulbasaur_data = requests.get(bulbasaur_url).json()
# Let's remove the 'flavor text' because that's really long
del bulbasaur_data['flavor_text_entries']
In [ ]:
bulbasaur_data
Many institutions have an OAI-PMH based API.
This is great because they all share a unified way of interacting with the data in their repositories, just with different host URLs.
You can write common code that works with most OAI-PMH feeds by changing only the base access URL.
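For example, here's a minimal sketch of such a reusable helper. The function name `fetch_oai_records` is hypothetical; it just parameterizes the base URL that the cells below hard-code:
In [ ]:
import requests
from furl import furl

def fetch_oai_records(base_url, metadata_prefix='oai_dc'):
    # Build a ListRecords request against any OAI-PMH endpoint
    url = furl(base_url)
    url.args['verb'] = 'ListRecords'
    url.args['metadataPrefix'] = metadata_prefix
    return requests.get(url.url).content

# Only the base URL changes from repository to repository
vt_records = fetch_oai_records('http://vtechworks.lib.vt.edu/oai/request')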
In [ ]:
from furl import furl
vt_url = furl('http://vtechworks.lib.vt.edu/oai/request')
In [ ]:
vt_url.args['verb'] = 'ListRecords'
vt_url.args['metadataPrefix'] = 'oai_dc'
vt_url.url
In [ ]:
data = requests.get(vt_url.url)
data.content
In [ ]:
from lxml import etree
etree_element = etree.XML(data.content)
etree_element
In [ ]:
etree_element.getchildren()
In [ ]:
# A little namespace parsing and cleanup
namespaces = etree_element.nsmap
namespaces['ns0'] = etree_element.nsmap[None]
del namespaces[None]
records = etree_element.xpath('//ns0:record', namespaces=namespaces)
records[:10]
In [ ]:
# What's inside one of these records?
one_record = records[0]
one_record.getchildren()
In [ ]:
# We want to check out the "metadata" element, which is the second in the list
# Let's make sure to get those namespaces too
# Here's a cool trick to join 2 dictionaries in python 3!
namespaces = {**namespaces, **one_record[1][0].nsmap}
del namespaces[None]
# Now we have namespaces we can use!
namespaces
In [ ]:
# Use those namespaces to get titles
titles = records[0].xpath('//dc:title/node()', namespaces=namespaces)
titles[:10]
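Note that an xpath beginning with '//' is absolute, so the cell above pulls titles from the entire document even though the search starts from records[0]. To scope the search to a single record, a relative './/' path works; here's a small sketch using the same namespaces dict:
In [ ]:
# './/' keeps the search relative to one_record instead of the whole tree
one_title = one_record.xpath('.//dc:title/node()', namespaces=namespaces)
one_title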
The SHARE search API is built on a tool called Elasticsearch. It lets you search a subset of SHARE's normalized metadata in a simple format.
Here are the fields available in SHARE's Elasticsearch endpoint:
- 'title'
- 'language'
- 'subject'
- 'description'
- 'date'
- 'date_created'
- 'date_modified'
- 'date_updated'
- 'date_published'
- 'tags'
- 'links'
- 'awards'
- 'venues'
- 'sources'
- 'contributors'
You can see a formatted version of the base results from the API by visiting the SHARE Search API URL.
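For example, any of those fields can be searched with a standard Elasticsearch query. Here's a minimal sketch of a full-text query against the 'title' field; the search term is just an illustration, and the query_share helper defined later in this notebook shows how to POST it:
In [ ]:
# A hypothetical match query against the 'title' field
title_query = {
    "query": {
        "match": {
            "title": "giraffes"
        }
    }
}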
In [ ]:
SHARE_SEARCH_API = 'https://staging-share.osf.io/api/search/abstractcreativework/_search'
In [ ]:
from furl import furl
search_url = furl(SHARE_SEARCH_API)
search_url.args['size'] = 3
recent_results = requests.get(search_url.url).json()
recent_results = recent_results['hits']['hits']
In [ ]:
recent_results
In [ ]:
print('The request URL is {}'.format(search_url.url))
print('----------')
for result in recent_results:
    print(
        '{} -- from {}'.format(
            result['_source']['title'],
            result['_source']['sources']
        )
    )
In [ ]:
import json
def query_share(url, query):
    # A helper function that will use the requests library,
    # pass along the correct headers, and make the query we want
    headers = {'Content-Type': 'application/json'}
    data = json.dumps(query)
    return requests.post(url, headers=headers, data=data).json()
In [ ]:
search_url.args = None # reset the args so that we remove our old query arguments.
search_url.url # Show the URL that we'll be requesting to make sure the args were cleared
In [ ]:
tags_query = {
    "query": {
        "exists": {
            "field": "tags"
        }
    }
}

missing_tags_query = {
    "query": {
        "bool": {
            "must_not": {
                "exists": {
                    "field": "tags"
                }
            }
        }
    }
}
In [ ]:
with_tags = query_share(search_url.url, tags_query)
missing_tags = query_share(search_url.url, missing_tags_query)
total_results = requests.get(search_url.url).json()['hits']['total']
with_tags_percent = (float(with_tags['hits']['total'])/total_results)*100
missing_tags_percent = (float(missing_tags['hits']['total'])/total_results)*100
print(
    '{} results out of {}, or {}%, have tags.'.format(
        with_tags['hits']['total'],
        total_results,
        format(with_tags_percent, '.2f')
    )
)
print(
    '{} results out of {}, or {}%, do NOT have tags.'.format(
        missing_tags['hits']['total'],
        total_results,
        format(missing_tags_percent, '.2f')
    )
)
print('------------')
print('As a little sanity check....')
print('{} + {} = {}%'.format(
    format(with_tags_percent, '.2f'),
    format(missing_tags_percent, '.2f'),
    format(with_tags_percent + missing_tags_percent, '.2f')
))
SHARE has a host of other APIs that provide direct access to the data stored in SHARE.
You can read more about the SHARE Data Models here: http://share-research.readthedocs.io/en/latest/share_models.html
In [ ]:
SHARE_API = 'https://staging-share.osf.io/api/'
In [ ]:
share_endpoints = requests.get(SHARE_API).json()
share_endpoints
You can visit https://staging-share.osf.io/api/ to see the data formatted as "pretty printed" JSON.
In [ ]:
SHARE_PROVIDERS = 'https://staging-share.osf.io/api/providers/'
In [ ]:
data = requests.get(SHARE_PROVIDERS).json()
data
In [ ]:
print('Here are the first 10 Providers:')
for source in data['results']:
    print(
        '{}\n{}\n{}\n'.format(
            source['long_title'],
            source['home_page'],
            source['provider_name']
        )
    )
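The providers endpoint is paginated, so the loop above only covers the first page of results. As a sketch, assuming the response includes a 'next' URL alongside 'results' (check the payload printed above to confirm), you could walk every page like this:
In [ ]:
# Collect providers across all pages; assumes each page carries a
# 'next' link pointing at the following page (None on the last page)
all_providers = []
url = SHARE_PROVIDERS
while url:
    page = requests.get(url).json()
    all_providers.extend(page['results'])
    url = page.get('next')
len(all_providers)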