In [1]:
import os
# Note: we don't need Pandas
# Filters allow you to accomplish many basic operations automatically
from sodapy import Socrata
In [2]:
# Connection settings for the open-data portal and the dataset queried below
socrata_domain = 'opendata.socrata.com'
socrata_dataset_identifier = 'f92i-ik66'

# An app token is optional. To supply one, export it in your shell
# (or add the line to your .bashrc) before starting the notebook:
#   $ export SODAPY_APPTOKEN=<token>
# When the variable is unset, the token is None.
socrata_token = os.getenv("SODAPY_APPTOKEN")
In [3]:
# Build the client used for every query against socrata_domain
client = Socrata(socrata_domain, app_token=socrata_token)
In [4]:
# Fetch the dataset's metadata, then list the name of each column it describes
metadata = client.get_metadata(socrata_dataset_identifier)
[column['name'] for column in metadata['columns']]
Out[4]:
In [5]:
# Pick out the metadata entry whose column name is 'AMOUNT'
meta_amount = [
    column
    for column in metadata['columns']
    if column['name'] == 'AMOUNT'
][0]
meta_amount
Out[5]:
In [6]:
# The column metadata carries cached statistics; read the average from them.
# Note that the value is a string by default.
cached_stats = meta_amount['cachedContents']
cached_stats['average']
Out[6]:
In [7]:
# The 'where' argument filters the data before it is downloaded
results = client.get(socrata_dataset_identifier, where="amount >= 2433")

# Compare the dataset-wide non-null count from the metadata with what we fetched
non_null_count = meta_amount['cachedContents']['non_null']
downloaded_count = len(results)
print("Total number of non-null results: {}".format(non_null_count))
print("Number of results downloaded: {}".format(downloaded_count))

# Preview the first three rows
results[:3]
Out[7]:
In [8]:
# Combine a row filter, a column projection, and a sort in a single request
results = client.get(
    socrata_dataset_identifier,
    where="amount < 2433",
    select="amount, job",
    order="amount ASC",
)

# Preview the first three rows
results[:3]
Out[8]:
In [10]:
# Aggregate in the request itself: total amount per recipient, largest first
results = client.get(
    socrata_dataset_identifier,
    group="recipient",
    select="sum(amount), recipient",
    order="sum(amount) DESC",
)
results
Out[10]:
In [11]:
# Request only six rows, keeping just the name and amount columns
results = client.get(
    socrata_dataset_identifier,
    limit=6,
    select="name, amount",
)
results
Out[11]:
In [11]:
# Page through the dataset: each request fetches `loop_size` rows, and
# `offset` skips the rows already returned by earlier pages.
loop_size = 3
num_loops = 2

for i in range(num_loops):
    results = client.get(socrata_dataset_identifier,
                         select="name, amount",
                         # SODA does not guarantee row order unless one is
                         # requested, so sort on the system field `:id` to
                         # keep pages from overlapping or skipping rows.
                         order=":id",
                         limit=loop_size,
                         offset=loop_size * i)
    print("\n> Loop number: {}".format(i))

    # Print one row per line so each page is easy to read
    for result in results:
        print(result)
In [13]:
# Instead of separate keyword arguments, the whole request can be written as
# one SoQL statement and passed through the 'query' argument.
query = """
select
name,
amount
where
amount > 1000
and amount < 2000
limit
5
"""
# Run the statement above against the same dataset and show the rows returned
results = client.get(socrata_dataset_identifier, query=query)
results
Out[13]:
In [20]:
# A second portal and dataset, queried through its own client
# (the app token from the environment is reused)
nyc_dogs_domain = 'data.cityofnewyork.us'
nyc_dogs_dataset_identifier = 'nu7n-tubp'
nyc_dogs_client = Socrata(nyc_dogs_domain, socrata_token)

# 'q' passes a search term; only the two selected columns are returned
results = nyc_dogs_client.get(
    nyc_dogs_dataset_identifier,
    q="Slider",
    select="animalname, breedname",
)
results
Out[20]:
There's plenty more to do! Check out the "Queries using SODA" documentation for additional functionality.