See Github for project details
Make sure that ENIGMA_APPTOKEN (available at https://public.enigma.com/settings) is set in your .bashrc or in the current shell session:
export ENIGMA_APPTOKEN=<string>
In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell executes, so edits to project modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
In [10]:
import os

import pandas as pd
import seaborn as sns

import sandbox
In [28]:
# https://docs.enigma.com/public/public_v20_user_python.html
import requests
# Bearer-token authorization header passed to every requests.get call below.
# os.environ[...] raises KeyError here if ENIGMA_APPTOKEN is not set.
headers = {'authorization': 'Bearer {}'.format(os.environ['ENIGMA_APPTOKEN'])}
# Common URL prefix; the functions below append the endpoint path to it.
base_url = "https://public.enigma.com/api/"
def find_current_snapshot_id(dataset_id, timeout=30):
    """Return the id of the current snapshot of a dataset.

    Parameters
    ----------
    dataset_id : str
        Dataset identifier; appended to the ``datasets/`` endpoint of ``base_url``.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to ``requests.get``).

    Returns
    -------
    The value stored under ``['current_snapshot']['id']`` in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (for example a rejected token).
    """
    url = base_url + "datasets/" + dataset_id
    # Without a timeout a stalled connection would block the cell indefinitely.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP failures directly instead of as a KeyError on the error payload.
    r.raise_for_status()
    dataset = r.json()
    return dataset['current_snapshot']['id']
# Example call: look up and print the current snapshot id for this dataset id.
print(find_current_snapshot_id('1ff77e4e-cd39-4467-b344-de2c755bff26')) # Remodeling Dataset
def get_basics(snapshot_id, row_limit=5, row_offset=0, timeout=30):
    """Fetch a snapshot and return its name, column names, row count and rows.

    Parameters
    ----------
    snapshot_id : str
        Snapshot identifier; appended to the ``snapshots/`` endpoint of ``base_url``.
    row_limit : int, optional
        Sent as the ``row_limit`` query parameter.
    row_offset : int, optional
        Sent as the ``row_offset`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to ``requests.get``).

    Returns
    -------
    tuple
        ``(display_name, column_names, rows, data)`` where ``display_name`` is
        the dataset's display name, ``column_names`` lists the display name of
        each field, ``rows`` is ``table_rows['count']`` and ``data`` is
        ``table_rows['rows']`` from the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (for example a rejected token).
    """
    url = "{}snapshots/{}".format(base_url, snapshot_id)
    params = {'row_limit': row_limit, 'row_offset': row_offset}
    # Without a timeout a stalled connection would block the cell indefinitely.
    r = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP failures directly instead of as a KeyError on the error payload.
    r.raise_for_status()
    snapshot = r.json()
    display_name = snapshot['dataset']['display_name']
    column_names = [field['display_name'] for field in snapshot['fields']]
    table_rows = snapshot['table_rows']
    rows = table_rows['count']
    data = table_rows['rows']
    return display_name, column_names, rows, data
# Quick check: fetch one snapshot with the default row_limit/row_offset and
# print the raw (display_name, column_names, rows, data) tuple.
temp = get_basics(snapshot_id='fee50c37-5ac6-4697-90a0-9e949fd7ee6f')
print(temp)
In [34]:
# Request up to 1000 rows of the snapshot and load them into a DataFrame whose
# columns are the fields' display names.
display_name, column_names, rows, data = get_basics(
    snapshot_id='fee50c37-5ac6-4697-90a0-9e949fd7ee6f', row_limit=1000)
df = pd.DataFrame(data=data, columns=column_names)
# Strip single quotes from string cells; non-string values pass through as-is.
# Column-wise Series.map is used because DataFrame.applymap is deprecated
# since pandas 2.1, while this form works on older and newer versions alike.
df = df.apply(
    lambda col: col.map(lambda x: x.replace("'", "") if type(x) is str else x))
df.head()
Out[34]:
In [35]:
# Frequency of each value in the alteration/repair-type column; value_counts
# sorts by count (descending) and head() keeps the first five.
df['Type of alteration/repair - 1999 version'].value_counts().head()
Out[35]:
Take a look at the distributions of the top three