See Github for project details

Make sure that ENIGMA_APPTOKEN (available at https://public.enigma.com/settings) is set in your .bashrc or session:

export ENIGMA_APPTOKEN=<string>


In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [10]:
import os

import pandas as pd
import seaborn as sns

import sandbox

In [28]:
# https://docs.enigma.com/public/public_v20_user_python.html

import requests

# Bearer-token auth header sent with every request below; the token is read
# from the ENIGMA_APPTOKEN environment variable (KeyError if it is not set).
headers = {'authorization': 'Bearer ' + os.environ['ENIGMA_APPTOKEN']}
# Root of the Enigma Public API; endpoint paths are appended to it.
base_url = 'https://public.enigma.com/api/'

def find_current_snapshot_id(dataset_id):
    """Return the id of the current snapshot of a dataset.

    Args:
        dataset_id: Dataset id, appended to the ``datasets/`` endpoint
            of ``base_url``.

    Returns:
        The ``['current_snapshot']['id']`` value of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = "{}datasets/{}".format(base_url, dataset_id)
    r = requests.get(url, headers=headers)
    # Surface HTTP errors directly instead of failing later with a confusing
    # KeyError (or JSON decode error) on the error payload.
    r.raise_for_status()
    dataset = r.json()
    return dataset['current_snapshot']['id']
# Look up and print the current snapshot id for the dataset id below.
print(find_current_snapshot_id('1ff77e4e-cd39-4467-b344-de2c755bff26')) # Remodeling Dataset

def get_basics(snapshot_id, row_limit=5, row_offset=0):
    """Fetch the metadata and one page of rows for a snapshot.

    Args:
        snapshot_id: Snapshot id, appended to the ``snapshots/`` endpoint
            of ``base_url``.
        row_limit: Sent as the ``row_limit`` query parameter (default 5).
        row_offset: Sent as the ``row_offset`` query parameter (default 0).

    Returns:
        A 4-tuple ``(display_name, column_names, rows, data)``:
        the dataset's display name, the list of field display names,
        the ``table_rows.count`` value, and the ``table_rows.rows`` list
        from the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = "{}snapshots/{}".format(base_url, snapshot_id)
    params = {'row_limit': row_limit, 'row_offset': row_offset}
    r = requests.get(url, headers=headers, params=params)
    # Surface HTTP errors directly instead of failing later with a confusing
    # KeyError (or JSON decode error) on the error payload.
    r.raise_for_status()
    snapshot = r.json()

    table = snapshot['table_rows']
    display_name = snapshot['dataset']['display_name']
    column_names = [field['display_name'] for field in snapshot['fields']]
    return display_name, column_names, table['count'], table['rows']

# Smoke-test get_basics with its default paging (row_limit=5, row_offset=0)
# and print the returned (display_name, column_names, rows, data) tuple.
temp = get_basics(snapshot_id='fee50c37-5ac6-4697-90a0-9e949fd7ee6f')
print(temp)


fee50c37-5ac6-4697-90a0-9e949fd7ee6f
('Upgrades and Remodeling', ['Control number', 'Type of alteration/repair - 1999 version', 'Household member performed alteration/repair', 'Cost of alteration/repair', 'Edit flag for RAS', 'Hurricane Katrina related alteration/repair', 'Edit flag for RAD'], 147329, [["'036000001147'", "'47'", "'1'", '250', "'-9'", "'2'", "'-9'"], ["'036000001147'", "'62'", "'2'", '500', "'-9'", "'2'", "'-9'"], ["'036000001147'", "'63'", "'1'", '70', "'-9'", "'2'", "'-9'"], ["'036000001150'", "'52'", "'2'", '1600', "'-9'", "'2'", "'-9'"], ["'036000001151'", "'57'", "'1'", '200', "'-9'", "'2'", "'-9'"]])

In [34]:
# Fetch up to 1000 rows of the snapshot and load them into a DataFrame.
display_name, column_names, rows, data = \
    get_basics(snapshot_id='fee50c37-5ac6-4697-90a0-9e949fd7ee6f', row_limit=1000)
# String cells come back wrapped in literal single quotes (e.g. "'47'").
# Strip them in plain Python before building the frame: this avoids
# DataFrame.applymap, which is deprecated in recent pandas releases, and
# leaves non-string cells untouched.
df = pd.DataFrame(
    data=[[cell.replace("'", "") if isinstance(cell, str) else cell for cell in row]
          for row in data],
    columns=column_names,
)
df.head()


Out[34]:
Control number Type of alteration/repair - 1999 version Household member performed alteration/repair Cost of alteration/repair Edit flag for RAS Hurricane Katrina related alteration/repair Edit flag for RAD
0 036000001147 47 1 250 -9 2 -9
1 036000001147 62 2 500 -9 2 -9
2 036000001147 63 1 70 -9 2 -9
3 036000001150 52 2 1600 -9 2 -9
4 036000001151 57 1 200 -9 2 -9

In [35]:
# Five most frequent alteration/repair type codes among the fetched rows.
df['Type of alteration/repair - 1999 version'].value_counts().head()


Out[35]:
37    97
53    84
47    77
61    65
57    59
Name: Type of alteration/repair - 1999 version, dtype: int64

Take a look at the distributions of the top three