See GitHub for project details.

Make sure that ENIGMA_APPTOKEN (available at https://public.enigma.com/settings) is set in your .bashrc or in the current shell session:

export ENIGMA_APPTOKEN=<string>



In [1]:

    
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Load the autoreload extension and use mode 2 so that imported modules are
# re-imported before each cell runs and local code edits are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2



In [10]:

    
import os

import pandas as pd
import seaborn as sns

import sandbox



In [28]:

    
# https://docs.enigma.com/public/public_v20_user_python.html

import requests

# Every API call authenticates with the bearer token read from the
# ENIGMA_APPTOKEN environment variable; a missing variable raises KeyError here.
headers = {'authorization': 'Bearer ' + os.environ['ENIGMA_APPTOKEN']}
# Root of the Enigma Public API; endpoint paths are appended to it.
base_url = "https://public.enigma.com/api/"

def find_current_snapshot_id(dataset_id, timeout=30):
    """Return the id of the current snapshot of an Enigma Public dataset.

    Parameters
    ----------
    dataset_id : str
        Identifier of the dataset; it is appended to ``base_url + "datasets/"``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The ``['current_snapshot']['id']`` value of the dataset's JSON description.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status code.
    """
    url = "{}datasets/{}".format(base_url, dataset_id)
    r = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than as a confusing KeyError on the
    # error payload below.
    r.raise_for_status()
    dataset = r.json()
    return dataset['current_snapshot']['id']
# Remodeling Dataset: resolve and print the id of its current snapshot.
print(find_current_snapshot_id(dataset_id='1ff77e4e-cd39-4467-b344-de2c755bff26'))

def get_basics(snapshot_id, row_limit=5, row_offset=0, timeout=30):
    """Fetch the name, column names, row count and a page of rows for a snapshot.

    Parameters
    ----------
    snapshot_id : str
        Identifier of the snapshot; it is appended to ``base_url + "snapshots/"``.
    row_limit : int, optional
        Sent as the ``row_limit`` query parameter (default 5).
    row_offset : int, optional
        Sent as the ``row_offset`` query parameter (default 0).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    tuple
        ``(display_name, column_names, rows, data)`` where ``display_name`` is
        the dataset's display name, ``column_names`` lists the display name of
        every field, ``rows`` is the ``count`` reported under ``table_rows``
        (appears to be the snapshot's total row count, not the page size --
        confirm against the API docs) and ``data`` is the list of returned rows.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status code.
    """
    url = "{}snapshots/{}".format(base_url, snapshot_id)
    params = {'row_limit': row_limit, 'row_offset': row_offset}
    r = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP errors here rather than as a confusing KeyError on the
    # error payload below.
    r.raise_for_status()
    snapshot = r.json()
    display_name = snapshot['dataset']['display_name']
    column_names = [field['display_name'] for field in snapshot['fields']]
    table_rows = snapshot['table_rows']
    return display_name, column_names, table_rows['count'], table_rows['rows']

# Preview the snapshot with the default paging (row_limit=5, row_offset=0).
temp = get_basics('fee50c37-5ac6-4697-90a0-9e949fd7ee6f')
print(temp)









    



fee50c37-5ac6-4697-90a0-9e949fd7ee6f
('Upgrades and Remodeling', ['Control number', 'Type of alteration/repair - 1999 version', 'Household member performed alteration/repair', 'Cost of alteration/repair', 'Edit flag for RAS', 'Hurricane Katrina related alteration/repair', 'Edit flag for RAD'], 147329, [["'036000001147'", "'47'", "'1'", '250', "'-9'", "'2'", "'-9'"], ["'036000001147'", "'62'", "'2'", '500', "'-9'", "'2'", "'-9'"], ["'036000001147'", "'63'", "'1'", '70', "'-9'", "'2'", "'-9'"], ["'036000001150'", "'52'", "'2'", '1600', "'-9'", "'2'", "'-9'"], ["'036000001151'", "'57'", "'1'", '200', "'-9'", "'2'", "'-9'"]])



In [34]:

    
# Pull the first 1000 rows of the snapshot and load them into a DataFrame
# whose columns carry the fields' display names.
display_name, column_names, rows, data = get_basics(
    snapshot_id='fee50c37-5ac6-4697-90a0-9e949fd7ee6f', row_limit=1000)
df = pd.DataFrame(data=data, columns=column_names)
# Text values arrive wrapped in literal single quotes (e.g. "'47'"); strip them.
# DataFrame.replace with regex=True rewrites string cells only and leaves any
# non-string value untouched, matching the previous element-wise lambda
# without relying on the deprecated DataFrame.applymap.
df = df.replace("'", "", regex=True)
df.head()









    Out[34]:







  
    
      
      Control number
      Type of alteration/repair - 1999 version
      Household member performed alteration/repair
      Cost of alteration/repair
      Edit flag for RAS
      Hurricane Katrina related alteration/repair
      Edit flag for RAD
    
  
  
    
      0
      036000001147
      47
      1
      250
      -9
      2
      -9
    
    
      1
      036000001147
      62
      2
      500
      -9
      2
      -9
    
    
      2
      036000001147
      63
      1
      70
      -9
      2
      -9
    
    
      3
      036000001150
      52
      2
      1600
      -9
      2
      -9
    
    
      4
      036000001151
      57
      1
      200
      -9
      2
      -9



In [35]:

    
# Five most frequent alteration/repair type codes among the fetched rows.
df.loc[:, 'Type of alteration/repair - 1999 version'].value_counts().head(5)









    Out[35]:





37    97
53    84
47    77
61    65
57    59
Name: Type of alteration/repair - 1999 version, dtype: int64

Next, take a look at the distributions of the three most common alteration/repair types.

	Control number	Type of alteration/repair - 1999 version	Household member performed alteration/repair	Cost of alteration/repair	Edit flag for RAS	Hurricane Katrina related alteration/repair	Edit flag for RAD
0	036000001147	47	1	250	-9	2	-9
1	036000001147	62	2	500	-9	2	-9
2	036000001147	63	1	70	-9	2	-9
3	036000001150	52	2	1600	-9	2	-9
4	036000001151	57	1	200	-9	2	-9