In [1]:
import alpine as AlpineAPI
In [2]:
from pprint import pprint
import json
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Read the Alpine connection settings from a local JSON config file so that
# no credentials appear in the notebook itself.
filename = "alpine_login.conf"
with open(filename, "r") as f:
    data = f.read()
conn_info = json.loads(data)

# Pull out the four fields that are passed to the API client below.
host, port, username, password = [
    conn_info[key] for key in ("host", "port", "username", "password")
]
In [4]:
# Create the API client from the host, port and credentials read from the config file.
session = AlpineAPI.APIClient(
    host,
    port,
    username,
    password,
)
In [5]:
# Pretty-print, to a nesting depth of 2, whatever session.workfile.get returns for workfile 701.
pprint(session.workfile.get(701), depth=2)
In [6]:
# ID of the workfile that every run / wait / download call below operates on.
workflow_id = 701
In [7]:
# Start a run of the workflow and keep its process ID for the calls that follow.
process_id = session.workfile.process.run(workflow_id)

# Block until the run finishes. The call is deliberately left as the final
# expression so the notebook displays its return value.
# NOTE(review): query_time and timeout are presumably in seconds - confirm against the API docs.
session.workfile.process.wait_until_finished(
    workflow_id, process_id, verbose=True, query_time=5, timeout=1000
)
Out[7]:
In [8]:
# Fetch the results of the run started above, then preview the 'outputs'
# entry two levels deep.
flow_results = session.workfile.process.download_results(
    workflow_id,
    process_id,
)
pprint(flow_results['outputs'], depth=2)
The downloaded results summarize the output of every operator in the workflow. They arrive as a JSON-serializable Python object, so we can manipulate them directly or save them to disk.
In [9]:
# Save the downloaded results to disk as JSON.
# NOTE(review): the 50 in the file name is hard-coded; presumably it is the
# workflow's default number of trees - confirm.
outfile = "Results_File_N_Trees_{}.fr".format(50)
with open(outfile, "w") as f:
    json.dump(flow_results, f)
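When we convert the downloaded results to a Python object we get a nested dictionary/list structure. Here we pull two values out of it: the test accuracy, read from the "Confusion Matrix" operator, and the number of trees, read from the "Alpine Forest Classification" operator.
The function below parses the results and returns those two values.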
In [10]:
def parse_flow_results(workflow_id, process_id):
    """Download the results of one run and extract (number of trees, accuracy).

    Uses the module-level ``session`` to download the results and to look up
    operators by name.

    Args:
        workflow_id: ID of the workfile that was run.
        process_id: ID of the run whose results should be parsed.

    Returns:
        A tuple ``(N, acc)`` where ``N`` is an int read from the
        'Alpine Forest Classification' operator and ``acc`` is a float read
        from the 'Confusion Matrix' operator.
    """
    results = session.workfile.process.download_results(workflow_id, process_id)

    # Accuracy: the second whitespace-separated token of the 'Class Recall'
    # field in the third item of the confusion matrix's first visual output.
    # NOTE(review): this relies on the exact layout of the operator output - confirm it is stable.
    confusion = session.workfile.process.find_operator('Confusion Matrix', results)
    confusion_items = confusion['visualData'][0]['visualData']['items']
    accuracy_text = confusion_items[2]['Class Recall']
    accuracy = float(accuracy_text.split()[1])

    # Number of trees: the 'Average over All Trees' field of the first item in
    # the forest operator's third visual output.
    forest = session.workfile.process.find_operator('Alpine Forest Classification', results)
    forest_items = forest['visualData'][2]['visualData']['items']
    tree_count = int(forest_items[0]['Average over All Trees'])

    return (tree_count, accuracy)
In [11]:
# Tree counts to sweep over.
ntrees = [5, 10, 25, 50, 75]

# One single-entry list of workflow-variable overrides per tree count.
# The value is passed as a string.
variables = [
    [{"name": "@n_trees", "value": str(count)}]
    for count in ntrees
]
In [12]:
# Display the list of workflow-variable overrides built in the previous cell.
variables
Out[12]:
In [15]:
# Run the workflow once per @n_trees setting and collect the accuracy of each
# run, in the same order as `variables`.
test_acc = []
for variable in variables:
    print("Running with workflow variable: {}".format(variable))

    # Start a run with this override and block until it has finished.
    process_id = session.workfile.process.run(workflow_id, variables=variable)
    session.workfile.process.wait_until_finished(
        workflow_id,
        process_id,
        verbose=True,
        query_time=5,
        timeout=1000,
    )

    # Extract the tree count and accuracy from this run's results.
    N, acc = parse_flow_results(workflow_id, process_id)
    test_acc.append(acc)

    print("For {} trees, test accuracy is {}".format(N, acc))
    print("")
In [16]:
# Plot test accuracy against the number of trees: one marker per run, joined
# by a dashed line. The trailing semicolon suppresses the text repr of the
# last call.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(ntrees, test_acc, "o--")
ax.set_xlabel("Number of trees", size=15)
ax.set_ylabel("Test Accuracy", size=15);
In [ ]: