In [ ]:
import os
import pandas as pd
import re
import subprocess
import sys
import matplotlib as mpl
mpl.use('Agg') 
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [ ]:
sys.path.append('../code/')

from database_comparisons import Database, DatabaseComparison

In [ ]:
# Java executable to use.
# On Waffle, until the default Java is set to 8, use the explicit path instead:
#java="/usr/lib/jvm/java-8-oracle/jre/bin/java"
# On badger, plain 'java' is used:
# NOTE(review): `java` is not referenced by any cell visible in this notebook —
# confirm whether it is still needed.
java='java'

In [ ]:
! pwd

In [ ]:
! ls -l ../*.jar

In [ ]:
# Create the comparison object for the 'binary' descriptor string and ask it
# to make a database at cutoff 0.04.
# NOTE(review): what make_db does on disk (build vs. reuse) is defined in the
# database_comparisons module, not here — confirm before re-running.
dbc = DatabaseComparison(desc_string='binary')
dbc.make_db(cutoff=0.04)

In [ ]:
# Display the comparison's `summary` attribute after the first make_db call.
dbc.summary

In [ ]:
# NOTE(review): repeats the make_db(cutoff=0.04) call made right after dbc was
# created — presumably to check what happens when that database already
# exists; confirm or remove.
dbc.make_db(cutoff=0.04)

In [ ]:
# Request databases for two further cutoffs (0.3 and 0.5) in a single call.
dbc.make_dbs([0.3, 0.5])

In [ ]:
# Display the `summary` attribute again, now that more cutoffs have been requested.
dbc.summary

In [ ]:
# Display the comparison's `databases` attribute.
dbc.databases

In [ ]:
# Per the method name: database construction time vs. number of nodes.
# The return value is kept in `p`, which later plotting cells reassign.
p = dbc.plot_db_construction_time_vs_n_nodes()

In [ ]:
# Per the method name: database construction time vs. cutoff.
# Overwrites whatever `p` held before.
p = dbc.plot_db_construction_time_vs_cutoff()

In [ ]:
# Per the method name: "cc" vs. cutoff (presumably connected components — confirm).
# Overwrites whatever `p` held before.
p = dbc.plot_cc_vs_cutoff()

In [ ]:
# NOTE(review): always raises AssertionError — presumably a deliberate stop so
# that "Run All" does not continue into the cells below; confirm.
assert False

Run the real query


In [ ]:
def plot(df, x_col, y_col, filename=None,
         title='Number of non-singleton connected components'):
    """Plot one column of ``df`` against another as a dashed line with markers.

    Parameters
    ----------
    df : object indexable by column name (e.g. a pandas DataFrame)
        Source of the data; ``df[x_col]`` and ``df[y_col]`` are plotted.
    x_col, y_col : str
        Column names for the x and y data. They are also used as the axis
        labels, and ``y_col`` is used as the legend entry.
    filename : str, optional
        If truthy, the figure is also written there with ``fig.savefig``.
    title : str, optional
        Axes title. Defaults to the title this function has always used, so
        existing callers are unaffected.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was drawn.
    """
    fig, ax = plt.subplots(1, 1, figsize=(4, 3))

    # The line carries a label so that ax.legend() below has an entry to show;
    # without one the legend is empty and matplotlib emits a warning.
    ax.plot(df[x_col], df[y_col],
            linestyle='--', marker='o', color='#756bb1', label=y_col)
    ax.set_ylim(bottom=0)

    # Legend sits just outside the right edge of the axes, vertically centred.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(title)
    fig.tight_layout()

    if filename:
        fig.savefig(filename)
    return fig

In [ ]:
# NOTE(review): `results` is not defined in any cell visible in this notebook;
# on a fresh kernel this raises NameError unless it is created elsewhere —
# confirm where it should come from.
connected_components = plot(results, 'cutoff', 'connected components')

Try the new Python object


In [ ]:
# NOTE(review): always raises AssertionError — presumably a deliberate stop so
# that "Run All" does not continue into the cells below; confirm.
assert False

In [ ]:
# Parent directory of the stderr_build.txt path under db_binary_0.33; the
# value is shown as the cell output and not stored.
os.path.dirname('../data_mining_Neo4j_v2_3_2/databases/db_binary_0.33/stderr_build.txt')

In [ ]:
! ls ../data_mining_Neo4j_v2_3_2/databases/db_binary_0.33

In [ ]:
# Create a Database object for cutoff 0.33 with the 'binary' descriptor string.
db = Database(cutoff = 0.33, desc_string='binary')

In [ ]:
# Display the result of the database's summary_df() method.
db.summary_df()

In [ ]:
# NOTE(review): load_existing_db is neither defined nor imported in the cells
# visible here (only Database and DatabaseComparison are imported); on a fresh
# kernel this raises NameError — confirm where it comes from.
load_existing_db(db)

In [ ]:
# Try loading an existing db: create a second Database with the same cutoff
# and descriptor string as `db`, then display its summary_df().
db2 = Database(cutoff = 0.33, desc_string='binary')
db2.summary_df()

In [ ]:
# NOTE(review): this passes `db`, not the `db2` created in the preceding cell —
# confirm which object was intended. load_existing_db is also neither defined
# nor imported in the cells visible here.
load_existing_db(db)

In [ ]: