In [1]:
import pandas_profiling
import pandas as pd
In [2]:
# Load the processed properties table (first CSV column becomes the index),
# then discard exact duplicate rows and any row containing a missing value.
props = (
    pd.read_csv('../data/processed_properties.csv', header=0, index_col=0)
    .drop_duplicates()
    .dropna()
)
props.info()

# Generate an HTML profiling report for the cleaned table.
props_profile = pandas_profiling.ProfileReport(props)
props_profile.to_file('props_profile.html')
In [3]:
# Load the comet timings (first CSV column becomes the index), then discard
# exact duplicate rows and any row containing a missing value.
comet = (
    pd.read_csv('../data/comet/comet_unprocessed_timings.csv',
                header=0, index_col=0)
    .drop_duplicates()
    .dropna()
)
comet.info()

# Inner-join the timings with the properties table on the 'matrix' column.
# NOTE(review): a later cell joins on ['matrix', 'matrix_id']; if both frames
# carry 'matrix_id' as a column, this join yields suffixed _x/_y copies of
# it — confirm which key set is intended.
comet_merged = comet.merge(props, on='matrix')

# Profile the joined table and write the report as HTML.
comet_profile = pandas_profiling.ProfileReport(comet_merged)
comet_profile.to_file('comet_unprocessed_timings_profile.html')
In [4]:
# Load the janus timings (first CSV column becomes the index), then discard
# exact duplicate rows and any row containing a missing value.
janus = (
    pd.read_csv('../data/janus/janus_unprocessed_timings.csv',
                header=0, index_col=0)
    .drop_duplicates()
    .dropna()
)
janus.info()

# Inner-join the timings with the properties table on the 'matrix' column.
# NOTE(review): a later cell joins on ['matrix', 'matrix_id']; if both frames
# carry 'matrix_id' as a column, this join yields suffixed _x/_y copies of
# it — confirm which key set is intended.
janus_merged = janus.merge(props, on='matrix')

# Profile the joined table and write the report as HTML.
janus_profile = pandas_profiling.ProfileReport(janus_merged)
janus_profile.to_file('janus_unprocessed_timings_profile.html')
In [5]:
# Load the bridges timings (first CSV column becomes the index), then discard
# exact duplicate rows and any row containing a missing value.
bridges = (
    pd.read_csv('../data/bridges/bridges_unprocessed_timings.csv',
                header=0, index_col=0)
    .drop_duplicates()
    .dropna()
)
bridges.info()

# Inner-join the timings with the properties table on the 'matrix' column.
# NOTE(review): a later cell joins on ['matrix', 'matrix_id']; if both frames
# carry 'matrix_id' as a column, this join yields suffixed _x/_y copies of
# it — confirm which key set is intended.
bridges_merged = bridges.merge(props, on='matrix')

# Profile the joined table and write the report as HTML.
bridges_profile = pandas_profiling.ProfileReport(bridges_merged)
bridges_profile.to_file('bridges_unprocessed_timings_profile.html')
In [6]:
# Stack the three cleaned timing tables row-wise; ignore_index=True discards
# the per-file indexes and renumbers the rows from 0.
all_times = pd.concat(
    objs=[comet, bridges, janus],
    ignore_index=True,
)
all_times.info()
In [9]:
# Inner-join the properties with the pooled timings on both key columns.
combined = props.merge(all_times, on=['matrix', 'matrix_id'])
combined.info()  # summary of the raw join, before duplicate/NaN removal

# Remove exact duplicate rows and rows with missing values, then profile the
# cleaned table and write the report as HTML.
combined = combined.drop_duplicates().dropna()
combined_profile = pandas_profiling.ProfileReport(combined)
combined_profile.to_file('unprocessed_combined_profile.html')
In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
# Build the modelling table: remove the columns listed below from the
# combined frame, then drop any row that still has a missing value.
combined_new = combined.drop(columns=['matrix', 'solver', 'prec',
                                      'status', 'system'])
combined_new = combined_new.dropna()

# Features are every column except the last two; the target is the final
# column.
# NOTE(review): the split is positional, so it silently changes if the column
# order of `combined` changes — confirm that the second-to-last column is
# meant to be excluded from both X and y, and consider selecting by name.
X = combined_new.iloc[:, :-2]
y = combined_new.iloc[:, -1]

# Pin the seed: a random forest bootstraps rows and samples features at
# random, so without random_state every kernel restart fits a different
# model and downstream results are not reproducible.
clf = RandomForestClassifier(random_state=0)
clf.fit(X, y)