notebook.community

Edit and run



In [1]:

    
from train import *
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Raise the 'tensorflow' logger threshold so records below WARNING are dropped
# and do not flood the notebook output during training.
tensorflow_logger = logging.getLogger('tensorflow')
tensorflow_logger.setLevel(logging.WARNING)



In [2]:

    
# How many independent random train/test splits the error-distribution
# experiment below will train and evaluate.
number_of_train_test_splits = 30
# Bare expression: echoes the hyper-parameter dict as this cell's output.
# NOTE(review): PARAMS is presumably provided by `from train import *` — confirm.
PARAMS









    Out[2]:





{'cv_folds': 5,
 'div_const': 100,
 'dropout': 0.0,
 'layers': [100, 100],
 'learning_rate': 0.01,
 'max_steps': 100,
 'test_ratio': 0.2,
 'training_set_size': 90000}



In [3]:

    
# Load the header-less CSV; the returned object exposes `.data` (features)
# and `.target` arrays, both requested here as float32.
all_data = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename="micro_data.csv",
    target_dtype=np.float32,
    features_dtype=np.float32)

# Only the first 15 feature columns are used as model inputs.
X = all_data.data[:, :15]
# Scale the target down by the configured constant.
y = all_data.target / PARAMS['div_const']

# Standardize every feature column to zero mean and unit variance.
feature_mean = np.mean(X, axis=0, keepdims=True)
feature_std = np.std(X, axis=0, keepdims=True)
# A constant column has std == 0; dividing by it would turn the whole column
# into NaN and silently poison training. Substitute 1 so that the centred
# column simply becomes all zeros.
feature_std[feature_std == 0] = 1.0
X = (X - feature_mean) / feature_std



In [11]:

    
def stats(normalized_X, normalized_y):
    """Train and evaluate once on a fresh train/test split.

    The inputs are split with `train_test_split`, holding out
    `PARAMS['test_ratio']` of the samples as the test set, and the four
    resulting arrays are handed to `do_training`.

    Args:
        normalized_X: Feature matrix (already normalized by the caller).
        normalized_y: Target values aligned with `normalized_X`.

    Returns:
        Whatever `do_training` returns for this split.
        NOTE(review): presumably a dict of error metrics such as
        'relative_avg_err' / 'relative_max_err' — confirm in `train`.
    """
    held_out_fraction = PARAMS['test_ratio']
    split = train_test_split(
        normalized_X, normalized_y, test_size=held_out_fraction)
    X_train, X_test, y_train, y_test = split
    # NOTE(review): meaning of the leading `1` argument is defined in
    # `train.do_training` and is not visible here — confirm.
    return do_training(1, X_train, X_test, y_train, y_test)

def get_error_distribution(X, y, number_of_splits):
    """Repeat the split/train/evaluate cycle and tabulate the results.

    Calls `stats(X, y)` `number_of_splits` times, printing the zero-based
    iteration index before each run as a progress indicator.

    Args:
        X: Feature matrix forwarded unchanged to `stats`.
        y: Target values forwarded unchanged to `stats`.
        number_of_splits: How many times to call `stats`.

    Returns:
        A pandas DataFrame built from the list of `stats` results, one
        entry per run, in execution order.
    """
    def _run_split(split_index):
        # Emit the progress marker first, then train on a new split.
        print(split_index)
        return stats(X, y)

    per_split_results = [_run_split(k) for k in range(number_of_splits)]
    return pd.DataFrame(per_split_results)



In [5]:

    
# Run the full experiment: one training run per random split, collected
# into a DataFrame with one row per split.
stats_df = get_error_distribution(
    X=X,
    y=y,
    number_of_splits=number_of_train_test_splits,
)









    



0
Removing old model dir...
1
Removing old model dir...
2
Removing old model dir...
3
Removing old model dir...
4
Removing old model dir...
5
Removing old model dir...
6
Removing old model dir...
7
Removing old model dir...
8
Removing old model dir...
9
Removing old model dir...
10
Removing old model dir...
11
Removing old model dir...
12
Removing old model dir...
13
Removing old model dir...
14
Removing old model dir...
15
Removing old model dir...
16
Removing old model dir...
17
Removing old model dir...
18
Removing old model dir...
19
Removing old model dir...
20
Removing old model dir...
21
Removing old model dir...
22
Removing old model dir...
23
Removing old model dir...
24
Removing old model dir...
25
Removing old model dir...
26
Removing old model dir...
27
Removing old model dir...
28
Removing old model dir...
29
Removing old model dir...



In [6]:

    
# Side-by-side histograms of the two error metrics collected across splits.
fig, (ax_avg, ax_max) = plt.subplots(1, 2, figsize=(12, 6))

ax_avg.set_title("Distribution of Average Relative Error")
ax_avg.hist(stats_df["relative_avg_err"].values)

ax_max.set_title("Distribution of Maximal Relative Error")
ax_max.hist(stats_df["relative_max_err"].values)

plt.show()