notebook.community

Edit and run



In [1]:

    
import pandas as pd
from pandas.io.json import json_normalize #package for flattening json in pandas df
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import json
import os
%matplotlib inline



In [2]:

    
os.chdir("./runs/lagaris/1d_trapz_preparation3/")
origin_path = os.getcwd() 
runs_id = os.listdir("./")
runs_id = [int(item) for item in runs_id]
runs_id = sorted(runs_id)



In [4]:

    
df_list = []
for run_id in runs_id:
    os.chdir("./"+str(run_id))
    print(run_id)
    f_in = open('out.json', 'r')
    
    run_info = json.load(f_in)
    f_in.close()
    a = json_normalize(run_info)
    #a.set_index(pd.Index([run_id]))
    df_list.append(a)
    #a = pd.concat(a,b)
    os.chdir(origin_path)
res1 = pd.concat(df_list,ignore_index=True)









    



---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-4-a49716de0e11> in <module>()
      1 df_list = []
      2 for run_id in runs_id:
----> 3     os.chdir("./"+str(run_id))
      4     print(run_id)
      5     f_in = open('out.json', 'r')

FileNotFoundError: [WinError 2] Не удается найти указанный файл: './0'



In [4]:

    
res_correct = res1



In [5]:

    
os.chdir("../1d_trapz_preparation/")
origin_path = os.getcwd() 
runs_id = os.listdir("./")
runs_id = [int(item) for item in runs_id]
runs_id = sorted(runs_id)
df_list = []
for run_id in runs_id:
    os.chdir("./"+str(run_id))
    f_in = open('out.json', 'r')
    run_info = json.load(f_in)
    f_in.close()
    a = json_normalize(run_info)
    #a.set_index(pd.Index([run_id]))
    df_list.append(a)
    #a = pd.concat(a,b)
    os.chdir(origin_path)
res1 = pd.concat(df_list,ignore_index=True)
res_correct1 = res1[res1['Model info.n_sig'] < 14]
res_correct1 = res_correct1[res_correct1['Model info.m_train'] >= 21]

res_n_sig5_1 = res_correct1[res_correct1['Model info.n_sig'] == 5]

m_train_all_1 = np.array(res_n_sig5_1['Model info.m_train'].tolist())

mse_all_1 = np.array(res_n_sig5_1['Out info.MSE'].tolist())

mse_means_1 = []
std_errs_1 = []
h = 8

for i in range(9):
    ds = res_n_sig5_1[0+h*i:8+h*i]
    mse_l = np.array(ds['Out info.MSE'].tolist())
    mse_means_1.append(np.mean(mse_l))
    std_errs_1.append(np.std(mse_l))

mse_means_1 = np.array(mse_means_1)
my_set_1 = set(m_train_all_1)
m_trains_1 = np.array(sorted( list(my_set_1) ) )
print(m_trains_1.shape)
print(mse_means_1.shape)









    



(9,)
(9,)



In [6]:

    
res_n_sig5 = res_correct[res_correct['Model info.n_sig'] == 5]

m_train_all = np.array(res_n_sig5['Model info.m_train'].tolist())

mse_all = np.array(res_n_sig5['Out info.MSE'].tolist())

mse_means = []
std_errs = []
h = 20

for i in range(13):
    ds = res_n_sig5[0+h*i:8+h*i]
    mse_l = np.array(ds['Out info.MSE'].tolist())
    mse_means.append(np.mean(mse_l))
    std_errs.append(np.std(mse_l))

mse_means = np.array(mse_means)
my_set = set(m_train_all)
m_trains = np.array( sorted( list(my_set) ) )
print(m_trains.shape)
print(mse_means.shape)









    



(13,)
(13,)



In [7]:

    
m_trains = np.concatenate((m_trains, m_trains_1))
mse_means = np.concatenate((mse_means, mse_means_1))
m_train_all = np.concatenate((m_train_all, m_train_all_1))
mse_all = np.concatenate((mse_all, mse_all_1))
std_errs = np.concatenate((std_errs, std_errs_1))

figure = plt.figure(figsize=(12,8))
axes = figure.add_subplot (1, 1, 1)
plt.grid(True)
plt.title('MSE vs number of train points. 5 sigmoids', fontsize=15)
plt.xlabel('Number of train points', fontsize=15)
plt.ylabel('MSE', fontsize=15)
plt.scatter(m_train_all, mse_all, label = 'MSE for all points', marker = "D",s=40)

plt.plot(m_trains, mse_means, color='black', marker='x', linestyle='dashed', linewidth=3, markersize=16, label = 'Mean MSE')
plt.errorbar(m_trains, mse_means, yerr=std_errs, ecolor='r', lw=2, capsize=15, mew = 3, zorder=3, label = 'Errorbar', linestyle='None')
axes.set_yscale ('log', nonposy='clip')

plt.legend(loc=1, prop={'size': 15})
print(std_errs)









    



[2.36514092e-02 8.33557293e-03 6.27291775e-03 2.86890673e-02
 2.38381740e-02 1.16440174e-07 2.29063407e-03 2.19389787e-08
 5.48905040e-09 1.20798746e-11 4.82621916e-09 7.76941724e-13
 2.32448698e-09 7.48882209e-14 4.28903187e-12 3.42037145e-14
 2.50820681e-12 5.09949962e-13 3.82992239e-14 1.91794816e-13
 6.49017104e-14 9.30516474e-14]



In [8]:

    
figure = plt.figure(figsize=(12,8))
axes = figure.add_subplot (1, 1, 1)
plt.grid(True)
plt.title('MSE vs number of train points. 5 sigmoids', fontsize=15)
plt.xlabel('Number of train points', fontsize=15)
plt.ylabel('MSE', fontsize=15)
plt.scatter(m_train_all, mse_all, label = 'MSE for all points', marker = "D",s=40)

plt.plot(m_trains, mse_means, color='black', marker='x', linestyle='dashed', linewidth=3, markersize=16, label = 'Mean MSE')
plt.errorbar(m_trains, mse_means, yerr=std_errs, ecolor='r', lw=2, capsize=15, mew = 3, zorder=3, label = 'Errorbar', linestyle='None')
axes.set_yscale ('log', nonposy='clip')

plt.legend(loc=1, prop={'size': 15})
print(std_errs)









    



[2.36514092e-02 8.33557293e-03 6.27291775e-03 2.86890673e-02
 2.38381740e-02 1.16440174e-07 2.29063407e-03 2.19389787e-08
 5.48905040e-09 1.20798746e-11 4.82621916e-09 7.76941724e-13
 2.32448698e-09 7.48882209e-14 4.28903187e-12 3.42037145e-14
 2.50820681e-12 5.09949962e-13 3.82992239e-14 1.91794816e-13
 6.49017104e-14 9.30516474e-14]



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [9]:

    
#%matplotlib notebook
n_sig_all = np.array(res_correct['Model info.n_sig'].tolist())
n_sig_all = n_sig_all.reshape(n_sig_all.size,1)

m_train_all = np.array(res_correct['Model info.m_train'].tolist())
m_train_all = m_train_all.reshape(m_train_all.size,1)

mse_all = np.array(res_correct['Out info.MSE'].tolist())
mse_all = mse_all.reshape(mse_all.size,1)


from mpl_toolkits.mplot3d.axes3d import Axes3D
fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(1, 2, 1, projection='3d')
ax.scatter(n_sig_all, m_train_all, np.log10(mse_all))









    Out[9]:





<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x2e3e1b280b8>



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [10]:

    
plt.figure(figsize=(10,7))
plt.grid(True)
plt.title('MSE vs m_trapz', fontsize=15)
plt.xlabel('Number of integration points', fontsize=15)
plt.ylabel('MSE', fontsize=15)
ax = plt.gca()
res_correct.plot(
    x='Model info.n_sig',
    y='Out info.MSE',
    logy=True,
    #logx=True,
    ax = plt.gca(),
    style = 'ro',
)

ax.legend(
   loc='best',
   fontsize=15
)









    Out[10]:





<matplotlib.legend.Legend at 0x2e3e2159860>



In [11]:

    
res2 = res_correct[res_correct['Model info.m_train'] > 35]
plt.figure(figsize=(10,7))
plt.title('MSE vs m_trapz', fontsize=26)
res2.plot.scatter(
    x='Model info.n_sig',
    y='Out info.MSE',
    logy=True,
    ax = plt.gca(),
)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-533d06b4d4cd> in <module>()
      6     y='Out info.MSE',
      7     logy=True,
----> 8     ax = plt.gca(),
      9 )

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in scatter(self, x, y, s, c, **kwds)
   3459             ...                       colormap='viridis')
   3460         """
-> 3461         return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds)
   3462 
   3463     def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None,

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2939                           fontsize=fontsize, colormap=colormap, table=table,
   2940                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2941                           sort_columns=sort_columns, **kwds)
   2942     __call__.__doc__ = plot_frame.__doc__
   2943 

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   1975                  yerr=yerr, xerr=xerr,
   1976                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 1977                  **kwds)
   1978 
   1979 

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   1741         if isinstance(data, ABCDataFrame):
   1742             plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax,
-> 1743                              kind=kind, **kwds)
   1744         else:
   1745             raise ValueError("plot kind %r can only be used for data frames"

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in __init__(self, data, x, y, s, c, **kwargs)
    843             # the handling of this argument later
    844             s = 20
--> 845         super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs)
    846         if is_integer(c) and not self.data.columns.holds_integer():
    847             c = self.data.columns[c]

D:\Anaconda\lib\site-packages\pandas\plotting\_core.py in __init__(self, data, x, y, **kwargs)
    818             y = self.data.columns[y]
    819         if len(self.data[x]._get_numeric_data()) == 0:
--> 820             raise ValueError(self._kind + ' requires x column to be numeric')
    821         if len(self.data[y]._get_numeric_data()) == 0:
    822             raise ValueError(self._kind + ' requires y column to be numeric')

ValueError: scatter requires x column to be numeric



In [ ]:

    
#1 6 11 16 21 26 31 36 41 46 51 56



In [ ]:

    
res_correct



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]: