notebook.community

Edit and run



In [14]:

    
import numpy as np
import pandas as pd
import models.imports.features

import seaborn as sns
import matplotlib.pyplot as plt

# Switch on seaborn's default theme so the matplotlib/pandas plots below pick up its styling.
sns.set()



In [15]:

    
## Load the features DataFrame from the test-set CSV.

# Relative path; presumably resolved against the notebook's working directory.
testsf_ = r'../data/text/bitcoin/test_set.csv'
# import_file is a project helper; the .info() call in the next cell shows it yields a DataFrame.
features_df = models.imports.features.import_file(testsf_)



In [16]:

    
# Sanity-check the load: entry count, column dtypes and non-null counts.
features_df.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 438 entries, 0 to 437
Data columns (total 6 columns):
date     438 non-null object
chibs    438 non-null float64
hm       438 non-null float64
is       438 non-null float64
lr       438 non-null float64
price    438 non-null float64
dtypes: float64(5), object(1)
memory usage: 24.0+ KB



In [17]:

    
## Split the features DataFrame into inputs (x) and target (y).

# create_dataset is a project helper; judging by the describe() outputs further down,
# x carries the chibs/hm/is/lr columns and y carries the price column.
test_set_x, test_set_y = models.imports.features.create_dataset(features_df)



In [18]:

    
## x: the input features (the estimate columns)



In [19]:

    
# Summary statistics (count, mean, std, quartiles, min/max) for each input column.
test_set_x.describe()









    Out[19]:







  
    
      
      chibs
      hm
      is
      lr
    
  
  
    
      count
      438.000000
      438.000000
      438.000000
      438.000000
    
    
      mean
      -55153.808507
      -66529.265613
      -75349.367168
      -55028.698495
    
    
      std
      42663.375130
      49726.140700
      57495.218033
      41716.623213
    
    
      min
      -305415.252186
      -352072.199048
      -413326.708742
      -292502.817959
    
    
      25%
      -74187.056052
      -89739.540644
      -102376.014141
      -73206.271557
    
    
      50%
      -50830.446764
      -63141.625600
      -70437.452087
      -52113.932467
    
    
      75%
      -25949.610707
      -33241.088109
      -37184.234472
      -26441.975005
    
    
      max
      -838.793301
      -1095.388574
      -1061.871666
      -911.180683



In [20]:

    
# Line plot of every estimate column against the DataFrame index.
# The trailing semicolon suppresses the AxesSubplot repr so only the figure is displayed.
test_set_x.plot(alpha=.5, title='estimates');









    Out[20]:





<matplotlib.axes._subplots.AxesSubplot at 0x2013b4ca320>



In [21]:

    
# Overlaid histograms of the estimate columns; alpha keeps the overlapping bars visible.
# The trailing semicolon suppresses the AxesSubplot repr so only the figure is displayed.
test_set_x.plot.hist(bins=100, alpha=.5, title='estimate histograms');









    Out[21]:





<matplotlib.axes._subplots.AxesSubplot at 0x2013b51fcf8>



In [22]:

    
## y: the target (price)



In [23]:

    
# Summary statistics (count, mean, std, quartiles, min/max) for the target.
test_set_y.describe()









    Out[23]:







  
    
      
      price
    
  
  
    
      count
      438.000000
    
    
      mean
      756.124110
    
    
      std
      1309.982576
    
    
      min
      4.220000
    
    
      25%
      122.087500
    
    
      50%
      379.710000
    
    
      75%
      674.965000
    
    
      max
      10895.010000



In [24]:

    
## train output: y under candidate transforms (untransformed, log, scaled, log + scaled)



In [25]:

    
# Compare four views of the target side by side: untransformed, log,
# scaled by its maximum, and log followed by scaling by the maximum.
log_y = np.log(test_set_y)

# transformed_y stays a notebook-level name, as in the original cell.
# Built with a plain division rather than an in-place `/=`, so log_y itself is
# left untouched for the 'log' panel.
transformed_y = log_y / np.max(log_y)

panels = [
    ('norm', test_set_y),
    ('log', log_y),
    ('scaled', test_set_y / np.max(test_set_y)),
    ('log + scaled', transformed_y),
]

# Explicit figure/axes instead of the pyplot state machine: the cell no longer
# depends on whichever figure happens to be current. The wider figsize gives
# the four panels room next to each other.
fig, axes = plt.subplots(1, len(panels), figsize=(16, 4))
for ax, (panel_title, values) in zip(axes, panels):
    ax.set_title(panel_title)
    ax.plot(values)

fig.tight_layout()
plt.show()



In [26]:

    
# Histogram of the target values.
# The trailing semicolon suppresses the AxesSubplot repr so only the figure is displayed.
test_set_y.plot.hist(bins=100, title='price histogram');









    Out[26]:





<matplotlib.axes._subplots.AxesSubplot at 0x2013b61f160>

	chibs	hm	is	lr
count	438.000000	438.000000	438.000000	438.000000
mean	-55153.808507	-66529.265613	-75349.367168	-55028.698495
std	42663.375130	49726.140700	57495.218033	41716.623213
min	-305415.252186	-352072.199048	-413326.708742	-292502.817959
25%	-74187.056052	-89739.540644	-102376.014141	-73206.271557
50%	-50830.446764	-63141.625600	-70437.452087	-52113.932467
75%	-25949.610707	-33241.088109	-37184.234472	-26441.975005
max	-838.793301	-1095.388574	-1061.871666	-911.180683