In [14]:
import numpy as np
import pandas as pd
import models.imports.features

import seaborn as sns
import matplotlib.pyplot as plt

# apply seaborn's default styling to the matplotlib figures drawn in this notebook
sns.set()

In [15]:
## load the features DataFrame for the bitcoin test set.
## NOTE(review): import_file is a project-local helper; whatever parsing or
## cleaning it performs is not visible from this notebook.

testsf_ = r'../data/text/bitcoin/test_set.csv'  # relative path to the test-set CSV
features_df = models.imports.features.import_file(testsf_)

In [16]:
# print the column names, dtypes and non-null counts of the loaded frame
features_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 438 entries, 0 to 437
Data columns (total 6 columns):
date     438 non-null object
chibs    438 non-null float64
hm       438 non-null float64
is       438 non-null float64
lr       438 non-null float64
price    438 non-null float64
dtypes: float64(5), object(1)
memory usage: 24.0+ KB

In [17]:
## split features_df into x (model inputs) and y (target).
## NOTE(review): create_dataset is a project-local helper; judging by the
## describe() outputs in this notebook, x holds chibs/hm/is/lr and y holds
## price -- confirm against the helper's source.

test_set_x, test_set_y = models.imports.features.create_dataset(features_df)

In [18]:
## x: the feature columns (chibs, hm, is, lr),

In [19]:
# summary statistics (count, mean, std, quartiles, min/max) for each x column
test_set_x.describe()


Out[19]:
chibs hm is lr
count 438.000000 438.000000 438.000000 438.000000
mean -55153.808507 -66529.265613 -75349.367168 -55028.698495
std 42663.375130 49726.140700 57495.218033 41716.623213
min -305415.252186 -352072.199048 -413326.708742 -292502.817959
25% -74187.056052 -89739.540644 -102376.014141 -73206.271557
50% -50830.446764 -63141.625600 -70437.452087 -52113.932467
75% -25949.610707 -33241.088109 -37184.234472 -26441.975005
max -838.793301 -1095.388574 -1061.871666 -911.180683

In [20]:
# draw every x column on one set of axes; alpha=.5 keeps overlapping series visible
test_set_x.plot(alpha=.5, title='estimates')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x2013b4ca320>

In [21]:
# overlaid 100-bin histograms of the x columns; alpha=.5 keeps overlapping bars visible
test_set_x.plot.hist(bins=100, alpha=.5, title='estimate histograms')


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x2013b51fcf8>

In [22]:
## y: the target column (price),

In [23]:
# summary statistics (count, mean, std, quartiles, min/max) for the target
test_set_y.describe()


Out[23]:
price
count 438.000000
mean 756.124110
std 1309.982576
min 4.220000
25% 122.087500
50% 379.710000
75% 674.965000
max 10895.010000

In [24]:
## train output: candidate transforms of y (raw, log, scaled, log + scaled),

In [25]:
# Compare the target under four candidate transforms, one panel each:
# untransformed ('norm'), natural log, divided by its maximum, and
# log followed by division by the log's maximum.

# The log is needed by two panels, so compute it once.
log_y = np.log(test_set_y)

# Kept as a notebook-level name with the same value the in-place `/=`
# version produced. The frame's own .max() is used instead of np.max(frame)
# so the column-wise reduction is explicit.
transformed_y = log_y / log_y.max()

# (panel title, data) pairs, in left-to-right display order.
panels = [
    ('norm', test_set_y),
    ('log', log_y),
    ('scaled', test_set_y / test_set_y.max()),
    ('log + scaled', transformed_y),
]

# Explicit figure/axes instead of repeated plt.subplot(...) state changes;
# a wide figure keeps four side-by-side panels legible.
fig, axes = plt.subplots(1, len(panels), figsize=(16, 4))
for ax, (panel_title, panel_data) in zip(axes, panels):
    ax.plot(panel_data)
    ax.set_title(panel_title)

fig.tight_layout()
plt.show()



In [26]:
# 100-bin histogram of the target values
test_set_y.plot.hist(bins=100, title='price histogram')


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x2013b61f160>