In [14]:
import numpy as np
import pandas as pd
import models.imports.features
import seaborn as sns
import matplotlib.pyplot as plt
## apply seaborn's default styling; this affects the matplotlib/pandas plots drawn below.
sns.set()
In [15]:
## Load the test-set features file through the project's import helper.
## The path is relative, so it is resolved against the kernel's working directory.
## The result is used as a DataFrame in the following cells (e.g. `.info()`).
testsf_ = r'../data/text/bitcoin/test_set.csv'
features_df = models.imports.features.import_file(testsf_)
In [16]:
## quick structural check of the loaded frame (columns, dtypes, non-null counts).
features_df.info()
In [17]:
## Split the loaded frame into x and y with the project helper.
## NOTE(review): presumably x holds the model inputs and y the target (price) --
## confirm against models.imports.features.create_dataset.
test_set_x, test_set_y = models.imports.features.create_dataset(features_df)
In [18]:
## x: summary statistics and plots of the input columns
In [19]:
## summary statistics for x; left as the last expression so the table renders as cell output.
test_set_x.describe()
Out[19]:
In [20]:
## line plot of x against its index; alpha=.5 keeps overlapping lines visible.
test_set_x.plot(alpha=.5, title='estimates')
Out[20]:
In [21]:
## value distribution of x as 100-bin histograms on shared axes; alpha=.5 keeps overlaps visible.
test_set_x.plot.hist(bins=100, alpha=.5, title='estimate histograms')
Out[21]:
In [22]:
## y: summary statistics of the target (price)
In [23]:
## summary statistics for y; left as the last expression so the table renders as cell output.
test_set_y.describe()
Out[23]:
In [24]:
## test-set output (y): raw, log, scaled, and log + scaled views
In [25]:
## Draw y under four transforms side by side in a 1x4 grid:
## as-is, natural log, divided by its max, and log divided by the log's max.
log_y = np.log(test_set_y)
# `transformed_y` stays in the namespace afterwards, as before.
transformed_y = log_y / np.max(log_y)

panels = [
    ('norm', test_set_y),
    ('log', log_y),
    ('scaled', test_set_y / np.max(test_set_y)),
    ('log + scaled', transformed_y),
]

# One subplot per (title, data) pair, left to right.
for position, (panel_title, panel_data) in enumerate(panels, start=1):
    plt.subplot(1, 4, position)
    plt.title(panel_title)
    plt.plot(panel_data)

plt.tight_layout()
plt.show()
In [26]:
## value distribution of y as a 100-bin histogram.
test_set_y.plot.hist(bins=100, title='price histogram')
Out[26]: