In [1]:
import numpy as np
import pandas as pd
import models.imports.features
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default theme so every matplotlib/pandas figure below
# shares the same styling.
sns.set()
In [2]:
## Load the training features via the project's import helper.
# The path is relative, so it resolves against the notebook's working directory.
trainsf_ = r'../data/text/bitcoin/train_set.csv'
features_df = models.imports.features.import_file(trainsf_)
In [3]:
# Quick structural check of the loaded data (columns, dtypes, non-null counts).
# NOTE(review): assumes import_file returns a pandas DataFrame — confirm.
features_df.info()
In [4]:
## Split the loaded data into x and y.
# create_dataset returns a pair, unpacked here as (x, y).
features_set_x, features_set_y = models.imports.features.create_dataset(features_df)
In [5]:
## x: the inputs (features_set_x)
In [6]:
# Summary statistics of the inputs.
features_set_x.describe()
Out[6]:
In [7]:
# Plot the inputs with pandas' default plot kind; the partial transparency
# keeps overlapping series distinguishable.
features_set_x.plot(title='estimates', alpha=0.5)
Out[7]:
In [8]:
# Distribution of the inputs as overlaid, semi-transparent 100-bin histograms.
features_set_x.plot(kind='hist', bins=100, alpha=0.5, title='estimate histograms')
Out[8]:
In [9]:
## y: the target (features_set_y)
In [10]:
# Summary statistics of the target.
features_set_y.describe()
Out[10]:
In [11]:
## train output: the raw target compared with its log and max-scaled transforms
In [12]:
# Compare the raw target against three candidate transforms, side by side.
# The log is computed once and reused, and the scaled variants are built
# without in-place mutation so the series already assigned to a panel is
# never modified afterwards.
# NOTE(review): np.log yields -inf/NaN for values <= 0 — assumes the target
# is strictly positive; confirm against the data.
log_y = np.log(features_set_y)
transformed_y = log_y / np.max(log_y)  # same name as before, for any later cells

panels = (
    ('norm', features_set_y),
    ('log', log_y),
    ('scaled', features_set_y / np.max(features_set_y)),
    ('log + scaled', transformed_y),
)

# One explicit figure with one axes per panel, wide enough to stay legible.
fig, axes = plt.subplots(1, len(panels), figsize=(16, 4))
for panel_ax, (panel_title, panel_data) in zip(axes, panels):
    panel_ax.set_title(panel_title)
    panel_ax.plot(panel_data)
fig.tight_layout()
plt.show()
In [13]:
# Distribution of the target as a 100-bin histogram.
features_set_y.plot(kind='hist', bins=100, title='price histogram')
Out[13]: