In [1]:
from harness import Harness
import sklearn.datasets
import sklearn.discriminant_analysis
import sklearn.mixture
import sklearn.model_selection
from pandas import (
CategoricalIndex, DataFrame, Index, Series
)
from IPython.display import (
Markdown,
)
In [2]:
"""Can I load the iris data"""
# Load the bundled iris dataset. Later cells read the 'data', 'target',
# 'feature_names' and 'target_names' entries of the returned object.
iris = sklearn.datasets.load_iris()
In [3]:
"""Can I initialize a Harness DataFrame"""
# Wrap the iris measurements in a Harness frame whose index is the integer
# class label (level 'target') and attach an LDA estimator to it.
df = Harness(
    data=iris['data'],
    index=CategoricalIndex(iris['target']).rename('target'),
    columns=iris['feature_names'],
    estimator=sklearn.discriminant_analysis.LinearDiscriminantAnalysis(),
    feature_level='target',
)

# Derive two more index levels from the same categorical labels by renaming
# the categories: a readable species name and a plotting colour per class.
# Both are computed from the freshly built frame's index, before any
# re-indexing, exactly as in the chained form.
target_name_level = (
    df.index
    .rename_categories(iris['target_names'])
    .rename('target_name')
)
color_level = (
    df.index
    .rename_categories(['red', 'green', 'blue'])
    .rename('color')
)

# Append the levels one at a time: target -> target_name -> color.
df = df.set_index(target_name_level, append=True)
df = df.set_index(color_level, append=True)

# Show a random handful of rows as the cell output.
df.sample(5)
Out[3]:
In [4]:
# Take the first of two stratified folds. StratifiedKFold.split yields
# (train_indices, test_indices) pairs, so despite the variable's name,
# element 0 holds the TRAIN positions and element 1 the TEST positions.
test_train = next(
    sklearn.model_selection
    .StratifiedKFold(n_splits=2)
    # NOTE(review): df.Index is not a plain pandas attribute; presumably
    # Harness exposes it to fetch the 'target' index level. Confirm.
    .split(df.values, df.Index('target'))
)
train_rows, test_rows = test_train

# One-column helper frame, aligned to df's index, that labels every row
# with the partition it belongs to.
split = DataFrame(index=df.index, columns=['split'])
split.iloc[train_rows], split.iloc[test_rows] = 'train', 'test'

# Push the label into the index and move it to the outermost level so that
# a partition can be selected with a single leading key.
df = df.set_index(
    split.set_index('split', append=True).swaplevel(-1, 0).index
)

# `.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` performs the
# same label-based lookup on the outer 'split' level.
# Fit on the training rows and preview a few rows of whatever fit() returns.
df.loc['train'].fit().sample(5)
Out[4]:
In [5]:
# Score on the rows whose outer 'split' index level is 'train'.
# `.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` is the
# label-based accessor that performs the same lookup here.
# NOTE(review): score() is presumably forwarded to the attached estimator;
# confirm against Harness.
df.loc['train'].score()
Out[5]:
In [6]:
# Register a template named 'scoreboard' on df. The `{{ ... }}` placeholders
# are evaluated against `df` when the template is fetched, so the accessors
# inside the text are live code: they use `.loc` because `.ix` was deprecated
# in pandas 0.20 and removed in 1.0. The trailing `;` suppresses the cell's
# output.
df.add_template(
    scoreboard="""
The table presents some information about the `{{
df.estimator.__str__().split('(',1)[0]
}}` model applied to the `iris` dataset.
| | Test | Training |
|---------|-------------------------|--------------------------|
| Score |{{df.loc['test'].score()}}|{{df.loc['train'].score()}}|
| Samples | {{df.loc['test'] | len}} | {{df.loc['train'] | len}} |
"""
);
In [7]:
# Fetch the 'scoreboard' template from df and hand the result to IPython's
# Markdown display object, which is the cell's output.
scoreboard_markdown = df.get_template('scoreboard')
Markdown(scoreboard_markdown)
Out[7]:
In [8]:
# NOTE(review): transform() is called with no arguments, so this is presumably
# Harness applying its attached estimator's transform rather than the plain
# pandas DataFrame.transform (which requires a function). Confirm.
transformed = df.transform()
# Preview two random rows of the result as the cell output.
transformed.sample(2)
Out[8]:
In [9]:
# Flatten the index levels (split / target / target_name / color) into
# ordinary columns so they can be referenced by name in the data source.
plot_frame = transformed.reset_index()

# x/y refer to columns 0 and 1 of the transformed frame; fill colour and text
# come from the 'color' and 'target_name' columns.
# NOTE(review): indentation was lost in this export. Both calls below are
# assumed to belong to the `with` body; confirm against the notebook.
with plot_frame.DataSource(
    x=0, y=1, fill_color='color', text='target_name'
) as source:
    source.Scatter(color='text', marker='split')
    source.save()