In [2]:
import edaHelper as eda
from bokeh.sampledata.autompg import autompg
# Wrap the auto-mpg sample frame; 'mpg' is passed as the second argument
# (presumably the regression target -- confirm against edaHelper.Regression).
a = eda.Regression(autompg, 'mpg')

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called bare: wrapping it in print only appended a stray "None" line and
# tied the cell to the Python 2 print statement.
a.df.info()

# Show the column dtypes again after categorize() so the conversion is visible.
a.categorize()
a.df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null int64
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null int64
name      392 non-null object
dtypes: float64(3), int64(5), object(1)
memory usage: 30.6+ KB
None
mpg has  127  unique values
cyl has  5  unique values
displ has  81  unique values
hp has  93  unique values
weight has  346  unique values
accel has  95  unique values
yr has  13  unique values
origin has  3  unique values
name has  301  unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None

In [3]:
import matplotlib as mpl

# Raise the "too many open figures" warning threshold before plotting.
# The threshold is read from rcParams; assigning an attribute on the
# matplotlib.figure module (as the previous code did) has no effect on it.
mpl.rcParams['figure.max_open_warning'] = 200

a.plot_all()

In [4]:
# Re-run categorize() on the object built in an earlier cell, then plot
# against the target and call only().
# NOTE(review): the recorded run of this cell stops at plot_against_y() with
# "AttributeError: 'Unsupervised' object has no attribute 'plot_against_y'",
# so only() was never reached. Confirm which plotting method edaHelper
# actually exposes on this object before relying on this cell.
a.categorize()
a.plot_against_y()
a.only()


mpg has  127  unique values
displ has  81  unique values
hp has  93  unique values
weight has  346  unique values
accel has  95  unique values
yr has  13  unique values
name has  301  unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-4-99c8cf5ffca1> in <module>()
      1 a.categorize()
----> 2 a.plot_against_y()
      3 a.only()

AttributeError: 'Unsupervised' object has no attribute 'plot_against_y'

In [ ]: