In [5]:
import edaHelper as eda
from bokeh.sampledata.autompg import autompg
a=eda.Regression(autompg,'mpg')
print a.df.info()
a.categorize()
print a.df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
mpg has  127  unique values
displ has  81  unique values
hp has  93  unique values
weight has  346  unique values
accel has  95  unique values
yr has  13  unique values
name has  301  unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None

In [6]:
import matplotlib as plt
plt.figure.max_open_warning=200
a.plot_all()


/home/d/anaconda/lib/python2.7/site-packages/matplotlib/pyplot.py:424: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)

In [7]:
a.categorize()
a.plot_against_y()
a.only()


mpg has  127  unique values
displ has  81  unique values
hp has  93  unique values
weight has  346  unique values
accel has  95  unique values
yr has  13  unique values
name has  301  unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg       392 non-null float64
cyl       392 non-null category
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null category
name      392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
8 in column cyl only has value set([1]) in column origin
3 in column cyl only has value set([3]) in column origin
5 in column cyl only has value set([2]) in column origin

In [ ]: