In [5]:
import edaHelper as eda
from bokeh.sampledata.autompg import autompg
a=eda.Regression(autompg,'mpg')
print a.df.info()
a.categorize()
print a.df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg 392 non-null float64
cyl 392 non-null category
displ 392 non-null float64
hp 392 non-null int64
weight 392 non-null int64
accel 392 non-null float64
yr 392 non-null int64
origin 392 non-null category
name 392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
mpg has 127 unique values
displ has 81 unique values
hp has 93 unique values
weight has 346 unique values
accel has 95 unique values
yr has 13 unique values
name has 301 unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg 392 non-null float64
cyl 392 non-null category
displ 392 non-null float64
hp 392 non-null int64
weight 392 non-null int64
accel 392 non-null float64
yr 392 non-null int64
origin 392 non-null category
name 392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg 392 non-null float64
cyl 392 non-null category
displ 392 non-null float64
hp 392 non-null int64
weight 392 non-null int64
accel 392 non-null float64
yr 392 non-null int64
origin 392 non-null category
name 392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
In [6]:
import matplotlib as plt
plt.figure.max_open_warning=200
a.plot_all()
/home/d/anaconda/lib/python2.7/site-packages/matplotlib/pyplot.py:424: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
max_open_warning, RuntimeWarning)
In [7]:
a.categorize()
a.plot_against_y()
a.only()
mpg has 127 unique values
displ has 81 unique values
hp has 93 unique values
weight has 346 unique values
accel has 95 unique values
yr has 13 unique values
name has 301 unique values
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 391
Data columns (total 9 columns):
mpg 392 non-null float64
cyl 392 non-null category
displ 392 non-null float64
hp 392 non-null int64
weight 392 non-null int64
accel 392 non-null float64
yr 392 non-null int64
origin 392 non-null category
name 392 non-null object
dtypes: category(2), float64(3), int64(3), object(1)
memory usage: 25.3+ KB
None
8 in column cyl only has value set([1]) in column origin
3 in column cyl only has value set([3]) in column origin
5 in column cyl only has value set([2]) in column origin
In [ ]:
Content source: dingocuster/edaHelper
Similar notebooks: