notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
# Small demo frame with one column per dtype of interest:
#   a -> integers, b -> floats, c -> strings,
#   d -> digit characters stored as strings, e -> booleans.
df = pd.DataFrame(
    data={
        'a': [1, 2, 1, 3],
        'b': [0.4, 1.1, 0.1, 0.8],
        'c': ['X', 'Y', 'X', 'Z'],
        'd': ['3', '5', '2', '1'],
        'e': [True, True, False, True],
    },
)



In [3]:

    
# Show the whole frame; it is only four rows, so no .head() is needed.
print(df)









    



   a    b  c  d      e
0  1  0.4  X  3   True
1  2  1.1  Y  5   True
2  1  0.1  X  2  False
3  3  0.8  Z  1   True



In [4]:

    
# Column dtypes decide which statistics describe() reports.
# Note that 'd' holds digit strings, so it is object, not an integer dtype.
print(df.dtypes)









    



a      int64
b    float64
c     object
d     object
e       bool
dtype: object



In [5]:

    
# With default arguments describe() summarises only the numeric columns
# (a and b here); the object and bool columns are left out.
print(df.describe())









    



              a         b
count  4.000000  4.000000
mean   1.750000  0.600000
std    0.957427  0.439697
min    1.000000  0.100000
25%    1.000000  0.325000
50%    1.500000  0.600000
75%    2.250000  0.875000
max    3.000000  1.100000



In [6]:

    
# The summary is itself a DataFrame, so normal indexing works on it.
print(type(df.describe()))









    



<class 'pandas.core.frame.DataFrame'>



In [7]:

    
# Pull one statistic (a row of the summary) by its label.
print(df.describe().loc['std'])









    



a    0.957427
b    0.439697
Name: std, dtype: float64



In [8]:

    
# Pull a single scalar: row label 'std', column label 'b'.
print(df.describe().at['std', 'b'])









    



0.439696865275764



In [9]:

    
# Excluding numeric columns leaves c, d and e (bool is not treated as
# 'number' here); they are summarised with count / unique / top / freq.
print(df.describe(exclude='number'))









    



        c  d     e
count   4  4     4
unique  3  4     2
top     X  3  True
freq    2  1     3



In [10]:

    
# Build a frame that contains only the non-numeric columns (all rows kept),
# then show it.
df_notnum = df.loc[:, ['c', 'd', 'e']]
print(df_notnum)









    



   c  d      e
0  X  3   True
1  Y  5   True
2  X  2  False
3  Z  1   True



In [11]:

    
# Confirm that no numeric dtype remains in the subset.
print(df_notnum.dtypes)









    



c    object
d    object
e      bool
dtype: object



In [12]:

    
# When a frame has no numeric columns, plain describe() reports
# count / unique / top / freq without needing include/exclude.
print(df_notnum.describe())









    



        c  d     e
count   4  4     4
unique  3  4     2
top     X  3  True
freq    2  1     3



In [13]:

    
# include='all' summarises every column; statistics that do not apply to a
# column's dtype are filled with NaN.
print(df.describe(include='all'))









    



               a         b    c    d     e
count   4.000000  4.000000    4    4     4
unique       NaN       NaN    3    4     2
top          NaN       NaN    X    3  True
freq         NaN       NaN    2    1     3
mean    1.750000  0.600000  NaN  NaN   NaN
std     0.957427  0.439697  NaN  NaN   NaN
min     1.000000  0.100000  NaN  NaN   NaN
25%     1.000000  0.325000  NaN  NaN   NaN
50%     1.500000  0.600000  NaN  NaN   NaN
75%     2.250000  0.875000  NaN  NaN   NaN
max     3.000000  1.100000  NaN  NaN   NaN



In [14]:

    
# include also accepts a Python type: only the integer column 'a' is kept.
print(df.describe(include=int))









    



              a
count  4.000000
mean   1.750000
std    0.957427
min    1.000000
25%    1.000000
50%    1.500000
75%    2.250000
max    3.000000



In [15]:

    
# Even with a single selected column the result is a DataFrame, not a Series.
print(type(df.describe(include=int)))









    



<class 'pandas.core.frame.DataFrame'>



In [16]:

    
# A list of types selects several dtypes at once (object and bool columns).
print(df.describe(include=[object, bool]))









    



        c  d     e
count   4  4     4
unique  3  4     2
top     X  3  True
freq    2  1     3



In [17]:

    
# exclude takes a list too. Dropping float and object columns leaves the
# integer column 'a' and the bool column 'e'; because the remaining columns
# get different kinds of statistics, the gaps are NaN.
print(df.describe(exclude=[float, object]))









    



               a     e
count   4.000000     4
unique       NaN     2
top          NaN  True
freq         NaN     3
mean    1.750000   NaN
std     0.957427   NaN
min     1.000000   NaN
25%     1.000000   NaN
50%     1.500000   NaN
75%     2.250000   NaN
max     3.000000   NaN



In [18]:

    
# Per-column count on its own; works for every dtype and matches the
# 'count' row of describe().
print(df.count())









    



a    4
b    4
c    4
d    4
e    4
dtype: int64



In [19]:

    
# Number of distinct values per column; unlike describe()'s 'unique' row it
# is also reported for the numeric columns.
print(df.nunique())









    



a    3
b    4
c    3
d    4
e    2
dtype: int64



In [20]:

    
# Most frequent value(s) per column. A column whose values tie produces
# several rows; columns with fewer modes are padded with NaN.
print(df.mode())









    



     a    b    c  d     e
0  1.0  0.1    X  1  True
1  NaN  0.4  NaN  2   NaN
2  NaN  0.8  NaN  3   NaN
3  NaN  1.1  NaN  5   NaN



In [21]:

    
# Counting the non-NaN entries of the mode table gives the number of modes
# each column has.
print(df.mode().count())









    



a    1
b    4
c    1
d    4
e    1
dtype: int64



In [22]:

    
# First row of the mode table = one mode per column.
# For columns where values tie this is not necessarily the 'top' value that
# describe() reports (column d: '1' here versus '3' in describe()).
print(df.mode().iloc[0])









    



a       1
b     0.1
c       X
d       1
e    True
Name: 0, dtype: object



In [23]:

    
# Frequency of the most common value in column c: the first entry of
# value_counts(). This corresponds to describe()'s 'freq' row, which is 2
# for column c.
print(df['c'].value_counts().iat[0])



In [24]:

    
# Same top-frequency lookup, applied to every column of the frame.
print(df.apply(lambda col: col.value_counts().iat[0]))









    



a    2
b    1
c    2
d    1
e    3
dtype: int64



In [25]:

    
# numeric_only=True skips the object columns (c, d). The bool column e is
# kept and averaged as 0/1, unlike in the default describe().
print(df.mean(numeric_only=True))









    



a    1.75
b    0.60
e    0.75
dtype: float64



In [26]:

    
# Standard deviation per numeric column; a and b match the 'std' row of
# describe().
print(df.std(numeric_only=True))









    



a    0.957427
b    0.439697
e    0.500000
dtype: float64



In [27]:

    
# Minimum per numeric column (the bool column reports False as 0.0).
print(df.min(numeric_only=True))









    



a    1.0
b    0.1
e    0.0
dtype: float64



In [28]:

    
# Maximum per numeric column (the bool column reports True as 1.0).
print(df.max(numeric_only=True))









    



a    3.0
b    1.1
e    1.0
dtype: float64



In [29]:

    
# Median per numeric column; equals the '50%' row of describe().
print(df.median(numeric_only=True))









    



a    1.5
b    0.6
e    1.0
dtype: float64



In [30]:

    
# Quartiles on their own: a list for q returns one row per requested
# quantile (the '25%' and '75%' rows of describe()).
print(df.quantile(q=[0.25, 0.75], numeric_only=True))









    



         a      b     e
0.25  1.00  0.325  0.75
0.75  2.25  0.875  1.00



In [31]:

    
# q=0 and q=1 give the minimum and maximum, so this reproduces the
# five-number summary (min, 25%, 50%, 75%, max) in one call.
print(df.quantile(q=[0, 0.25, 0.5, 0.75, 1], numeric_only=True))









    



         a      b     e
0.00  1.00  0.100  0.00
0.25  1.00  0.325  0.75
0.50  1.50  0.600  1.00
0.75  2.25  0.875  1.00
1.00  3.00  1.100  1.00



In [32]:

    
# Custom percentiles replace the default 25% / 75% rows; the 50% (median)
# row still appears even though it was not requested.
print(df.describe(percentiles=[0.2, 0.4, 0.6, 0.8]))









    



              a         b
count  4.000000  4.000000
mean   1.750000  0.600000
std    0.957427  0.439697
min    1.000000  0.100000
20%    1.000000  0.280000
40%    1.200000  0.480000
50%    1.500000  0.600000
60%    1.800000  0.720000
80%    2.400000  0.920000
max    3.000000  1.100000



In [33]:

    
# Casting every column to string makes describe() treat them all as
# non-numeric: count / unique / top / freq for every column.
print(df.astype('str').describe())









    



        a    b  c  d     e
count   4    4  4  4     4
unique  3    4  3  4     2
top     1  1.1  X  3  True
freq    2    1  2  1     3



In [34]:

    
# Cast only column a to string so that it is summarised together with the
# other non-numeric columns.
print(df.astype({'a': str}).describe(exclude='number'))









    



        a  c  d     e
count   4  4  4     4
unique  3  3  4     2
top     1  X  3  True
freq    2  2  1     3



In [35]:

    
# Opposite direction: cast the digit strings in d and the booleans in e to
# int so both get the numeric summary (mean, std, quartiles, ...).
print(df.astype({'d': int, 'e': int}).describe())









    



              a         b         d     e
count  4.000000  4.000000  4.000000  4.00
mean   1.750000  0.600000  2.750000  0.75
std    0.957427  0.439697  1.707825  0.50
min    1.000000  0.100000  1.000000  0.00
25%    1.000000  0.325000  1.750000  0.75
50%    1.500000  0.600000  2.500000  1.00
75%    2.250000  0.875000  3.500000  1.00
max    3.000000  1.100000  5.000000  1.00



In [36]:

    
# describe() is also available on a single column (a Series).
# Start with the integer column.
s_int = df['a']
print(s_int)









    



0    1
1    2
2    1
3    3
Name: a, dtype: int64



In [37]:

    
# Numeric Series -> the same statistics as the numeric DataFrame summary.
print(s_int.describe())









    



count    4.000000
mean     1.750000
std      0.957427
min      1.000000
25%      1.000000
50%      1.500000
75%      2.250000
max      3.000000
Name: a, dtype: float64



In [38]:

    
# Series.describe() returns a Series (DataFrame.describe() returns a
# DataFrame).
print(type(s_int.describe()))









    



<class 'pandas.core.series.Series'>



In [39]:

    
# Series of digit strings: summarised as non-numeric data
# (count / unique / top / freq).
s_str = df['d']
print(s_str.describe())









    



count     4
unique    4
top       3
freq      1
Name: d, dtype: object



In [40]:

    
# Convert the digit strings to integers first to get numeric statistics.
print(s_str.astype('int').describe())









    



count    4.000000
mean     2.750000
std      1.707825
min      1.000000
25%      1.750000
50%      2.500000
75%      3.500000
max      5.000000
Name: d, dtype: float64



In [41]:

    
# Add a datetime column (one value repeated) for the datetime examples
# that follow. Note: this modifies df in place.
df['dt'] = pd.to_datetime([
    '2018-01-01',
    '2018-03-15',
    '2018-02-20',
    '2018-03-15',
])



In [42]:

    
# Check that the new dt column was stored with a datetime64 dtype.
print(df.dtypes)









    



a              int64
b            float64
c             object
d             object
e               bool
dt    datetime64[ns]
dtype: object



In [43]:

    
# Summarise only the datetime column.
# NOTE(review): the recorded output (unique / top / freq / first / last) is
# what older pandas releases printed. pandas >= 2.0 summarises datetime
# columns like numeric ones (mean, min, quartiles, max) - re-run to refresh
# the output and confirm against the installed version.
print(df.describe(include='datetime'))









    



                         dt
count                     4
unique                    3
top     2018-03-15 00:00:00
freq                      2
first   2018-01-01 00:00:00
last    2018-03-15 00:00:00



In [44]:

    
# Earliest timestamp in the column (the 'first' row of the recorded
# describe() output).
print(df['dt'].min())









    



2018-01-01 00:00:00



In [45]:

    
# Latest timestamp in the column (the 'last' row of the recorded
# describe() output).
print(df['dt'].max())









    



2018-03-15 00:00:00



In [46]:

    
# describe() always works column-wise; transpose first to summarise rows.
# Each transposed column mixes ints, floats, strings, bools and timestamps,
# so the non-numeric summary is used. Row 0 reports unique=5 rather than 6
# because its values 1 (column a) and True (column e) compare equal.
print(df.T.describe())









    



        0                    1                    2                    3
count   6                    6                    6                    6
unique  5                    6                    6                    6
top     1  2018-03-15 00:00:00  2018-02-20 00:00:00  2018-03-15 00:00:00
freq    2                    1                    1                    1