In [1]:
import pandas as pd
In [2]:
# Small demo frame with one column per kind of value:
# integers ('a'), floats ('b'), strings ('c'), digit strings ('d'), booleans ('e').
df = pd.DataFrame(
    {
        'a': [1, 2, 1, 3],
        'b': [0.4, 1.1, 0.1, 0.8],
        'c': ['X', 'Y', 'X', 'Z'],
        'd': ['3', '5', '2', '1'],
        'e': [True, True, False, True],
    }
)
In [3]:
print(df)
In [4]:
print(df.dtypes)
In [5]:
print(df.describe())
In [6]:
print(type(df.describe()))
In [7]:
print(df.describe().loc['std'])
In [8]:
print(df.describe().at['std', 'b'])
In [9]:
print(df.describe(exclude='number'))
In [10]:
df_notnum = df[['c', 'd', 'e']]
print(df_notnum)
In [11]:
print(df_notnum.dtypes)
In [12]:
print(df_notnum.describe())
In [13]:
print(df.describe(include='all'))
In [14]:
print(df.describe(include=int))
In [15]:
print(type(df.describe(include=int)))
In [16]:
print(df.describe(include=[object, bool]))
In [17]:
print(df.describe(exclude=[float, object]))
In [18]:
print(df.count())
In [19]:
print(df.nunique())
In [20]:
print(df.mode())
In [21]:
print(df.mode().count())
In [22]:
print(df.mode().iloc[0])
In [23]:
print(df['c'].value_counts().iat[0])
In [24]:
print(df.apply(lambda x: x.value_counts().iat[0]))
In [25]:
print(df.mean(numeric_only=True))
In [26]:
print(df.std(numeric_only=True))
In [27]:
print(df.min(numeric_only=True))
In [28]:
print(df.max(numeric_only=True))
In [29]:
print(df.median(numeric_only=True))
In [30]:
print(df.quantile(q=[0.25, 0.75], numeric_only=True))
In [31]:
print(df.quantile(q=[0, 0.25, 0.5, 0.75, 1], numeric_only=True))
In [32]:
print(df.describe(percentiles=[0.2, 0.4, 0.6, 0.8]))
In [33]:
print(df.astype('str').describe())
In [34]:
print(df.astype({'a': str}).describe(exclude='number'))
In [35]:
print(df.astype({'d': int, 'e': int}).describe())
In [36]:
s_int = df['a']
print(s_int)
In [37]:
print(s_int.describe())
In [38]:
print(type(s_int.describe()))
In [39]:
s_str = df['d']
print(s_str.describe())
In [40]:
print(s_str.astype('int').describe())
In [41]:
df['dt'] = pd.to_datetime(['2018-01-01', '2018-03-15', '2018-02-20', '2018-03-15'])
In [42]:
print(df.dtypes)
In [43]:
print(df.describe(include='datetime'))
In [44]:
print(df['dt'].min())
In [45]:
print(df['dt'].max())
In [46]:
print(df.T.describe())