In [1]:
import pandas as pd
In [2]:
# Sample data: the squares of 0..10, labelled 'a'..'k'.
s = pd.Series(
    data=[n ** 2 for n in range(11)],
    index=list('abcdefghijk'),
)
In [3]:
# Display the Series of squares.
print(s)
In [4]:
# An integer `bins` asks pd.cut for that many equal-width bins
# spanning the range of the data.
s_cut = pd.cut(s, bins=4)
print(s_cut)
In [5]:
# Show the type of object pd.cut returned for Series input.
print(type(s_cut))
In [6]:
# A list `bins` gives explicit bin edges; values that fall outside
# the edges come back as NaN.
print(pd.cut(s, bins=[0, 10, 50, 100]))
In [7]:
# retbins=True makes pd.cut return the computed bin edges as well.
s_cut, bins = pd.cut(
    s,
    bins=4,
    retbins=True,
)
print(s_cut)
In [8]:
# Show the returned bin edges, then their container type.
print(bins, type(bins), sep='\n')
In [9]:
# right=False: bins do not include their right edge.
print(pd.cut(s, bins=4, right=False))
In [10]:
# labels=False: return integer bin indicators instead of intervals.
print(pd.cut(s, bins=4, labels=False))
In [11]:
# Give each of the four bins a custom name (one label per bin).
print(pd.cut(
    s,
    bins=4,
    labels=['small', 'medium', 'large', 'x-large'],
))
In [12]:
# Three equal-width bins with the default label precision.
print(pd.cut(s, bins=3))
In [13]:
# precision controls how the bin labels are stored and displayed.
print(pd.cut(s, bins=3, precision=1))
In [14]:
# pd.qcut bins by quantiles; q=2 splits the data at the median.
print(pd.qcut(s, q=2))
In [15]:
# Quartile binning with named labels; retbins=True also returns
# the quantile edges.
s_qcut, bins = pd.qcut(
    s,
    q=4,
    labels=['Q1', 'Q2', 'Q3', 'Q4'],
    retbins=True,
)
print(s_qcut)
In [16]:
# Show the bin edges most recently stored in `bins`.
print(bins)
In [17]:
# Sample data with many repeated values (five zeros), labelled 'a'..'k'.
s_duplicate = pd.Series(
    data=[0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6],
    index=list('abcdefghijk'),
)
In [18]:
# Display the Series containing repeated values.
print(s_duplicate)
In [19]:
# Two quantile bins over the data with repeated values.
print(pd.qcut(s_duplicate, q=2))
In [20]:
# With q=4 the repeated zeros make two quantile edges coincide, so this raises:
# print(pd.qcut(s_duplicate, 4))
# ValueError: Bin edges must be unique: array([0. , 0. , 1. , 3.5, 6. ]).
# You can drop duplicate edges by setting the 'duplicates' kwarg
In [21]:
# duplicates='drop' discards non-unique bin edges instead of raising.
print(pd.qcut(s_duplicate, q=4, duplicates='drop'))
In [22]:
# Count how many elements fall into each of the three labelled bins.
counts = (
    pd.cut(s, bins=3, labels=['S', 'M', 'L'])
    .value_counts()
)
print(counts)
In [23]:
# Show the type of the value_counts() result.
print(type(counts))
In [24]:
# Look up the count for a single bin by its label.
print(counts.loc['M'])
In [25]:
# Per-bin counts in a single expression.
# Uses the .value_counts() method: the top-level pd.value_counts()
# function is deprecated (pandas 2.1+) and emits a FutureWarning.
print(pd.cut(s, bins=3, labels=['S', 'M', 'L']).value_counts())
In [26]:
# The same squares as a plain Python list (pd.cut also accepts lists).
l = [n ** 2 for n in range(0, 11)]
print(l)
In [27]:
# Bin the plain list into three labelled bins.
l_cut = pd.cut(
    l,
    bins=3,
    labels=['S', 'M', 'L'],
)
print(l_cut)
In [28]:
# Show the type of object pd.cut returned for list input.
print(type(l_cut))
In [29]:
# The result supports positional indexing: label of the first element.
print(l_cut[0])
In [30]:
# Convert the binned result to a plain Python list of labels.
print(list(l_cut))
In [31]:
# Per-bin counts for the binned list.
# The top-level pd.value_counts() function is deprecated (pandas 2.1+);
# wrapping in a Series and calling .value_counts() is the documented
# replacement.
print(pd.Series(l_cut).value_counts())
In [32]:
# Load the Titanic training data and discard the columns that are not
# used in the binning examples.
df_titanic = pd.read_csv('data/src/titanic_train.csv')
df_titanic = df_titanic.drop(columns=['Name', 'Ticket', 'Cabin', 'Embarked'])
In [33]:
# Preview the first rows of the loaded DataFrame.
print(df_titanic.head())
In [34]:
# Summary statistics of the 'Age' column.
print(df_titanic['Age'].describe())
In [35]:
# Split 'Age' into five equal-width bins and count each one.
# sort=False keeps the bins in interval order; dropna=False also
# reports missing ages, if any.
print(
    pd.cut(df_titanic['Age'], bins=5, precision=0)
    .value_counts(sort=False, dropna=False)
)
In [36]:
# Store each passenger's age-bin number (labels=False gives integer
# bin indicators) as a new column.
df_titanic['Age_bin'] = pd.cut(
    df_titanic['Age'],
    bins=5,
    labels=False,
)
In [37]:
# Preview the DataFrame again, now including the 'Age_bin' column.
print(df_titanic.head())