In [1]:
import pandas as pd
import scipy.stats
from sklearn import preprocessing
In [2]:
# Small 3x3 integer frame (values 0..8, row-major) used as the input for
# the normalization and standardization examples in this notebook.
df = pd.DataFrame(
    data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    index=['a', 'b', 'c'],
    columns=['col1', 'col2', 'col3'],
)
In [3]:
# Show the unscaled source frame before any normalization is applied.
print(df)
In [4]:
# Min-max scaling per column: df.min() / df.max() reduce down each column
# by default, so every column is mapped independently onto [0, 1].
print(
    df.sub(df.min())
      .div(df.max() - df.min())
)
In [5]:
# Min-max scaling per row: transpose so rows become columns, apply the
# column-wise formula, then transpose back to the original orientation.
print(
    df.T.sub(df.T.min())
        .div(df.T.max() - df.T.min())
        .T
)
In [6]:
# Min-max scaling over the whole frame: the underlying NumPy array's
# min()/max() are taken with no axis, i.e. over all elements at once.
print(
    df.sub(df.values.min())
      .div(df.values.max() - df.values.min())
)
In [7]:
# Standardization per column. pandas' std() defaults to ddof=1, so this
# divides by the sample (unbiased) standard deviation.
print(
    df.sub(df.mean())
      .div(df.std())
)
In [8]:
# Standardization per column using the population standard deviation
# (ddof=0) instead of pandas' default sample standard deviation.
print(
    df.sub(df.mean())
      .div(df.std(ddof=0))
)
In [9]:
# Standardization per row: work on the transpose so the column-wise
# mean/std (ddof=1 by default) apply to the original rows, then flip back.
print(
    df.T.sub(df.T.mean())
        .div(df.T.std())
        .T
)
In [10]:
# Standardization per row with the population standard deviation (ddof=0),
# again going through the transpose and back.
print(
    df.T.sub(df.T.mean())
        .div(df.T.std(ddof=0))
        .T
)
In [11]:
# Standardization over the whole frame. These are NumPy reductions on
# df.values: no axis means all elements, and NumPy's std() defaults to
# ddof=0 (population standard deviation), unlike pandas.
print(
    df.sub(df.values.mean())
      .div(df.values.std())
)
In [12]:
# Standardization over the whole frame with the sample standard deviation:
# ddof=1 is passed explicitly because NumPy's default is ddof=0.
print(
    df.sub(df.values.mean())
      .div(df.values.std(ddof=1))
)
In [13]:
# Scale a single column and store the results as new columns.
# Work on a copy so the source frame `df` is left untouched.
df_ = df.copy()
s = df_['col1']

# Min-max scaling of col1 onto [0, 1].
df_['col1_min_max'] = s.sub(s.min()).div(s.max() - s.min())

# Standardization of col1; Series.std() defaults to ddof=1 (sample std).
df_['col1_standardization'] = s.sub(s.mean()).div(s.std())
In [14]:
# Show the copy, which now carries the scaled versions of col1 as extra columns.
print(df_)
In [15]:
# z-score with SciPy. The documented defaults are spelled out here:
# axis=0 standardizes each column, ddof=0 uses the population std.
print(scipy.stats.zscore(df, axis=0, ddof=0))
In [16]:
# Inspect the type zscore() returns when it is given a DataFrame, to see
# whether the result still needs to be wrapped back into a DataFrame.
print(type(scipy.stats.zscore(df)))
In [17]:
# axis=None computes the z-score over all elements rather than per column;
# ddof=1 switches from the default population std to the sample std.
print(scipy.stats.zscore(df, axis=None, ddof=1))
In [18]:
# Wrap the z-scores in a DataFrame that reuses the source frame's
# row and column labels.
df_standardization = pd.DataFrame(
    data=scipy.stats.zscore(df),
    columns=df.columns,
    index=df.index,
)
In [19]:
# Show the labelled, column-wise standardized frame.
print(df_standardization)
In [20]:
# Standardize only col1 with SciPy and attach it as a new column.
# assign() returns a new frame, so the source frame `df` is not modified.
df_ = df.assign(col1_standardization=scipy.stats.zscore(df['col1']))
print(df_)
In [21]:
# scikit-learn min-max scaler; feature_range=(0, 1) is the documented
# default, written out to make the target interval explicit.
mm = preprocessing.MinMaxScaler(feature_range=(0, 1))
In [22]:
# Learn each column's min/max from df, then scale df with them.
# fit() returns the scaler itself, so this matches fit_transform(df).
print(mm.fit(df).transform(df))
In [23]:
# Inspect the type fit_transform() returns when it is given a DataFrame.
print(type(mm.fit_transform(df)))
In [24]:
# Function form of min-max scaling, no scaler object needed. The documented
# defaults are written out: scale onto [0, 1], column by column (axis=0).
print(preprocessing.minmax_scale(df, feature_range=(0, 1), axis=0))
In [25]:
# Inspect the type minmax_scale() returns when it is given a DataFrame.
print(type(preprocessing.minmax_scale(df)))
In [26]:
# Wrap the scaler output in a DataFrame that reuses the source frame's
# row and column labels.
df_min_max = pd.DataFrame(
    data=mm.fit_transform(df),
    columns=df.columns,
    index=df.index,
)
In [27]:
# Show the labelled, column-wise min-max scaled frame.
print(df_min_max)
In [28]:
# Scale a single column with scikit-learn's function API and store the
# results as new columns on a copy of the source frame.
df_ = df.copy()

# Cast to float up front; presumably this avoids scikit-learn's own
# int -> float conversion (and any warning about it) -- confirm.
s = df_['col1'].astype('float64')

# Documented defaults are written out: scale onto [0, 1], and both centre
# and divide by the standard deviation.
df_['col1_min_max'] = preprocessing.minmax_scale(s, feature_range=(0, 1))
df_['col1_standardization'] = preprocessing.scale(s, with_mean=True, with_std=True)
In [29]:
# Show the copy with the scikit-learn-scaled versions of col1 appended.
print(df_)