In [ ]:
""" quiz materials for feature scaling clustering """
### FYI, the most straightforward implementation might
### throw a divide-by-zero error if the min and max
### values are the same.
### But think about this for a second--that means that every
### data point has the same value for that feature!
### Why would you rescale it? Or even use it at all?
def featureScaling(arr):
    """Min-max rescale the values of ``arr`` to the range [0, 1], in place.

    Each element x is replaced by (x - min) / (max - min), where min and
    max are taken over the whole sequence before any element is changed.

    Args:
        arr: mutable, indexable sequence of numbers (e.g. a list).

    Returns:
        The same ``arr`` object. It is returned unchanged when it is empty
        or when all values are equal (max == min), because rescaling would
        then divide by zero.
    """
    # min()/max() raise ValueError on an empty sequence; nothing to rescale.
    if len(arr) == 0:
        return arr

    min_arr = min(arr)
    max_arr = max(arr)
    if min_arr != max_arr:
        # float() forces true division under Python 2 for integer input.
        span = float(max_arr - min_arr)
        # enumerate instead of xrange: runs on both Python 2 and Python 3.
        for i, value in enumerate(arr):
            arr[i] = (value - min_arr) / span
    return arr
# tests of your feature scaler--line below is input data
data = [115, 140, 175]
# featureScaling rescales `data` in place and returns it; the parenthesized
# single-argument print works on both Python 2 and Python 3.
print(featureScaling(data))
In [4]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Raw weights as a column vector: one row per sample, a single feature column,
# which is the 2-D layout MinMaxScaler expects.
weights = np.array([115.0, 140.0, 175.0]).reshape(-1, 1)
scaler = MinMaxScaler()
# Learn the column's min and max, then map the same data onto [0, 1].
scaler.fit(weights)
rescaled_weights = scaler.transform(weights)
rescaled_weights
Out[4]: