notebook.community

Edit and run



In [ ]:

    
""" quiz materials for feature scaling clustering """

### FYI, the most straightforward implementation might 
### throw a divide-by-zero error, if the min and max
### values are the same
### but think about this for a second--that means that every
### data point has the same value for that feature!  
### why would you rescale it?  Or even use it at all?
def featureScaling(arr):
    """Min-max rescale a sequence of numbers into the range [0, 1].

    Every value x is mapped to (x - min) / (max - min). The input is
    left untouched and a new list is returned, so re-running the call
    on the same data always gives the same answer.

    Args:
        arr: iterable of numbers (e.g. a list of ints or floats).

    Returns:
        A new list of floats between 0.0 and 1.0. If every value is
        identical (max == min) the values are returned unchanged,
        because rescaling would divide by zero. An empty input yields
        an empty list.
    """
    # Work on a copy so the caller's data is never overwritten.
    values = list(arr)
    if not values:
        return []

    lowest = min(values)
    highest = max(values)
    if lowest == highest:
        # Constant feature: nothing to rescale, and the span would be zero.
        return values

    # float() forces true division even under Python 2 integer semantics.
    span = float(highest - lowest)
    return [(value - lowest) / span for value in values]

# tests of your feature scaler--line below is input data
data = [115, 140, 175]
# Parenthesized single-argument form prints identically on Python 2 and 3.
print(featureScaling(data))



In [4]:

    
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Three sample weights arranged as one feature column (3 rows, 1 column).
weights = np.array([115.0, 140.0, 175.0]).reshape(-1, 1)

# Fit the scaler on the column, then apply the learned scaling to it.
scaler = MinMaxScaler()
rescaled_weights = scaler.fit(weights).transform(weights)

# Bare last expression so the notebook displays the scaled array.
rescaled_weights









    Out[4]:





array([[ 0.        ],
       [ 0.41666667],
       [ 1.        ]])