In [30]:
import numpy as np

In [48]:
# Execute k_means_cluster.py and pull its top-level names into this kernel.
# Later cells read data_dict, features_list and finance_features without
# defining them -- presumably they are created by this script (TODO confirm).
%run k_means_cluster.py



In [12]:
# Gather every recorded exercised_stock_options value as an int. Entries whose
# value is the string 'NaN' are skipped so they never reach int() or min()/max().
exercised_stock_options = [
    int(record['exercised_stock_options'])
    for record in data_dict.values()
    if record['exercised_stock_options'] != 'NaN'
]

# Call-form print behaves the same on Python 2 (single argument) and also runs
# on Python 3, unlike the bare print statement.
print(min(exercised_stock_options))
print(max(exercised_stock_options))


3285
34348384

In [13]:
# Gather every recorded salary as an int. Entries whose value is the string
# 'NaN' are skipped so they never reach int() or min()/max().
salaries = [
    int(record['salary'])
    for record in data_dict.values()
    if record['salary'] != 'NaN'
]

# Call-form print behaves the same on Python 2 (single argument) and also runs
# on Python 3, unlike the bare print statement.
print(min(salaries))
print(max(salaries))


477
1111258

In [44]:
# Show which features are currently selected. features_list is not defined in
# this notebook -- presumably it comes from the %run'd script (TODO confirm).
features_list


Out[44]:
['poi', 'salary', 'exercised_stock_options']

In [40]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so a fresh "Run All" surfaces the dependency early.
from sklearn.preprocessing import MinMaxScaler
# Default-constructed scaler; the recorded output shows feature_range=(0, 1).
scaler = MinMaxScaler()
# fit() is the cell's last expression, so its return value (the scaler itself)
# is what the cell displays. finance_features is defined outside this notebook
# -- presumably by the %run'd script (TODO confirm). No cell shown here goes on
# to call transform() with this scaler.
scaler.fit(finance_features)


Out[40]:
MinMaxScaler(copy=True, feature_range=(0, 1))

In [45]:
def scale(number, min_, max_):
    """Min-max rescale ``number`` relative to the interval [min_, max_].

    Computes ``(number - min_) / (max_ - min_)`` in floating point, so for a
    non-empty range ``min_`` maps to 0.0 and ``max_`` maps to 1.0. Inputs are
    not clamped: values outside the interval give results below 0 or above 1.

    Args:
        number: Value to rescale.
        min_: Lower reference value of the range.
        max_: Upper reference value of the range.

    Returns:
        float: The rescaled value, or 1.0 when ``min_ == max_``.
    """
    if min_ == max_:
        # Zero-width range: the ratio is undefined, so return the fixed
        # fallback. It is a float so every path returns the same type and
        # callers never hit integer arithmetic on this branch only.
        return 1.0
    # float() on the numerator forces true division even for Python 2 ints.
    return float(number - min_) / (max_ - min_)

In [46]:
# Rescale a salary of 200000. The bounds 477 and 1111258 are hard-coded copies
# of the min/max salary printed by the earlier salaries cell.
scale(200000, 477, 1111258)


Out[46]:
0.17962406631010072

In [47]:
# Rescale an exercised_stock_options value of 1e6. The bounds 3285 and 34348384
# are hard-coded copies of the min/max printed by the earlier
# exercised_stock_options cell.
scale(1e6,3285,34348384)


Out[47]:
0.029020588934683227

In [ ]: