In [83]:
import numpy as np
import pandas as pd
from scipy import stats

In [84]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='datasets/dataset2.csv')

In [85]:
# Pull the monthly-hours column out of the frame as a plain NumPy array.
# NOTE(review): the label is spelled 'montly' — presumably this matches the CSV
# header exactly; verify against the file before "correcting" it.
hours = df.loc[:, 'average_montly_hours'].values

In [86]:
# Standardise to z-scores: subtract the mean, then divide by the standard deviation.
# np.std defaults to ddof=0, i.e. the population standard deviation.
norm_hours = np.divide(np.subtract(hours, np.mean(hours)), np.std(hours))

In [87]:
# Display the z-scores as an (n, 1) column vector. The reshaped array is only
# shown, not assigned, so norm_hours itself keeps its original shape.
np.reshape(norm_hours, (-1, 1))


Out[87]:
array([[-0.88203988],
       [ 1.22042276],
       [ 1.4206573 ],
       ..., 
       [-1.16236823],
       [ 1.58084493],
       [-0.86201642]])

In [88]:
# Summary statistics of the standardised values (count, min/max, mean, variance,
# skewness, kurtosis). describe() uses ddof=1 for the variance by default, while
# the normalisation divided by np.std with ddof=0, so the reported variance is
# n/(n-1) rather than exactly 1.
stats.describe(norm_hours, axis=0)


Out[88]:
DescribeResult(nobs=14999, minmax=(-2.103470551448857, 2.1815485374686188), mean=-8.7165719967903482e-17, variance=1.0000666755567409, skewness=0.05283670471826951, kurtosis=-1.135003251093129)

In [ ]: