In [1]:
import pandas
import numpy as np
import pandas as pd

#from __future__ import print_function

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


import matplotlib.pyplot as plt
import matplotlib.cm as cm

%matplotlib inline  
import seaborn as sns


/usr/local/lib/python2.7/dist-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# Read the bz2-compressed CSV into `df`, the raw frame that the later cells
# select their columns from.
df = pd.read_csv(
    "hUSCensus1990raw50K.csv.bz2",
    compression="bz2",
)

In [85]:
# Build the small modelling frame: AGE plus a handful of candidate predictors,
# all taken from the raw frame `df`.
df_linear = pd.DataFrame()

# Assign Series (df["COL"]) rather than one-column DataFrames (df[["COL"]]):
# a column assignment wants a 1-D value.
df_linear["AGE"] = df["AGE"].copy()

# INCOME is the row-wise total of the INCOME1..INCOME7 component columns.
# NOTE(review): range(1, 8) stops at INCOME7 -- confirm the raw file has no
# further INCOME<n> column that should be part of the total.
df_linear["INCOME"] = df[["INCOME" + str(i) for i in range(1, 8)]].sum(axis=1)

df_linear["YEARSCH"] = df["YEARSCH"].copy()
df_linear["ENGLISH"] = df["ENGLISH"].copy()
df_linear["FERTIL"] = df["FERTIL"].copy()
df_linear["YRSSERV"] = df["YRSSERV"].copy()

# Scatter of AGE against INCOME with marginal histograms.
g = sns.jointplot(x="INCOME", y="AGE", data=df_linear, color="r", size=10)
g.fig.set_figwidth(12)
g.fig.set_figheight(8)
# Set the limit on the joint axes explicitly instead of through plt.ylim(),
# which acts on whichever axes pyplot currently considers active.
g.ax_joint.set_ylim(0, 90)
g.fig.savefig("jointplot.png", bbox_inches='tight')



In [84]:
# Same AGE-vs-INCOME joint plot, with a linear regression fit drawn in black
# over the scatter.
g = sns.jointplot(x="INCOME", y="AGE", data=df_linear, color="r", kind="reg",
                  joint_kws={'line_kws': {'color': 'black'}})
g.fig.set_figwidth(12)
g.fig.set_figheight(8)

# Address the joint axes directly. plt.xlim()/plt.ylim() act on whichever axes
# pyplot considers current, and the y-marginal axes does not share its x-axis
# with the scatter panel, so the INCOME limit could otherwise land on a
# marginal histogram instead of the main plot.
g.ax_joint.set_ylim(0, 90)
g.ax_joint.set_xlim(0, None)

g.fig.savefig("jointplotlin.png", bbox_inches='tight')



In [86]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge, BayesianRidge, SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler



# Regress AGE on INCOME with polynomial features of degree 1, 2 and 3 and draw
# each fitted curve over the scatter of the training points.
X = df_linear[["INCOME"]].values
y = df_linear["AGE"].values

# Evenly spaced INCOME grid on which the fitted curves are evaluated.
x_plot = np.linspace(X.min(), X.max(), 1000)
X_plot = x_plot[:, np.newaxis]

# Formatted so the printed text is the same under Python 2 and Python 3.
print("%s %s" % (X.shape, y.shape))

colors = ['black', 'blue', 'gold']
lw = 2

plt.scatter(X[:, 0], y, color='red', s=30, marker='o', label="training points")

for count, degree in enumerate([1, 2, 3]):
    # SGD is a stochastic optimiser: pin random_state so that re-running the
    # cell reproduces the same curves (and the same saved figure).
    model = make_pipeline(PolynomialFeatures(degree),
                          StandardScaler(),
                          SGDRegressor(random_state=0))
    model.fit(X, y)
    y_plot = model.predict(X_plot)
    print("%s %s" % (x_plot.shape, y_plot.shape))
    plt.plot(x_plot, y_plot, color=colors[count], linewidth=lw,
             label="degree %d" % degree)

plt.legend(loc='lower right')
plt.ylim([0, 90])
plt.xlim([0, None])
plt.savefig("higherorder.png", bbox_inches='tight', dpi=200)


(50000, 1) (50000,)
(1000,) (1000,)
(1000,) (1000,)
(1000,) (1000,)

In [89]:
import numpy as np
import matplotlib.pyplot as plt

from keras.layers import Input, Dense
from keras.layers.advanced_activations import PReLU
from keras.models import Model

from sklearn.linear_model import Ridge, BayesianRidge, SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

def smallnn(input_dim=1, hidden_units=64):
    """Build and compile a small fully connected regression network.

    The network is two Dense layers, each followed by a PReLU activation, and
    a single linear output unit. It is compiled with the 'adam' optimizer and
    the 'mse' loss.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features per sample. Defaults to 1, the width this
        function was originally hard-coded to.
    hidden_units : int, optional
        Number of units in each of the two hidden Dense layers. Defaults to
        64, the original hard-coded width.

    Returns
    -------
    The compiled (untrained) Keras Model.
    """
    # Input() returns a tensor; a layer instance is callable on a tensor and
    # returns a tensor.
    inputs = Input(shape=(input_dim,))

    hidden = Dense(hidden_units)(inputs)
    hidden = PReLU()(hidden)
    hidden = Dense(hidden_units)(hidden)
    hidden = PReLU()(hidden)
    predictions = Dense(1, activation='linear')(hidden)

    # Passed positionally on purpose: Keras 1 names these keywords
    # input/output while Keras 2 names them inputs/outputs; the positional
    # form is accepted by both.
    model = Model(inputs, predictions)
    model.compile(optimizer='adam', loss='mse')

    return model

# Fit the small neural network to AGE ~ INCOME and draw its prediction curve
# over the scatter of the training points.
X = df_linear[["INCOME"]].values
y = df_linear["AGE"].values

# Evenly spaced INCOME grid (starting at 0) on which the network is evaluated.
x_plot = np.linspace(0, X.max(), 1000)
X_plot = x_plot[:, np.newaxis]

# Formatted so the printed text is the same under Python 2 and Python 3.
print("%s %s" % (X.shape, y.shape))

lw = 2

plt.scatter(X[:, 0], y, color='red', s=30, marker='o', label="training points")

# A single model is trained here, so the one-iteration
# `for count, degree in enumerate([1])` loop (whose `degree` was never used)
# is flattened into straight-line code.
# The scaler keeps the name `sc` because a later cell in this notebook also
# refers to it.
sc = StandardScaler()
sc.fit(X)
model = smallnn()
model.fit(sc.transform(X), y, verbose=True, nb_epoch=100, batch_size=200)
# The plotting grid must go through the same scaling as the training inputs.
y_plot = model.predict(sc.transform(X_plot))
print("%s %s" % (x_plot.shape, y_plot.shape))
plt.plot(x_plot, y_plot, color='black', linewidth=lw,
         label="Neural Network")

plt.legend(loc='lower right')
plt.ylim([0, 90])
plt.xlim([0, None])
plt.savefig("nn.png", bbox_inches='tight', dpi=200)
plt.show()


(50000, 1) (50000,)
Epoch 1/100
50000/50000 [==============================] - 0s - loss: 1023.1712     
Epoch 2/100
50000/50000 [==============================] - 0s - loss: 465.9958     
Epoch 3/100
50000/50000 [==============================] - 0s - loss: 412.6379     
Epoch 4/100
50000/50000 [==============================] - 0s - loss: 371.9826     
Epoch 5/100
50000/50000 [==============================] - 0s - loss: 346.2054     
Epoch 6/100
50000/50000 [==============================] - 0s - loss: 333.5802     
Epoch 7/100
50000/50000 [==============================] - 0s - loss: 328.0985     
Epoch 8/100
50000/50000 [==============================] - 1s - loss: 325.5520     
Epoch 9/100
50000/50000 [==============================] - 1s - loss: 324.2975     
Epoch 10/100
50000/50000 [==============================] - 1s - loss: 323.4579     
Epoch 11/100
50000/50000 [==============================] - 1s - loss: 322.6430     
Epoch 12/100
50000/50000 [==============================] - 1s - loss: 322.2188     
Epoch 13/100
50000/50000 [==============================] - 1s - loss: 322.0906     
Epoch 14/100
50000/50000 [==============================] - 1s - loss: 321.5084     
Epoch 15/100
50000/50000 [==============================] - 1s - loss: 321.7783     
Epoch 16/100
50000/50000 [==============================] - 1s - loss: 321.3102     
Epoch 17/100
50000/50000 [==============================] - 1s - loss: 321.2051     
Epoch 18/100
50000/50000 [==============================] - 1s - loss: 320.8288     
Epoch 19/100
50000/50000 [==============================] - 1s - loss: 321.1092     
Epoch 20/100
50000/50000 [==============================] - 1s - loss: 320.8525     
Epoch 21/100
50000/50000 [==============================] - 1s - loss: 320.7480     
Epoch 22/100
50000/50000 [==============================] - 1s - loss: 320.7901     
Epoch 23/100
50000/50000 [==============================] - 1s - loss: 320.4889     
Epoch 24/100
50000/50000 [==============================] - 2s - loss: 320.7746     
Epoch 25/100
50000/50000 [==============================] - 1s - loss: 320.5319     
Epoch 26/100
50000/50000 [==============================] - 1s - loss: 320.5933     
Epoch 27/100
50000/50000 [==============================] - 1s - loss: 320.3831     
Epoch 28/100
50000/50000 [==============================] - 3s - loss: 320.4257     
Epoch 29/100
50000/50000 [==============================] - 0s - loss: 320.1611     
Epoch 30/100
50000/50000 [==============================] - 0s - loss: 320.1031     
Epoch 31/100
50000/50000 [==============================] - 1s - loss: 320.1577     
Epoch 32/100
50000/50000 [==============================] - 1s - loss: 320.0384     
Epoch 33/100
50000/50000 [==============================] - 1s - loss: 319.6796     
Epoch 34/100
50000/50000 [==============================] - 1s - loss: 319.4935     
Epoch 35/100
50000/50000 [==============================] - 1s - loss: 319.7931     
Epoch 36/100
50000/50000 [==============================] - 1s - loss: 319.7957     
Epoch 37/100
50000/50000 [==============================] - 1s - loss: 319.5376     
Epoch 38/100
50000/50000 [==============================] - 1s - loss: 319.4923     
Epoch 39/100
50000/50000 [==============================] - 1s - loss: 319.4390     
Epoch 40/100
50000/50000 [==============================] - 0s - loss: 319.2830     
Epoch 41/100
50000/50000 [==============================] - 1s - loss: 319.5701     
Epoch 42/100
50000/50000 [==============================] - 1s - loss: 319.4145     
Epoch 43/100
50000/50000 [==============================] - 1s - loss: 319.7512     
Epoch 44/100
50000/50000 [==============================] - 2s - loss: 319.3106     
Epoch 45/100
50000/50000 [==============================] - 0s - loss: 319.3621     
Epoch 46/100
50000/50000 [==============================] - 0s - loss: 319.3890     
Epoch 47/100
50000/50000 [==============================] - 0s - loss: 319.4639     
Epoch 48/100
50000/50000 [==============================] - 0s - loss: 319.5221     
Epoch 49/100
50000/50000 [==============================] - 0s - loss: 319.5643     
Epoch 50/100
50000/50000 [==============================] - 0s - loss: 319.3003     
Epoch 51/100
50000/50000 [==============================] - 0s - loss: 319.1936     
Epoch 52/100
50000/50000 [==============================] - 0s - loss: 319.4631     
Epoch 53/100
50000/50000 [==============================] - 0s - loss: 319.4601     
Epoch 54/100
50000/50000 [==============================] - 0s - loss: 319.0831     
Epoch 55/100
50000/50000 [==============================] - 0s - loss: 319.6387     
Epoch 56/100
50000/50000 [==============================] - 1s - loss: 319.2009     
Epoch 57/100
50000/50000 [==============================] - 0s - loss: 319.4790     
Epoch 58/100
50000/50000 [==============================] - 0s - loss: 319.2572     
Epoch 59/100
50000/50000 [==============================] - 0s - loss: 319.2095     
Epoch 60/100
50000/50000 [==============================] - 0s - loss: 319.0900     
Epoch 61/100
50000/50000 [==============================] - 0s - loss: 319.1459     
Epoch 62/100
50000/50000 [==============================] - 0s - loss: 319.1534     
Epoch 63/100
50000/50000 [==============================] - 0s - loss: 319.0827     
Epoch 64/100
50000/50000 [==============================] - 0s - loss: 319.1772     
Epoch 65/100
50000/50000 [==============================] - 0s - loss: 319.1590     
Epoch 66/100
50000/50000 [==============================] - 1s - loss: 319.1221     
Epoch 67/100
50000/50000 [==============================] - 1s - loss: 319.0362     
Epoch 68/100
50000/50000 [==============================] - 1s - loss: 319.6017     
Epoch 69/100
50000/50000 [==============================] - 2s - loss: 319.1809     
Epoch 70/100
50000/50000 [==============================] - 4s - loss: 319.1602     
Epoch 71/100
50000/50000 [==============================] - 0s - loss: 319.1257     
Epoch 72/100
50000/50000 [==============================] - 0s - loss: 318.9996     
Epoch 73/100
50000/50000 [==============================] - 1s - loss: 319.2604     
Epoch 74/100
50000/50000 [==============================] - 1s - loss: 319.1267     
Epoch 75/100
50000/50000 [==============================] - 2s - loss: 318.8151     
Epoch 76/100
50000/50000 [==============================] - 1s - loss: 318.9215     
Epoch 77/100
50000/50000 [==============================] - 1s - loss: 319.2421     
Epoch 78/100
50000/50000 [==============================] - 1s - loss: 318.7780     
Epoch 79/100
50000/50000 [==============================] - 1s - loss: 319.2743     
Epoch 80/100
50000/50000 [==============================] - 1s - loss: 319.1974     
Epoch 81/100
50000/50000 [==============================] - 1s - loss: 318.6934     
Epoch 82/100
50000/50000 [==============================] - 1s - loss: 319.1697     
Epoch 83/100
50000/50000 [==============================] - 1s - loss: 318.9824     
Epoch 84/100
50000/50000 [==============================] - 1s - loss: 319.0433     
Epoch 85/100
50000/50000 [==============================] - 0s - loss: 319.0610     
Epoch 86/100
50000/50000 [==============================] - 1s - loss: 318.8561     
Epoch 87/100
50000/50000 [==============================] - 1s - loss: 319.4423     
Epoch 88/100
50000/50000 [==============================] - 2s - loss: 318.7995     
Epoch 89/100
50000/50000 [==============================] - 1s - loss: 319.1313     
Epoch 90/100
50000/50000 [==============================] - 1s - loss: 318.9898     
Epoch 91/100
50000/50000 [==============================] - 0s - loss: 319.0358     
Epoch 92/100
50000/50000 [==============================] - 0s - loss: 319.0459     
Epoch 93/100
50000/50000 [==============================] - 0s - loss: 319.2125     
Epoch 94/100
50000/50000 [==============================] - 0s - loss: 318.8551     
Epoch 95/100
50000/50000 [==============================] - 0s - loss: 318.7529     
Epoch 96/100
50000/50000 [==============================] - 0s - loss: 318.9234     
Epoch 97/100
50000/50000 [==============================] - 0s - loss: 318.9768     
Epoch 98/100
50000/50000 [==============================] - 0s - loss: 318.8250     
Epoch 99/100
50000/50000 [==============================] - 0s - loss: 318.7373     
Epoch 100/100
50000/50000 [==============================] - 1s - loss: 318.6824     
(1000,) (1000, 1)

In [55]:
# Predict AGE from every other column of df_linear, with the ENGLISH and
# FERTIL codes one-hot encoded.
# Input, Dense, PReLU, Model and StandardScaler come from the import lines of
# the earlier cells.
df_linear_dummies = pd.get_dummies(df_linear, columns=["ENGLISH", "FERTIL"])
print(df_linear_dummies.columns)
X = df_linear_dummies.drop(["AGE"], axis=1).values
y = df_linear_dummies["AGE"].values

# Take the input width from the feature matrix instead of hard-coding it, so
# the network stays in sync with however many dummy columns get_dummies made.
inputs = Input(shape=(X.shape[1],))

# A layer instance is callable on a tensor and returns a tensor.
x = Dense(64)(inputs)
x = PReLU()(x)
x = Dense(64)(x)
x = PReLU()(x)
predictions = Dense(1, activation='linear')(x)

# Passed positionally on purpose: Keras 1 names these keywords input/output
# while Keras 2 names them inputs/outputs; the positional form is accepted by
# both.
model = Model(inputs, predictions)
model.compile(optimizer='adam', loss='mse')

# Create a fresh scaler for this feature matrix rather than re-fitting an `sc`
# object left behind by another cell, so this cell does not depend on the
# order in which the notebook was executed.
sc = StandardScaler()
sc.fit(X)

model.fit(sc.transform(X), y, verbose=True, nb_epoch=100, batch_size=200)


Index([u'AGE', u'INCOME', u'YEARSCH', u'YRSSERV', u'ENGLISH_0', u'ENGLISH_1',
       u'ENGLISH_2', u'ENGLISH_3', u'ENGLISH_4', u'FERTIL_0', u'FERTIL_1',
       u'FERTIL_2', u'FERTIL_3', u'FERTIL_4', u'FERTIL_5', u'FERTIL_6',
       u'FERTIL_7', u'FERTIL_8', u'FERTIL_9', u'FERTIL_10', u'FERTIL_11',
       u'FERTIL_12', u'FERTIL_13'],
      dtype='object')
Epoch 1/100
50000/50000 [==============================] - 0s - loss: 648.0493     
Epoch 2/100
50000/50000 [==============================] - 1s - loss: 295.6654     
Epoch 3/100
50000/50000 [==============================] - 0s - loss: 266.8915     
Epoch 4/100
50000/50000 [==============================] - 1s - loss: 244.0320     
Epoch 5/100
50000/50000 [==============================] - 1s - loss: 232.5559     
Epoch 6/100
50000/50000 [==============================] - 1s - loss: 224.3847     
Epoch 7/100
50000/50000 [==============================] - 1s - loss: 219.2603     
Epoch 8/100
50000/50000 [==============================] - 1s - loss: 215.5547     
Epoch 9/100
50000/50000 [==============================] - 1s - loss: 211.8576     
Epoch 10/100
50000/50000 [==============================] - 1s - loss: 209.0168     
Epoch 11/100
50000/50000 [==============================] - 1s - loss: 206.6129     
Epoch 12/100
50000/50000 [==============================] - 1s - loss: 204.7533     
Epoch 13/100
50000/50000 [==============================] - 1s - loss: 203.2354     
Epoch 14/100
50000/50000 [==============================] - 1s - loss: 201.6447     
Epoch 15/100
50000/50000 [==============================] - 1s - loss: 200.3954     
Epoch 16/100
50000/50000 [==============================] - 1s - loss: 199.5531     
Epoch 17/100
50000/50000 [==============================] - 1s - loss: 199.0067     
Epoch 18/100
50000/50000 [==============================] - 2s - loss: 197.9937     
Epoch 19/100
50000/50000 [==============================] - 1s - loss: 197.4932     
Epoch 20/100
50000/50000 [==============================] - 1s - loss: 196.9997     
Epoch 21/100
50000/50000 [==============================] - 5s - loss: 196.4616     
Epoch 22/100
50000/50000 [==============================] - 1s - loss: 195.9983     
Epoch 23/100
50000/50000 [==============================] - 2s - loss: 195.3195     
Epoch 24/100
50000/50000 [==============================] - 2s - loss: 195.1223     
Epoch 25/100
50000/50000 [==============================] - 2s - loss: 194.6244     
Epoch 26/100
50000/50000 [==============================] - 2s - loss: 194.4080     
Epoch 27/100
50000/50000 [==============================] - 1s - loss: 193.9222     
Epoch 28/100
50000/50000 [==============================] - 1s - loss: 193.6976     
Epoch 29/100
50000/50000 [==============================] - 1s - loss: 193.2242     
Epoch 30/100
50000/50000 [==============================] - 2s - loss: 193.2166     
Epoch 31/100
50000/50000 [==============================] - 1s - loss: 193.0076     
Epoch 32/100
50000/50000 [==============================] - 3s - loss: 192.5740     
Epoch 33/100
50000/50000 [==============================] - 1s - loss: 192.4099     
Epoch 34/100
50000/50000 [==============================] - 1s - loss: 192.0575     
Epoch 35/100
50000/50000 [==============================] - 1s - loss: 191.9290     
Epoch 36/100
50000/50000 [==============================] - 1s - loss: 191.8254     
Epoch 37/100
50000/50000 [==============================] - 1s - loss: 191.4852     
Epoch 38/100
50000/50000 [==============================] - 1s - loss: 191.3626     
Epoch 39/100
50000/50000 [==============================] - 1s - loss: 191.3145     
Epoch 40/100
50000/50000 [==============================] - 0s - loss: 190.8865     
Epoch 41/100
50000/50000 [==============================] - 1s - loss: 190.7139     
Epoch 42/100
50000/50000 [==============================] - 1s - loss: 190.4873     
Epoch 43/100
50000/50000 [==============================] - 0s - loss: 190.2667     
Epoch 44/100
50000/50000 [==============================] - 1s - loss: 190.2609     
Epoch 45/100
50000/50000 [==============================] - 1s - loss: 190.1301     
Epoch 46/100
50000/50000 [==============================] - 1s - loss: 189.7508     
Epoch 47/100
50000/50000 [==============================] - 0s - loss: 189.9597     
Epoch 48/100
50000/50000 [==============================] - 0s - loss: 189.3552     
Epoch 49/100
50000/50000 [==============================] - 0s - loss: 189.2623     
Epoch 50/100
50000/50000 [==============================] - 0s - loss: 189.0250     
Epoch 51/100
50000/50000 [==============================] - 2s - loss: 188.6770     
Epoch 52/100
50000/50000 [==============================] - 1s - loss: 188.4402     
Epoch 53/100
50000/50000 [==============================] - 1s - loss: 188.2945     
Epoch 54/100
50000/50000 [==============================] - 0s - loss: 188.4455     
Epoch 55/100
50000/50000 [==============================] - 1s - loss: 188.1290     
Epoch 56/100
50000/50000 [==============================] - 1s - loss: 187.8839     
Epoch 57/100
50000/50000 [==============================] - 0s - loss: 187.3958     
Epoch 58/100
50000/50000 [==============================] - 1s - loss: 187.3062     
Epoch 59/100
50000/50000 [==============================] - 0s - loss: 187.1894     
Epoch 60/100
50000/50000 [==============================] - 1s - loss: 186.8142     
Epoch 61/100
50000/50000 [==============================] - 0s - loss: 187.0482     
Epoch 62/100
50000/50000 [==============================] - 0s - loss: 186.7818     
Epoch 63/100
50000/50000 [==============================] - 0s - loss: 186.9891     
Epoch 64/100
50000/50000 [==============================] - 1s - loss: 186.3001     
Epoch 65/100
50000/50000 [==============================] - 2s - loss: 186.7025     
Epoch 66/100
50000/50000 [==============================] - 2s - loss: 186.3217     
Epoch 67/100
50000/50000 [==============================] - 3s - loss: 186.0372     
Epoch 68/100
50000/50000 [==============================] - 2s - loss: 185.8248     
Epoch 69/100
50000/50000 [==============================] - 1s - loss: 186.0886     
Epoch 70/100
50000/50000 [==============================] - 2s - loss: 185.9022     
Epoch 71/100
50000/50000 [==============================] - 2s - loss: 185.4833     
Epoch 72/100
50000/50000 [==============================] - 1s - loss: 185.3715     
Epoch 73/100
50000/50000 [==============================] - 2s - loss: 185.2864     
Epoch 74/100
50000/50000 [==============================] - 2s - loss: 184.9788     
Epoch 75/100
50000/50000 [==============================] - 2s - loss: 185.0048     
Epoch 76/100
50000/50000 [==============================] - 2s - loss: 184.9533     
Epoch 77/100
50000/50000 [==============================] - 2s - loss: 184.9352     
Epoch 78/100
50000/50000 [==============================] - 7s - loss: 184.8269     
Epoch 79/100
50000/50000 [==============================] - 1s - loss: 184.6692     
Epoch 80/100
50000/50000 [==============================] - 1s - loss: 184.5830     
Epoch 81/100
50000/50000 [==============================] - 1s - loss: 184.4782     
Epoch 82/100
50000/50000 [==============================] - 1s - loss: 184.4078     
Epoch 83/100
50000/50000 [==============================] - 1s - loss: 184.1777     
Epoch 84/100
50000/50000 [==============================] - 2s - loss: 183.9408     
Epoch 85/100
50000/50000 [==============================] - 1s - loss: 183.8840     
Epoch 86/100
50000/50000 [==============================] - 1s - loss: 184.2188     
Epoch 87/100
50000/50000 [==============================] - 1s - loss: 183.8919     
Epoch 88/100
50000/50000 [==============================] - 1s - loss: 183.8585     
Epoch 89/100
50000/50000 [==============================] - 1s - loss: 183.6395     
Epoch 90/100
50000/50000 [==============================] - 1s - loss: 183.6849     
Epoch 91/100
50000/50000 [==============================] - 1s - loss: 183.7852     
Epoch 92/100
50000/50000 [==============================] - 2s - loss: 183.7040     
Epoch 93/100
50000/50000 [==============================] - 2s - loss: 183.2441     
Epoch 94/100
50000/50000 [==============================] - 1s - loss: 183.5076     
Epoch 95/100
50000/50000 [==============================] - 1s - loss: 183.4035     
Epoch 96/100
50000/50000 [==============================] - 2s - loss: 183.1540     
Epoch 97/100
50000/50000 [==============================] - 1s - loss: 183.3998     
Epoch 98/100
50000/50000 [==============================] - 1s - loss: 183.1502     
Epoch 99/100
50000/50000 [==============================] - 1s - loss: 183.0222     
Epoch 100/100
50000/50000 [==============================] - 1s - loss: 182.9811     
Out[55]:
<keras.callbacks.History at 0x7fcaad6606d0>

In [ ]: