In [4]:
import pandas as pd
# Read the per-ZIP median value per square foot table into a DataFrame.
house_value = pd.read_csv(
    filepath_or_buffer='Zip_MedianValuePerSqft_AllHomes.csv',
)

In [6]:
# Display every row whose Metro column equals "New York".
house_value.loc[house_value["Metro"] == "New York"]


Out[6]:
RegionID RegionName City State Metro CountyName SizeRank 1996-04 1996-05 1996-06 ... 2016-06 2016-07 2016-08 2016-09 2016-10 2016-11 2016-12 2017-01 2017-02 2017-03
0 61639 10025 New York NY New York New York 1 NaN NaN NaN ... 1321 1336 1345 1346 1343 1345 1340.0 1327.0 1317.0 1313.0
2 61637 10023 New York NY New York New York 3 NaN NaN NaN ... 1618 1636 1654 1656 1647 1640 1645.0 1643.0 1630.0 1616.0
5 61616 10002 New York NY New York New York 6 NaN NaN NaN ... 1354 1369 1387 1392 1385 1385 1390.0 1390.0 1397.0 1409.0
10 62037 11226 New York NY New York Kings 11 NaN NaN NaN ... 497 501 508 517 522 522 522.0 523.0 525.0 527.0
12 62087 11375 New York NY New York Queens 13 NaN NaN NaN ... 530 535 541 548 556 564 569.0 575.0 583.0 590.0
14 62045 11235 New York NY New York Kings 15 NaN NaN NaN ... 504 503 504 508 513 514 515.0 515.0 515.0 516.0
19 61623 10009 New York NY New York New York 20 NaN NaN NaN ... 1388 1401 1416 1425 1428 1438 1447.0 1447.0 1449.0 1456.0
21 61643 10029 New York NY New York New York 22 NaN NaN NaN ... 969 987 1005 1012 1008 1008 1015.0 1027.0 1044.0 1059.0
22 61703 10128 New York NY New York New York 23 NaN NaN NaN ... 1195 1216 1239 1249 1246 1243 1247.0 1251.0 1257.0 1262.0
28 61802 10462 New York NY New York Bronx 29 NaN NaN NaN ... 164 167 170 173 174 173 172.0 171.0 175.0 178.0
29 61796 10456 New York NY New York Bronx 30 NaN NaN NaN ... 242 243 244 247 252 256 256.0 256.0 259.0 263.0
40 62017 11206 New York NY New York Kings 41 NaN NaN NaN ... 355 357 362 370 376 379 383.0 388.0 397.0 404.0
41 62012 11201 New York NY New York Kings 42 NaN NaN NaN ... 1130 1131 1132 1135 1137 1142 1148.0 1149.0 1144.0 1139.0
43 62023 11212 New York NY New York Kings 44 NaN NaN NaN ... 281 286 292 298 305 310 314.0 319.0 324.0 328.0
46 61638 10024 New York NY New York New York 47 NaN NaN NaN ... 1465 1485 1504 1510 1505 1503 1506.0 1506.0 1500.0 1493.0
50 62025 11214 New York NY New York Kings 51 NaN NaN NaN ... 498 501 505 511 517 523 530.0 535.0 538.0 541.0
56 60545 7030 Hoboken NJ New York Hudson 57 161.0 160.0 159.0 ... 667 669 671 675 683 695 705.0 709.0 713.0 717.0
66 62088 11377 New York NY New York Queens 67 NaN NaN NaN ... 473 485 494 497 501 505 508.0 511.0 518.0 526.0
69 62044 11234 New York NY New York Kings 70 NaN NaN NaN ... 374 377 380 383 385 388 392.0 395.0 397.0 398.0
72 62018 11207 New York NY New York Kings 73 NaN NaN NaN ... 302 305 308 310 314 318 322.0 325.0 327.0 328.0
81 61148 8701 Lakewood Township NJ New York Ocean 82 57.0 57.0 57.0 ... 150 151 152 153 153 154 154.0 155.0 155.0 155.0
83 62067 11355 New York NY New York Queens 84 NaN NaN NaN ... 507 533 550 555 558 557 556.0 555.0 561.0 570.0
89 61790 10314 New York NY New York Richmond 90 101.0 101.0 101.0 ... 280 282 283 285 287 288 291.0 294.0 299.0 303.0
92 62026 11215 New York NY New York Kings 93 NaN NaN NaN ... 1002 1006 1008 1012 1017 1024 1034.0 1042.0 1044.0 1044.0
97 62032 11221 New York NY New York Kings 98 NaN NaN NaN ... 540 549 554 557 565 577 586.0 590.0 593.0 596.0
122 62022 11211 New York NY New York Kings 123 NaN NaN NaN ... 986 989 996 1003 1005 1013 1024.0 1033.0 1036.0 1037.0
124 62093 11385 New York NY New York Queens 125 NaN NaN NaN ... 404 410 416 422 428 433 435.0 437.0 441.0 447.0
130 61169 8753 Toms River NJ New York Ocean 131 75.0 74.0 74.0 ... 155 154 155 156 156 157 158.0 159.0 160.0 160.0
163 60518 7002 Bayonne NJ New York Hudson 164 74.0 74.0 73.0 ... 163 163 162 163 165 167 170.0 173.0 175.0 177.0
174 61797 10457 New York NY New York Bronx 175 NaN NaN NaN ... 239 240 243 247 249 250 251.0 250.0 251.0 253.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
11783 61846 10537 Cortlandt Manor NY New York Westchester 11784 117.0 116.0 115.0 ... 174 175 174 175 176 178 179.0 180.0 183.0 186.0
11795 60586 7077 Woodbridge Township NJ New York Middlesex 11796 89.0 89.0 89.0 ... 191 191 192 193 196 200 202.0 202.0 202.0 203.0
11821 61872 10577 Rye Brook NY New York Westchester 11822 NaN NaN NaN ... 410 405 403 404 407 410 414.0 416.0 414.0 411.0
11876 60894 7980 Long Hill Township NJ New York Morris 11877 123.0 123.0 124.0 ... 256 253 253 254 255 255 256.0 258.0 258.0 258.0
11888 60719 7606 South Hackensack Township NJ New York Bergen 11889 94.0 94.0 94.0 ... 186 186 187 189 190 191 193.0 194.0 196.0 197.0
11890 60762 7711 Ocean Township NJ New York Monmouth 11891 134.0 131.0 129.0 ... 334 338 343 347 350 351 355.0 363.0 368.0 369.0
11903 62677 12578 Town of Pleasant Valley NY New York Dutchess 11904 76.0 75.0 75.0 ... 150 151 152 153 154 156 156.0 156.0 156.0 156.0
11918 62159 11569 Point Lookout NY New York Nassau 11919 185.0 186.0 186.0 ... 463 468 471 473 475 475 476.0 481.0 484.0 482.0
11927 61197 8828 Helmetta NJ New York Middlesex 11928 87.0 86.0 84.0 ... 160 160 160 161 163 167 168.0 168.0 170.0 173.0
11930 62616 12501 Town of Amenia NY New York Dutchess 11931 NaN NaN NaN ... 149 150 151 153 155 158 158.0 157.0 156.0 155.0
11939 61953 10973 Wawayanda NY New York Orange 11940 78.0 77.0 77.0 ... 134 134 134 135 137 138 139.0 140.0 141.0 142.0
11963 61163 8740 Ocean Gate NJ New York Ocean 11964 74.0 72.0 70.0 ... 156 156 155 155 154 153 152.0 153.0 154.0 155.0
11972 61963 10984 Thiells NY New York Rockland 11973 139.0 141.0 141.0 ... 261 261 261 261 262 259 253.0 247.0 244.0 243.0
12019 61156 8732 Island Heights NJ New York Ocean 12020 78.0 76.0 75.0 ... 200 200 200 199 198 200 201.0 201.0 201.0 203.0
12061 62675 12575 New Windsor NY New York Orange 12062 77.0 77.0 77.0 ... 140 141 140 141 142 143 145.0 146.0 147.0 148.0
12068 60537 7021 Essex Fells NJ New York Essex 12069 123.0 124.0 126.0 ... 308 317 328 338 346 353 356.0 356.0 352.0 351.0
12166 62307 11960 Remsenburg NY New York Suffolk 12167 NaN NaN NaN ... 486 452 449 470 486 504 519.0 519.0 515.0 513.0
12202 60722 7620 Alpine NJ New York Bergen 12203 NaN NaN NaN ... 550 550 555 560 559 558 557.0 554.0 555.0 559.0
12205 61161 8738 Mantoloking NJ New York Ocean 12206 170.0 168.0 164.0 ... 471 472 473 476 482 491 500.0 507.0 507.0 503.0
12247 61918 10917 Woodbury NY New York Orange 12248 78.0 79.0 81.0 ... 152 152 152 154 154 154 155.0 157.0 158.0 159.0
12288 62676 12577 Beaverdam Lake-Salisbury Mills NY New York Orange 12289 82.0 82.0 83.0 ... 149 149 150 150 150 151 152.0 152.0 153.0 154.0
12299 61965 10986 Town of Stony Point NY New York Rockland 12300 88.0 88.0 90.0 ... 173 172 172 172 174 174 173.0 172.0 172.0 173.0
12357 62705 12729 Deerpark NY New York Orange 12358 70.0 68.0 67.0 ... 106 106 106 106 107 108 109.0 110.0 110.0 110.0
12412 60834 7847 Succasunna NJ New York Morris 12413 95.0 95.0 95.0 ... 177 177 177 176 176 177 177.0 178.0 178.0 179.0
12467 61239 8887 Readington NJ New York Hunterdon 12468 87.0 89.0 89.0 ... 152 152 152 154 157 160 160.0 161.0 163.0 164.0
12510 62295 11948 Laurel NY New York Suffolk 12511 NaN NaN NaN ... 307 302 303 311 318 323 323.0 323.0 326.0 327.0
12539 61853 10546 Chappaqua NY New York Westchester 12540 146.0 148.0 149.0 ... 320 316 314 315 317 320 323.0 323.0 319.0 316.0
12543 60875 7934 Peapack NJ New York Somerset 12544 132.0 134.0 136.0 ... 267 267 268 269 271 273 273.0 274.0 276.0 279.0
12624 60903 8006 Barnegat Light NJ New York Ocean 12625 NaN NaN NaN ... 435 438 443 449 457 467 474.0 479.0 478.0 477.0
12697 61948 10964 Orangetown NY New York Rockland 12698 NaN NaN NaN ... 323 323 322 321 322 323 322.0 319.0 319.0 320.0

733 rows × 259 columns


In [22]:
# Restrict the table to rows whose City column is exactly 'New York'.
ny_housedf = house_value.loc[house_value['City'] == 'New York']
# Column labels for the months of interest, formatted as 'YYYY-MM'
# (January 2014 through December 2016, in chronological order).
month_format = '{year}-{month:02}'
cols = [
    month_format.format(year=yr, month=mo)
    for yr in range(2014, 2017)
    for mo in range(1, 13)
]
cols


Out[22]:
['2014-01',
 '2014-02',
 '2014-03',
 '2014-04',
 '2014-05',
 '2014-06',
 '2014-07',
 '2014-08',
 '2014-09',
 '2014-10',
 '2014-11',
 '2014-12',
 '2015-01',
 '2015-02',
 '2015-03',
 '2015-04',
 '2015-05',
 '2015-06',
 '2015-07',
 '2015-08',
 '2015-09',
 '2015-10',
 '2015-11',
 '2015-12',
 '2016-01',
 '2016-02',
 '2016-03',
 '2016-04',
 '2016-05',
 '2016-06',
 '2016-07',
 '2016-08',
 '2016-09',
 '2016-10',
 '2016-11',
 '2016-12']

In [8]:
# Keep the RegionName label first, followed by the monthly columns in `cols`.
used_cols = ['RegionName'] + cols
df = ny_housedf[used_cols]

In [9]:
# One array per row of df: element 0 is RegionName, the rest follow `cols`.
plotdata = list(df.values)

In [10]:
import matplotlib.pyplot as plt
def plot_line(pdata):
    """Draw one region's monthly series on the current matplotlib axes.

    pdata: one entry of ``plotdata``; element 0 is the RegionName label and
    the remaining elements are the values for the months listed in ``cols``.
    """
    plt.plot(pdata[1:])
    # Label every third month so the x axis stays readable.
    plt.xticks(range(0, len(cols), 3), cols[::3], rotation='vertical')

# Use an explicit loop rather than map(): on Python 3 map() is lazy, so
# map(plot_line, plotdata) would never call plot_line and the figure would be
# empty.  The loop behaves the same on Python 2.
for region in plotdata:
    plot_line(region)
plt.legend([region[0] for region in plotdata],)
plt.show()



In [27]:
from sklearn.svm import SVR
import numpy as np
from sklearn.model_selection import cross_val_score
# When True, predict() also returns cross-validation scores for each model.
VALIDATE = False
# Number of leading months used to FIT the models (despite the name); the
# remaining months in `cols` are only predicted.
test_months = 25
def predict(row):
    """Fit linear, polynomial and RBF SVR models to one region's series.

    row: one entry of ``plotdata``; element 0 is the region label and
        elements 1.. are the monthly values aligned with ``cols``.

    The first ``test_months`` values are used for fitting, with the month
    index (0, 1, 2, ...) as the single feature.  Each model then predicts a
    value for every month index in ``cols``.

    Returns ``[y_lin, y_poly, y_rbf]``.  When ``VALIDATE`` is true a fourth
    element is appended: a dict with keys ``'lin'``, ``'poly'`` and ``'rbf'``
    holding the 5-fold ``cross_val_score`` results for each model.
    """
    y = np.array(row[1:test_months+1])
    # scikit-learn expects a 2-D feature matrix: one column of month indices.
    X = np.arange(test_months).reshape(-1, 1)
    Xn = np.arange(len(cols)).reshape(-1, 1)

    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.005, epsilon=0.02)

    y_lin = svr_lin.fit(X, y).predict(Xn)
    y_poly = svr_poly.fit(X, y).predict(Xn)
    y_rbf = svr_rbf.fit(X, y).predict(Xn)

    # Validate the models.  The previous code passed an undefined name `clf`
    # here, so enabling VALIDATE raised NameError; score the estimators that
    # this function actually builds instead.
    if VALIDATE:
        scores = {
            'lin': cross_val_score(svr_lin, X, y, cv=5),
            'poly': cross_val_score(svr_poly, X, y, cv=5),
            'rbf': cross_val_score(svr_rbf, X, y, cv=5),
        }
        return [y_lin, y_poly, y_rbf, scores]
    return [y_lin, y_poly, y_rbf]

In [21]:
lw = 2
# Build everything this figure needs from the first region, so the cell does
# not depend on X, y, Xn and the model outputs lingering from an earlier
# kernel session (inside predict() they are only local variables).
sample_row = plotdata[0]
X = range(test_months)
y = sample_row[1:test_months+1]
Xn = range(len(cols))
# [:3] keeps this working when VALIDATE makes predict() append its scores.
y_lin, y_poly, y_rbf = predict(sample_row)[:3]
plt.scatter(X, y, color='darkorange', label='training data')
# sample_row[0] is the region label, so month index i lives at
# sample_row[i + 1].  The months after the training window are therefore
# sample_row[test_months + 1:], plotted at x = test_months .. len(cols) - 1.
# (Slicing from test_months re-plotted the last training month and shifted
# every held-out point one step to the right.)
plt.scatter(range(test_months, len(cols)), sample_row[test_months+1:], label='untrained data')
# plt.hold('on')
plt.plot(Xn, y_rbf, color='navy', lw=lw, label='RBF model')
plt.plot(Xn, y_lin, color='c', lw=lw, label='Linear model')
plt.plot(Xn, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Support Vector Regression')
plt.legend()
plt.show()



In [26]:
# Sanity check: print the length of each prediction array, of cols, and of
# the training target y.
# print(...) with a single argument works on both Python 2 and Python 3.
for x in [y_lin, y_poly, y_rbf, cols, y]:
    print(len(x))


36
36
36
36
25
Out[26]:
37

In [ ]:
# Template record; also keeps `res` defined when plotdata is empty.
res = {"zip": 0, "data": [], "lin": [], "poly": [], "rbf": []}
exp = []
for row in plotdata:
    # [:3] tolerates the extra scores element predict() returns under VALIDATE.
    lin, poly, rbf = predict(row)[:3]
    # Build a NEW dict for every region.  Mutating and appending one shared
    # dict left exp holding many references to the same object, so every
    # entry ended up showing the last region's values.
    res = {"zip": row[0], "data": row[1:], "lin": lin, "poly": poly, "rbf": rbf}
    exp.append(res)

In [ ]: