In [4]:
import pandas as pd
# Load the per-zip-code median value per square foot table; the path is
# relative to the notebook's working directory.
house_value = pd.read_csv(filepath_or_buffer='Zip_MedianValuePerSqft_AllHomes.csv')
In [6]:
# Display every row whose Metro column equals "New York".
house_value.loc[house_value["Metro"] == "New York"]
Out[6]:
RegionID
RegionName
City
State
Metro
CountyName
SizeRank
1996-04
1996-05
1996-06
...
2016-06
2016-07
2016-08
2016-09
2016-10
2016-11
2016-12
2017-01
2017-02
2017-03
0
61639
10025
New York
NY
New York
New York
1
NaN
NaN
NaN
...
1321
1336
1345
1346
1343
1345
1340.0
1327.0
1317.0
1313.0
2
61637
10023
New York
NY
New York
New York
3
NaN
NaN
NaN
...
1618
1636
1654
1656
1647
1640
1645.0
1643.0
1630.0
1616.0
5
61616
10002
New York
NY
New York
New York
6
NaN
NaN
NaN
...
1354
1369
1387
1392
1385
1385
1390.0
1390.0
1397.0
1409.0
10
62037
11226
New York
NY
New York
Kings
11
NaN
NaN
NaN
...
497
501
508
517
522
522
522.0
523.0
525.0
527.0
12
62087
11375
New York
NY
New York
Queens
13
NaN
NaN
NaN
...
530
535
541
548
556
564
569.0
575.0
583.0
590.0
14
62045
11235
New York
NY
New York
Kings
15
NaN
NaN
NaN
...
504
503
504
508
513
514
515.0
515.0
515.0
516.0
19
61623
10009
New York
NY
New York
New York
20
NaN
NaN
NaN
...
1388
1401
1416
1425
1428
1438
1447.0
1447.0
1449.0
1456.0
21
61643
10029
New York
NY
New York
New York
22
NaN
NaN
NaN
...
969
987
1005
1012
1008
1008
1015.0
1027.0
1044.0
1059.0
22
61703
10128
New York
NY
New York
New York
23
NaN
NaN
NaN
...
1195
1216
1239
1249
1246
1243
1247.0
1251.0
1257.0
1262.0
28
61802
10462
New York
NY
New York
Bronx
29
NaN
NaN
NaN
...
164
167
170
173
174
173
172.0
171.0
175.0
178.0
29
61796
10456
New York
NY
New York
Bronx
30
NaN
NaN
NaN
...
242
243
244
247
252
256
256.0
256.0
259.0
263.0
40
62017
11206
New York
NY
New York
Kings
41
NaN
NaN
NaN
...
355
357
362
370
376
379
383.0
388.0
397.0
404.0
41
62012
11201
New York
NY
New York
Kings
42
NaN
NaN
NaN
...
1130
1131
1132
1135
1137
1142
1148.0
1149.0
1144.0
1139.0
43
62023
11212
New York
NY
New York
Kings
44
NaN
NaN
NaN
...
281
286
292
298
305
310
314.0
319.0
324.0
328.0
46
61638
10024
New York
NY
New York
New York
47
NaN
NaN
NaN
...
1465
1485
1504
1510
1505
1503
1506.0
1506.0
1500.0
1493.0
50
62025
11214
New York
NY
New York
Kings
51
NaN
NaN
NaN
...
498
501
505
511
517
523
530.0
535.0
538.0
541.0
56
60545
7030
Hoboken
NJ
New York
Hudson
57
161.0
160.0
159.0
...
667
669
671
675
683
695
705.0
709.0
713.0
717.0
66
62088
11377
New York
NY
New York
Queens
67
NaN
NaN
NaN
...
473
485
494
497
501
505
508.0
511.0
518.0
526.0
69
62044
11234
New York
NY
New York
Kings
70
NaN
NaN
NaN
...
374
377
380
383
385
388
392.0
395.0
397.0
398.0
72
62018
11207
New York
NY
New York
Kings
73
NaN
NaN
NaN
...
302
305
308
310
314
318
322.0
325.0
327.0
328.0
81
61148
8701
Lakewood Township
NJ
New York
Ocean
82
57.0
57.0
57.0
...
150
151
152
153
153
154
154.0
155.0
155.0
155.0
83
62067
11355
New York
NY
New York
Queens
84
NaN
NaN
NaN
...
507
533
550
555
558
557
556.0
555.0
561.0
570.0
89
61790
10314
New York
NY
New York
Richmond
90
101.0
101.0
101.0
...
280
282
283
285
287
288
291.0
294.0
299.0
303.0
92
62026
11215
New York
NY
New York
Kings
93
NaN
NaN
NaN
...
1002
1006
1008
1012
1017
1024
1034.0
1042.0
1044.0
1044.0
97
62032
11221
New York
NY
New York
Kings
98
NaN
NaN
NaN
...
540
549
554
557
565
577
586.0
590.0
593.0
596.0
122
62022
11211
New York
NY
New York
Kings
123
NaN
NaN
NaN
...
986
989
996
1003
1005
1013
1024.0
1033.0
1036.0
1037.0
124
62093
11385
New York
NY
New York
Queens
125
NaN
NaN
NaN
...
404
410
416
422
428
433
435.0
437.0
441.0
447.0
130
61169
8753
Toms River
NJ
New York
Ocean
131
75.0
74.0
74.0
...
155
154
155
156
156
157
158.0
159.0
160.0
160.0
163
60518
7002
Bayonne
NJ
New York
Hudson
164
74.0
74.0
73.0
...
163
163
162
163
165
167
170.0
173.0
175.0
177.0
174
61797
10457
New York
NY
New York
Bronx
175
NaN
NaN
NaN
...
239
240
243
247
249
250
251.0
250.0
251.0
253.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
11783
61846
10537
Cortlandt Manor
NY
New York
Westchester
11784
117.0
116.0
115.0
...
174
175
174
175
176
178
179.0
180.0
183.0
186.0
11795
60586
7077
Woodbridge Township
NJ
New York
Middlesex
11796
89.0
89.0
89.0
...
191
191
192
193
196
200
202.0
202.0
202.0
203.0
11821
61872
10577
Rye Brook
NY
New York
Westchester
11822
NaN
NaN
NaN
...
410
405
403
404
407
410
414.0
416.0
414.0
411.0
11876
60894
7980
Long Hill Township
NJ
New York
Morris
11877
123.0
123.0
124.0
...
256
253
253
254
255
255
256.0
258.0
258.0
258.0
11888
60719
7606
South Hackensack Township
NJ
New York
Bergen
11889
94.0
94.0
94.0
...
186
186
187
189
190
191
193.0
194.0
196.0
197.0
11890
60762
7711
Ocean Township
NJ
New York
Monmouth
11891
134.0
131.0
129.0
...
334
338
343
347
350
351
355.0
363.0
368.0
369.0
11903
62677
12578
Town of Pleasant Valley
NY
New York
Dutchess
11904
76.0
75.0
75.0
...
150
151
152
153
154
156
156.0
156.0
156.0
156.0
11918
62159
11569
Point Lookout
NY
New York
Nassau
11919
185.0
186.0
186.0
...
463
468
471
473
475
475
476.0
481.0
484.0
482.0
11927
61197
8828
Helmetta
NJ
New York
Middlesex
11928
87.0
86.0
84.0
...
160
160
160
161
163
167
168.0
168.0
170.0
173.0
11930
62616
12501
Town of Amenia
NY
New York
Dutchess
11931
NaN
NaN
NaN
...
149
150
151
153
155
158
158.0
157.0
156.0
155.0
11939
61953
10973
Wawayanda
NY
New York
Orange
11940
78.0
77.0
77.0
...
134
134
134
135
137
138
139.0
140.0
141.0
142.0
11963
61163
8740
Ocean Gate
NJ
New York
Ocean
11964
74.0
72.0
70.0
...
156
156
155
155
154
153
152.0
153.0
154.0
155.0
11972
61963
10984
Thiells
NY
New York
Rockland
11973
139.0
141.0
141.0
...
261
261
261
261
262
259
253.0
247.0
244.0
243.0
12019
61156
8732
Island Heights
NJ
New York
Ocean
12020
78.0
76.0
75.0
...
200
200
200
199
198
200
201.0
201.0
201.0
203.0
12061
62675
12575
New Windsor
NY
New York
Orange
12062
77.0
77.0
77.0
...
140
141
140
141
142
143
145.0
146.0
147.0
148.0
12068
60537
7021
Essex Fells
NJ
New York
Essex
12069
123.0
124.0
126.0
...
308
317
328
338
346
353
356.0
356.0
352.0
351.0
12166
62307
11960
Remsenburg
NY
New York
Suffolk
12167
NaN
NaN
NaN
...
486
452
449
470
486
504
519.0
519.0
515.0
513.0
12202
60722
7620
Alpine
NJ
New York
Bergen
12203
NaN
NaN
NaN
...
550
550
555
560
559
558
557.0
554.0
555.0
559.0
12205
61161
8738
Mantoloking
NJ
New York
Ocean
12206
170.0
168.0
164.0
...
471
472
473
476
482
491
500.0
507.0
507.0
503.0
12247
61918
10917
Woodbury
NY
New York
Orange
12248
78.0
79.0
81.0
...
152
152
152
154
154
154
155.0
157.0
158.0
159.0
12288
62676
12577
Beaverdam Lake-Salisbury Mills
NY
New York
Orange
12289
82.0
82.0
83.0
...
149
149
150
150
150
151
152.0
152.0
153.0
154.0
12299
61965
10986
Town of Stony Point
NY
New York
Rockland
12300
88.0
88.0
90.0
...
173
172
172
172
174
174
173.0
172.0
172.0
173.0
12357
62705
12729
Deerpark
NY
New York
Orange
12358
70.0
68.0
67.0
...
106
106
106
106
107
108
109.0
110.0
110.0
110.0
12412
60834
7847
Succasunna
NJ
New York
Morris
12413
95.0
95.0
95.0
...
177
177
177
176
176
177
177.0
178.0
178.0
179.0
12467
61239
8887
Readington
NJ
New York
Hunterdon
12468
87.0
89.0
89.0
...
152
152
152
154
157
160
160.0
161.0
163.0
164.0
12510
62295
11948
Laurel
NY
New York
Suffolk
12511
NaN
NaN
NaN
...
307
302
303
311
318
323
323.0
323.0
326.0
327.0
12539
61853
10546
Chappaqua
NY
New York
Westchester
12540
146.0
148.0
149.0
...
320
316
314
315
317
320
323.0
323.0
319.0
316.0
12543
60875
7934
Peapack
NJ
New York
Somerset
12544
132.0
134.0
136.0
...
267
267
268
269
271
273
273.0
274.0
276.0
279.0
12624
60903
8006
Barnegat Light
NJ
New York
Ocean
12625
NaN
NaN
NaN
...
435
438
443
449
457
467
474.0
479.0
478.0
477.0
12697
61948
10964
Orangetown
NY
New York
Rockland
12698
NaN
NaN
NaN
...
323
323
322
321
322
323
322.0
319.0
319.0
320.0
733 rows × 259 columns
In [22]:
# Restrict the table to rows whose City column is exactly 'New York'.
ny_housedf = house_value[house_value['City'].eq('New York')]
# Column labels for every month from 2014-01 through 2016-12, in
# chronological order (zero-padded month so they match the CSV headers
# shown in the table output, e.g. '2016-06').
month_format = '{year}-{month:02}'
cols = [
    month_format.format(year=yr, month=mo)
    for yr in range(2014, 2017)
    for mo in range(1, 13)
]
cols
Out[22]:
['2014-01',
'2014-02',
'2014-03',
'2014-04',
'2014-05',
'2014-06',
'2014-07',
'2014-08',
'2014-09',
'2014-10',
'2014-11',
'2014-12',
'2015-01',
'2015-02',
'2015-03',
'2015-04',
'2015-05',
'2015-06',
'2015-07',
'2015-08',
'2015-09',
'2015-10',
'2015-11',
'2015-12',
'2016-01',
'2016-02',
'2016-03',
'2016-04',
'2016-05',
'2016-06',
'2016-07',
'2016-08',
'2016-09',
'2016-10',
'2016-11',
'2016-12']
In [8]:
# Keep the RegionName label followed by the monthly columns listed in `cols`.
used_cols = ['RegionName'] + list(cols)
df = ny_housedf[used_cols]
In [9]:
# One array per row of `df`: element 0 is RegionName, the rest are the
# monthly values in `cols` order.
plotdata = list(df.values)
In [10]:
import matplotlib.pyplot as plt
def plot_line(pdata):
    """Draw one row of `plotdata` as a line on the current axes.

    `pdata[0]` is skipped (it is used as the legend label by the caller);
    the remaining entries are plotted against their position in the row.
    """
    plt.plot(pdata[1:])


# Use an explicit loop: `map` is lazy on Python 3, so
# `map(plot_line, plotdata)` would build an iterator and never draw anything.
for region_row in plotdata:
    plot_line(region_row)

# The tick layout does not depend on the row, so set it once:
# label every third month with its column name.
plt.xticks(range(0, len(cols), 3), cols[::3], rotation='vertical')
plt.legend([region[0] for region in plotdata])
plt.show()
In [27]:
from sklearn.svm import SVR
import numpy as np
from sklearn.model_selection import cross_val_score
# When True, predict() also returns 5-fold cross-validation scores.
VALIDATE = False
# Number of leading monthly values used to fit the models.
test_months = 25


def predict(row):
    """Fit three SVR models on the start of a row and predict every month.

    Parameters
    ----------
    row : sequence
        `row[0]` is the region label; `row[1:]` are the monthly values in
        `cols` order. Only the first `test_months` values are used for
        fitting.

    Returns
    -------
    list
        `[y_lin, y_poly, y_rbf]`, each an array of `len(cols)` predictions
        from the linear, degree-2 polynomial and RBF kernels. When
        `VALIDATE` is true a fourth element is appended: a dict mapping
        'lin' / 'poly' / 'rbf' to that model's `cross_val_score` array.
    """
    # Rows come from an object-dtype array (label + numbers), so coerce the
    # targets to float explicitly before handing them to scikit-learn.
    y = np.asarray(row[1:test_months + 1], dtype=float)
    # Month index is the only feature; scikit-learn expects a 2-D X.
    X = np.arange(test_months).reshape(-1, 1)
    Xn = np.arange(len(cols)).reshape(-1, 1)

    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.005, epsilon=0.02)
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)

    y_rbf = svr_rbf.fit(X, y).predict(Xn)
    y_lin = svr_lin.fit(X, y).predict(Xn)
    y_poly = svr_poly.fit(X, y).predict(Xn)

    # Validate the models. The original referenced an undefined `clf` here,
    # which raised NameError; score each of the three estimators instead.
    if VALIDATE:
        scores = {
            'lin': cross_val_score(svr_lin, X, y, cv=5),
            'poly': cross_val_score(svr_poly, X, y, cv=5),
            'rbf': cross_val_score(svr_rbf, X, y, cv=5),
        }
        return [y_lin, y_poly, y_rbf, scores]
    return [y_lin, y_poly, y_rbf]
In [21]:
lw = 2

# Build everything this figure needs from the first region's row, so the
# cell does not depend on names that only exist inside predict().
sample_row = plotdata[0]
X = np.arange(test_months).reshape(-1, 1)
y = np.asarray(sample_row[1:test_months + 1], dtype=float)
Xn = np.arange(len(cols)).reshape(-1, 1)
# predict() may append validation scores as a fourth element; ignore them.
y_lin, y_poly, y_rbf = predict(sample_row)[:3]

plt.scatter(X.ravel(), y, color='darkorange', label='training data')
# Held-out months. sample_row[0] is the label, so month i lives at index
# i + 1: the first unseen month is index test_months + 1 and it belongs at
# x = test_months. (Slicing from test_months re-plotted the last training
# value and shifted every point one month to the right.)
plt.scatter(range(test_months, len(cols)),
            np.asarray(sample_row[test_months + 1:], dtype=float),
            label='untrained data')
plt.plot(Xn, y_rbf, color='navy', lw=lw, label='RBF model')
plt.plot(Xn, y_lin, color='c', lw=lw, label='Linear model')
plt.plot(Xn, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Support Vector Regression')
plt.legend()
plt.show()
In [26]:
# Sanity check: the three prediction arrays and `cols` should share one
# length, while `y` holds only the training months.
# print() with a single argument behaves the same on Python 2 and 3; the
# bare `print len(x)` statement is a SyntaxError on Python 3.
for seq in [y_lin, y_poly, y_rbf, cols, y]:
    print(len(seq))
36
36
36
36
25
Out[26]:
37
In [ ]:
# Collect one record per region: its label, the observed monthly values and
# the three model predictions.
exp = []
for row in plotdata:
    # predict() may append validation scores as a fourth element; keep only
    # the three prediction arrays.
    lin, poly, rbf = predict(row)[:3]
    # Build a fresh dict on every iteration. Appending one shared dict that
    # is mutated in place makes every entry of `exp` alias the last row.
    res = {
        "zip": row[0],
        "data": row[1:],
        "lin": lin,
        "poly": poly,
        "rbf": rbf,
    }
    exp.append(res)
In [ ]:
Content source: arvind-iyer/socdata
Similar notebooks: