In [8]:
import pandas as pd
import numpy as np
import csv
import sys

# Raise the csv module's maximum field size to the largest value available
# before any parsing happens.
csv.field_size_limit(sys.maxsize)

# CSV file that is loaded into `df` below.
reviewfile = "/Users/skhederian/restaurant-health/the_final_countdown.csv"

# Parse the file with pandas' pure-Python engine.
df = pd.read_csv(
    reviewfile,
    sep=",",
    encoding='utf-8',
    engine='python',
)

# Preview the first 100 rows.
df.head(100)


Out[8]:
_id restaurant_name address_full business_id review_count inspection_date stars latitude longitude violations ... Las Vegas neighborhood0 neighborhood1 neighborhood2 neighborhood3 neighborhood4 neighborhood5 neighborhood6 PreviousViolations DiffPreviousTwo
0 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 3/8/2012 4.0 35.000000 -81.000000 10 ... 0 0 0 0 0 1 0 0 0 0
1 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 6/26/2012 4.0 35.000000 -81.000000 7 ... 0 0 0 0 0 1 0 0 10 0
2 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 9/20/2012 4.0 35.000000 -81.000000 6 ... 0 0 0 0 0 1 0 0 7 -3
3 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 12/7/2012 4.0 35.000000 -81.000000 3 ... 0 0 0 0 0 1 0 0 6 -1
4 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 3/5/2013 4.0 35.000000 -81.000000 2 ... 0 0 0 0 0 1 0 0 3 -3
5 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 4/25/2013 4.0 35.000000 -81.000000 8 ... 0 0 0 0 0 1 0 0 2 -1
6 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 3/11/2014 4.0 35.000000 -81.000000 8 ... 0 0 0 0 0 1 0 0 8 6
7 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 9/26/2014 4.0 35.000000 -81.000000 8 ... 0 0 0 0 0 1 0 0 8 0
8 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 1/15/2015 4.0 35.000000 -81.000000 5 ... 0 0 0 0 0 1 0 0 8 0
9 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 5/26/2015 4.0 35.000000 -81.000000 4 ... 0 0 0 0 0 1 0 0 5 -3
10 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 9/24/2015 4.0 35.000000 -81.000000 6 ... 0 0 0 0 0 1 0 0 4 -1
11 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 12/17/2015 4.0 35.000000 -81.000000 7 ... 0 0 0 0 0 1 0 0 6 2
12 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 3/28/2016 4.0 35.000000 -81.000000 6 ... 0 0 0 0 0 1 0 0 7 1
13 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 6/29/2016 4.0 35.000000 -81.000000 7 ... 0 0 0 0 0 1 0 0 6 -1
14 ObjectId(581151a5b6729e7e313481a0) 131 MAIN 9886 REA RD TQU6uoswhbcI2Mj3EV505w 209 9/28/2016 4.0 35.000000 -81.000000 3 ... 0 0 0 0 0 1 0 0 7 1
15 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 2/4/2010 4.0 42.351325 -71.061060 1 ... 0 0 0 0 1 0 0 1 0 0
16 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 4/1/2016 4.0 42.351325 -71.061060 5 ... 0 0 0 0 1 0 0 1 1 0
17 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 4/1/2016 4.0 42.351325 -71.061060 5 ... 0 0 0 0 1 0 0 1 5 4
18 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 4/15/2016 4.0 42.351325 -71.061060 5 ... 0 0 0 0 1 0 0 1 5 0
19 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 8/25/2016 4.0 42.351325 -71.061060 5 ... 0 0 0 0 1 0 0 1 5 0
20 ObjectId(5830680cf3f071f6de30b48f) 163 Vietnamese Sandwich 66 Harrison AV BOSTON 02111 fOBp9DWgd_FGXh15TgO-6Q 238 9/9/2016 4.0 42.351325 -71.061060 5 ... 0 0 0 0 1 0 0 1 5 0
21 ObjectId(581151a8b6729e7e31355077) 1897 MARKET 5501 JOSH BIRMINGHAM PKY UNIT 13 rGmkna1Wixy7Hst3kJG3Dw 144 6/22/2015 4.0 35.000000 -81.000000 8 ... 0 0 1 1 1 1 0 0 0 0
22 ObjectId(581151a8b6729e7e31355077) 1897 MARKET 5501 JOSH BIRMINGHAM PKY UNIT 13 rGmkna1Wixy7Hst3kJG3Dw 144 12/22/2015 4.0 35.000000 -81.000000 7 ... 0 0 1 1 1 1 0 0 8 0
23 ObjectId(581151a8b6729e7e31355077) 1897 MARKET 5501 JOSH BIRMINGHAM PKY UNIT 13 rGmkna1Wixy7Hst3kJG3Dw 144 2/4/2016 4.0 35.000000 -81.000000 11 ... 0 0 1 1 1 1 0 0 7 -1
24 ObjectId(581151a8b6729e7e31355077) 1897 MARKET 5501 JOSH BIRMINGHAM PKY UNIT 13 rGmkna1Wixy7Hst3kJG3Dw 144 4/6/2016 4.0 35.000000 -81.000000 6 ... 0 0 1 1 1 1 0 0 11 4
25 ObjectId(581151a8b6729e7e31355077) 1897 MARKET 5501 JOSH BIRMINGHAM PKY UNIT 13 rGmkna1Wixy7Hst3kJG3Dw 144 8/10/2016 4.0 35.000000 -81.000000 7 ... 0 0 1 1 1 1 0 0 6 -5
26 ObjectId(581151a7b6729e7e31351889) 1900 ASIAN CUISINE 5115 Spring Mountain Rd 103 Las Vegas 89146 3zhQAE8jB3PAgDoL1FjNfw 66 2/20/2013 4.0 36.000000 -115.000000 97 ... 1 0 0 0 1 0 0 1 0 0
27 ObjectId(581151a7b6729e7e31351889) 1900 ASIAN CUISINE 5115 Spring Mountain Rd 103 Las Vegas 89146 3zhQAE8jB3PAgDoL1FjNfw 66 7/10/2013 4.0 36.000000 -115.000000 31 ... 1 0 0 0 1 0 0 1 97 0
28 ObjectId(581151a7b6729e7e31351889) 1900 ASIAN CUISINE 5115 Spring Mountain Rd 103 Las Vegas 89146 3zhQAE8jB3PAgDoL1FjNfw 66 7/10/2013 4.0 36.000000 -115.000000 31 ... 1 0 0 0 1 0 0 1 31 -66
29 ObjectId(581151a7b6729e7e31351889) 1900 ASIAN CUISINE 5115 Spring Mountain Rd 103 Las Vegas 89146 3zhQAE8jB3PAgDoL1FjNfw 66 8/27/2013 4.0 36.000000 -115.000000 24 ... 1 0 0 0 1 0 0 1 31 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
70 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/24/2009 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 0 0
71 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/13/2009 4.0 42.322280 -71.060925 4 ... 0 0 0 0 1 1 0 1 6 0
72 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/13/2009 4.0 42.322280 -71.060925 4 ... 0 0 0 0 1 1 0 1 4 -2
73 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/1/2011 4.0 42.322280 -71.060925 2 ... 0 0 0 0 1 1 0 1 4 0
74 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/1/2011 4.0 42.322280 -71.060925 2 ... 0 0 0 0 1 1 0 1 2 -2
75 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/15/2011 4.0 42.322280 -71.060925 3 ... 0 0 0 0 1 1 0 1 2 0
76 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/15/2011 4.0 42.322280 -71.060925 3 ... 0 0 0 0 1 1 0 1 3 1
77 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/12/2012 4.0 42.322280 -71.060925 4 ... 0 0 0 0 1 1 0 1 3 0
78 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/12/2012 4.0 42.322280 -71.060925 4 ... 0 0 0 0 1 1 0 1 4 1
79 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/26/2012 4.0 42.322280 -71.060925 8 ... 0 0 0 0 1 1 0 1 4 0
80 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 4/26/2012 4.0 42.322280 -71.060925 8 ... 0 0 0 0 1 1 0 1 8 4
81 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 5/1/2012 4.0 42.322280 -71.060925 1 ... 0 0 0 0 1 1 0 1 8 0
82 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 5/1/2012 4.0 42.322280 -71.060925 1 ... 0 0 0 0 1 1 0 1 1 -7
83 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 12/30/2012 4.0 42.322280 -71.060925 1 ... 0 0 0 0 1 1 0 1 1 0
84 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/18/2013 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 1 0
85 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/18/2013 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 6 5
86 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/25/2013 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 6 0
87 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/2/2013 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 6 0
88 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/15/2014 4.0 42.322280 -71.060925 9 ... 0 0 0 0 1 1 0 1 6 0
89 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/15/2014 4.0 42.322280 -71.060925 9 ... 0 0 0 0 1 1 0 1 9 3
90 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/23/2014 4.0 42.322280 -71.060925 5 ... 0 0 0 0 1 1 0 1 9 0
91 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/23/2014 4.0 42.322280 -71.060925 5 ... 0 0 0 0 1 1 0 1 5 -4
92 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/27/2014 4.0 42.322280 -71.060925 1 ... 0 0 0 0 1 1 0 1 5 0
93 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 10/27/2014 4.0 42.322280 -71.060925 1 ... 0 0 0 0 1 1 0 1 1 -4
94 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 1/6/2015 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 1 0
95 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 1/6/2015 4.0 42.322280 -71.060925 6 ... 0 0 0 0 1 1 0 1 6 5
96 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 1/13/2015 4.0 42.322280 -71.060925 5 ... 0 0 0 0 1 1 0 1 6 0
97 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 1/13/2015 4.0 42.322280 -71.060925 5 ... 0 0 0 0 1 1 0 1 5 -1
98 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/27/2016 4.0 42.322280 -71.060925 8 ... 0 0 0 0 1 1 0 1 5 0
99 ObjectId(5830680cf3f071f6de30b431) 224 BOSTON STREET 224 Boston ST Dorchester 02125 63CjKaTYchwqUFfiBV-zhg 130 9/27/2016 4.0 42.322280 -71.060925 8 ... 0 0 0 0 1 1 0 1 8 3

100 rows × 68 columns


In [10]:
# Standardization (zero mean): assemble the feature matrix that gets scaled.
from sklearn import preprocessing

# Pull the six feature columns out of `df` as a plain NumPy array.
X = np.array(
    df[[
        'review_count',
        'stars',
        'violations',
        'pricerange',
        'PreviousViolations',
        'DiffPreviousTwo',
    ]]
)

In [11]:
# Standardize X with preprocessing.scale using its default arguments
# (per the scikit-learn docs: each column is centered to zero mean and
# scaled to unit variance).
X_scaled = preprocessing.scale(X)

In [12]:
# Display the standardized feature matrix.
X_scaled


Out[12]:
array([[ 0.47068601,  0.53588951,  0.34934962,  0.85559539, -0.81865411,
         0.03426208],
       [ 0.47068601,  0.53588951, -0.01599034,  0.85559539,  0.3977954 ,
         0.03426208],
       [ 0.47068601,  0.53588951, -0.13777032,  0.85559539,  0.03286054,
        -0.34658076],
       ..., 
       [-0.63517597,  1.94939932, -0.38133029,  0.85559539, -0.45371926,
         0.1612097 ],
       [-0.63517597,  1.94939932, -0.25955031,  0.85559539, -0.33207431,
         0.1612097 ],
       [-0.63517597,  1.94939932, -0.01599034,  0.85559539, -0.21042936,
         0.1612097 ]])

In [15]:
# Min-max scaling: rescale every feature column into the 0-1 range.
X_train = np.array(
    df[[
        'review_count',
        'stars',
        'violations',
        'pricerange',
        'PreviousViolations',
        'DiffPreviousTwo',
    ]]
)

# Learn each column's min/max from X_train, then apply the rescaling to it.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X_train)
X_train_minmax = min_max_scaler.transform(X_train)

# Display the rescaled matrix.
X_train_minmax


Out[15]:
array([[ 0.10734758,  0.75      ,  0.1       ,  0.5       ,  0.        ,
         0.5       ],
       [ 0.10734758,  0.75      ,  0.07      ,  0.5       ,  0.1       ,
         0.5       ],
       [ 0.10734758,  0.75      ,  0.06      ,  0.5       ,  0.07      ,
         0.485     ],
       ..., 
       [ 0.00260552,  1.        ,  0.04      ,  0.5       ,  0.03      ,
         0.505     ],
       [ 0.00260552,  1.        ,  0.05      ,  0.5       ,  0.04      ,
         0.505     ],
       [ 0.00260552,  1.        ,  0.07      ,  0.5       ,  0.05      ,
         0.505     ]])

In [16]:
# Normalization: scale each individual sample (row) to unit L2 norm.
#
# The feature matrix is built from `df` here and passed straight to
# normalize(). Previously the freshly built array was overwritten by
# normalize(X, ...), so the result silently depended on whatever `X` an
# earlier cell had left in the kernel.
#
# NOTE(review): the rows are normalized on the raw, unscaled features, so
# the column with the largest magnitude dominates each row's norm - confirm
# this is intended.
X_normalized = preprocessing.normalize(
    np.array(
        df[[
            'review_count',
            'stars',
            'violations',
            'pricerange',
            'PreviousViolations',
            'DiffPreviousTwo',
        ]]
    ),
    norm='l2',
)

# Display the row-normalized matrix.
X_normalized


Out[16]:
array([[ 0.99862923,  0.01911252,  0.0477813 ,  0.00955626,  0.        ,
         0.        ],
       [ 0.99807112,  0.01910184,  0.03342822,  0.00955092,  0.0477546 ,
         0.        ],
       [ 0.99869763,  0.01911383,  0.02867075,  0.00955692,  0.0334492 ,
        -0.01433537],
       ..., 
       [ 0.7333588 ,  0.45834925,  0.3666794 ,  0.1833397 ,  0.27500955,
         0.09166985],
       [ 0.68853037,  0.43033148,  0.43033148,  0.17213259,  0.34426519,
         0.0860663 ],
       [ 0.6172134 ,  0.38575837,  0.54006172,  0.15430335,  0.38575837,
         0.07715167]])

In [ ]: