In [8]:
import pandas as pd
import numpy as np
import csv
import sys
# Location of the merged restaurant / inspection CSV.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
reviewfile = "/Users/skhederian/restaurant-health/the_final_countdown.csv"

# Raise the csv module's per-field size cap as far as the platform allows
# (presumably so very long text fields do not abort the python-engine parse).
# sys.maxsize does not fit in a C long on some platforms (e.g. 64-bit Windows),
# where csv.field_size_limit raises OverflowError, so fall back to the largest
# 32-bit value instead of crashing the cell.
try:
    csv.field_size_limit(sys.maxsize)
except OverflowError:
    csv.field_size_limit(2**31 - 1)

# Parse with the pure-python engine and preview the first 100 rows.
df = pd.read_csv(reviewfile, sep=",", encoding='utf-8', engine='python')
df.head(100)
Out[8]:
_id
restaurant_name
address_full
business_id
review_count
inspection_date
stars
latitude
longitude
violations
...
Las Vegas
neighborhood0
neighborhood1
neighborhood2
neighborhood3
neighborhood4
neighborhood5
neighborhood6
PreviousViolations
DiffPreviousTwo
0
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
3/8/2012
4.0
35.000000
-81.000000
10
...
0
0
0
0
0
1
0
0
0
0
1
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
6/26/2012
4.0
35.000000
-81.000000
7
...
0
0
0
0
0
1
0
0
10
0
2
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
9/20/2012
4.0
35.000000
-81.000000
6
...
0
0
0
0
0
1
0
0
7
-3
3
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
12/7/2012
4.0
35.000000
-81.000000
3
...
0
0
0
0
0
1
0
0
6
-1
4
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
3/5/2013
4.0
35.000000
-81.000000
2
...
0
0
0
0
0
1
0
0
3
-3
5
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
4/25/2013
4.0
35.000000
-81.000000
8
...
0
0
0
0
0
1
0
0
2
-1
6
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
3/11/2014
4.0
35.000000
-81.000000
8
...
0
0
0
0
0
1
0
0
8
6
7
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
9/26/2014
4.0
35.000000
-81.000000
8
...
0
0
0
0
0
1
0
0
8
0
8
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
1/15/2015
4.0
35.000000
-81.000000
5
...
0
0
0
0
0
1
0
0
8
0
9
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
5/26/2015
4.0
35.000000
-81.000000
4
...
0
0
0
0
0
1
0
0
5
-3
10
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
9/24/2015
4.0
35.000000
-81.000000
6
...
0
0
0
0
0
1
0
0
4
-1
11
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
12/17/2015
4.0
35.000000
-81.000000
7
...
0
0
0
0
0
1
0
0
6
2
12
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
3/28/2016
4.0
35.000000
-81.000000
6
...
0
0
0
0
0
1
0
0
7
1
13
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
6/29/2016
4.0
35.000000
-81.000000
7
...
0
0
0
0
0
1
0
0
6
-1
14
ObjectId(581151a5b6729e7e313481a0)
131 MAIN
9886 REA RD
TQU6uoswhbcI2Mj3EV505w
209
9/28/2016
4.0
35.000000
-81.000000
3
...
0
0
0
0
0
1
0
0
7
1
15
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
2/4/2010
4.0
42.351325
-71.061060
1
...
0
0
0
0
1
0
0
1
0
0
16
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
4/1/2016
4.0
42.351325
-71.061060
5
...
0
0
0
0
1
0
0
1
1
0
17
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
4/1/2016
4.0
42.351325
-71.061060
5
...
0
0
0
0
1
0
0
1
5
4
18
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
4/15/2016
4.0
42.351325
-71.061060
5
...
0
0
0
0
1
0
0
1
5
0
19
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
8/25/2016
4.0
42.351325
-71.061060
5
...
0
0
0
0
1
0
0
1
5
0
20
ObjectId(5830680cf3f071f6de30b48f)
163 Vietnamese Sandwich
66 Harrison AV BOSTON 02111
fOBp9DWgd_FGXh15TgO-6Q
238
9/9/2016
4.0
42.351325
-71.061060
5
...
0
0
0
0
1
0
0
1
5
0
21
ObjectId(581151a8b6729e7e31355077)
1897 MARKET
5501 JOSH BIRMINGHAM PKY UNIT 13
rGmkna1Wixy7Hst3kJG3Dw
144
6/22/2015
4.0
35.000000
-81.000000
8
...
0
0
1
1
1
1
0
0
0
0
22
ObjectId(581151a8b6729e7e31355077)
1897 MARKET
5501 JOSH BIRMINGHAM PKY UNIT 13
rGmkna1Wixy7Hst3kJG3Dw
144
12/22/2015
4.0
35.000000
-81.000000
7
...
0
0
1
1
1
1
0
0
8
0
23
ObjectId(581151a8b6729e7e31355077)
1897 MARKET
5501 JOSH BIRMINGHAM PKY UNIT 13
rGmkna1Wixy7Hst3kJG3Dw
144
2/4/2016
4.0
35.000000
-81.000000
11
...
0
0
1
1
1
1
0
0
7
-1
24
ObjectId(581151a8b6729e7e31355077)
1897 MARKET
5501 JOSH BIRMINGHAM PKY UNIT 13
rGmkna1Wixy7Hst3kJG3Dw
144
4/6/2016
4.0
35.000000
-81.000000
6
...
0
0
1
1
1
1
0
0
11
4
25
ObjectId(581151a8b6729e7e31355077)
1897 MARKET
5501 JOSH BIRMINGHAM PKY UNIT 13
rGmkna1Wixy7Hst3kJG3Dw
144
8/10/2016
4.0
35.000000
-81.000000
7
...
0
0
1
1
1
1
0
0
6
-5
26
ObjectId(581151a7b6729e7e31351889)
1900 ASIAN CUISINE
5115 Spring Mountain Rd 103 Las Vegas 89146
3zhQAE8jB3PAgDoL1FjNfw
66
2/20/2013
4.0
36.000000
-115.000000
97
...
1
0
0
0
1
0
0
1
0
0
27
ObjectId(581151a7b6729e7e31351889)
1900 ASIAN CUISINE
5115 Spring Mountain Rd 103 Las Vegas 89146
3zhQAE8jB3PAgDoL1FjNfw
66
7/10/2013
4.0
36.000000
-115.000000
31
...
1
0
0
0
1
0
0
1
97
0
28
ObjectId(581151a7b6729e7e31351889)
1900 ASIAN CUISINE
5115 Spring Mountain Rd 103 Las Vegas 89146
3zhQAE8jB3PAgDoL1FjNfw
66
7/10/2013
4.0
36.000000
-115.000000
31
...
1
0
0
0
1
0
0
1
31
-66
29
ObjectId(581151a7b6729e7e31351889)
1900 ASIAN CUISINE
5115 Spring Mountain Rd 103 Las Vegas 89146
3zhQAE8jB3PAgDoL1FjNfw
66
8/27/2013
4.0
36.000000
-115.000000
24
...
1
0
0
0
1
0
0
1
31
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
70
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/24/2009
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
0
0
71
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/13/2009
4.0
42.322280
-71.060925
4
...
0
0
0
0
1
1
0
1
6
0
72
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/13/2009
4.0
42.322280
-71.060925
4
...
0
0
0
0
1
1
0
1
4
-2
73
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/1/2011
4.0
42.322280
-71.060925
2
...
0
0
0
0
1
1
0
1
4
0
74
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/1/2011
4.0
42.322280
-71.060925
2
...
0
0
0
0
1
1
0
1
2
-2
75
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/15/2011
4.0
42.322280
-71.060925
3
...
0
0
0
0
1
1
0
1
2
0
76
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/15/2011
4.0
42.322280
-71.060925
3
...
0
0
0
0
1
1
0
1
3
1
77
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/12/2012
4.0
42.322280
-71.060925
4
...
0
0
0
0
1
1
0
1
3
0
78
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/12/2012
4.0
42.322280
-71.060925
4
...
0
0
0
0
1
1
0
1
4
1
79
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/26/2012
4.0
42.322280
-71.060925
8
...
0
0
0
0
1
1
0
1
4
0
80
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
4/26/2012
4.0
42.322280
-71.060925
8
...
0
0
0
0
1
1
0
1
8
4
81
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
5/1/2012
4.0
42.322280
-71.060925
1
...
0
0
0
0
1
1
0
1
8
0
82
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
5/1/2012
4.0
42.322280
-71.060925
1
...
0
0
0
0
1
1
0
1
1
-7
83
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
12/30/2012
4.0
42.322280
-71.060925
1
...
0
0
0
0
1
1
0
1
1
0
84
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/18/2013
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
1
0
85
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/18/2013
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
6
5
86
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/25/2013
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
6
0
87
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/2/2013
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
6
0
88
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/15/2014
4.0
42.322280
-71.060925
9
...
0
0
0
0
1
1
0
1
6
0
89
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/15/2014
4.0
42.322280
-71.060925
9
...
0
0
0
0
1
1
0
1
9
3
90
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/23/2014
4.0
42.322280
-71.060925
5
...
0
0
0
0
1
1
0
1
9
0
91
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/23/2014
4.0
42.322280
-71.060925
5
...
0
0
0
0
1
1
0
1
5
-4
92
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/27/2014
4.0
42.322280
-71.060925
1
...
0
0
0
0
1
1
0
1
5
0
93
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
10/27/2014
4.0
42.322280
-71.060925
1
...
0
0
0
0
1
1
0
1
1
-4
94
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
1/6/2015
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
1
0
95
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
1/6/2015
4.0
42.322280
-71.060925
6
...
0
0
0
0
1
1
0
1
6
5
96
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
1/13/2015
4.0
42.322280
-71.060925
5
...
0
0
0
0
1
1
0
1
6
0
97
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
1/13/2015
4.0
42.322280
-71.060925
5
...
0
0
0
0
1
1
0
1
5
-1
98
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/27/2016
4.0
42.322280
-71.060925
8
...
0
0
0
0
1
1
0
1
5
0
99
ObjectId(5830680cf3f071f6de30b431)
224 BOSTON STREET
224 Boston ST Dorchester 02125
63CjKaTYchwqUFfiBV-zhg
130
9/27/2016
4.0
42.322280
-71.060925
8
...
0
0
0
0
1
1
0
1
8
3
100 rows × 68 columns
In [10]:
# Standardization (zero-mean scaling): gather the numeric feature columns
# into a plain NumPy array that the scaler in the next cell consumes.
from sklearn import preprocessing
X = np.array(
    df.loc[:, [
        'review_count',
        'stars',
        'violations',
        'pricerange',
        'PreviousViolations',
        'DiffPreviousTwo',
    ]]
)
In [11]:
# Standardize column-wise (axis=0): centre each feature on zero and scale it
# to unit variance. The keyword values below are the library defaults, spelled
# out for the reader.
X_scaled = preprocessing.scale(X, axis=0, with_mean=True, with_std=True)
In [12]:
# Echo the standardized feature matrix as this cell's output.
X_scaled
Out[12]:
array([[ 0.47068601, 0.53588951, 0.34934962, 0.85559539, -0.81865411,
0.03426208],
[ 0.47068601, 0.53588951, -0.01599034, 0.85559539, 0.3977954 ,
0.03426208],
[ 0.47068601, 0.53588951, -0.13777032, 0.85559539, 0.03286054,
-0.34658076],
...,
[-0.63517597, 1.94939932, -0.38133029, 0.85559539, -0.45371926,
0.1612097 ],
[-0.63517597, 1.94939932, -0.25955031, 0.85559539, -0.33207431,
0.1612097 ],
[-0.63517597, 1.94939932, -0.01599034, 0.85559539, -0.21042936,
0.1612097 ]])
In [15]:
# Min-max scaling: map every feature column onto the 0-1 range.
X_train = np.array(
    df.loc[:, [
        'review_count',
        'stars',
        'violations',
        'pricerange',
        'PreviousViolations',
        'DiffPreviousTwo',
    ]]
)
# Learn the per-column minimum and maximum, then apply the rescaling to the
# same data the scaler was fitted on.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X_train)
X_train_minmax = min_max_scaler.transform(X_train)
X_train_minmax
Out[15]:
array([[ 0.10734758, 0.75 , 0.1 , 0.5 , 0. ,
0.5 ],
[ 0.10734758, 0.75 , 0.07 , 0.5 , 0.1 ,
0.5 ],
[ 0.10734758, 0.75 , 0.06 , 0.5 , 0.07 ,
0.485 ],
...,
[ 0.00260552, 1. , 0.04 , 0.5 , 0.03 ,
0.505 ],
[ 0.00260552, 1. , 0.05 , 0.5 , 0.04 ,
0.505 ],
[ 0.00260552, 1. , 0.07 , 0.5 , 0.05 ,
0.505 ]])
In [16]:
#Normalization (scaling individual samples to have unit norm)
# Build the feature matrix inside this cell and normalize *that* array. The
# earlier version built it, then discarded it and normalized `X` left over from
# a previous cell, so the result silently depended on hidden kernel state.
X_normalized = np.array(df[['review_count', 'stars', 'violations', 'pricerange', 'PreviousViolations', 'DiffPreviousTwo']])
# Rescale each row (sample) independently so its L2 norm equals 1.
X_normalized = preprocessing.normalize(X_normalized, norm='l2')
X_normalized
Out[16]:
array([[ 0.99862923, 0.01911252, 0.0477813 , 0.00955626, 0. ,
0. ],
[ 0.99807112, 0.01910184, 0.03342822, 0.00955092, 0.0477546 ,
0. ],
[ 0.99869763, 0.01911383, 0.02867075, 0.00955692, 0.0334492 ,
-0.01433537],
...,
[ 0.7333588 , 0.45834925, 0.3666794 , 0.1833397 , 0.27500955,
0.09166985],
[ 0.68853037, 0.43033148, 0.43033148, 0.17213259, 0.34426519,
0.0860663 ],
[ 0.6172134 , 0.38575837, 0.54006172, 0.15430335, 0.38575837,
0.07715167]])
In [ ]:
Content source: georgetown-analytics/restaurant-health
Similar notebooks: