In [1]:
%matplotlib inline
import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import csv
import sys
from pandas.plotting import scatter_matrix  # pandas.tools.plotting was removed in pandas 0.20
from sklearn.model_selection import train_test_split as tts  # sklearn.cross_validation was removed in 0.20
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
# RandomizedLasso was removed from scikit-learn (0.21) and is not used below
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn import preprocessing
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.feature_selection import chi2
from scipy import stats
In [2]:
#Import data
data = pd.read_csv(r'D:\yelp\data\Final_Regression_Data\Final\the_final_countdown-1\the_final_countdown.csv')
In [3]:
data.head()
Out[3]:
In [4]:
data.shape
Out[4]:
In [5]:
data.describe()
Out[5]:
In [6]:
x = data['stars']
y = data['review_count']
plt.scatter(x, y)
plt.xlabel('stars')
plt.ylabel('review_count')
plt.title('Review count by star rating')
print('Mean stars:', x.mean(), 'Mean review count:', y.mean())
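As a quick follow-up, a Pearson correlation puts a number on the stars/review-count relationship seen in the scatter plot; this is a small added sketch using scipy.stats, which is already imported in In [1].

In [ ]:
# Sketch: quantify the relationship plotted above.
r, p = stats.pearsonr(data['stars'], data['review_count'])
print('Pearson r between stars and review_count: %0.3f (p = %0.3g)' % (r, p))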
In [7]:
x = data['review_count']
x.hist(bins=500, range=[0, 1000])
plt.xlabel('review_count')
Out[7]:
In [8]:
x = data['review_count']
x.hist(bins=500, range=[0, 500])
plt.xlabel('review_count')
plt.title('Distribution of review counts')
print('Mean number of reviews:', x.mean())
In [9]:
x = data['violations']
x.hist(bins=50, range=[0, 50])
plt.xlabel('violations')
plt.title('Distribution of violation counts')
print('The mean number of violations is:', x.mean(), 'Standard deviation:', x.std())
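The histogram suggests a right-skewed violation distribution; as a hedged check, scipy's skew statistic makes that concrete.

In [ ]:
# Sketch: measure the skewness of the violation counts shown above.
print('Skewness of violations:', stats.skew(data['violations']))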
In [10]:
data.boxplot('violations', by='stars', figsize =(10,8))
plt.ylim(0,25)
Out[10]:
In [11]:
vhist = data.boxplot(['violations'], by='LasVegas', figsize=(5,5))
vhist.set_ylim(0,25)
print('Number of Las Vegas instances:', data['LasVegas'].sum())
vhist2 = data.boxplot(['violations'], by='Charlotte', figsize=(5,5))
vhist2.set_ylim(0,25)
print('Number of Charlotte instances:', data['Charlotte'].sum())
vhist3 = data.boxplot(['violations'], by='Boston', figsize=(5,5))
vhist3.set_ylim(0,25)
print('Number of Boston instances:', data['Boston'].sum())
In [12]:
sns.pairplot(data, x_vars=['review_count','PreviousViolations', 'DiffPreviousTwo'], y_vars='violations', height=7, aspect=.7)  # `size` was renamed `height` in seaborn 0.9
Out[12]:
In [13]:
# Scale the continuous independent variables to zero mean and unit variance
cont_cols = ['review_count', 'stars', 'pricerange', 'PreviousViolations', 'DiffPreviousTwo']
scaled = preprocessing.scale(data[cont_cols].values)
final = pd.DataFrame(scaled, columns=cont_cols)
# Drop the unscaled originals and rejoin the scaled columns
ndf = data.drop(cont_cols, axis=1)
df = pd.concat([final, ndf], axis=1)
df.shape
Out[13]:
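One hedged caveat on this step: scaling the full dataset before the train/test split lets test-set statistics inform the training data. A minimal sketch of the leak-free alternative, assuming the same feature matrix used below:

In [ ]:
# Sketch: fitting a StandardScaler inside a Pipeline confines the scaling
# statistics to whatever data the pipeline is fit on (e.g. the training folds).
from sklearn.preprocessing import StandardScaler
leakfree = Pipeline([('scale', StandardScaler()),
                     ('ols', LinearRegression())])
# leakfree.fit(X_train, y_train) would then scale using training statistics only.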
In [14]:
variables = df[['PreviousViolations','DiffPreviousTwo','IsAsian','IsFrench','IsSandwiches',
'IsFastFood','IsBurgers','IsItalian','IsHawaiian','IsSouthern','IsMexican','IsLatinAmerican','IsMiddleEastern',
'IsGreek','IsAmerican','IsDonuts','IsIndian','IsSeafood','IsDesserts','IsSalad','Pizza','IsBuffets',
'IsSushiBars','IsDelis','IsSports Bars','IsBakeries','IsPubs','IsCaterers','IsDiners','IsCafes','IsBars',
'alcohol','delivery','dogsallowed','smoking','goodforkids','outdoorseating','waiterservice','creditcards',
'pricerange','drivethru','tourist','classy','hipster','latenight','upscale','divey','Boston','Charlotte',
'LasVegas','neighborhood0','neighborhood1','neighborhood2','neighborhood3','neighborhood4','neighborhood5','neighborhood6']]
target = df['violations']
print('variables:', variables.shape, 'target:', target.shape)
In [37]:
#Train/test split (8.3% held out for testing)
# consider passing random_state=<seed> for a reproducible split
X_train, X_test, y_train, y_test = tts(variables, target, test_size=0.083)
In [16]:
#Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Linear Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
#Plot measured vs. predicted values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Linear Regression')
plt.show()
In [17]:
labels = np.array(['Variable', 'Coefficient'])
coefs1 = pd.DataFrame(list(zip(variables, model.coef_)), columns = labels)
coefs1
Out[17]:
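A single train/test split can give a noisy estimate; as an added sketch, 5-fold cross-validation (cross_val_score is imported in In [1]) gives a more stable R² figure for the linear model.

In [ ]:
# Sketch: 5-fold cross-validated R^2 for the plain linear regression.
scores = cross_val_score(LinearRegression(), variables, target, cv=5, scoring='r2')
print('CV R^2: %0.3f (+/- %0.3f)' % (scores.mean(), scores.std()))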
In [18]:
# Remove outliers with extreme violation counts (more than 4 standard deviations from the mean)
o = df.copy()
odf = o[((o.violations - o.violations.mean()) / o.violations.std()).abs() < 4]
odf.shape
Out[18]:
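For context, the small added sketch below reports how many rows the 4-sigma filter dropped.

In [ ]:
# Sketch: count the rows removed by the outlier filter above.
print('Rows removed as outliers:', len(df) - len(odf))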
In [19]:
#Select variables and target for cross validation
variables = odf[['PreviousViolations','DiffPreviousTwo','IsAsian','IsFrench','IsSandwiches',
'IsFastFood','IsBurgers','IsItalian','IsHawaiian','IsSouthern','IsMexican','IsLatinAmerican','IsMiddleEastern',
'IsGreek','IsAmerican','IsDonuts','IsIndian','IsSeafood','IsDesserts','IsSalad','Pizza','IsBuffets',
'IsSushiBars','IsDelis','IsSports Bars','IsBakeries','IsPubs','IsCaterers','IsDiners','IsCafes','IsBars',
'alcohol','delivery','dogsallowed','smoking','goodforkids','outdoorseating','waiterservice','creditcards',
'pricerange','drivethru','tourist','classy','hipster','latenight','upscale','divey','Boston','Charlotte',
'LasVegas','neighborhood0','neighborhood1','neighborhood2','neighborhood3','neighborhood4','neighborhood5','neighborhood6']]
target = odf['violations']
print('variables:', variables.shape, 'target:', target.shape)
In [20]:
#Train/test split
# note: test_size=0.83 holds out 83% of the rows; the earlier split used 0.083
X_train, X_test, y_train, y_test = tts(variables, target, test_size=0.83)
In [21]:
#Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Linear Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
#Plot measured vs. predicted values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Linear Regression Outliers Removed')
plt.show()
In [22]:
labels = np.array(['Variable', 'Coefficient'])
coefs2 = pd.DataFrame(list(zip(variables, model.coef_)), columns = labels)
coefs2
Out[22]:
In [23]:
#Ridge Regression
model = Ridge(alpha=.1)
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Ridge Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
#Plot values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Ridge Regression')
plt.show()
In [24]:
labels = np.array(['Variable', 'Coefficient'])
coefs3 = pd.DataFrame(list(zip(variables, model.coef_)), columns = labels)
coefs3
Out[24]:
In [26]:
# Investigate the alpha level for the Ridge Regression model
n_alphas = 200
alphas = np.logspace(-4, 4, n_alphas)  # grid from 1e-4 to 1e4; much wider exponents overflow floating point
model = linear_model.RidgeCV(alphas=alphas)
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print('Alpha chosen:', model.alpha_, 'Score:', model.score(X_test, y_test))
#Plot values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Ridge Regression (CV-selected alpha)')
plt.show()
In [27]:
#Lasso Regression
model = Lasso()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
# Evaluate fit of the model
print("Lasso Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
#Plot values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Lasso Regression')
plt.show()
In [28]:
labels = np.array(['Variable', 'Coefficient'])
coefs4 = pd.DataFrame(list(zip(variables, model.coef_)), columns = labels)
coefs4
Out[28]:
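The Lasso above uses the default alpha=1.0; paralleling the RidgeCV cell, an added sketch with LassoCV would pick the regularization strength by cross-validation instead.

In [ ]:
# Sketch: choose the Lasso alpha by 5-fold cross-validation over a log grid.
from sklearn.linear_model import LassoCV
lcv = LassoCV(alphas=np.logspace(-4, 1, 50), cv=5)
lcv.fit(X_train, y_train)
print('Alpha chosen:', lcv.alpha_, 'Test R^2: %0.3f' % lcv.score(X_test, y_test))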
In [29]:
model = ElasticNet()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Random Forest model")
print("Mean squared error = %0.3f" % mse(expected, predicted))
print("R2 score = %0.3f" % r2_score(expected, predicted))
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Elastic Net')
plt.show()
In [30]:
labels = np.array(['Variable', 'Coefficient'])
coefs5 = pd.DataFrame(list(zip(variables, model.coef_)), columns = labels)
coefs5
Out[30]:
In [31]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Random Forest model")
print("Mean squared error = %0.3f" % mse(expected, predicted))
print("R2 score = %0.3f" % r2_score(expected, predicted))
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [target.min(), target.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Random Forest')
plt.show()
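Unlike the linear models above, the random forest has no coefficient table; its feature_importances_ attribute plays the analogous role. The added sketch below ranks the predictors by it.

In [ ]:
# Sketch: rank the predictors by the fitted forest's impurity-based importances.
importances = pd.DataFrame(list(zip(variables, model.feature_importances_)),
                           columns=['Variable', 'Importance'])
importances.sort_values('Importance', ascending=False).head(10)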
In [38]:
polypipe = Pipeline([('Polynomial', PolynomialFeatures(2)),
('LinearRegression', LinearRegression())])
polypipe.fit(X_train, y_train)
expected = y_test
predicted = polypipe.predict(X_test)
print("Linear Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [predicted.min(), predicted.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title ('Polynomial 2nd Regression')
plt.show()
In [34]:
labels = np.array(['Variable', 'Coefficient'])
# PolynomialFeatures expands the design matrix, so label the coefficients with its
# generated feature names (get_feature_names was renamed get_feature_names_out in scikit-learn 1.0)
poly_names = polypipe.named_steps['Polynomial'].get_feature_names(variables.columns)
coefs6 = pd.DataFrame(list(zip(poly_names, polypipe.named_steps['LinearRegression'].coef_)), columns=labels)
coefs6
Out[34]:
In [36]:
featurepipe = Pipeline([('Dimension Reduction', PCA()),
('Random Forest', RandomForestRegressor())])
featurepipe.fit(X_train, y_train)
expected = y_test
predicted = featurepipe.predict(X_test)
print("Random Forest model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
#Plot values
fig, ax = plt.subplots()
ax.scatter(expected, predicted)
ax.plot([target.min(), target.max()], [predicted.min(), predicted.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.title('Random Forest PCA')
plt.show()
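To judge whether the (default, full-rank) PCA step is actually reducing anything useful, an added sketch can inspect the variance captured per component.

In [ ]:
# Sketch: per-component explained variance from the fitted PCA step.
pca = featurepipe.named_steps['Dimension Reduction']
print('Explained variance ratio (first 10 components):')
print(pca.explained_variance_ratio_[:10])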