In [1]:
import pandas as pd
# Load the input CSV into `df`, using the pure-Python parser.
# NOTE(review): INPUT_CSV is an absolute, machine-specific path; the notebook
# only runs where this file exists.
INPUT_CSV = '/Users/skhederian/restaurant-health/the_final_countdown.csv'
df = pd.read_csv(
    INPUT_CSV,
    sep=',',
    encoding='utf-8',
    engine='python',
)
In [2]:
# Empty accumulators, one per derived attribute column that will be added to df.
# Each name is bound to its own, independent list.

# Service / amenity attributes
alcohol, delivery, dogsallowed, smoking = [], [], [], []
goodforkids, outdoorseating, waiterservice, creditcards = [], [], [], []
pricerange, drivethru = [], []

# Ambience attributes
tourist, classy, hipster = [], [], []
latenight, upscale, divey = [], [], []
In [3]:
# alcohol: 1 when the text in positional column 12 of df contains either
# '"Alcohol":"full_bar"' or '"Alcohol":"wine_and_beer"', else 0.
# NOTE(review): column 12 is presumably the business-attributes text; it is
# addressed by position, so confirm the column order of the input file.
# The list is rebuilt from scratch (not appended to), so re-running this cell
# cannot produce a list longer than df; missing (NaN) cells count as 0 instead
# of raising TypeError.
alcohol_text = df.iloc[:, 12]
alcohol = (
    (
        alcohol_text.str.contains('"Alcohol":"full_bar"', regex=False, na=False)
        | alcohol_text.str.contains('"Alcohol":"wine_and_beer"', regex=False, na=False)
    )
    .astype(int)
    .tolist()
)
In [4]:
# delivery: 1 when the text in positional column 12 of df contains
# '"Delivery":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
delivery = (
    df.iloc[:, 12]
    .str.contains('"Delivery":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [5]:
# tourist: 1 when the text in positional column 12 of df contains
# '"touristy":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
tourist = (
    df.iloc[:, 12]
    .str.contains('"touristy":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [6]:
# goodforkids: 1 when the text in positional column 12 of df contains
# '"Good for Kids":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
goodforkids = (
    df.iloc[:, 12]
    .str.contains('"Good for Kids":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [7]:
# smoking: 1 when the text in positional column 12 of df contains
# '"Smoking":"yes"', else 0 (any other smoking value counts as 0).
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
smoking = (
    df.iloc[:, 12]
    .str.contains('"Smoking":"yes"', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [8]:
# classy: 1 when the text in positional column 12 of df contains
# '"classy":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
classy = (
    df.iloc[:, 12]
    .str.contains('"classy":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [9]:
# hipster: 1 when the text in positional column 12 of df contains
# '"hipster":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
hipster = (
    df.iloc[:, 12]
    .str.contains('"hipster":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [10]:
# waiterservice: 1 when the text in positional column 12 of df contains
# '"Waiter Service":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
waiterservice = (
    df.iloc[:, 12]
    .str.contains('"Waiter Service":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [11]:
# outdoorseating: 1 when the text in positional column 12 of df contains
# '"Outdoor Seating":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
outdoorseating = (
    df.iloc[:, 12]
    .str.contains('"Outdoor Seating":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [12]:
# latenight: 1 when the text in positional column 12 of df contains
# '"latenight":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
latenight = (
    df.iloc[:, 12]
    .str.contains('"latenight":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [13]:
# upscale: 1 when the text in positional column 12 of df contains
# '"upscale":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
upscale = (
    df.iloc[:, 12]
    .str.contains('"upscale":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [14]:
# dogsallowed: 1 when the text in positional column 12 of df contains
# '"Dogs Allowed":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
dogsallowed = (
    df.iloc[:, 12]
    .str.contains('"Dogs Allowed":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [15]:
# drivethru: 1 when the text in positional column 12 of df contains
# '"Drive-Thru":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
drivethru = (
    df.iloc[:, 12]
    .str.contains('"Drive-Thru":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [16]:
# divey: 1 when the text in positional column 12 of df contains
# '"divey":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
divey = (
    df.iloc[:, 12]
    .str.contains('"divey":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [17]:
# creditcards: 1 when the text in positional column 12 of df contains
# '"Accepts Credit Cards":true', else 0.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells count as 0 instead of raising TypeError.
creditcards = (
    df.iloc[:, 12]
    .str.contains('"Accepts Credit Cards":true', regex=False, na=False)
    .astype(int)
    .tolist()
)
In [18]:
# pricerange: the price level (1-4) found in the text of positional column 12
# of df, or 0 when no '"Price Range":<level>' marker is present.
# Levels are tried from 4 down to 1 and the first match wins, which is the same
# priority as the original if/elif chain.
# Rebuilt from scratch so re-running this cell cannot append duplicates;
# missing (NaN) cells are treated as empty text and therefore yield 0.
pricerange = [
    next(
        (level for level in (4, 3, 2, 1) if ('"Price Range":%d' % level) in text),
        0,
    )
    for text in df.iloc[:, 12].fillna('')
]
In [19]:
# Lookup table from city / neighborhood name to an integer area code.
# Built from (code, names) groups; insertion order follows the groups below.
city_map = {
    city_name: area_code
    for area_code, city_names in (
        (1, (
            'Boston', 'Charlestown', 'Roxbury Crossing', 'Dorchester',
            'East Boston', 'Allston', 'West Roxbury', 'Brighton',
            'Roslindale', 'Roxbury', 'South Boston', 'Hyde Park',
            'Dorchester Center', 'Jamaica Plain', 'Mattapan', 'Chestnut Hill',
        )),
        (2, (
            'Charlotte', 'Pineville', 'Matthews', 'Huntersville', 'Mint Hill',
        )),
        (3, (
            'North Las Vegas', 'Henderson', 'Las Vegas', 'Boulder City',
            'Casa Grande', 'Mesa', 'Chandler', 'Paradise', 'Glendale',
            'Spring Valley',
        )),
    )
    for city_name in city_names
}
In [20]:
# Encode city and neighborhood names as integer area codes:
#   Boston = 1, Charlotte = 2, Las Vegas = 3
# Names that are not keys of city_map become NaN (Series.map yields NaN for
# unmatched values).
# Only map while the column still holds names. Mapping an already-encoded
# (numeric) column again would look up 1/2/3 in city_map, find nothing, and
# turn every row into NaN -- which is what happened when this cell was re-run.
if not pd.api.types.is_numeric_dtype(df['city']):
    df['city'] = df['city'].map(city_map)
df['city'].unique()
Out[20]:
In [21]:
# newcolumns = [alcohol,delivery,dogsallowed,smoking,goodforkids,outdoorseating,waiterservice,creditcards,pricerange,
# drivethru,tourist,classy,hipster,latenight,upscale,divey]
# for x in newcolumns:
# newcol = pd.Series(x)
# df[x] = newcol.values
In [22]:
# Attach every derived attribute list to df as a new column, in this order.
# The values are assigned positionally (via .values), so each list must have
# exactly len(df) entries; pandas raises ValueError otherwise.
# The source lists are left untouched (not rebound to Series), so the cells
# that build them can be re-run safely.
derived_columns = [
    ('alcohol', alcohol),
    ('delivery', delivery),
    ('dogsallowed', dogsallowed),
    ('smoking', smoking),
    ('goodforkids', goodforkids),
    ('outdoorseating', outdoorseating),
    ('waiterservice', waiterservice),
    ('creditcards', creditcards),
    ('pricerange', pricerange),
    ('drivethru', drivethru),
    ('tourist', tourist),
    ('classy', classy),
    ('hipster', hipster),
    ('latenight', latenight),
    ('upscale', upscale),
    ('divey', divey),
]
for column_name, column_values in derived_columns:
    df[column_name] = pd.Series(column_values).values
In [23]:
# Display the distinct column labels currently present in df.
df.columns.unique()
Out[23]:
In [24]:
# Write the augmented frame to the_final_countdown.csv in the working directory.
# index=False keeps the row index out of the file: with the default
# (index=True) the index is written as an extra unnamed first column, which
# shifts every positional column lookup (e.g. df.iloc[:, 12]) by one the next
# time the file is read back.
# NOTE(review): this drops the leading index column from the output; confirm
# no downstream reader relies on it.
df.to_csv('the_final_countdown.csv', index=False)
In [ ]: