In [1]:
import pandas as pd
# Load the restaurant dataset into `df`.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists.
source_csv = '/Users/skhederian/restaurant-health/the_final_countdown.csv'
df = pd.read_csv(
    source_csv,
    sep=",",
    encoding='utf-8',
    engine='python',
)

In [2]:
# New attributes: one empty, independent list per derived restaurant feature.
# Each list is filled with one value per DataFrame row by the cells that follow.
(alcohol, delivery, dogsallowed, smoking,
 goodforkids, outdoorseating, waiterservice, creditcards,
 pricerange, drivethru, tourist, classy,
 hipster, latenight, upscale, divey) = ([] for _ in range(16))

In [3]:
# alcohol: 1 when the restaurant's attributes text says it serves alcohol
# (a full bar, or beer and wine), otherwise 0.
#
# Yelp spells the beer-and-wine value "beer_and_wine"; the "wine_and_beer"
# spelling that was tested before is kept as well so nothing that matched
# previously stops matching.  NOTE(review): confirm the spelling in the CSV.
#
# The list is rebuilt in one pass (not appended to), so re-running this cell
# cannot double its length and break the later column assignment.  The
# 'attributes' column (position 12 in this frame) is addressed by name so the
# lookup does not depend on column order.  The checks are literal substring
# tests and assume no whitespace after the colon in the serialized attributes.
alcohol_markers = (
    '"Alcohol":"full_bar"',
    '"Alcohol":"beer_and_wine"',
    '"Alcohol":"wine_and_beer"',
)
alcohol = [
    1 if any(marker in attrs for marker in alcohol_markers) else 0
    for attrs in df['attributes']
]

In [4]:
# delivery: 1 when the attributes text contains '"Delivery":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
delivery = [
    1 if '"Delivery":true' in attrs else 0
    for attrs in df['attributes']
]

In [5]:
# tourist: 1 when the attributes text contains '"touristy":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
tourist = [
    1 if '"touristy":true' in attrs else 0
    for attrs in df['attributes']
]

In [6]:
# goodforkids: 1 when the attributes text contains '"Good for Kids":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
goodforkids = [
    1 if '"Good for Kids":true' in attrs else 0
    for attrs in df['attributes']
]

In [7]:
# smoking: 1 only when the attributes text contains '"Smoking":"yes"'; any
# other smoking value, or no smoking attribute at all, gives 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
smoking = [
    1 if '"Smoking":"yes"' in attrs else 0
    for attrs in df['attributes']
]

In [8]:
# classy: 1 when the attributes text contains '"classy":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
classy = [
    1 if '"classy":true' in attrs else 0
    for attrs in df['attributes']
]

In [9]:
# hipster: 1 when the attributes text contains '"hipster":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
hipster = [
    1 if '"hipster":true' in attrs else 0
    for attrs in df['attributes']
]

In [10]:
# waiterservice: 1 when the attributes text contains '"Waiter Service":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
waiterservice = [
    1 if '"Waiter Service":true' in attrs else 0
    for attrs in df['attributes']
]

In [11]:
# outdoorseating: 1 when the attributes text contains '"Outdoor Seating":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
outdoorseating = [
    1 if '"Outdoor Seating":true' in attrs else 0
    for attrs in df['attributes']
]

In [12]:
# latenight: 1 when the attributes text contains '"latenight":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
latenight = [
    1 if '"latenight":true' in attrs else 0
    for attrs in df['attributes']
]

In [13]:
# upscale: 1 when the attributes text contains '"upscale":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
upscale = [
    1 if '"upscale":true' in attrs else 0
    for attrs in df['attributes']
]

In [14]:
# dogsallowed: 1 when the attributes text contains '"Dogs Allowed":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
dogsallowed = [
    1 if '"Dogs Allowed":true' in attrs else 0
    for attrs in df['attributes']
]

In [15]:
# drivethru: 1 when the attributes text contains '"Drive-Thru":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
drivethru = [
    1 if '"Drive-Thru":true' in attrs else 0
    for attrs in df['attributes']
]

In [16]:
# divey: 1 when the attributes text contains '"divey":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
divey = [
    1 if '"divey":true' in attrs else 0
    for attrs in df['attributes']
]

In [17]:
# creditcards: 1 when the attributes text contains '"Accepts Credit Cards":true', else 0.
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
creditcards = [
    1 if '"Accepts Credit Cards":true' in attrs else 0
    for attrs in df['attributes']
]

In [18]:
# pricerange: the 1-4 price level found in the attributes text, or 0 when no
# '"Price Range":N' marker is present.  Markers are tried from 4 down to 1
# and the first one found wins.
#
# Rebuilt in one pass (not appended to) so re-running the cell cannot double
# the list; the 'attributes' column (position 12 in this frame) is read by
# name rather than by position.  Literal substring test: assumes no space
# after the colon in the serialized attributes.
price_markers = (
    ('"Price Range":4', 4),
    ('"Price Range":3', 3),
    ('"Price Range":2', 2),
    ('"Price Range":1', 1),
)
pricerange = [
    next((level for marker, level in price_markers if marker in attrs), 0)
    for attrs in df['attributes']
]

In [19]:
# City / neighborhood name -> numeric area code.
# Codes: 1 = Boston area, 2 = Charlotte area, 3 = Las Vegas area.
# NOTE(review): Casa Grande, Mesa, Chandler and Glendale are also names of
# Arizona cities — confirm they are meant to share code 3.
_CITY_GROUPS = (
    (1, (
        'Boston', 'Charlestown', 'Roxbury Crossing', 'Dorchester',
        'East Boston', 'Allston', 'West Roxbury', 'Brighton', 'Roslindale',
        'Roxbury', 'South Boston', 'Hyde Park', 'Dorchester Center',
        'Jamaica Plain', 'Mattapan', 'Chestnut Hill',
    )),
    (2, (
        'Charlotte', 'Pineville', 'Matthews', 'Huntersville', 'Mint Hill',
    )),
    (3, (
        'North Las Vegas', 'Henderson', 'Las Vegas', 'Boulder City',
        'Casa Grande', 'Mesa', 'Chandler', 'Paradise', 'Glendale',
        'Spring Valley',
    )),
)

# Flatten the groups into the lookup table used to encode the city column.
city_map = {
    place: code
    for code, places in _CITY_GROUPS
    for place in places
}

In [20]:
# Encode city and neighborhood names as area codes:
#   boston = 1, charlotte = 2, las vegas = 3
# Names that are not keys of city_map become NaN.
#
# Guard: if the column already holds only these codes (the cell was run
# before, or the CSV was saved after encoding), mapping again would replace
# every value with NaN because the codes themselves are not keys of city_map.
already_encoded = set(df['city'].dropna().unique()) <= set(city_map.values())
if not already_encoded:
    df['city'] = df['city'].map(city_map)

# Cell output: the distinct codes now present in the column.
df['city'].unique()


Out[20]:
array([2, 3, 1])

In [21]:
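# Abandoned attempt at attaching the new columns in a loop (kept for reference).
# It cannot work as written: `x` is the list of values, not a column name, so
# `df[x]` would use the values themselves as the key.
# newcolumns = [alcohol,delivery,dogsallowed,smoking,goodforkids,outdoorseating,waiterservice,creditcards,pricerange,
# drivethru,tourist,classy,hipster,latenight,upscale,divey]

# for x in newcolumns:
#     newcol = pd.Series(x)
#     df[x] = newcol.values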

In [22]:
# Step 1: turn every collected feature list into a pandas Series, rebinding
# the same sixteen names.
(alcohol, delivery, dogsallowed, smoking,
 goodforkids, outdoorseating, waiterservice, creditcards,
 pricerange, drivethru, tourist, classy,
 hipster, latenight, upscale, divey) = [
    pd.Series(collected)
    for collected in (
        alcohol, delivery, dogsallowed, smoking,
        goodforkids, outdoorseating, waiterservice, creditcards,
        pricerange, drivethru, tourist, classy,
        hipster, latenight, upscale, divey,
    )
]

# Step 2: attach each Series to df as a same-named column, in this order.
# `.values` assigns by position, ignoring the Series index, so each Series
# must have exactly one entry per row of df.
for column_name, feature in (
    ('alcohol', alcohol),
    ('delivery', delivery),
    ('dogsallowed', dogsallowed),
    ('smoking', smoking),
    ('goodforkids', goodforkids),
    ('outdoorseating', outdoorseating),
    ('waiterservice', waiterservice),
    ('creditcards', creditcards),
    ('pricerange', pricerange),
    ('drivethru', drivethru),
    ('tourist', tourist),
    ('classy', classy),
    ('hipster', hipster),
    ('latenight', latenight),
    ('upscale', upscale),
    ('divey', divey),
):
    df[column_name] = feature.values

In [23]:
# Cell output: the DataFrame's distinct column labels.
df.columns.unique()


Out[23]:
array(['Unnamed: 0', '_id', 'restaurant_name', 'address_full',
       'business_id', 'categories', 'city', 'review_count',
       'inspection_date', 'stars', 'latitude', 'longitude', 'attributes',
       'open', 'neighborhoods', 'violations', 'ChangeInViolations',
       'IsAsian', 'IsFrench', 'IsSandwiches', 'IsFastFood', 'IsBurgers',
       'IsItalian', 'IsHawaiian', 'IsSouthern', 'IsMexican',
       'IsLatinAmerican', 'IsMiddleEastern', 'IsGreek', 'IsAmerican',
       'IsDonuts', 'IsIndian', 'IsSeafood', 'IsDesserts', 'IsSalad',
       'Pizza', 'IsBuffets', 'IsSushi Bars', 'IsDelis', 'IsSports Bars',
       'IsBakeries', 'IsPubs', 'IsCaterers', 'IsDiners', 'IsCafes',
       'IsBars', 'alcohol', 'delivery', 'dogsallowed', 'smoking',
       'goodforkids', 'outdoorseating', 'waiterservice', 'creditcards',
       'pricerange', 'drivethru', 'tourist', 'classy', 'hipster',
       'latenight', 'upscale', 'divey'], dtype=object)

In [24]:
# Save the enriched frame to the working directory.
# index=False: the frame already carries an 'Unnamed: 0' column from an
# earlier save. Writing the index again would add another unnamed column on
# every load/save round trip and shift the position of every column.
df.to_csv('the_final_countdown.csv', index=False)

In [ ]: