In [1]:
import pandas as pd
# Load the dataset into the notebook-wide frame `df`, using pandas' Python parser.
df = pd.read_csv(
    'the_final_countdown.csv',
    sep=",",
    encoding='utf-8',
    engine='python',
)
In [2]:
# Neighborhood names that getneighbor() searches for, as substrings, in
# column 13 of df. Each name appears exactly once; names that were listed a
# second time ('South End', 'Eastland', 'Downtown', 'Chinatown') keep only
# their first position, which leaves first-match search results unchanged.
#
# NOTE(review): getneighbor() stops at the first name it finds, so a name that
# comes after one of its own substrings can never be returned. That currently
# affects 'West Roxbury Center' (after 'West Roxbury'), 'Roslindale Village'
# (after 'Roslindale') and 'South Summerlin' (after 'Summerlin'). Move the
# longer names first if they are meant to be reported separately.
distinctneighborhoods = [
    'Downtown',
    'Waterfront',
    'North End',
    'Financial District',
    'Charlestown',
    'Back Bay',
    'East Boston',
    'Allston/Brighton',
    'Leather District',
    'South Boston',
    'Dorchester',
    'South End',
    'Beacon Hill',
    'Fenway',
    'Hyde Park',
    'Mission Hill',
    'West Roxbury',
    'Roslindale',
    'Fields Corner',
    'West Roxbury Center',
    'Dudley Square',
    'Chinatown',
    'Jamaica Plain',
    'Roslindale Village',
    'Uphams Corner',
    'Mattapan',
    'Egleston Square',
    'First Ward',
    'Third Ward',
    'Arboretum',
    'University City',
    'Fourth Ward',
    'Uptown',
    'Elizabeth',
    'Myers Park',
    'Steele Creek',
    'Dilworth',
    'Ballantyne',
    'Starmount',
    'NoDa',
    'Plaza Midwood',
    'Derita',
    'Eastland',
    'South Park',
    'Biddleville',
    'Highland Creek',
    'Sedgefield',
    'Villa Heights',
    'North Charlotte',
    'Cotswold',
    'Sherwood Forest',
    'Paw Creek',
    'Sunrise',
    'Westside',
    'Summerlin',
    'Northwest',
    'Spring Valley',
    'Anthem',
    'Southeast',
    'Southwest',
    'South Summerlin',
    'University',
    'Eastside',
    'The Lakes',
    'The Strip',
    'Centennial',
]
In [8]:
# Peek at a single value: row 100 of column 13, the column getneighbor() scans.
df.iat[100, 13]
Out[8]:
In [9]:
neighborhoods = []


def getneighbor(z, frame=None, column=13):
    """Label every row of a frame with the first neighborhood name it mentions.

    Parameters
    ----------
    z : sequence of str
        Candidate names, tried in the given order. The first one that occurs
        as a substring of the row's text wins, so order matters whenever one
        name contains another.
    frame : pandas.DataFrame, optional
        Table to scan. When omitted, the notebook-level ``df`` is used.
    column : int, optional
        Position of the column whose text is searched (default 13).

    Returns
    -------
    list of str
        One entry per row of the frame: the matched name, or ``'none'`` when
        no name occurs in the row (or the cell is not a string).

    Notes
    -----
    The module-level ``neighborhoods`` is rebound to the returned list, so
    code that reads that global after the call keeps working. Unlike an
    in-place append, this makes repeated calls safe: results never pile up,
    and it does not matter what ``neighborhoods`` was bound to beforehand.
    """
    global neighborhoods
    if frame is None:
        frame = df

    labels = []
    for text in frame.iloc[:, column]:
        if not isinstance(text, str):
            # Missing values arrive as float NaN; `name in NaN` would raise.
            labels.append('none')
            continue
        # First name in z found in the text, or 'none' if there is no match.
        labels.append(next((name for name in z if name in text), 'none'))

    neighborhoods = labels
    return labels
In [10]:
# Start from an empty list so re-running this cell never adds to labels left
# over from an earlier run, and never hands getneighbor() the Series that this
# cell binds to `neighborhoods` below.
neighborhoods = []
neighborhoods = pd.Series(getneighbor(distinctneighborhoods))
# Assign by position (.values): one label per row, whatever df's index is.
df['Neighborhoods'] = neighborhoods.values
In [11]:
# Number of labels produced; getneighbor() emits one per row of df.
len(neighborhoods)
Out[11]:
In [ ]:
In [ ]:
def _city_labels(frame, name, flag_column, flagged_label, other_label, text_column=13):
    """Return one label per row of ``frame`` for an ambiguous neighborhood name.

    A row gets ``flagged_label`` when ``name`` occurs in its ``text_column``
    cell and the character '1' occurs in its ``flag_column`` cell. Every other
    row gets ``other_label``, including rows that do not mention ``name``.
    Columns are addressed by position.
    """
    labels = []
    for text, flag in zip(frame.iloc[:, text_column], frame.iloc[:, flag_column]):
        # str() keeps numeric or missing cells from raising on the `in` test.
        flagged = name in str(text) and '1' in str(flag)
        labels.append(flagged_label if flagged else other_label)
    return labels


# The loops used to `break` on the first matching row, which cut both lists
# short; they now hold exactly one label per row of df.
# NOTE(review): Downtown is decided by column 6 but Chinatown by column 5;
# confirm both positions are the intended flag columns.
downtown = _city_labels(df, 'Downtown', 6, 'Downtown Boston', 'Downtown Las Vegas')
# NOTE(review): the fallback label 'Chinatown ' (trailing space) looks
# unfinished; it is kept as written until the intended text is confirmed.
chinatown = _city_labels(df, 'Chinatown', 5, 'Chinatown Boston', 'Chinatown ')