In [3]:
import pandas as pd
from os.path import join as pjoin
In [2]:
df = pd.read_csv("/Users/hep/Downloads/19_10_2017_organisations_geocoded.csv")
In [4]:
grid_path = "~/Downloads/grid20170810/"
grid_full = pd.read_csv(pjoin(grid_path,"grid.csv"),low_memory=False)
grid_address = pd.read_csv(pjoin(grid_path,"full_tables/addresses.csv"),low_memory=False)
grid_alias = pd.read_csv(pjoin(grid_path,"full_tables/aliases.csv"),low_memory=False)
# Join the dataframes
grid_df = grid_full.join(grid_address.set_index(keys=["grid_id"]),on="ID")
grid_df = grid_df.join(grid_alias.set_index(keys=["grid_id"]),on="ID")
grid_df = grid_df[["Name","lat","lng","ID","alias"]]
In [5]:
grid_address.head()
Out[5]:
In [21]:
grid_id = []
for match in df.match.values:
id_ = None
condition = grid_full.Name == match
if condition.sum() == 0:
condition = grid_alias.alias == match
if condition.sum() != 0:
id_ = grid_alias.loc[condition,"grid_id"].values[0]
else:
id_ = grid_full.loc[condition,"ID"].values[0]
grid_id.append(id_)
In [22]:
df["ID"] = grid_id
df = df.join(grid_address.set_index(keys=["grid_id"]),on="ID")
In [25]:
_df = df[["institutes","n","longitude","latitude","match","score","ID","city","state","country","country_code"]]
In [26]:
_df.to_csv("/Users/hep/Downloads/19_10_2017_organisations_geocoded_with_location.csv")
In [ ]: