In [3]:
import pandas as pd
from os.path import join as pjoin

In [2]:
df = pd.read_csv("/Users/hep/Downloads/19_10_2017_organisations_geocoded.csv")

In [4]:
grid_path = "~/Downloads/grid20170810/"
grid_full = pd.read_csv(pjoin(grid_path,"grid.csv"),low_memory=False)
grid_address = pd.read_csv(pjoin(grid_path,"full_tables/addresses.csv"),low_memory=False)
grid_alias = pd.read_csv(pjoin(grid_path,"full_tables/aliases.csv"),low_memory=False)
# Join the dataframes                                                                                                                               
grid_df = grid_full.join(grid_address.set_index(keys=["grid_id"]),on="ID")
grid_df = grid_df.join(grid_alias.set_index(keys=["grid_id"]),on="ID")
grid_df = grid_df[["Name","lat","lng","ID","alias"]]

In [5]:
grid_address.head()


Out[5]:
grid_id line_1 line_2 line_3 lat lng postcode primary city state state_code country country_code geonames_city_id
0 grid.1001.0 NaN NaN NaN -35.277800 149.120500 NaN False Canberra Australian Capital Territory AU-ACT Australia AU 2172517.0
1 grid.1002.3 NaN NaN NaN -37.908300 145.138000 NaN False Melbourne Victoria AU-VIC Australia AU 2158177.0
2 grid.1003.2 NaN NaN NaN -27.495964 153.009627 NaN False Brisbane Queensland AU-QLD Australia AU 2174003.0
3 grid.1004.5 NaN NaN NaN -33.775259 151.112915 NaN False Sydney New South Wales AU-NSW Australia AU 2147714.0
4 grid.1005.4 NaN NaN NaN -33.917731 151.230964 NaN False Sydney New South Wales AU-NSW Australia AU 2147714.0

In [21]:
grid_id = []
for match in df.match.values:
    id_ = None
    condition = grid_full.Name == match
    if condition.sum() == 0:
        condition = grid_alias.alias == match
        if condition.sum() != 0:
            id_ = grid_alias.loc[condition,"grid_id"].values[0]
    else:
        id_ = grid_full.loc[condition,"ID"].values[0]
    grid_id.append(id_)

In [22]:
df["ID"] = grid_id
df = df.join(grid_address.set_index(keys=["grid_id"]),on="ID")

In [25]:
_df = df[["institutes","n","longitude","latitude","match","score","ID","city","state","country","country_code"]]

In [26]:
_df.to_csv("/Users/hep/Downloads/19_10_2017_organisations_geocoded_with_location.csv")

In [ ]: