In [3]:
%load_ext autoreload

In [4]:
%autoreload 2
import os

import pandas as pd

from geocode_mak import lat_lon_from_mak_names

In [5]:
# Read the organisation names to geocode. `names=` supplies the column labels,
# so pandas treats the first row of the file as data rather than as a header.
# The path is expanded from `~` so it resolves under whichever account runs
# the notebook.
mak_df = pd.read_csv(
    os.path.expanduser("~/Downloads/19_10_2017_organisations_to_geocode.csv"),
    names=["institutes", "n"],
)

In [9]:
# Geocode every institute name. Later cells unpack each result as a
# (lat, lon, match, score) tuple. The second argument presumably points at a
# GRID data release directory -- TODO confirm against geocode_mak.
lat_lon_match_score = lat_lon_from_mak_names(
    mak_df["institutes"],
    "~/Downloads/grid20170810/",
    perfect_only=True,
)


100%|██████████| 3979/3979 [00:36<00:00, 110.47it/s]

In [14]:
# Each geocoder result is a (lat, lon, match, score) tuple. Unpack once to
# validate that shape, then fan the fields out into their own columns.
result_rows = [
    (lat, lon, match, score) for lat, lon, match, score in lat_lon_match_score
]

# (column name, position within the result tuple); longitude is added first
# so the column order of the frame stays longitude, latitude, match, score.
for column_name, field_pos in (
    ("longitude", 1),
    ("latitude", 0),
    ("match", 2),
    ("score", 3),
):
    mak_df[column_name] = [row[field_pos] for row in result_rows]

In [15]:
# Persist the geocoded table to the current user's Downloads folder. The path
# is expanded from `~` so it resolves under whichever account runs the
# notebook. Default to_csv settings are kept, so the row index is written as
# the first column of the file.
mak_df.to_csv(
    os.path.expanduser(
        "~/Downloads/19_10_2017_organisations_geocoded_perfectOnly.csv"
    )
)

In [17]:
# Show the last five rows to spot-check the new geocoding columns.
mak_df.tail(n=5)


Out[17]:
institutes n longitude latitude match score
3974 department of environment and conservation 1 -52.721597 47.558815 Department of Environment and Conservation 1.0
3975 babol noshirvani university of technology 1 52.681725 36.561561 Babol Noshirvani University of Technology 1.0
3976 united nations industrial development organiza... 1 16.416660 48.235761 United Nations Industrial Development Organiza... 1.0
3977 cgi group 1 NaN NaN None 0.0
3978 armasuisse 1 NaN NaN None 0.0

In [19]:
# Boolean masks for rows missing a coordinate or a matched name.
null_long = mak_df["longitude"].isnull()
null_lat = mak_df["latitude"].isnull()
null_match = mak_df["match"].isnull()

# A row is treated as geocoded only when both coordinates are present.
missing_coords = null_long | null_lat

n_with_coords = (~missing_coords).sum()
n_no_coords_no_match = (missing_coords & null_match).sum()
n_no_coords_but_match = (missing_coords & ~null_match).sum()

print(n_with_coords)          # rows that have both latitude and longitude
print(n_no_coords_no_match)   # rows with no coordinates and no matched name
print(n_no_coords_but_match)  # rows with a matched name but no coordinates


2389
1410
180

In [20]:
# Display the first row as a plain dict to inspect one complete record.
# Slice to a single row before converting so only that row is turned into a
# dict, instead of building one dict per row of the whole frame.
mak_df.head(1).to_dict(orient="records")[0]


Out[20]:
{'institutes': 'united arab emirates university',
 'latitude': 24.198948,
 'longitude': 55.678852,
 'match': 'United Arab Emirates University',
 'n': 37519,
 'score': 1.0}

In [ ]: