In [1]:
import pandas as pd
import csv
import sys
import requests
import math

In [2]:
# Survey records to clean. The full master list
# ('ecd/ecd_master_list_original.csv') can be loaded here instead.
original_df = pd.read_csv(
    'ecd/kzn_nw_ilifa.csv',
    sep=";",
    low_memory=False,
)

# Lookup table with details of each local municipality; it is joined onto the
# survey records further down using the cleaned municipality name.
lm_info_df = pd.read_csv(
    'ecd/local_muni_info.csv',
    sep=";",
    low_memory=False,
)

In [3]:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3128 entries, 0 to 3127
Columns: 526 entries, PROVINCE to municipality_id
dtypes: float64(254), int64(108), object(164)
memory usage: 12.6+ MB

In [4]:
# Boilerplate phrases removed from a municipality name (phrase -> replacement).
remove_text = {
    'metropolitan municipality': '',
    'local municipality': '',
}

# Misspellings / alternative names mapped to the spelling to use instead
# (presumably the spelling found in local_muni_info.csv -- confirm).
replace_with = {
    'nelson mandela': 'nelson mandela bay',
    'sol plaatje': 'sol plaatjie',
    'khara hais': '//khara hais',
    'solplatjie': 'sol plaatjie',
    'rustenberg': 'rustenburg',
    'saldanha': 'saldanha bay',
    'plettenberg bay': 'bitou',
}


def clean_muni_name(local_muni):
    """Normalise a municipality name so it can be used as a join key.

    The name is lower-cased, the boilerplate phrases in ``remove_text`` are
    removed, surrounding whitespace is stripped and, finally, known
    misspellings are corrected through ``replace_with``.

    The correction is applied *after* the boilerplate is removed, so that
    e.g. "Rustenberg Local Municipality" is corrected just like "Rustenberg".

    Args:
        local_muni: The raw name. Anything that is not a ``str`` (for
            example the float NaN pandas uses for missing cells) is returned
            unchanged.

    Returns:
        The cleaned name, or ``local_muni`` itself when it is not a string.
    """
    if not isinstance(local_muni, str):
        return local_muni

    name = local_muni.lower()

    # Remove certain phrases. .items() works on both Python 2 and 3.
    for phrase, replacement in remove_text.items():
        name = name.replace(phrase, replacement)
    name = name.strip()

    # Check for some specific cases; fall back to the cleaned name.
    return replace_with.get(name, name)

In [5]:
# NOTE: `df` is a second name for the `original_df` object (no copy is made),
# so the column added below is visible through `original_df` as well.
df = original_df

# Clean every municipality name. Series.map applies the function element by
# element and keeps the frame's index; the builtin map() only produces a list
# on Python 2 (on Python 3 it is a lazy iterator).
# (The master list calls the source column "local_municipality" instead.)
df['local_muni_clean'] = df["LOCAL MUNICIPALITY"].map(clean_muni_name)

In [6]:
# Left join on the cleaned name: every survey row is kept, and rows without a
# matching municipality get NaN in the columns coming from lm_info_df.
df = df.merge(lm_info_df, how='left', on="local_muni_clean")

In [7]:
def get_mapit_geo_codes(lat_long, base_url="http://mapit.code4sa.org/point/4326"):
    """Return the MDB ward code for a "latitude,longitude" string, or None.

    Queries a MapIt-style point-lookup endpoint and returns the
    ``codes['MDB']`` value of the area whose ``type_name`` is ``'Ward'``.

    Args:
        lat_long: Coordinates as "latitude,longitude"; spaces around either
            number are ignored.
        base_url: Base of the point-lookup URL, without a trailing slash.
            NOTE(review): in this copy of the notebook the URL literal was
            truncated to ",%s.json", which has one placeholder for two
            values and therefore raised TypeError on every call. The default
            is a reconstruction based on MapIt's "/point/<SRID>/<x>,<y>" URL
            form -- confirm the host against the original notebook.

    Returns:
        The ward's MDB code, or None when the service returns no areas or
        none of them is a ward carrying an MDB code.
    """
    latitude, longitude = lat_long.split(",")
    # The URL takes x,y order, i.e. longitude first.
    url = "%s/%s,%s.json" % (base_url, longitude.strip(), latitude.strip())

    # Parse the response body once and reuse it.
    areas = requests.get(url).json()
    if not areas:
        print("3: None")
        return None

    ward_code = None
    for area in areas.values():
        if area.get('type_name') != 'Ward':
            continue
        # Tolerate areas without an MDB code instead of raising KeyError.
        ward_code = area.get('codes', {}).get('MDB')
        if ward_code is not None:
            break

    print("3: %s" % (ward_code))
    return ward_code

In [8]:
def _whole_number_str(value):
    """Format ``value`` as a whole-number string, or None if missing/not numeric."""
    if value is None:
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        # e.g. free-text entries such as 'na123'
        return None
    if math.isnan(number) or math.isinf(number):
        return None
    return '%.f' % number


def clean_ward_num(ward_num, ward_prefix, gps_location):
    """Work out a full 8-digit ward code for one record.

    Tried in order:
      1. ``ward_num`` already has 8 digits: returned as a string.
      2. ``ward_num`` has at most 3 digits and ``ward_prefix`` has 5 digits:
         the prefix followed by the ward number zero-padded to 3 digits.
      3. ``gps_location`` is present: looked up via ``get_mapit_geo_codes``.
      4. Otherwise None.

    Args:
        ward_num: Ward number as a number or numeric string; NaN, None,
            0/empty and non-numeric text count as missing.
        ward_prefix: Municipality ward prefix as a number or numeric string.
            It is formatted *before* its length is checked (the previous
            order called len() on a number and '%.f' on a string, so the
            prefix branch raised TypeError for any real prefix).
        gps_location: "latitude,longitude" string; a float (NaN) or None
            means there is no location.

    Returns:
        The ward code as a string, whatever ``get_mapit_geo_codes`` returns
        for the GPS fallback, or None.
    """
    # Falsy ward numbers (0, '', None) are treated as missing.
    ward = _whole_number_str(ward_num) if ward_num else None

    if ward is not None:
        if len(ward) == 8:
            print("1: %s" % (ward))
            return ward
        if len(ward) <= 3:
            prefix = _whole_number_str(ward_prefix)
            if prefix is not None and len(prefix) == 5:
                full_code = prefix + ward.zfill(3)
                print("2: %s" % (full_code))
                return full_code

    # pandas represents a missing location as float NaN.
    if gps_location is not None and not isinstance(gps_location, float):
        return get_mapit_geo_codes(gps_location)
    return None

In [57]:
# geo_cols = ['address_ward_number', 'geo_local_muni_ward_prefix', 'gps_location']
# `new_df` is the same DataFrame object as `df` (plain assignment, no copy).
new_df = df
# NOTE(review): the statement below is cut off after `map(` in this copy of
# the notebook, so the cell cannot run as shown. Presumably it applied
# clean_ward_num to the ward-number, ward-prefix and GPS columns (compare the
# commented-out geo_cols list above) -- confirm against the original notebook.
new_df['geo_ward_num_clean'] = map(

In [10]:
# Save the cleaned records; index=False keeps the row index out of the file.
new_df.to_csv(path_or_buf='kzn_nw_cleaned_wards.csv', index=False)

In [ ]:
# The code below is to fix some empty geo information

In [183]:
# Load a cleaned export from disk; `df` is rebound to this new frame.
# NOTE(review): the file read here is 'master_list_cleaned_wards.csv', not the
# 'kzn_nw_cleaned_wards.csv' written by the to_csv cell above -- confirm that
# this is intended.
df = pd.read_csv(
    'master_list_cleaned_wards.csv',
    sep=",",
    low_memory=False,
)

In [204]:
def get_local_muni_name(ward_num, url_template=""):
    """Return the lower-cased name of the municipality for a ward code.

    Fetches ``url_template % ward_num``, expects a JSON object whose values
    are area records, and returns the ``name`` (lower-cased) of the first
    record whose ``type_name`` is ``'Municipality'``.

    Args:
        ward_num: Ward code substituted into ``url_template``.
        url_template: URL containing a single ``%s`` placeholder for the ward
            code. NOTE(review): the URL literal is empty in this copy of the
            notebook, which made ``"" % (ward_num)`` raise TypeError on every
            call. The original endpoint cannot be recovered from this file,
            so it must be supplied (for example with ``functools.partial``
            when the function is used with ``map``).

    Returns:
        The municipality name in lower case, or None when the response is
        empty or contains no municipality.

    Raises:
        ValueError: If ``url_template`` is empty.
    """
    if not url_template:
        raise ValueError(
            "get_local_muni_name needs a url_template with one %s "
            "placeholder for the ward code")

    # Parse the response body once and reuse it.
    areas = requests.get(url_template % (ward_num,)).json()
    if not areas:
        return None

    for level in areas.values():
        if level['type_name'] == 'Municipality':
            print(level['name'])
            return level['name'].lower()
    return None

In [198]:
# NOTE(review): `wn` is not used by any other visible cell; presumably a
# scratch value for trying out a non-numeric ward number.
wn = "na123"


In [192]:

province ID date start end started complete signature signature_interviewee gps_location ... worker worker_id local_muni_clean geo_local_muni_name geo_local_muni_code geo_local_muni_ward_prefix geo_local_muni_district_code geo_local_muni_province_code geo_local_muni_province_number geo_ward_num_clean
0 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN NaN
1 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007

2 rows × 535 columns

In [205]:
# Select the completed records that have a cleaned ward code but no
# municipality details yet.
# NOTE(review): in this copy of the notebook the last condition and the
# closing bracket of the filter were missing, leaving the expression unclosed.
# The exclusion of codes containing 'na' is a reconstruction -- confirm.
# astype(str) lets .str.contains work whatever the column dtype is; missing
# codes become 'nan' and are excluded as well.
# .copy() gives an independent frame, so adding a column below does not touch
# (or warn about) `df`.
local_muni_df = df[
    (df.complete == 'Yes') &
    (df.geo_ward_num_clean.notnull()) &
    (df.geo_local_muni_code.isnull()) &
    (~df.geo_ward_num_clean.astype(str).str.contains('na'))
].copy()

# Look up the municipality name for every ward code (one web request per row).
# Series.map keeps the row index; the builtin map() is lazy on Python 3.
local_muni_df['local_muni_clean'] = local_muni_df['geo_ward_num_clean'].map(get_local_muni_name)

Albert Luthuli
City of Matlosana
City of Matlosana
City of Matlosana

In [207]:
# Independent snapshot of local_muni_df (a deep copy, which is also the
# default), so it can be modified without affecting local_muni_df.
test_df = local_muni_df.copy(deep=True)

In [208]:
# Municipality detail columns to rebuild: the existing versions are dropped
# and then brought back in by joining lm_info_df on the cleaned name.
cols = [
    u'geo_local_muni_name',
    u'geo_local_muni_code',
    u'geo_local_muni_ward_prefix',
    u'geo_local_muni_district_code',
    u'geo_local_muni_province_code',
    u'geo_local_muni_province_number',
]

local_muni_df = local_muni_df.drop(cols, axis=1)
# NOTE: merging on a column gives the result a fresh 0..n-1 row index; the
# original row labels are not kept.
local_muni_df = local_muni_df.merge(lm_info_df, how='left', on="local_muni_clean")

In [ ]:
# Write the re-joined municipality details into test_df, matching rows by ID.
# Assigning `local_muni_df[cols]` directly does not work: after the merge
# local_muni_df has a fresh 0..n-1 index, and .loc aligns the assigned frame
# on row labels, so the target rows would only receive NaN.
# Assumes rows sharing an ID share the same municipality details (the first
# occurrence is used) -- TODO confirm.
_muni_by_id = local_muni_df.drop_duplicates('ID').set_index('ID')[cols]
_matched = test_df.ID.isin(_muni_by_id.index)
# .values drops the labels so the rows are written positionally, in the order
# of the matched rows.
test_df.loc[_matched, cols] = _muni_by_id.reindex(test_df.loc[_matched, 'ID']).values

In [168]:

province ID date start end started complete signature signature_interviewee gps_location ... worker worker_id local_muni_clean geo_local_muni_name geo_local_muni_code geo_local_muni_ward_prefix geo_local_muni_district_code geo_local_muni_province_code geo_local_muni_province_number geo_ward_num_clean
0 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
1 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007

2 rows × 535 columns

In [211]:

province ID date start end started complete signature signature_interviewee gps_location ... worker worker_id local_muni_clean geo_local_muni_name geo_local_muni_code geo_local_muni_ward_prefix geo_local_muni_district_code geo_local_muni_province_code geo_local_muni_province_number geo_ward_num_clean
8486 GT bea631f6-3e90-4f08-b070-65b22c226326 2014-03-31 2014-03-31 13:16:00 +0200 2014-03-31 13:26:00 +0200 Yes Yes -26.171552,28.161673 ... Ekhuruleni 2 ~ Ntombizodwa Msibi - 12:52:24 PM... 403c1390-81e7-11e3-966f-001e673400ac ekurhuleni NaN NaN NaN NaN NaN NaN 79700092
11030 KZN 86e16912-7288-4abe-9f36-9bdb4f396cb7 2014-05-21 2014-05-21 09:12:00 +0200 2014-05-27 15:22:00 +0200 Yes Yes -29.697030,31.029527 ... NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ... 7c60d414-c15c-11e3-ac28-001e6733fe3c ethekwini NaN NaN NaN NaN NaN NaN 59500050
11031 KZN fdf9884e-09f4-463f-9f8f-b7c864159781 2014-05-26 2014-05-26 13:42:00 +0200 2014-05-27 15:33:00 +0200 Yes Yes -29.677255,31.020398 ... NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ... 7c60d414-c15c-11e3-ac28-001e6733fe3c ethekwini NaN NaN NaN NaN NaN NaN 59500051
11033 KZN 5469dd13-35be-4603-8e8d-abd88eda1b45 2014-05-20 2014-05-20 10:02:00 +0200 2014-05-27 15:18:00 +0200 Yes Yes -29.909540,31.020890 ... NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ... 7c60d414-c15c-11e3-ac28-001e6733fe3c None NaN NaN NaN NaN NaN NaN 59500066
11034 KZN c01a2a39-391b-4317-8791-637669fdca05 2014-05-23 2014-05-23 12:27:00 +0200 2014-05-23 17:41:00 +0200 Yes Yes -29.881843,30.911903 ... NB. Rooku K - 9:54:06 AM 4/6/2014 - Logged in ... cabaae2e-dc0e-11e3-8509-001e6733fe3c ethekwini NaN NaN NaN NaN NaN NaN 59500063
16393 MP 5336f2c0-bc36-4f9f-883a-b42157dbfefb 2014-03-12 2014-03-12 11:09:00 +0200 2014-04-18 15:28:00 +0200 Yes Yes -26.046355,30.779757 ... Albert Luthuli. Beauty Nkosi - 9:26:29 AM 9... 5badb7f8-9e09-11e3-bbfd-001e673400ac albert luthuli NaN NaN NaN NaN NaN NaN 83001024
16450 MP 32b6f764-b002-4d67-8d5b-9ff77c4250eb 2014-03-24 2014-03-24 09:07:26 +0200 2014-03-24 09:41:45 +0200 Yes Yes -24.773478,31.241988 ... Enhlazeni Petronella Glorry Mkhabela - 7:31:46... b37a8c88-9d6c-11e3-b3ce-001e673400ac bushbuckridge NaN NaN NaN NaN NaN NaN 83205029
16474 MP 72406869-1418-42c2-872d-6e14462400bb 2014-03-25 2014-03-25 12:07:47 +0200 2014-03-28 20:00:43 +0200 Yes Yes -24.619508,31.055680 ... Enhlazeni Lolicky Pretty Ngobeni - 7:50:40 PM ... d49f9fa0-9d6e-11e3-b3ce-001e673400ac bushbuckridge NaN NaN NaN NaN NaN NaN 83205018
18137 NW d2595f89-0ec9-4497-9e2e-0cb15e6ba5af 2014-04-09 2014-04-09 14:02:00 +0200 2014-04-09 14:16:00 +0200 Yes Yes -26.892013,26.599790 ... North West Kabelo Raputsoane - 8:44:32 AM 2/5/... 5d72ba0a-baf8-11e3-b6f3-001e6733fe3c city of matlosana NaN NaN NaN NaN NaN NaN 64003010
18138 NW b8a7c607-401d-4a47-b365-6c3d627799ce 2014-04-24 2014-04-24 11:53:00 +0200 2014-05-02 08:44:00 +0200 Yes Yes -26.884335,26.598318 ... North West Kabelo Raputsoane - 8:44:32 AM 2/5/... 5d72ba0a-baf8-11e3-b6f3-001e6733fe3c city of matlosana NaN NaN NaN NaN NaN NaN 64003005
18140 NW ecc9ba60-a49e-4107-bd0e-b19fc679784a 2014-04-08 2014-04-08 13:32:00 +0200 2014-04-09 17:31:00 +0200 Yes Yes -26.845738,26.750667 ... North West Kabelo Raputsoane - 8:44:32 AM 2/5/... 5d72ba0a-baf8-11e3-b6f3-001e6733fe3c city of matlosana NaN NaN NaN NaN NaN NaN 64003030
21013 NC 913bdc15-cfc4-41a6-815a-0a47ea71e036 2014-03-11 2014-03-11 13:53:00 +0200 2014-03-11 14:29:00 +0200 Yes Yes -34.051832,23.338703 ... EDEN Terrance November - 10:23:42 AM 17/6/2014... de2811da-267a-11e3-bcd3-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407004
21018 NC 5fa2c6de-fc12-4e7d-8b03-afe2e0e11507 2014-03-12 2014-03-12 08:44:00 +0200 2014-03-12 09:52:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407005
21026 NC 3e09a3c8-50d1-45b6-8c0f-2f4e21699efb 2014-03-12 2014-03-12 10:23:00 +0200 2014-03-12 11:28:00 +0200 Yes Yes -34.046214,23.324565 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407004
21034 NC 536ee4d9-6ed8-4d1b-94c2-1ea946b62838 2014-03-12 2014-03-12 12:05:00 +0200 2014-03-12 12:27:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407005
21043 NC f54ba639-da15-482a-abda-669b951885b7 2014-03-12 2014-03-12 15:49:00 +0200 2014-03-12 16:32:00 +0200 Yes Yes -34.062516,23.359684 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac None NaN NaN NaN NaN NaN NaN 10407002
21045 NC 0ce338f3-a20b-4dea-b573-62d80dbb1281 2014-03-13 2014-03-13 08:45:00 +0200 2014-03-13 09:08:00 +0200 Yes Yes -34.048496,23.320948 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407005
21048 NC 05a4d7a0-9cf7-4a3a-b429-a2c79b3cbc9a 2014-03-13 2014-03-13 09:18:00 +0200 2014-03-13 09:53:00 +0200 Yes Yes -34.048496,23.320948 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac bitou NaN NaN NaN NaN NaN NaN 10407005
21061 NC 12acecd7-658d-4cca-b90f-649d4e48f2af 2014-03-13 2014-03-13 11:50:00 +0200 2014-03-13 12:18:00 +0200 Yes Yes -34.050631,23.359520 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac None NaN NaN NaN NaN NaN NaN 10407002
21063 NC de8c392a-5e7f-4de4-9422-849b567c93e3 2014-03-13 2014-03-13 12:45:00 +0200 2014-03-13 13:12:00 +0200 Yes Yes -33.955734,23.475315 ... EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -... 31be7c2a-34cd-11e3-a982-001e673400ac None NaN NaN NaN NaN NaN NaN 10407001
21703 NC a67536dc-d22d-4204-831b-5cefdbf7b5c9 2013-10-29 2013-10-29 13:06:00 +0200 NaN Yes Yes NaN NaN -33.413610,19.206700 ... Early Years Services ~ Sharleen Lintnaar - 2... 0fbc713a-34c9-11e3-a7f2-001e673400ac None NaN NaN NaN NaN NaN NaN 10202007

21 rows × 535 columns

In [178]:
# Earlier attempt, kept for reference. It assigns through chained indexing
# (df[...].loc[...] = ...), which writes to a temporary selection, not to df:
# df[(df.complete == 'Yes') &
#     (df.geo_ward_num_clean.notnull()) &
#     (df.geo_local_muni_code.isnull()) &
#     (~df.geo_ward_num_clean.str.contains('na'))].loc[:,(cols)] = local_muni_df.loc[:,(cols)]

# Write the re-joined municipality details into df, matching rows by ID.
# Assigning `local_muni_df[cols]` directly does not work: after the merge
# local_muni_df has a fresh 0..n-1 index, and .loc aligns the assigned frame
# on row labels, so the target rows would only receive NaN.
# Assumes rows sharing an ID share the same municipality details (the first
# occurrence is used) -- TODO confirm.
_muni_by_id = local_muni_df.drop_duplicates('ID').set_index('ID')[cols]
_matched = df.ID.isin(_muni_by_id.index)
# .values drops the labels so the rows are written positionally, in the order
# of the matched rows.
df.loc[_matched, cols] = _muni_by_id.reindex(df.loc[_matched, 'ID']).values

In [179]:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21877 entries, 0 to 21876
Columns: 535 entries, province to geo_ward_num_clean
dtypes: float64(142), int64(151), object(242)
memory usage: 89.5+ MB

In [181]:

province ID date start end started complete signature signature_interviewee gps_location ... worker worker_id local_muni_clean geo_local_muni_name geo_local_muni_code geo_local_muni_ward_prefix geo_local_muni_district_code geo_local_muni_province_code geo_local_muni_province_number geo_ward_num_clean
0 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
1 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
2 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
3 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
4 EC 0fa3c7a7-175e-4580-b57d-786709954a89 2013-10-17 2013-10-17 19:58:41 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
5 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007
6 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007
7 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007
8 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007
9 EC ac670c3c-e6ff-4b00-920c-f96263566e43 2013-11-06 2013-11-06 08:50:45 +0200 NaN Yes No NaN NaN -31.603326,28.791947 ... Masikhule Salathiso Ngumbela - 2:09:51 PM 29/... 3f53a0c2-4602-11e3-b1c4-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21507007
10 EC c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5 2013-11-15 2013-11-15 12:12:19 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Hlekiso Bulelwa - 4:0... ca69b50a-4d02-11e3-8185-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
11 EC c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5 2013-11-15 2013-11-15 12:12:19 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Hlekiso Bulelwa - 4:0... ca69b50a-4d02-11e3-8185-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
12 EC c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5 2013-11-15 2013-11-15 12:12:19 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Hlekiso Bulelwa - 4:0... ca69b50a-4d02-11e3-8185-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
13 EC c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5 2013-11-15 2013-11-15 12:12:19 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Hlekiso Bulelwa - 4:0... ca69b50a-4d02-11e3-8185-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
14 EC c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5 2013-11-15 2013-11-15 12:12:19 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Hlekiso Bulelwa - 4:0... ca69b50a-4d02-11e3-8185-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
15 EC e0204732-1ac0-46a8-850a-6d5df6979ece 2013-11-26 2013-11-26 10:19:03 +0200 NaN Yes No NaN NaN -32.606151,27.450803 ... Regional Educare Council Kolisi Zonke - 10:48:... a8802a1e-4d02-11e3-857c-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21204017
16 EC e0204732-1ac0-46a8-850a-6d5df6979ece 2013-11-26 2013-11-26 10:19:03 +0200 NaN Yes No NaN NaN -32.606151,27.450803 ... Regional Educare Council Kolisi Zonke - 10:48:... a8802a1e-4d02-11e3-857c-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21204017
17 EC e0204732-1ac0-46a8-850a-6d5df6979ece 2013-11-26 2013-11-26 10:19:03 +0200 NaN Yes No NaN NaN -32.606151,27.450803 ... Regional Educare Council Kolisi Zonke - 10:48:... a8802a1e-4d02-11e3-857c-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21204017
18 EC e0204732-1ac0-46a8-850a-6d5df6979ece 2013-11-26 2013-11-26 10:19:03 +0200 NaN Yes No NaN NaN -32.606151,27.450803 ... Regional Educare Council Kolisi Zonke - 10:48:... a8802a1e-4d02-11e3-857c-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21204017
19 EC e0204732-1ac0-46a8-850a-6d5df6979ece 2013-11-26 2013-11-26 10:19:03 +0200 NaN Yes No NaN NaN -32.606151,27.450803 ... Regional Educare Council Kolisi Zonke - 10:48:... a8802a1e-4d02-11e3-857c-001e673400ac NaN NaN NaN NaN NaN NaN NaN 21204017
20 EC 5d272669-50bb-4ce4-ba6f-04773863c91f 2013-11-12 2013-11-12 12:03:49 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
21 EC 5d272669-50bb-4ce4-ba6f-04773863c91f 2013-11-12 2013-11-12 12:03:49 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
22 EC 5d272669-50bb-4ce4-ba6f-04773863c91f 2013-11-12 2013-11-12 12:03:49 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
23 EC 5d272669-50bb-4ce4-ba6f-04773863c91f 2013-11-12 2013-11-12 12:03:49 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
24 EC 5d272669-50bb-4ce4-ba6f-04773863c91f 2013-11-12 2013-11-12 12:03:49 +0200 NaN Yes No NaN NaN NaN ... ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C... 982fb966-3260-11e3-9f0e-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
25 EC 10a73525-f386-4ee8-8701-28b5ba0f889c 2013-11-18 2013-11-18 07:16:10 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Mlahlwa Nokuphumla - ... d877209c-4d02-11e3-99f2-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
26 EC 10a73525-f386-4ee8-8701-28b5ba0f889c 2013-11-18 2013-11-18 07:16:10 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Mlahlwa Nokuphumla - ... d877209c-4d02-11e3-99f2-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
27 EC 10a73525-f386-4ee8-8701-28b5ba0f889c 2013-11-18 2013-11-18 07:16:10 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Mlahlwa Nokuphumla - ... d877209c-4d02-11e3-99f2-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
28 EC 10a73525-f386-4ee8-8701-28b5ba0f889c 2013-11-18 2013-11-18 07:16:10 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Mlahlwa Nokuphumla - ... d877209c-4d02-11e3-99f2-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
29 EC 10a73525-f386-4ee8-8701-28b5ba0f889c 2013-11-18 2013-11-18 07:16:10 +0200 NaN Yes No NaN NaN NaN ... Regional Educare Council Mlahlwa Nokuphumla - ... d877209c-4d02-11e3-99f2-001e673400ac NaN NaN NaN NaN NaN NaN NaN nan
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
12334 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12335 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12336 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12337 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12338 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12339 NC 0e258d9e-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12340 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12341 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12342 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12343 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12344 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12345 NC 0e505498-f219-11e3-8f37-001e6733fe3c NaN NaN NaN NaN Yes NaN NaN NaN ... Eden - 929d3f2c-ecbf-11e3-9560-001e6733fe3c bitou Bitou WC047 10407 DC4 WC 1 nan
12346 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12347 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12348 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12349 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12350 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12351 NC 9d1ea09d-5fb6-4052-8376-90c2386520ca 2014-03-11 2014-03-11 16:06:00 +0200 2014-03-11 16:52:00 +0200 Yes Yes -34.006363,23.344305 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407001
12352 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12353 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12354 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12355 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12356 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12357 NC d3835b6f-470b-48de-9831-f91e4f44ff46 2014-03-12 2014-03-12 11:06:00 +0200 2014-03-12 11:20:00 +0200 Yes Yes -34.048502,23.320948 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12358 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12359 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12360 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12361 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12362 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005
12363 NC 222a305c-f21d-4f99-9242-1be39a16b853 2014-03-12 2014-03-12 09:50:00 +0200 2014-03-12 10:20:00 +0200 Yes Yes -34.048278,23.323786 ... EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S... c10cf6f2-5734-11e3-8330-001e673400ac bitou Bitou WC047 10407 DC4 WC 1 10407005

12364 rows × 535 columns

In [182]:
# df.loc[df['ID'] == 'bea631f6-3e90-4f08-b070-65b22c226326']

Index([u'province', u'ID', u'date', u'start', u'end', u'started', u'complete',
       u'signature', u'signature_interviewee', u'gps_location',
       u'worker', u'worker_id', u'local_muni_clean', u'geo_local_muni_name',
       u'geo_local_muni_code', u'geo_local_muni_ward_prefix',
       u'geo_local_muni_district_code', u'geo_local_muni_province_code',
       u'geo_local_muni_province_number', u'geo_ward_num_clean'],
      dtype='object', length=535)

In [161]:
# Cast the ward codes to text so the .str accessor can be used on them.
# NOTE: astype(str) turns missing values into the string 'nan', so notnull()
# is always True afterwards; the 'na' exclusion is what filters them out.
df['geo_ward_num_clean'] = df['geo_ward_num_clean'].astype(str)

# Completed records with a usable ward code that still lack municipality
# details, showing only the municipality detail columns.
# NOTE(review): in this copy of the notebook the expression stopped after the
# third condition. The 'na' exclusion and the `.loc[:, cols]` column selection
# are a reconstruction (the output below lists only those columns) -- confirm.
df[(df.complete == 'Yes') &
    (df.geo_ward_num_clean.notnull()) &
    (df.geo_local_muni_code.isnull()) &
    (~df.geo_ward_num_clean.str.contains('na'))].loc[:, cols]

geo_local_muni_name geo_local_muni_code geo_local_muni_ward_prefix geo_local_muni_district_code geo_local_muni_province_code geo_local_muni_province_number

In [59]:
# These entries have no ward number or GPS location associated with them.
# NOTE(review): this cell is incomplete in this copy of the notebook: the
# opening of the expression (presumably `df[`) and everything after the last
# `&` are missing, so it cannot run as shown. The output below looks like
# DataFrame.info() on an empty selection -- confirm against the original.

    (df.complete == 'Yes') &
    (df.geo_ward_num_clean.isnull()) &
    (df.address_ward_number.isnull()) &

<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Columns: 535 entries, province to geo_ward_num_clean
dtypes: float64(142), int64(151), object(242)
memory usage: 0.0+ bytes

In [ ]:
# Save the final frame without its row index.
# NOTE(review): the target name is just '.csv' (no base name), which looks
# like a placeholder or a truncated file name -- confirm the intended path.
df.to_csv(path_or_buf='.csv', index=False)