In [1]:
import pandas as pd
import csv
import sys
import requests
import math
In [2]:
# original_df = pd.read_csv('ecd/ecd_master_list_original.csv', sep=";", low_memory=False)
original_df = pd.read_csv('ecd/kzn_nw_ilifa.csv', sep=";", low_memory=False)
# Get info on local municipalities
lm_info_df = pd.read_csv('ecd/local_muni_info.csv', sep=";", low_memory=False)
In [3]:
original_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3128 entries, 0 to 3127
Columns: 526 entries, PROVINCE to municipality_id
dtypes: float64(254), int64(108), object(164)
memory usage: 12.6+ MB
In [4]:
remove_text = {
'metropolitan municipality': '',
'local municipality': '',
}
replace_with = {
'nelson mandela': 'nelson mandela bay',
'sol plaatje': 'sol plaatjie',
'khara hais': '//khara hais',
'solplatjie': 'sol plaatjie',
'rustenberg': 'rustenburg',
'saldanha': 'saldanha bay',
'plettenberg bay': 'bitou'
}
def clean_muni_name(local_muni):
if not type(local_muni) is str:
return local_muni
name = local_muni.lower()
# Check for some specific cases
if name in replace_with:
return replace_with[name]
# Remove certain phrases
for k, v in remove_text.iteritems():
name = name.replace(k, v)
return name.strip()
In [5]:
df = original_df
# df['local_muni_clean'] = map(clean_muni_name, df["local_municipality"])
df['local_muni_clean'] = map(clean_muni_name, df["LOCAL MUNICIPALITY"])
# df[['address_ward_number']] = df[['address_ward_number']].astype(str)
In [6]:
df = pd.merge(df, lm_info_df, how='left', on="local_muni_clean")
In [7]:
def get_mapit_geo_codes(lat_long):
latitude, longitude = lat_long.split(",")
resp = requests.get(
"http://mapit.code4sa.org/point/4326/%s,%s.json" % (longitude.strip(), latitude.strip()))
if not resp.json():
print "3: None"
return None
geo_levels = {}
for item in resp.json().itervalues():
geo_levels[item['type_name']] = item['codes']['MDB']
print "3: %s" % (geo_levels.get('Ward'))
return geo_levels.get('Ward')
In [8]:
def clean_ward_num(ward_num, ward_prefix, gps_location):
if ward_num:
if not math.isnan(ward_num):
ward_num = '%.f' % ward_num
if len(ward_num) == 8:
print "1: %s" % (ward_num)
return ward_num
if len(ward_num) <= 3:
if ward_prefix and len(ward_prefix) == 5:
ward_prefix = '%.f' % ward_prefix
print "2: %s" % (ward_prefix + ('0' * (3-len(ward_num))) + ward_num)
return ward_prefix + ('0' * (3-len(ward_num))) + ward_num
if not type(gps_location) is float:
return get_mapit_geo_codes(gps_location)
return None
In [57]:
# geo_cols = ['address_ward_number', 'geo_local_muni_ward_prefix', 'gps_location']
new_df = df
new_df['geo_ward_num_clean'] = map(
clean_ward_num,
new_df['address_ward_number'],
new_df['geo_local_muni_ward_prefix'],
new_df['gps_location'])
In [10]:
new_df.to_csv('kzn_nw_cleaned_wards.csv', index=False)
In [ ]:
# The code below is to fix some empty geo information
In [183]:
df = pd.read_csv('master_list_cleaned_wards.csv', sep=",", low_memory=False)
In [204]:
def get_local_muni_name(ward_num):
resp = requests.get(
"http://mapit.code4sa.org/area/MDB:%s/covered.json" % (ward_num))
if not resp.json():
return None
for level in resp.json().itervalues():
if level['type_name'] == 'Municipality':
print level['name']
return level['name'].lower()
In [198]:
wn = 'na123'
wn.find('z')
Out[198]:
-1
In [192]:
df.head(n=2)
Out[192]:
province
ID
date
start
end
started
complete
signature
signature_interviewee
gps_location
...
worker
worker_id
local_muni_clean
geo_local_muni_name
geo_local_muni_code
geo_local_muni_ward_prefix
geo_local_muni_district_code
geo_local_muni_province_code
geo_local_muni_province_number
geo_ward_num_clean
0
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
1
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
2 rows × 535 columns
In [205]:
# df['geo_ward_num_clean'] = df['geo_ward_num_clean'].astype(str)
local_muni_df = df[
(df.complete == 'Yes') &
(df.geo_ward_num_clean.notnull()) &
(df.geo_local_muni_code.isnull()) &
~(df.geo_ward_num_clean.astype(str).str.contains('na'))]
# local_muni_df.loc[:,('local_muni_clean')]
local_muni_df.loc[:,('local_muni_clean')] = map(get_local_muni_name, local_muni_df.loc[:,('geo_ward_num_clean')])
Ekurhuleni
Ethekwini
Ethekwini
Ethekwini
Albert Luthuli
Bushbuckridge
Bushbuckridge
City of Matlosana
City of Matlosana
City of Matlosana
Bitou
Bitou
Bitou
Bitou
Bitou
Bitou
In [207]:
test_df = local_muni_df.copy()
In [208]:
cols = [u'geo_local_muni_name', u'geo_local_muni_code',
u'geo_local_muni_ward_prefix', u'geo_local_muni_district_code',
u'geo_local_muni_province_code', u'geo_local_muni_province_number']
for col in cols:
del local_muni_df[col]
local_muni_df = pd.merge(local_muni_df, lm_info_df, how='left', on="local_muni_clean")
In [ ]:
test_df.loc[df.ID.isin(local_muni_df.ID), cols] = local_muni_df[cols]
In [168]:
df.head(n=2)
Out[168]:
province
ID
date
start
end
started
complete
signature
signature_interviewee
gps_location
...
worker
worker_id
local_muni_clean
geo_local_muni_name
geo_local_muni_code
geo_local_muni_ward_prefix
geo_local_muni_district_code
geo_local_muni_province_code
geo_local_muni_province_number
geo_ward_num_clean
0
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
1
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
2 rows × 535 columns
In [211]:
test_df
Out[211]:
province
ID
date
start
end
started
complete
signature
signature_interviewee
gps_location
...
worker
worker_id
local_muni_clean
geo_local_muni_name
geo_local_muni_code
geo_local_muni_ward_prefix
geo_local_muni_district_code
geo_local_muni_province_code
geo_local_muni_province_number
geo_ward_num_clean
8486
GT
bea631f6-3e90-4f08-b070-65b22c226326
2014-03-31
2014-03-31 13:16:00 +0200
2014-03-31 13:26:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-26.171552,28.161673
...
Ekhuruleni 2 ~ Ntombizodwa Msibi - 12:52:24 PM...
403c1390-81e7-11e3-966f-001e673400ac
ekurhuleni
NaN
NaN
NaN
NaN
NaN
NaN
79700092
11030
KZN
86e16912-7288-4abe-9f36-9bdb4f396cb7
2014-05-21
2014-05-21 09:12:00 +0200
2014-05-27 15:22:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-29.697030,31.029527
...
NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ...
7c60d414-c15c-11e3-ac28-001e6733fe3c
ethekwini
NaN
NaN
NaN
NaN
NaN
NaN
59500050
11031
KZN
fdf9884e-09f4-463f-9f8f-b7c864159781
2014-05-26
2014-05-26 13:42:00 +0200
2014-05-27 15:33:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-29.677255,31.020398
...
NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ...
7c60d414-c15c-11e3-ac28-001e6733fe3c
ethekwini
NaN
NaN
NaN
NaN
NaN
NaN
59500051
11033
KZN
5469dd13-35be-4603-8e8d-abd88eda1b45
2014-05-20
2014-05-20 10:02:00 +0200
2014-05-27 15:18:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-29.909540,31.020890
...
NB. Gugulethu Bhengu - 2:25:10 PM 30/5/2014 - ...
7c60d414-c15c-11e3-ac28-001e6733fe3c
None
NaN
NaN
NaN
NaN
NaN
NaN
59500066
11034
KZN
c01a2a39-391b-4317-8791-637669fdca05
2014-05-23
2014-05-23 12:27:00 +0200
2014-05-23 17:41:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-29.881843,30.911903
...
NB. Rooku K - 9:54:06 AM 4/6/2014 - Logged in ...
cabaae2e-dc0e-11e3-8509-001e6733fe3c
ethekwini
NaN
NaN
NaN
NaN
NaN
NaN
59500063
16393
MP
5336f2c0-bc36-4f9f-883a-b42157dbfefb
2014-03-12
2014-03-12 11:09:00 +0200
2014-04-18 15:28:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-26.046355,30.779757
...
Albert Luthuli. Beauty Nkosi - 9:26:29 AM 9...
5badb7f8-9e09-11e3-bbfd-001e673400ac
albert luthuli
NaN
NaN
NaN
NaN
NaN
NaN
83001024
16450
MP
32b6f764-b002-4d67-8d5b-9ff77c4250eb
2014-03-24
2014-03-24 09:07:26 +0200
2014-03-24 09:41:45 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-24.773478,31.241988
...
Enhlazeni Petronella Glorry Mkhabela - 7:31:46...
b37a8c88-9d6c-11e3-b3ce-001e673400ac
bushbuckridge
NaN
NaN
NaN
NaN
NaN
NaN
83205029
16474
MP
72406869-1418-42c2-872d-6e14462400bb
2014-03-25
2014-03-25 12:07:47 +0200
2014-03-28 20:00:43 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-24.619508,31.055680
...
Enhlazeni Lolicky Pretty Ngobeni - 7:50:40 PM ...
d49f9fa0-9d6e-11e3-b3ce-001e673400ac
bushbuckridge
NaN
NaN
NaN
NaN
NaN
NaN
83205018
18137
NW
d2595f89-0ec9-4497-9e2e-0cb15e6ba5af
2014-04-09
2014-04-09 14:02:00 +0200
2014-04-09 14:16:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-26.892013,26.599790
...
North West Kabelo Raputsoane - 8:44:32 AM 2/5/...
5d72ba0a-baf8-11e3-b6f3-001e6733fe3c
city of matlosana
NaN
NaN
NaN
NaN
NaN
NaN
64003010
18138
NW
b8a7c607-401d-4a47-b365-6c3d627799ce
2014-04-24
2014-04-24 11:53:00 +0200
2014-05-02 08:44:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-26.884335,26.598318
...
North West Kabelo Raputsoane - 8:44:32 AM 2/5/...
5d72ba0a-baf8-11e3-b6f3-001e6733fe3c
city of matlosana
NaN
NaN
NaN
NaN
NaN
NaN
64003005
18140
NW
ecc9ba60-a49e-4107-bd0e-b19fc679784a
2014-04-08
2014-04-08 13:32:00 +0200
2014-04-09 17:31:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-26.845738,26.750667
...
North West Kabelo Raputsoane - 8:44:32 AM 2/5/...
5d72ba0a-baf8-11e3-b6f3-001e6733fe3c
city of matlosana
NaN
NaN
NaN
NaN
NaN
NaN
64003030
21013
NC
913bdc15-cfc4-41a6-815a-0a47ea71e036
2014-03-11
2014-03-11 13:53:00 +0200
2014-03-11 14:29:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.051832,23.338703
...
EDEN Terrance November - 10:23:42 AM 17/6/2014...
de2811da-267a-11e3-bcd3-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407004
21018
NC
5fa2c6de-fc12-4e7d-8b03-afe2e0e11507
2014-03-12
2014-03-12 08:44:00 +0200
2014-03-12 09:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407005
21026
NC
3e09a3c8-50d1-45b6-8c0f-2f4e21699efb
2014-03-12
2014-03-12 10:23:00 +0200
2014-03-12 11:28:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.046214,23.324565
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407004
21034
NC
536ee4d9-6ed8-4d1b-94c2-1ea946b62838
2014-03-12
2014-03-12 12:05:00 +0200
2014-03-12 12:27:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407005
21043
NC
f54ba639-da15-482a-abda-669b951885b7
2014-03-12
2014-03-12 15:49:00 +0200
2014-03-12 16:32:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.062516,23.359684
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
None
NaN
NaN
NaN
NaN
NaN
NaN
10407002
21045
NC
0ce338f3-a20b-4dea-b573-62d80dbb1281
2014-03-13
2014-03-13 08:45:00 +0200
2014-03-13 09:08:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048496,23.320948
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407005
21048
NC
05a4d7a0-9cf7-4a3a-b429-a2c79b3cbc9a
2014-03-13
2014-03-13 09:18:00 +0200
2014-03-13 09:53:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048496,23.320948
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
bitou
NaN
NaN
NaN
NaN
NaN
NaN
10407005
21061
NC
12acecd7-658d-4cca-b90f-649d4e48f2af
2014-03-13
2014-03-13 11:50:00 +0200
2014-03-13 12:18:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.050631,23.359520
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
None
NaN
NaN
NaN
NaN
NaN
NaN
10407002
21063
NC
de8c392a-5e7f-4de4-9422-849b567c93e3
2014-03-13
2014-03-13 12:45:00 +0200
2014-03-13 13:12:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-33.955734,23.475315
...
EDEN Chesney Newman - 9:13:56 AM 17/6/2014 -...
31be7c2a-34cd-11e3-a982-001e673400ac
None
NaN
NaN
NaN
NaN
NaN
NaN
10407001
21703
NC
a67536dc-d22d-4204-831b-5cefdbf7b5c9
2013-10-29
2013-10-29 13:06:00 +0200
NaN
Yes
Yes
NaN
NaN
-33.413610,19.206700
...
Early Years Services ~ Sharleen Lintnaar - 2...
0fbc713a-34c9-11e3-a7f2-001e673400ac
None
NaN
NaN
NaN
NaN
NaN
NaN
10202007
21 rows × 535 columns
In [178]:
# df[(df.complete == 'Yes') &
# (df.geo_ward_num_clean.notnull()) &
# (df.geo_local_muni_code.isnull()) &
# (~df.geo_ward_num_clean.str.contains('na'))].loc[:,(cols)] = local_muni_df.loc[:,(cols)]
df.loc[df.ID.isin(local_muni_df.ID), cols] = local_muni_df[cols]
# df.loc[df.Name.isin(df1.Name), ['Nonprofit', 'Education']] = df1[['Nonprofit', 'Education']]
# df.loc[df.ID.isin(local_muni_df.ID), cols]
# local_muni_df.loc[:,(['ID'] + cols)]
In [179]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 21877 entries, 0 to 21876
Columns: 535 entries, province to geo_ward_num_clean
dtypes: float64(142), int64(151), object(242)
memory usage: 89.5+ MB
In [181]:
df.merge(local_muni_df[cols])
Out[181]:
province
ID
date
start
end
started
complete
signature
signature_interviewee
gps_location
...
worker
worker_id
local_muni_clean
geo_local_muni_name
geo_local_muni_code
geo_local_muni_ward_prefix
geo_local_muni_district_code
geo_local_muni_province_code
geo_local_muni_province_number
geo_ward_num_clean
0
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
1
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
2
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
3
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
4
EC
0fa3c7a7-175e-4580-b57d-786709954a89
2013-10-17
2013-10-17 19:58:41 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
5
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
6
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
7
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
8
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
9
EC
ac670c3c-e6ff-4b00-920c-f96263566e43
2013-11-06
2013-11-06 08:50:45 +0200
NaN
Yes
No
NaN
NaN
-31.603326,28.791947
...
Masikhule Salathiso Ngumbela - 2:09:51 PM 29/...
3f53a0c2-4602-11e3-b1c4-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21507007
10
EC
c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5
2013-11-15
2013-11-15 12:12:19 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Hlekiso Bulelwa - 4:0...
ca69b50a-4d02-11e3-8185-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
11
EC
c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5
2013-11-15
2013-11-15 12:12:19 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Hlekiso Bulelwa - 4:0...
ca69b50a-4d02-11e3-8185-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
12
EC
c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5
2013-11-15
2013-11-15 12:12:19 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Hlekiso Bulelwa - 4:0...
ca69b50a-4d02-11e3-8185-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
13
EC
c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5
2013-11-15
2013-11-15 12:12:19 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Hlekiso Bulelwa - 4:0...
ca69b50a-4d02-11e3-8185-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
14
EC
c60863eb-6bfb-4b66-8d0b-e6502cd0cbf5
2013-11-15
2013-11-15 12:12:19 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Hlekiso Bulelwa - 4:0...
ca69b50a-4d02-11e3-8185-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
15
EC
e0204732-1ac0-46a8-850a-6d5df6979ece
2013-11-26
2013-11-26 10:19:03 +0200
NaN
Yes
No
NaN
NaN
-32.606151,27.450803
...
Regional Educare Council Kolisi Zonke - 10:48:...
a8802a1e-4d02-11e3-857c-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21204017
16
EC
e0204732-1ac0-46a8-850a-6d5df6979ece
2013-11-26
2013-11-26 10:19:03 +0200
NaN
Yes
No
NaN
NaN
-32.606151,27.450803
...
Regional Educare Council Kolisi Zonke - 10:48:...
a8802a1e-4d02-11e3-857c-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21204017
17
EC
e0204732-1ac0-46a8-850a-6d5df6979ece
2013-11-26
2013-11-26 10:19:03 +0200
NaN
Yes
No
NaN
NaN
-32.606151,27.450803
...
Regional Educare Council Kolisi Zonke - 10:48:...
a8802a1e-4d02-11e3-857c-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21204017
18
EC
e0204732-1ac0-46a8-850a-6d5df6979ece
2013-11-26
2013-11-26 10:19:03 +0200
NaN
Yes
No
NaN
NaN
-32.606151,27.450803
...
Regional Educare Council Kolisi Zonke - 10:48:...
a8802a1e-4d02-11e3-857c-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21204017
19
EC
e0204732-1ac0-46a8-850a-6d5df6979ece
2013-11-26
2013-11-26 10:19:03 +0200
NaN
Yes
No
NaN
NaN
-32.606151,27.450803
...
Regional Educare Council Kolisi Zonke - 10:48:...
a8802a1e-4d02-11e3-857c-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
21204017
20
EC
5d272669-50bb-4ce4-ba6f-04773863c91f
2013-11-12
2013-11-12 12:03:49 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
21
EC
5d272669-50bb-4ce4-ba6f-04773863c91f
2013-11-12
2013-11-12 12:03:49 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
22
EC
5d272669-50bb-4ce4-ba6f-04773863c91f
2013-11-12
2013-11-12 12:03:49 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
23
EC
5d272669-50bb-4ce4-ba6f-04773863c91f
2013-11-12
2013-11-12 12:03:49 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
24
EC
5d272669-50bb-4ce4-ba6f-04773863c91f
2013-11-12
2013-11-12 12:03:49 +0200
NaN
Yes
No
NaN
NaN
NaN
...
ITEC Sicelo Dyubele - 4:11:47 PM 26/3/2014 - C...
982fb966-3260-11e3-9f0e-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
25
EC
10a73525-f386-4ee8-8701-28b5ba0f889c
2013-11-18
2013-11-18 07:16:10 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Mlahlwa Nokuphumla - ...
d877209c-4d02-11e3-99f2-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
26
EC
10a73525-f386-4ee8-8701-28b5ba0f889c
2013-11-18
2013-11-18 07:16:10 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Mlahlwa Nokuphumla - ...
d877209c-4d02-11e3-99f2-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
27
EC
10a73525-f386-4ee8-8701-28b5ba0f889c
2013-11-18
2013-11-18 07:16:10 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Mlahlwa Nokuphumla - ...
d877209c-4d02-11e3-99f2-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
28
EC
10a73525-f386-4ee8-8701-28b5ba0f889c
2013-11-18
2013-11-18 07:16:10 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Mlahlwa Nokuphumla - ...
d877209c-4d02-11e3-99f2-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
29
EC
10a73525-f386-4ee8-8701-28b5ba0f889c
2013-11-18
2013-11-18 07:16:10 +0200
NaN
Yes
No
NaN
NaN
NaN
...
Regional Educare Council Mlahlwa Nokuphumla - ...
d877209c-4d02-11e3-99f2-001e673400ac
NaN
NaN
NaN
NaN
NaN
NaN
NaN
nan
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
12334
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12335
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12336
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12337
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12338
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12339
NC
0e258d9e-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12340
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12341
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12342
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12343
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12344
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12345
NC
0e505498-f219-11e3-8f37-001e6733fe3c
NaN
NaN
NaN
NaN
Yes
NaN
NaN
NaN
...
Eden -
929d3f2c-ecbf-11e3-9560-001e6733fe3c
bitou
Bitou
WC047
10407
DC4
WC
1
nan
12346
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12347
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12348
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12349
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12350
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12351
NC
9d1ea09d-5fb6-4052-8376-90c2386520ca
2014-03-11
2014-03-11 16:06:00 +0200
2014-03-11 16:52:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.006363,23.344305
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407001
12352
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12353
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12354
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12355
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12356
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12357
NC
d3835b6f-470b-48de-9831-f91e4f44ff46
2014-03-12
2014-03-12 11:06:00 +0200
2014-03-12 11:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048502,23.320948
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12358
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12359
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12360
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12361
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12362
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12363
NC
222a305c-f21d-4f99-9242-1be39a16b853
2014-03-12
2014-03-12 09:50:00 +0200
2014-03-12 10:20:00 +0200
Yes
Yes
https://run.journeyapps.com/media/W1siZiIsIjUz...
https://run.journeyapps.com/media/W1siZiIsIjUz...
-34.048278,23.323786
...
EDEN Le-Roy Adams - 12:09:41 PM 14/5/2014 - S...
c10cf6f2-5734-11e3-8330-001e673400ac
bitou
Bitou
WC047
10407
DC4
WC
1
10407005
12364 rows × 535 columns
In [182]:
# df.loc[df['ID'] == 'bea631f6-3e90-4f08-b070-65b22c226326']
df.columns
Out[182]:
Index([u'province', u'ID', u'date', u'start', u'end', u'started', u'complete',
u'signature', u'signature_interviewee', u'gps_location',
...
u'worker', u'worker_id', u'local_muni_clean', u'geo_local_muni_name',
u'geo_local_muni_code', u'geo_local_muni_ward_prefix',
u'geo_local_muni_district_code', u'geo_local_muni_province_code',
u'geo_local_muni_province_number', u'geo_ward_num_clean'],
dtype='object', length=535)
In [161]:
df['geo_ward_num_clean'] = df['geo_ward_num_clean'].astype(str)
df[(df.complete == 'Yes') &
(df.geo_ward_num_clean.notnull()) &
(df.geo_local_muni_code.isnull()) &
(~df.geo_ward_num_clean.str.contains('na'))].loc[:,(cols)]
Out[161]:
geo_local_muni_name
geo_local_muni_code
geo_local_muni_ward_prefix
geo_local_muni_district_code
geo_local_muni_province_code
geo_local_muni_province_number
In [59]:
# These entries have no ward number or GPS location associated with them.
df[
(df.complete == 'Yes') &
(df.geo_ward_num_clean.isnull()) &
(df.address_ward_number.isnull()) &
(df.gps_location.isnull())].info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Columns: 535 entries, province to geo_ward_num_clean
dtypes: float64(142), int64(151), object(242)
memory usage: 0.0+ bytes
In [ ]:
df.to_csv('.csv', index=False)
Content source: Code4SA/wazi-ecd-centre-data
Similar notebooks: