In [13]:
import pandas as pd
import json
import re
In [2]:
# List the contents of the data directory (IPython shell escape).
!ls ../data
In [3]:
# Read the two monthly CSV exports (file names end in "Apr" and "May") into
# separate DataFrames.
apr_data = pd.read_csv('../data/Indicators-RCH-prgress-tracking_V2.xlsx - Apr.csv')
may_data = pd.read_csv('../data/Indicators-RCH-prgress-tracking_V2.xlsx - May.csv')
In [4]:
# Preview the first rows of the April frame.
apr_data.head()
Out[4]:
In [5]:
# Read the category-structure CSV.
categories = pd.read_csv('../data/Indicators-RCH-Category-Structure.csv')
In [43]:
# Join the April and May frames on the Block/Sector/SHC columns. Column names
# present in both frames get `_apr` (left/April) and `_may` (right/May) suffixes.
merged_data = pd.merge(left=apr_data, right=may_data, on=['Block', 'Sector', 'SHC'], suffixes=['_apr', '_may'])
In [72]:
# List the merged column names that contain 'ANC registered'.
[col for col in merged_data.columns if 'ANC registered' in col]
Out[72]:
In [73]:
# First rows of the 'ANC registered' column in the April frame.
apr_data['ANC registered'].head()
Out[73]:
In [74]:
# First rows of the 'ANC registered' column in the May frame.
may_data['ANC registered'].head()
Out[74]:
In [75]:
# Show the suffixed April and May 'ANC registered' columns side by side.
merged_data[['ANC registered_apr', 'ANC registered_may']]
Out[75]:
In [80]:
class GenerateJsonData(object):
    '''Generate the JSON used by the story generator from CSV data and config files.

    The config file should contain hierarchy information in each row, with
    columns:

    - Category: the highest-level category, displayed on the left nav bar of
      the story generator.
    - Sub Category: the name of an option displayed under a category.
    - Target Column: the name of the column in the data files whose values
      are reported for this sub category.
    - unit: the unit to attach to the values.

    Each data file should contain the key columns `Block`, `Sector`, `SHC`
    (other base columns such as `S.No.` are allowed) and every column named in
    `Target Column`.
    '''

    # Columns that identify a row; the monthly files are joined on these.
    KEY_COLUMNS = ['Block', 'Sector', 'SHC']

    def __init__(self, data_file_paths, config_file_path):
        '''
        Store the file paths; nothing is read until `load_data` is called.

        Args:
            - data_file_paths (`dict`: {'month, year': datafilepath}): Dictionary
              with keys of the form 'month, year' and the data file path as value.
            - config_file_path (string): path to the config file.
        '''
        self.data_file_path = data_file_paths
        self.config_file_path = config_file_path
        self.data = self.config = None

    @staticmethod
    def _month_label(data_source):
        '''Return the part of a 'month, year' key that precedes the first comma.'''
        return data_source.split(',')[0]

    def load_data(self):
        '''
        Load the config file and merge all data files into one DataFrame.

        Every non-key column of each data file is renamed to
        `<column>_<month>` before merging, so that each month's values can be
        looked up unambiguously regardless of how many files are supplied.
        NOTE: two keys that share the same month label (e.g. the same month in
        different years) would produce clashing column names.

        Returns:
            None

        Raises:
            ValueError: if no data files were supplied.
        '''
        data = None
        for data_source, path in self.data_file_path.items():
            month = self._month_label(data_source)
            frame = pd.read_csv(path)
            frame.columns = [col.strip() for col in frame.columns]
            # Suffix explicitly instead of relying on merge(suffixes=...):
            # merge only suffixes colliding names, which left some months
            # unsuffixed and labelled others with the wrong month.
            frame = frame.rename(columns={
                col: '{0}_{1}'.format(col, month)
                for col in frame.columns if col not in self.KEY_COLUMNS
            })
            if data is None:
                data = frame
            else:
                data = pd.merge(left=data, right=frame, on=self.KEY_COLUMNS)
        if data is None:
            raise ValueError('No data files were provided.')
        self.data = data
        self.config = pd.read_csv(self.config_file_path)
        return None

    @staticmethod
    def generate_slug(string_val):
        '''
        Convert a string value to a slug.

        The string is lower-cased and every run of characters other than
        ASCII letters and digits is replaced by a single underscore.

        Arg:
            string_val (string): A string object to be slugged
        Returns:
            A slugged string.
        '''
        return re.sub(r'[^0-9a-zA-Z]+', '_', string_val.lower())

    def generate_json_data(self, output_file):
        '''
        Build the category / sub-record hierarchy and write it as JSON.

        Data and config are loaded on first use. Missing values in the data
        are replaced by 0, and every figure is written as a string.

        Args:
            - output_file (string): path of the JSON file to write.
        Returns:
            None
        '''
        if self.data is None or self.config is None:
            self.load_data()
        data = self.data.fillna(0)
        meta_cols = ['SHC', 'Sector', 'Block']
        data_sources = list(self.data_file_path)
        months = [self._month_label(source) for source in data_sources]

        json_data = []
        for category, group in self.config.groupby('Category'):
            category_json = {'category_name': category,
                             'category_slug': self.generate_slug(category),
                             'sub_records': []}
            for _, row in group.iterrows():
                # One suffixed data column per month, in the order the data
                # files were supplied.
                target_cols = [row['Target Column'] + '_' + month
                               for month in months]
                record = {'record_name': row['Sub Category'],
                          'record_slug': self.generate_slug(row['Sub Category']),
                          'unit': row['unit'],
                          'record_figures': []}
                for _, sub_row in data[meta_cols + target_cols].iterrows():
                    be = [{source: str(sub_row[col])}
                          for source, col in zip(data_sources, target_cols)]
                    record['record_figures'].append({
                        'figures': {'BE': be},
                        'grpby_name': sub_row['SHC'],
                        'shc': sub_row['SHC'],
                        'block': sub_row['Block'],
                        'sector': sub_row['Sector']
                    })
                category_json['sub_records'].append(record)
            json_data.append(category_json)

        with open(output_file, 'w') as json_data_file:
            json.dump(json_data, json_data_file)
In [81]:
# Map each 'month, year' label to its monthly CSV export, then write the
# combined time series to ../data/timeseries.json.
monthly_sources = {
    'Apr, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Apr.csv',
    'May, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - May.csv',
    'Jun, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Jun.csv',
    'Jul, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Jul.csv',
}
json_generator = GenerateJsonData(
    monthly_sources,
    '../data/Indicators-RCH-Category-Structure.csv',
)
json_generator.generate_json_data('../data/timeseries.json')
In [ ]: