notebook.community

Edit and run



In [13]:

    
import pandas as pd
import json
import re



In [2]:

    
!ls ../data









    



'Data for Akshada monitoring tool'
 Indicators-RCH-Category-Structure.csv
 Indicators-RCH-Data.csv
'Indicators-RCH-prgress-tracking_V2.xlsx - Apr.csv'
'Indicators-RCH-prgress-tracking_V2.xlsx - Jul.csv'
'Indicators-RCH-prgress-tracking_V2.xlsx - Jun.csv'
'Indicators-RCH-prgress-tracking_V2.xlsx - May.csv'
 shc_data.json



In [3]:

    
apr_data = pd.read_csv('../data/Indicators-RCH-prgress-tracking_V2.xlsx - Apr.csv')
may_data = pd.read_csv('../data/Indicators-RCH-prgress-tracking_V2.xlsx - May.csv')



In [4]:

    
apr_data.head()









    Out[4]:







  
    
      
      S.No
      Block
      Sector
      SHC
      Unit Type
      Target Pregnant Women
      Target Live Birth
      Traget Full Immunization
      ANC registered
      Total Number of pregnant registered  before 12 weeks against total number of ANC
      ...
      %.7
      Total number of new borns received 6 post natal HBNC visit
      Percentage of new borns received 6 post natal HBNC visit
      % Given zero dose to children against total live birth
      %.8
      Children fully immunized
      Percentage of children fully immunized
      Unnamed: 38
      IMR
      MMR
    
  
  
    
      0
      1
      Kishanganj
      Bajranggarh
      Gobarcha
      SHC
      6
      5
      5
      6
      5
      ...
      #DIV/0!
      5
      NaN
      0
      #DIV/0!
      5
      100.00
      NaN
      NaN
      NaN
    
    
      1
      2
      Kishanganj
      Bajranggarh
      Kheruna
      SHC
      5
      5
      5
      9
      4
      ...
      #DIV/0!
      0
      NaN
      0
      #DIV/0!
      7
      140.00
      NaN
      NaN
      NaN
    
    
      2
      3
      Kishanganj
      Bhanwargarh
      Basthooni
      SHC
      4
      3
      3
      4
      0
      ...
      #DIV/0!
      3
      NaN
      0
      #DIV/0!
      4
      133.33
      NaN
      NaN
      NaN
    
    
      3
      4
      Kishanganj
      Bhanwargarh
      Hirapur
      SHC
      5
      4
      4
      7
      4
      ...
      #DIV/0!
      3
      NaN
      0
      #DIV/0!
      3
      75.00
      NaN
      NaN
      NaN
    
    
      4
      5
      Kishanganj
      Bhanwargarh
      Peenjana
      SHC
      7
      6
      6
      7
      1
      ...
      #DIV/0!
      4
      NaN
      0
      #DIV/0!
      7
      116.67
      NaN
      NaN
      NaN
    
  

5 rows × 41 columns



In [5]:

    
categories = pd.read_csv('../data/Indicators-RCH-Category-Structure.csv')



In [43]:

    
merged_data = pd.merge(left=apr_data, right=may_data, on=['Block', 'Sector', 'SHC'], suffixes=['_apr', '_may'])



In [72]:

    
[col for col in merged_data.columns if 'ANC registered' in col]









    Out[72]:





['ANC registered_apr', 'ANC registered_may']



In [73]:

    
apr_data['ANC registered'].head()









    Out[73]:





0    6
1    9
2    4
3    7
4    7
Name: ANC registered, dtype: int64



In [74]:

    
may_data['ANC registered'].head()









    Out[74]:





0     2
1    16
2     0
3     7
4     5
Name: ANC registered, dtype: int64



In [75]:

    
merged_data[['ANC registered_apr', 'ANC registered_may']]









    Out[75]:







  
    
      
      ANC registered_apr
      ANC registered_may
    
  
  
    
      0
      6
      2
    
    
      1
      9
      16
    
    
      2
      4
      0
    
    
      3
      7
      7
    
    
      4
      7
      5
    
    
      5
      16
      7
    
    
      6
      12
      14
    
    
      7
      3
      6
    
    
      8
      4
      5
    
    
      9
      3
      11
    
    
      10
      4
      5
    
    
      11
      7
      14
    
    
      12
      7
      8
    
    
      13
      7
      6
    
    
      14
      10
      5
    
    
      15
      7
      13
    
    
      16
      8
      9
    
    
      17
      7
      10
    
    
      18
      4
      3
    
    
      19
      10
      6
    
    
      20
      7
      5
    
    
      21
      7
      3
    
    
      22
      13
      3
    
    
      23
      12
      3
    
    
      24
      7
      7
    
    
      25
      16
      7
    
    
      26
      3
      12
    
    
      27
      8
      6
    
    
      28
      11
      3
    
    
      29
      16
      0
    
    
      30
      9
      5
    
    
      31
      2
      17
    
    
      32
      6
      3
    
    
      33
      7
      1
    
    
      34
      9
      5
    
    
      35
      0
      3
    
    
      36
      7
      2
    
    
      37
      17
      14
    
    
      38
      9
      7
    
    
      39
      18
      8
    
    
      40
      17
      18
    
    
      41
      13
      10
    
    
      42
      7
      8
    
    
      43
      20
      19
    
    
      44
      15
      18



In [80]:

    
class GenerateJsonData(object):
    '''Given a data file and config file in csv format generate the json to be used in story generator.
    
    The Config file should contain hierarchy information in each row, with columns as :-
        - Category = This is the highest level category that will be displayed on the left nav bar of story generator.
        - Sub Cateogry = These are the name of the options that will be displayed under a category.
        - Target Column = This is the map of the column from the `data` dataframe that needs to be used as values
        - unit = The unit with which the value needs to be extended.
    
    The data file should contain in this case some base columns `S.No.`, `Block`, `Sector`, `SHC` and all the
    column names mentioned in the `Target Column`
    '''
    
    def __init__(self, data_file_paths, config_file_path):
        '''
        Read and set the file paths
        
        Args:
            - data_file_path (`dict`:{'month, year': datafilepath}): Dictionary with keys
                as month, year and value as datafilepath.
            - config_file_path (string): path to the config file.
        '''
        self.data_file_path = data_file_paths
        self.config_file_path = config_file_path
        self.data = self.config = None
    
    def load_data(self):
        '''
        Load the config file and data file.
        
        Returns:
            None
        '''
        data = None
        last_month = None
        for data_source in self.data_file_path:
            if data is None:
                data = pd.read_csv(self.data_file_path[data_source])
                data.columns = [col.strip() for col in data.columns]
            else:
                new_data = pd.read_csv(self.data_file_path[data_source])
                new_data.columns = [col.strip() for col in new_data.columns]
                new_month = data_source.split(',')[0]
                data = pd.merge(left=data, right=new_data,
                                on=['Block', 'Sector', 'SHC'],
                                suffixes=['_{0}'.format(new_month), '_{0}'.format(last_month)])
            last_month = data_source.split(',')[0]
        self.data = data
        self.config = pd.read_csv(self.config_file_path)
        return None
    
    @staticmethod
    def generate_slug(string_val):
        '''
        Convert a string value to a slug.
        
        Arg:
            string_val (string): A string object to be slugged
        
        Returns:
            A slugged string.
        '''
        return re.sub('[^0-9a-zA-Z]+', '_', string_val.lower())


    def generate_json_data(self, output_file):
        if self.data is None or self.config_file_path is None:
            self.load_data()
        data = self.data.fillna(0)
        json_data = []
        meta_cols = ['SHC', 'Sector', 'Block']
        for category, group in self.config.groupby('Category'):
            category_slug = self.generate_slug(category)
            category_json = {'category_name': category,
                             'category_slug': category_slug, 
                             'sub_records': []}
            for _, row in group.iterrows():
                record = {}
                record['record_name'] = row['Sub Category']
                record['record_slug'] = self.generate_slug(row['Sub Category'])
                record['unit'] = row['unit']
                record['record_figures'] = []
                target_cols = []
                for data_source in self.data_file_path:
                    month = data_source.split(',')[0]
                    target_cols.append(row['Target Column'] + '_{0}'.format(month))
                cols = meta_cols + target_cols
                print(target_cols)
                for index, sub_row in data[cols].iterrows():
                    be = []
                    for data_source, target_col in zip(self.data_file_path, target_cols):
                        be.append({data_source: str(sub_row[target_col])})
                    figures = {'BE': be}
                    record['record_figures'].append({
                        'figures': figures,
                        'grpby_name': sub_row['SHC'],
                        'shc': sub_row['SHC'],
                        'block': sub_row['Block'],
                        'sector': sub_row['Sector']
                    })
                category_json['sub_records'].append(record)
            json_data.append(category_json)
        with open(output_file, 'w') as json_data_file:
            json.dump(json_data, json_data_file)



In [81]:

    
GenerateJsonData({'Apr, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Apr.csv',
                  'May, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - May.csv',
                  'Jun, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Jun.csv',
                  'Jul, 2017-18': '../data/Indicators-RCH-prgress-tracking_V2.xlsx - Jul.csv'}, 
                 '../data/Indicators-RCH-Category-Structure.csv').generate_json_data('../data/timeseries.json')









    



['Target Pregnant Women_Apr', 'Target Pregnant Women_May', 'Target Pregnant Women_Jun', 'Target Pregnant Women_Jul']
['Total Number of pregnant registered\xa0 before 12 weeks against total number of ANC_Apr', 'Total Number of pregnant registered\xa0 before 12 weeks against total number of ANC_May', 'Total Number of pregnant registered\xa0 before 12 weeks against total number of ANC_Jun', 'Total Number of pregnant registered\xa0 before 12 weeks against total number of ANC_Jul']
['Percentage of pregnant registered  before 12 weeks against total number of ANC_Apr', 'Percentage of pregnant registered  before 12 weeks against total number of ANC_May', 'Percentage of pregnant registered  before 12 weeks against total number of ANC_Jun', 'Percentage of pregnant registered  before 12 weeks against total number of ANC_Jul']
['Number of pregnant women received 3 check ups_Apr', 'Number of pregnant women received 3 check ups_May', 'Number of pregnant women received 3 check ups_Jun', 'Number of pregnant women received 3 check ups_Jul']
['Percentage of pregnant women received 3 check ups_Apr', 'Percentage of pregnant women received 3 check ups_May', 'Percentage of pregnant women received 3 check ups_Jun', 'Percentage of pregnant women received 3 check ups_Jul']
['Children fully immunized_Apr', 'Children fully immunized_May', 'Children fully immunized_Jun', 'Children fully immunized_Jul']
['Percentage of children fully immunized_Apr', 'Percentage of children fully immunized_May', 'Percentage of children fully immunized_Jun', 'Percentage of children fully immunized_Jul']
['Total number of new borns received 6 post natal HBNC visit_Apr', 'Total number of new borns received 6 post natal HBNC visit_May', 'Total number of new borns received 6 post natal HBNC visit_Jun', 'Total number of new borns received 6 post natal HBNC visit_Jul']
['Percentage of new borns received 6 post natal HBNC visit_Apr', 'Percentage of new borns received 6 post natal HBNC visit_May', 'Percentage of new borns received 6 post natal HBNC visit_Jun', 'Percentage of new borns received 6 post natal HBNC visit_Jul']



In [ ]:

	S.No	Block	Sector	SHC	Unit Type	Target Pregnant Women	Target Live Birth	Traget Full Immunization	ANC registered	Total Number of pregnant registered before 12 weeks against total number of ANC	...	%.7	Total number of new borns received 6 post natal HBNC visit	Percentage of new borns received 6 post natal HBNC visit	%.8	Children fully immunized	Percentage of children fully immunized	Unnamed: 38	IMR	MMR
0	1	Kishanganj	Bajranggarh	Gobarcha	SHC	6	5	5	6	5	...	#DIV/0!	5	NaN	#DIV/0!	5	100.00	NaN	NaN	NaN
1	2	Kishanganj	Bajranggarh	Kheruna	SHC	5	5	5	9	4	...	#DIV/0!	0	NaN	#DIV/0!	7	140.00	NaN	NaN	NaN
2	3	Kishanganj	Bhanwargarh	Basthooni	SHC	4	3	3	4	0	...	#DIV/0!	3	NaN	#DIV/0!	4	133.33	NaN	NaN	NaN
3	4	Kishanganj	Bhanwargarh	Hirapur	SHC	5	4	4	7	4	...	#DIV/0!	3	NaN	#DIV/0!	3	75.00	NaN	NaN	NaN
4	5	Kishanganj	Bhanwargarh	Peenjana	SHC	7	6	6	7	1	...	#DIV/0!	4	NaN	#DIV/0!	7	116.67	NaN	NaN	NaN

	ANC registered_apr	ANC registered_may
0	6	2
1	9	16
2	4	0
3	7	7
4	7	5
5	16	7
6	12	14
7	3	6
8	4	5
9	3	11
10	4	5
11	7	14
12	7	8
13	7	6
14	10	5
15	7	13
16	8	9
17	7	10
18	4	3
19	10	6
20	7	5
21	7	3
22	13	3
23	12	3
24	7	7
25	16	7
26	3	12
27	8	6
28	11	3
29	16	0
30	9	5
31	2	17
32	6	3
33	7	1
34	9	5
35	0	3
36	7	2
37	17	14
38	9	7
39	18	8
40	17	18
41	13	10
42	7	8
43	20	19
44	15	18

	ANC registered_apr	ANC registered_may
0	6	2
1	9	16
2	4	0
3	7	7
4	7	5
5	16	7
6	12	14
7	3	6
8	4	5
9	3	11
10	4	5
11	7	14
12	7	8
13	7	6
14	10	5
15	7	13
16	8	9
17	7	10
18	4	3
19	10	6
20	7	5
21	7	3
22	13	3
23	12	3
24	7	7
25	16	7
26	3	12
27	8	6
28	11	3
29	16	0
30	9	5
31	2	17
32	6	3
33	7	1
34	9	5
35	0	3
36	7	2
37	17	14
38	9	7
39	18	8
40	17	18
41	13	10
42	7	8
43	20	19
44	15	18

	ANC registered_apr	ANC registered_may
0	6	2
1	9	16
2	4	0
3	7	7
4	7	5
5	16	7
6	12	14
7	3	6
8	4	5
9	3	11
10	4	5
11	7	14
12	7	8
13	7	6
14	10	5
15	7	13
16	8	9
17	7	10
18	4	3
19	10	6
20	7	5
21	7	3
22	13	3
23	12	3
24	7	7
25	16	7
26	3	12
27	8	6
28	11	3
29	16	0
30	9	5
31	2	17
32	6	3
33	7	1
34	9	5
35	0	3
36	7	2
37	17	14
38	9	7
39	18	8
40	17	18
41	13	10
42	7	8
43	20	19
44	15	18