In [1]:
import pandas as pd
import requests
import csv

In [2]:
# process columns like major head as they are converted to numbers, which shouldnt happen
# MH	SMH	MINH	GSH	SH	DH	SDH
# data.columns[2:9]

In [3]:
fill_config = {u'MH': 4, 
 u'SMH': 2, 
 u'MINH': 3, 
 u'GSH': 2, 
 u'SH': 2, 
 u'DH': 3, 
 u'SDH': 3}

In [4]:
# this data should not be null, thus no error handling for nan values
def fill_process_col(data, n_fill):
    return data.astype(int).astype(str).apply(lambda x: '0' * (n_fill - len(x.strip())) + x.strip())

In [5]:
def get_ap_treasury_data(year, district_code):
    url = 'https://treasury.ap.gov.in/budreq/index.php?service=REPORTN'
    params = {'fin_year': year,
    'district': district_code,
    'chkall': '',
    'april': '04',
    'may': '05',
    'june': '06',
    'july': '07',
    'aug': '08',
    'sep': '09',
    'oct':'10',
    'nov':'11',
    'dec':'12',
    'jan':'01',
    'feb':'02',
    'mar':'03',
    'mh':'MH',
    'smh':'SMH',
    'minh':'MINH',
    'gsh':'GSH',
    'sh':'SH',
    'dh':'DH',
    'sdh':'SDH',
    'npn':'NPN',
    'dist':'DIST',
    'sub':'Submit'}
    resp = requests.get(url=url, params=params)
    print(resp.status_code)
    parsed_data = pd.read_html(resp.text, header=[0])
    data = parsed_data[0]
    return data[data['MH TYPE'] != 'TOTAL']

In [6]:
hoas_selected_cols = [
 'mh',
 'mh_desc',
 'smh',
 'smh_desc',
 'minh',
 'minh_desc',
 'gsh',
 'gsh_desc',
 'sh',
 'sh_desc',
 'dh',
 'dh_desc',
 'sdh',
 'sdh_desc',
 'cv',
 'mhtype',
 'hoa']

def get_haos_data():
    hoas = pd.read_csv('hoa_2018.csv', dtype=str)
    hoas.columns = hoas.iloc[0]
    hoas_processed = hoas.drop('0', axis=1).drop(0)
    return hoas_processed

def extract_ap_districts_treasury_data(districts, years):
    for district in districts:
        district_data = pd.DataFrame()
        for year in years:
            data = get_ap_treasury_data(year, district)
            for col in fill_config:
                data[col] = fill_process_col(data[col], fill_config[col])
            time_cols = data.columns[11:23]
            cat_cols = data.columns[1:11]
            processed_data = pd.DataFrame()
            for index, row in data.iterrows():
                cat_info = row[cat_cols]
                for t_col in time_cols:
                    cat_info['MONTH'] = t_col
                    cat_info['AMOUNT'] = row[t_col]
                    processed_data = processed_data.append(cat_info, ignore_index=False)
            processed_data.reset_index(inplace=True, drop=True)
            hoas_group_cols = [key.lower() for key in  fill_config.keys()]
            hoas_processed = get_haos_data()
            hoas_unique = hoas_processed.drop_duplicates('hoa')
            data_group_cols = fill_config.keys()
            combined_data = pd.merge(left=processed_data, left_on=list(data_group_cols), 
                                     right=hoas_unique[hoas_selected_cols], right_on=list(hoas_group_cols),
                                     how='left')
            combined_data.drop(hoas_group_cols, axis=1, inplace=True)
            district_name = combined_data['DISTRICT'].iloc[0]
            combined_data.to_csv('../csvs/ditrict_treasuries/ap/{}/{}.csv'.format(year, district_name),
                                index=False, quoting=csv.QUOTE_ALL)
            print('Completed for ', district, year)

In [7]:
extract_ap_districts_treasury_data(['0100', '0200', '0300', '0400'], ['2016-17', '2017-18'])


200
Completed for  0100 2016-17
200
Completed for  0100 2017-18
200
-------------------------------------------------------------
KeyboardInterrupt           Traceback (most recent call last)
<ipython-input-7-0a2d9349c8c8> in <module>()
----> 1 extract_ap_districts_treasury_data(['0100', '0200', '0300', '0400'], ['2016-17', '2017-18'])

<ipython-input-6-fd46644f02f4> in extract_ap_districts_treasury_data(districts, years)
     39                     cat_info['MONTH'] = t_col
     40                     cat_info['AMOUNT'] = row[t_col]
---> 41                     processed_data = processed_data.append(cat_info, ignore_index=False)
     42             processed_data.reset_index(inplace=True, drop=True)
     43             hoas_group_cols = [key.lower() for key in  fill_config.keys()]

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in append(self, other, ignore_index, verify_integrity)
   5173 
   5174             combined_columns = self.columns.tolist() + self.columns.union(
-> 5175                 other.index).difference(self.columns).tolist()
   5176             other = other.reindex(combined_columns, copy=False)
   5177             other = DataFrame(other.values.reshape((1, len(other))),

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in union(self, other)
   2268 
   2269         # for subclasses
-> 2270         return self._wrap_union_result(other, result)
   2271 
   2272     def _wrap_union_result(self, other, result):

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in _wrap_union_result(self, other, result)
   2272     def _wrap_union_result(self, other, result):
   2273         name = self.name if self.name == other.name else None
-> 2274         return self.__class__(result, name=name)
   2275 
   2276     def intersection(self, other):

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, fastpath, tupleize_cols, **kwargs)
    347                         except IncompatibleFrequency:
    348                             pass
--> 349             return cls._simple_new(subarr, name)
    350 
    351         elif hasattr(data, '__array__'):

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in _simple_new(cls, values, name, dtype, **kwargs)
    414         result._data = values
    415         result.name = name
--> 416         for k, v in compat.iteritems(kwargs):
    417             setattr(result, k, v)
    418         return result._reset_identity()

KeyboardInterrupt: 

In [73]:
data = get_ap_treasury_data('2016-17', '0500')


200

In [74]:
for col in fill_config:
    data[col] = fill_process_col(data[col], fill_config[col])

In [75]:
data[data.columns[2:9]]


Out[75]:
MH SMH MINH GSH SH DH SDH
0 0028 00 107 00 01 000 000
2 0028 00 107 00 04 000 000
4 0029 00 101 00 01 000 000
6 0029 00 101 00 02 000 000
8 0029 00 106 00 01 000 000
10 0029 00 107 00 01 000 000
12 0029 00 800 00 01 000 000
14 0029 00 800 00 81 000 000
16 0030 01 101 00 01 000 000
18 0030 01 102 00 01 000 000
20 0030 01 102 00 02 000 000
22 0030 01 800 00 81 000 000
24 0030 02 102 00 01 000 000
26 0030 02 102 00 96 000 000
28 0030 02 103 00 01 000 000
30 0030 02 800 00 01 000 000
32 0030 02 800 00 81 000 000
34 0030 02 800 00 96 000 000
36 0030 02 901 00 01 000 000
38 0030 03 104 00 01 000 000
40 0030 03 104 00 96 000 000
42 0030 03 800 00 81 001 000
44 0030 03 800 00 81 800 000
46 0035 00 101 00 01 001 000
48 0035 00 101 00 01 002 000
50 0039 00 102 00 01 000 000
52 0039 00 102 00 02 000 000
54 0039 00 102 00 81 000 000
56 0039 00 105 00 01 000 000
58 0039 00 105 00 02 000 000
... ... ... ... ... ... ... ...
5524 8448 00 110 00 70 001 000
5526 8448 00 120 00 02 000 000
5528 8448 00 120 00 03 000 000
5530 8448 00 120 00 12 000 000
5532 8448 00 120 00 13 001 000
5534 8448 00 120 00 19 000 000
5536 8448 00 120 00 22 000 000
5538 8449 00 120 00 30 000 000
5540 8449 00 120 00 85 000 000
5542 8449 00 120 00 99 000 000
5544 8658 00 101 00 63 004 000
5546 8782 00 102 00 01 000 000
5548 8782 00 102 00 02 000 000
5550 8782 00 103 00 01 000 000
5552 8782 00 103 00 02 000 000
5554 8793 00 102 00 00 000 000
5556 8793 00 103 00 00 000 000
5558 8793 00 108 00 00 000 000
5560 8793 00 109 00 00 000 000
5562 8793 00 110 00 00 000 000
5564 8793 00 111 00 00 000 000
5566 8793 00 115 00 00 000 000
5568 8793 00 116 00 00 000 000
5570 8793 00 118 00 00 000 000
5572 8793 00 120 00 00 000 000
5574 8793 00 122 00 00 000 000
5576 8793 00 124 00 00 000 000
5578 8793 00 127 00 00 000 000
5580 8793 00 128 00 00 000 000
5582 8793 00 129 00 00 000 000

2792 rows × 7 columns


In [76]:
# Now I want to put it in a format that is usable as a time series
data.columns[11:23]


Out[76]:
Index(['APRIL', 'MAY', 'JUNE', 'JULY', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
       'JAN', 'FEB', 'MAR'],
      dtype='object')

In [77]:
time_cols = data.columns[11:23]
cat_cols = data.columns[1:11]

In [78]:
processed_data = pd.DataFrame()

In [79]:
for index, row in data.iterrows():
    cat_info = row[cat_cols]
    for t_col in time_cols:
        cat_info['MONTH'] = t_col
        cat_info['AMOUNT'] = row[t_col]
        processed_data = processed_data.append(cat_info, ignore_index=False)

In [80]:
processed_data.reset_index(inplace=True, drop=True)
processed_data


Out[80]:
AMOUNT DH DISTRICT GSH MH MH TYPE MINH MONTH NPN SDH SH SMH
0 11315565.0 000 KRISHNA 00 0028 R 107 APRIL N 000 01 00
1 12981956.0 000 KRISHNA 00 0028 R 107 MAY N 000 01 00
2 12462097.0 000 KRISHNA 00 0028 R 107 JUNE N 000 01 00
3 14215216.0 000 KRISHNA 00 0028 R 107 JULY N 000 01 00
4 12888154.0 000 KRISHNA 00 0028 R 107 AUG N 000 01 00
5 13221445.0 000 KRISHNA 00 0028 R 107 SEP N 000 01 00
6 12939059.0 000 KRISHNA 00 0028 R 107 OCT N 000 01 00
7 12977087.0 000 KRISHNA 00 0028 R 107 NOV N 000 01 00
8 12488969.0 000 KRISHNA 00 0028 R 107 DEC N 000 01 00
9 13189411.0 000 KRISHNA 00 0028 R 107 JAN N 000 01 00
10 11978180.0 000 KRISHNA 00 0028 R 107 FEB N 000 01 00
11 13591327.0 000 KRISHNA 00 0028 R 107 MAR N 000 01 00
12 5417089.0 000 KRISHNA 00 0028 R 107 APRIL N 000 04 00
13 3258087.0 000 KRISHNA 00 0028 R 107 MAY N 000 04 00
14 8187933.0 000 KRISHNA 00 0028 R 107 JUNE N 000 04 00
15 9297279.0 000 KRISHNA 00 0028 R 107 JULY N 000 04 00
16 8735422.0 000 KRISHNA 00 0028 R 107 AUG N 000 04 00
17 16262088.0 000 KRISHNA 00 0028 R 107 SEP N 000 04 00
18 6837869.0 000 KRISHNA 00 0028 R 107 OCT N 000 04 00
19 5930071.0 000 KRISHNA 00 0028 R 107 NOV N 000 04 00
20 11475430.0 000 KRISHNA 00 0028 R 107 DEC N 000 04 00
21 11260727.0 000 KRISHNA 00 0028 R 107 JAN N 000 04 00
22 7431100.0 000 KRISHNA 00 0028 R 107 FEB N 000 04 00
23 23317889.0 000 KRISHNA 00 0028 R 107 MAR N 000 04 00
24 591047.0 000 KRISHNA 00 0029 R 101 APRIL N 000 01 00
25 493169.0 000 KRISHNA 00 0029 R 101 MAY N 000 01 00
26 435022.0 000 KRISHNA 00 0029 R 101 JUNE N 000 01 00
27 154236.0 000 KRISHNA 00 0029 R 101 JULY N 000 01 00
28 10386.0 000 KRISHNA 00 0029 R 101 AUG N 000 01 00
29 6314.0 000 KRISHNA 00 0029 R 101 SEP N 000 01 00
... ... ... ... ... ... ... ... ... ... ... ... ...
33474 0.0 000 KRISHNA 00 8793 P 127 OCT N 000 00 00
33475 0.0 000 KRISHNA 00 8793 P 127 NOV N 000 00 00
33476 0.0 000 KRISHNA 00 8793 P 127 DEC N 000 00 00
33477 0.0 000 KRISHNA 00 8793 P 127 JAN N 000 00 00
33478 0.0 000 KRISHNA 00 8793 P 127 FEB N 000 00 00
33479 0.0 000 KRISHNA 00 8793 P 127 MAR N 000 00 00
33480 16457.0 000 KRISHNA 00 8793 P 128 APRIL N 000 00 00
33481 16457.0 000 KRISHNA 00 8793 P 128 MAY N 000 00 00
33482 16457.0 000 KRISHNA 00 8793 P 128 JUNE N 000 00 00
33483 16457.0 000 KRISHNA 00 8793 P 128 JULY N 000 00 00
33484 16457.0 000 KRISHNA 00 8793 P 128 AUG N 000 00 00
33485 16992.0 000 KRISHNA 00 8793 P 128 SEP N 000 00 00
33486 20737.0 000 KRISHNA 00 8793 P 128 OCT N 000 00 00
33487 16992.0 000 KRISHNA 00 8793 P 128 NOV N 000 00 00
33488 16992.0 000 KRISHNA 00 8793 P 128 DEC N 000 00 00
33489 16992.0 000 KRISHNA 00 8793 P 128 JAN N 000 00 00
33490 16992.0 000 KRISHNA 00 8793 P 128 FEB N 000 00 00
33491 16992.0 000 KRISHNA 00 8793 P 128 MAR N 000 00 00
33492 3814736.0 000 KRISHNA 00 8793 P 129 APRIL N 000 00 00
33493 7456329.0 000 KRISHNA 00 8793 P 129 MAY N 000 00 00
33494 9994558.0 000 KRISHNA 00 8793 P 129 JUNE N 000 00 00
33495 27594528.0 000 KRISHNA 00 8793 P 129 JULY N 000 00 00
33496 25719339.0 000 KRISHNA 00 8793 P 129 AUG N 000 00 00
33497 39093835.0 000 KRISHNA 00 8793 P 129 SEP N 000 00 00
33498 46692771.0 000 KRISHNA 00 8793 P 129 OCT N 000 00 00
33499 1316728.0 000 KRISHNA 00 8793 P 129 NOV N 000 00 00
33500 1913595.0 000 KRISHNA 00 8793 P 129 DEC N 000 00 00
33501 7708034.0 000 KRISHNA 00 8793 P 129 JAN N 000 00 00
33502 439235.0 000 KRISHNA 00 8793 P 129 FEB N 000 00 00
33503 3565882.0 000 KRISHNA 00 8793 P 129 MAR N 000 00 00

33504 rows × 12 columns


In [81]:
# Now we want to get information on head of account
hoas = pd.read_csv('hoa_2018.csv', dtype=str)

In [82]:
hoas.columns = hoas.iloc[0]
hoas_processed = hoas.drop('0', axis=1).drop(0)
hoas_processed


Out[82]:
demand demandname sdept dept_code sdeptname mh mh_desc smh smh_desc minh ... dh_desc sdh sdh_desc cv accts_1516 be_1617 re_1617 be_1718 mhtype hoa
1 NaN NaN NaN NaN NaN 0020 Corporation tax 00 Not Applicable 901 ... Not Applicable 000 Not Applicable V 68897100 77293400 80499006 85837400 1 0020009010001000000NVN
2 NaN NaN NaN NaN NaN 0021 Taxes on Income Other Than Corporation Tax 00 Not Applicable 901 ... Not Applicable 000 Not Applicable V 47920700 59900200 60842567 75044200 1 0021009010001000000NVN
3 NaN NaN NaN NaN NaN 0028 Other Taxes on Income and Expenditure 00 Not Applicable 107 ... Not Applicable 000 Not Applicable V 2229564 3444158 3516485 3691306 1 0028001070001000000NVN
4 NaN NaN NaN NaN NaN 0028 Other Taxes on Income and Expenditure 00 Not Applicable 107 ... Not Applicable 000 Not Applicable V 123089 0 0 203788 1 0028001070003000000NVN
5 NaN NaN NaN NaN NaN 0028 Other Taxes on Income and Expenditure 00 Not Applicable 107 ... Not Applicable 000 Not Applicable V 150511 0 0 249188 1 0028001070004000000NVN
6 NaN NaN NaN NaN NaN 0028 Other Taxes on Income and Expenditure 00 Not Applicable 901 ... Not Applicable 000 Not Applicable V 1500 0 0 2484 1 0028009010001000000NVN
7 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 101 ... Not Applicable 000 Not Applicable V 129171 0 0 0 1 0029001010001000000NVN
8 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 101 ... Not Applicable 000 Not Applicable V 294 0 0 0 1 0029001010002000000NVN
9 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 103 ... Not Applicable 000 Not Applicable V 493 0 0 0 1 0029001030001000000NVN
10 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 104 ... Not Applicable 000 Not Applicable V 138 2590 2590 1231 1 0029001040001000000NVN
11 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 106 ... Not Applicable 000 Not Applicable V 3025 122785 122785 58376 1 0029001060001000000NVN
12 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 107 ... Not Applicable 000 Not Applicable V 165391 5274001 5274001 2507449 1 0029001070001000000NVN
13 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 107 ... Not Applicable 000 Not Applicable V 4 1345 1345 639 1 0029001070002000000NVN
14 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... Not Applicable 000 Not Applicable V 12164 138133 138133 65673 1 0029008000001000000NVN
15 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... Not Applicable 000 Not Applicable V 538 14389 14389 6841 1 0029008000002000000NVN
16 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... Other Receipts 000 Not Applicable V 1894 32519 32519 15461 1 0029008000003001000NVN
17 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... User Charges 000 Not Applicable V 2975 671 671 319 1 0029008000003800000NVN
18 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... Not Applicable 000 Not Applicable V 874 671 671 319 1 0029008000080000000NVN
19 NaN NaN NaN NaN NaN 0029 Land Revenue 00 Not Applicable 800 ... Not Applicable 000 Not Applicable V 206583 722896 722896 343692 1 0029008000081000000NVN
20 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 101 ... Not Applicable 000 Not Applicable V 25136 207566 140247 30322 1 0030011010001000000NVN
21 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 102 ... Not Applicable 000 Not Applicable V 677308 967306 653585 564081 1 0030011020001000000NVN
22 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 102 ... Not Applicable 000 Not Applicable V 2124 15977 10795 23903 1 0030011020002000000NVN
23 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 102 ... Not Applicable 000 Not Applicable V -11900 -12630 -8534 -10538 1 0030011020096000000NVN
24 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 800 ... Not Applicable 000 Not Applicable V 515 443 299 692 1 0030018000001000000NVN
25 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 01 Stamps - Judicial 800 ... Not Applicable 000 Not Applicable V 173 842 569 359 1 0030018000081000000NVN
26 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 02 Stamps - Non-Judicial 102 ... Not Applicable 000 Not Applicable V 2713883 3260419 2202986 2531552 1 0030021020001000000NVN
27 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 02 Stamps - Non-Judicial 102 ... Not Applicable 000 Not Applicable V -11694 -639011 -431764 -6972 1 0030021020096000000NVN
28 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 02 Stamps - Non-Judicial 103 ... Not Applicable 000 Not Applicable V 30457975 50368514 34032780 34654562 1 0030021030001000000NVN
29 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 02 Stamps - Non-Judicial 103 ... Not Applicable 000 Not Applicable V -13725 -1 -1 -13187 1 0030021030096000000NVN
30 NaN NaN NaN NaN NaN 0030 Stamps and Registration Fees 02 Stamps - Non-Judicial 800 ... Not Applicable 000 Not Applicable V 6085 17788 12019 9498 1 0030028000001000000NVN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
14035 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 114 ... NaN 000 NaN V 0 0 0 0 1 8793001140000000000NVN
14036 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 114 ... NaN 000 NaN V 10 0 0 0 2 8793001140000000000NVN
14037 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 115 ... NaN 000 NaN V 29 0 0 0 2 8793001150000000000NVN
14038 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 115 ... NaN 000 NaN V 0 0 0 0 1 8793001150000000000NVN
14039 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 116 ... NaN 000 NaN V 721 0 0 0 2 8793001160000000000NVN
14040 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 116 ... NaN 000 NaN V 152 0 0 0 1 8793001160000000000NVN
14041 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 117 ... NaN 000 NaN V 49 0 0 0 2 8793001170000000000NVN
14042 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 117 ... NaN 000 NaN V -41 0 0 0 1 8793001170000000000NVN
14043 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 118 ... NaN 000 NaN V 8 0 0 0 2 8793001180000000000NVN
14044 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 118 ... NaN 000 NaN V 0 0 0 0 1 8793001180000000000NVN
14045 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 120 ... NaN 000 NaN V 26 0 0 0 1 8793001200000000000NVN
14046 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 120 ... NaN 000 NaN V -401 0 0 0 2 8793001200000000000NVN
14047 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 121 ... NaN 000 NaN V -151 0 0 0 1 8793001210000000000NVN
14048 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 121 ... NaN 000 NaN V 2 0 0 0 2 8793001210000000000NVN
14049 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 122 ... NaN 000 NaN V 9 0 0 0 2 8793001220000000000NVN
14050 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 122 ... NaN 000 NaN V 70 0 0 0 1 8793001220000000000NVN
14051 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 123 ... NaN 000 NaN V 0 0 0 0 1 8793001230000000000NVN
14052 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 123 ... NaN 000 NaN V -1 0 0 0 2 8793001230000000000NVN
14053 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 124 ... NaN 000 NaN V -3 0 0 0 1 8793001240000000000NVN
14054 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 124 ... NaN 000 NaN V 162 0 0 0 2 8793001240000000000NVN
14055 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 126 ... NaN 000 NaN V 23 0 0 0 2 8793001260000000000NVN
14056 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 126 ... NaN 000 NaN V 0 0 0 0 1 8793001260000000000NVN
14057 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 127 ... NaN 000 NaN V 6 0 0 0 2 8793001270000000000NVN
14058 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 127 ... NaN 000 NaN V 0 0 0 0 1 8793001270000000000NVN
14059 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 128 ... NaN 000 NaN V 133 0 0 0 2 8793001280000000000NVN
14060 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 NaN 128 ... NaN 000 NaN V 0 0 0 0 1 8793001280000000000NVN
14061 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 Not Applicable 129 ... Not Applicable 000 Not Applicable V -3124517 0 0 0 2 8793001290000000000NVN
14062 NaN NaN NaN NaN NaN 8793 Inter-State Suspense Accounts 00 Not Applicable 129 ... Not Applicable 000 Not Applicable V -637 0 0 0 1 8793001290000000000NVN
14063 NaN NaN NaN NaN NaN 8999 Opening Cash Balance 00 Not Applicable 102 ... Not Applicable 000 Not Applicable V 444209114 0 0 0 2 8999001020000000000NVN
14064 NaN NaN NaN NaN NaN 8999 Opening Cash Balance 00 Not Applicable 102 ... Not Applicable 000 Not Applicable V 445869518 0 0 0 1 8999001020000000000NVN

14064 rows × 26 columns


In [83]:
processed_data.columns


Out[83]:
Index(['AMOUNT', 'DH', 'DISTRICT', 'GSH', 'MH', 'MH TYPE', 'MINH', 'MONTH',
       'NPN', 'SDH', 'SH', 'SMH'],
      dtype='object')

In [84]:
hoas_group_cols = [key.lower() for key in  fill_config.keys()]
hoas_group_cols


Out[84]:
['mh', 'smh', 'minh', 'gsh', 'sh', 'dh', 'sdh']

In [85]:
hoas_unique = hoas_processed.drop_duplicates('hoa')

In [86]:
data_group_cols = fill_config.keys()
data_group_cols


Out[86]:
dict_keys(['MH', 'SMH', 'MINH', 'GSH', 'SH', 'DH', 'SDH'])

In [87]:
hoas_selected_cols = [
 'mh',
 'mh_desc',
 'smh',
 'smh_desc',
 'minh',
 'minh_desc',
 'gsh',
 'gsh_desc',
 'sh',
 'sh_desc',
 'dh',
 'dh_desc',
 'sdh',
 'sdh_desc',
 'cv',
 'mhtype',
 'hoa']

In [88]:
combined_data = pd.merge(left=processed_data, left_on=list(data_group_cols), 
         right=hoas_unique[hoas_selected_cols], right_on=list(hoas_group_cols),
         how='left')

In [89]:
hoas_group_cols


Out[89]:
['mh', 'smh', 'minh', 'gsh', 'sh', 'dh', 'sdh']

In [90]:
combined_data.drop(hoas_group_cols, axis=1, inplace=True)

In [91]:
combined_data.to_csv('../csvs/ditrict_treasuries/ap/2016-17/krishna.csv', index=False, quoting=csv.QUOTE_ALL)

In [38]:
combined_data[combined_data.MH == '2202'].to_csv('Major_head_2202_Krishna_monthly.csv', quoting=csv.QUOTE_ALL, index=False)

In [35]:
combined_data[combined_data.MH == '4602'].to_csv('Major_head_4602_Krishna_monthly.csv', index=False)
combined_data[combined_data.MH == '6202'].to_csv('Major_head_6202_Krishna_monthly.csv', index=False)

In [37]:
combined_data.to_csv('ap_monthly_treasury_data.csv', quoting=csv.QUOTE_ALL)

In [ ]: