In [43]:
import pandas as pd
import csv
import sqlalchemy

In [31]:
data = pd.read_csv('../csvs/ditrict_treasuries/ap/2017-18/krishna.csv', dtype=str)
data['fin_year'] = '2017-18'
data2 = pd.read_csv('../csvs/ditrict_treasuries/ap/2016-17/krishna.csv', dtype=str)
data['fin_year'] = '2016-17'

In [33]:
data = pd.concat([data, data2])

In [34]:
data.AMOUNT = data.AMOUNT.astype(float)

In [35]:
data.head()


Out[35]:
AMOUNT DH DISTRICT GSH MH MH TYPE MINH MONTH NPN SDH ... dh_desc fin_year gsh_desc hoa mh_desc mhtype minh_desc sdh_desc sh_desc smh_desc
0 0.0 000 KRISHNA 00 0006 R 101 APRIL N 000 ... NaN 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN
1 0.0 000 KRISHNA 00 0006 R 101 MAY N 000 ... NaN 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN
2 0.0 000 KRISHNA 00 0006 R 101 JUNE N 000 ... NaN 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN
3 0.0 000 KRISHNA 00 0006 R 101 JULY N 000 ... NaN 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN
4 6580.0 000 KRISHNA 00 0006 R 101 AUG N 000 ... NaN 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 23 columns


In [36]:
data['date_month'] = data.MONTH.apply(lambda x: '01-{0}-2017'.format(x) if x not in ['JAN', 'FEB', 'MAR'] else '01-{0}-2018'.format(x))

In [37]:
data.head()


Out[37]:
AMOUNT DH DISTRICT GSH MH MH TYPE MINH MONTH NPN SDH ... fin_year gsh_desc hoa mh_desc mhtype minh_desc sdh_desc sh_desc smh_desc date_month
0 0.0 000 KRISHNA 00 0006 R 101 APRIL N 000 ... 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN 01-APRIL-2017
1 0.0 000 KRISHNA 00 0006 R 101 MAY N 000 ... 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN 01-MAY-2017
2 0.0 000 KRISHNA 00 0006 R 101 JUNE N 000 ... 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN 01-JUNE-2017
3 0.0 000 KRISHNA 00 0006 R 101 JULY N 000 ... 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN 01-JULY-2017
4 6580.0 000 KRISHNA 00 0006 R 101 AUG N 000 ... 2016-17 NaN NaN NaN NaN NaN NaN NaN NaN 01-AUG-2017

5 rows × 24 columns


In [38]:
data.to_csv('../csvs/ditrict_treasuries/ap/2017-18/krishna_with_time_col.csv', index=False)

In [39]:
# Generate sqlite db
data['date_month'] = pd.to_datetime(data['date_month'])

In [40]:
data.dtypes


Out[40]:
AMOUNT               float64
DH                    object
DISTRICT              object
GSH                   object
MH                    object
MH TYPE               object
MINH                  object
MONTH                 object
NPN                   object
SDH                   object
SH                    object
SMH                   object
cv                    object
dh_desc               object
fin_year              object
gsh_desc              object
hoa                   object
mh_desc               object
mhtype                object
minh_desc             object
sdh_desc              object
sh_desc               object
smh_desc              object
date_month    datetime64[ns]
dtype: object

In [44]:
def save_to_sqlite(o, combined_data):
    '''
    Save the combined data to sqlite file.
    Args:
        o (str): output file name.
    Return:
        True if sqlite file saved else raise error.
    '''
    engine = sqlalchemy.create_engine('sqlite:///{}'.format(o))
    combined_data.to_sql(name='krishna', if_exists='replace', con=engine, chunksize=10000)
    return True

In [45]:
save_to_sqlite('district_treasuries_ap', data)


Out[45]:
True

In [47]:
engine = sqlalchemy.create_engine('sqlite:///district_treasuries_ap.sqlite')
pd.read_sql('select * from krishna', con=engine).


Out[47]:
index           int64
AMOUNT        float64
DH             object
DISTRICT       object
GSH            object
MH             object
MH TYPE        object
MINH           object
MONTH          object
NPN            object
SDH            object
SH             object
SMH            object
cv             object
dh_desc        object
fin_year       object
gsh_desc       object
hoa            object
mh_desc        object
mhtype         object
minh_desc      object
sdh_desc       object
sh_desc        object
smh_desc       object
date_month     object
dtype: object

In [ ]: