In [1]:
%load_ext autoreload
%autoreload 2
In [7]:
import openpyxl as pyxl
import pandas as pd
import numpy as np
from bidict import bidict
from excel_helpers import *
from db_entries import *
from unified_excel_sheet import *
import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt
import sqlite3 as sql
import logging
logger = logging.getLogger()
In [8]:
%%capture
db_filename = 'my-budget-dev-v3.sqlite'
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename
db = sql.connect(db_filename)
event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
In [9]:
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'
current_sheet_name = 'August'
comment_sheet_name = 'august_'
# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)
cuurent_sheet = wb.get_sheet_by_name(current_sheet_name)
comment_sheet = wb.copy_worksheet(august)
comment_sheet.title = comment_sheet_name
comment_sheet.sheet_state = 'hidden'
In [ ]:
col_titles_budget = ['budget_pot', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget
max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles_payments
max_bargeld['money_pot'] = 'BM'
paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles_payments
paul_bargeld['money_pot'] = 'BP'
konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles_payments
konto['money_pot'] = 'KG'
all_payments = pd.concat([max_bargeld, paul_bargeld, konto])
all_info = pd.merge(budgeting, all_payments, how='outer',
on=['description', 'date', 'amount'], indicator=True)
all_info['treated'] = 'No'
all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)
In [ ]:
## Creating Data for Bargeld Payments data
rules = [{'_merge' : 'both', 'money_pot' : B, 'treated' : 'No'} for B in ['BM', 'BP']]
full_match_bargeld = multirule_subset(all_info, rules)
bargeld_wb_filename = 'bargeld_payments.xlsx'
definition_data = 'base_excel.json'
with open(definition_data) as f :
json_data = f.read()
sheet_def = json.loads(json_data)
sheet_def['main_sheet']['type_description']['default'] = 'Barzahlung'
sheet_def['main_sheet']['is_budget_event']['default'] = 'X'
sheet_def['main_sheet']['is_payment']['default'] = 'X'
sheet_def['main_sheet']['ignore']['default'] = ''
sheet_def['main_sheet']['temporary_id']['hidden'] = True
sheet_def['main_sheet']['is_transfer']['hidden'] = True
sheet_def['main_sheet']['is_recieving']['hidden'] = True
sheet_def['main_sheet']['money_pot_sink']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
create_excel_table_from_data(full_match_bargeld, bargeld_wb_filename, db, sheet_def)
checked_full_match_bargeld = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)
In [ ]:
## create empy dataframe first
untreated_data = checked_full_match_bargeld[checked_full_match_bargeld['treated'] == '']
for index, row in checked_full_match_bargeld.iterrows() :
if row['ignore'] == 'X' :
untreated_data = untreated_data.append(checked_full_match_bargeld.loc[index])
continue
elif not np.isnan(row['temporary_id']) :
raise NotImplementedError('The grouping of events by ID is not implemented yet.')
the_id = automatically_enter_payment_row(row['date'], 'Barzahlung',
row['description'], row['amount'], row['money_pot'],
row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
excel_filename, august, august_, db_filename)
if not np.isnan(row['in_group']) :
crsr = db.cursor()
if row['in_group'] not in event_groups_df.index :
if row['group_name'][0] == '=' :
raise ValueError(
"You should not use a formula as group name! The given group name was {}.".format(
row['group_name']))
crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
row['in_group'], row['group_name']))
crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
db.commit()
all_info.loc[index, 'treated'] = 'Yes'
In [ ]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'both', 'money_pot' : 'KG', 'treated' : 'No'}
full_match_konto = subset_data_frame(all_info, subsetting_rules)
konto_wb_filename = 'konto_payments.xlsx'
definition_data = 'base_excel.json'
with open(definition_data) as f :
json_data = f.read()
sheet_def = json.loads(json_data)
sheet_def['main_sheet']['type_description']['default'] = 'Kartenzahlung'
sheet_def['main_sheet']['is_budget_event']['default'] = 'X'
sheet_def['main_sheet']['is_payment']['default'] = 'X'
sheet_def['main_sheet']['ignore']['default'] = ''
sheet_def['main_sheet']['temporary_id']['hidden'] = True
sheet_def['main_sheet']['is_transfer']['hidden'] = True
sheet_def['main_sheet']['is_recieving']['hidden'] = True
sheet_def['main_sheet']['money_pot_sink']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
create_excel_table_from_data(full_match_konto, konto_wb_filename, db, sheet_def)
checked_full_match_konto = use_excel_for_data_entry(konto_wb_filename, copy_mode=False)
In [ ]:
for index, row in checked_full_match_konto.iterrows() :
if row['ignore'] == 'X' :
untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
continue
elif not np.isnan(row['temporary_id']) :
raise NotImplementedError('The grouping of events by ID is not implemented yet.')
the_id = automatically_enter_payment_row(row['date'], 'Barzahlung',
row['description'], row['amount'], row['money_pot'],
row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
excel_filename, august, august_, db_filename)
if not np.isnan(row['in_group']) :
crsr = db.cursor()
if row['in_group'] not in event_groups_df.index :
if row['group_name'][0] == '=' :
raise ValueError(
"You should not use a formula as group name! The given group name was {}.".format(
row['group_name']))
crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
row['in_group'], row['group_name']))
crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
db.commit()
all_info.loc[index, 'treated'] = 'Yes'
In [ ]:
## List of all fully matched things we didnt treat yet
subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
remaining_matched = subset_data_frame(all_info, subsetting_rules)
display(remaining_matched)
display(untreated_data)
In [ ]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'treated' : 'No'}
remaining = subset_data_frame(all_info, subsetting_rules)
remaining = remaining.loc[remaining.index.difference(untreated_data.index)]
wb_filename = 'remaining.xlsx'
definition_data = 'base_excel.json'
create_excel_table_from_data(remaining, wb_filename, db, definition_data)
checked_remaining = use_excel_for_data_entry(wb_filename, copy_mode=False)
In [ ]:
temporary_id_lookup = {}
for index, row in checked_remaining.iterrows() :
if row['ignore'] == 'X' :
untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
continue
excel_ranges = ([] if np.isnan(row['excel_range_x']) else [row['excel_range_x']]) + \
([] if np.isnan(row['excel_range_y']) else [row['excel_range_y']])
if not np.isnan(row['temporary_id']) and row['temporary_id'] in temporary_id_lookup :
the_id = temporary_id_lookup[row['temporary_id']]
else :
the_id = create_money_event(db, row['type_description'], row['description'], row['date'])
full_excel_ranges = [current_sheet.title + '!' + a_range for a_range in excel_ranges]
database_comment = autogenerate_database_comment_multiple_excel_ranges(
excel_filename, full_excel_ranges, comment_sheet.title)
add_database_event(db, the_id, database_comment)
if not np.isnan(row['temporary_id']) :
temporary_id_lookup[row['temporary_id']] = the_id
## ToDo : Add commenting into database and Excel
if row['is_budget_event'] == 'X' :
add_budget_event(db, the_id, row['budget_pot'], row['amount'])
if row['is_payment'] == 'X' :
add_payment(db, the_id, row['money_pot'], row['amount'])
if row['is_recieving'] == 'X' :
add_payment(db, the_id, row['money_pot'], row['amount'])
if row['is_transfer'] == 'X' :
add_payment(db, the_id, row['money_pot'], row['money_pot_sink'], row['amount'])
if not np.isnan(row['in_group']) :
if row['in_group'] not in event_groups_df.index :
if row['group_name'][0] == '=' :
raise ValueError(
"You should not use a formula as group name! The given group name was {}.".format(
row['group_name']))
crsr = db.cursor()
crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
row['in_group'], row['group_name']))
db.commit()
event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
crsr = db.cursor()
crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
db.commit()
excel_comment = autogenerate_excel_comment(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
excel_cell_list = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
put_comment_into_excel(comment_sheet, excel_cell_list, excel_comment)
all_info.loc[index, 'treated'] = 'Yes'
In [ ]:
wb.save('GemBil.xlsx')
In [ ]:
db.close()
In [ ]: