In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import openpyxl as pyxl
import pandas as pd
import numpy as np
from bidict import bidict
from excel_helpers import *
from db_entries import *
from unified_excel_sheet import *
import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt
import sqlite3 as sql
import logging
logger = logging.getLogger()
In [3]:
%%capture
db_filename = 'my-budget-dev-v5.sqlite'
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename
db = sql.connect(db_filename)
event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
In [4]:
excel_filename = 'GemeinsameBilanzierung_16_17__2017-11-03.xlsx'
current_sheet_name = 'August'
comment_sheet_name = 'august_'
# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)
current_sheet = wb.get_sheet_by_name(current_sheet_name)
comment_sheet = wb.copy_worksheet(current_sheet)
comment_sheet.title = comment_sheet_name
comment_sheet.sheet_state = 'hidden'
In [5]:
col_titles_budget = ['budget_pot', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']
budgeting = get_df_by_range(comment_sheet, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget
budgeting['is_budget_event'] = 'X'
max_bargeld = get_df_by_range(comment_sheet, 'H7', 'J130', 1)
max_bargeld.columns = col_titles_payments
max_bargeld['money_pot'] = 'BM'
max_bargeld['type_description'] = 'Barzahlung'
paul_bargeld = get_df_by_range(comment_sheet, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles_payments
paul_bargeld['money_pot'] = 'BP'
paul_bargeld['type_description'] = 'Barzahlung'
konto = get_df_by_range(comment_sheet, 'N7', 'P130', 1)
konto.columns = col_titles_payments
konto['money_pot'] = 'KG'
konto['type_description'] = 'Kartenzahlung'
all_payments = pd.concat([max_bargeld, paul_bargeld, konto])
all_payments['is_payment'] = 'X'
all_info = pd.merge(budgeting, all_payments, how='outer',
on=['description', 'date', 'amount'], indicator=True)
all_info['treated'] = 'No'
all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)
In [ ]:
## Creating Data for Bargeld Payments data
wb_filename = 'full_month_august.xlsx'
definition_data = 'base_excel.json'
with open(definition_data) as f :
json_data = f.read()
sheet_def = json.loads(json_data)
sheet_def['main_sheet']['event_type']['hidden'] = True
sheet_def['main_sheet']['money_pot_name']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
sheet_def['main_sheet']['budget_type']['hidden'] = True
create_excel_table_from_data(all_info, wb_filename, db, sheet_def)
checked_data = use_excel_for_data_entry(wb_filename, copy_mode=False)
In [ ]:
ignored_data_index = checked_data[checked_data['ignore'] == 'X'].index
nonignore_data_index = checked_data.index.difference(ignored_data_index)
ignored_data = checked_data.loc[ignored_data_index]
nonignore_data = checked_data.loc[nonignore_data_index]
nonignore_data = nonignore_data.sort_values('date')
display(nonignore_data)
definition_data = 'base_excel.json'
recheck_wb_filename = 'recheck_full_month.xlsx'
with open(definition_data) as f :
json_data = f.read()
sheet_def = json.loads(json_data)
sheet_def['main_sheet']['event_type']['hidden'] = True
sheet_def['main_sheet']['money_pot_name']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
sheet_def['main_sheet']['budget_type']['hidden'] = True
create_excel_table_from_data(nonignore_data, recheck_wb_filename, db, sheet_def)
double_checked = use_excel_for_data_entry(recheck_wb_filename, copy_mode=False)
In [ ]:
parse_unified_excel_table(checked_data, db, db_filename, current_sheet, comment_sheet, excel_filename)
In [ ]:
### List of all fully matched things we didnt treat yet
#subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
#remaining_matched = subset_data_frame(all_info, subsetting_rules)
#
#display(remaining_matched)
#display(untreated_data)
In [ ]:
### For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
#subsetting_rules = {'treated' : 'No'}
#remaining = subset_data_frame(all_info, subsetting_rules)
#remaining = remaining.loc[remaining.index.difference(untreated_data.index)]
#
#wb_filename = 'remaining.xlsx'
#definition_data = 'base_excel.json'
#
#create_excel_table_from_data(remaining, wb_filename, db, definition_data)
#checked_remaining = use_excel_for_data_entry(wb_filename, copy_mode=False)
In [ ]:
#temporary_id_lookup = {}
#
#for index, row in checked_remaining.iterrows() :
# if row['ignore'] == 'X' :
# untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
# continue
#
# excel_ranges = ([] if np.isnan(row['excel_range_x']) else [row['excel_range_x']]) + \
# ([] if np.isnan(row['excel_range_y']) else [row['excel_range_y']])
#
# if not np.isnan(row['temporary_id']) and row['temporary_id'] in temporary_id_lookup :
# the_id = temporary_id_lookup[row['temporary_id']]
# else :
# the_id = create_money_event(db, row['type_description'], row['description'], row['date'])
#
#
# full_excel_ranges = [current_sheet.title + '!' + a_range for a_range in excel_ranges]
#
# database_comment = autogenerate_database_comment_multiple_excel_ranges(
# excel_filename, full_excel_ranges, comment_sheet.title)
#
# add_database_event(db, the_id, database_comment)
#
# if not np.isnan(row['temporary_id']) :
# temporary_id_lookup[row['temporary_id']] = the_id
#
# ## ToDo : Add commenting into database and Excel
#
# if row['is_budget_event'] == 'X' :
# add_budget_event(db, the_id, row['budget_pot'], row['amount'])
# if row['is_payment'] == 'X' :
# add_payment(db, the_id, row['money_pot'], row['amount'])
# if row['is_recieving'] == 'X' :
# add_payment(db, the_id, row['money_pot'], row['amount'])
# if row['is_transfer'] == 'X' :
# add_payment(db, the_id, row['money_pot'], row['money_pot_sink'], row['amount'])
#
# if not np.isnan(row['in_group']) :
# if row['in_group'] not in event_groups_df.index :
# if row['group_name'][0] == '=' :
# raise ValueError(
# "You should not use a formula as group name! The given group name was {}.".format(
# row['group_name']))
# crsr = db.cursor()
# crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
# row['in_group'], row['group_name']))
# db.commit()
# event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
#
# crsr = db.cursor()
# crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
# db.commit()
#
# excel_comment = autogenerate_excel_comment(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
# excel_cell_list = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
# put_comment_into_excel(comment_sheet, excel_cell_list, excel_comment)
#
# all_info.loc[index, 'treated'] = 'Yes'
In [ ]:
wb.save('GemBil.xlsx')
In [ ]:
db.close()
In [ ]:
# read a full table into a pandas data framd
crsr = db.cursor()
crsr.execute("SELECT name FROM sqlite_master WHERE type='table';")
for table in crsr.fetchall() :
display(pd.read_sql_query('SELECT * FROM {};'.format(table[0]), db))
In [ ]:
db_filename = 'my-budget-dev-v4.sqlite'
db = sql.connect(db_filename)
display(pd.read_sql_query('''
SELECT * FROM
(money_events LEFT JOIN budget_events ON money_events.id = budget_events.id)
LEFT JOIN payments ON money_events.id = payments.id
''', db))
In [ ]:
display(pd.read_sql_query('''
SELECT * FROM
(money_events LEFT JOIN budget_events ON money_events.id = budget_events.id)
LEFT JOIN payments ON money_events.id = payments.id
WHERE money_events.id = 2017081002
''', db))
In [ ]: