notebook.community

Edit and run



In [1]:

    
%load_ext autoreload
%autoreload 2



In [7]:

    
import openpyxl as pyxl
import pandas as pd
import numpy as np

from bidict import bidict

from excel_helpers import *
from db_entries import *
from unified_excel_sheet import *

import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

import logging
logger = logging.getLogger()



In [8]:

    
%%capture

db_filename = 'my-budget-dev-v3.sqlite'

%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

db = sql.connect(db_filename)

event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')



In [9]:

    
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

current_sheet_name = 'August'
comment_sheet_name = 'august_'

# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)


cuurent_sheet = wb.get_sheet_by_name(current_sheet_name)
comment_sheet = wb.copy_worksheet(august)
comment_sheet.title = comment_sheet_name
comment_sheet.sheet_state = 'hidden'



In [ ]:

    
col_titles_budget = ['budget_pot', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']

budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget

max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles_payments
max_bargeld['money_pot'] = 'BM'

paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles_payments
paul_bargeld['money_pot'] = 'BP'

konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles_payments
konto['money_pot'] = 'KG'

all_payments = pd.concat([max_bargeld, paul_bargeld, konto])

all_info = pd.merge(budgeting, all_payments, how='outer', 
                    on=['description', 'date', 'amount'], indicator=True)

all_info['treated'] = 'No'

all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)



In [ ]:

    
## Creating Data for Bargeld Payments data

rules = [{'_merge' : 'both', 'money_pot' : B,  'treated' : 'No'} for B in ['BM', 'BP']]
full_match_bargeld = multirule_subset(all_info, rules)

bargeld_wb_filename = 'bargeld_payments.xlsx'

definition_data = 'base_excel.json'

with open(definition_data) as f :
    json_data = f.read()
    sheet_def = json.loads(json_data)

sheet_def['main_sheet']['type_description']['default'] = 'Barzahlung'
sheet_def['main_sheet']['is_budget_event']['default'] = 'X'
sheet_def['main_sheet']['is_payment']['default'] = 'X'
sheet_def['main_sheet']['ignore']['default'] = ''

sheet_def['main_sheet']['temporary_id']['hidden'] = True
sheet_def['main_sheet']['is_transfer']['hidden'] = True
sheet_def['main_sheet']['is_recieving']['hidden'] = True
sheet_def['main_sheet']['money_pot_sink']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True

create_excel_table_from_data(full_match_bargeld, bargeld_wb_filename, db, sheet_def)
checked_full_match_bargeld = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)



In [ ]:

    
## create empy dataframe first
untreated_data = checked_full_match_bargeld[checked_full_match_bargeld['treated'] == '']

for index, row in checked_full_match_bargeld.iterrows() :
    if row['ignore'] == 'X' :
        untreated_data = untreated_data.append(checked_full_match_bargeld.loc[index])
        continue
    elif not np.isnan(row['temporary_id']) : 
        raise NotImplementedError('The grouping of events by ID is not implemented yet.')
    
    the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
        row['description'], row['amount'], row['money_pot'], 
        row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)
    
    if not np.isnan(row['in_group']) :
        crsr = db.cursor()
        if row['in_group'] not in event_groups_df.index :
            if row['group_name'][0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        row['group_name']))
            crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
                row['in_group'], row['group_name']))
        crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
        db.commit()
    
    all_info.loc[index, 'treated'] = 'Yes'



In [ ]:

    
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'both', 'money_pot' : 'KG',  'treated' : 'No'}
full_match_konto = subset_data_frame(all_info, subsetting_rules)

konto_wb_filename = 'konto_payments.xlsx'

definition_data = 'base_excel.json'

with open(definition_data) as f :
    json_data = f.read()
    sheet_def = json.loads(json_data)

sheet_def['main_sheet']['type_description']['default'] = 'Kartenzahlung'
sheet_def['main_sheet']['is_budget_event']['default'] = 'X'
sheet_def['main_sheet']['is_payment']['default'] = 'X'
sheet_def['main_sheet']['ignore']['default'] = ''

sheet_def['main_sheet']['temporary_id']['hidden'] = True
sheet_def['main_sheet']['is_transfer']['hidden'] = True
sheet_def['main_sheet']['is_recieving']['hidden'] = True
sheet_def['main_sheet']['money_pot_sink']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True

create_excel_table_from_data(full_match_konto, konto_wb_filename, db, sheet_def)
checked_full_match_konto = use_excel_for_data_entry(konto_wb_filename, copy_mode=False)



In [ ]:

    
for index, row in checked_full_match_konto.iterrows() :
    if row['ignore'] == 'X' :
        untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
        continue
    elif not np.isnan(row['temporary_id']) : 
        raise NotImplementedError('The grouping of events by ID is not implemented yet.')
    
    the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
        row['description'], row['amount'], row['money_pot'], 
        row['budget_pot'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)
    
    if not np.isnan(row['in_group']) :
        crsr = db.cursor()
        if row['in_group'] not in event_groups_df.index :
            if row['group_name'][0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        row['group_name']))
            crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
                row['in_group'], row['group_name']))
        crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
        db.commit()
    
    all_info.loc[index, 'treated'] = 'Yes'



In [ ]:

    
## List of all fully matched things we didnt treat yet
subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
remaining_matched = subset_data_frame(all_info, subsetting_rules)

display(remaining_matched)
display(untreated_data)



In [ ]:

    
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'treated' : 'No'}
remaining = subset_data_frame(all_info, subsetting_rules)
remaining = remaining.loc[remaining.index.difference(untreated_data.index)]

wb_filename = 'remaining.xlsx'
definition_data = 'base_excel.json'

create_excel_table_from_data(remaining, wb_filename, db, definition_data)
checked_remaining = use_excel_for_data_entry(wb_filename, copy_mode=False)



In [ ]:

    
temporary_id_lookup = {}

for index, row in checked_remaining.iterrows() :
    if row['ignore'] == 'X' :
        untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
        continue
        
    excel_ranges = ([] if np.isnan(row['excel_range_x']) else [row['excel_range_x']]) + \
                   ([] if np.isnan(row['excel_range_y']) else [row['excel_range_y']])
    
    if not np.isnan(row['temporary_id']) and row['temporary_id'] in temporary_id_lookup :
        the_id = temporary_id_lookup[row['temporary_id']]
    else :
        the_id = create_money_event(db, row['type_description'], row['description'], row['date'])
        

        full_excel_ranges = [current_sheet.title + '!' + a_range for a_range in excel_ranges]
    
        database_comment = autogenerate_database_comment_multiple_excel_ranges(
            excel_filename, full_excel_ranges, comment_sheet.title)
        
        add_database_event(db, the_id, database_comment)
        
        if not np.isnan(row['temporary_id']) :
            temporary_id_lookup[row['temporary_id']] = the_id
    
    ## ToDo : Add commenting into database and Excel
    
    if row['is_budget_event'] == 'X' :
        add_budget_event(db, the_id, row['budget_pot'], row['amount'])
    if row['is_payment'] == 'X' :
        add_payment(db, the_id, row['money_pot'], row['amount'])
    if row['is_recieving'] == 'X' :
        add_payment(db, the_id, row['money_pot'], row['amount'])
    if row['is_transfer'] == 'X' :
        add_payment(db, the_id, row['money_pot'], row['money_pot_sink'], row['amount'])
    
    if not np.isnan(row['in_group']) :  
        if row['in_group'] not in event_groups_df.index :
            if row['group_name'][0] == '=' :
                raise ValueError(
                    "You should not use a formula as group name! The given group name was {}.".format(
                        row['group_name']))
            crsr = db.cursor()
            crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
                row['in_group'], row['group_name']))
            db.commit()
            event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
            
        crsr = db.cursor()
        crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
        db.commit()
    
    excel_comment = autogenerate_excel_comment(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    excel_cell_list = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
    put_comment_into_excel(comment_sheet, excel_cell_list, excel_comment)
    
    all_info.loc[index, 'treated'] = 'Yes'



In [ ]:

    
wb.save('GemBil.xlsx')



In [ ]:

    
db.close()



In [ ]: