notebook.community

Edit and run



In [1]:

    
%load_ext autoreload
%autoreload 2



In [2]:

    
import openpyxl as pyxl
import pandas as pd
import numpy as np

from bidict import bidict

from excel_helpers import *
from db_entries import *
from unified_excel_sheet import *

import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

import logging
logger = logging.getLogger()



In [3]:

    
%%capture

db_filename = 'my-budget-dev-v5.sqlite'

%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

db = sql.connect(db_filename)

event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')



In [4]:

    
excel_filename = 'GemeinsameBilanzierung_16_17__2017-11-03.xlsx'

current_sheet_name = 'August'
comment_sheet_name = 'august_'

# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)


current_sheet = wb.get_sheet_by_name(current_sheet_name)
comment_sheet = wb.copy_worksheet(current_sheet)
comment_sheet.title = comment_sheet_name
comment_sheet.sheet_state = 'hidden'



In [5]:

    
col_titles_budget = ['budget_pot', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']

budgeting = get_df_by_range(comment_sheet, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget
budgeting['is_budget_event'] = 'X'

max_bargeld = get_df_by_range(comment_sheet, 'H7', 'J130', 1)
max_bargeld.columns = col_titles_payments
max_bargeld['money_pot'] = 'BM'
max_bargeld['type_description'] = 'Barzahlung'

paul_bargeld = get_df_by_range(comment_sheet, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles_payments
paul_bargeld['money_pot'] = 'BP'
paul_bargeld['type_description'] = 'Barzahlung'

konto = get_df_by_range(comment_sheet, 'N7', 'P130', 1)
konto.columns = col_titles_payments
konto['money_pot'] = 'KG'
konto['type_description'] = 'Kartenzahlung'

all_payments = pd.concat([max_bargeld, paul_bargeld, konto])
all_payments['is_payment'] = 'X'

all_info = pd.merge(budgeting, all_payments, how='outer', 
                    on=['description', 'date', 'amount'], indicator=True)

all_info['treated'] = 'No'

all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)



In [ ]:

    
## Creating Data for Bargeld Payments data

wb_filename = 'full_month_august.xlsx'
definition_data = 'base_excel.json'

with open(definition_data) as f :
    json_data = f.read()
    sheet_def = json.loads(json_data)

sheet_def['main_sheet']['event_type']['hidden'] = True
sheet_def['main_sheet']['money_pot_name']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
sheet_def['main_sheet']['budget_type']['hidden'] = True

create_excel_table_from_data(all_info, wb_filename, db, sheet_def)
checked_data = use_excel_for_data_entry(wb_filename, copy_mode=False)









    



The file full_month_august.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue:



In [ ]:

    
ignored_data_index = checked_data[checked_data['ignore'] == 'X'].index
nonignore_data_index = checked_data.index.difference(ignored_data_index)

ignored_data = checked_data.loc[ignored_data_index]
nonignore_data = checked_data.loc[nonignore_data_index]

nonignore_data = nonignore_data.sort_values('date')
display(nonignore_data)

definition_data = 'base_excel.json'
recheck_wb_filename = 'recheck_full_month.xlsx'

with open(definition_data) as f :
    json_data = f.read()
    sheet_def = json.loads(json_data)

sheet_def['main_sheet']['event_type']['hidden'] = True
sheet_def['main_sheet']['money_pot_name']['hidden'] = True
sheet_def['main_sheet']['money_sink_name']['hidden'] = True
sheet_def['main_sheet']['budget_type']['hidden'] = True

create_excel_table_from_data(nonignore_data, recheck_wb_filename, db, sheet_def)
double_checked = use_excel_for_data_entry(recheck_wb_filename, copy_mode=False)



In [ ]:

    
parse_unified_excel_table(checked_data, db, db_filename, current_sheet, comment_sheet, excel_filename)



In [ ]:

    
### List of all fully matched things we didnt treat yet
#subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
#remaining_matched = subset_data_frame(all_info, subsetting_rules)
#
#display(remaining_matched)
#display(untreated_data)



In [ ]:

    
### For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
#subsetting_rules = {'treated' : 'No'}
#remaining = subset_data_frame(all_info, subsetting_rules)
#remaining = remaining.loc[remaining.index.difference(untreated_data.index)]
#
#wb_filename = 'remaining.xlsx'
#definition_data = 'base_excel.json'
#
#create_excel_table_from_data(remaining, wb_filename, db, definition_data)
#checked_remaining = use_excel_for_data_entry(wb_filename, copy_mode=False)



In [ ]:

    
#temporary_id_lookup = {}
#
#for index, row in checked_remaining.iterrows() :
#    if row['ignore'] == 'X' :
#        untreated_data = untreated_data.append(checked_full_match_konto.loc[index])
#        continue
#        
#    excel_ranges = ([] if np.isnan(row['excel_range_x']) else [row['excel_range_x']]) + \
#                   ([] if np.isnan(row['excel_range_y']) else [row['excel_range_y']])
#    
#    if not np.isnan(row['temporary_id']) and row['temporary_id'] in temporary_id_lookup :
#        the_id = temporary_id_lookup[row['temporary_id']]
#    else :
#        the_id = create_money_event(db, row['type_description'], row['description'], row['date'])
#        
#
#        full_excel_ranges = [current_sheet.title + '!' + a_range for a_range in excel_ranges]
#    
#        database_comment = autogenerate_database_comment_multiple_excel_ranges(
#            excel_filename, full_excel_ranges, comment_sheet.title)
#        
#        add_database_event(db, the_id, database_comment)
#        
#        if not np.isnan(row['temporary_id']) :
#            temporary_id_lookup[row['temporary_id']] = the_id
#    
#    ## ToDo : Add commenting into database and Excel
#    
#    if row['is_budget_event'] == 'X' :
#        add_budget_event(db, the_id, row['budget_pot'], row['amount'])
#    if row['is_payment'] == 'X' :
#        add_payment(db, the_id, row['money_pot'], row['amount'])
#    if row['is_recieving'] == 'X' :
#        add_payment(db, the_id, row['money_pot'], row['amount'])
#    if row['is_transfer'] == 'X' :
#        add_payment(db, the_id, row['money_pot'], row['money_pot_sink'], row['amount'])
#    
#    if not np.isnan(row['in_group']) :  
#        if row['in_group'] not in event_groups_df.index :
#            if row['group_name'][0] == '=' :
#                raise ValueError(
#                    "You should not use a formula as group name! The given group name was {}.".format(
#                        row['group_name']))
#            crsr = db.cursor()
#            crsr.execute('INSERT INTO event_groups VALUES ({}, "{}")'.format(
#                row['in_group'], row['group_name']))
#            db.commit()
#            event_groups_df = pd.read_sql('SELECT * FROM event_groups', db, index_col='group_id')
#            
#        crsr = db.cursor()
#        crsr.execute('INSERT INTO event_in_group VALUES ({}, {})'.format(row['in_group'], the_id))
#        db.commit()
#    
#    excel_comment = autogenerate_excel_comment(dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
#    excel_cell_list = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
#    put_comment_into_excel(comment_sheet, excel_cell_list, excel_comment)
#    
#    all_info.loc[index, 'treated'] = 'Yes'



In [ ]:

    
wb.save('GemBil.xlsx')



In [ ]:

    
db.close()



In [ ]:

    
# read a full table into a pandas data framd
crsr = db.cursor()
crsr.execute("SELECT name FROM sqlite_master WHERE type='table';")

for table in crsr.fetchall() :
    display(pd.read_sql_query('SELECT * FROM {};'.format(table[0]), db))



In [ ]:

    
db_filename = 'my-budget-dev-v4.sqlite'

db = sql.connect(db_filename)

display(pd.read_sql_query('''
    SELECT * FROM 
    (money_events LEFT JOIN budget_events ON money_events.id = budget_events.id)
    LEFT JOIN payments ON money_events.id = payments.id
''', db))



In [ ]:

    
display(pd.read_sql_query('''
    SELECT * FROM 
    (money_events LEFT JOIN budget_events ON money_events.id = budget_events.id)
    LEFT JOIN payments ON money_events.id = payments.id
    WHERE money_events.id = 2017081002
''', db))



In [ ]: