In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import openpyxl as pyxl
import pandas as pd
import numpy as np
from excel_helpers import *
from db_entries import *
import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt
import sqlite3 as sql
import logging
logger = logging.getLogger()
In [3]:
%%capture
db_filename = 'my-budget-dev-v1.sqlite'
%run nuclear_option.py $db_filename
%run database_setup.py $db_filename
db = sql.connect(db_filename)
In [4]:
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'
current_sheet = 'August'
comment_sheet = 'august_'
# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)
august = wb.get_sheet_by_name(current_sheet)
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'
In [5]:
def autogenerate_database_comment_multiple_excel_ranges(filename, ranges,
comment_sheet) :
try :
ranges_string = ', '.join(ranges[:-1]) + \
(' and ' if len(ranges) > 1 else '') + ranges[-1]
except IndexError :
raise ValueError('You should provide at least one Excel range')
return 'This entry was automatically generated from the excel file ' + \
'{}. It is based on the cells {}. A note has been '.format(
filename, ranges_string) + \
'added to the respective cells in the sheet {}.'.format(comment_sheet.title)
def autogenerate_database_comment(filename, range_, comment_sheet) :
return autogenerate_database_comment_multiple_excel_ranges(
filename, [range_], comment_sheet)
def autogenerate_excel_comment(date, db_filename, the_id) :
return 'On {} this cell was automatically read and '.format(date) + \
'inserted into the database {}. The id ofthe entry is {}.'.format(
date, db_filename, the_id)
In [6]:
def automatically_enter_payment_row(date, payment_type, description, amount, money_pot, budget_type,
ranges, excel_filename, current_sheet, comment_sheet, db_filename) :
list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
database_comment = autogenerate_database_comment_multiple_excel_ranges(
excel_filename, list_of_ranges, comment_sheet.title)
the_id = put_payment_into_database_autogenerated(db, date, payment_type, description, amount,
money_pot, budget_type, database_comment)
excel_comment = autogenerate_excel_comment(
dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
#Get a list of all referenced cells in the ranges
cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
put_comment_into_excel(comment_sheet, cells, excel_comment)
return the_id
def automatically_enter_transfer_row(date, transfer_type, description, amount, money_pot_source,
money_pot_sink, ranges, excel_filename, current_sheet,
comment_sheet, db_filename, effect_date = None) :
list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
database_comment = autogenerate_database_comment_multiple_excel_ranges(
excel_filename, list_of_ranges, comment_sheet)
the_id = put_transfer_into_database_autogenerated(db, date, transfer_type, description, amount,
money_pot_source, money_pot_sink, comment=database_comment)
excel_comment = autogenerate_excel_comment(
dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
#Get a list of all referenced cells in the ranges
cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
put_comment_into_excel(comment_sheet, cells, excel_comment)
return the_id
In [7]:
def subset_data_frame(data_frame, rules) :
"""Subset a pandas.DataFrame by a dictionary of rules. The rules should have the following form:
{<column name> : <restriction>} where <restriction> is either a number, a string starting with
'<' or '>' and followed by number or a general string. If it is a simple string or number, the
subsetting expression
data_frame[dataframe[<column name>] == <restriction>]
will be used. The other case will be interpreted as
data_frame[dataframe[<column name>] <|> <number>].
"""
result = data_frame
for rule in rules.items() :
column = rule[0]
restriction = rule[1]
if type(restriction) is str and restriction[0] in ['<', '>'] :
try :
number = float(restriction[1:])
except :
raise ValueError('After a rule starting with < or > you need to specify a number')
result = result.loc[result[column] < number] if restriction[0] == '<' \
else result.loc[result[column] > number]
continue
result = result.loc[result[column] == restriction]
if len(result) == 0 :
logger.info('After the rule ' + str(rule) + ' no item was left')
return result
logger.info(str(len(result)) + ' items were selected.')
return result
def multirule_subset(data_frame, rules) :
"""Collect multiple subsets (union of multiple subsets) of a pandas.DataFrame into a single DataFrame
by using subset_data_frame.
Keyword Arguments:
data_frame -- the DataFrame to be subsetted
rules -- a list of dictionaries, where the dicts fulfill the requirements specified for
subset_data_frame
"""
new_data = pd.DataFrame(columns=data_frame.columns)
for rule in rules :
new_data = new_data.append(subset_data_frame(data_frame, rule))
return new_data
In [8]:
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']
col_titles = ['description', 'date', 'amount', 'excel_range']
max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles
max_bargeld['money_pot'] = 'BM'
paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles
paul_bargeld['money_pot'] = 'BP'
konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles
konto['money_pot'] = 'KG'
conjoined = pd.concat([max_bargeld, paul_bargeld, konto])
In [9]:
all_info = pd.merge(budgeting, conjoined, how='outer',
on=['description', 'date', 'amount'], indicator=True)
all_info['treated'] = 'No'
all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)
In [10]:
## Creating Data for Bargeld Payments data
rules = [{'_merge' : 'both', 'money_pot' : B, 'treated' : 'No'} for B in ['BM', 'BP']]
full_match_bargeld = multirule_subset(all_info, rules)
full_match_bargeld['payment_type'] = 'Barzahlung'
full_match_bargeld['use_data'] = 'X'
full_match_bargeld['complex'] = ''
bargeld_wb_filename = 'bargeld_payments.xlsx'
In [11]:
full_match_bargeld.to_excel(bargeld_wb_filename)
bargeld_wb = pyxl.load_workbook(bargeld_wb_filename, data_only=True)
bargeld_sheet = bargeld_wb.active
the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer',
'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']
the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'
dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'
dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'
dv1.ranges.append('K2:K' + str(bargeld_sheet.max_row))
dv2.ranges.append('L2:M' + str(bargeld_sheet.max_row))
bargeld_sheet.add_data_validation(dv1)
bargeld_sheet.add_data_validation(dv2)
for i in range(2, bargeld_sheet.max_row + 1) :
bargeld_sheet['D' + str(i)].number_format = 'DD/MM/YY'
bargeld_sheet['K' + str(i)].fill = the_fill
bargeld_sheet['L' + str(i)].fill = the_fill
bargeld_sheet['M' + str(i)].fill = the_fill
for column_cells in bargeld_sheet.columns:
length = max(len(str(cell.value)) for cell in column_cells)
bargeld_sheet.column_dimensions[column_cells[0].column].width = length
for row in ['F', 'G', 'H', 'I', 'J'] :
bargeld_sheet.column_dimensions[row].hidden = True
bargeld_wb.save(bargeld_wb_filename)
In [12]:
checked_full_match_konto = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)
In [13]:
for index, row in full_match_bargeld.iterrows() :
the_id = automatically_enter_payment_row(row['date'], 'Barzahlung',
row['description'], row['amount'], row['money_pot'],
row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [ ]:
#************************************#
# This code will not be used anymore #
#************************************#
## Putting all the Info which had matching info on the left and the right hand of the budget sheet into
## the database.
## Only doing the "Barzahlung" payments right now
##for BB in ['BM', 'BP'] :
## subsetting_rules = {'_merge' : 'both', 'money_pot' : BB, 'treated' : 'No'}
##
## full_match_bargeld = subset_data_frame(all_info, subsetting_rules)
##
## for index, row in full_match_bargeld.iterrows() :
## the_id = automatically_enter_payment_row(row['date'], 'Barzahlung',
## row['description'], row['amount'], row['money_pot'],
## row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
## excel_filename, august, august_, db_filename)
##
## all_info.loc[index, 'treated'] = 'Yes'
In [14]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'both', 'money_pot' : 'KG', 'treated' : 'No'}
full_match_konto = subset_data_frame(all_info, subsetting_rules)
full_match_konto['payment_type'] = 'Kartenzahlung'
full_match_konto['use_data'] = 'X'
full_match_konto['complex'] = ''
In [15]:
gk_wb_filename = 'konto_payments.xlsx'
In [16]:
full_match_konto.to_excel(gk_wb_filename)
gk_wb = pyxl.load_workbook(gk_wb_filename, data_only=True)
gk_sheet = gk_wb.active
the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer',
'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']
the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'
dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'
dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'
dv1.ranges.append('K2:K' + str(gk_sheet.max_row))
dv2.ranges.append('L2:M' + str(gk_sheet.max_row))
gk_sheet.add_data_validation(dv1)
gk_sheet.add_data_validation(dv2)
for i in range(2, gk_sheet.max_row + 1) :
gk_sheet['D' + str(i)].number_format = 'DD/MM/YY'
gk_sheet['K' + str(i)].fill = the_fill
gk_sheet['L' + str(i)].fill = the_fill
gk_sheet['M' + str(i)].fill = the_fill
for column_cells in gk_sheet.columns:
length = max(len(str(cell.value)) for cell in column_cells)
gk_sheet.column_dimensions[column_cells[0].column].width = length
for row in ['F', 'G', 'H', 'I', 'J'] :
gk_sheet.column_dimensions[row].hidden = True
gk_wb.save(gk_wb_filename)
In [18]:
checked_full_match_konto = use_excel_for_data_entry(gk_wb_filename, copy_mode=False)
In [19]:
miete_rules = {'description' : 'Miete', 'amount' : -568, 'treated' : 'No'}
haftpflicht_rules = {'description' : 'Haftpflichtversicherung', 'amount' : -7.5, 'treated' : 'No'}
berufsunfaehigkeit_rules = {'description' : 'Berufsunfähigkeitsversicherung', 'amount' : -49.05, 'treated' : 'No'}
strom_rules_rules = {'description' : 'Strom EnviaM', 'amount' : -51, 'treated' : 'No'}
vodafone_rules = {'description' : 'Vodafone', 'amount' : -19.99, 'treated' : 'No'}
handy_paul_rules = {'description' : 'Handy Paul', 'amount' : -7.99, 'treated' : 'No'}
handy_max_rules = {'description' : 'Handy Max', 'amount' : -7.99, 'treated' : 'No'}
music_paul_rules = {'description' : 'Apple Music Paul', 'amount' : -4.99, 'treated' : 'No'}
music_max_rules = {'description' : 'Spotify Max', 'amount' : -4.99, 'treated' : 'No'}
backblaze_paul_rules = {'description' : 'Backblaze Paul', 'amount' : -3.77, 'treated' : 'No'}
backblaze_max_rules = {'description' : 'Backblaze Max', 'amount' : -3.77, 'treated' : 'No'}
semester_paul_rules = {'description' : 'Semestergebühr Paul', 'amount' : -46.15, 'treated' : 'No'}
semester_max_rules = {'description' : 'Semestergebühr Max', 'amount' : -50.57, 'treated' : 'No'}
sport_jahre_rules = {'description' : '', 'amount' : -8, 'treated' : 'No'}
sport_rules = {'description' : 'Fitnessstudio', 'amount' : -39.8, 'treated' : 'No'}
gew_rules = {'description' : 'GEW', 'amount' : -2.5, 'treated' : 'No'}
gez_rules = {'description' : 'GEZ', 'amount' : -17.5, 'treated' : 'No'}
miete_ffm_rules = {'description' : 'Miete FFM', 'amount' : -450, 'treated' : 'No'}
all_rules = { 1 : [miete_rules ],
2 : [haftpflicht_rules ],
3 : [berufsunfaehigkeit_rules ],
4 : [strom_rules_rules ],
5 : [vodafone_rules ],
6 : [handy_paul_rules ],
7 : [handy_max_rules ],
8 : [music_paul_rules ],
9 : [music_max_rules ],
10 : [backblaze_paul_rules, backblaze_max_rules],
11 : [semester_paul_rules ],
12 : [semester_max_rules ],
13 : [sport_jahre_rules ],
14 : [sport_rules ],
15 : [gew_rules ],
16 : [gez_rules ],
17 : [miete_ffm_rules ]}
monthly = [1, 2, 3, 4, 8, 9, 14, 17]
bigger_intervalls = [10, 11, 12, 13, 15, 16]
variable_prices = [5, 6, 7]
## ToDo : 2 + 3 sonderfall beachten; insbesondere wird das gemeinsam vom konto abgebucht
## ToDo : 4 is only 51 € since mid 2017
for rule_number in monthly :
rule = all_rules[rule_number][0]
the_info = subset_data_frame(checked_full_match_konto, rule)
for index, row in the_info.iterrows() :
the_id = automatically_enter_payment_row(row['date'], row['payment_type'],
row['description'], row['amount'], row['money_pot'],
row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
excel_filename, august, august_, db_filename)
crsr = db.cursor()
crsr.execute('''INSERT INTO event_in_group VALUES (1, {});'''.format(the_id))
db.commit()
all_info.loc[index, 'treated'] = 'Yes'
## ToDo : do the bigger intervalls
In [20]:
for index, row in checked_full_match_konto.iterrows() :
if checked_full_match_konto.loc[index, 'use_data'] != 'X' :
continue
if checked_full_match_konto.loc[index, 'treated'] == 'Yes' :
continue
the_id = automatically_enter_payment_row(row['date'], row['payment_type'],
row['description'], row['amount'], row['money_pot'],
row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [21]:
## List of all fully matched things we didnt treat yet
subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
remaining_matched = subset_data_frame(all_info, subsetting_rules)
display(remaining_matched)
In [22]:
display(all_info[all_info['description'].str.contains('Schatulle')])
subsetting_rules = {'treated' : 'xxx'}
schatulle_info = subset_data_frame(all_info, subsetting_rules)
for direction in ['an', 'aus'] :
descriptor = 'Geld ' + direction + ' Schatulle'
subsetting_rules = {'description' : descriptor, 'treated' : 'No'}
schatulle_info = schatulle_info.append(subset_data_frame(all_info, subsetting_rules))
display(schatulle_info)
In [23]:
display(schatulle_info)
for index, row in schatulle_info.iterrows() :
i = 0
while True :
the_date = all_info.loc[index+i, 'date']
if str(the_date) == 'NaT' or the_date is np.nan :
the_date = all_info.loc[index-i, 'date']
if str(the_date) == 'NaT' or the_date is np.nan :
i = i + 1
continue
break
automatically_enter_transfer_row(
the_date, 'Bargeldtransfer', row['description'], row['amount'], row['money_pot'], 'SB',
[row['excel_range_y']], excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [24]:
## The Three individual usages of the White Trianon Card : Charging
subsetting_rules = {'description' : 'Geld aufladen Trianon', 'amount' : -20, 'treated' : 'No'}
trianon_aufladen = subset_data_frame(all_info, subsetting_rules)
for index, row in trianon_aufladen.iterrows() :
automatically_enter_transfer_row(
row['date'], 'Aufladen', row['description'], row['amount'], row['money_pot'], 'CT',
[row['excel_range_y']], excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [25]:
doppelter_kaffee = {'budget_type' : 'AM', 'description' : 'Doppelter Kaffee Trianon',
'amount' : -1.5, 'treated' : 'No'}
einfacher_kaffee = {'budget_type' : 'AM', 'description' : 'Kaffee',
'amount' : -0.75, 'treated' : 'No'}
rules = [doppelter_kaffee, einfacher_kaffee]
trianon_payments = multirule_subset(all_info, rules)
for index, row in trianon_payments.iterrows() :
automatically_enter_payment_row(
row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CT',
row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [26]:
subsetting_rules = {'budget_type' : 'AM', 'treated' : 'No'}
bundesbank_karte = subset_data_frame(all_info, subsetting_rules)
for index, row in bundesbank_karte.iterrows() :
automatically_enter_payment_row(
row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CB',
row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [27]:
subsetting_rules = {'description' : 'Geld abheben', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben = subset_data_frame(all_info, subsetting_rules)
subsetting_rules = {'description' : 'Geld abheben', 'amount' : '>0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_gotten = subset_data_frame(all_info, subsetting_rules)
subsetting_rules = {'description' : 'Geld abheben', 'amount' : '<0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_drawn = subset_data_frame(all_info, subsetting_rules)
geld_abheben_drawn['amount_abs'] = - geld_abheben_drawn['amount']
## Subselect relevant columns
abheben_gotten = geld_abheben_gotten[['date', 'amount', 'money_pot', 'description',
'excel_range_y']].reset_index(level=0)
abheben_drawn = geld_abheben_drawn[['date', 'amount_abs', 'money_pot',
'excel_range_y']].reset_index(level=0)
abheben_data = pd.merge(abheben_gotten, abheben_drawn, how='outer', left_on = ['date', 'amount'],
right_on=['date', 'amount_abs'], left_index=True, indicator=True)
display(abheben_data)
for index, row in abheben_data[abheben_data['_merge'] == 'both'].iterrows() :
automatically_enter_transfer_row(
row['date'], 'Abheben', row['description'], row['amount'], row['money_pot_y'], row['money_pot_x'],
[row['excel_range_y_x'], row['excel_range_y_y']], excel_filename, august, august_, db_filename)
all_info.loc[row['index_x'], 'treated'] = 'Yes'
all_info.loc[row['index_y'], 'treated'] = 'Yes'
In [28]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'right_only', 'treated' : 'No'}
remaining = subset_data_frame(all_info, subsetting_rules)
remaining['is_transfer'] = ''
remaining['transfer_type'] = ''
remaining['money_pot_source'] = remaining['amount'].apply(lambda x : 1 if x < 0 else 0) * \
remaining['money_pot']
remaining['money_pot_sink'] = remaining['amount'].apply(lambda x : 1 if x > 0 else 0) * \
remaining['money_pot']
In [29]:
mark_transfers_filename = 'testtest2.xlsx'
remaining.to_excel(mark_transfers_filename)
mark_transfers = pyxl.load_workbook(mark_transfers_filename, data_only=True)
remaining_sheet = mark_transfers.active
the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please check with an X or leave blank'
dv1.promptTitle = 'Checkmark'
list_of_transfer_types = ['Abheben', 'Aufladen', 'Kontotransfer', 'Bargeldtransfer']
rule = '"' + ','.join(list_of_transfer_types) + '"'
dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please select from the list'
dv2.promptTitle = 'List Selection'
list_of_accounts = ['KG', 'KE', 'KM', 'KP', 'KB', 'KC', 'BM', 'BP', 'CB', 'CT', 'CM', 'GM', 'SB', 'SF']
rule = '"' + ','.join(list_of_accounts) + '"'
dv3 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv3.error ='Your entry is not in the list'
dv3.errorTitle = 'Invalid Entry'
dv3.prompt = 'Please select from the list'
dv3.promptTitle = 'List Selection'
dv1.ranges.append('K2:K' + str(remaining_sheet.max_row))
dv2.ranges.append('L2:L' + str(remaining_sheet.max_row))
dv3.ranges.append('M2:N' + str(remaining_sheet.max_row))
remaining_sheet.add_data_validation(dv1)
remaining_sheet.add_data_validation(dv2)
remaining_sheet.add_data_validation(dv3)
for i in range(2, remaining_sheet.max_row + 1) :
remaining_sheet['D' + str(i)].number_format = 'DD/MM/YY'
remaining_sheet['K' + str(i)].fill = the_fill
remaining_sheet['L' + str(i)].fill = the_fill
remaining_sheet['M' + str(i)].fill = the_fill
remaining_sheet['N' + str(i)].fill = the_fill
for column_cells in remaining_sheet.columns:
length = max(len(str(cell.value)) for cell in column_cells)
remaining_sheet.column_dimensions[column_cells[0].column].width = length
for row in ['B', 'F', 'G', 'H', 'I', 'J'] :
remaining_sheet.column_dimensions[row].hidden = True
mark_transfers.save(mark_transfers_filename)
In [30]:
checked_data = use_excel_for_data_entry(mark_transfers_filename, copy_mode=False, open_before_read=True)
checked_transfer_data = checked_data[checked_data['is_transfer'] == 'X']
In [31]:
display(checked_transfer_data)
for index, row in checked_transfer_data.iterrows() :
automatically_enter_transfer_row(
row['date'], row['transfer_type'], row['description'], row['amount'],
row['money_pot_source'], row['money_pot_sink'],
[row['excel_range_y']], excel_filename, august, august_, db_filename)
all_info.loc[index, 'treated'] = 'Yes'
In [76]:
## Mark remaining as : Budget_Event, Money_Transfer, Payment, Complex Process
## Give the Opportunity to give provisorial ids
subsetting_rules = {'treated' : 'No'}
## If I do not use copy(), it will throw warnings when only
## subsetting with one rule. I suppose it does copy automatically
## after from the second subsetting
remaining = subset_data_frame(all_info, subsetting_rules).copy()
remaining['ignore'] = 'X'
remaining['is_complex'] = ''
remaining['temporary_id'] = ''
remaining['event_abbr'] = ''
remaining['event_type'] = ''
remaining['is_budget_event'] = ''
remaining['is_recieving'] = ''
remaining['is_payment'] = ''
remaining['is_transfer'] = ''
remaining['additional_description'] = ''
remaining['effect_date'] = ''
remaining['money_pot_name'] = ''
remaining['money_pot_sink'] = ''
remaining['money_pot_sink_name'] = ''
remaining['budget_type_name'] = ''
remaining['in_group'] = ''
remaining['group_name'] = ''
## ToDo : Add additional_descriptions and effect_date
remaining = remaining[[
'ignore', 'is_complex', 'temporary_id', 'event_abbr',
'event_type', 'description', 'date',
'amount', 'money_pot', 'money_pot_name',
'money_pot_sink','money_pot_sink_name',
'is_budget_event', 'budget_type', 'budget_type_name',
'is_recieving', 'is_payment', 'is_transfer', 'in_group', 'group_name', 'excel_range_x',
'excel_range_y', '_merge', 'treated'
]]
display(remaining)
In [ ]:
mark_remaining_filename = 'remaining.xlsx'
remaining.to_excel(mark_remaining_filename)
mark_remaining = pyxl.load_workbook(mark_remaining_filename, data_only=False)
remaining_sheet = mark_remaining.active
def add_lookup_sheet(wb, db, table, list_of_cols, lookup_sheet_name = 'lookup', sorted_by = None) :
crsr = db.cursor()
if sorted_by is None :
crsr.execute('SELECT {} FROM {}'.format(','.join(list_of_cols), table))
else :
crsr.execute('SELECT {} FROM {} ORDER BY {}'.format(','.join(list_of_cols), table, sorted_by))
results = crsr.fetchall()
#return [r[0] for r in results]
lookup_sheet = wb.create_sheet(lookup_sheet_name)
for row in results :
lookup_sheet.append(row)
add_lookup_sheet(mark_remaining, db, 'money_pots', ['key', 'description'], 'lookup_money_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_types', ['abbreviation', 'type'], 'lookup_event_types', 'type')
add_lookup_sheet(mark_remaining, db, 'budget_pots', ['key', 'description'], 'lookup_budget_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_groups', ['group_id', 'description'], 'lookup_event_groups', 'group_id')
yellow_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
grey_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='BFBFBF')
unwrap_text = pyxl.styles.Alignment(horizontal='fill')
validate_checkmark = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
validate_checkmark.error ='Your entry is not in the list'
validate_checkmark.errorTitle = 'Invalid Entry'
validate_checkmark.prompt = 'Please check with an X or leave blank'
validate_checkmark.promptTitle = 'Checkmark'
def keylist_from_db(db, table, colname) :
crsr = db.cursor()
crsr.execute('SELECT {} FROM {}'.format(colname, table))
results = crsr.fetchall()
return [r[0] for r in results]
def add_validation_notes(dv) :
dv.error ='Your entry is not in the list'
dv.errorTitle = 'Invalid Entry'
dv.prompt = 'Please select from the list'
dv.promptTitle = 'List Selection'
return dv
the_list = keylist_from_db(db, 'event_types', 'type')
the_list_str = '"' + ','.join(the_list) + '"'
validate_event_type = pyxl.worksheet.datavalidation.DataValidation(
type="list", formula1=the_list_str, allow_blank=False)
validate_event_type = add_validation_notes(validate_event_type)
the_list = keylist_from_db(db, 'money_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_money_pot = pyxl.worksheet.datavalidation.DataValidation(
type="list", formula1=the_list_str, allow_blank=False)
validate_money_pot = add_validation_notes(validate_money_pot)
the_list = keylist_from_db(db, 'budget_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_budget_pot = pyxl.worksheet.datavalidation.DataValidation(
type="list", formula1=the_list_str, allow_blank=False)
validate_budget_pot = add_validation_notes(validate_budget_pot)
validate_checkmark.ranges.append('B2:C' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('N2:N' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('Q2:S' + str(remaining_sheet.max_row))
validate_event_type.ranges.append('F2:F' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('J2:J' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('L2:L' + str(remaining_sheet.max_row))
validate_budget_pot.ranges.append('O2:O' + str(remaining_sheet.max_row))
remaining_sheet.add_data_validation(validate_checkmark)
remaining_sheet.add_data_validation(validate_event_type)
remaining_sheet.add_data_validation(validate_money_pot)
remaining_sheet.add_data_validation(validate_budget_pot)
for i in range(2, remaining_sheet.max_row + 1) :
remaining_sheet['H' + str(i)].number_format = 'DD/MM/YY'
for s in ['D', 'F', 'J', 'L', 'O', 'T'] :
remaining_sheet[s + str(i)].fill = yellow_fill
for s in ['B', 'C', 'N', 'Q', 'R', 'S'] :
remaining_sheet[s + str(i)].fill = grey_fill
remaining_sheet['E' + str(i)] = '=IF(ISBLANK(F' + str(i) + '), "", LOOKUP(F' + str(i) + \
r', lookup_event_types!B:B, lookup_event_types!A:A))'
remaining_sheet['K' + str(i)] = '=IF(ISBLANK(J' + str(i) + '), "", LOOKUP(J' + str(i) + \
r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
remaining_sheet['M' + str(i)] = '=IF(ISBLANK(L' + str(i) + '), "", LOOKUP(L' + str(i) + \
r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
remaining_sheet['P' + str(i)] = '=IF(ISBLANK(O' + str(i) + '), "", LOOKUP(O' + str(i) + \
r', lookup_budget_pots!A:A, lookup_budget_pots!B:B))'
remaining_sheet['P' + str(i)].alignment = unwrap_text
remaining_sheet['U' + str(i)] = '=IF(ISBLANK(T' + str(i) + '), "", LOOKUP(T' + str(i) + \
r', lookup_event_groups!A:A, lookup_event_groups!B:B))'
for column_cells in remaining_sheet.columns :
if column_cells[0].column in ['B', 'C', 'D', 'E', 'J', 'L', 'N', 'O', 'Q', 'R', 'S', 'T'] :
remaining_sheet.column_dimensions[column_cells[0].column].width = 4
elif column_cells[0].column in ['K', 'M', 'P', 'U'] :
remaining_sheet.column_dimensions[column_cells[0].column].width = 20
else :
length = max(len(str(cell.value)) for cell in column_cells)
remaining_sheet.column_dimensions[column_cells[0].column].width = length
for row in ['V', 'W', 'X', 'Y'] :
remaining_sheet.column_dimensions[row].hidden = True
mark_remaining.save(mark_remaining_filename)
In [ ]:
try :
checked_data = use_excel_for_data_entry(mark_remaining_filename, copy_mode=False, open_before_read=True)
except RuntimeError :
pass
#checked_transfer_data = checked_data[checked_data['ignore'] != 'X']
In [ ]:
remaining = all_info[all_info['treated'] == 'No']
display(remaining)
print("number remaining: " + str(len(remaining)))
In [ ]:
#for table in ["money_events", "payments", "budget_events"] :
# print('The table {}: '.format(table))
# display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))
with pd.option_context('display.max_rows', None):
display(pd.read_sql_query(
'SELECT * FROM payments LEFT JOIN money_events where money_events.id = payments.id', db))
display(pd.read_sql_query(
'SELECT * FROM transfers LEFT JOIN money_events where money_events.id = transfers.id', db))
In [ ]:
wb.save('GemBil.xlsx')
In [ ]:
db.close()
In [ ]:
all_info.columns
In [ ]: