In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import openpyxl as pyxl
import pandas as pd
import numpy as np

from excel_helpers import *
from db_entries import *

import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

import logging
logger = logging.getLogger()

In [3]:
%%capture

db_filename = 'my-budget-dev-v1.sqlite'

%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

db = sql.connect(db_filename)

In [4]:
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

current_sheet = 'August'
comment_sheet = 'august_'

# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)


august = wb.get_sheet_by_name(current_sheet)
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'

In [5]:
def autogenerate_database_comment_multiple_excel_ranges(filename, ranges,
                                                        comment_sheet) :
    try :
        ranges_string = ', '.join(ranges[:-1]) + \
                        (' and ' if len(ranges) > 1 else '') + ranges[-1]
    except IndexError :
        raise ValueError('You should provide at least one Excel range')
    return 'This entry was automatically generated from the excel file ' + \
           '{}. It is based on the cells {}. A note has been '.format(
               filename, ranges_string) + \
           'added to the respective cells in the sheet {}.'.format(comment_sheet.title)

def autogenerate_database_comment(filename, range_, comment_sheet) :
    return autogenerate_database_comment_multiple_excel_ranges(
                filename, [range_], comment_sheet)

def autogenerate_excel_comment(date, db_filename, the_id) :
    return 'On {} this cell was automatically read and '.format(date) + \
           'inserted into the database {}. The id ofthe entry is {}.'.format(
               date, db_filename, the_id)

In [6]:
def automatically_enter_payment_row(date, payment_type, description, amount, money_pot, budget_type,
                                    ranges, excel_filename, current_sheet, comment_sheet, db_filename) :

    list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
    
    database_comment = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, list_of_ranges, comment_sheet.title)
    
    the_id = put_payment_into_database_autogenerated(db, date, payment_type, description, amount, 
                                                     money_pot, budget_type, database_comment)
   
    excel_comment = autogenerate_excel_comment(
        dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    
    #Get a list of all referenced cells in the ranges
    cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
        
    put_comment_into_excel(comment_sheet, cells, excel_comment)
    
    return the_id

def automatically_enter_transfer_row(date, transfer_type, description, amount, money_pot_source, 
                                     money_pot_sink, ranges, excel_filename, current_sheet, 
                                     comment_sheet, db_filename, effect_date = None) :

    list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
    
    database_comment = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, list_of_ranges, comment_sheet)
    
    the_id = put_transfer_into_database_autogenerated(db, date, transfer_type, description, amount, 
                                                      money_pot_source, money_pot_sink, comment=database_comment)
   
    excel_comment = autogenerate_excel_comment(
        dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    
    #Get a list of all referenced cells in the ranges
    cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
        
    put_comment_into_excel(comment_sheet, cells, excel_comment)
    
    return the_id

In [7]:
def subset_data_frame(data_frame, rules) :
    """Subset a pandas.DataFrame by a dictionary of rules. The rules should have the following form:
    {<column name> : <restriction>} where <restriction> is either a number, a string starting with 
    '<' or '>' and followed by number or a general string. If it is a simple string or number, the 
    subsetting expression
        data_frame[dataframe[<column name>] == <restriction>]
    will be used. The other case will be interpreted as
        data_frame[dataframe[<column name>] <|> <number>].
    """
    
    result = data_frame
    for rule in rules.items() :
        column = rule[0]
        restriction = rule[1]
        
        if type(restriction) is str and restriction[0] in ['<', '>'] :
                try :
                    number = float(restriction[1:])
                except :
                    raise ValueError('After a rule starting with < or > you need to specify a number')
                result = result.loc[result[column] < number] if restriction[0] == '<' \
                            else result.loc[result[column] > number]
                continue

        result = result.loc[result[column] == restriction]
        if len(result) == 0 :
            logger.info('After the rule ' + str(rule) + ' no item was left')
            return result
    
    logger.info(str(len(result)) + ' items were selected.')
    return result

def multirule_subset(data_frame, rules) :
    """Collect multiple subsets (union of multiple subsets) of a pandas.DataFrame into a single DataFrame
    by using subset_data_frame.
    
    Keyword Arguments:
        data_frame -- the DataFrame to be subsetted
        rules      -- a list of dictionaries, where the dicts fulfill the requirements specified for 
                      subset_data_frame
    """
    new_data = pd.DataFrame(columns=data_frame.columns)
    for rule in rules :
        new_data = new_data.append(subset_data_frame(data_frame, rule))
        
    return new_data

In [8]:
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']

col_titles = ['description', 'date', 'amount', 'excel_range']

max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles
max_bargeld['money_pot'] = 'BM'

paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles
paul_bargeld['money_pot'] = 'BP'

konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles
konto['money_pot'] = 'KG'

conjoined = pd.concat([max_bargeld, paul_bargeld, konto])

In [9]:
all_info = pd.merge(budgeting, conjoined, how='outer', 
                    on=['description', 'date', 'amount'], indicator=True)

all_info['treated'] = 'No'

all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)

In [10]:
## Creating Data for Bargeld Payments data

rules = [{'_merge' : 'both', 'money_pot' : B,  'treated' : 'No'} for B in ['BM', 'BP']]
full_match_bargeld = multirule_subset(all_info, rules)
full_match_bargeld['payment_type'] = 'Barzahlung'
full_match_bargeld['use_data'] = 'X'
full_match_bargeld['complex'] = ''

bargeld_wb_filename = 'bargeld_payments.xlsx'

In [11]:
full_match_bargeld.to_excel(bargeld_wb_filename)
bargeld_wb = pyxl.load_workbook(bargeld_wb_filename, data_only=True)

bargeld_sheet = bargeld_wb.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
        
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
            'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer', 
            'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']

the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'

dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'

dv1.ranges.append('K2:K' + str(bargeld_sheet.max_row))
dv2.ranges.append('L2:M' + str(bargeld_sheet.max_row))

bargeld_sheet.add_data_validation(dv1)
bargeld_sheet.add_data_validation(dv2)

for i in range(2, bargeld_sheet.max_row + 1) :
    bargeld_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    bargeld_sheet['K' + str(i)].fill = the_fill
    bargeld_sheet['L' + str(i)].fill = the_fill
    bargeld_sheet['M' + str(i)].fill = the_fill
    
for column_cells in bargeld_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    bargeld_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['F', 'G', 'H', 'I', 'J'] :
    bargeld_sheet.column_dimensions[row].hidden = True    
    
bargeld_wb.save(bargeld_wb_filename)

In [12]:
checked_full_match_konto = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)


The file bargeld_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: 
The read data is:
budget_type description date amount excel_range_x excel_range_y money_pot _merge treated payment_type use_data complex
68 A Eisessen 2017-08-13 -4.80 A74:D74 H13:J13 BM both No Barzahlung X NaN
69 L Wasser 2017-08-13 -1.30 A75:D75 H14:J14 BM both No Barzahlung X NaN
79 A Eisessen 2017-08-17 -1.20 A85:D85 H16:J16 BM both No Barzahlung X NaN
84 M Toilette 2017-08-18 -0.25 A90:D90 H17:J17 BM both No Barzahlung X NaN
85 S Spitzer 2017-08-19 -2.00 A91:D91 H18:J18 BM both No Barzahlung X NaN
98 A Wein 2017-08-22 -10.00 A103:D103 H23:J23 BM both No Barzahlung X NaN
101 A Tapas 2017-08-23 -14.00 A106:D106 H24:J24 BM both No Barzahlung X NaN
105 A Eis essen 2017-08-25 -1.20 A110:D110 H26:J26 BM both No Barzahlung X NaN
111 A Eis essen 2017-08-28 -1.20 A116:D116 H27:J27 BM both No Barzahlung X NaN
30 L Brot 2017-08-01 -0.95 A36:D36 K8:M8 BP both No Barzahlung X NaN
31 L Brot 2017-08-01 -0.95 A37:D37 K8:M8 BP both No Barzahlung X NaN
58 L Brot 2017-08-08 -1.40 A64:D64 K9:M9 BP both No Barzahlung X NaN
118 AM PUCK aufladen 2017-08-21 -20.00 A123:D123 K11:M11 BP both No Barzahlung X NaN
If you are not satisfied with the read data, enter any number. If you do this, the workbook will be opened again. Continue with any other input; abort with Q: 
OK.

In [13]:
for index, row in full_match_bargeld.iterrows() :
    the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
        row['description'], row['amount'], row['money_pot'], 
        row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
#************************************#
# This code will not be used anymore #
#************************************#

## Putting all the Info which had matching info on the left and the right hand of the budget sheet into
## the database.

## Only doing the "Barzahlung" payments right now

##for BB in ['BM', 'BP'] :
##    subsetting_rules = {'_merge' : 'both', 'money_pot' : BB,  'treated' : 'No'}
##
##    full_match_bargeld = subset_data_frame(all_info, subsetting_rules)
##
##    for index, row in full_match_bargeld.iterrows() :
##        the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
##            row['description'], row['amount'], row['money_pot'], 
##            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
##            excel_filename, august, august_, db_filename)
##        
##        all_info.loc[index, 'treated'] = 'Yes'

In [14]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'both', 'money_pot' : 'KG',  'treated' : 'No'}
full_match_konto = subset_data_frame(all_info, subsetting_rules)
full_match_konto['payment_type'] = 'Kartenzahlung'
full_match_konto['use_data'] = 'X'
full_match_konto['complex'] = ''

In [15]:
gk_wb_filename = 'konto_payments.xlsx'

In [16]:
full_match_konto.to_excel(gk_wb_filename)
gk_wb = pyxl.load_workbook(gk_wb_filename, data_only=True)

gk_sheet = gk_wb.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
        
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
            'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer', 
            'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']

the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'

dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'

dv1.ranges.append('K2:K' + str(gk_sheet.max_row))
dv2.ranges.append('L2:M' + str(gk_sheet.max_row))

gk_sheet.add_data_validation(dv1)
gk_sheet.add_data_validation(dv2)

for i in range(2, gk_sheet.max_row + 1) :
    gk_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    gk_sheet['K' + str(i)].fill = the_fill
    gk_sheet['L' + str(i)].fill = the_fill
    gk_sheet['M' + str(i)].fill = the_fill
    
for column_cells in gk_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    gk_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['F', 'G', 'H', 'I', 'J'] :
    gk_sheet.column_dimensions[row].hidden = True    
    
gk_wb.save(gk_wb_filename)

In [18]:
checked_full_match_konto = use_excel_for_data_entry(gk_wb_filename, copy_mode=False)


The file konto_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: 
The read data is:
budget_type description date amount excel_range_x excel_range_y money_pot _merge treated payment_type use_data complex
0 R Miete 2017-08-01 -568.00 A6:D6 N13:P13 KG both No Dauerauftrag X NaN
1 R Miete FFM 2017-08-02 -450.00 A7:D7 N20:P20 KG both No SEPA-Mandat X NaN
2 R Berufsunfähigkeitsversicherung 2017-08-01 -49.05 A8:D8 N12:P12 KG both No SEPA-Mandat X NaN
3 R Strom EnviaM 2017-08-30 -51.00 A9:D9 N89:P89 KG both No SEPA-Mandat X NaN
7 R Haftpflichtversicherung 2017-08-01 -7.50 A13:D13 N11:P11 KG both No SEPA-Mandat X NaN
12 R Handy Max 2017-08-10 -7.99 A18:D18 N41:P41 KG both No SEPA-Mandat X NaN
14 G Spotify Max 2017-08-15 -4.99 A20:D20 N52:P52 KG both No SEPA-Mandat X NaN
15 G Apple Music Paul 2017-08-25 -4.99 A21:D21 N76:P76 KG both No Kartenzahlung X NaN
19 R Monatskarte FFM 2017-07-31 -87.40 A25:D25 N9:P9 KG both No Kartenzahlung X NaN
21 G Fitnessstudio 2017-08-03 -39.80 A27:D27 N23:P23 KG both No SEPA-Mandat X NaN
22 L Aldi 2017-07-31 -23.97 A28:D28 N8:P8 KG both No Kartenzahlung X NaN
23 L Netto 2017-07-31 -9.44 A29:D29 N10:P10 KG both No Kartenzahlung X NaN
24 L Rewe 2017-08-01 -8.71 A30:D30 N14:P14 KG both No Kartenzahlung X NaN
25 D Rossmann 2017-08-01 -9.17 A31:D31 N15:P15 KG both No Kartenzahlung X NaN
26 D DM 2017-08-01 -9.45 A32:D32 N16:P16 KG both No Kartenzahlung X NaN
27 D DM 2017-08-01 -12.25 A33:D33 N17:P17 KG both No Kartenzahlung X NaN
29 L Türkischer Supermarkt 2017-08-01 -10.37 A35:D35 N18:P18 KG both No Kartenzahlung X NaN
35 L Real 2017-08-02 -6.86 A41:D41 N22:P22 KG both No Kartenzahlung X NaN
42 L Kaufland 2017-08-04 -13.95 A48:D48 N25:P25 KG both No Kartenzahlung X NaN
43 A McDonalds 2017-08-05 -2.59 A49:D49 N26:P26 KG both No Kartenzahlung X NaN
44 S H&M 2017-08-05 -55.08 A50:D50 N27:P27 KG both No Kartenzahlung X NaN
45 L Rewe Wasser 2017-08-05 -1.17 A51:D51 N28:P28 KG both No Kartenzahlung X NaN
46 L Edeka 2017-08-05 -5.43 A52:D52 N31:P31 KG both No Kartenzahlung X NaN
47 T Sitzplatz reservierung 2017-08-06 -4.50 A53:D53 N33:P33 KG both No Kartenzahlung X NaN
48 S Amazon Bestellung 2017-08-06 -39.24 A54:D54 N32:P32 KG both No Kartenzahlung X NaN
49 S Amazon Festplatte Paul 2017-08-06 -52.94 A55:D55 N34:P34 KG both No Kartenzahlung X NaN
50 L Rewe 2017-08-07 -21.00 A56:D56 N36:P36 KG both No Kartenzahlung X NaN
51 S H&M Hose 2017-08-07 -30.14 A57:D57 N37:P37 KG both No Kartenzahlung X NaN
52 M Briefmarken 2017-08-07 -21.50 A58:D58 N38:P38 KG both No Kartenzahlung X NaN
53 L Netto 2017-08-07 -16.60 A59:D59 N39:P39 KG both No Kartenzahlung X NaN
54 L Real 2017-08-07 -3.99 A60:D60 N40:P40 KG both No Kartenzahlung X NaN
62 L Edeka 2017-08-10 -3.17 A68:D68 N44:P44 KG both No Kartenzahlung X NaN
66 L Aldi 2017-08-11 -19.31 A72:D72 N45:P45 KG both No Kartenzahlung X NaN
67 D Schirm Rossmann 2017-08-11 -2.95 A73:D73 N46:P46 KG both No Kartenzahlung X NaN
70 L Rewe 2017-08-13 -34.42 A76:D76 N47:P47 KG both No Kartenzahlung X NaN
72 L Netto 2017-08-14 -13.42 A78:D78 N50:P50 KG both No Kartenzahlung X NaN
73 T Fahrkarte Frankfurt 2017-08-14 -2.90 A79:D79 N51:P51 KG both No Kartenzahlung X NaN
76 L Rewe 2017-08-16 -17.57 A82:D82 N53:P53 KG both No Kartenzahlung X NaN
78 L Rewe 2017-08-16 -3.42 A84:D84 N54:P54 KG both No Kartenzahlung X NaN
80 L Real 2017-08-18 -10.19 A86:D86 N55:P55 KG both No Kartenzahlung X NaN
81 L Rewe 2017-08-18 -2.69 A87:D87 N56:P56 KG both No Kartenzahlung X NaN
83 T Sitzplatz reservierung 2017-08-18 -4.50 A89:D89 N57:P57 KG both No Kartenzahlung X NaN
86 L Netto 2017-08-19 -2.31 A92:D92 N58:P58 KG both No Kartenzahlung X NaN
87 L Netto 2017-08-19 -32.39 A93:D93 N59:P59 KG both No Kartenzahlung X NaN
88 L Asia Markt 2017-08-19 -7.12 A94:D94 N60:P60 KG both No Kartenzahlung X NaN
89 D DM 2017-08-19 -16.70 A95:D95 N61:P61 KG both No Kartenzahlung X NaN
90 D Apotheke 2017-08-19 -27.42 A96:D96 N62:P62 KG both No Kartenzahlung X NaN
91 M Tree-App 2017-08-20 -2.29 A97:D97 N63:P63 KG both No Kartenzahlung X NaN
94 A McDonalds Frühstück Köln 2017-08-22 -2.99 A100:D100 N66:P66 KG both No Kartenzahlung X NaN
95 L Penny 2017-08-22 -2.57 A101:D101 N67:P67 KG both No Kartenzahlung X NaN
96 T Einzelticket Köln 2017-08-22 -2.80 A102:D102 N69:P69 KG both No Kartenzahlung X NaN
97 T Einzelticket Köln 2017-08-22 -2.80 A102:D102 N70:P70 KG both No Kartenzahlung X NaN
99 L Penny Brötchen 2017-08-23 -0.29 A104:D104 N71:P71 KG both No Kartenzahlung X NaN
100 A Rewe 2017-08-23 -2.18 A105:D105 N72:P72 KG both No Kartenzahlung X NaN
102 L Rewe 2017-08-24 -3.91 A107:D107 N75:P75 KG both No Kartenzahlung X NaN
103 L Lidl 2017-08-25 -1.13 A108:D108 N77:P77 KG both No Kartenzahlung X NaN
106 L Lidl 2017-08-27 -1.59 A111:D111 N79:P79 KG both No Kartenzahlung X NaN
107 L Rewe 2017-08-27 -2.74 A112:D112 N80:P80 KG both No Kartenzahlung X NaN
108 L Rewe 2017-08-28 -15.00 A113:D113 N84:P84 KG both No Kartenzahlung X NaN
109 L Rewe 2017-08-28 -7.96 A114:D114 N85:P85 KG both No Kartenzahlung X NaN
112 L Aldi 2017-08-29 -7.90 A117:D117 N87:P87 KG both No Kartenzahlung X NaN
113 D DM 2017-08-29 -10.60 A118:D118 N88:P88 KG both No Kartenzahlung X NaN
114 L Real 2017-08-21 -5.17 A119:D119 N64:P64 KG both No Kartenzahlung X NaN
117 L Netto 2017-08-28 -5.96 A122:D122 N86:P86 KG both No Kartenzahlung X NaN
If you are not satisfied with the read data, enter any number. If you do this, the workbook will be opened again. Continue with any other input; abort with Q: 
OK.

In [19]:
miete_rules              = {'description' : 'Miete', 'amount' : -568,  'treated' : 'No'} 
haftpflicht_rules        = {'description' : 'Haftpflichtversicherung', 'amount' : -7.5,  'treated' : 'No'} 
berufsunfaehigkeit_rules = {'description' : 'Berufsunfähigkeitsversicherung', 'amount' : -49.05,  'treated' : 'No'} 
strom_rules_rules        = {'description' : 'Strom EnviaM', 'amount' : -51,  'treated' : 'No'} 
vodafone_rules           = {'description' : 'Vodafone', 'amount' : -19.99,  'treated' : 'No'} 
handy_paul_rules         = {'description' : 'Handy Paul', 'amount' : -7.99,  'treated' : 'No'} 
handy_max_rules          = {'description' : 'Handy Max', 'amount' : -7.99,  'treated' : 'No'} 
music_paul_rules         = {'description' : 'Apple Music Paul', 'amount' : -4.99,  'treated' : 'No'} 
music_max_rules          = {'description' : 'Spotify Max', 'amount' : -4.99,  'treated' : 'No'} 
backblaze_paul_rules     = {'description' : 'Backblaze Paul', 'amount' : -3.77,  'treated' : 'No'} 
backblaze_max_rules      = {'description' : 'Backblaze Max', 'amount' : -3.77,  'treated' : 'No'} 
semester_paul_rules      = {'description' : 'Semestergebühr Paul', 'amount' : -46.15,  'treated' : 'No'} 
semester_max_rules       = {'description' : 'Semestergebühr Max', 'amount' : -50.57,  'treated' : 'No'} 
sport_jahre_rules        = {'description' : '', 'amount' : -8,  'treated' : 'No'} 
sport_rules              = {'description' : 'Fitnessstudio', 'amount' : -39.8,  'treated' : 'No'} 
gew_rules                = {'description' : 'GEW', 'amount' : -2.5,  'treated' : 'No'} 
gez_rules                = {'description' : 'GEZ', 'amount' : -17.5,  'treated' : 'No'} 
miete_ffm_rules          = {'description' : 'Miete FFM', 'amount' : -450,  'treated' : 'No'} 

all_rules = { 1 : [miete_rules                              ],
              2 : [haftpflicht_rules                        ],
              3 : [berufsunfaehigkeit_rules                 ],
              4 : [strom_rules_rules                        ],
              5 : [vodafone_rules                           ],
              6 : [handy_paul_rules                         ],
              7 : [handy_max_rules                          ],
              8 : [music_paul_rules                         ],
              9 : [music_max_rules                          ],
             10 : [backblaze_paul_rules, backblaze_max_rules],
             11 : [semester_paul_rules                      ],
             12 : [semester_max_rules                       ],
             13 : [sport_jahre_rules                        ],
             14 : [sport_rules                              ],
             15 : [gew_rules                                ],
             16 : [gez_rules                                ],
             17 : [miete_ffm_rules                                ]}

monthly = [1, 2, 3, 4, 8, 9, 14, 17]
bigger_intervalls = [10, 11, 12, 13, 15, 16]
variable_prices = [5, 6, 7]
## ToDo : 2 + 3 sonderfall beachten; insbesondere wird das gemeinsam vom konto abgebucht
## ToDo : 4 is only 51 € since mid 2017

for rule_number in monthly :
    rule = all_rules[rule_number][0]
    
    the_info = subset_data_frame(checked_full_match_konto, rule)

    for index, row in the_info.iterrows() :
        the_id = automatically_enter_payment_row(row['date'], row['payment_type'], 
            row['description'], row['amount'], row['money_pot'], 
            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
            excel_filename, august, august_, db_filename)

        crsr = db.cursor()
        crsr.execute('''INSERT INTO event_in_group VALUES (1, {});'''.format(the_id))
        db.commit()

        all_info.loc[index, 'treated'] = 'Yes'
        
## ToDo : do the bigger intervalls

In [20]:
for index, row in checked_full_match_konto.iterrows() :
    if checked_full_match_konto.loc[index, 'use_data'] != 'X' :
        continue
    if checked_full_match_konto.loc[index, 'treated'] == 'Yes' :
        continue
        
    the_id = automatically_enter_payment_row(row['date'], row['payment_type'], 
            row['description'], row['amount'], row['money_pot'], 
            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
            excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [21]:
## List of all fully matched things we didnt treat yet
subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
remaining_matched = subset_data_frame(all_info, subsetting_rules)

display(remaining_matched)


budget_type description date amount excel_range_x excel_range_y money_pot _merge treated

In [22]:
display(all_info[all_info['description'].str.contains('Schatulle')])

subsetting_rules = {'treated' : 'xxx'}
schatulle_info = subset_data_frame(all_info, subsetting_rules)

for direction in ['an', 'aus'] :
    descriptor = 'Geld ' + direction + ' Schatulle'
    subsetting_rules = {'description' : descriptor, 'treated' : 'No'}
    schatulle_info = schatulle_info.append(subset_data_frame(all_info, subsetting_rules))
    
display(schatulle_info)


budget_type description date amount excel_range_x excel_range_y money_pot _merge treated
122 NaN Geld aus Schatulle 2017-08-05 1.30 NaN H10:J10 BM right_only No
126 NaN Geld aus Schatulle 2017-08-20 0.50 NaN H19:J19 BM right_only No
127 NaN Schatullenkorrektur 2017-08-20 1.05 NaN H20:J20 BM right_only No
128 NaN Schatullenkorrektur 2017-08-20 -1.05 NaN H21:J21 BM right_only No
131 NaN Geld aus Schatulle NaN 1.00 NaN K7:M7 BP right_only No
132 NaN Geld aus Schatulle 2017-08-08 1.35 NaN K10:M10 BP right_only No
budget_type description date amount excel_range_x excel_range_y money_pot _merge treated
122 NaN Geld aus Schatulle 2017-08-05 1.30 NaN H10:J10 BM right_only No
126 NaN Geld aus Schatulle 2017-08-20 0.50 NaN H19:J19 BM right_only No
131 NaN Geld aus Schatulle NaN 1.00 NaN K7:M7 BP right_only No
132 NaN Geld aus Schatulle 2017-08-08 1.35 NaN K10:M10 BP right_only No

In [23]:
display(schatulle_info)
for index, row in schatulle_info.iterrows() :
    i = 0
    while True :
        the_date = all_info.loc[index+i, 'date']
        if str(the_date) == 'NaT' or the_date is np.nan :
            the_date = all_info.loc[index-i, 'date']
            if str(the_date) == 'NaT' or the_date is np.nan :
                i = i + 1
                continue
        
        break
        
    automatically_enter_transfer_row(
        the_date, 'Bargeldtransfer', row['description'], row['amount'], row['money_pot'], 'SB',
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'


budget_type description date amount excel_range_x excel_range_y money_pot _merge treated
122 NaN Geld aus Schatulle 2017-08-05 1.30 NaN H10:J10 BM right_only No
126 NaN Geld aus Schatulle 2017-08-20 0.50 NaN H19:J19 BM right_only No
131 NaN Geld aus Schatulle NaN 1.00 NaN K7:M7 BP right_only No
132 NaN Geld aus Schatulle 2017-08-08 1.35 NaN K10:M10 BP right_only No

In [24]:
## The Three individual usages of the White Trianon Card : Charging
subsetting_rules = {'description' : 'Geld aufladen Trianon', 'amount' : -20, 'treated' : 'No'}
trianon_aufladen = subset_data_frame(all_info, subsetting_rules)

for index, row in trianon_aufladen.iterrows() :
    automatically_enter_transfer_row(
        row['date'], 'Aufladen', row['description'], row['amount'], row['money_pot'], 'CT',
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [25]:
doppelter_kaffee = {'budget_type' : 'AM', 'description' : 'Doppelter Kaffee Trianon', 
                                     'amount' : -1.5, 'treated' : 'No'}
einfacher_kaffee = {'budget_type' : 'AM', 'description' : 'Kaffee', 
                                     'amount' : -0.75, 'treated' : 'No'}
rules = [doppelter_kaffee, einfacher_kaffee]
trianon_payments = multirule_subset(all_info, rules)

for index, row in trianon_payments.iterrows() :
    automatically_enter_payment_row(
        row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CT', 
        row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [26]:
subsetting_rules = {'budget_type' : 'AM', 'treated' : 'No'}
bundesbank_karte = subset_data_frame(all_info, subsetting_rules)

for index, row in bundesbank_karte.iterrows() :
    automatically_enter_payment_row(
        row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CB', 
        row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [27]:
subsetting_rules = {'description' : 'Geld abheben', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben = subset_data_frame(all_info, subsetting_rules)

subsetting_rules = {'description' : 'Geld abheben', 'amount' : '>0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_gotten = subset_data_frame(all_info, subsetting_rules)

subsetting_rules = {'description' : 'Geld abheben', 'amount' : '<0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_drawn = subset_data_frame(all_info, subsetting_rules)
geld_abheben_drawn['amount_abs'] = - geld_abheben_drawn['amount']

## Subselect relevant columns
abheben_gotten = geld_abheben_gotten[['date', 'amount', 'money_pot', 'description', 
                                      'excel_range_y']].reset_index(level=0)
abheben_drawn = geld_abheben_drawn[['date', 'amount_abs', 'money_pot', 
                                    'excel_range_y']].reset_index(level=0)


abheben_data = pd.merge(abheben_gotten, abheben_drawn, how='outer', left_on = ['date', 'amount'], 
                        right_on=['date', 'amount_abs'], left_index=True, indicator=True)

display(abheben_data)


for index, row in abheben_data[abheben_data['_merge'] == 'both'].iterrows() :
    automatically_enter_transfer_row(
        row['date'], 'Abheben', row['description'], row['amount'], row['money_pot_y'], row['money_pot_x'],
        [row['excel_range_y_x'], row['excel_range_y_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[row['index_x'], 'treated'] = 'Yes'
    all_info.loc[row['index_y'], 'treated'] = 'Yes'


index_x date amount money_pot_x description excel_range_y_x index_y amount_abs money_pot_y excel_range_y_y _merge
1 120 2017-08-03 30.0 BM Geld abheben H8:J8 NaN NaN NaN NaN left_only
1 129 2017-08-22 40.0 BM Geld abheben H22:J22 146.0 40.0 KG N68:P68 both
0 133 2017-08-21 20.0 BP Geld abheben K12:M12 145.0 20.0 KG N65:P65 both

In [28]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'right_only', 'treated' : 'No'}
remaining = subset_data_frame(all_info, subsetting_rules)
remaining['is_transfer'] = ''
remaining['transfer_type'] = ''
remaining['money_pot_source'] = remaining['amount'].apply(lambda x : 1 if x < 0 else 0) * \
                                remaining['money_pot'] 
remaining['money_pot_sink'] = remaining['amount'].apply(lambda x : 1 if x > 0 else 0) * \
                              remaining['money_pot']

In [29]:
mark_transfers_filename = 'testtest2.xlsx'
remaining.to_excel(mark_transfers_filename)
mark_transfers = pyxl.load_workbook(mark_transfers_filename, data_only=True)

remaining_sheet = mark_transfers.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please check with an X or leave blank'
dv1.promptTitle = 'Checkmark'
 
list_of_transfer_types = ['Abheben', 'Aufladen', 'Kontotransfer', 'Bargeldtransfer']
rule = '"' + ','.join(list_of_transfer_types) + '"'
dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please select from the list'
dv2.promptTitle = 'List Selection'

list_of_accounts = ['KG', 'KE', 'KM', 'KP', 'KB', 'KC', 'BM', 'BP', 'CB', 'CT', 'CM', 'GM', 'SB', 'SF']
rule = '"' + ','.join(list_of_accounts) + '"'
dv3 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv3.error ='Your entry is not in the list'
dv3.errorTitle = 'Invalid Entry'
dv3.prompt = 'Please select from the list'
dv3.promptTitle = 'List Selection'

dv1.ranges.append('K2:K' + str(remaining_sheet.max_row))
dv2.ranges.append('L2:L' + str(remaining_sheet.max_row))
dv3.ranges.append('M2:N' + str(remaining_sheet.max_row))

remaining_sheet.add_data_validation(dv1)
remaining_sheet.add_data_validation(dv2)
remaining_sheet.add_data_validation(dv3)

for i in range(2, remaining_sheet.max_row + 1) :
    remaining_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    remaining_sheet['K' + str(i)].fill = the_fill
    remaining_sheet['L' + str(i)].fill = the_fill
    remaining_sheet['M' + str(i)].fill = the_fill
    remaining_sheet['N' + str(i)].fill = the_fill
    
for column_cells in remaining_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    remaining_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['B', 'F', 'G', 'H', 'I', 'J'] :
    remaining_sheet.column_dimensions[row].hidden = True    
    
mark_transfers.save(mark_transfers_filename)

In [30]:
checked_data = use_excel_for_data_entry(mark_transfers_filename, copy_mode=False, open_before_read=True)
checked_transfer_data = checked_data[checked_data['is_transfer'] == 'X']


The file testtest2.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: 
The read data is:
budget_type description date amount excel_range_x excel_range_y money_pot _merge treated is_transfer transfer_type money_pot_source money_pot_sink
119 NaN Geld aufladen Mitarbeiter ausweis 2017-08-01 -20.00 NaN H7:J7 BM right_only No X Aufladen BM CB
120 NaN Geld abheben 2017-08-03 30.00 NaN H8:J8 BM right_only No NaN NaN NaN BM
123 NaN Betriebsausflug 2017-08-07 -18.50 NaN H11:J11 BM right_only No NaN NaN BM NaN
124 NaN Geldfund 2017-08-12 0.05 NaN H12:J12 BM right_only No NaN NaN NaN BM
125 NaN BBk Karte Aufladen 2017-08-14 -10.00 NaN H15:J15 BM right_only No NaN NaN BM NaN
127 NaN Schatullenkorrektur 2017-08-20 1.05 NaN H20:J20 BM right_only No NaN NaN NaN BM
128 NaN Schatullenkorrektur 2017-08-20 -1.05 NaN H21:J21 BM right_only No NaN NaN BM NaN
130 NaN Karte aufladen 2017-08-28 -10.00 NaN H25:J25 BM right_only No NaN NaN BM NaN
134 NaN Budgetbeitrag von BMS 2017-07-28 1468.00 NaN N7:P7 KG right_only No NaN NaN NaN KG
135 NaN Miete FFM auf Max Konto 2017-08-02 450.00 NaN N19:P19 KG right_only No NaN NaN NaN KG
136 NaN Ausgehen mit Kollegen 2017-08-02 -20.60 NaN N21:P21 KG right_only No NaN NaN KG NaN
137 NaN Abheben 2017-08-03 -30.00 NaN N24:P24 KG right_only No NaN NaN KG NaN
138 NaN Sommerticket DB 2017-08-05 -96.00 NaN N29:P29 KG right_only No NaN NaN KG NaN
139 NaN Sommerticket DB 2017-08-05 -96.00 NaN N30:P30 KG right_only No NaN NaN KG NaN
140 NaN Amazon Fail Topf 2017-08-06 -4.61 NaN N35:P35 KG right_only No NaN NaN KG NaN
141 NaN Handy Paul 2017-08-10 -13.99 NaN N42:P42 KG right_only No NaN NaN KG NaN
142 NaN Rossmann 2017-08-10 -20.53 NaN N43:P43 KG right_only No NaN NaN KG NaN
143 NaN Sommerticket DB 2017-08-14 -192.00 NaN N48:P48 KG right_only No NaN NaN KG NaN
144 NaN Vorschuss für Sparticket 2017-08-14 192.00 NaN N49:P49 KG right_only No NaN NaN NaN KG
147 NaN Umbuchung 2017-08-23 302.79 NaN N73:P73 KG right_only No NaN NaN NaN KG
148 NaN Hotelkosten 2017-08-24 -188.00 NaN N74:P74 KG right_only No NaN NaN KG NaN
149 NaN Vorschuss Reisekosten Köln 2017-08-26 193.60 NaN N78:P78 KG right_only No NaN NaN NaN KG
150 NaN Prüfungsgebühr auf Max' Konto 2017-08-28 15.00 NaN N81:P81 KG right_only No NaN NaN NaN KG
151 NaN Prüfungsgebühr Dr. Hochs Konservatorium 2017-08-28 -15.00 NaN N82:P82 KG right_only No NaN NaN KG NaN
152 NaN Vodafone 2017-08-28 -20.19 NaN N83:P83 KG right_only No NaN NaN KG NaN
If you are not satisfied with the read data, enter any number. If you do this, the workbook will be opened again. Continue with any other input; abort with Q: 
OK.

In [31]:
display(checked_transfer_data)

for index, row in checked_transfer_data.iterrows() :
    automatically_enter_transfer_row(
        row['date'], row['transfer_type'], row['description'], row['amount'], 
        row['money_pot_source'], row['money_pot_sink'],
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'


budget_type description date amount excel_range_x excel_range_y money_pot _merge treated is_transfer transfer_type money_pot_source money_pot_sink
119 NaN Geld aufladen Mitarbeiter ausweis 2017-08-01 -20.0 NaN H7:J7 BM right_only No X Aufladen BM CB

In [76]:
## Mark remaining as : Budget_Event, Money_Transfer, Payment, Complex Process
## Give the Opportunity to give provisorial ids

subsetting_rules = {'treated' : 'No'}
## If I do not use copy(), it will throw warnings when only 
## subsetting with one rule. I suppose it does copy automatically 
## after from the second subsetting
remaining = subset_data_frame(all_info, subsetting_rules).copy()
remaining['ignore'] = 'X'
remaining['is_complex'] = ''
remaining['temporary_id'] = ''
remaining['event_abbr'] = ''
remaining['event_type'] = ''
remaining['is_budget_event'] = ''
remaining['is_recieving'] = ''
remaining['is_payment'] = ''
remaining['is_transfer'] = ''
remaining['additional_description'] = ''
remaining['effect_date'] = ''
remaining['money_pot_name'] = ''
remaining['money_pot_sink'] = ''
remaining['money_pot_sink_name'] = ''
remaining['budget_type_name'] = ''
remaining['in_group'] = ''
remaining['group_name'] = ''

## ToDo : Add additional_descriptions and effect_date

remaining = remaining[[
    'ignore', 'is_complex', 'temporary_id', 'event_abbr',
    'event_type', 'description', 'date', 
    'amount', 'money_pot', 'money_pot_name', 
    'money_pot_sink','money_pot_sink_name',
    'is_budget_event', 'budget_type', 'budget_type_name',  
    'is_recieving', 'is_payment', 'is_transfer', 'in_group', 'group_name', 'excel_range_x',
    'excel_range_y',  '_merge', 'treated'
]]

display(remaining)


ignore is_complex temporary_id event_abbr event_type description date amount money_pot money_pot_name ... budget_type_name is_recieving is_payment is_transfer in_group group_name excel_range_x excel_range_y _merge treated
4 X Vodafone 2017-08-28 -19.99 NaN ... A10:D10 NaN left_only No
5 X Telefonie 2017-08-28 -6.20 NaN ... A11:D11 NaN left_only No
6 X Rechtsschutzversicherung 2017-01-30 -13.90 NaN ... A12:D12 NaN left_only No
8 X GEW 2017-07-05 -2.50 NaN ... A14:D14 NaN left_only No
9 X Semestergebühr Paul 2016-06-21 -46.15 NaN ... A15:D15 NaN left_only No
10 X Semestergebühr Max 2016-06-21 -50.56 NaN ... A16:D16 NaN left_only No
11 X GEZ 2017-06-29 -17.50 NaN ... A17:D17 NaN left_only No
13 X Handy Paul 2017-08-10 -7.99 NaN ... A19:D19 NaN left_only No
16 X Backblaze Max 2017-04-18 -3.77 NaN ... A22:D22 NaN left_only No
17 X Sommertickets 2017-07-20 -48.00 NaN ... A23:D23 NaN left_only No
18 X Sommertickets 2017-08-05 -144.00 NaN ... A24:D24 NaN left_only No
20 X Backblaze Paul 2017-04-18 -3.77 NaN ... A26:D26 NaN left_only No
60 X Rossmann Zahnbürste 2017-08-10 -15.99 NaN ... A66:D66 NaN left_only No
61 X Rossmann 2017-08-10 -4.54 NaN ... A67:D67 NaN left_only No
115 X Fahrt BLN -> KM 2017-08-25 -24.00 NaN ... A120:D120 NaN left_only No
116 X Fahrt KM -> BLN 2017-08-27 -24.00 NaN ... A121:D121 NaN left_only No
120 X Geld abheben 2017-08-03 30.00 BM ... NaN H8:J8 right_only No
123 X Betriebsausflug 2017-08-07 -18.50 BM ... NaN H11:J11 right_only No
124 X Geldfund 2017-08-12 0.05 BM ... NaN H12:J12 right_only No
125 X BBk Karte Aufladen 2017-08-14 -10.00 BM ... NaN H15:J15 right_only No
127 X Schatullenkorrektur 2017-08-20 1.05 BM ... NaN H20:J20 right_only No
128 X Schatullenkorrektur 2017-08-20 -1.05 BM ... NaN H21:J21 right_only No
130 X Karte aufladen 2017-08-28 -10.00 BM ... NaN H25:J25 right_only No
134 X Budgetbeitrag von BMS 2017-07-28 1468.00 KG ... NaN N7:P7 right_only No
135 X Miete FFM auf Max Konto 2017-08-02 450.00 KG ... NaN N19:P19 right_only No
136 X Ausgehen mit Kollegen 2017-08-02 -20.60 KG ... NaN N21:P21 right_only No
137 X Abheben 2017-08-03 -30.00 KG ... NaN N24:P24 right_only No
138 X Sommerticket DB 2017-08-05 -96.00 KG ... NaN N29:P29 right_only No
139 X Sommerticket DB 2017-08-05 -96.00 KG ... NaN N30:P30 right_only No
140 X Amazon Fail Topf 2017-08-06 -4.61 KG ... NaN N35:P35 right_only No
141 X Handy Paul 2017-08-10 -13.99 KG ... NaN N42:P42 right_only No
142 X Rossmann 2017-08-10 -20.53 KG ... NaN N43:P43 right_only No
143 X Sommerticket DB 2017-08-14 -192.00 KG ... NaN N48:P48 right_only No
144 X Vorschuss für Sparticket 2017-08-14 192.00 KG ... NaN N49:P49 right_only No
147 X Umbuchung 2017-08-23 302.79 KG ... NaN N73:P73 right_only No
148 X Hotelkosten 2017-08-24 -188.00 KG ... NaN N74:P74 right_only No
149 X Vorschuss Reisekosten Köln 2017-08-26 193.60 KG ... NaN N78:P78 right_only No
150 X Prüfungsgebühr auf Max' Konto 2017-08-28 15.00 KG ... NaN N81:P81 right_only No
151 X Prüfungsgebühr Dr. Hochs Konservatorium 2017-08-28 -15.00 KG ... NaN N82:P82 right_only No
152 X Vodafone 2017-08-28 -20.19 KG ... NaN N83:P83 right_only No

40 rows × 24 columns


In [ ]:
mark_remaining_filename = 'remaining.xlsx'
remaining.to_excel(mark_remaining_filename)
mark_remaining = pyxl.load_workbook(mark_remaining_filename, data_only=False)

remaining_sheet = mark_remaining.active

def add_lookup_sheet(wb, db, table, list_of_cols, lookup_sheet_name = 'lookup', sorted_by = None) :
    crsr = db.cursor()
    if sorted_by is None : 
        crsr.execute('SELECT {} FROM {}'.format(','.join(list_of_cols), table))
    else :
        crsr.execute('SELECT {} FROM {} ORDER BY {}'.format(','.join(list_of_cols), table, sorted_by))
    results = crsr.fetchall()
    #return [r[0] for r in results]
    
    lookup_sheet = wb.create_sheet(lookup_sheet_name)
    for row in results : 
        lookup_sheet.append(row)
    
add_lookup_sheet(mark_remaining, db, 'money_pots', ['key', 'description'], 'lookup_money_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_types', ['abbreviation', 'type'], 'lookup_event_types', 'type')
add_lookup_sheet(mark_remaining, db, 'budget_pots', ['key', 'description'], 'lookup_budget_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_groups', ['group_id', 'description'], 'lookup_event_groups', 'group_id')

yellow_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
grey_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='BFBFBF')
unwrap_text = pyxl.styles.Alignment(horizontal='fill')

validate_checkmark = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
validate_checkmark.error ='Your entry is not in the list'
validate_checkmark.errorTitle = 'Invalid Entry'
validate_checkmark.prompt = 'Please check with an X or leave blank'
validate_checkmark.promptTitle = 'Checkmark'

def keylist_from_db(db, table, colname) :
    crsr = db.cursor()
    crsr.execute('SELECT {} FROM {}'.format(colname, table))
    results = crsr.fetchall()
    return [r[0] for r in results]

def add_validation_notes(dv) :
    dv.error ='Your entry is not in the list'
    dv.errorTitle = 'Invalid Entry'
    dv.prompt = 'Please select from the list'
    dv.promptTitle = 'List Selection'
    return dv



the_list = keylist_from_db(db, 'event_types', 'type')
the_list_str = '"' + ','.join(the_list) + '"'
validate_event_type = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_event_type = add_validation_notes(validate_event_type)

the_list = keylist_from_db(db, 'money_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_money_pot = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_money_pot = add_validation_notes(validate_money_pot)

the_list = keylist_from_db(db, 'budget_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_budget_pot = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_budget_pot = add_validation_notes(validate_budget_pot)

validate_checkmark.ranges.append('B2:C' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('N2:N' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('Q2:S' + str(remaining_sheet.max_row))

validate_event_type.ranges.append('F2:F' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('J2:J' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('L2:L' + str(remaining_sheet.max_row))
validate_budget_pot.ranges.append('O2:O' + str(remaining_sheet.max_row))

remaining_sheet.add_data_validation(validate_checkmark)
remaining_sheet.add_data_validation(validate_event_type)
remaining_sheet.add_data_validation(validate_money_pot)
remaining_sheet.add_data_validation(validate_budget_pot)

for i in range(2, remaining_sheet.max_row + 1) :
    remaining_sheet['H' + str(i)].number_format = 'DD/MM/YY'
    for s in ['D', 'F', 'J', 'L', 'O', 'T'] : 
        remaining_sheet[s + str(i)].fill = yellow_fill
    for s in ['B', 'C', 'N', 'Q', 'R', 'S'] :
        remaining_sheet[s + str(i)].fill = grey_fill
    remaining_sheet['E' + str(i)] = '=IF(ISBLANK(F' + str(i) + '), "", LOOKUP(F' + str(i) + \
                                     r', lookup_event_types!B:B, lookup_event_types!A:A))'
    remaining_sheet['K' + str(i)] = '=IF(ISBLANK(J' + str(i) + '), "", LOOKUP(J' + str(i) + \
                                     r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
    remaining_sheet['M' + str(i)] = '=IF(ISBLANK(L' + str(i) + '), "", LOOKUP(L' + str(i) + \
                                     r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
    remaining_sheet['P' + str(i)] = '=IF(ISBLANK(O' + str(i) + '), "", LOOKUP(O' + str(i) + \
                                     r', lookup_budget_pots!A:A, lookup_budget_pots!B:B))'
    remaining_sheet['P' + str(i)].alignment = unwrap_text
    remaining_sheet['U' + str(i)] = '=IF(ISBLANK(T' + str(i) + '), "", LOOKUP(T' + str(i) + \
                                     r', lookup_event_groups!A:A, lookup_event_groups!B:B))'


for column_cells in remaining_sheet.columns :
    if column_cells[0].column in ['B', 'C', 'D', 'E', 'J', 'L', 'N', 'O', 'Q', 'R', 'S', 'T'] :
        remaining_sheet.column_dimensions[column_cells[0].column].width = 4
    elif column_cells[0].column in ['K', 'M', 'P', 'U'] :
        remaining_sheet.column_dimensions[column_cells[0].column].width = 20
    else :
        length = max(len(str(cell.value)) for cell in column_cells)
        remaining_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['V', 'W', 'X', 'Y'] :
    remaining_sheet.column_dimensions[row].hidden = True    

mark_remaining.save(mark_remaining_filename)

In [ ]:
try :
    checked_data = use_excel_for_data_entry(mark_remaining_filename, copy_mode=False, open_before_read=True)
except RuntimeError :
    pass
#checked_transfer_data = checked_data[checked_data['ignore'] != 'X']


The file remaining.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 

In [ ]:
remaining = all_info[all_info['treated'] == 'No']

display(remaining)

print("number remaining: " + str(len(remaining)))

In [ ]:
#for table in ["money_events", "payments", "budget_events"] :
#    print('The table {}: '.format(table))
#    display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))
with pd.option_context('display.max_rows', None):
    display(pd.read_sql_query(
        'SELECT * FROM payments LEFT JOIN money_events where money_events.id = payments.id', db))
    display(pd.read_sql_query(
        'SELECT * FROM transfers LEFT JOIN money_events where money_events.id = transfers.id', db))

In [ ]:
wb.save('GemBil.xlsx')

In [ ]:
db.close()

In [ ]:
all_info.columns

In [ ]: