In [1]:
%load_ext autoreload
%autoreload 2

In [23]:
import openpyxl as pyxl
import pandas as pd
import numpy as np

from bidict import bidict

from excel_helpers import *
from db_entries import *

import re
from itertools import chain
# we only need the function datetime.datetime.now; we can now reference it as dt.now
from datetime import datetime as dt

import sqlite3 as sql

import logging
logger = logging.getLogger()

In [4]:
%%capture

db_filename = 'my-budget-dev-v2.sqlite'

%run nuclear_option.py $db_filename
%run database_setup.py $db_filename

db = sql.connect(db_filename)

In [5]:
excel_filename = 'GemeinsameBilanzierung_16_17__dev.xlsx'

current_sheet = 'August'
comment_sheet = 'august_'

# We take here the option data_only since for now we are not interested in the expression
wb = pyxl.load_workbook(excel_filename, data_only=True)


august = wb.get_sheet_by_name(current_sheet)
august_ = wb.copy_worksheet(august)
august_.title = comment_sheet
august_.sheet_state = 'hidden'

In [6]:
def autogenerate_database_comment_multiple_excel_ranges(filename, ranges,
                                                        comment_sheet) :
    try :
        ranges_string = ', '.join(ranges[:-1]) + \
                        (' and ' if len(ranges) > 1 else '') + ranges[-1]
    except IndexError :
        raise ValueError('You should provide at least one Excel range')
    return 'This entry was automatically generated from the excel file ' + \
           '{}. It is based on the cells {}. A note has been '.format(
               filename, ranges_string) + \
           'added to the respective cells in the sheet {}.'.format(comment_sheet.title)

def autogenerate_database_comment(filename, range_, comment_sheet) :
    return autogenerate_database_comment_multiple_excel_ranges(
                filename, [range_], comment_sheet)

def autogenerate_excel_comment(date, db_filename, the_id) :
    return 'On {} this cell was automatically read and '.format(date) + \
           'inserted into the database {}. The id ofthe entry is {}.'.format(
               date, db_filename, the_id)

In [7]:
def automatically_enter_payment_row(date, payment_type, description, amount, money_pot, budget_type,
                                    ranges, excel_filename, current_sheet, comment_sheet, db_filename) :

    list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
    
    database_comment = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, list_of_ranges, comment_sheet.title)
    
    the_id = put_payment_into_database_autogenerated(db, date, payment_type, description, amount, 
                                                     money_pot, budget_type, database_comment)
   
    excel_comment = autogenerate_excel_comment(
        dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    
    #Get a list of all referenced cells in the ranges
    cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
        
    put_comment_into_excel(comment_sheet, cells, excel_comment)
    
    return the_id

def automatically_enter_transfer_row(date, transfer_type, description, amount, money_pot_source, 
                                     money_pot_sink, ranges, excel_filename, current_sheet, 
                                     comment_sheet, db_filename, effect_date = None) :

    list_of_ranges = [current_sheet.title + '!' + a_range for a_range in ranges]
    
    database_comment = autogenerate_database_comment_multiple_excel_ranges(
        excel_filename, list_of_ranges, comment_sheet)
    
    the_id = put_transfer_into_database_autogenerated(db, date, transfer_type, description, amount, 
                                                      money_pot_source, money_pot_sink, comment=database_comment)
   
    excel_comment = autogenerate_excel_comment(
        dt.now().strftime('%Y-%m-%d'), db_filename, the_id)
    
    #Get a list of all referenced cells in the ranges
    cells = list(chain.from_iterable([list_from_range_string(a_range) for a_range in ranges]))
        
    put_comment_into_excel(comment_sheet, cells, excel_comment)
    
    return the_id

In [8]:
def subset_data_frame(data_frame, rules) :
    """Subset a pandas.DataFrame by a dictionary of rules. The rules should have the following form:
    {<column name> : <restriction>} where <restriction> is either a number, a string starting with 
    '<' or '>' and followed by number or a general string. If it is a simple string or number, the 
    subsetting expression
        data_frame[dataframe[<column name>] == <restriction>]
    will be used. The other case will be interpreted as
        data_frame[dataframe[<column name>] <|> <number>].
    """
    
    result = data_frame
    for rule in rules.items() :
        column = rule[0]
        restriction = rule[1]
        
        if type(restriction) is str and restriction[0] in ['<', '>'] :
                try :
                    number = float(restriction[1:])
                except :
                    raise ValueError('After a rule starting with < or > you need to specify a number')
                result = result.loc[result[column] < number] if restriction[0] == '<' \
                            else result.loc[result[column] > number]
                continue

        result = result.loc[result[column] == restriction]
        if len(result) == 0 :
            logger.info('After the rule ' + str(rule) + ' no item was left')
            return result
    
    logger.info(str(len(result)) + ' items were selected.')
    return result

def multirule_subset(data_frame, rules) :
    """Collect multiple subsets (union of multiple subsets) of a pandas.DataFrame into a single DataFrame
    by using subset_data_frame.
    
    Keyword Arguments:
        data_frame -- the DataFrame to be subsetted
        rules      -- a list of dictionaries, where the dicts fulfill the requirements specified for 
                      subset_data_frame
    """
    new_data = pd.DataFrame(columns=data_frame.columns)
    for rule in rules :
        new_data = new_data.append(subset_data_frame(data_frame, rule))
        
    return new_data

In [9]:
col_titles_budget = ['budget_type', 'description', 'date', 'amount', 'excel_range']
col_titles_payments = ['description', 'date', 'amount', 'excel_range']

budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = col_titles_budget

max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles_payments
max_bargeld['money_pot'] = 'BM'

paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles_payments
paul_bargeld['money_pot'] = 'BP'

konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles_payments
konto['money_pot'] = 'KG'

all_payments = pd.concat([max_bargeld, paul_bargeld, konto])

all_info = pd.merge(budgeting, all_payments, how='outer', 
                    on=['description', 'date', 'amount'], indicator=True)

all_info['treated'] = 'No'

all_info['date'] = all_info['date'].dt.date
all_info['amount'] = all_info['amount'].round(2)

In [10]:
## Creating Data for Bargeld Payments data

rules = [{'_merge' : 'both', 'money_pot' : B,  'treated' : 'No'} for B in ['BM', 'BP']]
full_match_bargeld = multirule_subset(all_info, rules)
full_match_bargeld['payment_type'] = 'Barzahlung'
full_match_bargeld['use_data'] = 'X'
full_match_bargeld['complex'] = ''

bargeld_wb_filename = 'bargeld_payments.xlsx'

In [30]:
cols = bidict({'ignore' : 'A',
                   'is_complex' : 'B',
                   'temporary_id' : 'C',
                   'event_type' : 'D',
                   'description' : 'E',
                   'date' : 'F',
                   'amount' : 'G',
                   'money_pot' : 'H',
                   'money_pot_sink' : 'I',
                   'is_budget_event' : 'J',
                   'budget_type' : 'K',
                   'is_recieving' : 'L',
                   'is_payment' : 'M',
                   'is_transfer' : 'O',
                   'in_group' : 'P',
                   'group_name' : 'Q',
                   'excel_range_x' : 'R',
                   'excel_range_y' : 'S',
                   '_merge' : 'T',
                   'treated' : 'U'})
cols.
#print(cols.keys())


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-30-0050431c6aba> in <module>()
     20                    'treated' : 'U'})
     21 
---> 22 print(cols.keys()[4])

TypeError: 'KeysView' object does not support indexing

In [53]:
def create_excel_table_from_data(dataframe, filename, hidden_cols = None) :
    
    ## Predefine the cols and remeber their col_names in Excel
    cols = bidict({'ignore'           : 'B',
                   'is_complex'       : 'C',
                   'temporary_id'     : 'D',
                   'event_type'       : 'E',
                   'type_description' : 'F',
                   'description'      : 'G',
                   'date'             : 'H',
                   'amount'           : 'I',
                   'money_pot'        : 'J',
                   'money_pot_name'   : 'K',
                   'money_pot_sink'   : 'L',
                   'money_sink_name'  : 'M',
                   'is_budget_event'  : 'N',
                   'budget_pot'       : 'O',
                   'budget_type'      : 'P',
                   'is_recieving'     : 'Q',
                   'is_payment'       : 'R',
                   'is_transfer'      : 'S',
                   'in_group'         : 'T',
                   'group_name'       : 'U',
                   'excel_range_x'    : 'V',
                   'excel_range_y'    : 'W',
                   '_merge'           : 'X', 
                   'treated'          : 'Y' })
    
    ## Some categories of the data in the different cols
    prefilled_cols = ['description',
                      'date'       ,
                      'amount'     ]
    checkmark_cols = ['ignore'         , 
                      'is_complex'     , 
                      'is_budget_event', 
                      'is_recieving'   , 
                      'is_payment'     , 
                      'is_transfer'    ]
    entry_cols =     ['temporary_id'    , 
                      'type_description',    
                      'money_pot'       , 
                      'money_pot_sink'  , 
                      'budget_pot'      , 
                      'in_group'        ]
    if hidden_cols is None : 
        hidden_cols = ['excel_range_x', 'excel_range_y', '_merge', 'treated']
      
    ## Define default values to fill unknown columns
    default_values = ['X'] + [''] * (len(cols) - 2) + ['No']
    col_defaults = dict(zip(cols.keys(), default_values))
    
    ## Insert the missing cols into the dataframe with the defined default value
    for col_title in cols.keys() :
        if col_title not in dataframe.columns :
            dataframe[col_title] = col_defaults[col_title]
            
    dataframe = dataframe[list(cols.keys())]

    ## Save the dataframe to excel and open it
    dataframe.to_excel(filename)
    wb = pyxl.load_workbook(filename, data_only=False)
    sheet = wb.active
    
    ## Add data validation to the excel document
    #### First enforce Checkmarks in all cols in the list checkmark_cols
    
    validate_checkmark = pyxl.worksheet.datavalidation.DataValidation(
        type="list", formula1='"X"', allow_blank=True)
    validate_checkmark.error ='Your entry is not in the list'
    validate_checkmark.errorTitle = 'Invalid Entry'
    validate_checkmark.prompt = 'Please check with an X or leave blank'
    validate_checkmark.promptTitle = 'Checkmark'
    
    for col_title in checkmark_cols :
        col_name = cols[col_title]
        validate_checkmark.ranges.append(col_name + '2:' + col_name + str(sheet.max_row))
  
    sheet.add_data_validation(validate_checkmark)
    
    ## Insert some lookup tables for data validation and to make the excel dokument more readable
    #### Some functions to make comfortable data validation
    def add_lookup_sheet(wb, db, table, list_of_cols, lookup_sheet_name = 'lookup', sorted_by = None) :
        crsr = db.cursor()
        if sorted_by is None : 
            crsr.execute('SELECT {} FROM {}'.format(','.join(list_of_cols), table))
        else :
            crsr.execute('SELECT {} FROM {} ORDER BY {}'.format(','.join(list_of_cols), table, sorted_by))
        results = crsr.fetchall()
        
        lookup_sheet = wb.create_sheet(lookup_sheet_name)
        for row in results : 
            lookup_sheet.append(row)
            
    def add_validation_notes(dv) :
        dv.error ='Your entry is not in the list'
        dv.errorTitle = 'Invalid Entry'
        dv.prompt = 'Please select from the list'
        dv.promptTitle = 'List Selection'
        return dv  
    
    ###### Event Types
    lookup_sheet = 'lookup_event_types'
    add_lookup_sheet(wb, db, 'event_types', ['type', 'abbreviation'], lookup_sheet, 'type')
    validate_event_type = pyxl.worksheet.datavalidation.DataValidation(
        type="list", formula1='=' + lookup_sheet + '!A:A', allow_blank=False)
    validate_event_type = add_validation_notes(validate_event_type)
    col_name = cols['type_description']
    validate_event_type.ranges.append(col_name + '2:' + col_name + str(sheet.max_row))
    sheet.add_data_validation(validate_event_type)
    
    ###### Money Pots 
    lookup_sheet = 'lookup_money_pots'
    add_lookup_sheet(wb, db, 'money_pots', ['key', 'description'], lookup_sheet, 'key')
    validate_money_pot = pyxl.worksheet.datavalidation.DataValidation(
        type="list", formula1='=' + lookup_sheet + '!A:A', allow_blank=False)
    validate_money_pot = add_validation_notes(validate_money_pot)
    for col_title in ['money_pot', 'money_pot_sink'] :
        col_name = cols[col_title]
        validate_money_pot.ranges.append(col_name + '2:' + col_name + str(sheet.max_row))
    sheet.add_data_validation(validate_money_pot)
    
    ###### Buddget Pots
    lookup_sheet = 'lookup_budget_pots'
    add_lookup_sheet(wb, db, 'budget_pots', ['key', 'description'], lookup_sheet, 'key')
    validate_budget_pot = pyxl.worksheet.datavalidation.DataValidation(
        type="list", formula1='=' + lookup_sheet + '!A:A', allow_blank=False)
    validate_budget_pot = add_validation_notes(validate_budget_pot)
    col_title = cols['budget_pot']
    validate_budget_pot.ranges.append(col_name + '2:' + col_name + str(sheet.max_row))
    sheet.add_data_validation(validate_budget_pot)

    ###### Event Groups (without validation)
    add_lookup_sheet(wb, db, 'event_groups', ['group_id', 'description'], 'lookup_event_groups', 'group_id')
    
    ##Now some formatting and inserting formulas into individual cells
    yellow_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
    green_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='92D050')
    grey_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='BFBFBF')
    unwrap_text = pyxl.styles.Alignment(horizontal='fill')

    for i in range(2, sheet.max_row + 1) :
        sheet[cols['date'] + str(i)].number_format = 'DD/MM/YY'
        for col_title in entry_cols : 
            sheet[cols[col_title] + str(i)].fill = yellow_fill
        for col_title in prefilled_cols : 
            sheet[cols[col_title] + str(i)].fill = green_fill
        for col_title in checkmark_cols :
            sheet[cols[col_title] + str(i)].fill = grey_fill
            
                
        lookup_relations = [
            ['type_description', 'event_type',      'lookup_event_types'],
            ['money_pot',        'money_pot_name',  'lookup_money_pots'],
            ['money_pot_sink',   'money_sink_name', 'lookup_money_pots'],
            ['budget_pot',       'budget_type',     'lookup_budget_pots'],
            ['in_group',         'group_name',      'lookup_event_groups']
        ]
        
        for relation in lookup_relations : 
            col_key = cols[relation[0]]
            col_descr = cols[relation[1]]
            sheet_name = relation[2]
            sheet[col_descr + str(i)] =  '=IF(ISBLANK(' + col_key + str(i) + \
                                       '), "", LOOKUP(' + col_key + str(i) + \
                                      r', ' + sheet_name + '!A:A, ' + sheet_name + '!B:B))'
        
    #for column_cells in sheet.columns :
    #    if column_cells[0].column in ['B', 'C', 'D', 'E', 'J', 'L', 'N', 'O', 'Q', 'R', 'S', 'T'] :
    #        sheet.column_dimensions[column_cells[0].column].width = 4
    #    elif column_cells[0].column in ['K', 'M', 'P', 'U'] :
    #        sheet.column_dimensions[column_cells[0].column].width = 20
    #    else :
    #        length = max(len(str(cell.value)) for cell in column_cells)
    #        sheet.column_dimensions[column_cells[0].column].width = length
    
    length = max(len(str(cell.value)) for cell in column_cells)
    sheet.column_dimensions[column_cells[0].column].width = length
    
    for col in hidden_cols :
        sheet.column_dimensions[cols[col]].hidden = True    
    
    wb.save(filename)
    
create_excel_table_from_data(full_match_bargeld, bargeld_wb_filename)
checked_full_match_bargeld = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)


The file bargeld_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: Q
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-53-b6809b094df0> in <module>()
    181 
    182 create_excel_table_from_data(full_match_bargeld, bargeld_wb_filename)
--> 183 checked_full_match_bargeld = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)

~/budgeter/excel_helpers.py in use_excel_for_data_entry(workbook_path, copy_mode, temp_path, delete_temp, open_before_read)
    156             os.system('open ' + workbook_path)
    157             signal = input('Make any input to continue. Abort with Q: ')
--> 158             abort_with_q(signal)
    159 
    160         # Reading the data

~/budgeter/excel_helpers.py in abort_with_q(signal)
    138     def abort_with_q(signal) :
    139         if signal in ['Q', 'q'] :
--> 140             raise RuntimeError('The Excel data entry procedure was aborted.')
    141 
    142     if copy_mode :

RuntimeError: The Excel data entry procedure was aborted.

In [44]:
full_match_bargeld.to_excel(bargeld_wb_filename)
bargeld_wb = pyxl.load_workbook(bargeld_wb_filename, data_only=True)

bargeld_sheet = bargeld_wb.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
        
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
            'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer', 
            'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']

the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'

dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'

dv1.ranges.append('K2:K' + str(bargeld_sheet.max_row))
dv2.ranges.append('L2:M' + str(bargeld_sheet.max_row))

bargeld_sheet.add_data_validation(dv1)
bargeld_sheet.add_data_validation(dv2)

for i in range(2, bargeld_sheet.max_row + 1) :
    bargeld_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    bargeld_sheet['K' + str(i)].fill = the_fill
    bargeld_sheet['L' + str(i)].fill = the_fill
    bargeld_sheet['M' + str(i)].fill = the_fill
    
for column_cells in bargeld_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    bargeld_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['F', 'G', 'H', 'I', 'J'] :
    bargeld_sheet.column_dimensions[row].hidden = True    
    
bargeld_wb.save(bargeld_wb_filename)

In [13]:
checked_full_match_konto = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)


The file bargeld_payments.xlsx will be opened in Excel for data entry. Enter Q to abort, enter anything else to continue: 
Make any input to continue. Abort with Q: Q
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-13-1a586f327e47> in <module>()
----> 1 checked_full_match_konto = use_excel_for_data_entry(bargeld_wb_filename, copy_mode=False)

~/budgeter/excel_helpers.py in use_excel_for_data_entry(workbook_path, copy_mode, temp_path, delete_temp, open_before_read)
    156             os.system('open ' + workbook_path)
    157             signal = input('Make any input to continue. Abort with Q: ')
--> 158             abort_with_q(signal)
    159 
    160         # Reading the data

~/budgeter/excel_helpers.py in abort_with_q(signal)
    138     def abort_with_q(signal) :
    139         if signal in ['Q', 'q'] :
--> 140             raise RuntimeError('The Excel data entry procedure was aborted.')
    141 
    142     if copy_mode :

RuntimeError: The Excel data entry procedure was aborted.

In [ ]:
for index, row in full_match_bargeld.iterrows() :
    the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
        row['description'], row['amount'], row['money_pot'], 
        row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
        excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
#************************************#
# This code will not be used anymore #
#************************************#

## Putting all the Info which had matching info on the left and the right hand of the budget sheet into
## the database.

## Only doing the "Barzahlung" payments right now

##for BB in ['BM', 'BP'] :
##    subsetting_rules = {'_merge' : 'both', 'money_pot' : BB,  'treated' : 'No'}
##
##    full_match_bargeld = subset_data_frame(all_info, subsetting_rules)
##
##    for index, row in full_match_bargeld.iterrows() :
##        the_id = automatically_enter_payment_row(row['date'], 'Barzahlung', 
##            row['description'], row['amount'], row['money_pot'], 
##            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
##            excel_filename, august, august_, db_filename)
##        
##        all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'both', 'money_pot' : 'KG',  'treated' : 'No'}
full_match_konto = subset_data_frame(all_info, subsetting_rules)
full_match_konto['payment_type'] = 'Kartenzahlung'
full_match_konto['use_data'] = 'X'
full_match_konto['complex'] = ''

In [ ]:
gk_wb_filename = 'konto_payments.xlsx'

In [ ]:
full_match_konto.to_excel(gk_wb_filename)
gk_wb = pyxl.load_workbook(gk_wb_filename, data_only=True)

gk_sheet = gk_wb.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
        
the_list_a = ['Barzahlung', 'Kartenzahlung', 'Überweisung', 'Dauerauftrag',
            'SEPA-Mandat', 'Bankeinzug', 'Abheben', 'Kontotransfer', 
            'Bargeldtransfer', 'Einnahme', 'Geldfund']
the_list_b = ['B', 'K', 'U', 'D', 'S', 'BE', 'A', 'KT', 'BT', 'E', 'GF']

the_list_str = '"' + ','.join(the_list_a + the_list_b) + '"'

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=the_list_str, allow_blank=False)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please select from the list'
dv1.promptTitle = 'List Selection'

dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please check with an X or leave blank'
dv2.promptTitle = 'Checkmark'

dv1.ranges.append('K2:K' + str(gk_sheet.max_row))
dv2.ranges.append('L2:M' + str(gk_sheet.max_row))

gk_sheet.add_data_validation(dv1)
gk_sheet.add_data_validation(dv2)

for i in range(2, gk_sheet.max_row + 1) :
    gk_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    gk_sheet['K' + str(i)].fill = the_fill
    gk_sheet['L' + str(i)].fill = the_fill
    gk_sheet['M' + str(i)].fill = the_fill
    
for column_cells in gk_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    gk_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['F', 'G', 'H', 'I', 'J'] :
    gk_sheet.column_dimensions[row].hidden = True    
    
gk_wb.save(gk_wb_filename)

In [ ]:
checked_full_match_konto = use_excel_for_data_entry(gk_wb_filename, copy_mode=False)

In [ ]:
miete_rules              = {'description' : 'Miete', 'amount' : -568,  'treated' : 'No'} 
haftpflicht_rules        = {'description' : 'Haftpflichtversicherung', 'amount' : -7.5,  'treated' : 'No'} 
berufsunfaehigkeit_rules = {'description' : 'Berufsunfähigkeitsversicherung', 'amount' : -49.05,  'treated' : 'No'} 
strom_rules_rules        = {'description' : 'Strom EnviaM', 'amount' : -51,  'treated' : 'No'} 
vodafone_rules           = {'description' : 'Vodafone', 'amount' : -19.99,  'treated' : 'No'} 
handy_paul_rules         = {'description' : 'Handy Paul', 'amount' : -7.99,  'treated' : 'No'} 
handy_max_rules          = {'description' : 'Handy Max', 'amount' : -7.99,  'treated' : 'No'} 
music_paul_rules         = {'description' : 'Apple Music Paul', 'amount' : -4.99,  'treated' : 'No'} 
music_max_rules          = {'description' : 'Spotify Max', 'amount' : -4.99,  'treated' : 'No'} 
backblaze_paul_rules     = {'description' : 'Backblaze Paul', 'amount' : -3.77,  'treated' : 'No'} 
backblaze_max_rules      = {'description' : 'Backblaze Max', 'amount' : -3.77,  'treated' : 'No'} 
semester_paul_rules      = {'description' : 'Semestergebühr Paul', 'amount' : -46.15,  'treated' : 'No'} 
semester_max_rules       = {'description' : 'Semestergebühr Max', 'amount' : -50.57,  'treated' : 'No'} 
sport_jahre_rules        = {'description' : '', 'amount' : -8,  'treated' : 'No'} 
sport_rules              = {'description' : 'Fitnessstudio', 'amount' : -39.8,  'treated' : 'No'} 
gew_rules                = {'description' : 'GEW', 'amount' : -2.5,  'treated' : 'No'} 
gez_rules                = {'description' : 'GEZ', 'amount' : -17.5,  'treated' : 'No'} 
miete_ffm_rules          = {'description' : 'Miete FFM', 'amount' : -450,  'treated' : 'No'} 

all_rules = { 1 : [miete_rules                              ],
              2 : [haftpflicht_rules                        ],
              3 : [berufsunfaehigkeit_rules                 ],
              4 : [strom_rules_rules                        ],
              5 : [vodafone_rules                           ],
              6 : [handy_paul_rules                         ],
              7 : [handy_max_rules                          ],
              8 : [music_paul_rules                         ],
              9 : [music_max_rules                          ],
             10 : [backblaze_paul_rules, backblaze_max_rules],
             11 : [semester_paul_rules                      ],
             12 : [semester_max_rules                       ],
             13 : [sport_jahre_rules                        ],
             14 : [sport_rules                              ],
             15 : [gew_rules                                ],
             16 : [gez_rules                                ],
             17 : [miete_ffm_rules                                ]}

monthly = [1, 2, 3, 4, 8, 9, 14, 17]
bigger_intervalls = [10, 11, 12, 13, 15, 16]
variable_prices = [5, 6, 7]
## ToDo : 2 + 3 sonderfall beachten; insbesondere wird das gemeinsam vom konto abgebucht
## ToDo : 4 is only 51 € since mid 2017

for rule_number in monthly :
    rule = all_rules[rule_number][0]
    
    the_info = subset_data_frame(checked_full_match_konto, rule)

    for index, row in the_info.iterrows() :
        the_id = automatically_enter_payment_row(row['date'], row['payment_type'], 
            row['description'], row['amount'], row['money_pot'], 
            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
            excel_filename, august, august_, db_filename)

        crsr = db.cursor()
        crsr.execute('''INSERT INTO event_in_group VALUES (1, {});'''.format(the_id))
        db.commit()

        all_info.loc[index, 'treated'] = 'Yes'
        
## ToDo : do the bigger intervalls

In [ ]:
for index, row in checked_full_match_konto.iterrows() :
    if checked_full_match_konto.loc[index, 'use_data'] != 'X' :
        continue
    if checked_full_match_konto.loc[index, 'treated'] == 'Yes' :
        continue
        
    the_id = automatically_enter_payment_row(row['date'], row['payment_type'], 
            row['description'], row['amount'], row['money_pot'], 
            row['budget_type'], [row['excel_range_x'], row['excel_range_y']],
            excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
## List of all fully matched things we didnt treat yet
subsetting_rules = {'_merge' : 'both', 'treated' : 'No'}
remaining_matched = subset_data_frame(all_info, subsetting_rules)

display(remaining_matched)

In [ ]:
display(all_info[all_info['description'].str.contains('Schatulle')])

subsetting_rules = {'treated' : 'xxx'}
schatulle_info = subset_data_frame(all_info, subsetting_rules)

for direction in ['an', 'aus'] :
    descriptor = 'Geld ' + direction + ' Schatulle'
    subsetting_rules = {'description' : descriptor, 'treated' : 'No'}
    schatulle_info = schatulle_info.append(subset_data_frame(all_info, subsetting_rules))
    
display(schatulle_info)

In [ ]:
display(schatulle_info)
for index, row in schatulle_info.iterrows() :
    i = 0
    while True :
        the_date = all_info.loc[index+i, 'date']
        if str(the_date) == 'NaT' or the_date is np.nan :
            the_date = all_info.loc[index-i, 'date']
            if str(the_date) == 'NaT' or the_date is np.nan :
                i = i + 1
                continue
        
        break
        
    automatically_enter_transfer_row(
        the_date, 'Bargeldtransfer', row['description'], row['amount'], row['money_pot'], 'SB',
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
## The Three individual usages of the White Trianon Card : Charging
subsetting_rules = {'description' : 'Geld aufladen Trianon', 'amount' : -20, 'treated' : 'No'}
trianon_aufladen = subset_data_frame(all_info, subsetting_rules)

for index, row in trianon_aufladen.iterrows() :
    automatically_enter_transfer_row(
        row['date'], 'Aufladen', row['description'], row['amount'], row['money_pot'], 'CT',
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
doppelter_kaffee = {'budget_type' : 'AM', 'description' : 'Doppelter Kaffee Trianon', 
                                     'amount' : -1.5, 'treated' : 'No'}
einfacher_kaffee = {'budget_type' : 'AM', 'description' : 'Kaffee', 
                                     'amount' : -0.75, 'treated' : 'No'}
rules = [doppelter_kaffee, einfacher_kaffee]
trianon_payments = multirule_subset(all_info, rules)

for index, row in trianon_payments.iterrows() :
    automatically_enter_payment_row(
        row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CT', 
        row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
subsetting_rules = {'budget_type' : 'AM', 'treated' : 'No'}
bundesbank_karte = subset_data_frame(all_info, subsetting_rules)

for index, row in bundesbank_karte.iterrows() :
    automatically_enter_payment_row(
        row['date'], 'Kartenzahlung', row['description'], row['amount'], 'CB', 
        row['budget_type'], [row['excel_range_x']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
subsetting_rules = {'description' : 'Geld abheben', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben = subset_data_frame(all_info, subsetting_rules)

subsetting_rules = {'description' : 'Geld abheben', 'amount' : '>0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_gotten = subset_data_frame(all_info, subsetting_rules)

subsetting_rules = {'description' : 'Geld abheben', 'amount' : '<0', '_merge' : 'right_only', 'treated' : 'No'}
geld_abheben_drawn = subset_data_frame(all_info, subsetting_rules)
geld_abheben_drawn['amount_abs'] = - geld_abheben_drawn['amount']

## Subselect relevant columns
abheben_gotten = geld_abheben_gotten[['date', 'amount', 'money_pot', 'description', 
                                      'excel_range_y']].reset_index(level=0)
abheben_drawn = geld_abheben_drawn[['date', 'amount_abs', 'money_pot', 
                                    'excel_range_y']].reset_index(level=0)


abheben_data = pd.merge(abheben_gotten, abheben_drawn, how='outer', left_on = ['date', 'amount'], 
                        right_on=['date', 'amount_abs'], left_index=True, indicator=True)

display(abheben_data)


for index, row in abheben_data[abheben_data['_merge'] == 'both'].iterrows() :
    automatically_enter_transfer_row(
        row['date'], 'Abheben', row['description'], row['amount'], row['money_pot_y'], row['money_pot_x'],
        [row['excel_range_y_x'], row['excel_range_y_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[row['index_x'], 'treated'] = 'Yes'
    all_info.loc[row['index_y'], 'treated'] = 'Yes'

In [ ]:
## For the "GK" (Gemeinsames Konto) Payments we create an excel sheet, where we can change the payment type
subsetting_rules = {'_merge' : 'right_only', 'treated' : 'No'}
remaining = subset_data_frame(all_info, subsetting_rules)
remaining['is_transfer'] = ''
remaining['transfer_type'] = ''
remaining['money_pot_source'] = remaining['amount'].apply(lambda x : 1 if x < 0 else 0) * \
                                remaining['money_pot'] 
remaining['money_pot_sink'] = remaining['amount'].apply(lambda x : 1 if x > 0 else 0) * \
                              remaining['money_pot']

In [ ]:
mark_transfers_filename = 'testtest2.xlsx'
remaining.to_excel(mark_transfers_filename)
mark_transfers = pyxl.load_workbook(mark_transfers_filename, data_only=True)

remaining_sheet = mark_transfers.active

the_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')

dv1 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
dv1.error ='Your entry is not in the list'
dv1.errorTitle = 'Invalid Entry'
dv1.prompt = 'Please check with an X or leave blank'
dv1.promptTitle = 'Checkmark'
 
list_of_transfer_types = ['Abheben', 'Aufladen', 'Kontotransfer', 'Bargeldtransfer']
rule = '"' + ','.join(list_of_transfer_types) + '"'
dv2 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv2.error ='Your entry is not in the list'
dv2.errorTitle = 'Invalid Entry'
dv2.prompt = 'Please select from the list'
dv2.promptTitle = 'List Selection'

list_of_accounts = ['KG', 'KE', 'KM', 'KP', 'KB', 'KC', 'BM', 'BP', 'CB', 'CT', 'CM', 'GM', 'SB', 'SF']
rule = '"' + ','.join(list_of_accounts) + '"'
dv3 = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1=rule, allow_blank=True)
dv3.error ='Your entry is not in the list'
dv3.errorTitle = 'Invalid Entry'
dv3.prompt = 'Please select from the list'
dv3.promptTitle = 'List Selection'

dv1.ranges.append('K2:K' + str(remaining_sheet.max_row))
dv2.ranges.append('L2:L' + str(remaining_sheet.max_row))
dv3.ranges.append('M2:N' + str(remaining_sheet.max_row))

remaining_sheet.add_data_validation(dv1)
remaining_sheet.add_data_validation(dv2)
remaining_sheet.add_data_validation(dv3)

for i in range(2, remaining_sheet.max_row + 1) :
    remaining_sheet['D' + str(i)].number_format = 'DD/MM/YY'
    remaining_sheet['K' + str(i)].fill = the_fill
    remaining_sheet['L' + str(i)].fill = the_fill
    remaining_sheet['M' + str(i)].fill = the_fill
    remaining_sheet['N' + str(i)].fill = the_fill
    
for column_cells in remaining_sheet.columns:
    length = max(len(str(cell.value)) for cell in column_cells)
    remaining_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['B', 'F', 'G', 'H', 'I', 'J'] :
    remaining_sheet.column_dimensions[row].hidden = True    
    
mark_transfers.save(mark_transfers_filename)

In [ ]:
checked_data = use_excel_for_data_entry(mark_transfers_filename, copy_mode=False, open_before_read=True)
checked_transfer_data = checked_data[checked_data['is_transfer'] == 'X']

In [ ]:
display(checked_transfer_data)

for index, row in checked_transfer_data.iterrows() :
    automatically_enter_transfer_row(
        row['date'], row['transfer_type'], row['description'], row['amount'], 
        row['money_pot_source'], row['money_pot_sink'],
        [row['excel_range_y']], excel_filename, august, august_, db_filename)
    
    all_info.loc[index, 'treated'] = 'Yes'

In [ ]:
## Mark remaining as : Budget_Event, Money_Transfer, Payment, Complex Process
## Give the Opportunity to give provisorial ids

subsetting_rules = {'treated' : 'No'}
## If I do not use copy(), it will throw warnings when only 
## subsetting with one rule. I suppose it does copy automatically 
## after from the second subsetting
remaining = subset_data_frame(all_info, subsetting_rules).copy()
remaining['ignore'] = 'X'
remaining['is_complex'] = ''
remaining['temporary_id'] = ''
remaining['event_abbr'] = ''
remaining['event_type'] = ''
remaining['is_budget_event'] = ''
remaining['is_recieving'] = ''
remaining['is_payment'] = ''
remaining['is_transfer'] = ''
remaining['additional_description'] = ''
remaining['effect_date'] = ''
remaining['money_pot_name'] = ''
remaining['money_pot_sink'] = ''
remaining['money_pot_sink_name'] = ''
remaining['budget_type_name'] = ''
remaining['in_group'] = ''
remaining['group_name'] = ''

## ToDo : Add additional_descriptions and effect_date

remaining = remaining[[
    'ignore', 'is_complex', 'temporary_id', 'event_abbr',
    'event_type', 'description', 'date', 
    'amount', 'money_pot', 'money_pot_name', 
    'money_pot_sink','money_pot_sink_name',
    'is_budget_event', 'budget_type', 'budget_type_name',  
    'is_recieving', 'is_payment', 'is_transfer', 'in_group', 'group_name', 'excel_range_x',
    'excel_range_y',  '_merge', 'treated'
]]

display(remaining)

In [ ]:
mark_remaining_filename = 'remaining.xlsx'
remaining.to_excel(mark_remaining_filename)
mark_remaining = pyxl.load_workbook(mark_remaining_filename, data_only=False)

remaining_sheet = mark_remaining.active

def add_lookup_sheet(wb, db, table, list_of_cols, lookup_sheet_name = 'lookup', sorted_by = None) :
    crsr = db.cursor()
    if sorted_by is None : 
        crsr.execute('SELECT {} FROM {}'.format(','.join(list_of_cols), table))
    else :
        crsr.execute('SELECT {} FROM {} ORDER BY {}'.format(','.join(list_of_cols), table, sorted_by))
    results = crsr.fetchall()
    #return [r[0] for r in results]
    
    lookup_sheet = wb.create_sheet(lookup_sheet_name)
    for row in results : 
        lookup_sheet.append(row)
    
add_lookup_sheet(mark_remaining, db, 'money_pots', ['key', 'description'], 'lookup_money_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_types', ['abbreviation', 'type'], 'lookup_event_types', 'type')
add_lookup_sheet(mark_remaining, db, 'budget_pots', ['key', 'description'], 'lookup_budget_pots', 'key')
add_lookup_sheet(mark_remaining, db, 'event_groups', ['group_id', 'description'], 'lookup_event_groups', 'group_id')

yellow_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='FFFF02')
grey_fill = pyxl.styles.PatternFill(fill_type='solid', start_color='BFBFBF')
unwrap_text = pyxl.styles.Alignment(horizontal='fill')

validate_checkmark = pyxl.worksheet.datavalidation.DataValidation(type="list", formula1='"X"', allow_blank=True)
validate_checkmark.error ='Your entry is not in the list'
validate_checkmark.errorTitle = 'Invalid Entry'
validate_checkmark.prompt = 'Please check with an X or leave blank'
validate_checkmark.promptTitle = 'Checkmark'

def keylist_from_db(db, table, colname) :
    crsr = db.cursor()
    crsr.execute('SELECT {} FROM {}'.format(colname, table))
    results = crsr.fetchall()
    return [r[0] for r in results]

def add_validation_notes(dv) :
    dv.error ='Your entry is not in the list'
    dv.errorTitle = 'Invalid Entry'
    dv.prompt = 'Please select from the list'
    dv.promptTitle = 'List Selection'
    return dv



the_list = keylist_from_db(db, 'event_types', 'type')
the_list_str = '"' + ','.join(the_list) + '"'
validate_event_type = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_event_type = add_validation_notes(validate_event_type)

the_list = keylist_from_db(db, 'money_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_money_pot = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_money_pot = add_validation_notes(validate_money_pot)

the_list = keylist_from_db(db, 'budget_pots', 'key')
the_list_str = '"' + ','.join(the_list) + '"'
validate_budget_pot = pyxl.worksheet.datavalidation.DataValidation(
    type="list", formula1=the_list_str, allow_blank=False)
validate_budget_pot = add_validation_notes(validate_budget_pot)

validate_checkmark.ranges.append('B2:C' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('N2:N' + str(remaining_sheet.max_row))
validate_checkmark.ranges.append('Q2:S' + str(remaining_sheet.max_row))

validate_event_type.ranges.append('F2:F' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('J2:J' + str(remaining_sheet.max_row))
validate_money_pot.ranges.append('L2:L' + str(remaining_sheet.max_row))
validate_budget_pot.ranges.append('O2:O' + str(remaining_sheet.max_row))

remaining_sheet.add_data_validation(validate_checkmark)
remaining_sheet.add_data_validation(validate_event_type)
remaining_sheet.add_data_validation(validate_money_pot)
remaining_sheet.add_data_validation(validate_budget_pot)

for i in range(2, remaining_sheet.max_row + 1) :
    remaining_sheet['H' + str(i)].number_format = 'DD/MM/YY'
    for s in ['D', 'F', 'J', 'L', 'O', 'T'] : 
        remaining_sheet[s + str(i)].fill = yellow_fill
    for s in ['B', 'C', 'N', 'Q', 'R', 'S'] :
        remaining_sheet[s + str(i)].fill = grey_fill
    remaining_sheet['E' + str(i)] = '=IF(ISBLANK(F' + str(i) + '), "", LOOKUP(F' + str(i) + \
                                     r', lookup_event_types!B:B, lookup_event_types!A:A))'
    remaining_sheet['K' + str(i)] = '=IF(ISBLANK(J' + str(i) + '), "", LOOKUP(J' + str(i) + \
                                     r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
    remaining_sheet['M' + str(i)] = '=IF(ISBLANK(L' + str(i) + '), "", LOOKUP(L' + str(i) + \
                                     r', lookup_money_pots!A:A, lookup_money_pots!B:B))'
    remaining_sheet['P' + str(i)] = '=IF(ISBLANK(O' + str(i) + '), "", LOOKUP(O' + str(i) + \
                                     r', lookup_budget_pots!A:A, lookup_budget_pots!B:B))'
    remaining_sheet['P' + str(i)].alignment = unwrap_text
    remaining_sheet['U' + str(i)] = '=IF(ISBLANK(T' + str(i) + '), "", LOOKUP(T' + str(i) + \
                                     r', lookup_event_groups!A:A, lookup_event_groups!B:B))'


for column_cells in remaining_sheet.columns :
    if column_cells[0].column in ['B', 'C', 'D', 'E', 'J', 'L', 'N', 'O', 'Q', 'R', 'S', 'T'] :
        remaining_sheet.column_dimensions[column_cells[0].column].width = 4
    elif column_cells[0].column in ['K', 'M', 'P', 'U'] :
        remaining_sheet.column_dimensions[column_cells[0].column].width = 20
    else :
        length = max(len(str(cell.value)) for cell in column_cells)
        remaining_sheet.column_dimensions[column_cells[0].column].width = length
    
for row in ['V', 'W', 'X', 'Y'] :
    remaining_sheet.column_dimensions[row].hidden = True    

mark_remaining.save(mark_remaining_filename)

In [ ]:
try :
    checked_data = use_excel_for_data_entry(mark_remaining_filename, copy_mode=False, open_before_read=True)
except RuntimeError :
    pass
#checked_transfer_data = checked_data[checked_data['ignore'] != 'X']

In [ ]:
remaining = all_info[all_info['treated'] == 'No']

display(remaining)

print("number remaining: " + str(len(remaining)))

In [ ]:
#for table in ["money_events", "payments", "budget_events"] :
#    print('The table {}: '.format(table))
#    display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))
with pd.option_context('display.max_rows', None):
    display(pd.read_sql_query(
        'SELECT * FROM payments LEFT JOIN money_events where money_events.id = payments.id', db))
    display(pd.read_sql_query(
        'SELECT * FROM transfers LEFT JOIN money_events where money_events.id = transfers.id', db))

In [ ]:
wb.save('GemBil.xlsx')

In [ ]:
db.close()

In [ ]: