In [1]:
import openpyxl as pyxl
import pandas as pd
from gspread.utils import a1_to_rowcol, rowcol_to_a1

import re
from itertools import product

In [2]:
wb = pyxl.load_workbook('GemeinsameBilanzierung_16_17.xlsx')
august = wb.get_sheet_by_name('August')
august_ = wb.copy_worksheet(august)
august_.title = 'august_'
august_.sheet_state = 'hidden'

In [3]:
def list_from_range_string(range_string) :
    colon_position = range_string.find(':')
    if colon_position == -1 :
        raise
    first_cell = range_string[:colon_position]
    last_cell = range_string[colon_position+1:]
    
    first_row, first_col = a1_to_rowcol(first_cell)
    last_row, last_col = a1_to_rowcol(last_cell)
    
    return [rowcol_to_a1(i,j) for i,j in product(range(first_row, last_row+1), range(first_col, last_col+1))]

In [14]:
def id_generation(date) :
    date_int = int(date.strftime('%Y%m%d00'))
    #date_int = (int(date[0:4]) * 10000 + int(date[5:7]) * 100 + int(date[8:10])) * 100
    #crsr = db.cursor()
    #crsr.execute('SELECT id FROM money_events WHERE id BETWEEN {} AND {}'.format(
    #    date_int, date_int + 99))
    #results = [row[0] for row in crsr.fetchall()]

    current_id = date_int + 1
    #while current_id in results :
    #    current_id += 1
    #if current_id > date_int + 99 :
    #    raise IndexError('Encountered to many ids for the date {}'.format(date))

    return current_id

In [5]:
def date_convert(item) :
    try :
          return pyxl.utils.datetime.from_excel(item)
    except :
        if type(item) is str :
            return pd.to_datetime(item)
        else :
            return pd.Timestamp(item)
        #pass
            

def get_df_by_range(sheet, first_cell, last_cell, date_cols=None) :
    data_rows = [[cell.value for cell in row] + ['{0}:{1}'.format(row[0].coordinate, row[-1].coordinate)]
        for row in august[first_cell:last_cell]]

    df = pd.DataFrame(data_rows)
    new_index = df.iloc[:,range(len(df.columns)-1)].dropna(how='all').index
    if date_cols is not None and type(date_cols) is int :
        df.iloc[:,date_cols] = df.iloc[:,date_cols].apply(date_convert).copy()
    elif type(date_cols) is str : 
        df.loc[:,date_cols] = df.loc[:,date_cols].apply(date_convert).copy()
    elif type(date_cols) is list :
        for col in date_cols :
            if type(col) is int :
                df.iloc[:,col] = df.iloc[:,col].apply(date_convert).copy()
            if type(col) is str : 
                df.loc[:,col] = df.loc[:,col].apply(date_convert).copy()
    return df.loc[new_index]

In [7]:
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']

col_titles = ['description', 'date', 'amount', 'excel_range']

max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles
max_bargeld['money_pot'] = 'BM'

paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles
paul_bargeld['money_pot'] = 'BM'

konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles
konto['money_pot'] = 'KG'

conjoined = pd.concat([max_bargeld, paul_bargeld, konto])

In [8]:
all_info = pd.merge(budgeting, conjoined, how='outer', 
                    on=['description', 'date', 'amount'], indicator=True)

perfect_result = all_info[all_info['_merge'] == 'both']

In [15]:
for index, row in perfect_result.iterrows() :
    comment_text = 'This comment template is just a test.' + \
                   'The id is {} from the df row {}.'.format(id_generation(row['date']), index)
    comment = pyxl.comments.Comment(comment_text, 'budgeter')
    for cell in list_from_range_string(row['excel_range_x']) + \
                list_from_range_string(row['excel_range_y']) :
        august_[cell].comment = comment

In [16]:
wb.save('GemBil.xlsx')

In [ ]: