In [1]:
import openpyxl as pyxl
import pandas as pd
from gspread.utils import a1_to_rowcol, rowcol_to_a1
import re
from itertools import product
In [2]:
wb = pyxl.load_workbook('GemeinsameBilanzierung_16_17.xlsx')
august = wb.get_sheet_by_name('August')
august_ = wb.copy_worksheet(august)
august_.title = 'august_'
august_.sheet_state = 'hidden'
In [3]:
def list_from_range_string(range_string) :
colon_position = range_string.find(':')
if colon_position == -1 :
raise
first_cell = range_string[:colon_position]
last_cell = range_string[colon_position+1:]
first_row, first_col = a1_to_rowcol(first_cell)
last_row, last_col = a1_to_rowcol(last_cell)
return [rowcol_to_a1(i,j) for i,j in product(range(first_row, last_row+1), range(first_col, last_col+1))]
In [14]:
def id_generation(date) :
date_int = int(date.strftime('%Y%m%d00'))
#date_int = (int(date[0:4]) * 10000 + int(date[5:7]) * 100 + int(date[8:10])) * 100
#crsr = db.cursor()
#crsr.execute('SELECT id FROM money_events WHERE id BETWEEN {} AND {}'.format(
# date_int, date_int + 99))
#results = [row[0] for row in crsr.fetchall()]
current_id = date_int + 1
#while current_id in results :
# current_id += 1
#if current_id > date_int + 99 :
# raise IndexError('Encountered to many ids for the date {}'.format(date))
return current_id
In [5]:
def date_convert(item) :
try :
return pyxl.utils.datetime.from_excel(item)
except :
if type(item) is str :
return pd.to_datetime(item)
else :
return pd.Timestamp(item)
#pass
def get_df_by_range(sheet, first_cell, last_cell, date_cols=None) :
data_rows = [[cell.value for cell in row] + ['{0}:{1}'.format(row[0].coordinate, row[-1].coordinate)]
for row in august[first_cell:last_cell]]
df = pd.DataFrame(data_rows)
new_index = df.iloc[:,range(len(df.columns)-1)].dropna(how='all').index
if date_cols is not None and type(date_cols) is int :
df.iloc[:,date_cols] = df.iloc[:,date_cols].apply(date_convert).copy()
elif type(date_cols) is str :
df.loc[:,date_cols] = df.loc[:,date_cols].apply(date_convert).copy()
elif type(date_cols) is list :
for col in date_cols :
if type(col) is int :
df.iloc[:,col] = df.iloc[:,col].apply(date_convert).copy()
if type(col) is str :
df.loc[:,col] = df.loc[:,col].apply(date_convert).copy()
return df.loc[new_index]
In [7]:
budgeting = get_df_by_range(august_, 'A6', 'D130', 2)
budgeting.columns = ['budget_type', 'description', 'date', 'amount', 'excel_range']
col_titles = ['description', 'date', 'amount', 'excel_range']
max_bargeld = get_df_by_range(august_, 'H7', 'J130', 1)
max_bargeld.columns = col_titles
max_bargeld['money_pot'] = 'BM'
paul_bargeld = get_df_by_range(august_, 'K7', 'M130', 1)
paul_bargeld.columns = col_titles
paul_bargeld['money_pot'] = 'BM'
konto = get_df_by_range(august_, 'N7', 'P130', 1)
konto.columns = col_titles
konto['money_pot'] = 'KG'
conjoined = pd.concat([max_bargeld, paul_bargeld, konto])
In [8]:
all_info = pd.merge(budgeting, conjoined, how='outer',
on=['description', 'date', 'amount'], indicator=True)
perfect_result = all_info[all_info['_merge'] == 'both']
In [15]:
for index, row in perfect_result.iterrows() :
comment_text = 'This comment template is just a test.' + \
'The id is {} from the df row {}.'.format(id_generation(row['date']), index)
comment = pyxl.comments.Comment(comment_text, 'budgeter')
for cell in list_from_range_string(row['excel_range_x']) + \
list_from_range_string(row['excel_range_y']) :
august_[cell].comment = comment
In [16]:
wb.save('GemBil.xlsx')
In [ ]: