In [1]:
import gspread, gspread_dataframe
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import re
def num_to_col_letters(num) :
letters = ''
while num:
mod = (num - 1) % 26
letters += chr(mod + 65)
num = (num - 1) // 26
return ''.join(reversed(letters))
In [2]:
def connect_to_sheets(spreadsheet='GemeinsameBilanzierung_16_17', sheetname='') :
if type(spreadsheet) is str :
scope = ['https://spreadsheets.google.com/feeds']
creds = ServiceAccountCredentials.from_json_keyfile_name('client-secret.json', scope)
client = gspread.authorize(creds)
spreadsheet = client.open(spreadsheet)
# Find a workbook by name and open the first sheet
# Make sure you use the right name here.
if sheetname == '' :
return spreadsheet
else :
worksheet = spreadsheet.worksheet(sheetname)
return worksheet
In [4]:
res = { # re for at all valid
'valid_range' : r'^[A-Z]+\d*(:[A-Z]+\d*)?$',
# re for complete range
'normal_range' : r'^[A-Z]+\d+:[A-Z]+\d+$',
# re for single cell
'single_cell' : r'^[A-Z]+\d+$',
# re for only letters
'only_cols' : r'^[A-Z]+:[A-Z]+$',
# re for open end
'missing_start' : r'^[A-Z]+\d+:[A-Z]+$',
# re for open beginning
'missing_end' : r'^[A-Z]+:[A-Z]+\d+$'}
def normalize_range(range_string, worksheet) :
re_strings = [
# re for at all valid
r'^[A-Z]+\d*(:[A-Z]+\d*)?$',
# re for complete range
r'^[A-Z]+\d+:[A-Z]+\d+$',
# re for single cell
r'^[A-Z]+\d+$',
# re for only letters
r'^[A-Z]+:[A-Z]+$',
# re for open end
r'^[A-Z]+\d+:[A-Z]+$',
# re for open beginning
r'^[A-Z]+:[A-Z]+\d+$']
res = [re.compile(re_string) for re_string in re_strings]
if not res[0].match(range_string) :
raise RuntimeError('This is not a valid descriptor of ranges or cells')
if res[1].match(range_string) or res[2].match(range_string) :
return range_string
if res[5].match(range_string) :
pos_colon = range_string.find(':')
return range_string[0:pos_colon] + '1' + range_string[pos_colon:]
if res[4].match(range_string) :
return range_string + str(worksheet.row_count)
if res[3].match(range_string) :
pos_colon = range_string.find(':')
return range_string[0:pos_colon] + '1' + range_string[pos_colon:] + str(worksheet.row_count)
# def get_rowcol_range(range_string, worksheet) :
In [6]:
def read_gsheet_into_df(range_string, worksheet, date_col=None) :
range_string = normalize_range(range_string, worksheet)
cell_range = worksheet.range(range_string)
result = pd.DataFrame()
for cell in cell_range :
if pd.isnull(cell.numeric_value) :
if cell.value == '' :
value = cell.numeric_value
else :
value = cell.value
else :
value = round(cell.numeric_value, 2)
result.loc[cell.row, num_to_col_letters(cell.col)] = value
if date_col is not None and date_col in result.columns :
result[date_col] = pd.to_datetime('1899-12-30') + pd.to_timedelta(result[date_col], 'D')
return result.dropna(how='all')
In [7]:
def read_gsheet_into_df(range_string, worksheet, date_col=None) :
range_string = normalize_range(range_string, worksheet)
gspread.models.
if date_col is not None and date_col in result.columns :
result[date_col] = pd.to_datetime('1899-12-30') + pd.to_timedelta(result[date_col], 'D')
return result.dropna(how='all')
In [8]:
workbook = connect_to_sheets()
sheet = connect_to_sheets(workbook, 'August')
In [9]:
col_titles = ['description', 'date', 'amount']
budgeting = read_gsheet_into_df('A6:D', sheet, 'C')
budgeting.columns = ['budget_type'] + col_titles
max_bargeld = read_gsheet_into_df('H7:J', sheet, 'I')
max_bargeld.columns = col_titles
max_bargeld['money_pot'] = 'BM'
paul_bargeld = read_gsheet_into_df('K7:M', sheet, 'L')
paul_bargeld.columns = col_titles
paul_bargeld['money_pot'] = 'BP'
konto = read_gsheet_into_df('N7:P', sheet, 'O')
konto.columns = col_titles
konto['money_pot'] = 'KG'
conjoined = pd.concat([max_bargeld, paul_bargeld, konto])
display(budgeting, conjoined)
In [12]:
all_info = pd.merge(budgeting, conjoined, how='outer',
on=['description', 'date', 'amount'])
display(all_info)
perfect_result = all_info.dropna()
display(perfect_result)
In [14]:
import sqlite3 as sql
db = sql.connect('my-budget-v0.sqlite')
In [26]:
def id_generation(date) :
date_int = int(date.strftime('%Y%m%d'))
crsr = db.cursor()
crsr.execute('SELECT id FROM money_events WHERE id BETWEEN {} AND {}'.format(
date_int, date_int + 99))
results = [row[0] for row in crsr.fetchall()]
current_id = date_int + 1
while current_id in results :
current_id += 1
if current_id > date_int + 99 :
raise IndexError('Encountered to many ids for the date {}'.format(date))
return current_id
In [28]:
def simple_payment(date, description, amount, budget_type, transaction_type, money_pot):
new_id = id_generation(date)
crsr = db.cursor()
crsr.execute("INSERT INTO money_events VALUES ({}, '{}', '{}', DATE('{}'), NULL, NULL, 'auto-gen');".format(
new_id, transaction_type, description, date))
crsr.execute("INSERT INTO budget_events VALUES ({}, '{}', {}, NULL, DATE('{}'), NULL, NULL, 'auto-gen');".format(
new_id, budget_type, amount, date))
crsr.execute("INSERT INTO payments VALUES ({}, '{}', {}, NULL, NULL, DATE('{}'), NULL, NULL, 'auto-gen');".format(
new_id, money_pot, amount, date))
db.commit()
In [29]:
for index, row in perfect_result.iterrows() :
simple_payment(row['date'], row['description'], row['amount'], row['budget_type'],
'Kartenzahlung' if row['money_pot'] == 'KG' else 'Barzahlung', row['money_pot'])
In [30]:
def print_action_tables() :
crsr = db.cursor()
for table in ['money_events', 'budget_events', 'payments'] :
display(pd.read_sql_query('SELECT * FROM {};'.format(table), db))
print_action_tables()
In [ ]: