In [ ]:
%load_ext pycodestyle_magic
In [ ]:
#%%pycodestyle
# Standard Library imports first
from datetime import date, datetime, timedelta
from functools import lru_cache as cache
from os import makedirs
# Heavy-lifting 3rd party tools next.
import requests
import numpy as np
import pandas as pd
'''____ ____ ___ _ _ ___ ____ ____ ___ ____ ____ ___
[__ |___ | | | |__] |__/ |___ |__] | | |__/ |
___] |___ | |__| | | \ |___ | |__| | \ | '''
# Start logging right away
from os.path import isdir, isfile, dirname, abspath
from logzero import logger, setup_logger
if not isdir('./logs/'): makedirs('./logs/')
logger = setup_logger(logfile='./logs/app.log', maxBytes=1000000, backupCount=30)
logger.info("Libraries loaded.")
# Pipulate lastly, in developer or pip install mode.
devmode = True
if devmode:
logger.warning('Pipulate in developer mode. You may edit ../pipdev/pipulate/__init__.py')
import sys
from inspect import getfile, currentframe
currentdir = dirname(abspath(getfile(currentframe())))
parentdir = dirname(currentdir)
sys.path.insert(0, parentdir)
from pipdev import pipulate as gs
from importlib import reload
reload(gs) #ensures re-running this JN-block reloads edited pipulate files while you develop.
else:
import pipulate as gs # <--The line to be commented out when out of delveloper-mode.
logger.info(gs.h1('Pipulating...'))
# Custom config values
dockey = '<Your Doc Key>'
tab_name = 'New Template'
headers = {'Authorization': "<your token>"}
an_id = <some parameter>
'''____ ____ _ _ _ _ ____ ____ ___ _ _ _ ____
| | | |\ | |\ | |___ | | | |\ | | __
|___ |__| | \| | \| |___ |___ | | | \| |__] '''
logger.info(gs.h2('Connecting...'))
tab = gs.key(dockey).worksheet(tab_name)
logger.info(gs.link(dockey))
# All data columns MUST have field-names in row1. It may be hidden in the spreadsheet.
row1 = tab.row_values(1)
if row1:
while not row1[-1]:
row1.pop()
else:
logger.error("Row1 can not be empty. Exiting.")
raise SystemExit()
col_count = len(row1)
# The first data-row comes immediately after the word "formatting" (always) found in Column A.
col1 = tab.col_values(1)
if col1:
while not col1[-1]:
col1.pop()
else:
logger.error('You must have at least one row of data')
raise SystemExit()
try:
first_row_with_data = col1.index('formatting') + 2
except:
logger.error('Column A must have a cell containing "formatting" immediately above first data row.')
'''____ ___ _ _ _ ____ ___ _ _ _ ____
|__| | \ | | | [__ | | |\ | | __
| | |__/ _| |__| ___] | | | \| |__] '''
logger.info(gs.h2('Adjusting...'))
# This is a modest little reminder of our conventions. Why be bigger than the blit?
logger.info('Auto-trimming extra rows & columns.')
tab.resize(rows=len(col1), cols=col_count+1)
tab.resize(rows=len(col1)+2, cols=col_count+1)
# Even blank spacer colunns need column-labels. Also, a field-mapper just in case.
fm = dict()
rx = row1[:]
blanks = 0
for i, item in enumerate(row1):
col_name = item
if not col_name:
col_name = '%s%s' % ('xx', blanks)
rx[i] = col_name
blanks = blanks + 1
fm[col_name] = eval("'%s'" % gs.cc(i+1))
if 'date range' in col1:
logger.info('Updating date range labels.')
dates = gs.date_ranges(yoy=False)
human_dates = gs.date_ranges(human=True, yoy=False)
if 'date range' in col1:
range_row = col1.index('date range') + 1
for metric in ['users', 'clicks', 'revenue']:
for i, num in enumerate(['90', '60', '30']):
try:
field_dex = '%s%s' % (metric, num)
range_col = row1.index(field_dex) + 1
tab.update_cell(range_row, range_col, human_dates[i])
except:
pass
else:
logger.warning('Insert a row labeled "date range" in Column 1 for date ranges.')
'''___ ____ ___ ____ ___ ____ ___ ____ _ _ ___ ____ ____
| \ |__| | |__| | | | |__] |__| |\ | | \ |__| [__
|__/ | | | | | | |__| | | | | \| |__/ | | ___] '''
logger.info(gs.h2('Data to Pandas...'))
# Blit the Pipulate-range out of GSheet and into Pandas memory
rows = first_row_with_data, len(col1)
cols = 'A', gs.cc(len(row1))
cl, df = gs.pipulate(tab, rows, cols, rx)
'''___ ____ ____ ___ ___ _ _ _ ____ _ _ _ _ ____ ___ _ ____ _ _ ____
| \ |___ |__| |__] |__] | \_/ |___ | | |\ | | | | | | |\ | [__
|__/ | .| | | | |___ | | |__| | \| |___ | | |__| | \| ___] '''
def category(an_id, a_date):
endpoint = '<some endpoint to build list>' % (an_id, a_date)
r = requests.get(endpoint, headers=headers)
ids = r.json()[0]['ids']
return ids
def fpo(row, **kwargs):
"""Pass-through function for testing. Echos df.apply(fpo, axis=1, echo='fieldname').
Notice how we use **kwargs to pass in the echo argument on df.apply() in manipulations."""
col_name = 'index'
if 'echo' in kwargs:
col_name = kwargs['echo']
return_me = 'not found'
if col_name in row:
return_me = row[rx.index(col_name)]
return return_me
@cache()
def cached_api_data(*row):
'''Returns result from API-call the first time and from cache remaining times that day.
This function is intended to be "wrapped" by non-caching outer functions.'''
id_column = rx.index('id')
id = row[id_column]
return_me = list()
endpoint2 = '<some URL that pulls information based on id parameter>?id[]=%s' % id
r2 = requests.get(endpoint2, headers=headers)
try:
return_me = r2.json()[0]
except:
pass
return return_me
def title(row):
result_row = cached_api_data(*row)
name = result_row['title']
return(name)
def link(row):
result_row = cached_api_data(*row)
name = result_row['link']
return(name)
'''_ _ ____ _ _ _ ___ _ _ _ ____ ___ _ ____ _ _ ____
|\/| |__| |\ | | |__] | | | |__| | | | | |\ | [__
| | | | | \| | | |__| |___ | | | | |__| | \| ___] '''
logger.info(gs.h2('Manipulations...'))
ids = category(an_id, gs.api_now)
ids = an_ids[:len(col1)]
df['id'] = pd.Series(ids)
df['title'] = df.apply(title, axis=1)
df['link'] = df.apply(link, axis=1)
df['traffic'] = df.apply(fpo, axis=1, echo='id') # Passes through the id. Use fpo as template for df.apply() functions.
'''___ ____ ___ ____ ___ ____ ____ ____ _ _ ____ ____ ___
| \ |__| | |__| | | | | __ [__ |__| |___ |___ |
|__/ | | | | | | |__| |__] ___] | | |___ |___ | '''
logger.info('Pushing data back to GSheet.')
logger.info(gs.h2('Data to GSheet...'))
df.fillna(0, inplace=True)
gs.populate(tab, cl, df)
'''___ _ _ _ ____ ____ ___ ____ _ _ ___ ____
| | |\/| |___ [__ | |__| |\/| |__] [__
| | | | |___ ___] | | | | | | ___] '''
if 'timestamp' in col1:
logger.info(gs.h2('Timestamps...'))
logger.info('Processing each row with timestamps.')
for item in fm:
if item and item != 'index' and item[:2] != 'xx':
eval_me = 'fm["%s"]' % item
column_letter = eval(eval_me)
timestamp_row = col1.index('timestamp') + 1
timestamp_col = gs.aa(column_letter)
gmt = gs.gmt()
tab.update_cell(timestamp_row, timestamp_col, gmt)
else:
logger.warning('Insert a row labeled "timestamp" in Column 1 for timestamps.')
logger.info(gs.h1('Complete!'))
In [ ]: