In [ ]:

    
#%%pycodestyle

# Standard Library imports first
from datetime import date, datetime, timedelta
from functools import lru_cache as cache
from os import makedirs

# Heavy-lifting 3rd party tools next.
import requests
import numpy as np
import pandas as pd

'''____ ____ ___ _  _ ___     ____ ____ ___  ____ ____ ___ 
   [__  |___  |  |  | |__]    |__/ |___ |__] |  | |__/  |  
   ___] |___  |  |__| |       |  \ |___ |    |__| |  \  |  '''

# Start logging right away
from os.path import isdir, isfile, dirname, abspath
from logzero import logger, setup_logger
if not isdir('./logs/'): makedirs('./logs/')
logger = setup_logger(logfile='./logs/app.log', maxBytes=1000000, backupCount=30)
logger.info("Libraries loaded.")

# Pipulate lastly, in developer or pip install mode.
devmode = True
if devmode:
    logger.warning('Pipulate in developer mode. You may edit ../pipdev/pipulate/__init__.py')
    import sys
    from inspect import getfile, currentframe
    currentdir = dirname(abspath(getfile(currentframe())))
    parentdir = dirname(currentdir)
    sys.path.insert(0, parentdir)
    from pipdev import pipulate as gs
    from importlib import reload
    reload(gs) #ensures re-running this JN-block reloads edited pipulate files while you develop.
else:
    import pipulate as gs # <--The line to be commented out when out of delveloper-mode.

logger.info(gs.h1('Pipulating...'))

# Custom config values
dockey = '<Your Doc Key>'
tab_name = 'New Template'
headers = {'Authorization': "<your token>"}
an_id = <some parameter>

'''____ ____ _  _ _  _ ____ ____ ___ _ _  _ ____ 
   |    |  | |\ | |\ | |___ |     |  | |\ | | __ 
   |___ |__| | \| | \| |___ |___  |  | | \| |__] '''

logger.info(gs.h2('Connecting...'))
tab = gs.key(dockey).worksheet(tab_name)
logger.info(gs.link(dockey))
# All data columns MUST have field-names in row1. It may be hidden in the spreadsheet.
row1 = tab.row_values(1)
if row1:
    while not row1[-1]:
        row1.pop()
else:
    logger.error("Row1 can not be empty. Exiting.")
    raise SystemExit()
col_count = len(row1)
# The first data-row comes immediately after the word "formatting" (always) found in Column A.
col1 = tab.col_values(1)
if col1:
    while not col1[-1]:
        col1.pop()
else:
    logger.error('You must have at least one row of data')
    raise SystemExit()
try:
    first_row_with_data = col1.index('formatting') + 2
except:
    logger.error('Column A must have a cell containing "formatting" immediately above first data row.')

'''____ ___   _ _  _ ____ ___ _ _  _ ____ 
   |__| |  \  | |  | [__   |  | |\ | | __ 
   |  | |__/ _| |__| ___]  |  | | \| |__] '''

logger.info(gs.h2('Adjusting...'))
# This is a modest little reminder of our conventions. Why be bigger than the blit?
logger.info('Auto-trimming extra rows & columns.')
tab.resize(rows=len(col1), cols=col_count+1)
tab.resize(rows=len(col1)+2, cols=col_count+1)
# Even blank spacer colunns need column-labels. Also, a field-mapper just in case.
fm = dict()
rx = row1[:]
blanks = 0
for i, item in enumerate(row1):
    col_name = item
    if not col_name:
        col_name = '%s%s' % ('xx', blanks)
        rx[i] = col_name
        blanks = blanks + 1
    fm[col_name] = eval("'%s'" % gs.cc(i+1))
if 'date range' in col1:
    logger.info('Updating date range labels.')
    dates = gs.date_ranges(yoy=False)
    human_dates = gs.date_ranges(human=True, yoy=False)
    if 'date range' in col1:
        range_row = col1.index('date range') + 1
        for metric in ['users', 'clicks', 'revenue']:
            for i, num in enumerate(['90', '60', '30']):
                try:
                    field_dex = '%s%s' % (metric, num)
                    range_col = row1.index(field_dex) + 1
                    tab.update_cell(range_row, range_col, human_dates[i])
                except:
                    pass
else:
    logger.warning('Insert a row labeled "date range" in Column 1 for date ranges.')

'''___  ____ ___ ____    ___ ____    ___  ____ _  _ ___  ____ ____ 
   |  \ |__|  |  |__|     |  |  |    |__] |__| |\ | |  \ |__| [__  
   |__/ |  |  |  |  |     |  |__|    |    |  | | \| |__/ |  | ___] '''

logger.info(gs.h2('Data to Pandas...'))
# Blit the Pipulate-range out of GSheet and into Pandas memory
rows = first_row_with_data, len(col1)
cols = 'A', gs.cc(len(row1))
cl, df = gs.pipulate(tab, rows, cols, rx)


'''___  ____  ____ ___  ___  _    _   _    ____ _  _ _  _ ____ ___ _ ____ _  _ ____ 
   |  \ |___  |__| |__] |__] |     \_/     |___ |  | |\ | |     |  | |  | |\ | [__  
   |__/ |    .|  | |    |    |___   |      |    |__| | \| |___  |  | |__| | \| ___] '''


def category(an_id, a_date):
    endpoint = '<some endpoint to build list>' % (an_id, a_date)
    r = requests.get(endpoint, headers=headers)
    ids = r.json()[0]['ids']
    return ids


def fpo(row, **kwargs):
    """Pass-through function for testing. Echos df.apply(fpo, axis=1, echo='fieldname').
    Notice how we use **kwargs to pass in the echo argument on df.apply() in manipulations."""
    
    col_name = 'index'
    if 'echo' in kwargs:
        col_name = kwargs['echo']
    return_me = 'not found'
    if col_name in row:
        return_me = row[rx.index(col_name)]
    return return_me


@cache()
def cached_api_data(*row):
    '''Returns result from API-call the first time and from cache remaining times that day.
    This function is intended to be "wrapped" by non-caching outer functions.'''
    
    id_column = rx.index('id')
    id = row[id_column]
    return_me = list()
    endpoint2 = '<some URL that pulls information based on id parameter>?id[]=%s' % id
    r2 = requests.get(endpoint2, headers=headers)
    try:
        return_me = r2.json()[0]
    except:
        pass
    return return_me


def title(row):
    result_row = cached_api_data(*row)
    name = result_row['title']
    return(name)


def link(row):
    result_row = cached_api_data(*row)
    name = result_row['link']
    return(name)


'''_  _ ____ _  _ _ ___  _  _ _    ____ ___ _ ____ _  _ ____ 
   |\/| |__| |\ | | |__] |  | |    |__|  |  | |  | |\ | [__  
   |  | |  | | \| | |    |__| |___ |  |  |  | |__| | \| ___] '''

logger.info(gs.h2('Manipulations...'))
ids = category(an_id, gs.api_now)
ids = an_ids[:len(col1)]
df['id'] = pd.Series(ids)
df['title'] = df.apply(title, axis=1)
df['link'] = df.apply(link, axis=1)
df['traffic'] = df.apply(fpo, axis=1, echo='id') # Passes through the id. Use fpo as template for df.apply() functions.

'''___  ____ ___ ____    ___ ____    ____ ____ _  _ ____ ____ ___ 
   |  \ |__|  |  |__|     |  |  |    | __ [__  |__| |___ |___  |  
   |__/ |  |  |  |  |     |  |__|    |__] ___] |  | |___ |___  |  '''

logger.info('Pushing data back to GSheet.')
logger.info(gs.h2('Data to GSheet...'))
df.fillna(0, inplace=True)
gs.populate(tab, cl, df)

'''___ _ _  _ ____ ____ ___ ____ _  _ ___  ____ 
    |  | |\/| |___ [__   |  |__| |\/| |__] [__  
    |  | |  | |___ ___]  |  |  | |  | |    ___] '''

if 'timestamp' in col1:
    logger.info(gs.h2('Timestamps...'))
    logger.info('Processing each row with timestamps.')            
    for item in fm:
        if item and item != 'index' and item[:2] != 'xx':
            eval_me = 'fm["%s"]' % item
            column_letter = eval(eval_me)
            timestamp_row = col1.index('timestamp') + 1
            timestamp_col = gs.aa(column_letter)
            gmt = gs.gmt()
            tab.update_cell(timestamp_row, timestamp_col, gmt)
else:
    logger.warning('Insert a row labeled "timestamp" in Column 1 for timestamps.')

logger.info(gs.h1('Complete!'))