In [0]:
#@title Copyright 2019 The Lifetime Value Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

Churn Prediction for the Kaggle Acquire Valued Shoppers Challenge


In [0]:
import os

import numpy as np
import pandas as pd
import tqdm
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import tensorflow_probability as tfp
from typing import Sequence

# install and import ltv
!pip install -q git+https://github.com/google/lifetime_value
import lifetime_value as ltv

In [0]:
tfd = tfp.distributions
if not tf.executing_eagerly():
  tf.compat.v1.enable_eager_execution()  # needed on TF 1.x; TF 2 is eager by default
pd.options.mode.chained_assignment = None  # default='warn'

Global variables


In [0]:
COMPANY = '104900040'  # @param { isTemplate: true, type: 'string'}
LOSS = 'ziln'  # @param { isTemplate: true, type: 'string'} ['bce', 'ziln']
MODEL = 'linear'  # @param { isTemplate: true, type: 'string'} ['linear', 'dnn']
LEARNING_RATE = 0.0002  # @param { isTemplate: true}
EPOCHS = 400  # @param {type: 'integer'}
OUTPUT_CSV_FOLDER = '/tmp/lifetime-value/result'  # @param { isTemplate: true, type: 'string'}

In [0]:
CATEGORICAL_FEATURES = ['chain', 'dept', 'category', 'brand', 'productmeasure']
NUMERIC_FEATURES = ['log_calibration_value']

ALL_FEATURES = CATEGORICAL_FEATURES + NUMERIC_FEATURES

Data

Download data

Set up the Kaggle API following https://www.kaggle.com/docs/api


In [0]:
%%shell
if [ -e /tmp/lifetime-value/acquire-valued-shoppers-challenge/transactions.csv ]
then
  echo "File already exists, no need to download."
else
  rm -rf /tmp/lifetime-value/acquire-valued-shoppers-challenge
  mkdir -p /tmp/lifetime-value/acquire-valued-shoppers-challenge
  cd /tmp/lifetime-value/acquire-valued-shoppers-challenge
  kaggle competitions download -c acquire-valued-shoppers-challenge
  unzip acquire-valued-shoppers-challenge.zip
  gunzip transactions.csv.gz
fi


Downloading acquire-valued-shoppers-challenge.zip to /tmp/lifetime-value/acquire-valued-shoppers-challenge
100%|██████████| 2.86G/2.86G [01:04<00:00, 47.9MB/s]

Archive:  acquire-valued-shoppers-challenge.zip
  inflating: trainHistory.csv.gz     
  inflating: sampleSubmission.csv.gz  
  inflating: testHistory.csv.gz      
  inflating: offers.csv.gz           
  inflating: transactions.csv.gz     

Load transaction CSV


In [0]:
def load_transaction_data(company):
  all_data_filename = '/tmp/lifetime-value/acquire-valued-shoppers-challenge/transactions.csv'
  one_company_data_filename = (
      '/tmp/lifetime-value/acquire-valued-shoppers-challenge/transactions_company_{}.csv'
      .format(company))
  if os.path.isfile(one_company_data_filename):
    df = pd.read_csv(one_company_data_filename)
  else:
    data_list = []
    chunksize = 10**6
    # The full file has ~350M rows, so this loop runs ~350 iterations.
    for chunk in tqdm.tqdm(pd.read_csv(all_data_filename, chunksize=chunksize)):
      data_list.append(chunk.query("company=='{}'".format(company)))
    df = pd.concat(data_list, axis=0)
    df.to_csv(one_company_data_filename, index=False)
  return df

Preprocess data


In [0]:
def preprocess(df):
  df = df.query('purchaseamount>0')
  df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
  df['start_date'] = df.groupby('id')['date'].transform('min')

  # Compute calibration values
  calibration_value = (
      df.query('date==start_date').groupby('id')
      ['purchaseamount'].sum().reset_index())
  calibration_value.columns = ['id', 'calibration_value']

  # Compute holdout values
  one_year_holdout_window_mask = (
      (df['date'] > df['start_date']) &
      (df['date'] <= df['start_date'] + np.timedelta64(365, 'D')))
  holdout_value = (
      df[one_year_holdout_window_mask].groupby('id')
      ['purchaseamount'].sum().reset_index())
  holdout_value.columns = ['id', 'holdout_value']

  # Compute calibration attributes
  calibration_attributes = (
      df.query('date==start_date').sort_values(
          'purchaseamount', ascending=False).groupby('id')[[
              'chain', 'dept', 'category', 'brand', 'productmeasure'
          ]].first().reset_index())

  # Merge dataframes
  customer_level_data = (
      calibration_value.merge(calibration_attributes, how='left',
                              on='id').merge(
                                  holdout_value, how='left', on='id'))
  customer_level_data['holdout_value'] = (
      customer_level_data['holdout_value'].fillna(0.))
  customer_level_data[CATEGORICAL_FEATURES] = (
      customer_level_data[CATEGORICAL_FEATURES].fillna('UNKNOWN'))

  # Specify data types
  customer_level_data['log_calibration_value'] = (
      np.log(customer_level_data['calibration_value']).astype('float32'))
  customer_level_data['chain'] = (
      customer_level_data['chain'].astype('category'))
  customer_level_data['dept'] = (customer_level_data['dept'].astype('category'))
  customer_level_data['brand'] = (
      customer_level_data['brand'].astype('category'))
  customer_level_data['category'] = (
      customer_level_data['category'].astype('category'))
  customer_level_data['label'] = (
      customer_level_data['holdout_value'].astype('float32'))
  return customer_level_data

Load customer-level CSV


In [0]:
def load_customer_level_csv(company):
  customer_level_data_file = (
      '/tmp/lifetime-value/acquire-valued-shoppers-challenge/customer_level_data_company_{}.csv'
      .format(company))
  if os.path.isfile(customer_level_data_file):
    customer_level_data = pd.read_csv(customer_level_data_file)
  else:
    customer_level_data = preprocess(load_transaction_data(company))
    # Cache the result so later runs skip the expensive transaction scan.
    customer_level_data.to_csv(customer_level_data_file, index=False)
  for cat_col in CATEGORICAL_FEATURES:
    customer_level_data[cat_col] = (
        customer_level_data[cat_col].astype('category'))
  for num_col in [
      'log_calibration_value', 'calibration_value', 'holdout_value'
  ]:
    customer_level_data[num_col] = (
        customer_level_data[num_col].astype('float32'))

  return customer_level_data

In [0]:
customer_level_data = load_customer_level_csv(COMPANY)

The holdout value follows a zero-inflated lognormal distribution: a point mass at zero (customers who never return) mixed with a roughly lognormal distribution over positive values.


In [0]:
customer_level_data.label.apply(np.log1p).hist(bins=50)


Out[0]:
<matplotlib.axes._subplots.AxesSubplot at 0xf44fc75b050>
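
A quick numeric check of the two mixture components. This is an illustrative follow-up cell, not part of the original run: it prints the share of customers with zero holdout value and the mean/std of log holdout value among returning customers.

zero_fraction = (customer_level_data['label'] == 0).mean()
log_positive = np.log(customer_level_data.query('label > 0')['label'])
print('P(holdout value = 0) = {:.3f}'.format(zero_fraction))
print('log(holdout value) | positive: mean = {:.2f}, std = {:.2f}'.format(
    log_positive.mean(), log_positive.std()))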

In [0]:
customer_level_data.head().T


Out[0]:
0 1 2 3 4
id 86246 86252 12262064 12277270 12332190
calibration_value 11.58 0.99 1.72 4.49 1.49
chain 205 205 95 95 95
dept 36 21 21 36 36
category 3630 2105 2105 3601 3611
brand 23359 14357 14357 3809 4599
productmeasure OZ OZ OZ OZ OZ
holdout_value 358.26 336.65 32.31 186.75 53.61
log_calibration_value 2.44928 -0.0100503 0.542324 1.50185 0.398776
label 358.26 336.65 32.31 186.75 53.61

Make train/eval


In [0]:
def linear_split(df):
  # get_dummies preserves numeric features.
  x = pd.get_dummies(df[ALL_FEATURES], drop_first=True).astype('float32').values
  y = df['label'].values

  x_train, x_eval, y_train, y_eval = model_selection.train_test_split(
      x, y, test_size=0.2, random_state=123)

  return x_train, x_eval, y_train, y_eval
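
For intuition, a toy example of the encoding (hypothetical values, not rows from this dataset): the numeric column passes through unchanged, and drop_first=True drops one level per categorical as the baseline.

toy = pd.DataFrame({
    'chain': pd.Categorical(['205', '95', '205']),
    'log_calibration_value': [2.45, 0.54, 1.50],
})
pd.get_dummies(toy, drop_first=True).astype('float32')
# -> columns: log_calibration_value, chain_95 ('205' is the dropped baseline)
# -> chain_95 values: 0.0, 1.0, 0.0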

In [0]:
def dnn_split(df):
  # Integer-encode each categorical column in place; the embedding layers
  # below expect contiguous integer ids.
  for key in CATEGORICAL_FEATURES:
    encoder = preprocessing.LabelEncoder()
    df[key] = encoder.fit_transform(df[key])

  df_train, df_eval = model_selection.train_test_split(
      df, test_size=0.2, random_state=123)

  def feature_dict(df):
    features = {k: v.values for k, v in dict(df[CATEGORICAL_FEATURES]).items()}
    features['numeric'] = df[NUMERIC_FEATURES].values
    return features

  x_train, y_train = feature_dict(df_train), df_train['label'].values
  x_eval, y_eval = feature_dict(df_eval), df_eval['label'].values

  return x_train, x_eval, y_train, y_eval

Model


In [0]:
def linear_model(output_units):
  return tf.keras.experimental.LinearModel(output_units)

In [0]:
def embedding_dim(x):
  return int(x**.25) + 1


def embedding_layer(vocab_size):
  return tf.keras.Sequential([
      tf.keras.layers.Embedding(
          input_dim=vocab_size,
          output_dim=embedding_dim(vocab_size),
          input_length=1),
      tf.keras.layers.Flatten(),
  ])


def dnn_model(output_units, df):
  numeric_input = tf.keras.layers.Input(
      shape=(len(NUMERIC_FEATURES),), name='numeric')

  embedding_inputs = [
      tf.keras.layers.Input(shape=(1,), name=key, dtype=np.int64)
      for key in CATEGORICAL_FEATURES
  ]

  embedding_outputs = [
      embedding_layer(vocab_size=df[key].nunique())(input)
      for key, input in zip(CATEGORICAL_FEATURES, embedding_inputs)
  ]

  deep_input = tf.keras.layers.concatenate([numeric_input] + embedding_outputs)
  deep_model = tf.keras.Sequential([
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dense(output_units),
  ])
  return tf.keras.Model(
      inputs=[numeric_input] + embedding_inputs, outputs=deep_model(deep_input))
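
The fourth-root rule keeps the embedding tables compact relative to vocabulary size. A quick illustration with made-up vocabulary sizes (not the actual cardinalities for this company):

for vocab_size in [10, 100, 10000, 100000]:
  print(vocab_size, embedding_dim(vocab_size))
# prints: 10 2, 100 4, 10000 11, 100000 18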

Train


In [0]:
if LOSS == 'bce':
  # Plain churn classifier: a single logit for P(repeat purchase).
  loss = keras.losses.BinaryCrossentropy(from_logits=True)
  output_units = 1

if LOSS == 'ziln':
  # Zero-inflated lognormal head: [repeat-purchase logit, lognormal location,
  # lognormal scale], trained jointly by ltv.zero_inflated_lognormal_loss.
  loss = ltv.zero_inflated_lognormal_loss
  output_units = 3
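
For orientation, here is a minimal sketch of what a zero-inflated lognormal loss computes with this three-unit layout. It is an illustrative re-derivation, not the library implementation; prefer ltv.zero_inflated_lognormal_loss, which handles shapes and numerical stability more carefully.

def ziln_loss_sketch(labels, logits):
  """Illustrative zero-inflated lognormal loss; see ltv for the real one."""
  labels = tf.cast(labels, tf.float32)         # [batch, 1] non-negative values
  positive = tf.cast(labels > 0, tf.float32)   # 1 if the customer returned
  churn_logit = logits[..., :1]                # logit of P(repeat purchase)
  loc = logits[..., 1:2]                       # lognormal location
  scale = tf.math.softplus(logits[..., 2:3])   # lognormal scale, kept positive
  # Classification term: cross-entropy on the "returned at all" event.
  classification = tf.nn.sigmoid_cross_entropy_with_logits(
      labels=positive, logits=churn_logit)
  # Regression term: lognormal negative log-likelihood on positive labels only
  # (zeros are replaced by ones so log_prob stays finite, then masked out).
  safe_labels = positive * labels + (1. - positive)
  regression = -positive * tfd.LogNormal(loc, scale).log_prob(safe_labels)
  return tf.reduce_mean(classification + regression)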

In [0]:
if MODEL == 'linear':
  x_train, x_eval, y_train, y_eval = linear_split(customer_level_data)
  model = linear_model(output_units)

if MODEL == 'dnn':
  x_train, x_eval, y_train, y_eval = dnn_split(customer_level_data)
  model = dnn_model(output_units, customer_level_data)

In [0]:
if LOSS == 'bce':
  y_train = (y_train > 0).astype('float32')
  y_eval = (y_eval > 0).astype('float32')

In [0]:
model.compile(loss=loss, optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE))

In [0]:
callbacks = [
    # Shrink the learning rate (default factor 0.1) when val_loss plateaus.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-6),
    # Stop training once val_loss has not improved for 10 epochs.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
]

In [0]:
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=1024,
    epochs=EPOCHS,
    verbose=2,
    callbacks=callbacks,
    validation_data=(x_eval, y_eval)).history


Train on 199428 samples, validate on 49857 samples
Epoch 1/400
199428/199428 - 1s - loss: 18.0561 - val_loss: 15.2577
Epoch 2/400
199428/199428 - 1s - loss: 13.3657 - val_loss: 11.9011
Epoch 3/400
199428/199428 - 1s - loss: 10.7903 - val_loss: 9.9213
Epoch 4/400
199428/199428 - 1s - loss: 9.2033 - val_loss: 8.6430
Epoch 5/400
199428/199428 - 1s - loss: 8.1475 - val_loss: 7.7657
...
Epoch 100/400
199428/199428 - 1s - loss: 4.6033 - val_loss: 4.6174
Epoch 101/400
199428/199428 - 1s - loss: 4.6032 - val_loss: 4.6174
Epoch 102/400
199428/199428 - 1s - loss: 4.6032 - val_loss: 4.6174

In [0]:
pd.DataFrame(history)[['loss', 'val_loss']][2:].plot()


Eval


In [0]:
logits = model.predict(x=x_eval, batch_size=1024)
# The first output unit is the repeat-purchase logit under both losses.
y_pred = K.sigmoid(logits[..., :1]).numpy().flatten()
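
With the ziln head, the same logits also give a point estimate of holdout value via the lognormal mean, E[Y] = P(Y > 0) * exp(loc + scale^2 / 2). Below is a sketch of that formula written out by hand; the lifetime_value package provides its own prediction helper for this.

if LOSS == 'ziln':
  positive_prob = K.sigmoid(logits[..., :1])
  loc = logits[..., 1:2]
  scale = K.softplus(logits[..., 2:3])
  expected_ltv = (
      positive_prob * K.exp(loc + 0.5 * K.square(scale))).numpy().flatten()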

In [0]:
y_true = (y_eval > 0).astype('float32')

In [0]:
def classification_report(y_true: Sequence[int],
                          y_pred: Sequence[float]) -> pd.DataFrame:
  """Report individual level classification metrics.

  Arguments:
    y_true: true binary labels.
    y_pred: predicted probabilities of the positive (repeat-purchase) class.

  Returns:
    out: dataframe with classification metrics as columns.
  """
  out = pd.DataFrame(index=[0])

  out['AUC'] = metrics.roc_auc_score(y_true, y_pred)
  out['PR_AUC'] = metrics.average_precision_score(y_true, y_pred)
  out['precision'] = metrics.precision_score(y_true, 1 * (y_pred > .5))
  out['recall'] = metrics.recall_score(y_true, 1 * (y_pred > .5))
  out['f1'] = metrics.f1_score(y_true, 1 * (y_pred > .5))
  return out

In [0]:
classification = classification_report(y_true, y_pred)
classification


Out[0]:
AUC PR_AUC precision recall f1
0 0.657588 0.906241 0.845141 0.998313 0.915364

All metrics together


In [0]:
df_metrics = pd.DataFrame(
    {
        'company': COMPANY,
        'model': MODEL,
        'loss': LOSS,
        'label_mean': y_true.mean(),
        'pred_mean': y_pred.mean(),
        'AUC': classification.loc[0, 'AUC'],
        'PR_AUC': classification.loc[0, 'PR_AUC'],
        'precision': classification.loc[0, 'precision'],
        'recall': classification.loc[0, 'recall'],
        'f1': classification.loc[0, 'f1']
    },
    index=[0])

In [0]:
df_metrics[[
    'company',
    'model',
    'loss',
    'label_mean',
    'pred_mean',
    'AUC',
    'PR_AUC',
    'precision',
    'recall',
    'f1',
]]


Out[0]:
company model loss label_mean pred_mean AUC PR_AUC precision recall f1
0 104900040 linear ziln 0.844395 0.84196 0.657588 0.906241 0.845141 0.998313 0.915364

Save


In [0]:
output_path = os.path.join(OUTPUT_CSV_FOLDER, COMPANY)

In [0]:
if not os.path.isdir(output_path):
  os.makedirs(output_path)

In [0]:
output_file = os.path.join(output_path,
                           '{}_classification_{}.csv'.format(MODEL, LOSS))

In [0]:
df_metrics.to_csv(output_file, index=False)