In [1]:
# Change the working directory (quietly) to the project checkout before anything else runs.
# NOTE(review): this is a machine-specific home-directory path; presumably it is what
# makes the relative CSV path and the `pf` import below resolve — confirm.
%cd -q ~/GitHub/pfcompute/

In [2]:
%matplotlib inline

import sys
import time
import warnings
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as st
from multiprocessing import Pool

# Notebook-wide display defaults: default figure size, the ggplot style sheet,
# and up to 500 rows shown when a DataFrame is rendered.
matplotlib.rcParams.update({'figure.figsize': (16.0, 12.0)})
matplotlib.style.use('ggplot')
pd.set_option('display.max_rows', 500)

import pf.util

In [3]:
# Load the average personal finance table. The first CSV column becomes the
# index and is parsed as dates; any column containing a missing value is dropped.
avg_pf = pd.read_csv(
    'notebooks/average_personal_income_and_expense.csv',
    index_col=0,
    parse_dates=True,
).dropna(axis=1)

In [4]:
# Put the data on a daily grid: take the mean per calendar day, then fill any
# days without observations (in both directions) using a 5th-order polynomial.
daily_pf = (
    avg_pf
    .resample('D')
    .mean()
    .interpolate(method='polynomial', order=5, limit_direction='both')
)

In [5]:
# Day-over-day fractional change of every column.
#   * the first row is dropped because it has no previous day to compare against
#   * infinite changes (both +inf and -inf) are replaced with 1
daily_pf_change = (
    daily_pf
    .pct_change()
    .iloc[1:]
    .replace([np.inf, -np.inf], 1)
)

In [6]:
def par_dist_fit(x):
    """Fit a distribution to one column; written to be mapped over a list.

    Parameters
    ----------
    x : tuple
        A ``(col, data)`` pair, packed into a single argument so the function
        can be handed to ``Pool.map``.

    Returns
    -------
    tuple
        ``(col, model, params)`` where ``model`` and ``params`` are the two
        values returned by ``pf.util.best_fit_distribution(data)``.
    """
    column_name, series = x
    fitted_model, fitted_params = pf.util.best_fit_distribution(series)
    return (column_name, fitted_model, fitted_params)

# NOTE(review): `models` is not read anywhere in the visible notebook; it is
# kept only so the notebook's set of names is unchanged.
models = {}

# One (column name, column data) work item per column.
# `DataFrame.items()` is used instead of `iteritems()`, which was removed in
# pandas 2.0; `items()` yields the same pairs.
col_data = list(daily_pf_change.items())

# Fit each column in a separate worker process. Results come back as a list of
# (col, model, params) tuples in the same order as `col_data`.
pool = Pool()
try:
    fit_models = pool.map(par_dist_fit, col_data)
finally:
    # Always release the worker processes, even if a fit raised.
    pool.close()
    pool.join()

In [7]:
# Create random % changes for future
number_of_days_to_sim = 10*365
date_range = pd.date_range(start='2015-01-01', freq='D', periods=number_of_days_to_sim)
rand_pf = pd.DataFrame(None, columns=avg_pf.columns, index=date_range)

# Seeded generator so that re-running the notebook reproduces the same
# simulated paths (the draws were previously unseeded).
sim_rng = np.random.RandomState(42)

for col, model_name, params in fit_models:
    # Look up the scipy.stats distribution by the name returned from the fit
    model = getattr(st, model_name)
    # Separate parts of parameters: everything before the last two entries is
    # passed positionally, the last two entries are loc and scale
    arg = params[:-2]
    loc = params[-2]
    scale = params[-1]

    # Draw one simulated daily % change per future day for this column
    rvs = model(*arg, loc=loc, scale=scale).rvs(size=number_of_days_to_sim, random_state=sim_rng)
    rand_pf[col] = rvs

In [8]:
# Plot every column of the loaded data (before daily resampling), with the legend hidden.
avg_pf.plot(legend=False)


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x11c4d2990>

In [9]:
# Display years
size = 10*365
# Integer position of the day the projection is anchored to
i = daily_pf.index.get_loc('2015-01-01')
# Calculate projected values from the anchor day. The simulated values are
# day-over-day % changes, so they compound: the running growth factor is the
# cumulative product of (1 + change), not 1 + the cumulative sum.
projected_pf = (1 + rand_pf).cumprod().multiply(daily_pf.iloc[i]).iloc[:size]

# Show up to 2*size days of history before the anchor. The start is clamped at
# 0 because a negative start would be read as an offset from the end of the
# frame and silently truncate (or empty) the historical window.
history_start = max(i - 2*size, 0)
ax = daily_pf.iloc[history_start:i].plot(legend=False, color='r', alpha=0.25, logy=True, label='Historical')
projected_pf.plot(legend=False, color='b', alpha=0.25, logy=True, label='Projected', ax=ax)


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1171ff3d0>

In [11]:
# Determine max plot size: one stacked subplot per fitted column
plotMax = len(fit_models)
plt.figure(figsize=(12, 6*plotMax))

for i, (col, model_name, params) in enumerate(fit_models):

    # Get the model and PDF
    model = getattr(st, model_name)
    pdf = pf.util.make_pdf(model, params)

    # Plot the PDF, Historical and Projected Histogram on the same axes.
    # `density=True` replaces the `normed=True` keyword, which was removed from
    # matplotlib's hist; it scales the histograms to unit area so they are
    # comparable with the PDF curve.
    plt.subplot(plotMax, 1, i+1)
    ax = pdf.plot(label='Model PDF', lw=2, legend=True)
    daily_pf_change[col].dropna().plot(kind='hist', bins=64,  alpha=0.7, density=True, label='Historical', legend=True, ax=ax)
    rand_pf[col].dropna().plot(kind='hist', bins=64, color='k', alpha=0.3, density=True, label='Projected', legend=True, ax=ax)
    # Title: column name, then the distribution name with its fitted parameters
    ax.set_title('{}\n{}({})'.format(col, model_name, ', '.join(['{:0.4f}'.format(x) for x in params])))



In [ ]: