In [1]:
import pandas as pd
import numpy as np

from price import Price
import config
import crosscoin
import plot

In [2]:
# Load price data into a DataFrame
prices = [Price(ticker) for ticker in config.ALL_TICKERS]
price_df = crosscoin.create_price_frame(prices, normalize='z_score')
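
Aside: crosscoin and Price are local modules not shown here. As a rough
sketch (not crosscoin's actual API), create_price_frame with
normalize='z_score' presumably builds one column per ticker and
standardizes each to zero mean and unit variance; the .ticker and
.series attributes below are assumed names.

# Hypothetical sketch of create_price_frame; attribute names are assumed
def create_price_frame(prices, normalize=None):
    df = pd.DataFrame({p.ticker: p.series for p in prices})
    if normalize == 'z_score':
        # Standardize each column: subtract its mean, divide by its std
        df = (df - df.mean()) / df.std()
    return df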

In [10]:
# Compute percent change and 30-day rolling variance
pct_change_df = price_df.pct_change()
variance_df = price_df.rolling(window=30).var()
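
As a quick sanity check on the rolling call: the value at row 29 of any
column should equal the plain sample variance of that column's first 30
rows, assuming the column has no missing values there.

# Sanity check; assumes the first column has no NaNs in its first 30 rows
col = price_df.columns[0]
assert np.isclose(price_df[col].iloc[:30].var(), variance_df[col].iloc[29])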

In [101]:
# Plot 30-day variance
plot.plot_timeseries('30-Day Rolling Price Variance', variance_df, legend=False)

[figure: 30-Day Rolling Price Variance]

In [9]:
# Tighten window
start = pd.Timestamp('2013-11-01')
plot.plot_timeseries('30-Day Rolling Price Variance', variance_df[start:], legend=False)

[figure: 30-Day Rolling Price Variance, 2013-11-01 onward]

In [15]:
# Let's find the date range for "most volatile" windows
max_dates = [variance_df[ticker].idxmax() for ticker in variance_df]

# What percentage of these peaks occurred in December 2013 or January 2014?
start = pd.Timestamp('2013-12-01')
end = pd.Timestamp('2014-02-01')
print(len([date for date in max_dates if start < date < end]) / len(max_dates))

# Now plot this window
plot.plot_timeseries('30-Day Rolling Price Variance', variance_df[start:end], legend=False)
print('Mean value in this window: %.3f' % variance_df[start:end].mean().mean())

[figure: 30-Day Rolling Price Variance, 2013-12-01 to 2014-02-01]
0.842105263158
Mean value in this window: 1.633
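
The per-ticker loop above can also be collapsed with DataFrame.idxmax,
which returns each column's peak date in one call; an equivalent sketch:

# Vectorized equivalent: one peak date per column
peaks = pd.DatetimeIndex(variance_df.idxmax())
print(((peaks > start) & (peaks < end)).mean())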

In [17]:
# Now, let's find the date range for "least volatile" windows
min_dates = [variance_df[ticker].idxmin() for ticker in variance_df]

# What percentage of these troughs occurred in the last 60 days (from time of running)?
start = pd.Timestamp('2014-11-05')
print(len([date for date in min_dates if date > start]) / len(min_dates))

# Now plot this window
plot.plot_timeseries('30-Day Rolling Price Variance', variance_df[start:], legend=False)
print('Mean value in this window: %.3f' % variance_df[start:].mean().mean())

[figure: 30-Day Rolling Price Variance, 2014-11-05 onward]
0.657894736842
Mean value in this window: 0.003
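
The same shortcut works here: DataFrame.idxmin yields all trough dates
at once.

# Vectorized equivalent: one trough date per column
troughs = pd.DatetimeIndex(variance_df.idxmin())
print((troughs > start).mean())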

In [111]:
# Are there any days where every price is increasing? Or at least 85% of prices?
num_non_null = pct_change_df.count(axis=1)
for date, row in pct_change_df.iterrows():
    # Baseline: for how many coins do we have price data on this date?
    n = num_non_null[date]
    if n <= 1:
        continue

    # And how many of those had a non-negative change?
    pct_pos = (row >= 0).sum() / n
    if pct_pos >= 0.85:
        print(date)


2013-06-08 00:00:00
2013-08-25 00:00:00
2013-12-09 00:00:00
2013-12-18 00:00:00
2013-12-24 00:00:00
2014-08-16 00:00:00
2014-11-20 00:00:00
2015-01-02 00:00:00
2015-01-04 00:00:00
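
The row loop can also be done without iterrows; a vectorized sketch that
mirrors the same logic:

# Fraction of quoted coins with a non-negative move each day,
# restricted to days with at least two quoted coins
frac_up = (pct_change_df >= 0).sum(axis=1) / num_non_null
hits = frac_up[(num_non_null > 1) & (frac_up >= 0.85)]
print(hits.index.tolist())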

In [36]:
# Try to hit on some more long-term trends
ew_variance_df = price_df.ewm(span=20).var()
plot.plot_timeseries('Avg. 20-Day EW Rolling Price Variance', ew_variance_df.mean(axis=1), legend=False)

[figure: Avg. 20-Day EW Rolling Price Variance]

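
For reference, span=20 is just a reparameterization of the decay rate
alpha = 2 / (span + 1), about 0.095, so the two spellings below agree.

# span and alpha are equivalent parameterizations of the same decay
alpha = 2.0 / (20 + 1)
assert np.allclose(price_df.ewm(alpha=alpha).var(), ew_variance_df, equal_nan=True)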