Load the required modules
In [ ]:
import os, re
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import self_similarity as ss
from main import sim_load, list_files
Define a procedure for estimating the Hurst exponent from the crossing tree: at each level the estimate is $\log 2$ divided by the logarithm of the mean subcrossing size.
In [ ]:
def xing_hurst( file_name, p = 5, q = 7 ) :
    ## Zero-based indices of the crossing-tree levels to report.
    levels = np.arange( p, q + 1 ) - 1
    H, Njn, Djnk, Vjnde = sim_load( file_name )
    ## Total number of subcrossings at each level of each replication.
    Cjn = np.reshape( np.sum( Djnk, axis = ( 2, ), dtype = np.float ), Djnk.shape[ :-1 ] + ( 1, ) )
    ## The grid of even subcrossing sizes 2, 4, 6, ...
    Zk = 2 * np.reshape( 1 + np.arange( Djnk.shape[ -1 ] ), ( 1, 1, Djnk.shape[ -1 ] ) )
    ## Empirical subcrossing size distribution and its mean at each level.
    Pjnk = np.where( Cjn > 0, Djnk / Cjn, 0 )
    Mjn = np.reshape( np.sum( Pjnk * Zk, axis = ( 2, ) ), Cjn.shape )
    ## The crossing-tree estimator: H = log 2 / log E[Z].
    Hjn = np.log( 2 ) / np.log( Mjn )
    return ( ( 1 + np.arange( Djnk.shape[ -2 ] ) )[ levels ], H,
             np.average( Hjn, axis = ( 0, ) ).flatten( )[ levels ],
             np.std( Hjn, axis = ( 0, ) ).flatten( )[ levels ] )
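As a quick sanity check of this estimator (an added cell; it assumes the conjectured law $\mathbb{P}(Z=2k) = \theta \cdot (1-\theta)^{k-1}$ with $\theta = 2^{1-H^{-1}}$ holds exactly): the mean subcrossing size is then $\mathbb{E}Z = 2/\theta = 2^{1/H}$, so $\log 2 / \log \mathbb{E}Z$ recovers $H$.
In [ ]:
## Sanity check (an addition): recover H from the mean subcrossing size
##  implied by the conjectured geometric law -- no sampling noise involved.
for H_true in ( 0.5, 0.65, 0.8 ) :
    theta = 2.0**( 1.0 - 1.0 / H_true )
    k = 1 + np.arange( 1000 )
    mean_Z = np.sum( 2 * k * theta * ( 1 - theta )**( k - 1 ) )
    print "H = %.2f, recovered H = %.4f" % ( H_true, np.log( 2 ) / np.log( mean_Z ) )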
The procedure below loads data from the specified file and computes the pooled subcrossing size distribution for comparison with the conjectured one.
In [ ]:
def xing_probs( file_name, p = 5, q = 7 ) :
    ## Zero-based indices of the crossing-tree levels to pool.
    levels = np.arange( p, q + 1 ) - 1
    H, Njn, Djnk, Vjnde = sim_load( file_name )
    ## Pool the subcrossing counts over the selected levels of each replication.
    Cj = np.reshape( np.sum( Djnk[ :, levels ], axis = ( 1, 2, ), dtype = np.float ), ( Djnk.shape[ 0 ], 1, ) )
    Pjk = np.where( Cj > 0, np.sum( Djnk[ :, levels ], axis = ( 1, ) ) / Cj, 0 )
    Zk = np.reshape( 1 + np.arange( Pjk.shape[ -1 ] ), ( 1, Pjk.shape[ -1 ] ) )
    ## The conjectured law: P( Z = 2k ) = theta * ( 1 - theta )^( k - 1 ), theta = 2^( 1 - 1/H ).
    theta = 2.0**( 1.0 - 1.0 / H )
    Pk = theta * ( 1 - theta )**( Zk - 1 )
    return H, 2 * Zk, Pk, np.average( Pjk, axis = ( 0, ) ), np.std( Pjk, axis = ( 0, ) )
Load the simulated results.
In [ ]:
# files = list_files( './output/25/std/', pattern = r'\.npz$' )
# files = list_files( '/Users/user/Dropbox/study_notes/year_14_15/course_project/code/output/HRM-2_19-32/med/', pattern = r'\.npz$' )
files = list_files( './output/HRM/local/HRM-4_18-16/med/', pattern = r'\.npz$' )
# files = list_files( './output/20150527/HRM-3_20-16/med/', pattern = r'\.npz$' )
Set the levels of the crossing tree where the self-similarity seems to kick in.
In [ ]:
p, q = 5, 7
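As a quick illustration (an added cell; it assumes the `files` list above is non-empty and that its first entry is a readable simulation archive), the conjectured and pooled empirical probabilities of the smallest subcrossing sizes can be inspected directly:
In [ ]:
## Peek at one replication set (assumption: `files` is non-empty).
H, Zk, Pk, Phat, Pstd = xing_probs( files[ 0 ], p = p, q = q )
print "H = %.2f, theta = %.4f" % ( H, 2.0**( 1.0 - 1.0 / H ) )
print "subcrossing size  :", Zk[ 0, :5 ]
print "conjectured prob. :", np.round( Pk[ 0, :5 ], 4 )
print "pooled empirical  :", np.round( Phat[ :5 ], 4 )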
Plot the pooled empirical subcrossing size distribution against the conjectured one.
In [ ]:
def draw_probs( files, ax, theor_color = 'gray' ) :
    ax.set_xticks( np.arange( 2, 42, 2 ) )
    ax.grid( )
    ax.set_color_cycle( plt.cm.rainbow( np.linspace( 0, 1, max( len( files ), 10 ) )[ ::-1 ] ) )
    for f in files[ 1: ] :
        H, Zk, Pk, Phat, Pstd = xing_probs( f, p = p, q = q )
        ## The conjectured distribution for this H.
        ax.plot( Zk[ 0 ], Pk[ 0 ], linestyle = '-', color = theor_color )
        ## Clip the lower error bars so they stay positive on the log scale.
        mask = Phat > 0
        lwr = np.where( Phat > Pstd, Pstd, 0 )
        ax.errorbar( Zk[ 0 ][ mask ], Phat[ mask ], yerr = [ lwr[ mask ], Pstd[ mask ] ],
                     fmt = '-o', label = 'HRM-4 H=%.2f' % ( H, ) )
## Create an appropriately sized figure
plt.figure( figsize = ( 16, 9 ) )
ax = plt.subplot( 111 )
## Set up the view of the plot's main area
draw_probs( files, ax, theor_color = 'black' )
ax.set_yscale( 'log' ) ; ax.set_ylim( 1e-8, 1 ) ; ax.set_xlim( 2, 40 )
## Add legend
legend = ax.legend( loc = 'lower right', frameon = 1 )
frame = legend.get_frame( )
frame.set_facecolor( 'whitesmoke' )
legend.set_title( r'Empirical' )
## Add proper labels and titles
ax.set_title(r"""Subcrossing size distribution: empirical against conjectured $\mathbb{P}(Z=2k) = \theta \cdot (1-\theta)^{k-1}$ with $\theta = 2^{1-H^{-1}}$""")
ax.set_ylabel( r'Probability (log)' )
ax.set_xlabel( r'Subcrossing size' )
## Add a zoomed view of the small size subcrossings region
ax2 = plt.axes( [.64, .54, .25, .35 ], axisbg = 'w' )
draw_probs( files, ax2, theor_color = 'gray' )
ax2.set_yscale( 'log' ) ; ax2.set_ylim( .25e-2, 1 ) ; ax2.set_xlim( 1, 7 )
## Output
plt.savefig('./output/HRM-4_18-16-log_probs.png')
plt.show()
Demonstrate that subcrossings of size greater than 4 are not that rare.
In [ ]:
# H, Njn, Djnk, Vjnde = sim_load( './output/25/std/fbm_std_20150517-075535_25_0.5000_10.npz' )
H, Njn, Djnk, Vjnde = sim_load( '/Users/user/Dropbox/study_notes/year_14_15/course_project/code/output/HRM-4_19-32/med/HRM-4_med_20150526-073757_19-32_0.5000_1000.npz' )
print "Average number of observed subcrossings of specified size per each sample path replication"
print np.average( Djnk, axis = ( 0, ) )[p:q+1, np.arange( 6, 14, 2)//2-1]
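For reference (an added cell), under the conjectured law a subcrossing of size $2k$ has probability $\theta \cdot (1-\theta)^{k-1}$, so sizes 6 through 12 still carry non-negligible mass for the loaded $H$:
In [ ]:
## Conjectured probabilities of subcrossing sizes 6, 8, 10 and 12 (k = 3..6),
##  using the H loaded from the archive above.
theta = 2.0**( 1.0 - 1.0 / H )
k = np.arange( 6, 14, 2 ) // 2
print "theta = %.4f" % theta
print np.round( theta * ( 1 - theta )**( k - 1 ), 4 )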
Show the estimated Hurst exponents
In [ ]:
p, q = 1, 19
In [ ]:
plt.figure( figsize = ( 16, 9 ) )
ax = plt.subplot( 111 )
ax.set_title('Crossing tree estimates of the Hurst exponent')
ax.set_xlabel( 'Tree level' ) ; ax.set_ylabel( '$H$', rotation = 0 )
ax.set_xlim( p, q ) ; ax.set_ylim( 0.45, 1.0 )
ax.set_color_cycle( plt.cm.rainbow( np.linspace( 0, 1, max( len( files ), 10 ) )[::-1] ) )
for f in files[ 1: ] :
    Ln, H, Hhat, Hstd = xing_hurst( f, p = p, q = q )
    ## Mark the true H of the simulated process.
    ax.axhline( y = H, color = 'black', linestyle = '-.' )
    ## Clip the lower error bars and keep only levels where the estimate
    ##  has not collapsed far below the true H.
    lwr = np.where( Hhat > Hstd, Hstd, 0 )
    mask = Hhat > H * 0.75
    ax.errorbar( Ln[ mask ], Hhat[ mask ], yerr = [ lwr[ mask ], Hstd[ mask ] ],
                 fmt = '-o', label = 'fBM H=%.2f' % ( H, ) )
## Add a legend with white opaque background.
legend = ax.legend( loc = 'lower right', frameon = 1 )
frame = legend.get_frame( )
frame.set_facecolor( 'whitesmoke' )
Test the self-similarity hypothesis.
In [ ]:
p, q, h = 7, 8, 5
In [ ]:
plt.figure( figsize = ( 16, 9 ) )
ax = plt.subplot( 111 )
prc = np.arange( 1, 100 )/100.0
ax.set_color_cycle( plt.cm.rainbow( np.linspace( 0, 1, len( files ) ) ) )
labels, data = [], []
for f in files :
    H, Njn, Djnk, Vjnde = sim_load( f, return_durations = False )
    ## Pooled test of self-similarity across levels p..q: returns the test
    ##  statistic, degrees of freedom and p-values.
    stat, dof, pv = ss.pooled( Djnk, Njn, p = p, q = q, h = h )
    labels.append( 'fBM H=%.2f' % ( H, ) )
    data.append( pv )
    ax.hist( pv, bins = prc, label = 'fBM H=%.2f' % ( H, ), histtype = 'barstacked', stacked = True )
# ax.hist( data, bins = 10, label = labels, histtype = 'bar', stacked = False )
# ax.plot( np.percentile( pv, prc ), prc, '-o', label = 'fBM H=%.2f' % ( H, ) )
legend = ax.legend( loc = 'upper right', frameon = 1 )
frame = legend.get_frame( )
frame.set_facecolor( 'whitesmoke' )
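If the self-similarity hypothesis holds and the pooled test is correctly sized, the p-values should be approximately uniform on $[0,1]$. The cell below is an added follow-up check of this uniformity with a Kolmogorov-Smirnov test (it assumes scipy is available and that each entry of `data` is a one-dimensional array of p-values returned by ss.pooled).
In [ ]:
## Added check: Kolmogorov-Smirnov test of p-value uniformity per file
##  (assumption: each entry of `data` is a 1-D array of p-values).
from scipy.stats import kstest
for lab, pvals in zip( labels, data ) :
    D, ks_pv = kstest( pvals, 'uniform' )
    print "%s: KS statistic = %.3f, p-value = %.3f" % ( lab, D, ks_pv )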
In [ ]:
H, Njn, Djnk, Vjnde, Wjnp, Wbarjn, Wstdjn = sim_load(
# './output/25/std/fbm_std_20150517-075535_25_0.5000_10.npz', return_durations = True )
'./output/HRM-2-15-4/med/HRM-2_med_20150525-144147_15-16_0.5000_100.npz', return_durations = True )
In [ ]:
plt.figure( figsize = ( 16, 9 ) )
ax = plt.subplot( 111 )
# ax.set_yscale( 'log' )
# ax.errorbar( x = np.arange( Wbarjn.shape[1] ), y = np.average( Wjnp, axis = ( 0, ) ).T )
plt.plot( np.log( np.average( Wjnp, axis = ( 0, ) ).T ) )
In [ ]:
ax = plt.subplot( 111 )
ax.set_color_cycle( plt.cm.rainbow( np.linspace( 0, 1, Wjnp.shape[1] ) ) )
prc = np.array( [ 0.5, 1.0, 2.5, 5.0, 10, 25, 50, 75, 90, 95, 97.5, 99, 99.5 ] )
ax.plot( np.log( np.average( Wjnp, axis = ( 0, ) )[:14].T ), prc )
In [ ]:
# H, Njn, Djnk, Vjnde = sim_load( './output/25/std/fbm_std_20150517-080403_25_0.6500_10.npz' )
basepath = '/Users/user/Dropbox/study_notes/year_14_15/course_project/code/output/HRM-3_19-32/med/'
H, Njn, Djnk, Vjnde = sim_load( basepath+'HRM-3_med_20150526-042928_19-32_0.5000_1000.npz' )
In [ ]:
p, q = 6, 10
In [ ]:
levels = np.arange( p, 1 + q ) - 1
Cjnd = np.reshape( np.sum( Vjnde, axis = ( 3, ), dtype = np.float ), ( Vjnde.shape[ :-1 ] + ( 1, ) ) )
V_hat = Vjnde[:,levels] / Cjnd[:,levels]
Vpool = np.sum( Vjnde[:,levels], axis = ( 1, ) ) / np.sum( Cjnd[:,levels], axis = ( 1, ) )
In [ ]:
print np.average( Vpool, axis = 0 )
print np.std( Vpool, axis = 0 )
In [ ]:
theo = 2**(1-1.0/H)
# print theo
pv = 1.0 / np.sqrt( 2.0 / theo )
print pv
In [ ]:
print np.array( np.sum( Djnk > 0, axis = ( 0, ) ) / 50, np.int )#[p:q+1,:]
In [ ]: