Guy 2018-09-23
This is a fork of tjvandal's GitHub repository (https://github.com/tjvandal/generic-pred/tree/master/julia).
Generic-Pred stands for "Generic-Prediction" and was introduced in the 2014 paper by Abbas Golestani & Robin Gras, "Can we predict the unpredictable?" (http://www.nature.com/articles/srep06834 and http://www.nature.com/articles/srep06834#supplementary-information).
The paper presents "an original approach that brings a novel perspective to the field of long-term time series forecasting".
One demonstrated application is forecasting the Dow Jones Industrial Average (DJIA). The authors appear to have used closing index data from September 1993 to September 2001:
"Extract from the paper"
We downloaded DJIA data from September 1st, 1993 to September 28, 2001 (http://quotes.wsj.com/index/DJIA/historical-prices) and found the code tjvandal published on his GitHub.
Our intent is to reproduce Golestani & Gras's DJIA demonstration.
Objectives: reproduce the paper's DJIA forecast, i.e. predict the last 521 trading days of the series one point at a time from the preceding data, using the Lyapunov-exponent-based procedure implemented below.
In [1]:
    
#Pkg.add("Distributed") #https://docs.julialang.org/en/v1/stdlib/Distributed/index.html
#Pkg.add("Gadfly") # install GadFly if not already present
#Pkg.add("DataFrames") # install DataFrames if not already present
#Pkg.update() # get the latest versions of your packages
push!(LOAD_PATH, pwd()) # LOAD_PATH lists the directories Julia searches for modules; pwd() returns the current working directory
    
    Out[1]:
In [2]:
    
using DataFrames #DataFrames package provides the essential tools you need for working with statistical data
using Gadfly #Gadfly is a system for plotting and visualization
using CSV # fast and flexible pure-Julia library for handling delimited text files
# @everywhere using Lyaponuv # the @everywhere macro ensures that all worker processes have access to the functions
# @everywhere is a way to load a function library on every process. Here the functions are defined within this notebook:
# Lyaponuv is a separate file containing all the functions; they have been inserted into the next cell instead.
f = "DJIA 09-1993 to 09-2001.csv" 
# a File name which is a .csv containing two columns Date and DJIA
# Dow Jones Industrial Average from Sept 01, 1993 to Sept 28, 2001
#data=readtable(f, header=true) #read data from a CSV-like file
data=CSV.read(f)
    
    
    Out[2]:
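A quick look at what was loaded (my own addition; it assumes the two columns are named Date and DJIA as in the file header):
In [ ]:
    
println(size(data))    # (number of rows, number of columns)
println(head(data, 5)) # first five rows of the DataFrame
    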
In [3]:
    
#using Distributed #https://docs.julialang.org/en/v1/stdlib/Distributed/index.html
addprocs(16) # add 16 worker processes on the local machine, to take advantage of multiple cores
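# The functions below estimate the largest Lyapunov exponent of a time series:
# reconstruct the attractor by delay embedding, pair each reconstructed point with its
# nearest neighbour, follow the mean log-distance between the pairs for `ref` steps,
# and take the least-squares slope of that curve as the exponent.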
@everywhere function lyaponuv_k(time_series, J, m, ref)
    #println("test:1")
    X = attractor(time_series, m, J)
    #println("test:2")
    norms = compute_norms(X)
    #println("test:3")
    pairs = match_pairs(norms)
    #println("test:4")
    y = follow_points(pairs, norms, ref)
    #println("test:5")
    return(norms,y)
end
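# match_pairs: for each reconstructed point (row of the distance matrix), index of its nearest neighbour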
@everywhere function match_pairs(norms)
    M = size(norms)[1]
    pairs = Array{Int}(M)
    for row in 1:M
        mn, idx = findmin(norms[row, :])
        pairs[row] = idx
    end
    return(pairs)
end
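# attractor: delay embedding with dimension m and delay J; column i of the returned
# m x M matrix is [x_i, x_{i+J}, ..., x_{i+(m-1)J}]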
@everywhere function attractor(time_series, m, J)
    N = length(time_series)
    M = N - (m - 1) * J  
    X = Array{Float64}(m, M)
    i = 1
    for i=1:M
        X[:,i] = time_series[i:J:(i+(m-1)*J)]
    end
    return(X)
end
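# follow_points: y[i+1] is the average log-distance between each point and its matched
# neighbour after both have been advanced i steps along the attractor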
@everywhere function follow_points(pairs, norms, ref)
    y = Array{Float64}(ref)
    M = size(norms)[1]
    for i=0:ref-1
        agg = 0 
        count = 0
        for j=1:M
            jhat = pairs[j]+i
            jtrue = j+i
            if jhat <= M && jtrue <= M
                agg = agg + log(norms[jtrue, jhat])
               # agg = agg + log(vecnorm(X[:, jtrue] - X[:, jhat]))
                count = count + 1
            end
        end
        y[i+1] = agg/count # divide by delta-t also?
    end
    return(y)
end
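# compute_norms: M x M matrix of Euclidean distances between all pairs of reconstructed points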
@everywhere function compute_norms(X)
    M = size(X)[2]
    norms = Array{Float64}(M, M)
    for i=1:M
        norms[i,:] = column_norms(X, i)      
    end    
    return(norms)
end
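# column_norms: distances from point i to every other point; the self-distance is set
# to 10^10 so a point is never selected as its own nearest neighbour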
@everywhere function column_norms(X, i)
    M = size(X)[2]
    X_diff = X .- X[:, i]
    norm_vector = [vecnorm(X_diff[:, k]) for k=1:M]
    norm_vector[i] = 10^10
    return(norm_vector)
end
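# lyaponuv_exp: least-squares slope of the divergence curve, i.e. the Lyapunov exponent estimate
# (the nn mask below is computed but not used; non-finite values are filtered out by the caller)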
@everywhere function lyaponuv_exp(series)
    nn = .!isnan.(series)
    A = ones(length(series), 2)
    A[:,1] = linspace(1, length(series), length(series))
    gradient = A \ series # least-squares line fit: gradient[1] is the slope, gradient[2] the intercept
    return(gradient[1])
end
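# lyaponuv: full pipeline; returns the exponent estimated from the finite part of the divergence curve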
@everywhere function lyaponuv(time_series, J, m, ref)
    ts = lyaponuv_k(time_series, J, m, ref)[2]
    exponent = lyaponuv_exp(ts[isfinite.(ts)])  ## only input those which are finite
    return(exponent)
end
#=
@everywhere function get_next(ts, m, M, norms, ref, J)
    
    attractor_array = attractor(ts, m, J)
    temp_norms = Array{Float64}(M+1, M+1)
    temp_norms[1:M, 1:M] = norms
    col = column_norms(attractor_array, M+1)
    temp_norms[M+1, :] = col
    temp_norms[:, M+1] = col
    pairs=match_pairs(temp_norms)
    lyap_k_temp = follow_points(pairs, temp_norms, ref)
    return(lyaponuv_exp(lyap_k_temp))
end
@everywhere function lyaponuv_next(time_series, J, m, ref, sample_size)
    ts_diff = time_series[2:end] - time_series[1:end-1]
    sigma = std(ts_diff)
    samples = randn(sample_size) * sigma + time_series[end]
    @time norms, lyap_k = lyaponuv_k(time_series, J, m, ref)
    true_exponent = lyaponuv_exp(lyap_k)
    exponents = Array{Float64}(sample_size)
    M = size(norms)[1]
    tasks = Array{Future}(sample_size)
    for i=1:sample_size
        s = samples[i]
        tasks[i] = @spawn get_next(vcat(time_series, s), m, M, norms, ref, J)
        #@time exponents[i] = get_next(vcat(time_series, s), m, M, norms, ref, J)        
        #@printf("process: %d\n", i) 
    end
    
    for i=1:sample_size
        exponents[i]=fetch(tasks[i])
    end
    diff = abs.(exponents .-  true_exponent)    
    val, idx = findmin(diff)
    println("Next Value:", samples[idx])
    return(samples[idx])
end
=#
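Before running the forecast, a small sanity check of the exponent estimate on synthetic data (my own addition, not part of the original notebook): a chaotic logistic map should give a clearly positive exponent, while a smooth sine wave should stay close to zero.
In [ ]:
    
logistic = Array{Float64}(500)
logistic[1] = 0.4
for k = 2:500
    logistic[k] = 4.0 * logistic[k-1] * (1.0 - logistic[k-1]) # chaotic logistic map, r = 4
end
println("logistic map exponent: ", lyaponuv(logistic, 2, 3, 11))
println("sine wave exponent:    ", lyaponuv(sin.(0.1 * collect(1:500)), 2, 3, 11))
    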
    
In [103]:
    
#Variables needed by lyaponuv function
J = 2  ## reconstruction delay
m = 3  ## embedding dimension
r = 11 ## ref: number of steps over which the neighbour divergence is followed
sliding_window = 1400
next_x_points = 521
sample_size = 10
ts = deepcopy(data[:DJIA][1:end-next_x_points])/10000 
# Create a deep copy of the DJIA column, leaving out the last next_x_points points (these will be forecast).
# Everything is copied recursively, so ts is fully independent of "data".
# Each point is also divided by 10000 to rescale the series.
#println("ts:", ts, "\n")
#println("length(ts):", length(ts), "\n")
#println("length(ts[1:end-1]):", length(ts[1:end-1]), "\n")
#println("length(ts[2:end]):", length(ts[2:end]), "\n")
diff = ts[1:end-1] - ts[2:end]
#
#println("diff:", diff, "\n")
#println("length(diff):", length(diff), "\n")
#println("diff avg:", mean(abs.(diff)), "\tdiff std:", std(diff))
println(now())
tic() # start a timer for the whole forecasting loop
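# Forecasting loop: for each of the next_x_points future points,
#   1. estimate the Lyapunov exponent of the last sliding_window+1 points of ts,
#   2. draw sample_size candidate next values from a normal distribution centred on the last
#      value, with the standard deviation of the window's first differences,
#   3. keep the candidate whose exponent is closest to the window's exponent and append it to ts.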
for i=1:next_x_points
    #println("i:", i, ":")
    lyap_exp = lyaponuv(ts[end-sliding_window:end], J, m, r)
    #println("lyap_exp:", lyap_exp, "\n")
    tasks = Array{Future}(sample_size)
    #println("tasks:", tasks, "\n")
    mu = mean(ts[end-sliding_window:end])
    #println("mu:", mu, "\n")
    diff = ts[end-sliding_window+1:end] - ts[end-sliding_window:end-1]
    mu = mean(diff)
    #println("mu:", mu, "\n")
    sigma = std(diff)
    #println("sigma:", sigma, "\n")
    sample_values = randn(sample_size) .* sigma .+ ts[end]
    #println("sample_values:", sample_values, "\n")
    for j=1:sample_size
        #println("j:", j, "\n")
        tempts=deepcopy(ts[end-sliding_window:end])
        #println("tempts:", tempts, "\n")
        append!(tempts, [sample_values[j]])
        #println("sample_values:", sample_values, "\n")
        tasks[j] = @spawn lyaponuv(tempts, J, m, r)
        #println("tasks[j]:", tasks[j], "\n")
    end
    
    exponents = Array{Float64}(sample_size)
    #println("exponents:", exponents, "\n")
    
    for j=1:sample_size
        #println("j:", j, "\n")
        exponents[j] = fetch(tasks[j])
        #println("exponents[j]:", exponents[j], "\n")
    end
    #println("exponent:", mean(lyap_exp))
    
    exp_diff = abs.(exponents .- lyap_exp)
    min_index = findmin(exp_diff)
    best_val = sample_values[min_index[2]]
    append!(ts, [best_val])
    #println(i, "   best value:", best_val, "\t")
end
tictoc = toq() # stop the timer started with tic(); elapsed time in seconds
println("This took ", tictoc, " seconds")
# To write data to a CSV file use CSV.write("output.csv", df) (writetable is deprecated)
    
    
    
In [108]:
    
println("Forecasting loop took ", tictoc, " seconds")
    
    
    
In [104]:
    
x = linspace(1, length(data[:DJIA]), length(data[:DJIA]))
# linspace(start, stop, n): construct a range of n linearly spaced elements from start to stop
l1 = layer(x=x, y=data[:DJIA], Geom.line)
x0 = length(data[:DJIA])-next_x_points+1
x_ts = linspace(x0, length(ts), length(ts)-x0+1)
y_ts = ts[x0:end]*10000
l2 = layer(x=x_ts, y=y_ts, Geom.line, Theme(default_color=colorant"red"))
plot(l1, l2)
    
    Out[104]:
In [52]:
    
println(data)
println(x0)
    
    
In [101]:
    
lay1 = layer(data, x=:Date, y=:DJIA, Geom.line, Theme(default_color="cyan"))
#lay2 = layer(ts, )
plot(lay1)
    
    Out[101]:
In [72]:
    
println(length(ts))
println(length(data[:DJIA]))
println(size(data,1))
println(length(ts)-next_x_points+1)
size(data)
    
    
    Out[72]:
In [31]:
    
typeof(data[:DJIA])
    
    Out[31]:
In [76]:
    
ds = deepcopy(data)
#ds[:DJIA_p] = 1:size(data,1) # add a column holding the numbers 1 to n
ds[:DJIA_p] = ts*10000
#delete!(ds, :DJIA_p)
println(ds)
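One way to keep the result (my own addition; uses CSV.write, since writetable is deprecated): write the observed series and the forecast column of ds to disk.
In [ ]:
    
CSV.write("DJIA_forecast.csv", ds) # Date, DJIA and DJIA_p (rescaled forecast) columns
    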
    
    
In [ ]:
    
lay1 = layer(data, x=:Date, y=:DJIA, Geom.line, Theme(default_color="cyan"))
#lay2 = layer(ts, )
plot(lay1)
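A possible version of the missing second layer: overlay the forecast column of ds (built in In [76] above) in red on top of the observed series. This is my own sketch of what lay2 might look like.
In [ ]:
    
lay1 = layer(ds, x=:Date, y=:DJIA, Geom.line, Theme(default_color=colorant"cyan"))
lay2 = layer(ds, x=:Date, y=:DJIA_p, Geom.line, Theme(default_color=colorant"red"))
plot(lay1, lay2)
    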