In [ ]:
import pandas as pd
import numpy as np
import AutoForecast as autof
import Bench.TS_datasets as tsds
#import SignalDecomposition_Perf as tsperf
%matplotlib inline
import sys
import datetime as dt
from sqlalchemy import *
#from sqlalchemy import desc, nullsfirst
import sqlalchemy
In [ ]:
b1 = tsds.load_airline_passengers()
df = b1.mPastData
def convert_double_to_datetime(x):
    # interpret a fractional year (e.g. 1949.5) as a date within that year
    ratio = x - int(x)
    fulldate = dt.datetime(int(x), 1, 1, 0, 0, 0)
    year_length = dt.datetime(int(x) + 1, 1, 1, 0, 0, 0) - fulldate
    fulldate = fulldate + dt.timedelta(days=int(year_length.days * ratio))
    return fulldate

lDateCol = 'time'
df[lDateCol] = df[lDateCol].apply(convert_double_to_datetime)
lAutoF = autof.cAutoForecast()
lAutoF.mOptions.mDebugCycles = True
lAutoF
lAutoF.train(df , 'time' , 'AirPassengers' , 12)
lAutoF.getModelInfo()
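In [ ]:
# Quick probe (not in the original notebook): the fractional-year converter above
# should map 1949.5 to roughly the start of July 1949.
convert_double_to_datetime(1949.5)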
In [ ]:
trend = lAutoF.mSignalDecomposition.mBestTransformation.mBestModelTrend
cycle = lAutoF.mSignalDecomposition.mBestTransformation.mBestModelCycle
ar = lAutoF.mSignalDecomposition.mBestTransformation.mBestModelAR
In [ ]:
lSignalName = lAutoF.mSignalDecomposition.mBestTransformation.mOriginalSignal
lTimeName = lAutoF.mSignalDecomposition.mBestTransformation.mTime
In [ ]:
cycle.getCycleName()
In [ ]:
trend.mTrendRidge.coef_ , trend.mTrendRidge.intercept_
In [ ]:
cycle # NoCycle
In [ ]:
ar.mARRidge.coef_, ar.mARRidge.intercept_
In [ ]:
#lAutoF.mSignalDecomposition.mBestTransformation.mTimeInfo.__dict__
In [ ]:
sqlalchemy.__version__
sys.setrecursionlimit(200000);
In [ ]:
# in-memory database
#lDSN = 'sqlite://'
lDSN = 'mysql://user:pass@localhost/GitHubtest'
#lDSN = 'postgresql:///GitHubtest'
engine = create_engine(lDSN , echo=True)
#create_engine( , echo=True)
conn = engine.connect()
In [ ]:
conn.connection.connection.__dict__
In [ ]:
#a= conn.connection.connection.
#a
In [ ]:
# MetaData.tables is an instance attribute, so inspect it on an instance
type(MetaData().tables)
In [ ]:
df.head(12)
In [ ]:
#conn.connection.connection.dsn
In [ ]:
conn.engine.has_table('ds1')
In [ ]:
conn.engine.dialect
In [ ]:
df.to_sql("ds1" , conn, if_exists='replace')
In [ ]:
meta = MetaData()
table1 = Table('ds1', meta, autoload=True, autoload_with=engine)
In [ ]:
table1.primary_key
In [ ]:
exprs = [table1]
statement = select(exprs)
result = conn.execute(statement).fetchmany(5)
result
In [ ]:
lTimeInfo = lAutoF.mSignalDecomposition.mBestTransformation.mTimeInfo
In [ ]:
## ****************************** Signal Transformation CTE **************************************************
def addTransformedSignal(table):
    # the selected transformation is the identity: expose the original signal
    # column under the label "Signal"; the time column is passed through unchanged
    transformed_signal = table.c[lSignalName].label("Signal")
    return [transformed_signal]

signal_exprs = addTransformedSignal(table1)
Transformation_CTE = select([table1] + signal_exprs).cte("CTE_Transformation")
statement = select([Transformation_CTE])
result = conn.execute(statement).fetchmany(5)
print(result)
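In [ ]:
# Hypothetical sanity check: the transformation is the identity, so the labelled
# "Signal" column must match the original signal column row for row.
check_df = pd.DataFrame(conn.execute(statement).fetchall())
check_df.columns = statement.columns.keys()
(check_df["Signal"] - check_df[lSignalName]).abs().max()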
In [ ]:
## ****************************** Trend Input CTE **************************************************
def month_index(date_expr):
    # month count since year 0 (the original called this julian_day, but it counts months)
    return extract('year', date_expr) * 12 + extract('month', date_expr)

lMonthsMin = lTimeInfo.mTimeMin.year * 12 + lTimeInfo.mTimeMin.month
lMonthsMax = lTimeInfo.mTimeMax.year * 12 + lTimeInfo.mTimeMax.month
# amplitude of the training span in months, to match month_index (the original
# divided a month difference by an amplitude in days, mixing units)
lAmplitude = lMonthsMax - lMonthsMin

def addTrendInputs(table, time_col):
    row_number_column = (func.row_number().over(order_by=asc(table.c[time_col])) - 1).label("row_number")
    normalized_time = (month_index(table.c[time_col]) - lMonthsMin) / lAmplitude
    normalized_time = normalized_time.label("normalized_time")
    normalized_time_2 = (normalized_time * normalized_time).label("normalized_time_2")
    normalized_time_3 = (normalized_time_2 * normalized_time).label("normalized_time_3")
    return [row_number_column, normalized_time, normalized_time_2, normalized_time_3]

trend_inputs = addTrendInputs(Transformation_CTE, lTimeInfo.mTime)
trend_inputs_CTE = select([Transformation_CTE] + trend_inputs).cte("CTE_Trend_Inputs")
statement2 = select([trend_inputs_CTE])
result2 = conn.execute(statement2).fetchmany(5)
print(result2)
trend_inputs_df = pd.DataFrame(result2)
trend_inputs_df.columns = statement2.columns.keys()
trend_inputs_df.head()
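In [ ]:
# Hypothetical check: with the month-based normalization, normalized_time starts at 0
# and grows by 1/lAmplitude per monthly observation.
trend_inputs_df[["row_number", "normalized_time"]]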
In [ ]:
# ****************************** TREND CTE **********************************
def addTrends(table):
    # assumes the selected trend is linear: one Ridge coefficient on normalized_time
    trend_expr = table.c["normalized_time"]
    trend_expr = trend.mTrendRidge.coef_[0] * trend_expr + trend.mTrendRidge.intercept_
    trend_expr = trend_expr.label(trend.mOutName)
    return [trend_expr]

sel1 = alias(select([trend_inputs_CTE]), "TrendInputAlias")
print(sel1.columns)
trends = addTrends(sel1)
trend_CTE = select([trend_inputs_CTE] + trends).cte("trend_CTE")
statement3 = select([trend_CTE])
result3 = conn.execute(statement3).fetchmany(15)
trend_df = pd.DataFrame(result3)
trend_df.columns = statement3.columns.keys()
trend_df.head()
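In [ ]:
# Hypothetical cross-check, pure pandas: recompute the linear trend client-side from
# the same Ridge coefficients and compare with the SQL column (should be ~0).
chk = trend_df.copy()
chk["trend_py"] = trend.mTrendRidge.coef_[0] * chk["normalized_time"] + trend.mTrendRidge.intercept_
(chk["trend_py"] - chk[trend.mOutName]).abs().max()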
In [ ]:
# ****************************** CYCLE_INPUT CTE **********************************
def addCycleInputs(table):
    # extract the usual calendar parts of the time column; note that 'dow' and
    # 'week' are PostgreSQL EXTRACT fields and may need adapting on other backends
    lTime = lAutoF.mSignalDecomposition.mBestTransformation.mTime
    date_expr = table.c[lTime]
    date_parts = [extract('year', date_expr).label(lTime + "_year"),
                  extract('month', date_expr).label(lTime + "_month"),
                  extract('day', date_expr).label(lTime + "_day"),
                  extract('hour', date_expr).label(lTime + "_hour"),
                  extract('minute', date_expr).label(lTime + "_minute"),
                  extract('second', date_expr).label(lTime + "_second"),
                  extract('dow', date_expr).label(lTime + "_dow"),
                  extract('week', date_expr).label(lTime + "_woy")]
    return date_parts
sel1 = alias(select([trend_CTE]), "Trend_CTE1")
print(sel1.columns)
cycle_inputs = addCycleInputs(sel1)
cycle_input_CTE = select([trend_CTE] + cycle_inputs).cte("cycle_input_CTE")
statement3 = select([cycle_input_CTE])
result3 = conn.execute(statement3).fetchmany(15)
cycle_input_df = pd.DataFrame(result3)
cycle_input_df.columns = statement3.columns.keys()
cycle_input_df.head()
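In [ ]:
# Hypothetical check: for this monthly dataset, the extracted month part should
# cycle through 1..12.
cycle_input_df[[lTimeName + "_year", lTimeName + "_month"]].head(12)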
In [ ]:
# ****************************** CYCLE CTE **********************************
def addCycles(table):
    # the best model has NoCycle: the cyclic contribution is identically zero
    cycle_expr = (table.c["row_number"] * 0.0).label(cycle.getCycleName())
    return [cycle_expr]
sel1 = alias(select([cycle_input_CTE]), "CycleInputAlias")
print(sel1.columns)
cycles = addCycles(sel1)
cycle_CTE = select([cycle_input_CTE] + cycles).cte("cycle_CTE")
statement3 = select([cycle_CTE])
result3 = conn.execute(statement3).fetchmany(15)
cycle_df = pd.DataFrame(result3)
cycle_df.columns = statement3.columns.keys()
cycle_df.head()
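In [ ]:
# Hypothetical check: NoCycle was selected, so the cycle column should be
# identically zero.
cycle_df[cycle.getCycleName()].abs().max()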
In [ ]:
# ****************************** CYCLE RESIDUES CTE **********************************
def addCycleResidues(table):
    exprs = list(table.columns)
    cycle_expr = table.c[cycle.getCycleName()]
    trend_expr = table.c[trend.mOutName]
    # residue convention used throughout this notebook: fit minus signal
    cycle_residue_expr = trend_expr + cycle_expr - table.c[lSignalName]
    cycle_residue_expr = cycle_residue_expr.label(cycle.getCycleResidueName())
    return exprs + [cycle_residue_expr]

sel1 = alias(select([cycle_CTE]), "CycleAlias")
print(sel1.columns)
cycle_residues = addCycleResidues(sel1)
cycle_residues_CTE = select(cycle_residues).cte("cycle_residues_CTE")
statement3 = select([cycle_residues_CTE])
result3 = conn.execute(statement3).fetchmany(15)
cycle_residues_df = pd.DataFrame(result3)
cycle_residues_df.columns = statement3.columns.keys()
cycle_residues_df.head()
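In [ ]:
# Hypothetical cross-check of the residue definition (fit minus signal, this
# notebook's sign convention) against a pandas recomputation.
chk = cycle_residues_df
(chk[trend.mOutName] + chk[cycle.getCycleName()] - chk[lSignalName]
 - chk[cycle.getCycleResidueName()]).abs().max()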
In [ ]:
# ****************************** AR_INPUT CTE **********************************
# (select t."Signal" from ds1 t where ((t."index" + 1) = ds1."index")) as Signal_LAG_1,
def createLags(table, H, col, index_col):
    # one correlated scalar subquery per lag, aligned on the row-number index
    TS = table
    TS1 = table.alias("t")
    col_expr_1 = TS1.c[col]
    index_expr = TS.c[index_col]
    index_expr_1 = TS1.c[index_col]
    exprs = [table]
    for h in range(1, H + 1):
        expr = select([col_expr_1]).where(index_expr == (index_expr_1 + h))
        expr = expr.label(col + "_Lag" + str(h))
        exprs = exprs + [expr]
    return exprs

def addARInputs(table):
    residue_name = cycle.getCycleResidueName()
    # createLags already includes the source table in its output list
    return createLags(table, len(ar.mARLagNames), residue_name, "row_number")
sel1 = alias(select([cycle_residues_CTE]), "AR_CTE1")
print(sel1.columns)
ar_inputs = addARInputs(sel1)
ar_input_CTE = select(ar_inputs).cte("ar_input_CTE")
statement3 = select([ar_input_CTE])
result3 = conn.execute(statement3).fetchmany(15)
ar_input_df = pd.DataFrame(result3)
ar_input_df.columns = statement3.columns.keys()
ar_input_df.head()
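In [ ]:
# Hypothetical peek at the generated lag columns: the first rows contain NULLs,
# since the earliest observations have no lagged predecessors.
ar_input_df.filter(like="_Lag").head()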
In [ ]:
# ****************************** AR CTE **********************************
def addARModel(table):
    exprs = list(table.columns)
    print(ar.mARLagNames)
    # linear autoregression: dot product of the Ridge coefficients with the lag columns
    ar_expr = None
    for i, feat in enumerate(ar.mARLagNames):
        term = ar.mARRidge.coef_[i] * table.c[feat]
        ar_expr = term if ar_expr is None else ar_expr + term
    ar_expr = ar_expr + ar.mARRidge.intercept_
    ar_expr = ar_expr.label(ar.mOutName)
    return exprs + [ar_expr]
sel1 = alias(select([ar_input_CTE]), "ARInputAlias")
print(sel1.columns)
ars = addARModel(sel1)
ar_CTE = select(ars).cte("ar_CTE")
statement3 = select([ar_CTE])
result3 = conn.execute(statement3).fetchall()
ar_df = pd.DataFrame(result3)
ar_df.columns = statement3.columns.keys()
ar_df.tail()
In [ ]:
# ****************************** TSMODEL CTE **********************************
lModelName = lAutoF.mSignalDecomposition.mBestTransformation.mBestModelName
def add_TS_Model(table):
    exprs = list(table.columns)
    # the decomposition: signal ~ trend + cycle + AR(residues)
    model_expr = table.c[trend.mOutName] + table.c[cycle.mOutName] + table.c[ar.mOutName]
    model_expr = model_expr.label(lModelName)
    model_residue = model_expr - table.c[lSignalName]
    model_residue = model_residue.label(lModelName + "Residue")
    return exprs + [model_expr, model_residue]
sel1 = alias(select([ar_CTE]), "AR_Alias")
print(sel1.columns)
model_vars = add_TS_Model(sel1)
model_CTE = select(model_vars).cte("model_CTE")
statement3 = select([model_CTE])
result3 = conn.execute(statement3).fetchall()
model_df = pd.DataFrame(result3)
model_df.columns = statement3.columns.keys()
model_df.tail()
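In [ ]:
# Hypothetical cross-check: the model column should equal trend + cycle + AR
# recomputed in pandas (NULL propagation aside on the first AR rows).
chk = model_df.copy()
chk["model_py"] = chk[trend.mOutName] + chk[cycle.mOutName] + chk[ar.mOutName]
(chk["model_py"] - chk[lModelName]).abs().max()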
In [ ]:
import pickle
from io import BytesIO
In [ ]:
# pickle writes bytes, so the buffer must be binary (the original cell used
# StringIO, which fails on dump in Python 3)
src = BytesIO()
p = pickle.Pickler(src)
In [ ]:
favorite_color = { "lion": "yellow", "kitty": "red" }
pickle.dumps(favorite_color)
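In [ ]:
# Round trip (added illustration): loads inverts dumps.
pickle.loads(pickle.dumps(favorite_color))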
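In [ ]:
# The remaining cells reference table2 and cte1, which are never created above. A
# minimal, hypothetical stand-in so they can run: a scratch table "ds2" holding the
# airline data as (Time, Signal) plus three numeric columns A, B, C used by the
# expression experiments below.
df2 = df.rename(columns={lDateCol: 'Time', lSignalName: 'Signal'})
df2['A'] = np.random.randn(df2.shape[0])
df2['B'] = np.random.randn(df2.shape[0])
df2['C'] = np.random.randn(df2.shape[0])
df2.to_sql("ds2", conn, if_exists='replace')
table2 = Table('ds2', meta, autoload=True, autoload_with=engine)
cte1 = select([table2]).cte("CTE1")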
In [ ]:
def createLagsAsJoins(table, H):
    # one outer join per lag, each joining the table to a shifted copy of itself
    TS = table
    join_H = TS
    for h in range(0, H):
        cte_h = select([TS]).cte("LAG_" + str(h + 1))
        join_H = join_H.join(cte_h, TS.c.index == (cte_h.c.index + h + 1), isouter=True)
    statement = select(join_H.columns).select_from(join_H)
    print(join_H.c.keys())
    return (statement, join_H)

# (select t."Signal" from ds1 t where ((t."index" + 1) = ds1."index")) as Signal_LAG_1,
def createLagsAsCTE(table, H, col, index_col):
    # same lags as above, but as correlated scalar subqueries wrapped in a CTE
    TS = table
    TS1 = table.alias("t")
    col_expr_1 = TS1.c[col]
    index_expr = TS.c[index_col]
    index_expr_1 = TS1.c[index_col]
    exprs = [table]
    for h in range(1, H + 1):
        expr = select([col_expr_1]).where(index_expr == (index_expr_1 + h))
        expr = expr.label(col + "_LAG_" + str(h))
        exprs = exprs + [expr]
    cte_H = select(exprs).cte("LAGS")
    statement = select([cte_H])
    print(cte_H.c.keys())
    return (statement, cte_H)

def addRowNumber(table, time_col):
    row_number_column = func.row_number().over(order_by=asc(table.c[time_col])).label('row_number')
    statement = select([table, row_number_column])
    return statement
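In [ ]:
# Hypothetical usage of the join-based lag builder above, for comparison with the
# scalar-subquery variant.
(stmt_j, join_j) = createLagsAsJoins(table2, 2)
conn.execute(stmt_j).fetchmany(5)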
In [ ]:
stmt = addRowNumber(table2 , 'Time');
result = conn.execute(stmt).fetchmany(5)
result
In [ ]:
stmt.columns.keys()
In [ ]:
(stmt , j) = createLagsAsCTE(table2 , 4 , 'Signal' , 'index')
result = conn.execute(stmt).fetchmany(5)
result
In [ ]:
expr_columns_cte = [ cte1 ];
expr_columns_table2 = [ table2 ];
cte2 = select([table2]).cte("CTE2")
cte3 = select([table2]).cte("CTE3")
cte4 = select([table2]).cte("CTE4")
join1 = table2.join(cte1, table2.c.index == (cte1.c.index + 1), isouter=True)
join2 = table2.join(cte2, table2.c.index == (cte2.c.index + 2), isouter=True)
join3 = table2.join(cte3, table2.c.index == (cte3.c.index + 3), isouter=True)
join4 = table2.join(cte4, table2.c.index == (cte4.c.index + 4), isouter=True)
join_1234 = table2.join(cte1, table2.c.index == (cte1.c.index + 1), isouter=True) \
                  .join(cte2, table2.c.index == (cte2.c.index + 2), isouter=True) \
                  .join(cte3, table2.c.index == (cte3.c.index + 3), isouter=True) \
                  .join(cte4, table2.c.index == (cte4.c.index + 4), isouter=True)
statement = select(expr_columns_cte + expr_columns_table2).where(table2.c.index == (cte1.c.index - 1))
statement = statement.order_by(table2.c.index)
statement1 = select([table2, cte1]).select_from(join1)
statement2 = select([table2, cte2]).select_from(join2)
statement3 = select([table2, cte3]).select_from(join3)
statement4 = select([table2, cte4]).select_from(join4)
statement1234 = select([table2, cte1, cte2, cte3, cte4]).select_from(join_1234)
In [ ]:
join_1234.c.keys()
In [ ]:
(stmt , j) = createLagsAsCTE(table2 , 1 , 'Signal' , 'index')
result = conn.execute(stmt).fetchmany(5)
result
In [ ]:
stmt.columns.keys()
In [ ]:
s = select([table2])
In [ ]:
expr = over(func.row_number(), order_by=table2.c.Time)
expr
In [ ]:
def buildSQLForLags(H, table):
    # hand-written SQL equivalent of createLagsAsCTE, for comparison with the
    # generated statements; note it is hard-wired to the ds1 layout
    lSQL = '\nWITH "LAGS_CTE" AS \n(SELECT \nds1."index" AS "index",\n ds1."Time" AS "Time",\n ds1."Signal" AS "Signal",\n'
    for h in range(0, H):
        lSQL = lSQL + '(select t."Signal" from ds1 t where ((t."index" + ' + str(h + 1) + ') = ds1."index")) as Signal_LAG_' + str(h + 1)
        if((h + 1) < H):
            lSQL = lSQL + ', \n'
    lSQL = lSQL + ' \nFROM ds1) \nSELECT "LAGS_CTE".* \nFROM "LAGS_CTE"'  # + table;
    return lSQL

#stmt = select([expr])
lSQL1 = "SELECT row_number() OVER (ORDER BY ds1.Time) AS anon_1 FROM ds1"
#result1 = conn.execute(lSQL1)
lSQL2 = "SELECT ROWID AS anon_1 FROM ds1"  # ROWID is SQLite-specific
lSQL3 = buildSQLForLags(7, 'ds1')
result2 = conn.execute(lSQL3).fetchall()
#result = conn.execute(stmt).fetchall()
result2
stmt3 = text(lSQL3)
In [ ]:
a = stmt3.columns()
In [ ]:
a = stmt3.compile()
In [ ]:
a.ctes
In [ ]:
stmt = select([table2]).\
order_by(desc(table2.c.A))
In [ ]:
from sqlalchemy.sql import column
c_A = table2.c.A
c_B = table2.c.B
In [ ]:
c_A
In [ ]:
print( c_A + c_B)
In [ ]:
c_Sum = c_A + c_B
c_Sum2 = c_A + 2 * c_B
c_Sum3 = (c_A - 55) / 67
# c_prev_A =   (left unfinished in the original; a lag would need a scalar subquery as in createLagsAsCTE)
stmt = select([c_A, c_B, c_Sum, c_Sum2, c_Sum3])
In [ ]:
result = conn.execute(stmt)
In [ ]:
result.fetchmany(5)
In [ ]:
def generateLinearCombination(iTable, iDict, iIntercept):
    # build sum_k(coef_k * column_k) + intercept as a SQL expression
    sum_1 = None
    for k, v in iDict.items():
        print(k)
        term = v * iTable.c[k]
        sum_1 = term if sum_1 is None else sum_1 + term
    sum_1 = sum_1 + iIntercept
    sum_1 = sum_1.label("MyScore")
    print(sum_1)
    return sum_1

def debrief_statement(stmt):
    print(stmt)
    result = conn.execute(stmt)
    print(result.fetchmany(6))
    return result

def debrief_expression(expr):
    print("debrief_start")
    stmt = select([expr], use_labels=True)
    result = debrief_statement(stmt)
    print("debrief_end")
    return result
In [ ]:
coeffs = {}
coeffs['A'] = 3
coeffs['B'] = 0.3
coeffs['C'] = -3
In [ ]:
expr = generateLinearCombination(table2, coeffs, iIntercept=5)
res = debrief_expression(expr)
In [ ]:
aaa = expr.compile()
In [ ]:
aaa.__dict__
In [ ]:
expr.expression
In [ ]:
(aa, bb) = expr._orig
In [ ]:
expr.compile?
In [ ]:
def generateCaseWhenDiscrete(iTable, iColumn, iDict, iElseValue):
    # one WHEN clause per (value, score) pair on a discrete column
    name = iTable.c[iColumn]
    case_1 = []
    for k, v in iDict.items():
        print(k)
        case_1 = case_1 + [(name == k, v)]
    print(case_1)
    case_2 = case(case_1, else_=iElseValue)
    return case_2
In [ ]:
mapping = {}
mapping["S"] = -1
mapping["W"] = 1
mapping["Z"] = 0
expr = generateCaseWhenDiscrete(table2, "A", mapping, iElseValue=5)
debrief_expression(expr)
In [ ]:
def generateCaseWhenWithSegments(iTable, iColumn, iDict, iElseValue):
    # one WHEN clause per segment upper bound; the boolean flag selects <= versus <
    # (WHEN order follows dict insertion order, so bounds must be inserted ascending)
    name = iTable.c[iColumn]
    case_1 = []
    for k, v in iDict.items():
        print(k)
        (a, b) = v
        expr = (name <= k) if b else (name < k)
        case_1 = case_1 + [(expr, a)]
    print(case_1)
    case_2 = case(case_1, else_=iElseValue)
    return case_2
In [ ]:
mapping = {}
#mapping[None] = (-5 , False)
mapping[0.5] = (-1 , False)
mapping[1.0] = (1 , False)
mapping[2] = (0 , True)
expr = generateCaseWhenWithSegments(table2, "B", mapping, iElseValue=5)
debrief_expression(expr)
In [ ]:
a = expr.whens  # .value is None for this form of case(); .whens holds the (condition, value) pairs
a
In [ ]:
for i in a:
    print(i)
In [ ]:
import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
print(clf.predict(X[2:3]))
print(clf.predict_proba(X[2:10]))