Launching

Using Spark 1.4 and Python 3.4. The way of launching the ipython notebook has changed

IPYTHON=1 IPYTHON_OPTS=notebook PYSPARK_PYTHON=python3 pyspark



In [1]:

    
import os, sys



In [27]:

    
from pyspark.sql import SQLContext, Row
import datetime
from collections import namedtuple
import numpy as np
import pandas as pd

Create the SQLContext



In [3]:

    
sql = SQLContext(sc)

Create different "classes" for parsing the input

Each row contains the (already computed) scenario values for each date and risk factor:

Row = DAY x RiskFactor x NeutralScenario x Scenarios

Ideally the rows would be parsed like this, but because custom row aggregation is not fully supported



In [36]:

    
RFScenario = namedtuple('RFScenario', ('rf', 'date', 'neutral', 'scenarios'))

and because the number of scenarios is fixed, each scenario is a column



In [6]:

    
def construct_scenarios_type(number_scenarios=250, name = 'Scenarios'):
    names = ['rf', 'date', 'neutral']
    scenario_cols = ["s%d"%x for x in range(1,number_scenarios+1)]
    names.extend(scenario_cols)
    Scenarios = namedtuple('Scenarios', names)
    return Scenarios, scenario_cols

Scenarios, scenario_cols = construct_scenarios_type()

and we can parse the rows of the csv file accordingly



In [30]:

    
DATA_DIR = os.path.join(os.pardir, 'data')
csv_filename = os.path.join(DATA_DIR, "scenarios2.csv")
pd.read_csv(csv_filename, header=None).head()









    Out[30]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      243
      244
      245
      246
      247
      248
      249
      250
      251
      252
    
  
  
    
      0
      RF1
      2015-04-14
      35.03
      34.678659
      35.193093
      35.611624
      34.920900
      35.275753
      34.885387
      34.794800
      ...
      35.445356
      34.829611
      35.514069
      35.372834
      35.343582
      34.828644
      34.824178
      34.774605
      34.879511
      34.856106
    
    
      1
      RF1
      2015-04-15
      35.20
      35.359100
      35.767287
      35.093559
      35.439729
      35.058909
      34.970520
      34.606949
      ...
      35.004487
      35.672156
      35.534417
      35.505887
      35.003543
      34.999186
      34.950814
      35.053176
      35.030339
      35.248848
    
    
      2
      RF1
      2015-04-16
      34.25
      34.863274
      34.135051
      34.509031
      34.097640
      34.002225
      33.610037
      34.563266
      ...
      34.760356
      34.611395
      34.580549
      34.037870
      34.033167
      33.980956
      34.091450
      34.066796
      34.302765
      34.434559
    
    
      3
      RF1
      2015-04-17
      33.30
      33.179576
      33.571484
      33.140390
      33.040462
      32.629955
      33.628347
      34.043960
      ...
      33.678814
      33.646469
      33.077790
      33.072865
      33.018190
      33.133906
      33.108084
      33.355289
      33.493416
      32.332506
    
    
      4
      RF1
      2015-04-20
      33.40
      33.664591
      33.244414
      33.146995
      32.746707
      33.720002
      34.124926
      33.055073
      ...
      33.737661
      33.183387
      33.178585
      33.125281
      33.238094
      33.212921
      33.453890
      33.588511
      32.456574
      32.499345
    
  

5 rows × 253 columns



In [31]:

    
from pyspark.mllib.linalg import Vectors, DenseVector, SparseVector, _convert_to_vector 
def parse(row):
    DATE_FMT = "%Y-%m-%d"
    row[0] = row[0]
    row[1] = datetime.datetime.strptime(row[1], DATE_FMT)
    for i in np.arange(2,len(row)):
        row[i] = float(row[i])
    return RFScenario(row[0], row[1], row[2], DenseVector(row[3:6]))


def parse_explicit(row):
    DATE_FMT = "%Y-%m-%d"
    row[0] = row[0]
    row[1] = datetime.datetime.strptime(row[1], DATE_FMT)
    for i in np.arange(2,len(row)):
        row[i] = float(row[i])
    return Scenarios(*row)

Process the file in Spark



In [21]:

    
lines = sc.textFile(csv_filename)
parts = lines.map(lambda l: l.split(","))
rows = parts.map(parse)



In [22]:

    
rows_exp = parts.map(parse_explicit)
df_exp = sql.createDataFrame(rows_exp)



In [23]:

    
df_exp.head(1)









    Out[23]:





[Row(rf='RF1', date=datetime.datetime(2015, 4, 14, 0, 0), neutral=35.03, s1=34.67865912475935, s2=35.19309272805697, s3=35.61162381981636, s4=34.92090026310803, s5=35.2757534118607, s6=34.88538726175033, s7=34.7947997828024, s8=34.422262176432426, s9=35.32719209964416, s10=35.70279197608641, s11=34.70929644638722, s12=35.16468125943031, s13=34.7083308292015, s14=34.6909371576162, s15=34.24743006243561, s16=35.18043521116673, s17=35.26710783535421, s18=34.68646893255419, s19=35.42074885668956, s20=35.10073110319397, s21=34.62729258634153, s22=34.590238555794265, s23=35.12518904748294, s24=34.54164263329866, s25=34.780562034944296, s26=35.009313286776944, s27=35.19898478845906, s28=34.22306349493622, s29=35.58776307961069, s30=34.86817402539321, s31=34.904330793280025, s32=34.501576221982006, s33=34.908487404457446, s34=34.927195922549025, s35=35.26215239163238, s36=33.92269249584361, s37=34.30733618265355, s38=34.834677237655015, s39=34.62744354378543, s40=34.77382130417254, s41=34.72824373996907, s42=34.854606067026005, s43=34.64222172604983, s44=34.60156319552878, s45=35.70918069109046, s46=35.56681653497656, s47=33.896109761348846, s48=34.90079622390159, s49=34.58819986436787, s50=35.188539963456876, s51=34.01433872326011, s52=35.0739928313814, s53=34.613187678146446, s54=35.13416496576896, s55=34.99445195403314, s56=35.868772271139115, s57=34.43529924147874, s58=34.37558742473077, s59=35.767529209278806, s60=34.20841279797285, s61=35.02037678092141, s62=35.730742806800265, s63=35.58706083101516, s64=35.41139867751124, s65=34.35314972833785, s66=35.390550659047165, s67=34.766467265989796, s68=34.181696092298104, s69=35.70013050000655, s70=35.533538713265656, s71=34.69355471713625, s72=35.03, s73=34.88218261135854, s74=34.75938897742558, s75=34.08254454104669, s76=34.84672564828843, s77=34.90031331977329, s78=34.217426964067094, s79=35.03, s80=34.829337228686626, s81=35.640694811872486, s82=34.6621907358054, s83=34.9477616512553, s84=35.36955717482652, s85=35.02064237325755, s86=34.81790722492786, s87=35.109690177474775, s88=35.121816561235335, s89=34.96703922436443, s90=35.36876192881472, s91=34.95430350135521, s92=35.658557640675376, s93=35.68806992767282, s94=35.54751315223279, s95=34.39708977622091, s96=34.94204702169191, s97=34.919015204756775, s98=35.78236766249537, s99=34.92322674784095, s100=35.31876478719741, s101=34.788226636248645, s102=34.812093832739485, s103=35.62735090267868, s104=34.988263286860054, s105=34.795472900985914, s106=34.90865266511509, s107=35.07566419817475, s108=35.147503691958356, s109=35.832642229351144, s110=35.28787006150122, s111=34.8931410928295, s112=34.36407120219385, s113=35.562728269783065, s114=34.288690859551494, s115=35.3824406599367, s116=34.52621418361805, s117=34.99706834710071, s118=34.984682560983956, s119=34.43862544521726, s120=34.994913750126514, s121=35.07824604858676, s122=34.02040491040312, s123=35.62842443816883, s124=33.83964625719033, s125=34.307605664957116, s126=34.80412547187779, s127=35.02025306285834, s128=34.45045630321518, s129=34.04539649220161, s130=35.692919099972116, s131=35.453361214684065, s132=35.5980879542828, s133=34.502076552855804, s134=35.53265621968611, s135=35.18513529612576, s136=34.64146728562058, s137=35.81497702341632, s138=34.13365858374198, s139=34.710849475194735, s140=35.441126639612605, s141=34.735309443333264, s142=34.95120360700475, s143=35.10310881092355, s144=34.78003784826053, s145=34.70277926059655, s146=34.81468760885061, s147=35.16861531580745, s148=34.51921794752109, s149=35.15555177982842, s150=35.187221664553896, s151=35.22934866861739, s152=35.52802972799997, s153=35.14857742960984, s154=34.621016530630264, s155=35.63793940845333, s156=35.81806189284964, s157=35.639034885494894, s158=35.3610967180445, s159=34.8275485284126, s160=34.66429171826707, s161=35.343235756698064, s162=35.16162979597568, s163=34.644343702439386, s164=35.732088633804885, s165=34.379164905532846, s166=34.58600679010698, s167=34.41992802695751, s168=35.223875648636664, s169=34.516177001997306, s170=34.31381019707277, s171=35.106266569014124, s172=35.3559247203471, s173=35.88780549634091, s174=34.46293437448051, s175=35.1174672101038, s176=35.11080976204788, s177=35.0669255901668, s178=35.08700876483932, s179=34.81577549469441, s180=34.65169524785512, s181=34.63104192343893, s182=35.492450840709935, s183=33.723759518283956, s184=34.639464724361375, s185=35.290667410447604, s186=35.08309642239941, s187=34.59805207136981, s188=35.02059708314596, s189=35.46689759917978, s190=35.04887237318198, s191=34.83564976384203, s192=35.18008539520424, s193=35.65359552361088, s194=35.49838599299696, s195=35.4211973789694, s196=34.19181839405501, s197=35.37685667462542, s198=34.369390437088434, s199=34.23251735715107, s200=36.107741224520915, s201=34.712951815325674, s202=35.82105791565597, s203=35.77258752779277, s204=34.58277044729649, s205=35.314907187188275, s206=34.634499042072115, s207=34.99113873890582, s208=35.04337947006084, s209=34.8649579055206, s210=35.92777558392091, s211=35.46071686065867, s212=35.22273849877033, s213=35.150776011353, s214=34.90665926320567, s215=35.57693287877255, s216=34.6650630572609, s217=35.216943277522056, s218=34.83355351507067, s219=34.855309964266716, s220=35.116977441073736, s221=35.17138672682224, s222=34.55213158061924, s223=34.95298716811318, s224=35.06185210323065, s225=34.73835146645663, s226=35.13111997732124, s227=34.37016675180393, s228=35.38373966358735, s229=35.13284944102854, s230=35.13555429352917, s231=35.775779025911675, s232=34.820897220882856, s233=35.55933866642973, s234=34.885979080521025, s235=35.98833780725287, s236=35.4593844276052, s237=35.39339030555425, s238=34.74521126608465, s239=34.76266444232124, s240=35.03, s241=35.445356246460555, s242=34.829610784434244, s243=35.51406910666242, s244=35.37283430286354, s245=35.3435824194863, s246=34.828643512375734, s247=34.82417831041008, s248=34.77460466629185, s249=34.8795109527282, s250=34.8561058578509)]

Let's do some VaR aggregation

For each day, we want to aggregate scenarios from different risk factors, and then compute the Value at Risk per day.



In [38]:

    
def var(scenarios, level=99, neutral_scenario=0):
    pnls = scenarios - neutral_scenario
    return - np.percentile(pnls, 100-level, interpolation='linear')



In [39]:

    
scenario_dates = df_exp.groupBy('date').sum()
var_rdd = scenario_dates.map(lambda r: (r[0], r[1], float(var(np.array(r[2:]) - r[1]))))
df_var = sql.createDataFrame(var_rdd, schema=['date', 'neutral', 'var'])



In [40]:

    
%matplotlib notebook
df_var.toPandas().plot()









    














    











    Out[40]:





<matplotlib.axes._subplots.AxesSubplot at 0x1140d8f98>

Slightly more complex case, with two portfolios

Define portfolios and put them into a Spark DataFrame



In [63]:

    
pf_rdd = sc.parallelize([('P1', 'RF1', 1.), ('P1', 'RF2', 2.), ('P2', 'RF1', 0.2), ('P2', 'RF2', -0.8)])
dfpf = sql.createDataFrame(pf_rdd, ['portfolio', 'rf', 'qty'])



In [64]:

    
dfpf.collect()









    Out[64]:





[Row(portfolio='P1', rf='RF1', qty=1.0),
 Row(portfolio='P1', rf='RF2', qty=2.0),
 Row(portfolio='P2', rf='RF1', qty=0.2),
 Row(portfolio='P2', rf='RF2', qty=-0.8)]



In [65]:

    
res = df_exp.join(dfpf, dfpf.rf == df_exp.rf)



In [66]:

    
res.head(1)









    Out[66]:





[Row(rf='RF1', date=datetime.datetime(2015, 4, 14, 0, 0), neutral=35.03, s1=34.67865912475935, s2=35.19309272805697, s3=35.61162381981636, s4=34.92090026310803, s5=35.2757534118607, s6=34.88538726175033, s7=34.7947997828024, s8=34.422262176432426, s9=35.32719209964416, s10=35.70279197608641, s11=34.70929644638722, s12=35.16468125943031, s13=34.7083308292015, s14=34.6909371576162, s15=34.24743006243561, s16=35.18043521116673, s17=35.26710783535421, s18=34.68646893255419, s19=35.42074885668956, s20=35.10073110319397, s21=34.62729258634153, s22=34.590238555794265, s23=35.12518904748294, s24=34.54164263329866, s25=34.780562034944296, s26=35.009313286776944, s27=35.19898478845906, s28=34.22306349493622, s29=35.58776307961069, s30=34.86817402539321, s31=34.904330793280025, s32=34.501576221982006, s33=34.908487404457446, s34=34.927195922549025, s35=35.26215239163238, s36=33.92269249584361, s37=34.30733618265355, s38=34.834677237655015, s39=34.62744354378543, s40=34.77382130417254, s41=34.72824373996907, s42=34.854606067026005, s43=34.64222172604983, s44=34.60156319552878, s45=35.70918069109046, s46=35.56681653497656, s47=33.896109761348846, s48=34.90079622390159, s49=34.58819986436787, s50=35.188539963456876, s51=34.01433872326011, s52=35.0739928313814, s53=34.613187678146446, s54=35.13416496576896, s55=34.99445195403314, s56=35.868772271139115, s57=34.43529924147874, s58=34.37558742473077, s59=35.767529209278806, s60=34.20841279797285, s61=35.02037678092141, s62=35.730742806800265, s63=35.58706083101516, s64=35.41139867751124, s65=34.35314972833785, s66=35.390550659047165, s67=34.766467265989796, s68=34.181696092298104, s69=35.70013050000655, s70=35.533538713265656, s71=34.69355471713625, s72=35.03, s73=34.88218261135854, s74=34.75938897742558, s75=34.08254454104669, s76=34.84672564828843, s77=34.90031331977329, s78=34.217426964067094, s79=35.03, s80=34.829337228686626, s81=35.640694811872486, s82=34.6621907358054, s83=34.9477616512553, s84=35.36955717482652, s85=35.02064237325755, s86=34.81790722492786, s87=35.109690177474775, s88=35.121816561235335, s89=34.96703922436443, s90=35.36876192881472, s91=34.95430350135521, s92=35.658557640675376, s93=35.68806992767282, s94=35.54751315223279, s95=34.39708977622091, s96=34.94204702169191, s97=34.919015204756775, s98=35.78236766249537, s99=34.92322674784095, s100=35.31876478719741, s101=34.788226636248645, s102=34.812093832739485, s103=35.62735090267868, s104=34.988263286860054, s105=34.795472900985914, s106=34.90865266511509, s107=35.07566419817475, s108=35.147503691958356, s109=35.832642229351144, s110=35.28787006150122, s111=34.8931410928295, s112=34.36407120219385, s113=35.562728269783065, s114=34.288690859551494, s115=35.3824406599367, s116=34.52621418361805, s117=34.99706834710071, s118=34.984682560983956, s119=34.43862544521726, s120=34.994913750126514, s121=35.07824604858676, s122=34.02040491040312, s123=35.62842443816883, s124=33.83964625719033, s125=34.307605664957116, s126=34.80412547187779, s127=35.02025306285834, s128=34.45045630321518, s129=34.04539649220161, s130=35.692919099972116, s131=35.453361214684065, s132=35.5980879542828, s133=34.502076552855804, s134=35.53265621968611, s135=35.18513529612576, s136=34.64146728562058, s137=35.81497702341632, s138=34.13365858374198, s139=34.710849475194735, s140=35.441126639612605, s141=34.735309443333264, s142=34.95120360700475, s143=35.10310881092355, s144=34.78003784826053, s145=34.70277926059655, s146=34.81468760885061, s147=35.16861531580745, s148=34.51921794752109, s149=35.15555177982842, s150=35.187221664553896, s151=35.22934866861739, s152=35.52802972799997, s153=35.14857742960984, s154=34.621016530630264, s155=35.63793940845333, s156=35.81806189284964, s157=35.639034885494894, s158=35.3610967180445, s159=34.8275485284126, s160=34.66429171826707, s161=35.343235756698064, s162=35.16162979597568, s163=34.644343702439386, s164=35.732088633804885, s165=34.379164905532846, s166=34.58600679010698, s167=34.41992802695751, s168=35.223875648636664, s169=34.516177001997306, s170=34.31381019707277, s171=35.106266569014124, s172=35.3559247203471, s173=35.88780549634091, s174=34.46293437448051, s175=35.1174672101038, s176=35.11080976204788, s177=35.0669255901668, s178=35.08700876483932, s179=34.81577549469441, s180=34.65169524785512, s181=34.63104192343893, s182=35.492450840709935, s183=33.723759518283956, s184=34.639464724361375, s185=35.290667410447604, s186=35.08309642239941, s187=34.59805207136981, s188=35.02059708314596, s189=35.46689759917978, s190=35.04887237318198, s191=34.83564976384203, s192=35.18008539520424, s193=35.65359552361088, s194=35.49838599299696, s195=35.4211973789694, s196=34.19181839405501, s197=35.37685667462542, s198=34.369390437088434, s199=34.23251735715107, s200=36.107741224520915, s201=34.712951815325674, s202=35.82105791565597, s203=35.77258752779277, s204=34.58277044729649, s205=35.314907187188275, s206=34.634499042072115, s207=34.99113873890582, s208=35.04337947006084, s209=34.8649579055206, s210=35.92777558392091, s211=35.46071686065867, s212=35.22273849877033, s213=35.150776011353, s214=34.90665926320567, s215=35.57693287877255, s216=34.6650630572609, s217=35.216943277522056, s218=34.83355351507067, s219=34.855309964266716, s220=35.116977441073736, s221=35.17138672682224, s222=34.55213158061924, s223=34.95298716811318, s224=35.06185210323065, s225=34.73835146645663, s226=35.13111997732124, s227=34.37016675180393, s228=35.38373966358735, s229=35.13284944102854, s230=35.13555429352917, s231=35.775779025911675, s232=34.820897220882856, s233=35.55933866642973, s234=34.885979080521025, s235=35.98833780725287, s236=35.4593844276052, s237=35.39339030555425, s238=34.74521126608465, s239=34.76266444232124, s240=35.03, s241=35.445356246460555, s242=34.829610784434244, s243=35.51406910666242, s244=35.37283430286354, s245=35.3435824194863, s246=34.828643512375734, s247=34.82417831041008, s248=34.77460466629185, s249=34.8795109527282, s250=34.8561058578509, portfolio='P1', rf='RF1', qty=1.0)]



In [72]:

    
# scenario_dates = df_exp.groupBy('date').sum()
var_per_portfolio = res.groupBy('date', 'portfolio').sum()
# var_per_portfolio.toPandas().plot()
var_per_portfolio = var_per_portfolio.map(lambda r: (r[0], r[1], r[2], float(var(np.array(r[3:]) - r[2]))))
var_per_portfolio = sql.createDataFrame(var_per_portfolio, schema=['date', 'portfolio', 'neutral', 'var'])



In [82]:

    
%matplotlib notebook
df1 = var_per_portfolio.toPandas()
df2 = df1.set_index(['date', 'portfolio'])
# ['neutral'].plot(subplots=True)



In [95]:

    
df3 = df2.unstack(1) #['var'].plot(subplots=True)
df3

Me trying to register python UDFs



In [ ]:

    
f = sql.udf.register("fadd", lambda x: (np.array(x[3]) * 3.1).tolist(), ArrayType(FloatType()))
fagg = sql.udf.register("fagg", lambda x,y: (np.array(x[3]) + np.array(y[3])).tolist(), ArrayType(FloatType()))



In [76]:

    
sql.registerDataFrameAsTable(df, 'scen')



In [128]:

    
sql.sql('select date, fadd(scenarios) from scen group by date').collect()









    



---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-128-dddb85d05e4a> in <module>()
----> 1 sql.sql('select date, fadd(scenarios)  from scen group by date').collect()

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/pyspark/sql/context.py in sql(self, sqlQuery)
    514         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    515         """
--> 516         return DataFrame(self._ssql_ctx.sql(sqlQuery), self)
    517 
    518     @since(1.0)

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
    536         answer = self.gateway_client.send_command(command)
    537         return_value = get_return_value(answer, self.gateway_client,
--> 538                 self.target_id, self.name)
    539 
    540         for temp_arg in temp_args:

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    298                 raise Py4JJavaError(
    299                     'An error occurred while calling {0}{1}{2}.\n'.
--> 300                     format(target_id, '.', name), value)
    301             else:
    302                 raise Py4JError(

Py4JJavaError: An error occurred while calling o111.sql.
: org.apache.spark.sql.AnalysisException: expression 'pythonUDF' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:42)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:95)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:111)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:111)
	at scala.collection.immutable.List.foreach(List.scala:318)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:111)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:97)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:97)
	at scala.collection.immutable.List.foreach(List.scala:318)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:97)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:42)
	at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:920)
	at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:131)
	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:744)
	at sun.reflect.GeneratedMethodAccessor53.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
	at py4j.Gateway.invoke(Gateway.java:259)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:207)
	at java.lang.Thread.run(Thread.java:744)

	neutral		var
portfolio	P1	P2	P1	P2
date
2015-04-14	70.06	70.06	2.324244	2.324244
2015-04-15	70.40	70.40	2.268607	2.268607
2015-04-16	68.50	68.50	2.445955	2.445955
2015-04-17	66.60	66.60	2.559204	2.559204
2015-04-20	66.80	66.80	2.495879	2.495879
2015-04-21	66.68	66.68	2.417056	2.417056
2015-04-22	67.90	67.90	2.506571	2.506571
2015-04-23	68.30	68.30	2.456964	2.456964
2015-04-24	68.52	68.52	2.393792	2.393792
2015-04-27	65.22	65.22	2.911771	2.911771
2015-04-28	64.08	64.08	2.978352	2.978352
2015-04-29	63.46	63.46	2.872365	2.872365
2015-04-30	64.20	64.20	2.882594	2.882594
2015-05-01	64.58	64.58	2.834198	2.834198
2015-05-04	64.18	64.18	2.737028	2.737028
2015-05-05	63.02	63.02	2.676874	2.676874
2015-05-06	64.52	64.52	2.864054	2.864054
2015-05-07	64.10	64.10	2.766714	2.766714
2015-05-08	64.84	64.84	2.768306	2.768306

	0	1	2	3	4	5	6	7	8	9	...	243	244	245	246	247	248	249	250	251	252
0	RF1	2015-04-14	35.03	34.678659	35.193093	35.611624	34.920900	35.275753	34.885387	34.794800	...	35.445356	34.829611	35.514069	35.372834	35.343582	34.828644	34.824178	34.774605	34.879511	34.856106
1	RF1	2015-04-15	35.20	35.359100	35.767287	35.093559	35.439729	35.058909	34.970520	34.606949	...	35.004487	35.672156	35.534417	35.505887	35.003543	34.999186	34.950814	35.053176	35.030339	35.248848
2	RF1	2015-04-16	34.25	34.863274	34.135051	34.509031	34.097640	34.002225	33.610037	34.563266	...	34.760356	34.611395	34.580549	34.037870	34.033167	33.980956	34.091450	34.066796	34.302765	34.434559
3	RF1	2015-04-17	33.30	33.179576	33.571484	33.140390	33.040462	32.629955	33.628347	34.043960	...	33.678814	33.646469	33.077790	33.072865	33.018190	33.133906	33.108084	33.355289	33.493416	32.332506
4	RF1	2015-04-20	33.40	33.664591	33.244414	33.146995	32.746707	33.720002	34.124926	33.055073	...	33.737661	33.183387	33.178585	33.125281	33.238094	33.212921	33.453890	33.588511	32.456574	32.499345