Launching

Using Spark 1.4 and Python 3.4. The way of launching the ipython notebook has changed

IPYTHON=1 IPYTHON_OPTS=notebook PYSPARK_PYTHON=python3 pyspark

In [1]:
import os, sys

In [27]:
from pyspark.sql import SQLContext, Row
import datetime
from collections import namedtuple
import numpy as np
import pandas as pd

Create the SQLContext


In [3]:
sql = SQLContext(sc)

Create different "classes" for parsing the input

Each row contains the (already computed) scenario values for each date and risk factor:

Row = DAY x RiskFactor x NeutralScenario x Scenarios

Ideally the rows would be parsed like this, but because custom row aggregation is not fully supported


In [36]:
RFScenario = namedtuple('RFScenario', ('rf', 'date', 'neutral', 'scenarios'))

and because the number of scenarios is fixed, each scenario is a column


In [6]:
def construct_scenarios_type(number_scenarios=250, name = 'Scenarios'):
    names = ['rf', 'date', 'neutral']
    scenario_cols = ["s%d"%x for x in range(1,number_scenarios+1)]
    names.extend(scenario_cols)
    Scenarios = namedtuple('Scenarios', names)
    return Scenarios, scenario_cols

Scenarios, scenario_cols = construct_scenarios_type()

and we can parse the rows of the csv file accordingly


In [30]:
DATA_DIR = os.path.join(os.pardir, 'data')
csv_filename = os.path.join(DATA_DIR, "scenarios2.csv")
pd.read_csv(csv_filename, header=None).head()


Out[30]:
0 1 2 3 4 5 6 7 8 9 ... 243 244 245 246 247 248 249 250 251 252
0 RF1 2015-04-14 35.03 34.678659 35.193093 35.611624 34.920900 35.275753 34.885387 34.794800 ... 35.445356 34.829611 35.514069 35.372834 35.343582 34.828644 34.824178 34.774605 34.879511 34.856106
1 RF1 2015-04-15 35.20 35.359100 35.767287 35.093559 35.439729 35.058909 34.970520 34.606949 ... 35.004487 35.672156 35.534417 35.505887 35.003543 34.999186 34.950814 35.053176 35.030339 35.248848
2 RF1 2015-04-16 34.25 34.863274 34.135051 34.509031 34.097640 34.002225 33.610037 34.563266 ... 34.760356 34.611395 34.580549 34.037870 34.033167 33.980956 34.091450 34.066796 34.302765 34.434559
3 RF1 2015-04-17 33.30 33.179576 33.571484 33.140390 33.040462 32.629955 33.628347 34.043960 ... 33.678814 33.646469 33.077790 33.072865 33.018190 33.133906 33.108084 33.355289 33.493416 32.332506
4 RF1 2015-04-20 33.40 33.664591 33.244414 33.146995 32.746707 33.720002 34.124926 33.055073 ... 33.737661 33.183387 33.178585 33.125281 33.238094 33.212921 33.453890 33.588511 32.456574 32.499345

5 rows × 253 columns


In [31]:
from pyspark.mllib.linalg import Vectors, DenseVector, SparseVector, _convert_to_vector 
def parse(row):
    DATE_FMT = "%Y-%m-%d"
    row[0] = row[0]
    row[1] = datetime.datetime.strptime(row[1], DATE_FMT)
    for i in np.arange(2,len(row)):
        row[i] = float(row[i])
    return RFScenario(row[0], row[1], row[2], DenseVector(row[3:6]))


def parse_explicit(row):
    DATE_FMT = "%Y-%m-%d"
    row[0] = row[0]
    row[1] = datetime.datetime.strptime(row[1], DATE_FMT)
    for i in np.arange(2,len(row)):
        row[i] = float(row[i])
    return Scenarios(*row)

Process the file in Spark


In [21]:
lines = sc.textFile(csv_filename)
parts = lines.map(lambda l: l.split(","))
rows = parts.map(parse)

In [22]:
rows_exp = parts.map(parse_explicit)
df_exp = sql.createDataFrame(rows_exp)

In [23]:
df_exp.head(1)


Out[23]:
[Row(rf='RF1', date=datetime.datetime(2015, 4, 14, 0, 0), neutral=35.03, s1=34.67865912475935, s2=35.19309272805697, s3=35.61162381981636, s4=34.92090026310803, s5=35.2757534118607, s6=34.88538726175033, s7=34.7947997828024, s8=34.422262176432426, s9=35.32719209964416, s10=35.70279197608641, s11=34.70929644638722, s12=35.16468125943031, s13=34.7083308292015, s14=34.6909371576162, s15=34.24743006243561, s16=35.18043521116673, s17=35.26710783535421, s18=34.68646893255419, s19=35.42074885668956, s20=35.10073110319397, s21=34.62729258634153, s22=34.590238555794265, s23=35.12518904748294, s24=34.54164263329866, s25=34.780562034944296, s26=35.009313286776944, s27=35.19898478845906, s28=34.22306349493622, s29=35.58776307961069, s30=34.86817402539321, s31=34.904330793280025, s32=34.501576221982006, s33=34.908487404457446, s34=34.927195922549025, s35=35.26215239163238, s36=33.92269249584361, s37=34.30733618265355, s38=34.834677237655015, s39=34.62744354378543, s40=34.77382130417254, s41=34.72824373996907, s42=34.854606067026005, s43=34.64222172604983, s44=34.60156319552878, s45=35.70918069109046, s46=35.56681653497656, s47=33.896109761348846, s48=34.90079622390159, s49=34.58819986436787, s50=35.188539963456876, s51=34.01433872326011, s52=35.0739928313814, s53=34.613187678146446, s54=35.13416496576896, s55=34.99445195403314, s56=35.868772271139115, s57=34.43529924147874, s58=34.37558742473077, s59=35.767529209278806, s60=34.20841279797285, s61=35.02037678092141, s62=35.730742806800265, s63=35.58706083101516, s64=35.41139867751124, s65=34.35314972833785, s66=35.390550659047165, s67=34.766467265989796, s68=34.181696092298104, s69=35.70013050000655, s70=35.533538713265656, s71=34.69355471713625, s72=35.03, s73=34.88218261135854, s74=34.75938897742558, s75=34.08254454104669, s76=34.84672564828843, s77=34.90031331977329, s78=34.217426964067094, s79=35.03, s80=34.829337228686626, s81=35.640694811872486, s82=34.6621907358054, s83=34.9477616512553, s84=35.36955717482652, s85=35.02064237325755, s86=34.81790722492786, s87=35.109690177474775, s88=35.121816561235335, s89=34.96703922436443, s90=35.36876192881472, s91=34.95430350135521, s92=35.658557640675376, s93=35.68806992767282, s94=35.54751315223279, s95=34.39708977622091, s96=34.94204702169191, s97=34.919015204756775, s98=35.78236766249537, s99=34.92322674784095, s100=35.31876478719741, s101=34.788226636248645, s102=34.812093832739485, s103=35.62735090267868, s104=34.988263286860054, s105=34.795472900985914, s106=34.90865266511509, s107=35.07566419817475, s108=35.147503691958356, s109=35.832642229351144, s110=35.28787006150122, s111=34.8931410928295, s112=34.36407120219385, s113=35.562728269783065, s114=34.288690859551494, s115=35.3824406599367, s116=34.52621418361805, s117=34.99706834710071, s118=34.984682560983956, s119=34.43862544521726, s120=34.994913750126514, s121=35.07824604858676, s122=34.02040491040312, s123=35.62842443816883, s124=33.83964625719033, s125=34.307605664957116, s126=34.80412547187779, s127=35.02025306285834, s128=34.45045630321518, s129=34.04539649220161, s130=35.692919099972116, s131=35.453361214684065, s132=35.5980879542828, s133=34.502076552855804, s134=35.53265621968611, s135=35.18513529612576, s136=34.64146728562058, s137=35.81497702341632, s138=34.13365858374198, s139=34.710849475194735, s140=35.441126639612605, s141=34.735309443333264, s142=34.95120360700475, s143=35.10310881092355, s144=34.78003784826053, s145=34.70277926059655, s146=34.81468760885061, s147=35.16861531580745, s148=34.51921794752109, s149=35.15555177982842, s150=35.187221664553896, s151=35.22934866861739, s152=35.52802972799997, s153=35.14857742960984, s154=34.621016530630264, s155=35.63793940845333, s156=35.81806189284964, s157=35.639034885494894, s158=35.3610967180445, s159=34.8275485284126, s160=34.66429171826707, s161=35.343235756698064, s162=35.16162979597568, s163=34.644343702439386, s164=35.732088633804885, s165=34.379164905532846, s166=34.58600679010698, s167=34.41992802695751, s168=35.223875648636664, s169=34.516177001997306, s170=34.31381019707277, s171=35.106266569014124, s172=35.3559247203471, s173=35.88780549634091, s174=34.46293437448051, s175=35.1174672101038, s176=35.11080976204788, s177=35.0669255901668, s178=35.08700876483932, s179=34.81577549469441, s180=34.65169524785512, s181=34.63104192343893, s182=35.492450840709935, s183=33.723759518283956, s184=34.639464724361375, s185=35.290667410447604, s186=35.08309642239941, s187=34.59805207136981, s188=35.02059708314596, s189=35.46689759917978, s190=35.04887237318198, s191=34.83564976384203, s192=35.18008539520424, s193=35.65359552361088, s194=35.49838599299696, s195=35.4211973789694, s196=34.19181839405501, s197=35.37685667462542, s198=34.369390437088434, s199=34.23251735715107, s200=36.107741224520915, s201=34.712951815325674, s202=35.82105791565597, s203=35.77258752779277, s204=34.58277044729649, s205=35.314907187188275, s206=34.634499042072115, s207=34.99113873890582, s208=35.04337947006084, s209=34.8649579055206, s210=35.92777558392091, s211=35.46071686065867, s212=35.22273849877033, s213=35.150776011353, s214=34.90665926320567, s215=35.57693287877255, s216=34.6650630572609, s217=35.216943277522056, s218=34.83355351507067, s219=34.855309964266716, s220=35.116977441073736, s221=35.17138672682224, s222=34.55213158061924, s223=34.95298716811318, s224=35.06185210323065, s225=34.73835146645663, s226=35.13111997732124, s227=34.37016675180393, s228=35.38373966358735, s229=35.13284944102854, s230=35.13555429352917, s231=35.775779025911675, s232=34.820897220882856, s233=35.55933866642973, s234=34.885979080521025, s235=35.98833780725287, s236=35.4593844276052, s237=35.39339030555425, s238=34.74521126608465, s239=34.76266444232124, s240=35.03, s241=35.445356246460555, s242=34.829610784434244, s243=35.51406910666242, s244=35.37283430286354, s245=35.3435824194863, s246=34.828643512375734, s247=34.82417831041008, s248=34.77460466629185, s249=34.8795109527282, s250=34.8561058578509)]

Let's do some VaR aggregation

For each day, we want to aggregate scenarios from different risk factors, and then compute the Value at Risk per day.


In [38]:
def var(scenarios, level=99, neutral_scenario=0):
    pnls = scenarios - neutral_scenario
    return - np.percentile(pnls, 100-level, interpolation='linear')

In [39]:
scenario_dates = df_exp.groupBy('date').sum()
var_rdd = scenario_dates.map(lambda r: (r[0], r[1], float(var(np.array(r[2:]) - r[1]))))
df_var = sql.createDataFrame(var_rdd, schema=['date', 'neutral', 'var'])

In [40]:
%matplotlib notebook
df_var.toPandas().plot()


Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x1140d8f98>

Slightly more complex case, with two portfolios

Define portfolios and put them into a Spark DataFrame


In [63]:
pf_rdd = sc.parallelize([('P1', 'RF1', 1.), ('P1', 'RF2', 2.), ('P2', 'RF1', 0.2), ('P2', 'RF2', -0.8)])
dfpf = sql.createDataFrame(pf_rdd, ['portfolio', 'rf', 'qty'])

In [64]:
dfpf.collect()


Out[64]:
[Row(portfolio='P1', rf='RF1', qty=1.0),
 Row(portfolio='P1', rf='RF2', qty=2.0),
 Row(portfolio='P2', rf='RF1', qty=0.2),
 Row(portfolio='P2', rf='RF2', qty=-0.8)]

In [65]:
res = df_exp.join(dfpf, dfpf.rf == df_exp.rf)

In [66]:
res.head(1)


Out[66]:
[Row(rf='RF1', date=datetime.datetime(2015, 4, 14, 0, 0), neutral=35.03, s1=34.67865912475935, s2=35.19309272805697, s3=35.61162381981636, s4=34.92090026310803, s5=35.2757534118607, s6=34.88538726175033, s7=34.7947997828024, s8=34.422262176432426, s9=35.32719209964416, s10=35.70279197608641, s11=34.70929644638722, s12=35.16468125943031, s13=34.7083308292015, s14=34.6909371576162, s15=34.24743006243561, s16=35.18043521116673, s17=35.26710783535421, s18=34.68646893255419, s19=35.42074885668956, s20=35.10073110319397, s21=34.62729258634153, s22=34.590238555794265, s23=35.12518904748294, s24=34.54164263329866, s25=34.780562034944296, s26=35.009313286776944, s27=35.19898478845906, s28=34.22306349493622, s29=35.58776307961069, s30=34.86817402539321, s31=34.904330793280025, s32=34.501576221982006, s33=34.908487404457446, s34=34.927195922549025, s35=35.26215239163238, s36=33.92269249584361, s37=34.30733618265355, s38=34.834677237655015, s39=34.62744354378543, s40=34.77382130417254, s41=34.72824373996907, s42=34.854606067026005, s43=34.64222172604983, s44=34.60156319552878, s45=35.70918069109046, s46=35.56681653497656, s47=33.896109761348846, s48=34.90079622390159, s49=34.58819986436787, s50=35.188539963456876, s51=34.01433872326011, s52=35.0739928313814, s53=34.613187678146446, s54=35.13416496576896, s55=34.99445195403314, s56=35.868772271139115, s57=34.43529924147874, s58=34.37558742473077, s59=35.767529209278806, s60=34.20841279797285, s61=35.02037678092141, s62=35.730742806800265, s63=35.58706083101516, s64=35.41139867751124, s65=34.35314972833785, s66=35.390550659047165, s67=34.766467265989796, s68=34.181696092298104, s69=35.70013050000655, s70=35.533538713265656, s71=34.69355471713625, s72=35.03, s73=34.88218261135854, s74=34.75938897742558, s75=34.08254454104669, s76=34.84672564828843, s77=34.90031331977329, s78=34.217426964067094, s79=35.03, s80=34.829337228686626, s81=35.640694811872486, s82=34.6621907358054, s83=34.9477616512553, s84=35.36955717482652, s85=35.02064237325755, s86=34.81790722492786, s87=35.109690177474775, s88=35.121816561235335, s89=34.96703922436443, s90=35.36876192881472, s91=34.95430350135521, s92=35.658557640675376, s93=35.68806992767282, s94=35.54751315223279, s95=34.39708977622091, s96=34.94204702169191, s97=34.919015204756775, s98=35.78236766249537, s99=34.92322674784095, s100=35.31876478719741, s101=34.788226636248645, s102=34.812093832739485, s103=35.62735090267868, s104=34.988263286860054, s105=34.795472900985914, s106=34.90865266511509, s107=35.07566419817475, s108=35.147503691958356, s109=35.832642229351144, s110=35.28787006150122, s111=34.8931410928295, s112=34.36407120219385, s113=35.562728269783065, s114=34.288690859551494, s115=35.3824406599367, s116=34.52621418361805, s117=34.99706834710071, s118=34.984682560983956, s119=34.43862544521726, s120=34.994913750126514, s121=35.07824604858676, s122=34.02040491040312, s123=35.62842443816883, s124=33.83964625719033, s125=34.307605664957116, s126=34.80412547187779, s127=35.02025306285834, s128=34.45045630321518, s129=34.04539649220161, s130=35.692919099972116, s131=35.453361214684065, s132=35.5980879542828, s133=34.502076552855804, s134=35.53265621968611, s135=35.18513529612576, s136=34.64146728562058, s137=35.81497702341632, s138=34.13365858374198, s139=34.710849475194735, s140=35.441126639612605, s141=34.735309443333264, s142=34.95120360700475, s143=35.10310881092355, s144=34.78003784826053, s145=34.70277926059655, s146=34.81468760885061, s147=35.16861531580745, s148=34.51921794752109, s149=35.15555177982842, s150=35.187221664553896, s151=35.22934866861739, s152=35.52802972799997, s153=35.14857742960984, s154=34.621016530630264, s155=35.63793940845333, s156=35.81806189284964, s157=35.639034885494894, s158=35.3610967180445, s159=34.8275485284126, s160=34.66429171826707, s161=35.343235756698064, s162=35.16162979597568, s163=34.644343702439386, s164=35.732088633804885, s165=34.379164905532846, s166=34.58600679010698, s167=34.41992802695751, s168=35.223875648636664, s169=34.516177001997306, s170=34.31381019707277, s171=35.106266569014124, s172=35.3559247203471, s173=35.88780549634091, s174=34.46293437448051, s175=35.1174672101038, s176=35.11080976204788, s177=35.0669255901668, s178=35.08700876483932, s179=34.81577549469441, s180=34.65169524785512, s181=34.63104192343893, s182=35.492450840709935, s183=33.723759518283956, s184=34.639464724361375, s185=35.290667410447604, s186=35.08309642239941, s187=34.59805207136981, s188=35.02059708314596, s189=35.46689759917978, s190=35.04887237318198, s191=34.83564976384203, s192=35.18008539520424, s193=35.65359552361088, s194=35.49838599299696, s195=35.4211973789694, s196=34.19181839405501, s197=35.37685667462542, s198=34.369390437088434, s199=34.23251735715107, s200=36.107741224520915, s201=34.712951815325674, s202=35.82105791565597, s203=35.77258752779277, s204=34.58277044729649, s205=35.314907187188275, s206=34.634499042072115, s207=34.99113873890582, s208=35.04337947006084, s209=34.8649579055206, s210=35.92777558392091, s211=35.46071686065867, s212=35.22273849877033, s213=35.150776011353, s214=34.90665926320567, s215=35.57693287877255, s216=34.6650630572609, s217=35.216943277522056, s218=34.83355351507067, s219=34.855309964266716, s220=35.116977441073736, s221=35.17138672682224, s222=34.55213158061924, s223=34.95298716811318, s224=35.06185210323065, s225=34.73835146645663, s226=35.13111997732124, s227=34.37016675180393, s228=35.38373966358735, s229=35.13284944102854, s230=35.13555429352917, s231=35.775779025911675, s232=34.820897220882856, s233=35.55933866642973, s234=34.885979080521025, s235=35.98833780725287, s236=35.4593844276052, s237=35.39339030555425, s238=34.74521126608465, s239=34.76266444232124, s240=35.03, s241=35.445356246460555, s242=34.829610784434244, s243=35.51406910666242, s244=35.37283430286354, s245=35.3435824194863, s246=34.828643512375734, s247=34.82417831041008, s248=34.77460466629185, s249=34.8795109527282, s250=34.8561058578509, portfolio='P1', rf='RF1', qty=1.0)]

In [72]:
# scenario_dates = df_exp.groupBy('date').sum()
var_per_portfolio = res.groupBy('date', 'portfolio').sum()
# var_per_portfolio.toPandas().plot()
var_per_portfolio = var_per_portfolio.map(lambda r: (r[0], r[1], r[2], float(var(np.array(r[3:]) - r[2]))))
var_per_portfolio = sql.createDataFrame(var_per_portfolio, schema=['date', 'portfolio', 'neutral', 'var'])

In [82]:
%matplotlib notebook
df1 = var_per_portfolio.toPandas()
df2 = df1.set_index(['date', 'portfolio'])
# ['neutral'].plot(subplots=True)

In [95]:
df3 = df2.unstack(1) #['var'].plot(subplots=True)
df3


Out[95]:
neutral var
portfolio P1 P2 P1 P2
date
2015-04-14 70.06 70.06 2.324244 2.324244
2015-04-15 70.40 70.40 2.268607 2.268607
2015-04-16 68.50 68.50 2.445955 2.445955
2015-04-17 66.60 66.60 2.559204 2.559204
2015-04-20 66.80 66.80 2.495879 2.495879
2015-04-21 66.68 66.68 2.417056 2.417056
2015-04-22 67.90 67.90 2.506571 2.506571
2015-04-23 68.30 68.30 2.456964 2.456964
2015-04-24 68.52 68.52 2.393792 2.393792
2015-04-27 65.22 65.22 2.911771 2.911771
2015-04-28 64.08 64.08 2.978352 2.978352
2015-04-29 63.46 63.46 2.872365 2.872365
2015-04-30 64.20 64.20 2.882594 2.882594
2015-05-01 64.58 64.58 2.834198 2.834198
2015-05-04 64.18 64.18 2.737028 2.737028
2015-05-05 63.02 63.02 2.676874 2.676874
2015-05-06 64.52 64.52 2.864054 2.864054
2015-05-07 64.10 64.10 2.766714 2.766714
2015-05-08 64.84 64.84 2.768306 2.768306

Me trying to register python UDFs


In [ ]:
f = sql.udf.register("fadd", lambda x: (np.array(x[3]) * 3.1).tolist(), ArrayType(FloatType()))
fagg = sql.udf.register("fagg", lambda x,y: (np.array(x[3]) + np.array(y[3])).tolist(), ArrayType(FloatType()))

In [76]:
sql.registerDataFrameAsTable(df, 'scen')

In [128]:
sql.sql('select date, fadd(scenarios) from scen group by date').collect()


---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-128-dddb85d05e4a> in <module>()
----> 1 sql.sql('select date, fadd(scenarios)  from scen group by date').collect()

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/pyspark/sql/context.py in sql(self, sqlQuery)
    514         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    515         """
--> 516         return DataFrame(self._ssql_ctx.sql(sqlQuery), self)
    517 
    518     @since(1.0)

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
    536         answer = self.gateway_client.send_command(command)
    537         return_value = get_return_value(answer, self.gateway_client,
--> 538                 self.target_id, self.name)
    539 
    540         for temp_arg in temp_args:

/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    298                 raise Py4JJavaError(
    299                     'An error occurred while calling {0}{1}{2}.\n'.
--> 300                     format(target_id, '.', name), value)
    301             else:
    302                 raise Py4JError(

Py4JJavaError: An error occurred while calling o111.sql.
: org.apache.spark.sql.AnalysisException: expression 'pythonUDF' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:42)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:95)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:111)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:111)
	at scala.collection.immutable.List.foreach(List.scala:318)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:111)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:97)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:97)
	at scala.collection.immutable.List.foreach(List.scala:318)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:97)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:42)
	at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:920)
	at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:131)
	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:744)
	at sun.reflect.GeneratedMethodAccessor53.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
	at py4j.Gateway.invoke(Gateway.java:259)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:207)
	at java.lang.Thread.run(Thread.java:744)