In [3]:
# Look up the dataset named "etl.dimtable.DimCustomer" and keep the result for inspection.
# NOTE(review): `df` is not defined in the visible cells — presumably the SMV shell helper
# that resolves a dataset by name; confirm it is in scope on a fresh kernel.
member_raw = df("etl.dimtable.DimCustomer")

In [4]:
# Show a sample record; the output lists each field as NAME:Type = value.
member_raw.peek()


CUST_ID:String  = 000704225906
BIRTH_YYYY:Long = 1963
BIRTH_MM:Long   = 5
BIRTH_DD:Long   = 17
gender:String   = F

Check the input data for the airline demo


In [3]:
import glob
import os
import numpy as np
from pandas import *
import matplotlib
import smv

# Common package prefix (note the trailing dot): the cells below prepend it to each
# dataset name passed to ddf().
proj_nm = "com.datasenseanalytics.pluto.airlinedemo."

In [2]:
# Convert the customer dimension to a pandas frame, then count the non-null
# entries of every column for each CURR_LVL value (groupby().count() skips nulls,
# so differing counts within a row reveal missing values in that column).
# NOTE(review): member_raw is presumably a Spark DataFrame, in which case
# toPandas() materializes every row locally — confirm the table is small.
member_raw = ddf(proj_nm + "etl.DimCustomer")
member_df = member_raw.toPandas()
member_df.groupby('CURR_LVL').count()


Out[2]:
CUST_ID BIRTH_YYYY BIRTH_MM BIRTH_DD gender ENROLL_DT
CURR_LVL
0 16 14 14 14 16 12
1 3 2 2 2 3 1
2 1 1 1 1 1 1
3 1 1 1 1 1 1
5 1 1 1 1 1 1

In [4]:
# Convert the etl.FactRouteBkgSum dataset to a pandas frame and preview its first 10 rows.
# NOTE(review): demand_raw is presumably a Spark DataFrame, so toPandas() would pull
# the whole dataset locally just to show 10 rows; if demand_df is not needed in full
# elsewhere, consider limiting before converting — confirm against later cells.
demand_raw = ddf(proj_nm+"etl.FactRouteBkgSum")
demand_df = demand_raw.toPandas()
demand_df.head(10)


Out[4]:
SCH_LEG_ORIG_CD SCH_LEG_DEST_CD FLT_MONTH BKG_MONTH DELTA_MONTH CUMM_BKG_AMT
0 LAX JFK 2013-07 2013-07 0.0 27443.0
1 LAX JFK 2013-07 2013-06 1.0 21495.0
2 LAX JFK 2013-07 2013-05 2.0 17051.0
3 LAX JFK 2013-07 2013-04 3.0 14257.0
4 LAX JFK 2013-07 2013-03 4.0 10271.0
5 LAX JFK 2013-07 2013-02 5.0 7677.0
6 LAX JFK 2013-07 2013-01 6.0 5718.0
7 LAX JFK 2013-07 2012-12 7.0 3476.0
8 LAX JFK 2013-07 2012-11 8.0 2153.0
9 LAX JFK 2013-07 2012-10 9.0 1167.0

In [5]:
# Convert the etl.FactRouteTarget dataset to a pandas frame and preview its first 10 rows.
# NOTE(review): tgt_raw is presumably a Spark DataFrame, so toPandas() would pull the
# whole dataset locally just to show 10 rows; if tgt_df is not needed in full
# elsewhere, consider limiting before converting — confirm against later cells.
tgt_raw = ddf(proj_nm+"etl.FactRouteTarget")
tgt_df = tgt_raw.toPandas()
tgt_df.head(10)


Out[5]:
SCH_LEG_ORIG_CD SCH_LEG_DEST_CD FLT_MONTH TGT_AMT
0 LAX JFK 2013-07 25010.0
1 LAX JFK 2013-08 32649.0
2 LAX JFK 2013-09 32690.0
3 LAX JFK 2013-10 30967.0
4 LAX JFK 2013-11 27588.0
5 LAX JFK 2013-12 22677.0
6 LAX JFK 2014-01 26904.0
7 LAX JFK 2014-02 21805.0
8 LAX JFK 2014-03 32884.0
9 LAX JFK 2014-04 28015.0

In [ ]: