In [1]:
!pip install --user xlrd


Requirement already satisfied: xlrd in /gpfs/global_fs01/sym_shared/YPProdSpark/user/s1df-1767d8774d3251-73caa6cfaa60/.local/lib/python2.7/site-packages

In [129]:
# Setup constants if any
# FUNDS ID
FUNDS_ID_LIST = ['I-CJF','I-MG1','I-SQGFSH2','I-SQGFSH2O']

In [2]:
import pandas as pd
from io import BytesIO
import requests
import json
import xlrd 

from pyspark.sql.functions import *
from pyspark.sql.types import *

from datetime import datetime
from dateutil.parser import parse

from ingest.Connectors import Connectors

In [3]:
# The code was removed by DSX for sharing.

In [4]:
# The code was removed by DSX for sharing.

In [30]:
aladdinSecDF1 = pd.read_excel(getFileFromObjectStorage('MizuhoPOC', 'ALADDIN-SEC.xlsx'),index_col=[0], header=[0]).iloc[0:8]
# Drop rows & columns with all 'NaN' values, axis 0 is for row
aladdinSecDFFiltered1 = aladdinSecDF1.dropna(axis=[0,1], how='all')
# print aladdinSecDF1

asOfDate = pd.to_datetime(aladdinSecDFFiltered1.loc['As Of Date:', 'Unnamed: 1']).strftime('%Y-%m-%d')

print "\nasOfDate = " + asOfDate


asOfDate = 2017-07-31

In [207]:
aladdinSecDF2 = pd.read_excel(getFileFromObjectStorage('MizuhoPOC', 'ALADDIN-SEC.xlsx'), header=[0], skipinitialspace=True, skiprows=9, index_col=[0,1])

# Drop rows & columns with all 'NaN' values, axis 0 is for row
aladdinSecDF2FilterNullRowsCols = aladdinSecDF2.dropna(axis=[0,1], how='all').fillna('')
print aladdinSecDF2FilterNullRowsCols

# aladdinSecDF2FilterNullRowsCols.index.names

# aladdinSecDF2FilterNullRowsCols.index.get_level_values("Portfolio")

# aladdinSecDF2FilterNullRowsCols.columns

# This step clears the first 2 rows of each fund as those are the aggregate columns and we do not need to store those
dfNewArr = []
for id in FUNDS_ID_LIST:    
    df = aladdinSecDF2FilterNullRowsCols.loc[id].iloc[2:]
    df['fund_id']=id
    dfNewArr.append(df)
    
# Concat all the funds together    
dfNew=pd.concat(dfNewArr)

# Flatten the list by removing all the index
dfNew = dfNew.reset_index()

# Rename column to match database columns
aladdinSecDF2Renamed = \
    dfNew.rename(index=str, 
                 columns={"fund_id": "FUND_ID", 
                          "Currency": "CURRENCY" ,
                          "CUSIP(Aladdin ID)": "CUSIP", 
                          "Sec Type": "SEC_TYPE",
                          "Ticker/Coupon/Maturity": "TICKER_COUPON_MATURITY",
                          "Sec Desc": "SEC_DESC",
                          "ISIN": "ISIN",
                          "Orig. Face": "ORIG_FACE", 
                          "Settled": "SETTLED",
                          "Notional Market Value": "NOTIONAL_MKT_VAL",
                          "Base Curr Market Val w/Acc Int": "BASE_CURR_MKT_VAL_ACC_INT",
                          "Base Curr Accr Int": "BASE_CURR_MKT_INT",
                          "Maturity": "MATURITY_DATE",
                          "Issue Date": "ISSUE_DATE",
                          "Base Curr FX Rate": "BASE_CURR_FX_RATE",
                          "Market Price": "MKT_PRICE",
                          "Coupon": "COUPON",
                          "S&P Rating": "SNP_RATING"
                         })\

# Convert to float. TODO - Should everything be String as CSV files data is inconsistent    
aladdinSecDF2Renamed[['ORIG_FACE', 'SETTLED', 'NOTIONAL_MKT_VAL', 'BASE_CURR_MKT_VAL_ACC_INT', 'BASE_CURR_MKT_INT', 'BASE_CURR_FX_RATE', 'MKT_PRICE']]  \
= aladdinSecDF2Renamed[['ORIG_FACE', 'SETTLED', 'NOTIONAL_MKT_VAL', 'BASE_CURR_MKT_VAL_ACC_INT', 'BASE_CURR_MKT_INT', 'BASE_CURR_FX_RATE', 'MKT_PRICE']] \
.astype(float)
    
aladdinSecDF2Renamed[['MATURITY_DATE','ISSUE_DATE']]=aladdinSecDF2Renamed[['MATURITY_DATE','ISSUE_DATE']].astype(str)
    
# aladdinSecDF2Renamed['MATURITY_DATE'] = pd.DatetimeIndex(aladdinSecDF2Renamed['MATURITY_DATE'], ambiguous='NaT').date
#aladdinSecDF2Renamed['ISSUE_DATE'] = pd.DatetimeIndex(aladdinSecDF2Renamed['ISSUE_DATE']).date
    
#aladdinSecDF2Renamed = aladdinSecDF2Renamed.fillna('')   
    
    
#aladdinSecDF2Renamed.dtypes 
# aladdinSecDF2Renamed.head(20)


                    CUSIP(Aladdin ID)  Sec Type   Ticker/Coupon/Maturity  \
Portfolio  Currency                                                        
I-CJF      JPY              B7A0B81E2      GOVT    JGB 1.700 20-MAR-2018   
           JPY              B7A0B81E2      GOVT    JGB 1.700 20-MAR-2018   
           JPY              B7A0B81E2      GOVT    JGB 1.700 20-MAR-2018   
I-MG1      USD                  (4/4)     (3/4)        (4/4) 0.000 (3/4)   
           USD                  (4/4)     (3/4)        (4/4) 0.000 (3/4)   
           USD              00771X500  OPEN_END                    FCAIX   
           USD              BRS30C544      STIF                  JPMULCD   
           USD              BMZ1ACYJ5      CORP    LEH 0.000 31-JAN-2024   
           USD              BMZ1UBNV0      CORP  SQGFS 0.000 31-JAN-2099   
I-SQGFSH2  USD                  (7/7)    EQUITY                    (7/7)   
           USD                  (7/7)    EQUITY                    (7/7)   
           USD              46432F339    EQUITY                     QUAL   
           USD              46432F370    EQUITY                     SIZE   
           USD              46432F388    EQUITY                     VLUE   
           USD              46432F396    EQUITY                     MTUM   
           USD              73935X302    EQUITY                      PEY   
           USD              73935X682    EQUITY                     SPHQ   
           USD              73937B779    EQUITY                     SPLV   
I-SQGFSH2O USD                  (7/7)    EQUITY                    (7/7)   
           USD                  (7/7)    EQUITY                    (7/7)   
           USD              46432F339    EQUITY                     QUAL   
           USD              46432F370    EQUITY                     SIZE   
           USD              46432F388    EQUITY                     VLUE   
           USD              46432F396    EQUITY                     MTUM   
           USD              73935X302    EQUITY                      PEY   
           USD              73935X682    EQUITY                     SPHQ   
           USD              73937B779    EQUITY                     SPLV   
(4/19)     (2/19)             (12/19)    (5/19)     (12/19) 0.000 (4/19)   

                                               Sec Desc          ISIN  \
Portfolio  Currency                                                     
I-CJF      JPY          JAPAN (GOVERNMENT OF) 10YR #292  JP1102921853   
           JPY          JAPAN (GOVERNMENT OF) 10YR #292  JP1102921853   
           JPY          JAPAN (GOVERNMENT OF) 10YR #292  JP1102921853   
I-MG1      USD                                    (4/4)         (3/4)   
           USD                                    (4/4)         (3/4)   
           USD           FIERA CAP DIVERS ALTER CL INST  US00771X5005   
           USD               JPM LIQ-USD LIQUIDITY-INSD  LU0103813712   
           USD                             LEHMAN CLAIM                 
           USD             BRIDGE INVESTMENT IN I-SQGFS                 
I-SQGFSH2  USD                                    (7/7)         (7/7)   
           USD                                    (7/7)         (7/7)   
           USD       ISHARES EDGE MSCI USA QUALITY FACT  US46432F3394   
           USD        ISHARES EDGE MSCI USA SIZE FACTOR  US46432F3709   
           USD       ISHARES EDGE MSCI USA VALUE FACTOR  US46432F3881   
           USD       ISHARES EDGE MSCI USA MOMENTUM FAC  US46432F3964   
           USD       POWERSHARES HIGH YIELD EQUITY DIVI  US73935X3026   
           USD                 POWERSHARES S&P HIGH ETF  US73935X6821   
           USD        POWERSHARES S&P LOW VOLATILITY PO  US73937B7799   
I-SQGFSH2O USD                                    (7/7)         (7/7)   
           USD                                    (7/7)         (7/7)   
           USD       ISHARES EDGE MSCI USA QUALITY FACT  US46432F3394   
           USD        ISHARES EDGE MSCI USA SIZE FACTOR  US46432F3709   
           USD       ISHARES EDGE MSCI USA VALUE FACTOR  US46432F3881   
           USD       ISHARES EDGE MSCI USA MOMENTUM FAC  US46432F3964   
           USD       POWERSHARES HIGH YIELD EQUITY DIVI  US73935X3026   
           USD                 POWERSHARES S&P HIGH ETF  US73935X6821   
           USD        POWERSHARES S&P LOW VOLATILITY PO  US73937B7799   
(4/19)     (2/19)                               (12/19)       (11/19)   

                       Orig. Face       Settled  Notional Market Value  \
Portfolio  Currency                                                      
I-CJF      JPY       4.000000e+09  4.000000e+09           4.071178e+09   
           JPY       4.000000e+09  4.000000e+09           4.071178e+09   
           JPY       4.000000e+09  4.000000e+09           4.071178e+09   
I-MG1      USD       1.855466e+07  1.855466e+07           1.054770e+08   
           USD       1.855466e+07  1.855466e+07           1.054770e+08   
           USD       5.295794e+06  5.295794e+06           5.115737e+07   
           USD       1.000000e+07  1.000000e+07           1.000000e+07   
           USD       3.213862e+06  3.213862e+06           4.660100e+05   
           USD       4.500000e+04  4.500000e+04           4.385358e+07   
I-SQGFSH2  USD       5.338430e+05  5.338430e+05           2.448747e+07   
           USD       5.338430e+05  5.338430e+05           2.448747e+07   
           USD       3.543200e+04  3.543200e+04           2.662006e+06   
           USD       6.461900e+04  6.461900e+04           5.092623e+06   
           USD       6.861800e+04  6.861800e+04           5.148409e+06   
           USD       3.250500e+04  3.250500e+04           2.989810e+06   
           USD       1.709870e+05  1.709870e+05           2.920458e+06   
           USD       9.651000e+04  9.651000e+04           2.724477e+06   
           USD       6.517200e+04  6.517200e+04           2.949685e+06   
I-SQGFSH2O USD       6.546500e+04  6.546500e+04           3.002893e+06   
           USD       6.546500e+04  6.546500e+04           3.002893e+06   
           USD       4.345000e+03  4.345000e+03           3.264398e+05   
           USD       7.924000e+03  7.924000e+03           6.244904e+05   
           USD       8.415000e+03  8.415000e+03           6.313774e+05   
           USD       3.986000e+03  3.986000e+03           3.666323e+05   
           USD       2.096800e+04  2.096800e+04           3.581334e+05   
           USD       1.183500e+04  1.183500e+04           3.341020e+05   
           USD       7.992000e+03  7.992000e+03           3.617179e+05   
(4/19)     (2/19)    4.019154e+09  4.019154e+09           4.204145e+09   

                     Base Curr Market Val w/Acc Int  Base Curr Accr Int  \
Portfolio  Currency                                                       
I-CJF      JPY                         3.684491e+07           224246.16   
           JPY                         3.684491e+07           224246.16   
           JPY                         3.684491e+07           224246.16   
I-MG1      USD                         1.054770e+08                0.00   
           USD                         1.054770e+08                0.00   
           USD                         5.115737e+07                0.00   
           USD                         1.000000e+07                0.00   
           USD                         4.660100e+05                0.00   
           USD                         4.385358e+07                0.00   
I-SQGFSH2  USD                         2.448747e+07                0.00   
           USD                         2.448747e+07                0.00   
           USD                         2.662006e+06                0.00   
           USD                         5.092623e+06                0.00   
           USD                         5.148409e+06                0.00   
           USD                         2.989810e+06                0.00   
           USD                         2.920458e+06                0.00   
           USD                         2.724477e+06                0.00   
           USD                         2.949685e+06                0.00   
I-SQGFSH2O USD                         3.002893e+06                0.00   
           USD                         3.002893e+06                0.00   
           USD                         3.264398e+05                0.00   
           USD                         6.244904e+05                0.00   
           USD                         6.313774e+05                0.00   
           USD                         3.666323e+05                0.00   
           USD                         3.581334e+05                0.00   
           USD                         3.341020e+05                0.00   
           USD                         3.617179e+05                0.00   
(4/19)     (2/19)                      1.698122e+08           224246.16   

                                Maturity           Issue Date  \
Portfolio  Currency                                             
I-CJF      JPY       2018-03-20 00:00:00  2008-05-13 00:00:00   
           JPY       2018-03-20 00:00:00  2008-05-13 00:00:00   
           JPY       2018-03-20 00:00:00  2008-05-13 00:00:00   
I-MG1      USD                     (3/4)                (4/4)   
           USD                     (3/4)                (4/4)   
           USD                                                  
           USD                            1999-12-22 00:00:00   
           USD       2024-01-31 00:00:00  2009-01-31 00:00:00   
           USD       2099-01-31 00:00:00  2014-02-03 00:00:00   
I-SQGFSH2  USD                                          (5/7)   
           USD                                          (5/7)   
           USD                                                  
           USD                            2013-04-18 00:00:00   
           USD                            2013-04-18 00:00:00   
           USD                            2013-04-18 00:00:00   
           USD                            2004-12-09 00:00:00   
           USD                            2005-12-06 00:00:00   
           USD                            2011-05-05 00:00:00   
I-SQGFSH2O USD                                          (5/7)   
           USD                                          (5/7)   
           USD                                                  
           USD                            2013-04-18 00:00:00   
           USD                            2013-04-18 00:00:00   
           USD                            2013-04-18 00:00:00   
           USD                            2004-12-09 00:00:00   
           USD                            2005-12-06 00:00:00   
           USD                            2011-05-05 00:00:00   
(4/19)     (2/19)                 (4/19)               (9/19)   

                     Base Curr FX Rate Market Price  Coupon S&P Rating  
Portfolio  Currency                                                     
I-CJF      JPY                  0.0091       101.16     1.7         A+  
           JPY                  0.0091       101.16     1.7         A+  
           JPY                  0.0091       101.16     1.7         A+  
I-MG1      USD                  4.0000        (4/4)   (2/4)      (2/4)  
           USD                  4.0000        (4/4)   (2/4)      (2/4)  
           USD                  1.0000         9.66                     
           USD                  1.0000            1                     
           USD                  1.0000        0.145       0         NR  
           USD                  1.0000      974.524       0         NR  
I-SQGFSH2  USD                  7.0000        (7/7)                     
           USD                  7.0000        (7/7)                     
           USD                  1.0000        75.13                     
           USD                  1.0000        78.81                     
           USD                  1.0000        75.03                     
           USD                  1.0000        91.98                     
           USD                  1.0000        17.08                     
           USD                  1.0000        28.23                     
           USD                  1.0000        45.26                     
I-SQGFSH2O USD                  7.0000        (7/7)                     
           USD                  7.0000        (7/7)                     
           USD                  1.0000        75.13                     
           USD                  1.0000        78.81                     
           USD                  1.0000        75.03                     
           USD                  1.0000        91.98                     
           USD                  1.0000        17.08                     
           USD                  1.0000        28.23                     
           USD                  1.0000        45.26                     
(4/19)     (2/19)              18.0091      (12/19)  (3/19)     (3/19)  

In [203]:
spark = SparkSession.builder.getOrCreate()  

def build_schema():
    """Build and return a schema to use for the sample data."""
    schema = StructType(
        [            
            StructField("CURRENCY", StringType(), True),
            StructField("CUSIP", StringType(), False),
            StructField("SEC_TYPE", StringType(), True),
            StructField("TICKER_COUPON_MATURITY", StringType(), True),
            StructField("SEC_DESC", StringType(), True),
            StructField("ISIN", StringType(), True),
            StructField("ORIG_FACE", DoubleType(), True),
            StructField("SETTLED", DoubleType(), True),            
            StructField("NOTIONAL_MKT_VAL", DoubleType(), True),            
            StructField("BASE_CURR_MKT_VAL_ACC_INT", DoubleType(), True),            
            StructField("BASE_CURR_MKT_INT", DoubleType(), True),            
            StructField("MATURITY_DATE", StringType(), True),
            StructField("ISSUE_DATE", StringType(), True),   
            StructField("BASE_CURR_FX_RATE", DoubleType(), True), 
            StructField("MKT_PRICE", DoubleType(), True),             
            StructField("COUPON", StringType(), True),   
            StructField("SNP_RATING", StringType(), True),            
            StructField("FUND_ID",  StringType(), False),            
        ]
    )
    return schema


aladdinSecDF2SparkDF = spark.createDataFrame(aladdinSecDF2Renamed, schema=build_schema()) \
                                .withColumn("AS_OF_DATE", lit(asOfDate).cast("date"))


aladdinSecDF2SparkDF.printSchema()
aladdinSecDF2SparkDF.head(1)


root
 |-- CURRENCY: string (nullable = true)
 |-- CUSIP: string (nullable = false)
 |-- SEC_TYPE: string (nullable = true)
 |-- TICKER_COUPON_MATURITY: string (nullable = true)
 |-- SEC_DESC: string (nullable = true)
 |-- ISIN: string (nullable = true)
 |-- ORIG_FACE: double (nullable = true)
 |-- SETTLED: double (nullable = true)
 |-- NOTIONAL_MKT_VAL: double (nullable = true)
 |-- BASE_CURR_MKT_VAL_ACC_INT: double (nullable = true)
 |-- BASE_CURR_MKT_INT: double (nullable = true)
 |-- MATURITY_DATE: string (nullable = true)
 |-- ISSUE_DATE: string (nullable = true)
 |-- BASE_CURR_FX_RATE: double (nullable = true)
 |-- MKT_PRICE: double (nullable = true)
 |-- COUPON: string (nullable = true)
 |-- SNP_RATING: string (nullable = true)
 |-- FUND_ID: string (nullable = false)
 |-- AS_OF_DATE: date (nullable = true)

Out[203]:
[Row(CURRENCY=u'JPY', CUSIP=u'B7A0B81E2', SEC_TYPE=u'GOVT', TICKER_COUPON_MATURITY=u'JGB 1.700 20-MAR-2018', SEC_DESC=u'JAPAN (GOVERNMENT OF) 10YR #292', ISIN=u'JP1102921853', ORIG_FACE=4000000000.0, SETTLED=4000000000.0, NOTIONAL_MKT_VAL=4071178080.0, BASE_CURR_MKT_VAL_ACC_INT=36844907.73, BASE_CURR_MKT_INT=224246.16, MATURITY_DATE=u'2018-03-20 00:00:00', ISSUE_DATE=u'2008-05-13 00:00:00', BASE_CURR_FX_RATE=0.0091, MKT_PRICE=101.16, COUPON=u'1.7', SNP_RATING=u'A+', FUND_ID=u'I-CJF', AS_OF_DATE=datetime.date(2017, 7, 31))]

In [204]:
dashDBloadOptions = { 
                    Connectors.DASHDB.HOST              : dashCredentials["host"],
                    Connectors.DASHDB.DATABASE          : dashCredentials["db"],
                    Connectors.DASHDB.USERNAME          : dashCredentials["username"],
                    Connectors.DASHDB.PASSWORD          : dashCredentials["password"],
                    Connectors.DASHDB.SOURCE_TABLE_NAME : dashCredentials["REF_FUND_TABLE"],
}

refFundDF = sqlContext.read.format("com.ibm.spark.discover").options(**dashDBloadOptions).load()
refFundDF.printSchema()
refFundDF.show(1)


root
 |-- ID: string (nullable = false)

+-----+
|   ID|
+-----+
|I-CJF|
+-----+
only showing top 1 row


In [205]:
aladdinSecJoinSparkDF = aladdinSecDF2SparkDF.join(refFundDF, 
                                               aladdinSecDF2SparkDF.FUND_ID == refFundDF.ID, "inner")\
                                        .select(
                                            refFundDF.ID.alias("FUND_ID"),
                                            aladdinSecDF2SparkDF.CURRENCY,
                                            aladdinSecDF2SparkDF.CUSIP,
                                            aladdinSecDF2SparkDF.SEC_TYPE,
                                            aladdinSecDF2SparkDF.TICKER_COUPON_MATURITY,
                                            aladdinSecDF2SparkDF.SEC_DESC,
                                            aladdinSecDF2SparkDF.ISIN,
                                            aladdinSecDF2SparkDF.ORIG_FACE,
                                            aladdinSecDF2SparkDF.SETTLED,
                                            aladdinSecDF2SparkDF.NOTIONAL_MKT_VAL,
                                            aladdinSecDF2SparkDF.BASE_CURR_MKT_VAL_ACC_INT,
                                            aladdinSecDF2SparkDF.BASE_CURR_MKT_INT,
                                            aladdinSecDF2SparkDF.MATURITY_DATE,
                                            aladdinSecDF2SparkDF.ISSUE_DATE,
                                            aladdinSecDF2SparkDF.BASE_CURR_FX_RATE,
                                            aladdinSecDF2SparkDF.MKT_PRICE,
                                            aladdinSecDF2SparkDF.COUPON,
                                            aladdinSecDF2SparkDF.SNP_RATING,
                                            aladdinSecDF2SparkDF.AS_OF_DATE,                                                    
                                            )

aladdinSecJoinSparkDF.show(1)


+-------+--------+---------+--------+----------------------+--------------------+------------+---------+-------+----------------+-------------------------+-----------------+-------------------+-------------------+-----------------+---------+------+----------+----------+
|FUND_ID|CURRENCY|    CUSIP|SEC_TYPE|TICKER_COUPON_MATURITY|            SEC_DESC|        ISIN|ORIG_FACE|SETTLED|NOTIONAL_MKT_VAL|BASE_CURR_MKT_VAL_ACC_INT|BASE_CURR_MKT_INT|      MATURITY_DATE|         ISSUE_DATE|BASE_CURR_FX_RATE|MKT_PRICE|COUPON|SNP_RATING|AS_OF_DATE|
+-------+--------+---------+--------+----------------------+--------------------+------------+---------+-------+----------------+-------------------------+-----------------+-------------------+-------------------+-----------------+---------+------+----------+----------+
|  I-CJF|     JPY|B7A0B81E2|    GOVT|  JGB 1.700 20-MAR-...|JAPAN (GOVERNMENT...|JP1102921853|    4.0E9|  4.0E9|    4.07117808E9|            3.684490773E7|        224246.16|2018-03-20 00:00:00|2008-05-13 00:00:00|           0.0091|   101.16|   1.7|        A+|2017-07-31|
+-------+--------+---------+--------+----------------------+--------------------+------------+---------+-------+----------------+-------------------------+-----------------+-------------------+-------------------+-----------------+---------+------+----------+----------+
only showing top 1 row


In [206]:
# Connection to Dash DB for writing the data
dashdbsaveoption = {
                     Connectors.DASHDB.HOST              : dashCredentials["host"],
                     Connectors.DASHDB.DATABASE          : dashCredentials["db"],
                     Connectors.DASHDB.USERNAME          : dashCredentials["username"],
                     Connectors.DASHDB.PASSWORD          : dashCredentials["password"],
                     Connectors.DASHDB.TARGET_TABLE_NAME : dashCredentials["tableName"],
                     Connectors.DASHDB.TARGET_WRITE_MODE : 'merge' 
}

aladdinSecDashDBDF = aladdinSecJoinSparkDF.write.format("com.ibm.spark.discover").options(**dashdbsaveoption).save()

In [ ]: