Review DS new file format


In [2]:
pwd


Out[2]:
'/home/jovyan/work/jnb_trump_train/develop'

In [3]:
import pandas as pd

In [136]:
!cat ../../zzData_RAW/header.txt | grep Name > ../../zzData_RAW/header2.txt

In [158]:
f = pd.read_table ('../../zzData_RAW/header2.txt', header=None )
cols = f.iloc[:,0].str.split('=').str.get(1)

In [189]:
df_new = pd.read_table ('../../zzData_RAW/BRSalesNew.txt',header=None, index_col=[0,1,2],names=cols, dtype=object)
df_new.GLANI = df_new.GLANI.astype('str')

In [191]:
df_new.head().T


Out[191]:
SHDOCO 911962 912189 912349
SHDCTO CM CM CM
SDLNID 1.0 1.0 1.0 1.0 1.0
SDAN8 2278638 2278638 1675988 1675988 1589355
SDSHAN 2278639 2278639 1675989 1675989 1589356
SDLITM 5897346 5897346 5849913 5849913 1075821
SDDOC 600180 600180 600155 600155 600085
SDDCT RM RM RM RM RM
QCAC10 AAD AAD AAD AAD AAD
QCAC08 DEN DEN DEN DEN DEN
QCAC04 SOL SOL PRC PRC PRC
QC$OSC E E W W T
SDLNTY CC CC CC CC CC
SDSRP1 021 021 022 022 013
SDGLC 0143 0143 0143 0143 0143
SHMCU 020001000000 020001000000 020001000000 020001000000 020001000000
SHMCU01 Dental Merch GP Dental Merch GP Dental Merch GP Dental Merch GP Dental Merch GP
SDMCU 020099990116 020099990116 020099990111 020099990111 020099990116
SDMCU01 CALGARY DC - NSP CALGARY DC - NSP NOTL DC - NSP NOTL DC - NSP CALGARY DC - NSP
SDEMCU 020001000000 020001000000 020001000000 020001000000 020001000000
SDEMCU01 Dental Merch GP Dental Merch GP Dental Merch GP Dental Merch GP Dental Merch GP
GLANI 020001000000.4521 020001000000.4140 020001000000.4521 020001000000.4140 020001000000.4521
GMDL01 Regular Returns Cost Sales Returns Regular Sa Regular Returns Cost Sales Returns Regular Sa Regular Returns Cost
GLAA -76.03 124.99 -270. 400. -16.12
QB$CBA 0 0 0 0 0
SDSOQS -1 -1 -4 -4 -1
QB$NTC 0 0 0 0 0
SDSO08 NaN NaN NaN NaN NaN
SDUPC2 NaN NaN NaN NaN NaN
SDDSC2 004813852 004813852 004815297 004815297 004816266

In [213]:
df_new.GLANI.unique


Out[213]:
<bound method Series.unique of SHDOCO   SHDCTO  SDLNID
911962   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912189   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912349   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912368   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912370   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912393   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912395   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
                 2.0       020001000000.4521
                 2.0       020001000000.4140
                 3.0       020001000000.4521
                 3.0       020001000000.4140
912397   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912401   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912457   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
912500   CM      1.0       020017000000.4521
                 1.0       020017000000.4140
912591   CM      1.0       020001000000.4521
                 1.0       020001000000.4140
                 2.0       020001000000.4521
                 2.0       020001000000.4140
                                 ...        
9200567  SL      6.0       020009000100.4510
                 6.0       020009000100.4510
                 6.0       020009000100.4020
                 7.0       020009000100.4510
                 7.0       020009000100.4510
                 7.0       020009000100.4020
9200568  SL      1.0       020009000000.4510
                 1.0       020009000000.4020
                 2.0       020009000100.4510
                 2.0       020009000100.4510
                 2.0       020009000100.4020
                 3.0       020009000100.4510
                 3.0       020009000100.4510
                 3.0       020009000100.4020
9200569  SL      1.0       020009000000.4510
                 1.0       020009000000.4020
                 2.0       020009000000.4510
                 2.0       020009000000.4020
                 3.0       020009000000.4510
                 3.0       020009000000.4020
                 4.0       020009000000.4510
                 4.0       020009000000.4020
                 5.0       020009000000.4510
                 5.0       020009000000.4020
                 6.0       020009000100.4510
                 6.0       020009000100.4510
                 6.0       020009000100.4020
                 7.0       020009000100.4510
                 7.0       020009000100.4510
                 7.0       020009000100.4020
Name: GLANI, dtype: object>

In [ ]: