In [1]:

    
%matplotlib inline
import numpy as np
import pandas as pd
from scipy.stats import probplot
from matplotlib import pyplot as plt

Load and explore

Most of the fields are self-explanatory. The following are descriptions for those that aren't.

Id - an Id that represents a (Store, Date) duple within the test set
Store - a unique Id for each store
Sales - the turnover for any given day (this is what you are predicting)
Customers - the number of customers on a given day
Open - an indicator for whether the store was open: 0 = closed, 1 = open
StateHoliday - indicates a state holiday. Normally all stores, with few exceptions, are closed on state holidays. Note that all schools are closed on public holidays and weekends. a = public holiday, b = Easter holiday, c = Christmas, 0 = None
SchoolHoliday - indicates if the (Store, Date) was affected by the closure of public schools
StoreType - differentiates between 4 different store models: a, b, c, d
Assortment - describes an assortment level: a = basic, b = extra, c = extended
CompetitionDistance - distance in meters to the nearest competitor store
CompetitionOpenSince[Month/Year] - gives the approximate year and month of the time the nearest competitor was opened
Promo - indicates whether a store is running a promo on that day
Promo2 - Promo2 is a continuing and consecutive promotion for some stores: 0 = store is not participating, 1 = store is participating
Promo2Since[Year/Week] - describes the year and calendar week when the store started participating in Promo2
PromoInterval - describes the consecutive intervals Promo2 is started, naming the months the promotion is started anew. E.g. "Feb,May,Aug,Nov" means each round starts in February, May, August, November of any given year for that store



In [2]:

    
!unzip -o '*.csv.zip'
!rm *.zip









    



unzip:  cannot find or open *.csv.zip, *.csv.zip.zip or *.csv.zip.ZIP.

No zipfiles found.
rm: *.zip: No such file or directory



In [3]:

    
ls









    



explore.ipynb          sample_submission.csv  test.csv
rf1.csv                store.csv              train.csv



In [4]:

    
!head train.csv









    



"Store","DayOfWeek","Date","Sales","Customers","Open","Promo","StateHoliday","SchoolHoliday"
1,5,2015-07-31,5263,555,1,1,"0","1"
2,5,2015-07-31,6064,625,1,1,"0","1"
3,5,2015-07-31,8314,821,1,1,"0","1"
4,5,2015-07-31,13995,1498,1,1,"0","1"
5,5,2015-07-31,4822,559,1,1,"0","1"
6,5,2015-07-31,5651,589,1,1,"0","1"
7,5,2015-07-31,15344,1414,1,1,"0","1"
8,5,2015-07-31,8492,833,1,1,"0","1"
9,5,2015-07-31,8565,687,1,1,"0","1"



In [5]:

    
df = pd.read_csv('train.csv')
for c in df.columns:
    v = ["2015-07-31", 'etc.'] if c == 'Date' else np.sort(df[c].unique())
    hasna = df[c].isnull().any()
    print "{}: {}\n  has NA values: {}\n".format(c, v, hasna)









    



Store: [   1    2    3 ..., 1113 1114 1115]
  has NA values: False

DayOfWeek: [1 2 3 4 5 6 7]
  has NA values: False

Date: ['2015-07-31', 'etc.']
  has NA values: False

Sales: [    0    46   124 ..., 38484 38722 41551]
  has NA values: False

Customers: [   0    3    5 ..., 5458 5494 7388]
  has NA values: False

Open: [0 1]
  has NA values: False

Promo: [0 1]
  has NA values: False

StateHoliday: [0 '0' 'a' 'b' 'c']
  has NA values: False

SchoolHoliday: [0 1]
  has NA values: False







    



/Users/jimbijwaard/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [74]:

    
data_types = {
    "Store": int,
    "DayOfWeek": int,       # {1, 2, 3, 4, 5, 6, 7}
    "Date": object,         # e.g. "2015-07-31"
    "Sales": int,
    "Customers": int,
    "Open": bool,
    "Promo": bool,
    "StateHoliday": str,    # {'0', 'a', 'b', 'c'}
    "SchoolHoliday": bool,
}

df_train = pd.read_csv('train.csv', dtype=data_types)



In [6]:

    
!head test.csv









    



"Id","Store","DayOfWeek","Date","Open","Promo","StateHoliday","SchoolHoliday"
1,1,4,2015-09-17,1,1,"0","0"
2,3,4,2015-09-17,1,1,"0","0"
3,7,4,2015-09-17,1,1,"0","0"
4,8,4,2015-09-17,1,1,"0","0"
5,9,4,2015-09-17,1,1,"0","0"
6,10,4,2015-09-17,1,1,"0","0"
7,11,4,2015-09-17,1,1,"0","0"
8,12,4,2015-09-17,1,1,"0","0"
9,13,4,2015-09-17,1,1,"0","0"



In [93]:

    
df = pd.read_csv('test.csv')
for c in df.columns:
    v =  np.sort(df[c].unique())
    hasna = df[c].isnull().any()
    print "{}: {}\n  dtype: {}\n  has NA values: {}\n".format(c, v, df[c].dtype, hasna)









    



Id: [    1     2     3 ..., 41086 41087 41088]
  dtype: int64
  has NA values: False

Store: [   1    3    7    8    9   10   11   12   13   14   15   16   19   20   21
   22   23   24   25   27   29   30   31   32   33   35   36   38   39   40
   41   42   43   45   46   47   48   49   50   51   52   53   56   58   61
   62   63   64   66   67   68   69   70   71   72   73   74   75   76   77
   79   80   81   82   83   84   86   89   90   91   92   93   94   98   99
  100  101  102  105  107  108  109  110  111  112  113  115  117  118  119
  120  122  124  126  127  128  129  130  131  132  135  136  137  139  140
  141  142  143  144  145  146  147  149  150  152  153  154  155  157  158
  159  160  161  162  164  165  166  168  169  170  171  172  174  175  176
  179  180  181  182  183  184  186  187  188  189  190  191  192  193  194
  195  197  199  200  201  202  204  206  207  209  210  212  213  214  215
  216  217  218  219  220  221  224  226  227  228  229  230  231  233  234
  235  238  239  241  242  243  244  245  246  247  248  249  250  251  252
  253  254  255  256  258  259  262  263  265  267  268  269  272  273  274
  275  277  278  279  280  281  283  284  285  287  288  289  290  294  295
  296  297  298  299  300  301  302  303  304  305  306  308  309  310  311
  312  314  315  316  317  319  320  323  325  326  328  329  330  331  333
  334  335  336  337  338  339  340  341  342  343  344  345  346  347  348
  350  351  352  353  354  355  356  358  359  362  364  365  367  368  369
  370  371  372  373  377  378  379  380  383  385  386  387  388  389  391
  392  393  394  395  397  398  399  403  404  405  406  407  408  409  410
  411  412  413  414  415  416  417  418  420  421  422  424  425  426  427
  428  429  430  431  432  433  434  435  440  441  442  445  446  447  448
  449  450  451  452  453  455  456  457  458  459  461  463  465  466  467
  468  470  471  472  473  475  477  481  484  485  486  487  488  490  491
  492  493  495  497  498  499  500  501  502  504  505  506  507  508  509
  510  511  512  514  515  516  517  518  519  520  521  522  524  527  528
  529  530  531  532  533  534  535  536  537  538  539  540  541  542  543
  545  547  548  549  550  551  552  553  554  555  557  558  561  562  563
  564  565  566  567  568  570  571  572  573  574  575  577  578  579  580
  581  582  584  585  586  587  588  589  590  591  592  593  597  598  600
  601  602  603  604  605  610  611  612  615  616  618  619  620  621  622
  623  624  625  627  628  629  631  632  633  636  637  638  639  640  641
  642  643  644  645  646  647  650  651  653  655  656  657  658  659  660
  661  662  663  665  666  667  669  670  671  673  674  675  676  677  678
  680  681  684  685  687  689  690  691  692  693  694  695  696  697  699
  700  701  702  703  705  706  707  710  711  712  713  714  716  717  718
  719  720  721  722  723  724  725  727  728  729  731  732  733  734  736
  737  738  739  740  741  742  744  746  748  749  750  751  752  753  756
  757  758  759  762  763  764  765  766  767  768  769  770  771  772  773
  774  775  776  777  778  782  784  785  789  790  791  792  793  795  796
  797  799  800  801  802  803  804  805  806  807  809  810  811  813  815
  816  818  819  820  822  823  824  825  826  829  831  832  833  835  837
  840  842  844  845  846  847  848  849  850  851  852  853  855  856  857
  858  859  860  861  862  863  864  865  866  867  868  871  872  874  875
  877  879  880  881  882  883  884  885  886  887  888  890  891  893  894
  895  896  897  900  901  902  903  904  905  906  907  908  909  911  912
  913  914  915  916  917  919  920  922  924  925  926  927  928  929  930
  931  932  934  935  936  937  938  939  941  942  943  944  945  946  947
  948  950  951  952  954  955  956  960  961  962  964  965  966  967  969
  970  973  974  975  976  977  980  983  984  985  986  988  989  991  992
  994  997  998 1000 1003 1004 1005 1007 1008 1009 1010 1011 1012 1013 1014
 1015 1016 1019 1020 1022 1024 1025 1026 1027 1028 1031 1036 1037 1038 1039
 1040 1041 1042 1044 1045 1047 1048 1049 1050 1051 1052 1053 1054 1056 1057
 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1070 1071 1072 1073
 1076 1077 1078 1079 1080 1083 1084 1086 1087 1088 1089 1091 1092 1094 1096
 1097 1099 1100 1101 1102 1103 1104 1105 1106 1107 1109 1111 1112 1113 1114
 1115]
  dtype: int64
  has NA values: False

DayOfWeek: [1 2 3 4 5 6 7]
  dtype: int64
  has NA values: False

Date: ['2015-08-01' '2015-08-02' '2015-08-03' '2015-08-04' '2015-08-05'
 '2015-08-06' '2015-08-07' '2015-08-08' '2015-08-09' '2015-08-10'
 '2015-08-11' '2015-08-12' '2015-08-13' '2015-08-14' '2015-08-15'
 '2015-08-16' '2015-08-17' '2015-08-18' '2015-08-19' '2015-08-20'
 '2015-08-21' '2015-08-22' '2015-08-23' '2015-08-24' '2015-08-25'
 '2015-08-26' '2015-08-27' '2015-08-28' '2015-08-29' '2015-08-30'
 '2015-08-31' '2015-09-01' '2015-09-02' '2015-09-03' '2015-09-04'
 '2015-09-05' '2015-09-06' '2015-09-07' '2015-09-08' '2015-09-09'
 '2015-09-10' '2015-09-11' '2015-09-12' '2015-09-13' '2015-09-14'
 '2015-09-15' '2015-09-16' '2015-09-17']
  dtype: object
  has NA values: False

Open: [  0.   1.  nan]
  dtype: float64
  has NA values: True

Promo: [0 1]
  dtype: int64
  has NA values: False

StateHoliday: ['0' 'a']
  dtype: object
  has NA values: False

SchoolHoliday: [0 1]
  dtype: int64
  has NA values: False



In [61]:

    
!head store.csv









    



"Store","StoreType","Assortment","CompetitionDistance","CompetitionOpenSinceMonth","CompetitionOpenSinceYear","Promo2","Promo2SinceWeek","Promo2SinceYear","PromoInterval"
1,"c","a",1270,9,2008,0,,,""
2,"a","a",570,11,2007,1,13,2010,"Jan,Apr,Jul,Oct"
3,"a","a",14130,12,2006,1,14,2011,"Jan,Apr,Jul,Oct"
4,"c","c",620,9,2009,0,,,""
5,"a","a",29910,4,2015,0,,,""
6,"a","a",310,12,2013,0,,,""
7,"a","c",24000,4,2013,0,,,""
8,"a","a",7520,10,2014,0,,,""
9,"a","c",2030,8,2000,0,,,""



In [98]:

    
df = pd.read_csv('store.csv')
for c in df.columns:
    v = [20, 30, 'etc.', np.nan] if c == 'CompetitionDistance' else np.sort(df[c].unique())
    print "{}: {}\n  dtype: {}\n  has NA values: {}\n".format(c, v, df[c].dtype, df[c].isnull().any())









    



Store: [   1    2    3 ..., 1113 1114 1115]
  dtype: int64
  has NA values: False

StoreType: ['a' 'b' 'c' 'd']
  dtype: object
  has NA values: False

Assortment: ['a' 'b' 'c']
  dtype: object
  has NA values: False

CompetitionDistance: [20, 30, 'etc.', nan]
  dtype: float64
  has NA values: True

CompetitionOpenSinceMonth: [  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  nan]
  dtype: float64
  has NA values: True

CompetitionOpenSinceYear: [ 1900.  1961.  1990.  1994.  1995.  1998.  1999.  2000.  2001.  2002.
  2003.  2004.  2005.  2006.  2007.  2008.  2009.  2010.  2011.  2012.
  2013.  2014.  2015.    nan]
  dtype: float64
  has NA values: True

Promo2: [0 1]
  dtype: int64
  has NA values: False

Promo2SinceWeek: [  1.   5.   6.   9.  10.  13.  14.  18.  22.  23.  26.  27.  28.  31.  35.
  36.  37.  39.  40.  44.  45.  48.  49.  50.  nan]
  dtype: float64
  has NA values: True

Promo2SinceYear: [ 2009.  2010.  2011.  2012.  2013.  2014.  2015.    nan]
  dtype: float64
  has NA values: True

PromoInterval: [nan 'Feb,May,Aug,Nov' 'Jan,Apr,Jul,Oct' 'Mar,Jun,Sept,Dec']
  dtype: object
  has NA values: True



In [120]:

    
# check consistency of 'Promo2' flag
for c in ('PromoInterval', 'Promo2SinceWeek', 'Promo2SinceYear'):
    print (df.Promo2[df[c].isnull()].unique(), df.Promo2[~df[c].isnull()].unique()) == ([0], [1])









    



True
True
True



In [121]:

    
# add analogous 'Competition' flag
print all(df['CompetitionDistance'].isnull() == df['CompetitionOpenSinceYear'].isnull())
print all(df['CompetitionDistance'].isnull() == df['CompetitionOpenSinceMonth'].isnull())
print all(df['CompetitionOpenSinceMonth'].isnull() == df['CompetitionOpenSinceYear'].isnull())









    



False
False
True



In [122]:

    
mask = df['CompetitionDistance'].isnull() != df['CompetitionOpenSinceYear'].isnull()
df[mask]









    Out[122]:






  
    
      
      Store
      StoreType
      Assortment
      CompetitionDistance
      CompetitionOpenSinceMonth
      CompetitionOpenSinceYear
      Promo2
      Promo2SinceWeek
      Promo2SinceYear
      PromoInterval
    
  
  
    
      11
      12
      a
      c
      1070
      NaN
      NaN
      1
      13
      2010
      Jan,Apr,Jul,Oct
    
    
      12
      13
      d
      a
      310
      NaN
      NaN
      1
      45
      2009
      Feb,May,Aug,Nov
    
    
      15
      16
      a
      c
      3270
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      18
      19
      a
      c
      3240
      NaN
      NaN
      1
      22
      2011
      Mar,Jun,Sept,Dec
    
    
      21
      22
      a
      a
      1040
      NaN
      NaN
      1
      22
      2012
      Jan,Apr,Jul,Oct
    
    
      25
      26
      d
      a
      2300
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      28
      29
      d
      c
      2170
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      31
      32
      a
      a
      2910
      NaN
      NaN
      1
      45
      2009
      Feb,May,Aug,Nov
    
    
      39
      40
      a
      a
      180
      NaN
      NaN
      1
      45
      2009
      Feb,May,Aug,Nov
    
    
      40
      41
      d
      c
      1180
      NaN
      NaN
      1
      31
      2013
      Jan,Apr,Jul,Oct
    
    
      41
      42
      a
      c
      290
      NaN
      NaN
      1
      40
      2011
      Jan,Apr,Jul,Oct
    
    
      42
      43
      d
      a
      4880
      NaN
      NaN
      1
      37
      2009
      Jan,Apr,Jul,Oct
    
    
      61
      62
      a
      a
      2050
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      63
      64
      d
      c
      22560
      NaN
      NaN
      1
      14
      2013
      Jan,Apr,Jul,Oct
    
    
      65
      66
      d
      a
      7660
      NaN
      NaN
      1
      37
      2009
      Jan,Apr,Jul,Oct
    
    
      67
      68
      a
      c
      250
      NaN
      NaN
      1
      35
      2012
      Mar,Jun,Sept,Dec
    
    
      68
      69
      c
      c
      1130
      NaN
      NaN
      1
      40
      2011
      Jan,Apr,Jul,Oct
    
    
      69
      70
      c
      c
      4840
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      73
      74
      a
      a
      330
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      78
      79
      a
      a
      3320
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      79
      80
      d
      a
      7910
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      82
      83
      a
      a
      2710
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      90
      91
      c
      a
      2410
      NaN
      NaN
      1
      35
      2011
      Jan,Apr,Jul,Oct
    
    
      91
      92
      c
      a
      240
      NaN
      NaN
      1
      45
      2009
      Feb,May,Aug,Nov
    
    
      92
      93
      a
      a
      16690
      NaN
      NaN
      1
      14
      2011
      Jan,Apr,Jul,Oct
    
    
      93
      94
      d
      c
      14620
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      96
      97
      d
      c
      8980
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      99
      100
      d
      a
      17930
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      100
      101
      d
      c
      2440
      NaN
      NaN
      1
      22
      2012
      Mar,Jun,Sept,Dec
    
    
      104
      105
      a
      c
      6190
      NaN
      NaN
      1
      23
      2013
      Mar,Jun,Sept,Dec
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      1035
      1036
      d
      c
      9560
      NaN
      NaN
      1
      36
      2013
      Jan,Apr,Jul,Oct
    
    
      1036
      1037
      a
      c
      150
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1041
      1042
      a
      a
      3440
      NaN
      NaN
      1
      31
      2013
      Feb,May,Aug,Nov
    
    
      1046
      1047
      a
      a
      3750
      NaN
      NaN
      1
      45
      2009
      Feb,May,Aug,Nov
    
    
      1051
      1052
      a
      c
      5080
      NaN
      NaN
      1
      31
      2013
      Feb,May,Aug,Nov
    
    
      1053
      1054
      a
      c
      13190
      NaN
      NaN
      1
      45
      2013
      Feb,May,Aug,Nov
    
    
      1055
      1056
      d
      c
      5350
      NaN
      NaN
      1
      40
      2012
      Jan,Apr,Jul,Oct
    
    
      1057
      1058
      a
      c
      180
      NaN
      NaN
      1
      35
      2010
      Mar,Jun,Sept,Dec
    
    
      1059
      1060
      a
      c
      3430
      NaN
      NaN
      1
      31
      2013
      Feb,May,Aug,Nov
    
    
      1060
      1061
      d
      c
      8110
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1062
      1063
      a
      c
      6250
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1063
      1064
      a
      c
      420
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1064
      1065
      a
      a
      1290
      NaN
      NaN
      1
      35
      2011
      Mar,Jun,Sept,Dec
    
    
      1065
      1066
      a
      a
      3350
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1067
      1068
      d
      c
      5010
      NaN
      NaN
      1
      5
      2013
      Jan,Apr,Jul,Oct
    
    
      1068
      1069
      a
      c
      18050
      NaN
      NaN
      1
      14
      2011
      Jan,Apr,Jul,Oct
    
    
      1072
      1073
      a
      c
      1710
      NaN
      NaN
      1
      44
      2012
      Jan,Apr,Jul,Oct
    
    
      1075
      1076
      a
      c
      90
      NaN
      NaN
      1
      1
      2013
      Jan,Apr,Jul,Oct
    
    
      1077
      1078
      d
      c
      670
      NaN
      NaN
      1
      40
      2011
      Jan,Apr,Jul,Oct
    
    
      1078
      1079
      a
      a
      16680
      NaN
      NaN
      1
      37
      2009
      Jan,Apr,Jul,Oct
    
    
      1079
      1080
      a
      a
      2410
      NaN
      NaN
      1
      40
      2014
      Jan,Apr,Jul,Oct
    
    
      1082
      1083
      d
      c
      11540
      NaN
      NaN
      1
      5
      2013
      Feb,May,Aug,Nov
    
    
      1083
      1084
      a
      a
      190
      NaN
      NaN
      1
      13
      2010
      Jan,Apr,Jul,Oct
    
    
      1089
      1090
      a
      a
      330
      NaN
      NaN
      1
      14
      2011
      Jan,Apr,Jul,Oct
    
    
      1090
      1091
      a
      c
      9990
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1095
      1096
      a
      c
      1130
      NaN
      NaN
      1
      10
      2014
      Mar,Jun,Sept,Dec
    
    
      1099
      1100
      a
      a
      540
      NaN
      NaN
      1
      14
      2011
      Jan,Apr,Jul,Oct
    
    
      1112
      1113
      a
      c
      9260
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1113
      1114
      a
      c
      870
      NaN
      NaN
      0
      NaN
      NaN
      NaN
    
    
      1114
      1115
      d
      c
      5350
      NaN
      NaN
      1
      22
      2012
      Mar,Jun,Sept,Dec
    
  

351 rows × 10 columns



In [77]:

    
data_types = {
    "Store": int,
    "StoreType": str,
    "Assortment": str,
    "CompetitionDistance": float,
    "CompetitionOpenSinceMonth": float,
    "CompetitionOpenSinceYear": float,
    "Promo2": bool,
    "Promo2SinceWeek": float,
    "Promo2SinceYear": float,
    "PromoInterval": object,
}

df_store = pd.read_csv('store.csv', dtype=data_types)

Explore target



In [45]:

    
mask = df_train.Sales != 0

df_train[mask].Sales.hist(bins=100)









    Out[45]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f1720175690>

This looks like a log-normal distribution...



In [51]:

    
df_train[mask].Sales.map(np.log10).hist(bins=100)









    Out[51]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f170d683250>

Check QQ plot to see how well this is modeled by a Gaussian distribution:



In [60]:

    
probplot(df_train[mask].Sales.map(np.log10), dist="norm", plot=plt)









    Out[60]:





((array([-4.79314476, -4.61214391, -4.51425117, ...,  4.51425117,
          4.61214391,  4.79314476]),
  array([ 1.66275783,  2.09342169,  2.12385164, ...,  4.58528021,
          4.58795778,  4.61858148])),
 (0.18438248551453371, 3.8033618126742885, 0.99829684200204238))

Explore features



In [ ]:

	Store	StoreType	Assortment	CompetitionDistance	CompetitionOpenSinceMonth	CompetitionOpenSinceYear	Promo2	Promo2SinceWeek	Promo2SinceYear	PromoInterval
11	12	a	c	1070	NaN	NaN	1	13	2010	Jan,Apr,Jul,Oct
12	13	d	a	310	NaN	NaN	1	45	2009	Feb,May,Aug,Nov
15	16	a	c	3270	NaN	NaN	0	NaN	NaN	NaN
18	19	a	c	3240	NaN	NaN	1	22	2011	Mar,Jun,Sept,Dec
21	22	a	a	1040	NaN	NaN	1	22	2012	Jan,Apr,Jul,Oct
25	26	d	a	2300	NaN	NaN	0	NaN	NaN	NaN
28	29	d	c	2170	NaN	NaN	0	NaN	NaN	NaN
31	32	a	a	2910	NaN	NaN	1	45	2009	Feb,May,Aug,Nov
39	40	a	a	180	NaN	NaN	1	45	2009	Feb,May,Aug,Nov
40	41	d	c	1180	NaN	NaN	1	31	2013	Jan,Apr,Jul,Oct
41	42	a	c	290	NaN	NaN	1	40	2011	Jan,Apr,Jul,Oct
42	43	d	a	4880	NaN	NaN	1	37	2009	Jan,Apr,Jul,Oct
61	62	a	a	2050	NaN	NaN	0	NaN	NaN	NaN
63	64	d	c	22560	NaN	NaN	1	14	2013	Jan,Apr,Jul,Oct
65	66	d	a	7660	NaN	NaN	1	37	2009	Jan,Apr,Jul,Oct
67	68	a	c	250	NaN	NaN	1	35	2012	Mar,Jun,Sept,Dec
68	69	c	c	1130	NaN	NaN	1	40	2011	Jan,Apr,Jul,Oct
69	70	c	c	4840	NaN	NaN	0	NaN	NaN	NaN
73	74	a	a	330	NaN	NaN	0	NaN	NaN	NaN
78	79	a	a	3320	NaN	NaN	0	NaN	NaN	NaN
79	80	d	a	7910	NaN	NaN	0	NaN	NaN	NaN
82	83	a	a	2710	NaN	NaN	0	NaN	NaN	NaN
90	91	c	a	2410	NaN	NaN	1	35	2011	Jan,Apr,Jul,Oct
91	92	c	a	240	NaN	NaN	1	45	2009	Feb,May,Aug,Nov
92	93	a	a	16690	NaN	NaN	1	14	2011	Jan,Apr,Jul,Oct
93	94	d	c	14620	NaN	NaN	0	NaN	NaN	NaN
96	97	d	c	8980	NaN	NaN	0	NaN	NaN	NaN
99	100	d	a	17930	NaN	NaN	0	NaN	NaN	NaN
100	101	d	c	2440	NaN	NaN	1	22	2012	Mar,Jun,Sept,Dec
104	105	a	c	6190	NaN	NaN	1	23	2013	Mar,Jun,Sept,Dec
...	...	...	...	...	...	...	...	...	...	...
1035	1036	d	c	9560	NaN	NaN	1	36	2013	Jan,Apr,Jul,Oct
1036	1037	a	c	150	NaN	NaN	0	NaN	NaN	NaN
1041	1042	a	a	3440	NaN	NaN	1	31	2013	Feb,May,Aug,Nov
1046	1047	a	a	3750	NaN	NaN	1	45	2009	Feb,May,Aug,Nov
1051	1052	a	c	5080	NaN	NaN	1	31	2013	Feb,May,Aug,Nov
1053	1054	a	c	13190	NaN	NaN	1	45	2013	Feb,May,Aug,Nov
1055	1056	d	c	5350	NaN	NaN	1	40	2012	Jan,Apr,Jul,Oct
1057	1058	a	c	180	NaN	NaN	1	35	2010	Mar,Jun,Sept,Dec
1059	1060	a	c	3430	NaN	NaN	1	31	2013	Feb,May,Aug,Nov
1060	1061	d	c	8110	NaN	NaN	0	NaN	NaN	NaN
1062	1063	a	c	6250	NaN	NaN	0	NaN	NaN	NaN
1063	1064	a	c	420	NaN	NaN	0	NaN	NaN	NaN
1064	1065	a	a	1290	NaN	NaN	1	35	2011	Mar,Jun,Sept,Dec
1065	1066	a	a	3350	NaN	NaN	0	NaN	NaN	NaN
1067	1068	d	c	5010	NaN	NaN	1	5	2013	Jan,Apr,Jul,Oct
1068	1069	a	c	18050	NaN	NaN	1	14	2011	Jan,Apr,Jul,Oct
1072	1073	a	c	1710	NaN	NaN	1	44	2012	Jan,Apr,Jul,Oct
1075	1076	a	c	90	NaN	NaN	1	1	2013	Jan,Apr,Jul,Oct
1077	1078	d	c	670	NaN	NaN	1	40	2011	Jan,Apr,Jul,Oct
1078	1079	a	a	16680	NaN	NaN	1	37	2009	Jan,Apr,Jul,Oct
1079	1080	a	a	2410	NaN	NaN	1	40	2014	Jan,Apr,Jul,Oct
1082	1083	d	c	11540	NaN	NaN	1	5	2013	Feb,May,Aug,Nov
1083	1084	a	a	190	NaN	NaN	1	13	2010	Jan,Apr,Jul,Oct
1089	1090	a	a	330	NaN	NaN	1	14	2011	Jan,Apr,Jul,Oct
1090	1091	a	c	9990	NaN	NaN	0	NaN	NaN	NaN
1095	1096	a	c	1130	NaN	NaN	1	10	2014	Mar,Jun,Sept,Dec
1099	1100	a	a	540	NaN	NaN	1	14	2011	Jan,Apr,Jul,Oct
1112	1113	a	c	9260	NaN	NaN	0	NaN	NaN	NaN
1113	1114	a	c	870	NaN	NaN	0	NaN	NaN	NaN
1114	1115	d	c	5350	NaN	NaN	1	22	2012	Mar,Jun,Sept,Dec