In [38]:
%pylab inline
%load_ext autoreload
%autoreload 2
from __future__ import division
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from functools import partial
import inspect
import json
import os
import re
import sys
import cPickle as pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
def analyze_str_columns(cols, df, only_percent=False):
    """Print target counts/percentages for each value of the given string columns."""
    print 'Total samples: %s' % len(df)
    for c in cols:
        print '##############################'
        VAR_df = df[[c, 'target']]
        unique_vals = VAR_df[c].unique()
        str_0 = []
        str_1 = []
        col_names = []
        for u in unique_vals:
            # NaNs are the only non-string values, so anything that
            # is not a str is treated as missing.
            if type(u) == str:
                col_mask = (VAR_df[c] == u)
            else:
                col_mask = VAR_df[c].isnull()
            str_0.append(len(VAR_df[col_mask & (VAR_df['target'] == 0)]))
            str_1.append(len(VAR_df[col_mask & (VAR_df['target'] == 1)]))
            col_names.append('%s_%s' % (c, u))
        VAR_df_counts = pd.DataFrame([str_0, str_1],
                                     columns=col_names,
                                     index=pd.Index([0, 1], name='target'))
        if not only_percent:
            print "------Counts-------"
            print VAR_df_counts
        print "----Percentages----"
        print VAR_df_counts / VAR_df_counts.sum() * 100
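To illustrate what analyze_str_columns reports, here is a minimal sketch on a made-up toy DataFrame (the toy_df name and its values are purely illustrative, not part of the original data):
# Hypothetical toy example: one string column plus the target.
toy_df = pd.DataFrame({'VAR_X': ['a', 'a', 'b', None],
                       'target': [0, 1, 1, 1]})
analyze_str_columns(['VAR_X'], toy_df)
# Prints, per value of VAR_X (with NaN as its own category),
# how many samples fall in target 0 vs. target 1, and the percentages.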
In [2]:
if os.name == 'nt':
    TRAIN_PATH = r'D:\train.csv'
    PTRAIN_PATH = r'D:\train_preprocessed_float_string.csv'
    TEST_PATH = r'D:\test.csv'
    GOOGNEWS_PATH = r'D:\GoogleNews-vectors-negative300.bin.gz'
    VOCAB_PATH = r'D:\big.txt'
else:
    TRAIN_PATH = r'/media/mtambos/speedy/train.csv'
    PTRAIN_PATH = r'/media/mtambos/speedy/train_preprocessed_float_string.csv'
    TEST_PATH = r'/media/mtambos/speedy/test.csv'
    GOOGNEWS_PATH = r'/media/mtambos/speedy/GoogleNews-vectors-negative300.bin.gz'
    VOCAB_PATH = r'/media/mtambos/speedy/big.txt'
df = pd.read_csv(PTRAIN_PATH, index_col="ID")
In [3]:
str_cols = [u'VAR_0001', u'VAR_0005', u'VAR_0044',
u'VAR_0200', u'VAR_0202', u'VAR_0214',
u'VAR_0216', u'VAR_0222', u'VAR_0237',
u'VAR_0274', u'VAR_0283', u'VAR_0305',
u'VAR_0325', u'VAR_0342', u'VAR_0352',
u'VAR_0353', u'VAR_0354', u'VAR_0404',
u'VAR_0466', u'VAR_0467', u'VAR_0493',
u'VAR_1934']
try:
    # Keep only the columns that exist in df and are still string-typed.
    str_cols = [c for c in str_cols if c in df.columns and df[c].dtype == np.object]
except NameError:
    pass
In [5]:
neg_samples_count = len(df['target'][df['target']==0])
pos_samples_count = len(df['target'][df['target']==1])
print '%s negative samples; %.2f%% of total' % (neg_samples_count, neg_samples_count/len(df)*100)
print '%s positive samples; %.2f%% of total' % (pos_samples_count, pos_samples_count/len(df)*100)
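The same class balance can be read off more directly with value_counts; a minimal equivalent sketch:
# Sketch: relative class frequencies in percent, one line.
print df['target'].value_counts(normalize=True) * 100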
In [4]:
def filter_str(str_cell):
    # Collapse runs of non-alphanumeric characters (and underscores) into single spaces.
    str_cell = re.sub(r'[\W_]+', ' ', str(str_cell))
    str_cell = str_cell.strip().lower()
    # Treat placeholder values as missing.
    if str_cell in ('1', '-1', '[]', 'nan', ''):
        return None
    else:
        return str_cell

df[str_cols] = df[str_cols].astype(np.str).applymap(filter_str)
df[str_cols]
Out[4]:
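For reference, a few illustrative inputs and outputs of filter_str (the example values are made up for illustration):
filter_str('Some_Value!')  # -> 'some value'
filter_str('[]')           # -> None (placeholder treated as missing)
filter_str(np.nan)         # -> None (str(nan) == 'nan')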
In [35]:
str_desc = df[str_cols].describe()
str_desc = pd.DataFrame(str_desc, columns=sorted(str_desc.columns, key=lambda c: str_desc.loc['std', c]))
str_desc
Out[35]:
Column VAR_0044 does not contain a single value after cleaning, so drop it.
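A quick way to confirm this before dropping (a hedged check, not one of the original cells): count the non-null entries left after filtering.
# Sanity check (sketch): every entry should be null after filter_str.
print df['VAR_0044'].notnull().sum()  # expected: 0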
In [25]:
df.drop('VAR_0044', axis=1, inplace=True)
str_cols.remove('VAR_0044')
In [26]:
analyze_str_columns(['VAR_0202', 'VAR_0216', 'VAR_0222', 'VAR_0466'], df)
In each of these columns the values are distributed almost identically across the two target classes, so they carry no discriminative information and can be dropped.
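As a more formal check than eyeballing the percentages, a chi-squared test of independence between each column and the target could be run; this is a sketch using scipy (not part of the original notebook), where a large p-value is consistent with the column being uninformative:
from scipy.stats import chi2_contingency

# Sketch: test independence of each candidate column and the target.
# A large p-value means no evidence of association.
for c in ['VAR_0202', 'VAR_0216', 'VAR_0222', 'VAR_0466']:
    contingency = pd.crosstab(df[c], df['target'])
    chi2, p_value, dof, expected = chi2_contingency(contingency)
    print '%s: p-value = %.4f' % (c, p_value)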
In [27]:
cols = ['VAR_0202', 'VAR_0216', 'VAR_0222', 'VAR_0466']
df.drop(cols, axis=1, inplace=True)
for c in cols:
    str_cols.remove(c)
del cols
In [33]:
encoder = LabelEncoder()
for col in str_cols:
    df[col] = encoder.fit_transform(df[col])
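Note that fit_transform refits the single encoder on every column, so each column's learned mapping is discarded. If the same label-to-integer mapping must later be applied to the test set, one fitted encoder per column can be kept instead; a minimal sketch (the encoders dict is an assumption, not in the original):
# Sketch: keep one fitted encoder per column so the test set
# can be transformed with the same label -> integer mapping.
encoders = {}
for col in str_cols:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])
# Later, for the test set: test_df[col] = encoders[col].transform(test_df[col])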
In [36]:
df.to_csv(PTRAIN_PATH)
In [39]:
with open('deleted_str_cols.pickle', 'wb') as fp:
    pickle.dump(['VAR_0044', 'VAR_0202', 'VAR_0216', 'VAR_0222', 'VAR_0466'], fp)