In [1]:
# Summarize the contents of train.csv.
# The train.csv file was downloaded from https://www.kaggle.com/c/predict-west-nile-virus/data

%matplotlib inline
import sys
sys.path.append("../bin/")
from data import DataIn

# Load the training data through the project's DataIn helper and print its
# summary (a missing-value report followed by per-column statistics).
# No `if __name__ == "__main__":` guard is used: a notebook cell always runs
# as top-level code, so the guard would always be true and only add nesting.
train = DataIn("train.csv")
train.summarize()


No missing values in the columns of train.csv!

--------------------------------------------------------------------------------
********************    Begin of the summary of text data   ********************
--------------------------------------------------------------------------------
count          10506
unique            95
top       2007-08-01
freq             551
Name: Date, dtype: object


count                                                 10506
unique                                                  138
top       ORD Terminal 5, O'Hare International Airport, ...
freq                                                    750
Name: Address, dtype: object


count                      10506
unique                         7
top       CULEX PIPIENS/RESTUANS
freq                        4752
Name: Species, dtype: object


count                10506
unique                 128
top        W OHARE AIRPORT
freq                   750
Name: Street, dtype: object


count     10506
unique      136
top        T900
freq        750
Name: Trap, dtype: object


count                                  10506
unique                                   138
top       1000  W OHARE AIRPORT, Chicago, IL
freq                                     750
Name: AddressNumberAndStreet, dtype: object


--------------------------------------------------------------------------------
********************    End of the summary of text data     ********************
--------------------------------------------------------------------------------