In [1]:
import numpy as np
import pandas as pd

In [2]:
from os import listdir
from os.path import isfile, join

mypath = "D:/Kaggle_ws/Bosch/src/stations/train"
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
only_notnull_date = [f for f in onlyfiles if f.find("notnull_date") > 0]

In [3]:
for f in sorted(only_notnull_date):
    print f
    date_df = pd.read_csv(join(mypath, f))
    date_df.info()


L0S00_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673862 entries, 0 to 673861
Data columns (total 13 columns):
Id           673862 non-null int64
L0_S0_D1     673862 non-null float64
L0_S0_D3     673862 non-null float64
L0_S0_D5     673862 non-null float64
L0_S0_D7     673862 non-null float64
L0_S0_D9     673862 non-null float64
L0_S0_D11    673862 non-null float64
L0_S0_D13    673862 non-null float64
L0_S0_D15    673862 non-null float64
L0_S0_D17    673862 non-null float64
L0_S0_D19    673862 non-null float64
L0_S0_D21    673862 non-null float64
L0_S0_D23    673862 non-null float64
dtypes: float64(12), int64(1)
memory usage: 66.8 MB
L0S01_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673904 entries, 0 to 673903
Data columns (total 3 columns):
Id           673904 non-null int64
L0_S1_D26    673902 non-null float64
L0_S1_D30    673904 non-null float64
dtypes: float64(2), int64(1)
memory usage: 15.4 MB
L0S02_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 339774 entries, 0 to 339773
Data columns (total 10 columns):
Id           339774 non-null int64
L0_S2_D34    339774 non-null float64
L0_S2_D38    339774 non-null float64
L0_S2_D42    339774 non-null float64
L0_S2_D46    339774 non-null float64
L0_S2_D50    339774 non-null float64
L0_S2_D54    339774 non-null float64
L0_S2_D58    339774 non-null float64
L0_S2_D62    339774 non-null float64
L0_S2_D66    339774 non-null float64
dtypes: float64(9), int64(1)
memory usage: 25.9 MB
L0S03_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 334708 entries, 0 to 334707
Data columns (total 10 columns):
Id            334708 non-null int64
L0_S3_D70     334708 non-null float64
L0_S3_D74     334708 non-null float64
L0_S3_D78     334708 non-null float64
L0_S3_D82     334708 non-null float64
L0_S3_D86     334708 non-null float64
L0_S3_D90     334708 non-null float64
L0_S3_D94     334708 non-null float64
L0_S3_D98     334708 non-null float64
L0_S3_D102    334708 non-null float64
dtypes: float64(9), int64(1)
memory usage: 25.5 MB
L0S04_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 335295 entries, 0 to 335294
Data columns (total 3 columns):
Id            335295 non-null int64
L0_S4_D106    335295 non-null float64
L0_S4_D111    335243 non-null float64
dtypes: float64(2), int64(1)
memory usage: 7.7 MB
L0S05_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 339512 entries, 0 to 339511
Data columns (total 3 columns):
Id            339512 non-null int64
L0_S5_D115    339512 non-null float64
L0_S5_D117    339512 non-null float64
dtypes: float64(2), int64(1)
memory usage: 7.8 MB
L0S06_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 338988 entries, 0 to 338987
Data columns (total 6 columns):
Id            338988 non-null int64
L0_S6_D120    338988 non-null float64
L0_S6_D124    338988 non-null float64
L0_S6_D127    338988 non-null float64
L0_S6_D130    338988 non-null float64
L0_S6_D134    338988 non-null float64
dtypes: float64(5), int64(1)
memory usage: 15.5 MB
L0S07_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 335698 entries, 0 to 335697
Data columns (total 6 columns):
Id            335698 non-null int64
L0_S7_D137    335698 non-null float64
L0_S7_D139    335698 non-null float64
L0_S7_D140    335698 non-null float64
L0_S7_D141    335698 non-null float64
L0_S7_D143    335698 non-null float64
dtypes: float64(5), int64(1)
memory usage: 15.4 MB
L0S08_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673881 entries, 0 to 673880
Data columns (total 5 columns):
Id            673881 non-null int64
L0_S8_D145    673881 non-null float64
L0_S8_D147    673881 non-null float64
L0_S8_D148    673881 non-null float64
L0_S8_D150    673881 non-null float64
dtypes: float64(4), int64(1)
memory usage: 25.7 MB
L0S09_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225679 entries, 0 to 225678
Data columns (total 14 columns):
Id            225679 non-null int64
L0_S9_D152    225679 non-null float64
L0_S9_D157    225659 non-null float64
L0_S9_D162    225678 non-null float64
L0_S9_D167    225671 non-null float64
L0_S9_D172    225678 non-null float64
L0_S9_D177    225659 non-null float64
L0_S9_D182    225659 non-null float64
L0_S9_D187    225659 non-null float64
L0_S9_D192    225659 non-null float64
L0_S9_D197    225659 non-null float64
L0_S9_D202    225659 non-null float64
L0_S9_D207    225659 non-null float64
L0_S9_D212    225659 non-null float64
dtypes: float64(13), int64(1)
memory usage: 24.1 MB
L0S10_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224540 entries, 0 to 224539
Data columns (total 14 columns):
Id             224540 non-null int64
L0_S10_D216    224540 non-null float64
L0_S10_D221    224523 non-null float64
L0_S10_D226    224540 non-null float64
L0_S10_D231    224527 non-null float64
L0_S10_D236    224540 non-null float64
L0_S10_D241    224523 non-null float64
L0_S10_D246    224523 non-null float64
L0_S10_D251    224523 non-null float64
L0_S10_D256    224523 non-null float64
L0_S10_D261    224523 non-null float64
L0_S10_D266    224523 non-null float64
L0_S10_D271    224523 non-null float64
L0_S10_D276    224523 non-null float64
dtypes: float64(13), int64(1)
memory usage: 24.0 MB
L0S11_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225452 entries, 0 to 225451
Data columns (total 14 columns):
Id             225452 non-null int64
L0_S11_D280    225452 non-null float64
L0_S11_D284    225452 non-null float64
L0_S11_D288    225452 non-null float64
L0_S11_D292    225452 non-null float64
L0_S11_D296    225452 non-null float64
L0_S11_D300    225452 non-null float64
L0_S11_D304    225452 non-null float64
L0_S11_D308    225452 non-null float64
L0_S11_D312    225452 non-null float64
L0_S11_D316    225452 non-null float64
L0_S11_D320    225452 non-null float64
L0_S11_D324    225452 non-null float64
L0_S11_D328    225452 non-null float64
dtypes: float64(13), int64(1)
memory usage: 24.1 MB
L0S12_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242061 entries, 0 to 242060
Data columns (total 13 columns):
Id             242061 non-null int64
L0_S12_D331    242061 non-null float64
L0_S12_D333    242061 non-null float64
L0_S12_D335    242061 non-null float64
L0_S12_D337    242061 non-null float64
L0_S12_D339    242061 non-null float64
L0_S12_D341    242061 non-null float64
L0_S12_D343    242061 non-null float64
L0_S12_D345    242061 non-null float64
L0_S12_D347    242061 non-null float64
L0_S12_D349    242061 non-null float64
L0_S12_D351    242061 non-null float64
L0_S12_D353    242061 non-null float64
dtypes: float64(12), int64(1)
memory usage: 24.0 MB
L0S13_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242065 entries, 0 to 242064
Data columns (total 3 columns):
Id             242065 non-null int64
L0_S13_D355    242065 non-null float64
L0_S13_D357    242065 non-null float64
dtypes: float64(2), int64(1)
memory usage: 5.5 MB
L0S14_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120625 entries, 0 to 120624
Data columns (total 10 columns):
Id             120625 non-null int64
L0_S14_D360    120625 non-null float64
L0_S14_D364    120625 non-null float64
L0_S14_D368    120625 non-null float64
L0_S14_D372    120625 non-null float64
L0_S14_D376    120625 non-null float64
L0_S14_D380    120625 non-null float64
L0_S14_D384    120625 non-null float64
L0_S14_D388    120625 non-null float64
L0_S14_D392    120625 non-null float64
dtypes: float64(9), int64(1)
memory usage: 9.2 MB
L0S15_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121445 entries, 0 to 121444
Data columns (total 10 columns):
Id             121445 non-null int64
L0_S15_D395    121445 non-null float64
L0_S15_D398    121445 non-null float64
L0_S15_D401    121445 non-null float64
L0_S15_D404    121445 non-null float64
L0_S15_D407    121445 non-null float64
L0_S15_D410    121445 non-null float64
L0_S15_D413    121445 non-null float64
L0_S15_D416    121445 non-null float64
L0_S15_D419    121445 non-null float64
dtypes: float64(9), int64(1)
memory usage: 9.3 MB
L0S16_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119139 entries, 0 to 119138
Data columns (total 3 columns):
Id             119139 non-null int64
L0_S16_D423    119139 non-null float64
L0_S16_D428    119139 non-null float64
dtypes: float64(2), int64(1)
memory usage: 2.7 MB
L0S17_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123027 entries, 0 to 123026
Data columns (total 3 columns):
Id             123027 non-null int64
L0_S17_D432    123027 non-null float64
L0_S17_D434    123027 non-null float64
dtypes: float64(2), int64(1)
memory usage: 2.8 MB
L0S18_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121081 entries, 0 to 121080
Data columns (total 6 columns):
Id             121081 non-null int64
L0_S18_D437    121081 non-null float64
L0_S18_D441    121081 non-null float64
L0_S18_D444    121081 non-null float64
L0_S18_D447    121081 non-null float64
L0_S18_D451    121081 non-null float64
dtypes: float64(5), int64(1)
memory usage: 5.5 MB
L0S19_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121027 entries, 0 to 121026
Data columns (total 6 columns):
Id             121027 non-null int64
L0_S19_D454    121027 non-null float64
L0_S19_D456    121027 non-null float64
L0_S19_D457    121027 non-null float64
L0_S19_D458    121027 non-null float64
L0_S19_D460    121027 non-null float64
dtypes: float64(5), int64(1)
memory usage: 5.5 MB
L0S20_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242111 entries, 0 to 242110
Data columns (total 5 columns):
Id             242111 non-null int64
L0_S20_D462    242111 non-null float64
L0_S20_D464    242111 non-null float64
L0_S20_D465    242111 non-null float64
L0_S20_D467    242111 non-null float64
dtypes: float64(4), int64(1)
memory usage: 9.2 MB
L0S21_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81409 entries, 0 to 81408
Data columns (total 16 columns):
Id             81409 non-null int64
L0_S21_D469    81409 non-null float64
L0_S21_D474    81368 non-null float64
L0_S21_D479    81409 non-null float64
L0_S21_D484    81373 non-null float64
L0_S21_D489    81409 non-null float64
L0_S21_D494    81368 non-null float64
L0_S21_D499    81368 non-null float64
L0_S21_D504    81368 non-null float64
L0_S21_D509    81368 non-null float64
L0_S21_D514    81368 non-null float64
L0_S21_D519    81368 non-null float64
L0_S21_D524    81368 non-null float64
L0_S21_D529    81368 non-null float64
L0_S21_D534    81368 non-null float64
L0_S21_D539    81368 non-null float64
dtypes: float64(15), int64(1)
memory usage: 9.9 MB
L0S22_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80601 entries, 0 to 80600
Data columns (total 16 columns):
Id             80601 non-null int64
L0_S22_D543    80600 non-null float64
L0_S22_D548    80591 non-null float64
L0_S22_D553    80599 non-null float64
L0_S22_D558    80595 non-null float64
L0_S22_D563    80599 non-null float64
L0_S22_D568    80591 non-null float64
L0_S22_D573    80591 non-null float64
L0_S22_D578    80591 non-null float64
L0_S22_D583    80591 non-null float64
L0_S22_D588    80591 non-null float64
L0_S22_D593    80591 non-null float64
L0_S22_D598    80591 non-null float64
L0_S22_D603    80591 non-null float64
L0_S22_D608    80591 non-null float64
L0_S22_D613    80591 non-null float64
dtypes: float64(15), int64(1)
memory usage: 9.8 MB
L0S23_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80290 entries, 0 to 80289
Data columns (total 16 columns):
Id             80290 non-null int64
L0_S23_D617    80290 non-null float64
L0_S23_D621    80290 non-null float64
L0_S23_D625    80290 non-null float64
L0_S23_D629    80290 non-null float64
L0_S23_D633    80290 non-null float64
L0_S23_D637    80290 non-null float64
L0_S23_D641    80290 non-null float64
L0_S23_D645    80290 non-null float64
L0_S23_D649    80290 non-null float64
L0_S23_D653    80290 non-null float64
L0_S23_D657    80290 non-null float64
L0_S23_D661    80290 non-null float64
L0_S23_D665    80290 non-null float64
L0_S23_D669    80290 non-null float64
L0_S23_D673    80290 non-null float64
dtypes: float64(15), int64(1)
memory usage: 9.8 MB
L1S24_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183727 entries, 0 to 183726
Columns: 289 entries, Id to L1_S24_D1851
dtypes: float64(288), int64(1)
memory usage: 405.1 MB
L1S25_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83658 entries, 0 to 83657
Columns: 334 entries, Id to L1_S25_D3035
dtypes: float64(333), int64(1)
memory usage: 213.2 MB
L2S26_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 227011 entries, 0 to 227010
Data columns (total 27 columns):
Id              227011 non-null int64
L2_S26_D3037    227011 non-null float64
L2_S26_D3041    227011 non-null float64
L2_S26_D3044    227011 non-null float64
L2_S26_D3048    227011 non-null float64
L2_S26_D3052    227011 non-null float64
L2_S26_D3056    227011 non-null float64
L2_S26_D3059    227011 non-null float64
L2_S26_D3063    227011 non-null float64
L2_S26_D3066    227011 non-null float64
L2_S26_D3070    227011 non-null float64
L2_S26_D3074    227011 non-null float64
L2_S26_D3078    227011 non-null float64
L2_S26_D3081    227010 non-null float64
L2_S26_D3084    227010 non-null float64
L2_S26_D3087    227010 non-null float64
L2_S26_D3090    227010 non-null float64
L2_S26_D3093    227010 non-null float64
L2_S26_D3096    227010 non-null float64
L2_S26_D3100    227011 non-null float64
L2_S26_D3103    227011 non-null float64
L2_S26_D3107    227011 non-null float64
L2_S26_D3110    227011 non-null float64
L2_S26_D3114    227011 non-null float64
L2_S26_D3118    227011 non-null float64
L2_S26_D3122    227011 non-null float64
L2_S26_D3126    227011 non-null float64
dtypes: float64(26), int64(1)
memory usage: 46.8 MB
L2S27_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120729 entries, 0 to 120728
Data columns (total 27 columns):
Id              120729 non-null int64
L2_S27_D3130    120729 non-null float64
L2_S27_D3134    120729 non-null float64
L2_S27_D3137    120729 non-null float64
L2_S27_D3141    120729 non-null float64
L2_S27_D3145    120729 non-null float64
L2_S27_D3149    120729 non-null float64
L2_S27_D3152    120729 non-null float64
L2_S27_D3156    120729 non-null float64
L2_S27_D3159    120729 non-null float64
L2_S27_D3163    120729 non-null float64
L2_S27_D3167    120729 non-null float64
L2_S27_D3171    120729 non-null float64
L2_S27_D3174    120729 non-null float64
L2_S27_D3177    120729 non-null float64
L2_S27_D3180    120729 non-null float64
L2_S27_D3183    120729 non-null float64
L2_S27_D3186    120729 non-null float64
L2_S27_D3189    120729 non-null float64
L2_S27_D3193    120729 non-null float64
L2_S27_D3196    120729 non-null float64
L2_S27_D3200    120729 non-null float64
L2_S27_D3203    120729 non-null float64
L2_S27_D3207    120729 non-null float64
L2_S27_D3211    120729 non-null float64
L2_S27_D3215    120729 non-null float64
L2_S27_D3219    120729 non-null float64
dtypes: float64(26), int64(1)
memory usage: 24.9 MB
L2S28_notnull_date.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9583 entries, 0 to 9582
Data columns (total 27 columns):
Id              9583 non-null int64
L2_S28_D3223    9583 non-null float64
L2_S28_D3227    9583 non-null float64
L2_S28_D3230    9583 non-null float64
L2_S28_D3234    9583 non-null float64
L2_S28_D3238    9583 non-null float64
L2_S28_D3242    9583 non-null float64
L2_S28_D3245    9583 non-null float64
L2_S28_D3249    9583 non-null float64
L2_S28_D3252    9583 non-null float64
L2_S28_D3256    9583 non-null float64
L2_S28_D3260    9583 non-null float64
L2_S28_D3264    9583 non-null float64
L2_S28_D3267    9583 non-null float64
L2_S28_D3270    9583 non-null float64
L2_S28_D3273    9583 non-null float64
L2_S28_D3276    9583 non-null float64
L2_S28_D3279    9583 non-null float64
L2_S28_D3282    9583 non-null float64
L2_S28_D3286    9583 non-null float64
L2_S28_D3289    9583 non-null float64
L2_S28_D3293    9583 non-null float64
L2_S28_D3296    9583 non-null float64
L2_S28_D3300    9583 non-null float64
L2_S28_D3304    9583 non-null float64
L2_S28_D3308    9583 non-null float64
L2_S28_D3312    9583 non-null float64
dtypes: float64(26), int64(1)
memory usage: 2.0 MB
L3S29_notnull_date.csv
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-3-b1a0a79366be> in <module>()
      1 for f in sorted(only_notnull_date):
      2     print f
----> 3     date_df = pd.read_csv(join(mypath, f))
      4     date_df.info()

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\io\parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654 
--> 655         return _read(filepath_or_buffer, kwds)
    656 
    657     parser_f.__name__ = name

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\io\parsers.pyc in _read(filepath_or_buffer, kwds)
    409 
    410     try:
--> 411         data = parser.read(nrows)
    412     finally:
    413         parser.close()

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
    980                 raise ValueError('skipfooter not supported for iteration')
    981 
--> 982         ret = self._engine.read(nrows)
    983 
    984         if self.options.get('as_recarray'):

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
   1717     def read(self, nrows=None):
   1718         try:
-> 1719             data = self._reader.read(nrows)
   1720         except StopIteration:
   1721             if self._first_chunk:

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.read (pandas\_libs\parsers.c:10862)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory (pandas\_libs\parsers.c:11138)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_rows (pandas\_libs\parsers.c:12175)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data (pandas\_libs\parsers.c:14136)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens (pandas\_libs\parsers.c:14858)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype (pandas\_libs\parsers.c:15629)()

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\core\dtypes\common.pyc in is_integer_dtype(arr_or_dtype)
    735 
    736 
--> 737 def is_integer_dtype(arr_or_dtype):
    738     """
    739     Check whether the provided array or dtype is of an integer dtype.

KeyboardInterrupt: 

In [84]:
l0s01_df = pd.read_csv(join(mypath,'L0S01_notnull_date.csv'), index_col=['Id'])

In [33]:
def apply_mean(row):
    print row
    return row.mean()

In [39]:
col_count = len(l0s01_df.columns)
last_column = col_count - 1

In [40]:
last_column_name = l0s01_df.columns[-1]

In [41]:
last_column_name


Out[41]:
'L0_S1_D30'

In [61]:
new_df = pd.DataFrame(columns=['Station', 'Time'], index=l0s01_df.index)

In [62]:
new_df['Station'] = 'L0S01'
new_df['Time'] = l0s01_df[last_column_name]

In [63]:
new_df.head()


Out[63]:
Station Time
Id
4 L0S01 82.24
7 L0S01 1618.70
9 L0S01 1149.20
11 L0S01 602.64
13 L0S01 1331.66

In [67]:
new_df.sort_values(['Time'])


Out[67]:
Station Time
Id
596800 L0S01 0.01
108193 L0S01 0.01
587799 L0S01 0.01
669517 L0S01 0.01
737264 L0S01 0.01
651542 L0S01 0.01
521262 L0S01 0.01
577092 L0S01 0.02
236181 L0S01 0.02
655331 L0S01 0.02
594797 L0S01 0.02
582547 L0S01 0.02
503457 L0S01 0.02
967196 L0S01 0.02
590121 L0S01 0.02
233347 L0S01 0.02
960462 L0S01 0.02
378408 L0S01 0.02
957292 L0S01 0.02
319905 L0S01 0.02
441134 L0S01 0.02
507124 L0S01 0.02
972750 L0S01 0.02
434276 L0S01 0.02
498778 L0S01 0.02
508801 L0S01 0.02
823623 L0S01 0.06
823227 L0S01 0.06
893347 L0S01 0.06
318952 L0S01 0.06
... ... ...
2143486 L0S01 1713.69
144684 L0S01 1713.69
339892 L0S01 1713.69
127650 L0S01 1713.69
71882 L0S01 1713.69
2270239 L0S01 1713.69
173196 L0S01 1713.69
2343682 L0S01 1713.69
488549 L0S01 1713.69
278952 L0S01 1713.69
2198522 L0S01 1713.69
142867 L0S01 1713.69
2346806 L0S01 1713.69
2354873 L0S01 1713.69
220234 L0S01 1713.70
57789 L0S01 1713.70
67890 L0S01 1713.70
280611 L0S01 1713.70
2290257 L0S01 1713.70
261850 L0S01 1713.70
62043 L0S01 1713.70
201609 L0S01 1713.70
216683 L0S01 1713.70
199629 L0S01 1713.70
288287 L0S01 1713.70
414416 L0S01 1713.70
413345 L0S01 1713.70
2216389 L0S01 1713.70
2363613 L0S01 1713.71
64615 L0S01 1713.71

673904 rows × 2 columns


In [10]:
df = pd.read_csv('data/item_station_date.csv', index_col=['Id'])

In [11]:
df.head()


Out[11]:
Station Time
Id
651542 L0S00 0.00
510783 L0S00 0.00
108193 L0S00 0.01
521262 L0S00 0.01
587799 L0S00 0.01

In [12]:
df.tail()


Out[12]:
Station Time
Id
1246863 L3S37 1718.48
1247466 L3S37 1718.48
1255566 L3S37 1718.48
1906063 L3S37 1718.48
594051 L3S37 1718.49

In [14]:
df[df.index==594051]


Out[14]:
Station Time
Id
594051 L0S00 1713.67
594051 L0S01 1713.67
594051 L0S02 1713.67
594051 L0S05 1713.69
594051 L0S06 1713.69
594051 L0S08 1713.69
594051 L0S10 1713.69
594051 L3S29 1718.43
594051 L3S30 1718.43
594051 L3S33 1718.47
594051 L3S34 1718.47
594051 L3S35 1718.48
594051 L3S37 1718.49

In [100]:
df['Time'] = df['Time'].astype(np.float32)
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 28769709 entries, 651542 to 594051
Data columns (total 2 columns):
Station    object
Time       float32
dtypes: float32(1), object(1)
memory usage: 548.7+ MB

In [93]:
first = [df[df.index==x].head(1)['Time'].values[0] for x in df.index.unique()] #df.index.max()


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-93-253d8a8c8069> in <module>()
----> 1 first = [df[df.index==x].head(1)['Time'].values[0] for x in df.index.unique()] #df.index.max()

d:\Anaconda\envs\Deep2\lib\site-packages\pandas\core\indexes\base.pyc in _evaluate_compare(self, other)
   3675                 else:
   3676                     with np.errstate(all='ignore'):
-> 3677                         result = op(self.values, np.asarray(other))
   3678 
   3679                 # technically we could support bool dtyped Index

KeyboardInterrupt: 

In [ ]:
%time last = [df[df.index==x].tail(1)['Time'].values[0] for x in df.index.unique()] #df.index.max()

In [66]:
diff = np.array(last) - np.array(first)

In [67]:
diff


Out[67]:
array([  7.93      ,  34.43697368,  39.13      ,   5.05      ,
         0.83      ,   2.63      ,   5.72      ,  26.88      ,   4.96      ])

In [68]:
[df[df.index==x] for x in range(1,10)] #df.index.max()


Out[68]:
[   Station    Time
 Id                
 1    L1S24  792.77
 1    L2S27  794.72
 1    L3S29  800.65
 1    L3S30  800.66
 1    L3S33  800.68
 1    L3S34  800.70
 1    L3S35  800.70
 1    L3S37  800.70,    Station         Time
 Id                     
 2    L1S24  1025.633026
 2    L2S27  1045.960000
 2    L3S29  1060.040000
 2    L3S30  1060.040000
 2    L3S33  1060.060000
 2    L3S34  1060.060000
 2    L3S36  1060.070000
 2    L3S37  1060.070000,    Station    Time
 Id                
 3    L1S24  671.95
 3    L2S26  704.11
 3    L3S29  710.94
 3    L3S30  710.95
 3    L3S33  711.07
 3    L3S34  711.08
 3    L3S35  711.08
 3    L3S37  711.08,    Station   Time
 Id               
 4    L0S00  82.24
 4    L0S01  82.24
 4    L0S02  82.24
 4    L0S04  82.26
 4    L0S07  82.26
 4    L0S08  82.27
 4    L0S11  82.27
 4    L3S29  87.26
 4    L3S30  87.26
 4    L3S31  87.27
 4    L3S33  87.28
 4    L3S34  87.28
 4    L3S35  87.29
 4    L3S37  87.29,    Station    Time
 Id                
 5    L0S00  255.45
 5    L0S01  255.45
 5    L0S02  255.46
 5    L0S04  255.48
 5    L0S07  255.48
 5    L0S08  255.48
 5    L0S10  255.49
 5    L2S26  255.50
 5    L3S29  256.25
 5    L3S30  256.25
 5    L3S33  256.27
 5    L3S34  256.27
 5    L3S36  256.28
 5    L3S37  256.28,    Station     Time
 Id                 
 6    L0S12  1313.12
 6    L0S13  1313.12
 6    L0S14  1313.12
 6    L0S16  1313.14
 6    L0S18  1313.15
 6    L0S20  1313.15
 6    L0S21  1313.15
 6    L3S29  1315.73
 6    L3S30  1315.73
 6    L3S33  1315.75
 6    L3S34  1315.75
 6    L3S35  1315.75
 6    L3S37  1315.75,    Station     Time
 Id                 
 7    L0S00  1618.70
 7    L0S01  1618.70
 7    L0S02  1618.70
 7    L0S05  1618.72
 7    L0S06  1618.72
 7    L0S08  1618.73
 7    L0S10  1618.73
 7    L3S29  1624.40
 7    L3S30  1624.41
 7    L3S33  1624.42
 7    L3S34  1624.42
 7    L3S35  1624.42
 7    L3S37  1624.42,    Station    Time
 Id                
 8    L1S24  743.40
 8    L2S26  760.93
 8    L3S29  770.27
 8    L3S30  770.27
 8    L3S33  770.28
 8    L3S34  770.28
 8    L3S35  770.28
 8    L3S37  770.28,    Station     Time
 Id                 
 9    L0S00  1149.20
 9    L0S01  1149.20
 9    L0S02  1149.21
 9    L0S04  1149.22
 9    L0S07  1149.22
 9    L0S08  1149.22
 9    L0S10  1149.22
 9    L3S29  1154.12
 9    L3S30  1154.13
 9    L3S33  1154.14
 9    L3S34  1154.15
 9    L3S36  1154.16
 9    L3S37  1154.16]

In [95]:
df.index.unique()


Out[95]:
Int64Index([ 651542,  510783,  108193,  521262,  587799,  596800,  669517,
             737264,  430412,  443497,
            ...
            2138774, 2207689, 2279491, 2282378, 2284766, 2350543, 2352005,
            2362548,   64615, 2343448],
           dtype='int64', name=u'Id', length=2366330)

In [84]:



Out[84]:
<enumerate at 0x9de2b88>

In [ ]: