In [3]:
    
import pandas as pd
    
In [4]:
    
import numpy as np
    
In [5]:
    
import sys
sys.path.append('..')
    
In [6]:
    
from dis_ds import parsing
    
In [7]:
    
all_files = !ls ../test_data
    
In [8]:
    
full_path_all_files = ['../test_data/' + a for a in all_files]
    
In [9]:
    
all_files_df = parsing.parse_file_list(full_path_all_files)
    
In [14]:
    
all_files_df[:1000]
    
    Out[14]:
  
    
       
      bakerloo 
      central 
      circle 
      district 
      hammersmith-city 
      jubilee 
      metropolitan 
      northern 
      piccadilly 
      victoria 
      waterloo-city 
     
  
  
    
      2015-02-24 11:51:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:52:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:53:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:54:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:55:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:56:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:01:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:02:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:03:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:04:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:05:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:06:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:07:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:08:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:09:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:10:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:11:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:12:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:13:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:14:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:15:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:16:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:17:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:18:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:19:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:20:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:21:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:22:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:23:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:24:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      2015-02-25 04:05:15 
      10 
      10 
      10 
      10 
      20 
      20 
      20 
      20 
      10 
      20 
      20 
     
    
      2015-02-25 04:06:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:07:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:08:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:09:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:10:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:11:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:12:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:13:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:14:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:15:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:16:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:17:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:18:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:19:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:20:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:21:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:22:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:23:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:24:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:25:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:26:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:27:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:28:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:29:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:30:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:31:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:32:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:33:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
    
      2015-02-25 04:34:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      20 
     
  
1000 rows × 11 columns
In [17]:
    
import xlsxwriter
writer = pd.ExcelWriter('tfldata.xlsx', engine='xlsxwriter')
all_files_df.to_excel(writer, sheet_name="Sheet 1")
    
In [11]:
    
all_files_df.save(all_files_df)
    
    
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py:1000: FutureWarning: save is deprecated, use to_pickle
  warnings.warn("save is deprecated, use to_pickle", FutureWarning)
    
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-33722759911e> in <module>()
----> 1 all_files_df.save(all_files_df)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in save(self, path)
    999         from pandas.io.pickle import to_pickle
   1000         warnings.warn("save is deprecated, use to_pickle", FutureWarning)
-> 1001         return to_pickle(self, path)
   1002 
   1003     def load(self, path):  # TODO remove in 0.14
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/pickle.py in to_pickle(obj, path)
     11         File path
     12     """
---> 13     with open(path, 'wb') as f:
     14         pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
     15 
TypeError: invalid file:                      bakerloo  central  circle  district  hammersmith-city  \
2015-02-24 11:51:45        10       10      10        10                10   
2015-02-24 11:52:44        10       10      10        10                10   
2015-02-24 11:53:44        10       10      10        10                10   
2015-02-24 11:54:45        10       10      10        10                10   
2015-02-24 11:55:44        10       10      10        10                10   
2015-02-24 11:56:45        10       10      10        10                10   
2015-02-24 12:01:14        10       10      10        10                10   
2015-02-24 12:02:15        10       10      10        10                10   
2015-02-24 12:03:14        10       10      10        10                10   
2015-02-24 12:04:14        10       10      10        10                10   
2015-02-24 12:05:14        10       10      10        10                10   
2015-02-24 12:06:14        10       10      10        10                10   
2015-02-24 12:07:15        10       10      10        10                10   
2015-02-24 12:08:15        10       10      10        10                10   
2015-02-24 12:09:14        10       10      10        10                10   
2015-02-24 12:10:14        10       10      10        10                10   
2015-02-24 12:11:15        10       10      10        10                10   
2015-02-24 12:12:14        10       10      10        10                10   
2015-02-24 12:13:14        10       10      10        10                10   
2015-02-24 12:14:14        10       10      10        10                10   
2015-02-24 12:15:14        10       10      10        10                10   
2015-02-24 12:16:14        10       10      10        10                10   
2015-02-24 12:17:15        10       10      10        10                10   
2015-02-24 12:18:14        10       10      10        10                10   
2015-02-24 12:19:14        10       10      10        10                10   
2015-02-24 12:20:14        10       10      10        10                10   
2015-02-24 12:21:15        10       10      10        10                10   
2015-02-24 12:22:15        10       10      10        10                10   
2015-02-24 12:23:14        10       10      10        10                10   
2015-02-24 12:24:14        10       10      10        10                10   
...                       ...      ...     ...       ...               ...   
2015-03-03 09:29:13        10       10      10        10                10   
2015-03-03 09:30:14        10       10      10        10                10   
2015-03-03 09:31:14        10       10      10        10                10   
2015-03-03 09:32:13        10       10      10        10                10   
2015-03-03 09:33:13        10       10      10        10                10   
2015-03-03 09:34:14        10       10      10        10                10   
2015-03-03 09:35:14        10       10      10        10                10   
2015-03-03 09:36:13        10       10      10        10                10   
2015-03-03 09:37:14        10       10      10        10                10   
2015-03-03 09:38:14        10       10      10        10                10   
2015-03-03 09:39:14        10       10      10        10                10   
2015-03-03 09:40:14        10       10      10        10                10   
2015-03-03 09:41:14        10       10      10        10                10   
2015-03-03 09:42:13        10       10      10        10                10   
2015-03-03 09:43:14        10       10      10        10                10   
2015-03-03 09:44:14        10       10      10        10                10   
2015-03-03 09:45:15        10       10      10        10                10   
2015-03-03 09:46:15        10       10      10        10                10   
2015-03-03 09:47:14        10       10      10        10                10   
2015-03-03 09:48:14        10       10      10        10                10   
2015-03-03 09:49:13        10       10      10        10                10   
2015-03-03 09:50:14        10       10      10        10                10   
2015-03-03 09:51:14        10       10      10        10                10   
2015-03-03 09:52:13        10       10      10        10                10   
2015-03-03 09:53:14        10       10      10        10                10   
2015-03-03 09:54:13        10       10      10        10                10   
2015-03-03 09:55:14        10       10      10        10                10   
2015-03-03 09:56:14        10       10      10        10                10   
2015-03-03 09:57:14        10       10      10        10                10   
2015-03-03 09:58:13        10       10      10        10                10   
                     jubilee  metropolitan  northern  piccadilly  victoria  \
2015-02-24 11:51:45       10            10        10          10        10   
2015-02-24 11:52:44       10            10        10          10        10   
2015-02-24 11:53:44       10            10        10          10        10   
2015-02-24 11:54:45       10            10        10          10        10   
2015-02-24 11:55:44       10            10        10          10        10   
2015-02-24 11:56:45       10            10        10          10        10   
2015-02-24 12:01:14       10            10        10          10        10   
2015-02-24 12:02:15       10            10        10          10        10   
2015-02-24 12:03:14       10            10        10          10        10   
2015-02-24 12:04:14       10            10        10          10        10   
2015-02-24 12:05:14       10            10        10          10        10   
2015-02-24 12:06:14       10            10        10          10        10   
2015-02-24 12:07:15       10            10        10          10        10   
2015-02-24 12:08:15       10            10        10          10        10   
2015-02-24 12:09:14       10            10        10          10        10   
2015-02-24 12:10:14       10            10        10          10        10   
2015-02-24 12:11:15       10            10        10          10        10   
2015-02-24 12:12:14       10            10        10          10        10   
2015-02-24 12:13:14       10            10        10          10        10   
2015-02-24 12:14:14       10            10        10          10        10   
2015-02-24 12:15:14       10            10        10          10        10   
2015-02-24 12:16:14       10            10        10          10        10   
2015-02-24 12:17:15       10            10        10          10        10   
2015-02-24 12:18:14       10            10        10          10        10   
2015-02-24 12:19:14       10            10        10          10        10   
2015-02-24 12:20:14       10            10        10          10        10   
2015-02-24 12:21:15       10            10        10          10        10   
2015-02-24 12:22:15       10            10        10          10        10   
2015-02-24 12:23:14       10            10        10          10        10   
2015-02-24 12:24:14       10            10        10          10        10   
...                      ...           ...       ...         ...       ...   
2015-03-03 09:29:13       10            10        10          10        10   
2015-03-03 09:30:14       10            10        10          10        10   
2015-03-03 09:31:14       10            10        10          10        10   
2015-03-03 09:32:13       10            10        10          10        10   
2015-03-03 09:33:13       10            10        10          10        10   
2015-03-03 09:34:14       10            10        10          10        10   
2015-03-03 09:35:14       10            10        10          10        10   
2015-03-03 09:36:13       10            10        10          10        10   
2015-03-03 09:37:14       10            10        10          10        10   
2015-03-03 09:38:14       10            10        10          10        10   
2015-03-03 09:39:14       10            10        10          10        10   
2015-03-03 09:40:14       10            10        10          10        10   
2015-03-03 09:41:14       10            10        10          10        10   
2015-03-03 09:42:13       10            10        10          10        10   
2015-03-03 09:43:14       10            10        10          10        10   
2015-03-03 09:44:14       10            10        10          10        10   
2015-03-03 09:45:15       10            10        10          10        10   
2015-03-03 09:46:15       10            10        10          10        10   
2015-03-03 09:47:14       10            10        10          10        10   
2015-03-03 09:48:14       10            10        10          10        10   
2015-03-03 09:49:13       10            10        10          10        10   
2015-03-03 09:50:14       10            10        10          10        10   
2015-03-03 09:51:14       10            10        10          10        10   
2015-03-03 09:52:13       10            10        10          10        10   
2015-03-03 09:53:14       10            10        10          10        10   
2015-03-03 09:54:13       10            10        10          10        10   
2015-03-03 09:55:14       10            10        10          10        10   
2015-03-03 09:56:14       10            10        10          10        10   
2015-03-03 09:57:14       10            10        10          10        10   
2015-03-03 09:58:13       10            10        10          10        10   
                     waterloo-city  
2015-02-24 11:51:45             10  
2015-02-24 11:52:44             10  
2015-02-24 11:53:44             10  
2015-02-24 11:54:45             10  
2015-02-24 11:55:44             10  
2015-02-24 11:56:45             10  
2015-02-24 12:01:14             10  
2015-02-24 12:02:15             10  
2015-02-24 12:03:14             10  
2015-02-24 12:04:14             10  
2015-02-24 12:05:14             10  
2015-02-24 12:06:14             10  
2015-02-24 12:07:15             10  
2015-02-24 12:08:15             10  
2015-02-24 12:09:14             10  
2015-02-24 12:10:14             10  
2015-02-24 12:11:15             10  
2015-02-24 12:12:14             10  
2015-02-24 12:13:14             10  
2015-02-24 12:14:14             10  
2015-02-24 12:15:14             10  
2015-02-24 12:16:14             10  
2015-02-24 12:17:15             10  
2015-02-24 12:18:14             10  
2015-02-24 12:19:14             10  
2015-02-24 12:20:14             10  
2015-02-24 12:21:15             10  
2015-02-24 12:22:15             10  
2015-02-24 12:23:14             10  
2015-02-24 12:24:14             10  
...                            ...  
2015-03-03 09:29:13             10  
2015-03-03 09:30:14             10  
2015-03-03 09:31:14             10  
2015-03-03 09:32:13             10  
2015-03-03 09:33:13             10  
2015-03-03 09:34:14             10  
2015-03-03 09:35:14             10  
2015-03-03 09:36:13             10  
2015-03-03 09:37:14             10  
2015-03-03 09:38:14             10  
2015-03-03 09:39:14             10  
2015-03-03 09:40:14             10  
2015-03-03 09:41:14             10  
2015-03-03 09:42:13             10  
2015-03-03 09:43:14             10  
2015-03-03 09:44:14             10  
2015-03-03 09:45:15             10  
2015-03-03 09:46:15             10  
2015-03-03 09:47:14             10  
2015-03-03 09:48:14             10  
2015-03-03 09:49:13             10  
2015-03-03 09:50:14             10  
2015-03-03 09:51:14             10  
2015-03-03 09:52:13             10  
2015-03-03 09:53:14             10  
2015-03-03 09:54:13             10  
2015-03-03 09:55:14             10  
2015-03-03 09:56:14             10  
2015-03-03 09:57:14             10  
2015-03-03 09:58:13             10  
[9944 rows x 11 columns]
In [13]:
    
%save?
    
In [16]:
    
from sqlalchemy import create_engine
    
In [17]:
    
engine = create_engine('postgres://pmgigyko:Mb7sR3WMZSNPYjm4FTvS0WRDhtqUgcam@pellefant.db.elephantsql.com:5432/pmgigyko')
    
In [18]:
    
all_files_df.to_sql('disruptions_test1',engine)
    
    
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-111b9adef532> in <module>()
----> 1 all_files_df.to_sql('disruptions_test1',engine)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
    980             self, name, con, flavor=flavor, schema=schema, if_exists=if_exists,
    981             index=index, index_label=index_label, chunksize=chunksize,
--> 982             dtype=dtype)
    983 
    984     def to_pickle(self, path):
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
    547     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
    548                       index_label=index_label, schema=schema,
--> 549                       chunksize=chunksize, dtype=dtype)
    550 
    551 
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1185                          if_exists=if_exists, index_label=index_label,
   1186                          schema=schema, dtype=dtype)
-> 1187         table.create()
   1188         table.insert(chunksize)
   1189         # check for potentially case sensitivity issues (GH7815)
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in create(self)
    648         if self.exists():
    649             if self.if_exists == 'fail':
--> 650                 raise ValueError("Table '%s' already exists." % self.name)
    651             elif self.if_exists == 'replace':
    652                 self.pd_sql.drop_table(self.name, self.schema)
ValueError: Table 'disruptions_test1' already exists.
In [22]:
    
df_feb = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-02')
    
In [24]:
    
df_march=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-03')
    
In [25]:
    
df_april=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-04')
    
In [31]:
    
df_may=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')
    
In [32]:
    
df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()
    
    
---------------------------------------------------------------------------
S3ResponseError                           Traceback (most recent call last)
<ipython-input-32-f771dfcf1dae> in <module>()
----> 1 df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()
/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
     74     b = c.get_bucket('pivotal-london-dis')
     75     key_list = b.list(prefix=file_prefix)
---> 76     return parse_file_list(key_list)
     77 
/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
     65 
     66 def parse_file_list(file_list):
---> 67     result_list = [parse_file(file) for file in file_list]
     68     result_df = pd.concat(result_list)
     69     return result_df
/Users/pivotal/dis/dis_ds/parsing.py in <listcomp>(.0)
     65 
     66 def parse_file_list(file_list):
---> 67     result_list = [parse_file(file) for file in file_list]
     68     result_df = pd.concat(result_list)
     69     return result_df
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucketlistresultset.py in bucket_lister(bucket, prefix, delimiter, marker, headers, encoding_type)
     32         rs = bucket.get_all_keys(prefix=prefix, marker=marker,
     33                                  delimiter=delimiter, headers=headers,
---> 34                                  encoding_type=encoding_type)
     35         for k in rs:
     36             yield k
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in get_all_keys(self, headers, **params)
    470         return self._get_all([('Contents', self.key_class),
    471                               ('CommonPrefixes', Prefix)],
--> 472                              '', headers, **params)
    473 
    474     def get_all_versions(self, headers=None, **params):
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in _get_all(self, element_map, initial_query_string, headers, **params)
    408         else:
    409             raise self.connection.provider.storage_response_error(
--> 410                 response.status, response.reason, body)
    411 
    412     def validate_kwarg_names(self, kwargs, names):
S3ResponseError: S3ResponseError: 403 Forbidden
<?xml version="1.0" encoding="UTF-8"?>
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>Sat, 17 Oct 2015 04:02:16 GMT</RequestTime><ServerTime>2015-10-17T06:04:18Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>D656B3EB85DB0AC7</RequestId><HostId>h7a9QifJel2RsnFl4/Whqi/Muc9LqeIfOHj0fotpfE0WnwSNobbCramJkBGl+DvudoFLLA48ZU0=</HostId></Error>
In [27]:
    
frames = (df_feb,df_march,df_april,df_may)
    
In [28]:
    
total_df = pd.concat(frames)
    
In [29]:
    
total_df
    
    Out[29]:
  
    
       
      bakerloo 
      central 
      circle 
      district 
      hammersmith-city 
      jubilee 
      metropolitan 
      northern 
      piccadilly 
      victoria 
      waterloo-city 
     
  
  
    
      2015-02-24 11:51:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:52:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:53:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:54:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:55:44 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 11:56:45 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:01:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:02:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:03:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:04:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:05:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:06:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:07:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:08:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:09:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:10:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:11:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:12:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:13:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:14:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:15:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:16:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:17:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:18:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:19:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:20:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:21:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:22:15 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:23:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      2015-02-24 12:24:14 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
      10 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      2015-05-31 15:47:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 16:04:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 16:21:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 16:37:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 16:54:36 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 17:11:14 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 17:27:54 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 17:44:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 18:01:18 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 18:17:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 18:34:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 18:51:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 19:07:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 19:24:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 19:41:16 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 19:57:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 20:14:45 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 20:31:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 20:47:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 21:04:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 21:21:26 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 21:37:55 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 21:54:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 22:11:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 22:27:54 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 22:44:35 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 23:01:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 23:17:56 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 23:34:36 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      2015-05-31 23:51:15 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
  
132702 rows × 11 columns
In [ ]:
    
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')
    
In [ ]:
    
s3_files_df=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-')
    
In [ ]:
    
s3_files_df
    
In [19]:
    
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-24_07:16:27')
    
    Out[19]:
  
    
       
      bakerloo 
      central 
      circle 
      district 
      hammersmith-city 
      jubilee 
      metropolitan 
      northern 
      piccadilly 
      victoria 
      waterloo-city 
     
  
  
    
      2015-09-24 07:16:27 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
  
In [28]:
    
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')
    
    
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-be5970fda21c> in <module>()
----> 1 parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')
/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
     74     b = c.get_bucket('pivotal-london-dis')
     75     key_list = b.list(prefix=file_prefix)
---> 76     return parse_file_list(key_list)
     77 
/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
     66 def parse_file_list(file_list):
     67     result_list = [parse_file(file) for file in file_list]
---> 68     result_df = pd.concat(result_list)
     69     return result_df
     70 
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    752                        keys=keys, levels=levels, names=names,
    753                        verify_integrity=verify_integrity,
--> 754                        copy=copy)
    755     return op.get_result()
    756 
/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    797 
    798         if len(objs) == 0:
--> 799             raise ValueError('All objects passed were None')
    800 
    801         # consolidate data & figure out what our result ndim is going to be
ValueError: All objects passed were None
In [46]:
    
s3_files_df.describe()
    
    
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-46-0a7a1fef5d51> in <module>()
----> 1 pd.s3_files_df.describe()
AttributeError: 'module' object has no attribute 's3_files_df'
In [ ]:
    
s3_files_df
    
In [ ]:
    
    
Content source: ihuston/dis-datascience
Similar notebooks: