発電量データを30分単位に変更する


In [1]:
# データ加工・処理・分析モジュール
import numpy as np
import pandas as pd

In [2]:
# train_kwhをエクセル等で開くとdatetimeが指数表示に直される可能性がある
# その場合うまくいかないので201201010120の形になってることを確認する必要あり
output_data = pd.read_csv('train_kwh.tsv', delimiter = '\t')

In [3]:
def set_time(dataframe, col_name):
    '''
    to_datetimeを使うための前処理
    '''
    dataframe[col_name] = dataframe[col_name].map(lambda x : transform_time(x))
    return dataframe

In [4]:
def transform_time(x):
    '''
    set_time内で使う関数
    to_datetimeで24時をサポートしないので00に変更する処理
    '''
    str_x = str(x)
    res = ''
    if str(x)[8:10] == '24':
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' 00:'+str_x[10:12] 
    else:
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' '+ str_x[8:10] +':'+str_x[10:12]
    return res

In [5]:
# datetimeの行をpd.Timestampのインスタンスに変更
output_data = set_time(output_data, 'datetime')
output_data['datetime'] = output_data['datetime'].map(lambda x : pd.to_datetime(x))

# 30分ごとに集計
output_data.set_index('datetime').groupby(pd.TimeGrouper(freq='1800s', closed='left')).sum()


Out[5]:
SOLA01 SOLA02 SOLA03
datetime
2012-01-01 00:00:00 0 0.0 NaN
2012-01-01 00:30:00 0 0.0 NaN
2012-01-01 01:00:00 0 0.0 NaN
2012-01-01 01:30:00 0 0.0 NaN
2012-01-01 02:00:00 0 0.0 NaN
2012-01-01 02:30:00 0 0.0 NaN
2012-01-01 03:00:00 0 0.0 NaN
2012-01-01 03:30:00 0 0.0 NaN
2012-01-01 04:00:00 0 0.0 NaN
2012-01-01 04:30:00 0 0.0 NaN
2012-01-01 05:00:00 0 0.0 NaN
2012-01-01 05:30:00 0 0.0 NaN
2012-01-01 06:00:00 0 0.0 NaN
2012-01-01 06:30:00 0 0.0 NaN
2012-01-01 07:00:00 15 0.0 NaN
2012-01-01 07:30:00 180 340.0 NaN
2012-01-01 08:00:00 527 840.0 NaN
2012-01-01 08:30:00 670 1110.0 NaN
2012-01-01 09:00:00 904 1680.0 NaN
2012-01-01 09:30:00 911 1730.0 NaN
2012-01-01 10:00:00 1705 2920.0 NaN
2012-01-01 10:30:00 1754 3090.0 NaN
2012-01-01 11:00:00 1696 2900.0 NaN
2012-01-01 11:30:00 1908 3300.0 NaN
2012-01-01 12:00:00 1756 3160.0 NaN
2012-01-01 12:30:00 1665 2770.0 NaN
2012-01-01 13:00:00 1805 2980.0 NaN
2012-01-01 13:30:00 1381 2080.0 NaN
2012-01-01 14:00:00 1143 1910.0 NaN
2012-01-01 14:30:00 944 1680.0 NaN
... ... ... ...
2015-12-31 09:00:00 464 670.0 470.0
2015-12-31 09:30:00 644 720.0 2160.0
2015-12-31 10:00:00 1589 1500.0 2450.0
2015-12-31 10:30:00 1612 2020.0 2630.0
2015-12-31 11:00:00 1776 2650.0 2780.0
2015-12-31 11:30:00 1510 1310.0 2870.0
2015-12-31 12:00:00 1923 2620.0 2870.0
2015-12-31 12:30:00 2244 2990.0 2780.0
2015-12-31 13:00:00 2041 2620.0 2610.0
2015-12-31 13:30:00 1737 1300.0 2380.0
2015-12-31 14:00:00 1574 1500.0 2060.0
2015-12-31 14:30:00 746 1040.0 1640.0
2015-12-31 15:00:00 353 720.0 1140.0
2015-12-31 15:30:00 134 440.0 630.0
2015-12-31 16:00:00 57 130.0 180.0
2015-12-31 16:30:00 0 0.0 0.0
2015-12-31 17:00:00 0 0.0 0.0
2015-12-31 17:30:00 0 0.0 0.0
2015-12-31 18:00:00 0 0.0 0.0
2015-12-31 18:30:00 0 0.0 0.0
2015-12-31 19:00:00 0 0.0 0.0
2015-12-31 19:30:00 0 0.0 0.0
2015-12-31 20:00:00 0 0.0 0.0
2015-12-31 20:30:00 0 0.0 0.0
2015-12-31 21:00:00 0 0.0 0.0
2015-12-31 21:30:00 0 0.0 0.0
2015-12-31 22:00:00 0 0.0 0.0
2015-12-31 22:30:00 0 0.0 0.0
2015-12-31 23:00:00 0 0.0 0.0
2015-12-31 23:30:00 0 0.0 0.0

70128 rows × 3 columns


In [ ]: