In [18]:
import pandas as pd
import re

In [15]:
df = pd.read_excel("earthquake_month.xlsx", "2").dropna()

In [29]:
import datetime as dt

In [31]:
r = re.match("(\d+)月(\d+)日(\d+)時(\d+)分", "2月02日02時03分", re.UNICODE).groups()

In [35]:
dt.datetime(2013, *(int(d) for d in r))


Out[35]:
'2013-02-02'

In [39]:
def parse_sheet(df, month):
    df.columns = ["no", "date", "lon", "lat", "depth", "scale"]
    df["date"] = df.date.apply(
        lambda t: dt.datetime(2013, 
            *(int(d) for d in re.match("(\d+)月(\d+)日(\d+)時(\d+)分", t, re.UNICODE).groups())
    ))
    df["month"] = month
    return df

In [41]:
df_month_list = [parse_sheet(pd.read_excel("earthquake_month.xlsx", str(m)).dropna(), str(m)) for m in range(1, 12)]

In [42]:
df_combined = pd.concat(df_month_list)

In [47]:
df_combined.to_csv("earthquake_month_parsed.csv")

In [ ]: