In [18]:
import pandas as pd
import re
In [15]:
df = pd.read_excel("earthquake_month.xlsx", "2").dropna()
In [29]:
import datetime as dt
In [31]:
r = re.match("(\d+)月(\d+)日(\d+)時(\d+)分", "2月02日02時03分", re.UNICODE).groups()
In [35]:
dt.datetime(2013, *(int(d) for d in r))
Out[35]:
In [39]:
def parse_sheet(df, month):
df.columns = ["no", "date", "lon", "lat", "depth", "scale"]
df["date"] = df.date.apply(
lambda t: dt.datetime(2013,
*(int(d) for d in re.match("(\d+)月(\d+)日(\d+)時(\d+)分", t, re.UNICODE).groups())
))
df["month"] = month
return df
In [41]:
df_month_list = [parse_sheet(pd.read_excel("earthquake_month.xlsx", str(m)).dropna(), str(m)) for m in range(1, 12)]
In [42]:
df_combined = pd.concat(df_month_list)
In [47]:
df_combined.to_csv("earthquake_month_parsed.csv")
In [ ]: