In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime, timedelta
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
np.random.seed(1445)
ZERO_TIMESTAMP = datetime(2017, 8, 15, 21)
class MySQLConfig:
HOST = '10.0.149.62'
USER = "root"
PASS = "root123"
DB = "hangzhouhubqa_v1_2"
CHARSET = 'utf8'
engine = create_engine(
f'mysql+pymysql://{USER}:{PASS}@{HOST}/{DB}?charset={CHARSET}',
isolation_level="READ UNCOMMITTED")
def load_from_mysql(table_name: str=None, user_name: str=None):
"""读取远程mysql数据表"""
if user_name is None:
table = pd.read_sql_table(con=MySQLConfig.engine, table_name=f"{table_name}")
return table
else:
query_str = f"select * from {table_name} where user_name = '{user_name}'"
table = pd.read_sql_query(sql=query_str, con=MySQLConfig.engine)
return table
def continuous_node_time(x):
""""""
if x.shape[0] > 1:
x['end_time'] = list(x['start_time'])[1:] + [np.inf]
else:
x['end_time'] = np.inf
return x
def plt_bar_with_labels(pd, y_label):
ax = pd.plot(kind='bar')
x_labels = pd.index.tolist()
ax.set_ylabel(y_label)
rects = ax.patches
labels = pd.tolist()
for rect, label in zip(rects, labels):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')
plt.show()
def set_plot(if_subplot: bool=False, ax: pd.DataFrame.plot=None, x_label: str=None, y_label: str=None):
"""
discrip:
one axes fig type plot
Args:
ax: pd.DataFrame type data to get axes infomations such as x axis, bar height, bar width ...;
y_label: str, show on each axes y label;
Return:
axes type to be ploted for plt
"""
# one axes plot
if not if_subplot:
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
for p in ax.patches:
ax.text(p.get_x() + p.get_width()/2, p.get_height() + 5, int(p.get_height()), ha='center', va='bottom')
return ax
# subplot axex plot
else:
for n_ax in ax:
for p in n_ax:
p.set_xlabel(x_label)
p.set_ylabel(y_label)
for x in p.patches:
p.text(x.get_x() + x.get_width()/2, x.get_height() + 5, int(x.get_height()), ha='center', va='bottom')
return ax
In [2]:
# =========================================设备时间计划表 i_equipment_io==================================
equipment_t_t = load_from_mysql('i_equipment_io', 'kissf')
# =========================================设备时间计划表 i_equipment_io==================================
equipment_r_t = load_from_mysql('i_resource_limit', 'kissf')
In [3]:
# ======================================获取状态有变化的时间节点
change_e_t = equipment_t_t.loc[
equipment_t_t.sort_values('start_time').groupby('equipment_port').equipment_status.diff() != 0,:].reset_index(drop=True)
# e_t = equipment_t_t.sort_values('start_time').groupby('equipment_port')
# change_e_t = e_t.apply(lambda x: x[x.equipment_status.diff() !=0]).reset_index(drop=True)
change_e_t['start_time'] = (
pd.to_datetime(change_e_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_e_t['end_time'] = (
pd.to_datetime(change_e_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
# =====================================将状态变化时间节点连续化
change_e_t = change_e_t[change_e_t.equipment_port.str.startswith('h')]
change_e_t = change_e_t.groupby('equipment_port').apply(
continuous_node_time
).loc[:,('equipment_port', 'start_time', 'end_time', 'process_time', 'equipment_status', 'user_name')]
# ====================================获取开机时间段数据
change_open_t = change_e_t[change_e_t.equipment_status == 1]
change_open_t
Out[3]:
In [27]:
# =========================================资源数为0的资源等效为
equipment_r_t.resource_limit = equipment_r_t.resource_limit.mask(
equipment_r_t.resource_limit == 0, equipment_r_t.resource_number
)
# resource_occupy:系统禁用资源数目
equipment_r_t['resource_occupy'] = equipment_r_t['resource_number'] - equipment_r_t['resource_limit']
change_r_t = equipment_r_t.loc[
equipment_r_t.sort_values('start_time').groupby('resource_id').resource_occupy.diff() !=0,:].reset_index(drop=True)
# ======================================获取状态有变化的时间节点
# change_r_t = r_t.apply(lambda x: x[x.resource_occupy.diff() !=0]).reset_index(drop=True)
# change_r_t = equipment_r_t[(equipment_r_t.sort_values('start_time').groupby('resource_id')['resource_occupy'].diff() !=0)]
# =====================================将所有datetime格式的时间转换为以机器开始启动为0点的时间戳(秒)
change_r_t['start_time'] = (
pd.to_datetime(change_r_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_r_t['end_time'] = (
pd.to_datetime(change_r_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
# =====================================将状态变化时间节点连续化,使上一结束时间等于下一开始时间
change_r_t = change_r_t[change_r_t.resource_id.str.startswith('man_a')]
change_r_t = change_r_t.groupby('resource_id').apply(
continuous_node_time
).loc[:,('resource_id', 'resource_occupy', 'resource_limit', 'resource_number', 'start_time' , 'end_time','user_name')]
change_r_t['equipment_id'] = change_r_t.resource_id.str.replace(r'man_', '')
change_r_t
Out[27]:
In [50]:
# =======================================设备参数统计====================================
equipment_p_t = load_from_mysql('i_equipment_parameter')
equipment_p_t[equipment_p_t.parameter_id.isin(['uld_turnaround_time', 'truck_turnaround_time'])]
Out[50]:
In [7]:
# ====================================land data
table_parcel_land = load_from_mysql('i_od_parcel_landside', 'kissf')
table_parcel_land = table_parcel_land.loc[:,( 'parcel_type','parcel_id', 'arrive_time', 'src_type', 'dest_type')]
# t_p_l['arrive_time'] = (pd.to_datetime(t_p_l.arrive_time) - ZERO_TIMESTAMP
# ).apply(lambda x: x.total_seconds() if x.total_seconds()> 0 else 0)
# ====================================air data
table_parcel_air = load_from_mysql('i_od_parcel_airside', 'kissf')
table_parcel_air = table_parcel_air.loc[:,( 'parcel_type','parcel_id', 'arrive_time', 'src_type', 'dest_type')]
In [32]:
p_l = (table_parcel_land.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
).sort_values(ascending=False).to_frame()
p_l = p_l.rename(columns={'parcel_id': 'LandSide'})
p_a = (table_parcel_air.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
).sort_values(ascending=False).to_frame()
p_a = p_a.rename(columns={'parcel_id': 'AirSide'})
# ===================================concat data
p_concat = pd.concat([p_l, p_a], axis=1)
# ===================================all sum
p_concat['All'] = p_concat['LandSide'] + p_concat['AirSide']
# ===================================concat data
p_concat = p_concat.loc[:, ('All', 'LandSide', 'AirSide')]
# ==================================plt setting
# ax1=p_concat.aggregate(['sum']).plot.box()
ax2 = p_concat.plot.bar(figsize=(10,5))
# subplots_plot(ax=ax1, y_label='Num')
# ax1 = set_plot(ax=ax1, x_label='parcel type', y_label='Num')
ax2 = set_plot(ax=ax2, x_label='Parcel Type', y_label='Num')
# =================================plot show
plt.show()
p_concat.aggregate(['sum'])
Out[32]:
In [17]:
# ======================================landside parcel
t_p_l_hh = table_parcel_land
t_p_l_hh['half_hour'] = (pd.to_datetime(t_p_l_hh.arrive_time)).apply(
lambda x : (x.strftime('%d %H')+':00') if 0<=x.minute<=30 else (x.strftime('%d %H')+':30'))
# to frame sort
t_p_l_hh_grp = t_p_l_hh.groupby(['half_hour']).parcel_id.size().to_frame()
# rename column
t_p_l_hh_grp = t_p_l_hh_grp.rename(columns={'parcel_id': 'LandSide'})
# ======================================landside parcel
t_p_a_hh = table_parcel_air
t_p_a_hh['half_hour'] = (pd.to_datetime(t_p_a_hh.arrive_time)).apply(
lambda x : (x.strftime('%d %H')+':00') if 0<=x.minute<=30 else (x.strftime('%d %H')+':30'))
# to frame sort
t_p_a_hh_grp = t_p_a_hh.groupby(['half_hour']).parcel_id.size().to_frame()
# rename column
t_p_a_hh_grp = t_p_a_hh_grp.rename(columns={'parcel_id': 'AandSide'})
# =====================================concat landside and airside data
t_p_hh_concat = pd.concat([t_p_l_hh_grp, t_p_a_hh_grp], axis=1);t_p_a_hh_concat
# ====================================================plot concat data
ax_p_hh_tls = t_p_hh_concat.plot.bar(figsize=(20,6), title='Parcel VS Half Hours Fig')
ax_p_hh_tls = set_plot(ax=ax_p_hh_tls, x_label='time/s', y_label='Num') # plot
plt.show()
In [34]:
# =======================================landside small
table_land_small = load_from_mysql('i_od_small_landside', 'kissf')
table_land_parcel = load_from_mysql('i_od_parcel_landside', 'kissf')
t_l_p = table_land_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_l_s = table_land_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_l_s = t_l_s[t_l_s.parcel_id.isin(t_l_p.parcel_id)]
t_l_s['arrive_time'] = (pd.to_datetime(t_l_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
# to frame sort
t_l_s_plot = t_l_s.groupby(['arrive_time']).parcel_id.size().to_frame()
# rename column
t_l_s_plot = t_l_s_plot.rename(columns={'parcel_id': 'LandSide'})
# =======================================airside small
table_air_small = load_from_mysql('i_od_small_airside', 'kissf')
table_air_parcel = load_from_mysql('i_od_parcel_airside', 'kissf')
t_a_p = table_air_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_a_s = table_air_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_a_s = t_a_s[t_a_s.parcel_id.isin(t_a_p.parcel_id)]
t_a_s['arrive_time'] = (pd.to_datetime(t_a_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
# to frame sort
t_a_s_plot = t_a_s.groupby(['arrive_time']).parcel_id.size().to_frame()
# rename column
t_a_s_plot = t_a_s_plot.rename(columns={'parcel_id': 'AirSide'})
In [37]:
# ======================================================Small Parcel (L, A)
t_s_concat = pd.concat([t_l_s_plot, t_a_s_plot], axis=1)
ax_small = t_s_concat.plot.bar(figsize=(20,6), title=u'Small Parcel vs Time dis')
ax_small = set_plot(ax=ax_small, y_label='Num') # plot all side
# =====================================================LandSide Small src-des sort static
t_l_s_type = t_l_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_l_s_type.reset_index(level=1, inplace=True)
# ====================================================L A sort
tls_L_L = t_l_s_type[t_l_s_type.dest_type=='L']
tls_L_L = tls_L_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-L'})
tls_L_A = t_l_s_type[t_l_s_type.dest_type=='A']
tls_L_A = tls_L_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-A'})
tls_L_concat = pd.concat([tls_L_L, tls_L_A], axis=1)
ax_L_tls = tls_L_concat.plot.bar(figsize=(20,6), title='LandSide Small - Dest Type')
ax_L_tls = set_plot(ax=ax_L_tls, y_label='Num') # plot
# =====================================================AirSide Small src-des sort static
t_a_s_type = t_a_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_a_s_type.reset_index(level=1, inplace=True)
# =====================================================L A sort
tas_A_L = t_a_s_type[t_a_s_type.dest_type=='L']
tas_A_L = tas_A_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-L'})
tas_A_A = t_a_s_type[t_a_s_type.dest_type=='A']
tas_A_A = tas_A_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-A'})
tas_concat = pd.concat([tas_A_L, tas_A_A], axis=1)
# ====================================================plot concat data
ax_tas = tas_concat.plot.bar(figsize=(20,6), title='AirSide Small - Dest Type')
ax_tas = set_plot(ax=ax_tas, y_label='Num') # plot
plt.show()
In [15]:
table_small_air = load_from_mysql('i_od_small_airside', 'kissf')
t_a_s = table_small_air.loc[:, ('plate_num', 'arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_a_s = t_a_s[t_a_s.parcel_id.isin(t_p_a.parcel_id)];t_a_s
Out[15]: