In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime, timedelta
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
np.random.seed(1445)
ZERO_TIMESTAMP = datetime(2017, 8, 15, 21)
class MySQLConfig:
HOST = '10.0.149.62'
USER = "root"
PASS = "root123"
DB = "hangzhouhubqa_v1_1"
CHARSET = 'utf8'
engine = create_engine(
f'mysql+pymysql://{USER}:{PASS}@{HOST}/{DB}?charset={CHARSET}',
isolation_level="READ UNCOMMITTED")
def load_from_mysql(table_name: str=None, user_name: str=None):
"""读取远程mysql数据表"""
if user_name is None:
table = pd.read_sql_table(con=MySQLConfig.engine, table_name=f"{table_name}")
return table
else:
query_str = f"select * from {table_name} where user_name = '{user_name}'"
table = pd.read_sql_query(sql=query_str, con=MySQLConfig.engine)
return table
def continuous_node_time(x):
""""""
if x.shape[0] > 1:
x['end_time'] = list(x['start_time'])[1:] + [np.inf]
else:
x['end_time'] = np.inf
return x
def plt_bar_with_labels(pd, y_label):
ax = pd.plot(kind='bar')
x_labels = pd.index.tolist()
ax.set_ylabel(y_label)
rects = ax.patches
labels = pd.tolist()
for rect, label in zip(rects, labels):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')
plt.show()
def set_plot(if_subplot: bool=False, ax: pd.DataFrame.plot=None, y_label: str=None):
"""
discrip:
one axes fig type plot
Args:
ax: pd.DataFrame type data to get axes infomations such as x axis, bar height, bar width ...;
y_label: str, show on each axes y label;
Return:
axes type to be ploted for plt
"""
# one axes plot
if not if_subplot:
ax.set_ylabel(y_label)
for p in ax.patches:
ax.text(p.get_x() + p.get_width()/2, p.get_height() + 5, int(p.get_height()), ha='center', va='bottom')
return ax
# subplot axex plot
else:
for n_ax in ax:
for p in n_ax:
p.set_ylabel(y_label)
for x in p.patches:
p.text(x.get_x() + x.get_width()/2, x.get_height() + 5, int(x.get_height()), ha='center', va='bottom')
return ax
In [105]:
# =========================================设备时间计划表 i_equipment_io==================================
equipment_t_t = load_from_mysql('i_equipment_io', 'kissf')
equipment_t_t[equipment_t_t.equipment_port.str.startswith('r1')].sort_values(['equipment_port', 'start_time'])
e_t = equipment_t_t.sort_values('start_time').groupby('equipment_port')
# ======================================获取状态有变化的时间节点
change_e_t = e_t.apply(lambda x: x[x.equipment_status.diff() !=0]).reset_index(drop=True)
change_e_t
change_e_t['start_time'] = (
pd.to_datetime(change_e_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_e_t['end_time'] = (
pd.to_datetime(change_e_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
# =====================================将状态变化时间节点连续化
change_e_t = change_e_t[change_e_t.equipment_port.str.startswith('a')]
change_e_t = change_e_t.groupby('equipment_port').apply(
continuous_node_time
).loc[:,('equipment_port', 'start_time', 'end_time', 'process_time', 'equipment_status', 'user_name')]
# ====================================获取开机时间段数据
change_open_t = change_e_t[change_e_t.equipment_status == 1]
change_open_t
# e_open_t = defaultdict(list)
# for k , v in change_open_user_t.set_index('equipment_port').iterrows():
# e_open_t[k].append((v['start_time'], v['end_time']))
Out[105]:
In [11]:
# =========================================设备时间计划表 i_equipment_io==================================
equipment_r_t = load_from_mysql('i_resource_limit', 'kissf')
# =========================================资源数为0的资源等效为
equipment_r_t.resource_limit = equipment_r_t.resource_limit.mask(
equipment_r_t.resource_limit == 0, equipment_r_t.resource_number
)
# resource_occupy:系统禁用资源数目
equipment_r_t['resource_occupy'] = equipment_r_t['resource_number'] - equipment_r_t['resource_limit']
# equipment_r_t[equipment_t.equipment_port.str.startswith('r1')].sort_values(['equipment_port', 'start_time'])
r_t = equipment_r_t.sort_values('start_time').groupby('resource_id')
# ======================================获取状态有变化的时间节点
change_r_t = r_t.apply(lambda x: x[x.resource_occupy.diff() !=0]).reset_index(drop=True)
# =====================================将所有datetime格式的时间转换为以机器开始启动为0点的时间戳(秒)
change_r_t['start_time'] = (
pd.to_datetime(change_r_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_r_t['end_time'] = (
pd.to_datetime(change_r_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
# =====================================将状态变化时间节点连续化,使上一结束时间等于下一开始时间
change_r_t = change_r_t[change_r_t.resource_id.str.startswith('man_r1')]
change_r_t = change_r_t.groupby('resource_id').apply(
continuous_node_time
).loc[:,('resource_id', 'resource_occupy', 'resource_limit', 'resource_number', 'start_time' , 'end_time','inserted_on')]
change_r_t
Out[11]:
In [50]:
# =======================================设备参数统计====================================
equipment_p_t = load_from_mysql('i_equipment_parameter')
equipment_p_t[equipment_p_t.parameter_id.isin(['uld_turnaround_time', 'truck_turnaround_time'])]
Out[50]:
In [2]:
# ====================================land data
table_parcel_land = load_from_mysql('i_od_parcel_landside')
table_parcel_land = table_parcel_land.loc[:,( 'parcel_type','parcel_id')]
# t_p_l['arrive_time'] = (pd.to_datetime(t_p_l.arrive_time) - ZERO_TIMESTAMP
# ).apply(lambda x: x.total_seconds() if x.total_seconds()> 0 else 0)
p_l = (table_parcel_land.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
).sort_values(ascending=False).to_frame()
p_l = p_l.rename(columns={'parcel_id': 'LandSide'})
# ====================================air data
table_parcel_air = load_from_mysql('i_od_parcel_airside')
table_parcel_air = table_parcel_air.loc[:,( 'parcel_type','parcel_id')]
p_a = (table_parcel_air.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
).sort_values(ascending=False).to_frame()
p_a = p_a.rename(columns={'parcel_id': 'AirSide'})
# ===================================concat data
p_concat = pd.concat([p_l, p_a], axis=1, join_axes=[p_l.index])
# ===================================all sum
p_concat['All'] = p_concat['LandSide'] + p_concat['AirSide']
p_concat = p_concat.loc[:, ('All', 'LandSide', 'AirSide')]
# ==================================plt setting
# ax1=p_concat.plot.bar(figsize=(20,5), subplots=True, layout=(1,3))
ax2 = p_concat.plot.bar(figsize=(10,5))
# subplots_plot(ax=ax1, y_label='Num')
ax2 = set_plot(ax=ax2, y_label='Num')
# =================================plot show
plt.show()
In [9]:
# =======================================sql from od table
# land sql
table_land_small = load_from_mysql('i_od_small_landside')
table_land_parcel = load_from_mysql('i_od_parcel_landside')
# air sql
table_air_small = load_from_mysql('i_od_small_airside')
table_air_parcel = load_from_mysql('i_od_parcel_airside')
# =======================================landside small data process
t_l_p = table_land_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_l_s = table_land_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_l_s = t_l_s[t_l_s.parcel_id.isin(t_l_p.parcel_id)]
t_l_s['arrive_time'] = (pd.to_datetime(t_l_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
t_l_s_plot = t_l_s.groupby(['arrive_time']).parcel_id.size().to_frame() # to frame sort
t_l_s_plot = t_l_s_plot.rename(columns={'parcel_id': 'LandSide'}) # rename column
# =======================================airside small data process
t_a_p = table_air_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_a_s = table_air_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_a_s = t_a_s[t_a_s.parcel_id.isin(t_a_p.parcel_id)]
t_a_s['arrive_time'] = (pd.to_datetime(t_a_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
t_a_s_plot = t_a_s.groupby(['arrive_time']).parcel_id.size().to_frame()
t_a_s_plot = t_a_s_plot.rename(columns={'parcel_id': 'AirSide'})
In [12]:
# ======================================================Small parcel (L, A)
t_s_concat = pd.concat([t_l_s_plot, t_a_s_plot], axis=1)
ax_small = t_s_concat.plot.bar(figsize=(20,6))
ax_small = set_plot(ax=ax_small, y_label='Num') # plot all side
# =====================================================LandSide Small src-des sort static
t_l_s_type = t_l_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_l_s_type.reset_index(level=1, inplace=True)
#
tls_L_L = t_l_s_type[t_l_s_type.dest_type=='L']
tls_L_L = tls_L_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-L'})
tls_L_A = t_l_s_type[t_l_s_type.dest_type=='A']
tls_L_A = tls_L_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-A'})
tls_L_concat = pd.concat([tls_L_L, tls_L_A], axis=1)
ax_L_tls = tls_L_concat.plot.bar(figsize=(20,6), title='LandSide Small - Dest Type')
ax_L_tls = set_plot(ax=ax_L_tls, y_label='Num') # plot
# =====================================================AirSide Small src-des sort static
t_a_s_type = t_a_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_a_s_type.reset_index(level=1, inplace=True)
#
tls_A_L = t_a_s_type[t_a_s_type.dest_type=='L']
tls_A_L = tls_A_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-L'})
tls_A_A = t_a_s_type[t_a_s_type.dest_type=='A']
tls_A_A = tls_A_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-A'})
tls_A_concat = pd.concat([tls_A_L, tls_A_A], axis=1, join_axes=[tls_A_L.index])
ax_A_tls = tls_A_concat.plot.bar(figsize=(20,6), title='AirSide Small - Dest Type')
ax_A_tls = set_plot(ax=ax_A_tls, y_label='Num') # plot
plt.show()
In [15]:
table_small_air = load_from_mysql('i_od_small_airside', 'kissf')
t_a_s = table_small_air.loc[:, ('plate_num', 'arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_a_s = t_a_s[t_a_s.parcel_id.isin(t_p_a.parcel_id)];t_a_s
Out[15]: