In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime, timedelta
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

np.random.seed(1445)
ZERO_TIMESTAMP = datetime(2017, 8, 15, 21)

class MySQLConfig:
    HOST = '10.0.149.62'
    USER = "root"
    PASS = "root123"
    DB = "hangzhouhubqa_v1_2"
    CHARSET = 'utf8'

    engine = create_engine(
        f'mysql+pymysql://{USER}:{PASS}@{HOST}/{DB}?charset={CHARSET}',
        isolation_level="READ UNCOMMITTED")


def load_from_mysql(table_name: str=None, user_name: str=None):
    """读取远程mysql数据表"""
    if user_name is None:
        table = pd.read_sql_table(con=MySQLConfig.engine, table_name=f"{table_name}")
        return table
    else:
        query_str = f"select * from {table_name} where user_name = '{user_name}'"
        table = pd.read_sql_query(sql=query_str, con=MySQLConfig.engine)
        return table


def continuous_node_time(x):
    """"""
    if x.shape[0] > 1:
        x['end_time'] = list(x['start_time'])[1:] + [np.inf]
    else:
        x['end_time'] = np.inf
    return x

def plt_bar_with_labels(pd, y_label):
    ax = pd.plot(kind='bar')
    x_labels = pd.index.tolist()
    ax.set_ylabel(y_label)
    
    rects = ax.patches
    labels = pd.tolist()
    
    for rect, label in zip(rects, labels):
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')

    plt.show()
    
def set_plot(if_subplot: bool=False, ax: pd.DataFrame.plot=None, x_label: str=None, y_label: str=None):
    """
    discrip:
        one axes fig type plot
    Args:
        ax: pd.DataFrame type data to get axes infomations such as x axis, bar height,  bar width ...;
        y_label: str, show on each axes y label;
    Return:
        axes type to be ploted for plt
    """
    # one axes plot
    if not if_subplot:
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        for p in ax.patches:
            ax.text(p.get_x() + p.get_width()/2, p.get_height() + 5, int(p.get_height()), ha='center', va='bottom')
        return ax
    # subplot axex plot
    else:
        for n_ax in ax:
            for p in n_ax:
                p.set_xlabel(x_label)
                p.set_ylabel(y_label)
                for x in p.patches:
                    p.text(x.get_x() + x.get_width()/2, x.get_height() + 5, int(x.get_height()), ha='center', va='bottom')
        return ax

In [2]:
# =========================================设备时间计划表 i_equipment_io==================================
equipment_t_t = load_from_mysql('i_equipment_io', 'kissf')
# =========================================设备时间计划表 i_equipment_io==================================
equipment_r_t = load_from_mysql('i_resource_limit', 'kissf')

In [3]:
# ======================================获取状态有变化的时间节点
change_e_t = equipment_t_t.loc[
    equipment_t_t.sort_values('start_time').groupby('equipment_port').equipment_status.diff() != 0,:].reset_index(drop=True)

# e_t = equipment_t_t.sort_values('start_time').groupby('equipment_port')
# change_e_t = e_t.apply(lambda x: x[x.equipment_status.diff() !=0]).reset_index(drop=True)
change_e_t['start_time'] = (
    pd.to_datetime(change_e_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_e_t['end_time'] = (
    pd.to_datetime(change_e_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)

# =====================================将状态变化时间节点连续化
change_e_t = change_e_t[change_e_t.equipment_port.str.startswith('h')]

change_e_t = change_e_t.groupby('equipment_port').apply(
    continuous_node_time
).loc[:,('equipment_port', 'start_time', 'end_time', 'process_time', 'equipment_status', 'user_name')]
# ====================================获取开机时间段数据
change_open_t = change_e_t[change_e_t.equipment_status == 1]
change_open_t


Out[3]:
equipment_port start_time end_time process_time equipment_status user_name
1214 h1_1 0.0 inf 15.0 1 kissf
1215 h2_1 0.0 inf 15.0 1 kissf
1216 h3_1 0.0 inf 15.0 1 kissf

In [27]:
# =========================================资源数为0的资源等效为
equipment_r_t.resource_limit = equipment_r_t.resource_limit.mask(
        equipment_r_t.resource_limit == 0, equipment_r_t.resource_number
)
# resource_occupy:系统禁用资源数目
equipment_r_t['resource_occupy'] = equipment_r_t['resource_number'] - equipment_r_t['resource_limit']
change_r_t = equipment_r_t.loc[
    equipment_r_t.sort_values('start_time').groupby('resource_id').resource_occupy.diff() !=0,:].reset_index(drop=True)
# ======================================获取状态有变化的时间节点
# change_r_t = r_t.apply(lambda x: x[x.resource_occupy.diff() !=0]).reset_index(drop=True)
# change_r_t = equipment_r_t[(equipment_r_t.sort_values('start_time').groupby('resource_id')['resource_occupy'].diff() !=0)]
# =====================================将所有datetime格式的时间转换为以机器开始启动为0点的时间戳(秒)
change_r_t['start_time'] = (
    pd.to_datetime(change_r_t['start_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
change_r_t['end_time'] = (
    pd.to_datetime(change_r_t['end_time']) - ZERO_TIMESTAMP
).apply(lambda x: x.total_seconds() if x.total_seconds()>0 else 0)
# =====================================将状态变化时间节点连续化,使上一结束时间等于下一开始时间
change_r_t = change_r_t[change_r_t.resource_id.str.startswith('man_a')]
change_r_t = change_r_t.groupby('resource_id').apply(
    continuous_node_time
).loc[:,('resource_id', 'resource_occupy', 'resource_limit', 'resource_number', 'start_time' , 'end_time','user_name')]
change_r_t['equipment_id'] = change_r_t.resource_id.str.replace(r'man_', '')
change_r_t


Out[27]:
resource_id resource_occupy resource_limit resource_number start_time end_time user_name equipment_id
4 man_a1_1 0.0 1.0 1.0 0.0 inf kissf a1_1
5 man_a1_10 0.0 1.0 1.0 0.0 inf kissf a1_10
6 man_a1_11 0.0 1.0 1.0 0.0 inf kissf a1_11
7 man_a1_12 0.0 1.0 1.0 0.0 inf kissf a1_12
8 man_a1_2 0.0 1.0 1.0 0.0 inf kissf a1_2
9 man_a1_3 0.0 1.0 1.0 0.0 inf kissf a1_3
10 man_a1_4 0.0 1.0 1.0 0.0 inf kissf a1_4
11 man_a1_5 0.0 1.0 1.0 0.0 inf kissf a1_5
12 man_a1_6 0.0 1.0 1.0 0.0 inf kissf a1_6
13 man_a1_7 0.0 1.0 1.0 0.0 inf kissf a1_7
14 man_a1_8 0.0 1.0 1.0 0.0 inf kissf a1_8
15 man_a1_9 0.0 1.0 1.0 0.0 inf kissf a1_9
16 man_a2_1 0.0 1.0 1.0 0.0 inf kissf a2_1
17 man_a2_10 0.0 1.0 1.0 0.0 inf kissf a2_10
18 man_a2_11 0.0 1.0 1.0 0.0 inf kissf a2_11
19 man_a2_12 0.0 1.0 1.0 0.0 inf kissf a2_12
20 man_a2_2 0.0 1.0 1.0 0.0 inf kissf a2_2
21 man_a2_3 0.0 1.0 1.0 0.0 inf kissf a2_3
22 man_a2_4 0.0 1.0 1.0 0.0 inf kissf a2_4
23 man_a2_5 0.0 1.0 1.0 0.0 inf kissf a2_5
24 man_a2_6 0.0 1.0 1.0 0.0 inf kissf a2_6
25 man_a2_7 0.0 1.0 1.0 0.0 inf kissf a2_7
26 man_a2_8 0.0 1.0 1.0 0.0 inf kissf a2_8
27 man_a2_9 0.0 1.0 1.0 0.0 inf kissf a2_9

In [50]:
# =======================================设备参数统计====================================
equipment_p_t = load_from_mysql('i_equipment_parameter')
equipment_p_t[equipment_p_t.parameter_id.isin(['uld_turnaround_time', 'truck_turnaround_time'])]


Out[50]:
equipment_id parameter_id parameter_name parameter_value parameter_unit inserted_on modified_on
1 a1 uld_turnaround_time 卸ULD转换时间 0.0 s NaT NaT
3 a2 uld_turnaround_time 卸ULD转换时间 0.0 s NaT NaT
156 r1 truck_turnaround_time 卸车转换时间 90.0 s NaT NaT
158 r2 truck_turnaround_time 卸车转换时间 90.0 s NaT NaT
160 r3 truck_turnaround_time 卸车转换时间 90.0 s NaT NaT
162 r4 truck_turnaround_time 卸车转换时间 90.0 s NaT NaT
164 r5 truck_turnaround_time 卸车转换时间 90.0 s NaT NaT

统计- parcel-type


In [7]:
# ====================================land data
table_parcel_land = load_from_mysql('i_od_parcel_landside', 'kissf')
table_parcel_land = table_parcel_land.loc[:,( 'parcel_type','parcel_id', 'arrive_time', 'src_type', 'dest_type')]
# t_p_l['arrive_time'] = (pd.to_datetime(t_p_l.arrive_time) - ZERO_TIMESTAMP
#                        ).apply(lambda x: x.total_seconds() if x.total_seconds()> 0 else 0)
# ====================================air data
table_parcel_air = load_from_mysql('i_od_parcel_airside', 'kissf')
table_parcel_air = table_parcel_air.loc[:,( 'parcel_type','parcel_id', 'arrive_time', 'src_type', 'dest_type')]

In [32]:
p_l = (table_parcel_land.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
      ).sort_values(ascending=False).to_frame()
p_l = p_l.rename(columns={'parcel_id': 'LandSide'})

p_a = (table_parcel_air.groupby(['parcel_type']).parcel_id.apply(set).apply(len)
      ).sort_values(ascending=False).to_frame()
p_a = p_a.rename(columns={'parcel_id': 'AirSide'})

# ===================================concat data
p_concat = pd.concat([p_l, p_a], axis=1)
# ===================================all sum
p_concat['All'] = p_concat['LandSide'] + p_concat['AirSide']
# ===================================concat data
p_concat = p_concat.loc[:, ('All', 'LandSide', 'AirSide')]
# ==================================plt setting
# ax1=p_concat.aggregate(['sum']).plot.box()
ax2 = p_concat.plot.bar(figsize=(10,5))
# subplots_plot(ax=ax1, y_label='Num')
# ax1 = set_plot(ax=ax1, x_label='parcel type', y_label='Num')
ax2 = set_plot(ax=ax2, x_label='Parcel Type', y_label='Num')
# =================================plot show
plt.show()
p_concat.aggregate(['sum'])


Out[32]:
All LandSide AirSide
sum 99714 93397 6317

parcel 半小时维度统计


In [17]:
# ======================================landside parcel 
t_p_l_hh = table_parcel_land
t_p_l_hh['half_hour'] = (pd.to_datetime(t_p_l_hh.arrive_time)).apply(
    lambda x : (x.strftime('%d %H')+':00') if 0<=x.minute<=30 else (x.strftime('%d %H')+':30'))
# to frame sort
t_p_l_hh_grp = t_p_l_hh.groupby(['half_hour']).parcel_id.size().to_frame()
# rename column
t_p_l_hh_grp = t_p_l_hh_grp.rename(columns={'parcel_id': 'LandSide'})
# ======================================landside parcel 
t_p_a_hh = table_parcel_air
t_p_a_hh['half_hour'] = (pd.to_datetime(t_p_a_hh.arrive_time)).apply(
    lambda x : (x.strftime('%d %H')+':00') if 0<=x.minute<=30 else (x.strftime('%d %H')+':30'))
# to frame sort
t_p_a_hh_grp = t_p_a_hh.groupby(['half_hour']).parcel_id.size().to_frame()
# rename column
t_p_a_hh_grp = t_p_a_hh_grp.rename(columns={'parcel_id': 'AandSide'})
# =====================================concat landside and airside data
t_p_hh_concat = pd.concat([t_p_l_hh_grp, t_p_a_hh_grp], axis=1);t_p_a_hh_concat
# ====================================================plot concat data 
ax_p_hh_tls = t_p_hh_concat.plot.bar(figsize=(20,6), title='Parcel VS Half Hours Fig')
ax_p_hh_tls = set_plot(ax=ax_p_hh_tls, x_label='time/s', y_label='Num')  # plot 

plt.show()


small 统计-小时


In [34]:
# =======================================landside small 
table_land_small = load_from_mysql('i_od_small_landside', 'kissf')
table_land_parcel = load_from_mysql('i_od_parcel_landside', 'kissf')
t_l_p = table_land_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_l_s = table_land_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]

t_l_s = t_l_s[t_l_s.parcel_id.isin(t_l_p.parcel_id)]
t_l_s['arrive_time'] = (pd.to_datetime(t_l_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
# to frame sort
t_l_s_plot = t_l_s.groupby(['arrive_time']).parcel_id.size().to_frame()
# rename column
t_l_s_plot = t_l_s_plot.rename(columns={'parcel_id': 'LandSide'})
# =======================================airside small 
table_air_small = load_from_mysql('i_od_small_airside', 'kissf')
table_air_parcel = load_from_mysql('i_od_parcel_airside', 'kissf')
t_a_p = table_air_parcel.loc[:,('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id')]
t_a_s = table_air_small.loc[:, ('arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]
t_a_s = t_a_s[t_a_s.parcel_id.isin(t_a_p.parcel_id)]
t_a_s['arrive_time'] = (pd.to_datetime(t_a_s.arrive_time)).apply(lambda x : x.strftime('%d %H'))
# to frame sort
t_a_s_plot = t_a_s.groupby(['arrive_time']).parcel_id.size().to_frame()
# rename column
t_a_s_plot = t_a_s_plot.rename(columns={'parcel_id': 'AirSide'})

small parcel 时间 流向统计(L-L, L-A)


In [37]:
# ======================================================Small Parcel (L, A)
t_s_concat = pd.concat([t_l_s_plot, t_a_s_plot], axis=1)
ax_small = t_s_concat.plot.bar(figsize=(20,6), title=u'Small Parcel vs Time dis')
ax_small = set_plot(ax=ax_small, y_label='Num')  # plot all side
# =====================================================LandSide Small src-des sort static
t_l_s_type = t_l_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_l_s_type.reset_index(level=1, inplace=True)
# ====================================================L A sort
tls_L_L = t_l_s_type[t_l_s_type.dest_type=='L']
tls_L_L = tls_L_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-L'})
tls_L_A = t_l_s_type[t_l_s_type.dest_type=='A']
tls_L_A = tls_L_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'L-A'})
tls_L_concat = pd.concat([tls_L_L, tls_L_A], axis=1)
ax_L_tls = tls_L_concat.plot.bar(figsize=(20,6), title='LandSide Small - Dest Type')
ax_L_tls = set_plot(ax=ax_L_tls, y_label='Num')  # plot 

# =====================================================AirSide Small src-des sort static
t_a_s_type = t_a_s.groupby(['arrive_time','dest_type']).parcel_id.size().to_frame()
t_a_s_type.reset_index(level=1, inplace=True)
# =====================================================L A sort
tas_A_L = t_a_s_type[t_a_s_type.dest_type=='L']
tas_A_L = tas_A_L.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-L'})
tas_A_A = t_a_s_type[t_a_s_type.dest_type=='A']
tas_A_A = tas_A_A.loc[:,('parcel_id')].to_frame().rename(columns={'parcel_id': 'A-A'})
tas_concat = pd.concat([tas_A_L, tas_A_A], axis=1)
# ====================================================plot concat data 
ax_tas = tas_concat.plot.bar(figsize=(20,6), title='AirSide Small - Dest Type')
ax_tas = set_plot(ax=ax_tas, y_label='Num')  # plot 

plt.show()


small 空侧 统计


In [15]:
table_small_air = load_from_mysql('i_od_small_airside', 'kissf')
t_a_s = table_small_air.loc[:, ('plate_num', 'arrive_time', 'src_type', 'dest_type','parcel_type', 'parcel_id', 'small_id')]

t_a_s = t_a_s[t_a_s.parcel_id.isin(t_p_a.parcel_id)];t_a_s


Out[15]:
plate_num arrive_time src_type dest_type parcel_type parcel_id small_id
0 339753869387 2017-08-16 18:30:02 A L small 1160447 1293917
1 339753869387 2017-08-16 18:30:02 A L small 1160447 1293941
2 339753869387 2017-08-16 18:30:02 A L small 1160447 1294119
3 339753869387 2017-08-16 18:30:02 A L small 1160447 1294128
4 339753869387 2017-08-16 18:30:02 A L small 1160447 1294131
5 339753869387 2017-08-16 18:30:02 A L small 1160447 1294469
6 339753869387 2017-08-16 18:30:02 A L small 1160447 1294916
7 339753869387 2017-08-16 18:30:02 A L small 1160447 1294975
8 339753869387 2017-08-16 18:30:02 A L small 1160447 1295081
9 339753869387 2017-08-16 18:30:02 A L small 1160447 1295475
10 339753869387 2017-08-16 18:30:02 A L small 1160447 1296140
11 339753869387 2017-08-16 18:30:02 A L small 1160447 1296253
12 339753869387 2017-08-16 18:30:02 A L small 1160447 1297057
13 339753869387 2017-08-16 18:30:02 A L small 1160447 1297390
14 339753869387 2017-08-16 18:30:02 A L small 1160447 1297905
15 339753869387 2017-08-16 18:30:02 A L small 1160447 1297908
16 339753869387 2017-08-16 18:30:02 A L small 1160447 1298240
17 339753869387 2017-08-16 18:30:02 A L small 1160447 1298343
18 339753869387 2017-08-16 18:30:02 A L small 1160447 1299015
19 339753869387 2017-08-16 18:30:02 A L small 1160447 1300038
20 339753869387 2017-08-16 18:30:02 A L small 1160447 1300039
21 339753869387 2017-08-16 18:30:02 A L small 1160447 1300042
22 339753869387 2017-08-16 18:30:02 A L small 1160447 1300175