In [1]:
import pandas as pd
from collections.abc import Iterator, Iterable, Generator, Hashable
from collections import defaultdict, namedtuple
import re
import os
import datetime
import bson

from sqlalchemy.types import Integer, Text, DateTime, Float, VARCHAR
from sqlalchemy import Table, MetaData, Column
import itertools as it
import pickle
from tools import deco_logging
import numpy as np
# add tools funcs
from tools.func_abc import *

# Sheet names expected in the pre-sort OD-matrix workbook.
MHS_SHEET_NAME = ['Parcel pcs', 'Small bag pcs', 'Irregular pcs', 'NC pcs', 'Mail pcs', 'sorting by pcs']
# Cache of sheet-name -> DataFrame (not populated in the visible cells).
SHEET_DF = {}
# NOTE(review): Windows-style '\\' here vs '//' in EXCEL_DIR below — presumably
# both resolve on the target OS; consider os.path.join for consistency.
DEFAULT_DATA_PATH = ('data\\')
EXCEL_DIR = 'data//2025OD_matrix_Pre-Sort.xlsx'
# Footer rows to skip in every sheet (name typo kept: SHEEF -> SHEET;
# later cells hard-code 57 instead of using this constant).
SHEEF_FOOT_SKIP_NUM = 57

# Logger bound to the 'jupyter' user (from tools.deco_logging).
log_ipy = deco_logging(user='jupyter')

1. DATA


In [2]:
# Excel parse settings: each sheet is split into a "left" half (flight
# metadata columns) and a "right" half (per-destination count matrix).
excel_dir = 'data//2025OD_matrix_Pre-Sort.xlsx'  # NOTE(review): duplicates EXCEL_DIR above
pcs_sheet_name = 'sum_pcs'
pcs_left_parse_cols = 'A:Q'
pcs_right_parse_cols = 'D,R:ZD'
other_left_parse_cols = 'B:F'
other_right_parse_cols = 'C,G:YS'
sorted_left_parse_cols = 'B:H'
sorted_right_parse_cols = 'C,I:YU'
cross_d_left_parse_cols = 'B:G'
cross_d_right_parse_cols = 'C,H:AR'

In [3]:
def get_df_excel_sheet(excel_dir: str=None, sheet_lst: list=None, skip_foot: int=0):
    """Yield one DataFrame per sheet read from an Excel workbook.

    Parameters
    ----------
    excel_dir : str
        Path to the workbook; required.
    sheet_lst : list
        Sheet names to read, in order; must be a list.
    skip_foot : int
        Number of footer rows to skip in every sheet.

    Yields
    ------
    pandas.DataFrame for each sheet in ``sheet_lst``.

    Raises
    ------
    ValueError
        If ``excel_dir`` is missing or ``sheet_lst`` is not a list.  Because
        this is a generator, validation fires on first iteration, not at
        call time.
    """
    if excel_dir is None:
        raise ValueError('Should Set Excel Path!')
    # BUG FIX: original used `and`, which let non-list, non-None values
    # (e.g. a bare string) slip past validation and be iterated char-by-char.
    if sheet_lst is None or not isinstance(sheet_lst, list):
        raise ValueError('Should Set Excel Sheet List, Value is List Type!')
    for st in sheet_lst:
        # NOTE(review): `sheetname`/`skip_footer` are the pandas<=0.20 keyword
        # spellings; newer pandas uses `sheet_name`/`skipfooter` — confirm the
        # runtime pandas version before modernizing.
        yield pd.read_excel(io=excel_dir, sheetname=st, skip_footer=skip_foot)

In [4]:
# Column layouts for the melt/format step (df_melt_data_format from tools).
base_col =['type', 'apt', 'model', 'Flight ID', 'Flight type', 'by pcs of ULD', 'MHS ULD', 'landing_time']
round_col = ['by pcs of ULD', 'pcs_sum']
dest_col = ['dest_city', 'dest_opt', 'dest_model']
new_col = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'MHS ULD', 'landing_time', 'dest_city', 'dest_opt', 'dest_model', 'pcs_sum']

# NOTE(review): disabled legacy melt of the air-side frame; kept for reference
# only (`sorted_df` is not defined in this notebook).
#air_df_parcel = sorted_df.loc[sorted_df.model.isin(['D', 'I', 'INF']),:]

# melt_df = df_melt_data_format(
#     df=air_df_parcel, base_col= base_col, melt_name='dest', value_name='pcs_sum', round_col=round_col)

# melt_df['dest_city'], melt_df['dest_opt'], melt_df['dest_model'] = melt_df.dest.str.split('_').str
# melt_df = melt_df.drop(labels=['dest'], axis=1)
# melt_df = melt_df.loc[:,new_col]
# melt_df.head(10)

1.2 Parcel Data


In [14]:
# ==============================================parcel data===========================
# Parcel sheet -> (left: flight metadata, right: per-destination counts).
# Uses the module constant instead of the magic number 57.
# NOTE(review): unlike the sibling sheets, this call passes no fill_nan=0 —
# confirm that is intentional.
df_parcel_left, df_parcel_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Parcel pcs', left_parse_col=other_left_parse_cols, 
    right_parse_col=other_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM)

1.3 Small Bag Data


In [7]:
# ================================================= small bag data===================
# Small-bag sheet split into metadata / destination-matrix halves; NaN counts
# become 0.  Magic 57 replaced with the module constant.
df_small_left, df_small_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Small bag pcs', left_parse_col=other_left_parse_cols, 
    right_parse_col=other_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM, fill_nan=0)

1.4 Irregular Parcel Data


In [8]:
# ================================================= Irregular Parcel Data ===========
# Irregular-parcel sheet; NaN counts become 0.  Magic 57 -> module constant.
df_irregular_left, df_irregular_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Irregular pcs', left_parse_col=other_left_parse_cols, 
    right_parse_col=other_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM, fill_nan=0)

1.5 NC Parcel Data


In [9]:
# ================================================= NC Parcel Data =================
# NC-parcel sheet; NaN counts become 0.  Magic 57 -> module constant.
df_nc_left, df_nc_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='NC pcs', left_parse_col=other_left_parse_cols, 
    right_parse_col=other_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM, fill_nan=0)

1.6 Mail Parcel Data


In [10]:
# ================================================= Mail Parcel Data ==============
# Mail sheet (variables named ims/isb downstream); NaN counts become 0.
# Magic 57 -> module constant.
df_ims_left, df_ims_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Mail pcs', left_parse_col=other_left_parse_cols, 
    right_parse_col=other_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM, fill_nan=0)

1.8 Sorted Parcel Data


In [11]:
# ================================================= Sorted Parcel Data ===========
# Sorted-totals sheet (wider left half: includes ULD columns); NaN -> 0.
# Magic 57 -> module constant.
df_sorted_left, df_sorted_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='sorting by pcs', left_parse_col=sorted_left_parse_cols, 
    right_parse_col=sorted_right_parse_cols, skip_footer=SHEEF_FOOT_SKIP_NUM, fill_nan=0)

2. Air Side Data


In [12]:
# Air-side extraction settings.
air_model_list = ['I', 'D', 'INF']  # flight models treated as air side
select_list = ['MHS', 'type', 'apt', 'model', 'Flight ID', 'Crossdock ULD', 'Dom-ULD', 'Inter-ULD', 'Loaded ULD', 
               'sum pcs', 'landing_time']
# (names below keep their original typos — selctc / tabel / merg — because
# later cells reference them verbatim)
sort_parcel_selctc_left = ['apt', 'model', 'Flight ID', 'landing_time']
sort_parcel_sum_selctc_left = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'MHS ULD', 'landing_time']
cross_parcel_sum_selctc_left = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'landing_time']
# [[columns], decimals] pairs for rounding the left tables.
left_round_list = [[['Crossdock_ULD', 'Dom_ULD', 'Inter_ULD','Loaded_ULD'], 0], [['sum_pcs'], 2]]
sort_parcel_sum_left_round_list = [[['by_pcs_of_ULD'], 2]]
index_slip_name = ['dest_city', 'dest_apt', 'dest_model']
right_round_list = [[None,2]]  # round every right-hand column to 2 d.p.
in_tabel_merge_on = ['index_num']
merg_data_on = [
    'apt', 'model',  'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model'
]

# Table transformer for the air-side models (TableT from tools.func_abc).
a_pcs = TableT(air_model_list)

2.1 PCS Data


In [651]:
# NOTE(review): dead cell — this PCS extraction is fully commented out and
# references `df_pcs_right`, which is never defined in this notebook; kept
# for reference only.
# # ====================================get left table =======================================
# a_df_pcs_left = a_pcs.get_left_table(df=df_parcel_left, select_list=select_list, left_round_list=left_round_list)
# a_df_pcs_left.tail(7)
# # ====================================get right table ======================================
# a_df_pcs_right = a_pcs.get_right_table(df=df_pcs_right, drop_col='model', 
#                         round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='pcs_sum')
# a_df_pcs_right.tail(2)
# # ====================================merge data ===========================================
# a_df_pcs_data = a_pcs.get_merge_table(
#     df_left=a_df_pcs_left, df_right=a_df_pcs_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
# log_ipy(f'PCS Size: {a_df_pcs_data.index.size}')
# a_df_pcs_data.tail(2)

2.2 Parcel


In [15]:
# Air-side parcel pipeline: left metadata + per-destination right matrix,
# merged on the synthetic index_num key.
# ====================================get left table =======================================
a_df_parcel_left = a_pcs.get_left_table(df=df_parcel_left, select_list=sort_parcel_selctc_left)
a_df_parcel_left.tail(7)  # NOTE(review): mid-cell expression — never displayed
# ====================================get right table ======================================
a_df_parcel_right = a_pcs.get_right_table(df=df_parcel_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='parcel_sum')
a_df_parcel_right.query("dest_model=='INF.1'").tail(2)
# ====================================merge data ===========================================
a_df_parcel_data = a_pcs.get_merge_table(
    df_left=a_df_parcel_left, df_right=a_df_parcel_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)

# NOTE(review): uses log_ipy here while sibling cells use log — presumably
# equivalent loggers; confirm.
log_ipy(f'Parcel Size: {a_df_parcel_data.index.size}')
a_df_parcel_data.query("dest_model=='INF.1'").tail(2)

a_df_parcel_data.head(2)


<2017-11-09 18:18:20,723><jupyter>: Parcel Size: 68952
Out[15]:
apt model Flight_ID landing_time dest_city dest_apt dest_model parcel_sum
0 WUX D CSS224 00:10:36 10 PEK D 0.00
1 WUX D CSS224 00:10:36 20 CAN D 23.75

2.3 Small Bag


In [16]:
# Small-bag pipeline — same left/right/merge pattern as the parcel cell.
# NOTE(review): these repeated cells could reuse merg_all_data (defined in
# the land-side section of this notebook).
# ====================================get left table =======================================
a_df_sb_left = a_pcs.get_left_table(df=df_small_left, select_list=sort_parcel_selctc_left)
log(f'Small Bag Left Size: {a_df_sb_left.index.size}')
# a_df_sb_left.tail(7)
# ====================================get right table ======================================
a_df_sb_right = a_pcs.get_right_table(df=df_small_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='small_sum')
log(f'Small Bag Right Size: {a_df_sb_right.index.size}')
# a_df_parcel_right.head(2)
# ====================================merge data ===========================================
a_df_sb_data = a_pcs.get_merge_table(
    df_left=a_df_sb_left, df_right=a_df_sb_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'Small Bag Data Size: {a_df_sb_data.index.size}')
a_df_sb_data.head(2)


<2017-11-09 18:18:26,102><tools>: Small Bag Left Size: 104
<2017-11-09 18:18:26,672><tools>: Small Bag Right Size: 68952
<2017-11-09 18:18:26,708><tools>: Small Bag Data Size: 68952
Out[16]:
apt model Flight_ID landing_time dest_city dest_apt dest_model small_sum
0 WUX D CSS224 00:10:36 10 PEK D 0.0
1 WUX D CSS224 00:10:36 20 CAN D 0.0

2.4 Irregular Parcel


In [17]:
# Irregular-parcel pipeline — same pattern as the parcel/small-bag cells.
# ====================================get left table =======================================
a_df_irregular_left = a_pcs.get_left_table(df=df_irregular_left, select_list=sort_parcel_selctc_left)
log(f'Irregular Parcel Left Size: {a_df_irregular_left.index.size}')
# a_df_sb_left.tail(7)
# ====================================get right table ======================================
a_df_irregula_right = a_pcs.get_right_table(df=df_irregular_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='irregular_sum')
log(f'Irregular Parcel Right Size: {a_df_irregula_right.index.size}')
# a_df_parcel_right.head(2)
# ====================================merge data ===========================================
a_df_irregular_data = a_pcs.get_merge_table(
    df_left=a_df_irregular_left, df_right=a_df_irregula_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'Irregular Parcel Data Size: {a_df_irregular_data.index.size}')
a_df_irregular_data.head(2)


<2017-11-09 18:18:30,943><tools>: Irregular Parcel Left Size: 104
<2017-11-09 18:18:31,541><tools>: Irregular Parcel Right Size: 68952
<2017-11-09 18:18:31,587><tools>: Irregular Parcel Data Size: 68952
Out[17]:
apt model Flight_ID landing_time dest_city dest_apt dest_model irregular_sum
0 WUX D CSS224 00:10:36 10 PEK D 0.0
1 WUX D CSS224 00:10:36 20 CAN D 0.0

2.5 NC Parcel


In [18]:
# NC-parcel pipeline — same pattern as the preceding cells.
# ====================================get left table =======================================
a_df_nc_left = a_pcs.get_left_table(df=df_nc_left, select_list=sort_parcel_selctc_left)
log(f'NC Parcel Left Size: {a_df_nc_left.index.size}')
# a_df_sb_left.tail(7)
# ====================================get right table ======================================
a_df_nc_right = a_pcs.get_right_table(df=df_nc_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='nc_sum')
log(f'NC Parcel Right Size: {a_df_nc_right.index.size}')
# a_df_parcel_right.head(2)
# ====================================merge data ===========================================
a_df_nc_data = a_pcs.get_merge_table(
    df_left=a_df_nc_left, df_right=a_df_nc_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'NC Parcel Data Size: {a_df_nc_data.index.size}')
a_df_nc_data.head(2)


<2017-11-09 18:18:37,147><tools>: NC Parcel Left Size: 104
<2017-11-09 18:18:37,617><tools>: NC Parcel Right Size: 68952
<2017-11-09 18:18:37,650><tools>: NC Parcel Data Size: 68952
Out[18]:
apt model Flight_ID landing_time dest_city dest_apt dest_model nc_sum
0 WUX D CSS224 00:10:36 10 PEK D 0.0
1 WUX D CSS224 00:10:36 20 CAN D 0.0

2.6 ISB (国际-不需要小件拆包直接终分拣的小件包)


In [19]:
# ISB pipeline (international small bags sorted directly, without unpacking)
# — same pattern as the preceding cells; built from the Mail sheet frames.
# ====================================get left table =======================================
a_df_isb_left = a_pcs.get_left_table(df=df_ims_left, select_list=sort_parcel_selctc_left)
log(f'ISB Left Size: {a_df_isb_left.index.size}')
# a_df_sb_left.tail(7)
# ====================================get right table ======================================
a_df_isb_right = a_pcs.get_right_table(df=df_ims_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='isb_sum')
log(f'ISB Right Size: {a_df_isb_right.index.size}')
# a_df_parcel_right.head(2)
# ====================================merge data ===========================================
a_df_isb_data = a_pcs.get_merge_table(
    df_left=a_df_isb_left, df_right=a_df_isb_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'ISB Data Size: {a_df_isb_data.index.size}')
a_df_isb_data.head(2)


<2017-11-09 18:18:45,013><tools>: ISB Left Size: 104
<2017-11-09 18:18:45,476><tools>: ISB Right Size: 68952
<2017-11-09 18:18:45,514><tools>: ISB Data Size: 68952
Out[19]:
apt model Flight_ID landing_time dest_city dest_apt dest_model isb_sum
0 WUX D CSS224 00:10:36 10 PEK D 0.0
1 WUX D CSS224 00:10:36 20 CAN D 0.0

2.8 Sorted Parcel Data


In [20]:
# Sorted-totals pipeline: wider left table (includes ULD columns), then the
# per-destination right matrix, merged and with the ULD columns prefixed.
# ====================================get left table =======================================
a_df_sorted_left = a_pcs.get_left_table(df=df_sorted_left, 
                                        select_list=sort_parcel_sum_selctc_left, 
                                        left_round_list=sort_parcel_sum_left_round_list)
log(f'Sorted Left Size: {a_df_sorted_left.index.size}')
# a_df_sorted_left.tail(7)
# ====================================get right table ======================================
a_df_sorted_right = a_pcs.get_right_table(df=df_sorted_right, drop_col='model', 
                        round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='sorted_sum')
log(f'Sorted Right Size: {a_df_sorted_right.index.size}')
# a_df_sorted_right.head(2)
# ====================================merge data ===========================================
a_df_sorte_data = a_pcs.get_merge_table(
    df_left=a_df_sorted_left, df_right=a_df_sorted_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
# FIX: rename columns via DataFrame.rename(columns=...); rename_axis with a
# dict mapper is deprecated (and removed in modern pandas).
a_df_sorte_data = a_df_sorte_data.rename(
    columns={"by_pcs_of_ULD": "sorted_by_pcs_of_ULD", "MHS_ULD": "sorted_MHS_ULD"})
log(f'Sorted Data Size: {a_df_sorte_data.index.size}')
a_df_sorte_data.tail(2)
# a_df_sorte_data.loc[a_df_sorte_data.Flight_ID.isin(['CSS68']), ['sorted_sum']].agg(['sum'])


<2017-11-09 18:18:50,366><tools>: Sorted Left Size: 104
<2017-11-09 18:18:50,997><tools>: Sorted Right Size: 68952
<2017-11-09 18:18:51,062><tools>: Sorted Data Size: 68952
Out[20]:
apt model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model sorted_sum
68950 CGN INF INF142 213.7 8 23:25:00 728 728 R 0.0
68951 CGN INF INF142 213.7 8 23:25:00 792 792 R 0.0

2.* Merge Data


In [21]:
# Merge the air-side per-class frames onto the sorted totals (left join on
# flight + destination keys), then keep only rows with a positive total.
# parcel data #--------------------------------------------------------------------------------
# df_right_sample = a_df_parcel_data.head(test_num)
# merge data #---------------------------------------------------------------------------------
a_df_data = merg_data(a_df_sorte_data, 
                      [a_df_parcel_data, a_df_sb_data, a_df_irregular_data, a_df_nc_data, a_df_isb_data
                      ], 
                      'left', merg_data_on)
# excel out #----------------------------------------------------------------------------------
a_df_data_no_zero = a_df_data.query('sorted_sum>0')
# df_to_excel(file_name='a_pcs', sheet='a_pcs', df=a_df_data_no_zero)
log(f'Air Data Size: {a_df_data.index.size}')
log(f'No Zero Air Data Size: {a_df_data_no_zero.index.size}')
# log(a_df_data.loc[a_df_data.Flight_ID.isin(['CSS68']),['sorted_sum']].agg(['sum']))
a_df_data_no_zero.sample(7)


<2017-11-09 18:18:57,855><tools>: Air Data Size: 68952
<2017-11-09 18:18:57,856><tools>: No Zero Air Data Size: 18349
Out[21]:
apt model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model sorted_sum parcel_sum small_sum irregular_sum nc_sum isb_sum
42080 HFE D CSS34 84.68 13 02:00:21 LAX LAX I 81.11 73.35 1.47 2.04 0.05 4.19
20557 PVG D CSS81 131.92 32 01:13:32 23 CKG D 29.49 29.49 0.00 0.00 0.00 0.00
47059 PEK D CSS68 116.30 26 02:10:36 27 27 R 99.53 76.30 12.98 10.07 0.17 0.00
20018 SZX D CSS102 139.29 32 01:12:04 562 HFE D 0.56 0.56 0.00 0.00 0.00 0.00
36773 SWA D CSS94 72.87 14 01:48:39 DXB DXB I 44.49 40.23 0.80 1.12 0.03 2.30
32630 CKG D CSS15 68.01 24 01:39:52 591 FOC D 15.29 15.29 0.00 0.00 0.00 0.00
12894 PVG D CSS77 131.92 32 00:53:02 990 URC D 0.04 0.04 0.00 0.00 0.00 0.00

3. Land Side Data

Setting


In [22]:
# Land-side settings (model 'R').  NOTE(review): these rebind the same names
# used by the air-side section (index_slip_name, right_round_list, ...), so
# cell execution order matters.
land_model_list = ['R']
land_sort_parcel_left = ['apt', 'model', 'Flight ID', 'landing_time']
index_slip_name = ['dest_city', 'dest_apt', 'dest_model']
right_round_list = [[None,2]]
in_tabel_merge_on = ['index_num']
merg_data_on = [
    'apt', 'model',  'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model'
]
l_pcs = TableT(land_model_list)

# =================== land-side pipeline helper ============================
def merg_all_data(cls_func, df_l, df_r, value_name, log_name):
    """Build the left/right tables from raw frames via `cls_func` and merge.

    Relies on the module-level settings `land_sort_parcel_left`,
    `right_round_list`, `index_slip_name` and `in_tabel_merge_on`, and on the
    module-level `log` function.  Returns the merged DataFrame.
    """
    left_part = cls_func.get_left_table(df=df_l, select_list=land_sort_parcel_left)
    right_part = cls_func.get_right_table(
        df=df_r, drop_col='model', round_list=right_round_list,
        index_slip_name=index_slip_name, column_index_data_name=value_name)
    merged = cls_func.get_merge_table(
        df_left=left_part, df_right=right_part,
        merge_on=in_tabel_merge_on, drop_cloum=in_tabel_merge_on)
    log(f'{log_name} Merge Data Size: {merged.index.size}')
    return merged

3.1 Data LS


In [23]:
# Land-side pipelines, one per parcel class, via the merg_all_data helper.
# ========================================sorted data=================================
l_df_sorte_data = merg_all_data(l_pcs, df_sorted_left, df_sorted_right, 'sorted_sum', 'sorted')
l_df_sorte_data.tail(10),  # NOTE(review): trailing comma makes a 1-tuple; harmless mid-cell no-op
# =======================================parcel bag data===============================
l_df_parcel_data = merg_all_data(l_pcs, df_parcel_left, df_parcel_right, 'parcel_sum', 'Parcel')
l_df_parcel_data.tail(10)
# =======================================small bag data================================
l_df_small_data = merg_all_data(l_pcs, df_small_left, df_small_right, 'small_sum', 'small')
l_df_small_data.tail(10)
# =======================================Irregular Parcel Data ==========================
l_df_irregular_data = merg_all_data(l_pcs, df_irregular_left, df_irregular_right, 'irregular_sum', 'irregular')
# l_df_irregular_data.tail(10)
# ================================================= NC Parcel Data ==========================
l_df_nc_data = merg_all_data(l_pcs, df_nc_left, df_nc_right, 'nc_sum', 'NC')
# l_df_nc_data.tail(10)
# ================================================= Mail Parcel Data ==========================
l_df_ims_data = merg_all_data(l_pcs, df_ims_left, df_ims_right, 'isb_sum', 'IMS')
l_df_ims_data.tail(2)


<2017-11-09 18:19:13,512><tools>: sorted Merge Data Size: 66963
<2017-11-09 18:19:13,911><tools>: Parcel Merge Data Size: 66963
<2017-11-09 18:19:14,303><tools>: small Merge Data Size: 66963
<2017-11-09 18:19:14,707><tools>: irregular Merge Data Size: 66963
<2017-11-09 18:19:15,111><tools>: NC Merge Data Size: 66963
<2017-11-09 18:19:15,497><tools>: IMS Merge Data Size: 66963
Out[23]:
apt model Flight_ID landing_time dest_city dest_apt dest_model isb_sum
66961 27 R CSSLocalE39 23:50:00 728 728 R 0.0
66962 27 R CSSLocalE39 23:50:00 792 792 R 0.0

In [24]:
# Merge all land-side frames onto the sorted totals, keep positive rows.
# ================================================= merge all data =========================
l_df_data = merg_data(l_df_sorte_data, 
                      [l_df_parcel_data, 
                       l_df_small_data, l_df_irregular_data, l_df_nc_data, l_df_ims_data
                      ], 
                      'left', merg_data_on)
# excel out #----------------------------------------------------------------------------------
l_df_data_no_zero = l_df_data.query('sorted_sum>0')
# df_to_excel(file_name='l_pcs', sheet='l_pcs', df=l_df_data_no_zero)
log(f'Land Data Size: {l_df_data.index.size}')
log(f'No Zero Land Data Size: {l_df_data_no_zero.index.size}')
# log(a_df_data.loc[a_df_data.Flight_ID.isin(['CSS68']),['sorted_sum']].agg(['sum']))
l_df_data_no_zero.query("isb_sum>0").head(7)


<2017-11-09 18:19:28,521><tools>: Land Data Size: 66963
<2017-11-09 18:19:28,522><tools>: No Zero Land Data Size: 31498
Out[24]:
apt model Flight_ID landing_time dest_city dest_apt dest_model sorted_sum parcel_sum small_sum irregular_sum nc_sum isb_sum
18206 27 R CSSLocalE24 00:20:00 852 HKG I 2.24 2.03 0.04 0.06 0.0 0.12
18207 27 R CSSLocalE24 00:20:00 886 TPE I 1.02 0.92 0.02 0.03 0.0 0.05
18208 27 R CSSLocalE24 00:20:00 BKK BKK I 0.73 0.66 0.01 0.02 0.0 0.04
18209 27 R CSSLocalE24 00:20:00 DXB DXB I 0.37 0.34 0.01 0.01 0.0 0.02
18210 27 R CSSLocalE24 00:20:00 ICN ICN I 1.23 1.11 0.02 0.03 0.0 0.06
18211 27 R CSSLocalE24 00:20:00 KUL KUL I 0.40 0.36 0.01 0.01 0.0 0.02
18212 27 R CSSLocalE24 00:20:00 LAX LAX I 1.59 1.44 0.03 0.04 0.0 0.08

4 Data Analysis

4.1 Air


In [25]:
# Column groups used throughout the analysis section.
parcel_data = ['parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum']
head_data = [
    'apt', 'model',  'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model',
    'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
]
all_data = ['apt', 'model', 'Flight_ID', 'sorted_by_pcs_of_ULD', 'sorted_MHS_ULD',
            'landing_time', 'dest_city', 'dest_apt', 'dest_model', 
            'sorted_sum', 'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum']

# Precision control: rounding rule for values below 1.
def right_back(dec, right_p):
    """Snap a sub-unit value to 1 when it strictly exceeds `right_p`, else 0."""
    return 1 if dec > right_p else 0

# Precision control: rounding rule for values greater than or equal to 1.
def left_back(dec, left_p):
    """Round `dec` up when its fractional part (2 d.p.) reaches `left_p`, else down."""
    whole = int(dec)
    frac = round(dec - whole, 2)
    return whole + 1 if frac >= left_p else whole


def creat_od_table():
    """Create the `i_od_parcel` table on MySQLConfig.engine (skip if present).

    Column order matters for the emitted DDL, so the original order is
    preserved exactly.  (Function name typo kept for caller compatibility.)
    """
    meta = MetaData(bind=MySQLConfig.engine)
    # (name, VARCHAR length); a None length marks the single DateTime column.
    column_spec = [
        ("small_id", 10),
        ("parcel_id", 20),
        ("src_type", 5),
        ("plate_num", 20),
        ("uld_num", 20),
        ("parcel_type", 10),
        ("arrive_time", None),
        ("dest_code", 10),
        ("dest_apt", 10),
        ("dest_type", 5),
    ]
    columns = [
        Column(name, DateTime() if length is None else VARCHAR(length=length))
        for name, length in column_spec
    ]
    od_table = Table("i_od_parcel", meta, *columns)
    od_table.create(checkfirst=True)

4.1.1 原始数据

AirSide


In [26]:
# Re-load the exported air-side workbook for the analysis section.
file_name = "a_pcs"
file_dir = os.path.join(DEFAULT_DATA_PATH, file_name+'.xlsx')
# FIX: read the workbook once; the second frame is an independent copy used
# by the later rounding experiments (the original re-read the same file).
a_pcs_data2 = pd.read_excel(io=file_dir, sheetname='a_pcs')
a_pcs_data3 = a_pcs_data2.copy()

4.1.2 精度控制前,未拆小件包数据


In [27]:
# Air side, before precision control, small bags NOT unpacked: per-class
# totals plus an overall sorted_sum.
# FIX: explicit .copy() so the column assignment below cannot raise
# SettingWithCopyWarning / write through to the source frame.
f0 = a_pcs_data2.loc[:, parcel_data].copy()
f0['sorted_sum'] = f0.loc[:,parcel_data].sum(axis=1)
f0_t = f0.sum().to_frame().T
f0_t


Out[27]:
parcel_sum small_sum irregular_sum nc_sum isb_sum sorted_sum
0 313017.03 6460.95 8378.24 196.84 2456.06 330509.12

Land


In [28]:
# Land side, before precision control, small bags NOT unpacked.
# FIX: explicit .copy() before adding the sorted_sum column.
l_f0 = l_df_data_no_zero.loc[:, parcel_data].copy()
l_f0['sorted_sum'] = l_f0.loc[:,parcel_data].sum(axis=1)
l_f0_t = l_f0.sum().to_frame().T
l_f0_t


Out[28]:
parcel_sum small_sum irregular_sum nc_sum isb_sum sorted_sum
0 105650.1 17193.88 13412.71 212.51 24.64 136493.84

4.1.3 精度控制前,拆小件包数据


In [29]:
# Air side, before precision control, WITH small bags unpacked: each small
# bag / ISB counts as 20 parcels.
# FIX: explicit .copy() before mutating the slice.
f1 = a_pcs_data2.loc[:, parcel_data].copy()
# small sum * 20
f1.loc[:, ['small_sum', 'isb_sum']] = f1.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
f1['sorted_sum'] = f1.loc[:,parcel_data].sum(axis=1)
f1_t = f1.sum().to_frame().T

In [30]:
# =============================== per-column precision thresholds
# 'L': fractional threshold for values >= 1 (see left_back)
# 'R': cut-off for values < 1 (see right_back)
parcel_round = {'parcel_sum': {'L':0.97, 'R': 0.01}, 'small_sum': {'L': 0.6, 'R': 0.01}, 
                'irregular_sum': {'L': 0.98, 'R': 0.2}, 'nc_sum': {'L': 0.98, 'R': 0.2}, 
                'isb_sum': {'L': 0.5, 'R': 0.01}}
# =============================== take the data to round and unpack small bags (x20)
# FIX: explicit .copy() before mutating the slice.
f3 = a_pcs_data2.loc[:, parcel_data].copy()
f3.loc[:, ['small_sum', 'isb_sum']] = f3.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
# ============================== apply the precision rules per column
for col in parcel_data:
    f3.loc[:, col] = f3.loc[:, col].apply(
        lambda x : left_back(x, parcel_round[col]['L']) if x >=1 else right_back(x, parcel_round[col]['R']))

f3['sorted_sum'] = f3.loc[:,parcel_data].sum(axis=1)
# f3.tail(10)
# ========================= totals after rounding ==================
f3_t = f3.sum().to_frame().T
# ============================== compare before/after totals =======================
ft_parcel = pd.concat([f1_t, f3_t])
# NOTE(review): 'befor' label typo kept — it appears in saved outputs.
ft_parcel = ft_parcel.reset_index(drop=True).rename({0:'befor', 1:'after_round'})
ft_parcel


Out[30]:
parcel_sum small_sum irregular_sum nc_sum isb_sum sorted_sum
befor 313017.03 129219.0 8378.24 196.84 49121.2 499932.31
after_round 313059.00 129920.0 8338.00 205.00 49136.0 500658.00

Land


In [31]:
# Land side: unpack small bags (x20) and compare totals before/after rounding.
# FIX: explicit .copy() before mutating the slices (both l_f1 and l_f3).
l_f1 = l_df_data_no_zero.loc[:, parcel_data].copy()
# small sum * 20
l_f1.loc[:, ['small_sum', 'isb_sum']] = l_f1.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
l_f1['sorted_sum'] = l_f1.loc[:,parcel_data].sum(axis=1)
l_f1_t = l_f1.sum().to_frame().T

# =============================== per-column precision thresholds (land side)
l_parcel_round = {'parcel_sum': {'L':0.99, 'R': 0.05}, 'small_sum': {'L': 0.55, 'R': 0.3}, 
                'irregular_sum': {'L': 0.99, 'R': 0.27}, 'nc_sum': {'L': 0.6, 'R': 0.08}, 
                'isb_sum': {'L': 0.5, 'R': 0.4}}
# =============================== take the data to round and unpack small bags
l_f3 = l_df_data_no_zero.loc[:, parcel_data].copy()
l_f3.loc[:, ['small_sum', 'isb_sum']] = l_f3.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
# ============================== apply the precision rules per column
for col in parcel_data:
    l_f3.loc[:, col] = l_f3.loc[:, col].apply(
        lambda x : left_back(x, l_parcel_round[col]['L']) if x >=1 else right_back(x, l_parcel_round[col]['R']))

l_f3['sorted_sum'] = l_f3.loc[:,parcel_data].sum(axis=1)
# ========================= totals after rounding ==================
l_f3_t = l_f3.sum().to_frame().T
# ============================== compare before/after totals =======================
l_ft_parcel = pd.concat([l_f1_t, l_f3_t])
l_ft_parcel = l_ft_parcel.reset_index(drop=True).rename({0:'befor', 1:'after_round'})
l_ft_parcel


Out[31]:
parcel_sum small_sum irregular_sum nc_sum isb_sum sorted_sum
befor 105650.1 343877.6 13412.71 212.51 492.8 463645.72
after_round 105878.0 344777.0 13414.00 216.00 483.0 464768.00

4.1.4 应用配置精度到整体数据


In [32]:
# Apply the tuned air-side precision thresholds to the full dataset.
opt_df = a_pcs_data3.copy()
# small sum * 20 (unpack small bags / ISB into individual parcels)
opt_df.loc[:, ['small_sum', 'isb_sum']] = opt_df.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
# # ===========================round for each columns==========
for col in parcel_data:
    opt_df.loc[:, col] = opt_df.loc[:, col].apply(
        lambda x : left_back(x, parcel_round[col]['L']) if x >=1 else right_back(x, parcel_round[col]['R']))

opt_df['sorted_sum'] = opt_df.loc[:,parcel_data].sum(axis=1)
opt_df.head(2)


Out[32]:
apt model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model sorted_sum parcel_sum small_sum irregular_sum nc_sum isb_sum
0 WUX D CSS224 94.98 28 00:10:36 20 CAN D 23 23 0 0 0 0
1 WUX D CSS224 94.98 28 00:10:36 22 TSN D 58 58 0 0 0 0

Land


In [33]:
# Apply the tuned land-side precision thresholds to the full dataset.
l_opt_df = l_df_data_no_zero.copy()
# small sum * 20 (unpack small bags / ISB into individual parcels)
l_opt_df.loc[:, ['small_sum', 'isb_sum']] = l_opt_df.loc[:, ['small_sum', 'isb_sum']].applymap(lambda x : x*20)
# # ===========================round for each columns==========
for col in parcel_data:
    l_opt_df.loc[:, col] = l_opt_df.loc[:, col].apply(
        lambda x : left_back(x, l_parcel_round[col]['L']) if x >=1 else right_back(x, l_parcel_round[col]['R']))

l_opt_df['sorted_sum'] = l_opt_df.loc[:,parcel_data].sum(axis=1)
l_opt_df.head(2)


Out[33]:
apt model Flight_ID landing_time dest_city dest_apt dest_model sorted_sum parcel_sum small_sum irregular_sum nc_sum isb_sum
0 7112R R CSSIPE05 00:00:00 10 PEK D 987 217 741 28 1 0
1 7112R R CSSIPE05 00:00:00 20 CAN D 374 82 281 10 1 0

4.2 生成包裹详情表


In [34]:
# Expand the rounded air-side matrix into one row per individual parcel.
base_col = ['model', 'Flight_ID', 'sorted_by_pcs_of_ULD', 'sorted_MHS_ULD', 'landing_time', 'dest_city', 'dest_apt', 'dest_model']
melt_col = [
    #'sorted_sum', 
            'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
           ]
opt_melt_df = df_melt_data_format(df=opt_df, base_col=base_col, melt_col=melt_col, melt_name='parcel_type', value_name='num')
opt_melt_df.parcel_type = opt_melt_df.parcel_type.str.replace('_sum', '')
opt_melt_df = opt_melt_df.query('num>0')
# T data (transpose so .items() walks the original rows as Series)
tt = opt_melt_df.T
# Expand the frame by parcel count: repeat each row `num` times.
def map_col(series, key):
    # yields `series[key]` references to the same row Series
    yield from [series]*series[key]
# Build the repeated rows, one output row per parcel.
# NOTE(review): this Python-level loop is slow; the same result should come
# from opt_melt_df.loc[opt_melt_df.index.repeat(opt_melt_df.num)] — confirm
# resulting dtypes before switching.
temp = []
for iter_col in tt.items():
    for col in map_col(iter_col[1], 'num'):
        temp.append(col)
        
opt_pca_data_a = pd.DataFrame(data=temp)
opt_pca_data_a = opt_pca_data_a.reset_index(drop=True)
opt_pca_data_a.head(2)
# Add an ObjectId as parcel id (disabled):
# opt_pca_data_a['id_parcel'] = pd.Series(data=[str(bson.objectid.ObjectId()) for _ in range(opt_pca_data_a.index.size)])


Out[34]:
model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model parcel_type num
0 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23
1 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23

In [35]:
opt_pca_data_a.index.size  # total simulated air-side parcels (matches the rounded total)


Out[35]:
500658

Land


In [36]:
# Land side: expand the rounded matrix into one row per parcel (mirrors the
# air-side cell above).  NOTE(review): re-defines map_col identically —
# the duplicate definition silently shadows the earlier one.
base_col = ['model', 'Flight_ID', 'landing_time', 'dest_city', 'dest_apt', 'dest_model']
melt_col = [
    #'sorted_sum', 
            'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
           ]
l_opt_melt_df = df_melt_data_format(df=l_opt_df, base_col=base_col, melt_col=melt_col, melt_name='parcel_type', value_name='num')
l_opt_melt_df.parcel_type = l_opt_melt_df.parcel_type.str.replace('_sum', '')
l_opt_melt_df = l_opt_melt_df.query('num>0')
# T data
l_tt = l_opt_melt_df.T
# Expand the frame by parcel count: repeat each row `num` times.
def map_col(series, key):
    yield from [series]*series[key]
# Build the repeated rows, one output row per parcel.
l_temp = []
for iter_col in l_tt.items():
    for col in map_col(iter_col[1], 'num'):
        l_temp.append(col)
        
l_opt_pca_data = pd.DataFrame(data=l_temp)
l_opt_pca_data = l_opt_pca_data.reset_index(drop=True)
# Add an ObjectId as parcel id (disabled):
# opt_pca_data_a['id_parcel'] = pd.Series(data=[str(bson.objectid.ObjectId()) for _ in range(opt_pca_data_a.index.size)])

In [37]:
# Spot-check one land flight's small-bag rows.
l_opt_pca_data.query("Flight_ID=='CSSLocalE18' & parcel_type=='small'").sample(10) #.num.count()


Out[37]:
model Flight_ID landing_time dest_city dest_apt dest_model parcel_type num
297798 R CSSLocalE18 00:20:00 763 734W R small 92
295302 R CSSLocalE18 00:20:00 552 552W R small 43
294997 R CSSLocalE18 00:20:00 535 745W R small 85
292695 R CSSLocalE18 00:20:00 315 553W R small 83
294973 R CSSLocalE18 00:20:00 535 745W R small 85
298772 R CSSLocalE18 00:20:00 826 745W R small 12
293997 R CSSLocalE18 00:20:00 467 451WA R small 11
295429 R CSSLocalE18 00:20:00 555 025WA R small 41
299455 R CSSLocalE18 00:20:00 931 710W R small 91
297390 R CSSLocalE18 00:20:00 745 734W R small 45

4.2.1 随机分配包裹随机选择ULD


In [38]:
# INF174 has zero ULDs in the source data; the actual configuration is 2.
opt_pca_data_a.loc[opt_pca_data_a.Flight_ID=='INF174', ['sorted_MHS_ULD']] = 2
# Assign each parcel a uniformly random ULD index in [1, sorted_MHS_ULD].
# NOTE(review): np.random is unseeded — results change on every run; seed in
# a config cell for reproducibility.
opt_pca_data_a['id_uld'] = opt_pca_data_a.sorted_MHS_ULD.map(lambda x: np.random.randint(1, int(x)+1))
# opt_pca_data_a.query("Flight_ID=='CSS1' & parcel_type=='small'").groupby(['id_uld']).Flight_ID.count()
opt_pca_data_a.head(7)


Out[38]:
model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model parcel_type num id_uld
0 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 28
1 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 26
2 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 17
3 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 25
4 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 12
5 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 13
6 D CSS224 94.98 28 00:10:36 20 CAN D parcel 23 18

4.3 ARC 包裹仿真基础数据


In [39]:
# ARC simulation base data.  FIX: build the path from DEFAULT_DATA_PATH for
# consistency with the other file reads (was a hard-coded 'data\\arc.xz').
# NOTE(review): read_pickle executes pickled code — only load trusted files.
arc_df = pd.read_pickle(path=os.path.join(DEFAULT_DATA_PATH, 'arc.xz'), compression='xz')

4.3.1 ARC 不同航班不同ULD到达槽口时间


In [40]:
arc_col_select = ['ID', 'Origin', 'Destination', 'TypeIn', 'TypeOut', 'FltIn', 'FltOut',
                  'ULDIn', 'ULDOut', 'AMIT']
# ================================================ air-side rows: count distinct ULDs per flight
selec_arc_df = arc_df.query("TypeIn == 'A'").loc[:, arc_col_select]
flyid_uld_df = selec_arc_df.groupby(by=['TypeIn', 'FltIn']).ULDIn.apply(set).apply(len).to_frame()
flyid_uld_df = flyid_uld_df.reset_index()
# FIX: rename columns via rename(columns=...); the dict form of rename_axis
# is deprecated (removed in modern pandas).
flyid_uld_df = flyid_uld_df.rename(columns={'ULDIn': 'ULD_CON'})
flyid_uld_df
# ================================================ distinct parcel count per flight / ULD / arrival time
uld_amit_par_df = selec_arc_df.groupby(by=['TypeIn', 'FltIn', 'ULDIn', 'AMIT']).ID.apply(set).apply(len).to_frame()
uld_amit_par_df = uld_amit_par_df.reset_index()
uld_amit_par_df = uld_amit_par_df.rename(columns={'ID': 'PAR_NUM'})
# =============================================== attach the per-flight ULD count
uld_amit_par_df = pd.merge(left=uld_amit_par_df, right=flyid_uld_df, on=['TypeIn', 'FltIn'], how='left')
# tidy the column order
uld_amit_par_df = uld_amit_par_df.loc[:,['FltIn', 'ULDIn', 'AMIT', 'PAR_NUM', 'ULD_CON']]
uld_amit_par_df.head(2)
# df_to_excel(df=uld_amit_par_df, file_name='uld_con', sheet='uld_con')


Out[40]:
FltIn ULDIn AMIT PAR_NUM ULD_CON
0 CSS1 ULD40230158 2025-02-08 00:38:10 172 14
1 CSS1 ULD40230159 2025-02-08 00:37:56 170 14

Land - 到达分拣场地时间


In [41]:
arc_col_select = ['ID', 'FltIn', 'ALDT']
# ================================================ land-side rows: distinct parcel count per flight / landing time
l_selec_arc_df = arc_df.query("TypeIn == 'L'").loc[:, arc_col_select]
l_arc_landtime_df = l_selec_arc_df.groupby(by=[ 'FltIn', 'ALDT']).ID.apply(set).apply(len).to_frame()
l_arc_landtime_df.tail(2)


Out[41]:
ID
FltIn ALDT
CSSLongEInf11 2025-02-08 00:05:10 2565
CSSLongEInf12 2025-02-07 23:35:10 170

4.3.2 生成uld 小编号


In [42]:
uld_id_df = flyid_uld_df.T
def map_col(uld_id_df):
    """Yield one row per ULD, adding a 1-based `id_uld` counter per flight."""
    # FIX: DataFrame.iteritems() was removed in modern pandas; .items() is the
    # equivalent spelling and is what the earlier expansion cells already use.
    for col in uld_id_df.items():
        for i in range(1, col[1]['ULD_CON']+1):
            col[1]['id_uld'] = i
            yield col[1].copy()  # copy: the same Series object is mutated each pass
            
col_df = pd.DataFrame(data=list(map_col(uld_id_df))).reset_index(drop=True)
uld_amit_par_df['id_uld'] = col_df.id_uld
# FIX: rename(columns=...) replaces the deprecated dict form of rename_axis.
uld_match_table = uld_amit_par_df.rename(columns={'FltIn': 'Flight_ID', 'ULDIn': 'ULD_ID', 'AMIT':'arrive_time'})
uld_match_table.head(2)
# df_to_excel(df=uld_amit_par_df, file_name='uld_amit_par_df', sheet='uld_amit_par_df')


Out[42]:
Flight_ID ULD_ID arrive_time PAR_NUM ULD_CON id_uld
0 CSS1 ULD40230158 2025-02-08 00:38:10 172 14 1
1 CSS1 ULD40230159 2025-02-08 00:37:56 170 14 2

In [43]:
# Spot-check: flight CSS100's 'isb' rows.
flight_mask = opt_pca_data_a.Flight_ID.isin(['CSS100'])
isb_mask = opt_pca_data_a.parcel_type.isin(['isb'])
opt_pca_data_a.loc[flight_mask & isb_mask, :].head(2)


Out[43]:
model Flight_ID sorted_by_pcs_of_ULD sorted_MHS_ULD landing_time dest_city dest_apt dest_model parcel_type num id_uld
470469 D CSS100 139.29 32 00:58:54 BKK BKK I isb 48 24
470470 D CSS100 139.29 32 00:58:54 BKK BKK I isb 48 16

4.3.3 匹配uld编码和时间


In [44]:
merg_col = ['Flight_ID', 'id_uld']
# Attach ULD number / arrival time to every air-side parcel row.
a_par_data_df = opt_pca_data_a.merge(right=uld_match_table, on=merg_col, how='left')
a_par_output_data_df = a_par_data_df.loc[:, [
    'model', 'Flight_ID', 'ULD_ID', 'arrive_time', 'parcel_type', 'dest_city', 'dest_apt', 'dest_model']
                        ]
# `rename_axis(mapper, axis=1)` no longer renames labels in modern pandas;
# use `rename(columns=...)`.
a_par_output_data_df = a_par_output_data_df.rename(columns={
    'model': 'src_type', 'Flight_ID': 'plate_num', 'ULD_ID': 'uld_num', 'dest_city': 'dest_code', 'dest_model': 'dest_type'})

# a_par_output_data_df.loc[a_par_output_data_df.ULD_ID.isnull(),:]
# df_to_pickle(df=a_par_output_data_df, file_name='parcel_data_erzhou_airside.pkl')
# df_to_excel(df=a_par_output_data_df, file_name='parcel_data_erzhou_airside', sheet='parcel_data_erzhou_airside')
# Strip the trailing '.N' duplicate-column suffix (e.g. 'D.1' -> 'D').
# Raw string + explicit regex=True: `str.replace` matches literally by
# default since pandas 2.0.
a_par_output_data_df.dest_type = a_par_output_data_df.dest_type.str.replace(r'\.[0-9]$', '', regex=True)
a_par_output_data_df.head(2)


Out[44]:
src_type plate_num uld_num arrive_time parcel_type dest_code dest_apt dest_type
0 D CSS224 ULD40245213 2025-02-08 00:32:42 parcel 20 CAN D
1 D CSS224 ULD40245211 2025-02-08 00:34:16 parcel 20 CAN D

small ID生成


In [47]:
# Air-side: count small/isb parcels per (flight, ULD, arrival time, type),
# then derive the small-bag count at 20 parcels per bag.
a_small_data_con = (
    a_par_output_data_df
    .query("parcel_type == 'small' | parcel_type == 'isb'")
    .groupby(['plate_num', 'uld_num', 'arrive_time', 'parcel_type'])
    .parcel_type.count()
    .to_frame()
    .copy()
)
# `rename_axis(mapper, axis=1)` is deprecated/removed; use rename(columns=...).
a_small_data_con = a_small_data_con.rename(columns={'parcel_type': 'con'})
a_small_data_con = a_small_data_con.reset_index()
# 20 parcels per small bag; 2 decimals kept for the later rounding step.
a_small_data_con['sb_sum'] = a_small_data_con.con.map(lambda x: round(x / 20, 2))
a_small_data_con.query("plate_num=='CSS100' & uld_num == 'ULD40657472' & parcel_type == 'isb'")


Out[47]:
plate_num uld_num arrive_time parcel_type con sb_sum
14 CSS100 ULD40657472 2025-02-08 01:13:22 isb 29 1.45

land


In [48]:
# Land-side: count small/isb parcels per (flight, landing time, type),
# then derive the small-bag count at 20 parcels per bag.
l_small_data_con = (
    l_opt_pca_data
    .query("parcel_type == 'small' | parcel_type == 'isb'")
    .groupby(['Flight_ID', 'landing_time', 'parcel_type'])
    .model.count()
    .to_frame()
    .copy()
)
l_small_data_con = l_small_data_con.reset_index()
# `rename_axis(mapper, axis=1)` is deprecated/removed; use rename(columns=...).
l_small_data_con = l_small_data_con.rename(columns={'model': 'par_num'})
# 20 parcels per small bag.
l_small_data_con['small_num'] = l_small_data_con.par_num.map(lambda x: round(x / 20, 2))
l_small_data_con.query("Flight_ID=='CSSLocalE18'")


Out[48]:
Flight_ID landing_time parcel_type par_num small_num
106 CSSLocalE18 00:20:00 small 7778 388.9

Air 小件包号统计


In [49]:
def _round_one(df, col_list, round_map):
    """Round the given columns of ``df`` element-wise, in place.

    Values >= 1 are passed to ``left_back`` with the column's 'L'
    tolerance; values below 1 go to ``right_back`` with its 'R' tolerance.
    (``left_back``/``right_back`` come from the wildcard-imported
    ``tools.func_abc`` — presumably epsilon-aware floor/ceil rounding;
    confirm against that module.)

    :param df: frame whose columns are updated (mutated and also returned).
    :param col_list: column names to round.
    :param round_map: ``{col: {'L': eps, 'R': eps}}`` tolerance mapping.
    :return: the same ``df`` object, for chaining.
    """
    def _round_value(value, eps_pair):
        if value >= 1:
            return left_back(value, eps_pair['L'])
        return right_back(value, eps_pair['R'])

    for name in col_list:
        df.loc[:, name] = df.loc[:, name].apply(
            lambda v: _round_value(v, round_map[name]))
    return df

# Round the air-side small-bag totals to whole bag counts.
a_small_data_con_round = _round_one(a_small_data_con, ['sb_sum'], {'sb_sum': {'L': 0.00001, 'R': 0.000001}})
small_bag_map = a_small_data_con_round.loc[:, ['uld_num', 'parcel_type', 'sb_sum']]
small_bag_map.head(2)


Out[49]:
uld_num parcel_type sb_sum
0 ULD40230158 small 4
1 ULD40230159 small 4

land 小件包号统计


In [50]:
# Apply the same rounding to the land-side small-bag totals.
l_small_data_con_round = _round_one(
    l_small_data_con, ['small_num'], {'small_num': {'L': 0.00001, 'R': 0.000001}})
l_small_bag_map = l_small_data_con_round.loc[:, ['Flight_ID', 'parcel_type', 'small_num']]
l_small_bag_map.query("Flight_ID=='CSSLocalE18'").head(10)


Out[50]:
Flight_ID parcel_type small_num
106 CSSLocalE18 small 389

In [55]:
SMALL_ID = 1000_0001

# Join the per-ULD small-bag totals onto the parcel table; rows with no
# small-bag entry fall back to sb_sum 0.
a_par_small_bag_df = a_par_output_data_df.merge(small_bag_map, how='left', on=['uld_num', 'parcel_type'])
a_par_small_bag_df.sb_sum = a_par_small_bag_df.sb_sum.fillna(value=0)

# Randomly pick one of the ULD's small bags (1..sb_sum) for each parcel.
# NOTE(review): no fixed RNG seed here, so the assignment differs per run.
a_par_small_bag_df['sb_id'] = a_par_small_bag_df.sb_sum.map(
    lambda x: np.random.randint(1, int(x) + 1) if x > 0 else x)
a_par_small_bag_df.sb_id = a_par_small_bag_df.sb_id.apply(int)

# Bagged parcels (sb_id > 0) get a composite id '<uld><type-initial><bag#>';
# the rest keep 0 and receive a running small_id below.
composite_id = (a_par_small_bag_df.uld_num.apply(str)
                + a_par_small_bag_df.parcel_type.str.get(0)
                + a_par_small_bag_df.sb_id.apply(str))
a_par_small_bag_df['parcel_id'] = a_par_small_bag_df.sb_id.mask(
    a_par_small_bag_df.sb_id > 0, composite_id)

a_par_small_bag_data = a_par_small_bag_df.loc[:, [
    'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]

# Sequential surrogate key starting at SMALL_ID.
a_par_small_bag_data['small_id'] = pd.Series(data=np.arange(SMALL_ID, SMALL_ID + a_par_small_bag_data.index.size))
a_par_small_bag_data['parcel_id'] = a_par_small_bag_data.parcel_id.mask(
    a_par_small_bag_data.parcel_id == 0, a_par_small_bag_data.small_id)

a_par_small_bag_data = a_par_small_bag_data.loc[:, [
    'small_id', 'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]
df_to_excel(df=a_par_small_bag_data, file_name='i_od_parcel_air')
a_par_small_bag_data.sample(5)


# a_par_small_bag_data.to_sql('i_od_parcel', MySQLConfig.engine, index=False, index_label='small_id', if_exists='append')


<2017-11-09 18:43:37,925><tools>: data to excel success!
Out[55]:
small_id parcel_id src_type plate_num uld_num arrive_time parcel_type dest_code dest_apt dest_type
438401 10438402 ULD40685541s3 INF INF1182 ULD40685541 2025-02-08 01:24:26 small 512 WUX D
299878 10299879 10299879 INF INF1182 ULD40685537 2025-02-08 01:25:10 parcel 7110 BKK INF
63855 10063856 10063856 D CSS99 ULD40598481 2025-02-08 01:04:52 parcel 7110 ICN INF
217714 10217715 10217715 I CSS43 ULD40358823 2025-02-08 00:47:22 parcel 10 PEK D
72353 10072354 10072354 D CSS7 ULD40643977 2025-02-08 01:14:06 parcel DXB DXB I

Land 小件包号分配


In [53]:
# Simulation date window: landing times in the midnight hour are mapped to
# DATE_END (the later calendar day), all others to DATE_BEGIN (see the
# arrive_time mapping below).
DATE_END = '2025-02-08 '
DATE_BEGIN = '2025-02-07 '
# Starting surrogate id for land-side parcels (air-side starts at 1000_0001).
L_SMALL_ID = 2000_0001


def counter_rand(map_num, cho_data, cho_counter, end_cho):
    """Randomly draw one value from ``cho_data`` per element of ``map_num``,
    retiring a value once it has been drawn ``end_cho`` times.

    :param map_num: iterable whose length fixes the number of draws.
    :param cho_data: mutable list of candidate values; exhausted values are
        removed in place (the caller's list is mutated).
    :param cho_counter: mapping value -> draw count, updated in place
        (e.g. a ``defaultdict(int)``).
    :param end_cho: per-value draw limit; at the limit the value leaves the
        candidate pool.
    :yields: one drawn value per iteration.
    :raises ValueError: when the pool empties before all draws are done
        (i.e. ``end_cho`` is too small for the requested number of draws).
    """
    choise_data = cho_data
    for _ in map_num:
        # Narrow try: only np.random.choice raises ValueError on an empty
        # pool. (The original wrapped the whole body, so unrelated
        # ValueErrors were misreported with the same message.)
        try:
            cho_num = np.random.choice(choise_data)
        except ValueError:
            raise ValueError('计数器的截止数太小!')
        cho_counter[cho_num] += 1
        yield cho_num
        if cho_counter[cho_num] >= end_cho:
            choise_data.remove(cho_num)


# Land-side parcels: attach small-bag totals per (flight, parcel_type).
l_par_small_bag_df = l_opt_pca_data.merge(l_small_bag_map, how='left', on=['Flight_ID', 'parcel_type'])
l_par_small_bag_df.small_num = l_par_small_bag_df.small_num.fillna(value=0)

# Random small-bag number per parcel (1..small_num); 0 means "not bagged".
# NOTE(review): no fixed RNG seed, so the assignment differs per run.
l_par_small_bag_df['sb_id'] = l_par_small_bag_df.small_num.map(
    lambda x: np.random.randint(1, int(x) + 1) if x > 0 else x)
l_par_small_bag_df.sb_id = l_par_small_bag_df.sb_id.apply(int)

# Sequential land-side surrogate key starting at L_SMALL_ID.
l_par_small_bag_df['small_id'] = pd.Series(data=np.arange(L_SMALL_ID, L_SMALL_ID + l_par_small_bag_df.index.size))

# Bagged parcels get a composite id '<flight><type-initial><bag#>';
# the rest fall back to their running small_id.
l_par_small_bag_df['parcel_id'] = l_par_small_bag_df.sb_id.mask(
    l_par_small_bag_df.sb_id > 0,
    l_par_small_bag_df.Flight_ID.apply(str) + l_par_small_bag_df.parcel_type.str.get(0) + l_par_small_bag_df.sb_id.apply(str))

l_par_small_bag_df['parcel_id'] = l_par_small_bag_df.parcel_id.mask(l_par_small_bag_df.parcel_id == 0, l_par_small_bag_df.small_id)

# `rename_axis(mapper, axis=1)` no longer renames labels; use rename(columns=...).
l_par_small_bag_df = l_par_small_bag_df.rename(columns={
    'model': 'src_type', 'Flight_ID': 'plate_num', 'landing_time': 'arrive_time', 'dest_city': 'dest_code', 'dest_model': 'dest_type'})
l_par_small_bag_data = l_par_small_bag_df.loc[:, [
    'small_id', 'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]
# `pd.datetime` was removed in pandas 2.0 -> use the stdlib datetime module
# (already imported at the top of the file). Midnight-hour times belong to
# the later date, everything else to the earlier one.
l_par_small_bag_data.arrive_time = l_par_small_bag_data.arrive_time.map(
    lambda x: DATE_END + x
    if datetime.datetime.strptime(x, '%H:%M:%S').hour == 0
    else DATE_BEGIN + x)
# Strip the '.N' duplicate-column suffix; raw string + explicit regex=True
# (`str.replace` matches literally by default since pandas 2.0).
l_par_small_bag_data.dest_type = l_par_small_bag_data.dest_type.str.replace(r'\.[0-9]$', '', regex=True)
l_par_small_bag_data.sample(10)


Out[53]:
small_id parcel_id src_type plate_num uld_num arrive_time parcel_type dest_code dest_apt dest_type
419335 20419336 CSSLocalAir12s19 R CSSLocalAir12 NaN 2025-02-07 23:30:00 small 736 736W R
446054 20446055 CSSIPE08s4 R CSSIPE08 NaN 2025-02-07 23:45:00 small 7112 7112 R
428044 20428045 CSSLocalAir16s111 R CSSLocalAir16 NaN 2025-02-07 23:30:00 small 316 531W R
15571 20015572 20015572 R CSSLocalE32 NaN 2025-02-08 00:00:00 parcel 577 WNZ D
115104 20115105 CSSIPE06s42 R CSSIPE06 NaN 2025-02-08 00:00:00 small 355 TYN D
333475 20333476 CSSIPAir09s141 R CSSIPAir09 NaN 2025-02-07 22:30:00 small 571 551W R
224089 20224090 CSSLocalE31s269 R CSSLocalE31 NaN 2025-02-08 00:10:00 small 471 710W R
234933 20234934 CSSLocalE37s87 R CSSLocalE37 NaN 2025-02-08 00:10:00 small 735 020W R
48797 20048798 20048798 R CSSLongEInf07 NaN 2025-02-08 00:15:00 parcel 412 552W R
229322 20229323 CSSLocalE31s284 R CSSLocalE31 NaN 2025-02-08 00:10:00 small 912 710W R

In [ ]: