In [1]:
import pandas as pd
from collections.abc import Iterator, Iterable, Generator, Hashable
from collections import defaultdict, namedtuple
import re
import os
import datetime
import bson
from sqlalchemy.types import Integer, Text, DateTime, Float, VARCHAR
from sqlalchemy import Table, MetaData, Column
import itertools as it
import pickle
from tools import deco_logging
import numpy as np
# add tools funcs
from tools.func_abc import *
# sheet names of the MHS pre-sort workbook that feed this analysis
MHS_SHEET_NAME = ['Parcel pcs', 'Small bag pcs', 'Irregular pcs', 'NC pcs', 'Mail pcs', 'sorting by pcs']
# sheet-name -> DataFrame mapping (never filled in this view — TODO confirm it is still needed)
SHEET_DF = {}
# NOTE(review): Windows-style relative path; presumably the notebook runs on Windows — confirm
DEFAULT_DATA_PATH = ('data\\')
EXCEL_DIR = 'data//2025OD_matrix_Pre-Sort.xlsx'
# number of footer rows to skip when parsing each sheet (hard-coded as 57 in the cells below)
SHEEF_FOOT_SKIP_NUM = 57
# project logging decorator/callable tagged with the jupyter user
log_ipy = deco_logging(user='jupyter')
In [2]:
# workbook path plus the Excel column ranges used to split every sheet into a
# "left" part (flight metadata) and a "right" part (the OD destination matrix)
excel_dir = 'data//2025OD_matrix_Pre-Sort.xlsx'
pcs_sheet_name = 'sum_pcs'
pcs_left_parse_cols = 'A:Q'
pcs_right_parse_cols = 'D,R:ZD'
other_left_parse_cols = 'B:F'
other_right_parse_cols = 'C,G:YS'
sorted_left_parse_cols = 'B:H'
sorted_right_parse_cols = 'C,I:YU'
cross_d_left_parse_cols = 'B:G'
cross_d_right_parse_cols = 'C,H:AR'
In [3]:
def get_df_excel_sheet(excel_dir: str = None, sheet_lst: list = None, skip_foot: int = 0):
    """Yield one DataFrame per sheet read from an Excel workbook.

    :param excel_dir: path to the .xlsx file (required).
    :param sheet_lst: list of sheet names to read (required, must be a list).
    :param skip_foot: number of footer rows to skip in every sheet.
    :raises ValueError: if excel_dir is missing or sheet_lst is not a list.
    """
    if excel_dir is None:
        raise ValueError('Should Set Excel Path!')
    # bug fix: the original used `and`, which let non-list, non-None values
    # (e.g. a bare string) slip past validation straight into read_excel
    if sheet_lst is None or not isinstance(sheet_lst, list):
        raise ValueError('Should Set Excel Sheet List, Value is List Type!')
    for st in sheet_lst:
        # `sheetname` / `skip_footer` were removed from pandas.read_excel;
        # the current keywords are `sheet_name` / `skipfooter`.
        # (the dead `df_dic` accumulator and `return df_dic` were removed --
        # a generator's return value was never observable to callers)
        yield pd.read_excel(io=excel_dir, sheet_name=st, skipfooter=skip_foot)
In [4]:
# column groups used when reshaping (melting) the OD matrix into long format
base_col =['type', 'apt', 'model', 'Flight ID', 'Flight type', 'by pcs of ULD', 'MHS ULD', 'landing_time']
round_col = ['by pcs of ULD', 'pcs_sum']
dest_col = ['dest_city', 'dest_opt', 'dest_model']
new_col = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'MHS ULD', 'landing_time', 'dest_city', 'dest_opt', 'dest_model', 'pcs_sum']
# earlier melt experiment, kept for reference (disabled):
#air_df_parcel = sorted_df.loc[sorted_df.model.isin(['D', 'I', 'INF']),:]
# melt_df = df_melt_data_format(
#     df=air_df_parcel, base_col= base_col, melt_name='dest', value_name='pcs_sum', round_col=round_col)
# melt_df['dest_city'], melt_df['dest_opt'], melt_df['dest_model'] = melt_df.dest.str.split('_').str
# melt_df = melt_df.drop(labels=['dest'], axis=1)
# melt_df = melt_df.loc[:,new_col]
# melt_df.head(10)
In [14]:
# ============================================== parcel data ===========================
# read_excel_split (tools.func_abc star import -- TODO confirm) splits the sheet into
# flight-metadata columns (left) and the OD destination matrix (right).
# skip_footer=57 duplicates SHEEF_FOOT_SKIP_NUM from the config cell.
df_parcel_left, df_parcel_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Parcel pcs', left_parse_col=other_left_parse_cols,
    right_parse_col=other_right_parse_cols, skip_footer=57)
In [7]:
# ================================================= small bag data ===================
# same split as the parcel sheet; NaN cells are filled with 0
df_small_left, df_small_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Small bag pcs', left_parse_col=other_left_parse_cols,
    right_parse_col=other_right_parse_cols, skip_footer=57, fill_nan=0)
In [8]:
# ================================================= irregular parcel data ===========
df_irregular_left, df_irregular_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Irregular pcs', left_parse_col=other_left_parse_cols,
    right_parse_col=other_right_parse_cols, skip_footer=57, fill_nan=0)
In [9]:
# ================================================= NC parcel data =================
df_nc_left, df_nc_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='NC pcs', left_parse_col=other_left_parse_cols,
    right_parse_col=other_right_parse_cols, skip_footer=57, fill_nan=0)
In [10]:
# ================================================= mail parcel data ==============
# note: variables are named "ims" here but the downstream value column is 'isb_sum'
df_ims_left, df_ims_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='Mail pcs', left_parse_col=other_left_parse_cols,
    right_parse_col=other_right_parse_cols, skip_footer=57, fill_nan=0)
In [11]:
# ================================================= sorted parcel data ===========
# the 'sorting by pcs' sheet uses wider left columns (includes ULD counts)
df_sorted_left, df_sorted_right = read_excel_split(
    excel_dir=excel_dir, sheet_name='sorting by pcs', left_parse_col=sorted_left_parse_cols,
    right_parse_col=sorted_right_parse_cols, skip_footer=57, fill_nan=0)
In [12]:
# airside flight models -- presumably I(nternational)/D(omestic)/INF; confirm with data owner
air_model_list = ['I', 'D', 'INF']
select_list = ['MHS', 'type', 'apt', 'model', 'Flight ID', 'Crossdock ULD', 'Dom-ULD', 'Inter-ULD', 'Loaded ULD',
               'sum pcs', 'landing_time']
# left-table column selections for the different sheet layouts
sort_parcel_selctc_left = ['apt', 'model', 'Flight ID', 'landing_time']
sort_parcel_sum_selctc_left = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'MHS ULD', 'landing_time']
cross_parcel_sum_selctc_left = ['apt', 'model', 'Flight ID', 'by pcs of ULD', 'landing_time']
# [[columns], decimals] rounding specs consumed by TableT
left_round_list = [[['Crossdock_ULD', 'Dom_ULD', 'Inter_ULD','Loaded_ULD'], 0], [['sum_pcs'], 2]]
sort_parcel_sum_left_round_list = [[['by_pcs_of_ULD'], 2]]
# names for the three parts of the 'city_apt_model' destination header
index_slip_name = ['dest_city', 'dest_apt', 'dest_model']
right_round_list = [[None,2]]
in_tabel_merge_on = ['index_num']
merg_data_on = [
    'apt', 'model', 'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model'
]
# TableT (tools.func_abc -- TODO confirm) builds/merges the left/right tables per model list
a_pcs = TableT(air_model_list)
In [651]:
# NOTE(review): dead cell (execution count 651) -- superseded by the per-category
# cells below; kept only for reference and safe to delete
# # ====================================get left table =======================================
# a_df_pcs_left = a_pcs.get_left_table(df=df_parcel_left, select_list=select_list, left_round_list=left_round_list)
# a_df_pcs_left.tail(7)
# # ====================================get right table ======================================
# a_df_pcs_right = a_pcs.get_right_table(df=df_pcs_right, drop_col='model',
#     round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='pcs_sum')
# a_df_pcs_right.tail(2)
# # ====================================merge data ===========================================
# a_df_pcs_data = a_pcs.get_merge_table(
#     df_left=a_df_pcs_left, df_right=a_df_pcs_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
# log_ipy(f'PCS Size: {a_df_pcs_data.index.size}')
# a_df_pcs_data.tail(2)
In [15]:
# ==================================== parcel: left table ====================================
a_df_parcel_left = a_pcs.get_left_table(df=df_parcel_left, select_list=sort_parcel_selctc_left)
a_df_parcel_left.tail(7)
# ==================================== parcel: right table ===================================
a_df_parcel_right = a_pcs.get_right_table(df=df_parcel_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='parcel_sum')
# 'INF.1' is presumably pandas' duplicate-column suffix for a second 'INF' header -- confirm
a_df_parcel_right.query("dest_model=='INF.1'").tail(2)
# ==================================== merge left + right ====================================
a_df_parcel_data = a_pcs.get_merge_table(
    df_left=a_df_parcel_left, df_right=a_df_parcel_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log_ipy(f'Parcel Size: {a_df_parcel_data.index.size}')
a_df_parcel_data.query("dest_model=='INF.1'").tail(2)
a_df_parcel_data.head(2)
Out[15]:
In [16]:
# ==================================== small bag: left table =================================
# NOTE(review): this left/right/merge pattern is copy-pasted for 5 categories; the land-side
# cells below wrap the same steps in merg_all_data -- consider reusing that here
a_df_sb_left = a_pcs.get_left_table(df=df_small_left, select_list=sort_parcel_selctc_left)
log(f'Small Bag Left Size: {a_df_sb_left.index.size}')
# a_df_sb_left.tail(7)
# ==================================== small bag: right table ================================
a_df_sb_right = a_pcs.get_right_table(df=df_small_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='small_sum')
log(f'Small Bag Right Size: {a_df_sb_right.index.size}')
# a_df_parcel_right.head(2)
# ==================================== merge left + right ====================================
a_df_sb_data = a_pcs.get_merge_table(
    df_left=a_df_sb_left, df_right=a_df_sb_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'Small Bag Data Size: {a_df_sb_data.index.size}')
a_df_sb_data.head(2)
Out[16]:
In [17]:
# ==================================== irregular: left table =================================
a_df_irregular_left = a_pcs.get_left_table(df=df_irregular_left, select_list=sort_parcel_selctc_left)
log(f'Irregular Parcel Left Size: {a_df_irregular_left.index.size}')
# a_df_sb_left.tail(7)
# ==================================== irregular: right table ================================
# note the 'irregula' typo in the variable name -- kept, it is referenced below
a_df_irregula_right = a_pcs.get_right_table(df=df_irregular_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='irregular_sum')
log(f'Irregular Parcel Right Size: {a_df_irregula_right.index.size}')
# a_df_parcel_right.head(2)
# ==================================== merge left + right ====================================
a_df_irregular_data = a_pcs.get_merge_table(
    df_left=a_df_irregular_left, df_right=a_df_irregula_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'Irregular Parcel Data Size: {a_df_irregular_data.index.size}')
a_df_irregular_data.head(2)
Out[17]:
In [18]:
# ==================================== NC: left table ========================================
a_df_nc_left = a_pcs.get_left_table(df=df_nc_left, select_list=sort_parcel_selctc_left)
log(f'NC Parcel Left Size: {a_df_nc_left.index.size}')
# a_df_sb_left.tail(7)
# ==================================== NC: right table =======================================
a_df_nc_right = a_pcs.get_right_table(df=df_nc_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='nc_sum')
log(f'NC Parcel Right Size: {a_df_nc_right.index.size}')
# a_df_parcel_right.head(2)
# ==================================== merge left + right ====================================
a_df_nc_data = a_pcs.get_merge_table(
    df_left=a_df_nc_left, df_right=a_df_nc_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'NC Parcel Data Size: {a_df_nc_data.index.size}')
a_df_nc_data.head(2)
Out[18]:
In [19]:
# ==================================== ISB (mail): left table ================================
a_df_isb_left = a_pcs.get_left_table(df=df_ims_left, select_list=sort_parcel_selctc_left)
log(f'ISB Left Size: {a_df_isb_left.index.size}')
# a_df_sb_left.tail(7)
# ==================================== ISB (mail): right table ===============================
a_df_isb_right = a_pcs.get_right_table(df=df_ims_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='isb_sum')
log(f'ISB Right Size: {a_df_isb_right.index.size}')
# a_df_parcel_right.head(2)
# ==================================== merge left + right ====================================
a_df_isb_data = a_pcs.get_merge_table(
    df_left=a_df_isb_left, df_right=a_df_isb_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
log(f'ISB Data Size: {a_df_isb_data.index.size}')
a_df_isb_data.head(2)
Out[19]:
In [20]:
# ==================================== sorted: left table ====================================
a_df_sorted_left = a_pcs.get_left_table(df=df_sorted_left,
                                        select_list=sort_parcel_sum_selctc_left,
                                        left_round_list=sort_parcel_sum_left_round_list)
log(f'Sorted Left Size: {a_df_sorted_left.index.size}')
# a_df_sorted_left.tail(7)
# ==================================== sorted: right table ===================================
a_df_sorted_right = a_pcs.get_right_table(df=df_sorted_right, drop_col='model',
    round_list=right_round_list, index_slip_name=index_slip_name, column_index_data_name='sorted_sum')
log(f'Sorted Right Size: {a_df_sorted_right.index.size}')
# a_df_sorted_right.head(2)
# ==================================== merge left + right ====================================
a_df_sorte_data = a_pcs.get_merge_table(
    df_left=a_df_sorted_left, df_right=a_df_sorted_right, merge_on=in_tabel_merge_on , drop_cloum=in_tabel_merge_on)
# bug fix: `rename_axis` with a dict mapper (to rename columns) was deprecated in
# pandas 0.21 and later removed; `rename(columns=...)` is the supported call
a_df_sorte_data = a_df_sorte_data.rename(
    columns={"by_pcs_of_ULD": "sorted_by_pcs_of_ULD", "MHS_ULD": "sorted_MHS_ULD"})
log(f'Sorted Data Size: {a_df_sorte_data.index.size}')
a_df_sorte_data.tail(2)
# a_df_sorte_data.loc[a_df_sorte_data.Flight_ID.isin(['CSS68']), ['sorted_sum']].agg(['sum'])
Out[20]:
In [21]:
# merge the sorted baseline with every per-category frame (left join on flight + dest keys)
# df_right_sample = a_df_parcel_data.head(test_num)
a_df_data = merg_data(a_df_sorte_data,
    [a_df_parcel_data, a_df_sb_data, a_df_irregular_data, a_df_nc_data, a_df_isb_data
     ],
    'left', merg_data_on)
# keep only rows that actually carry sorted volume
a_df_data_no_zero = a_df_data.query('sorted_sum>0')
# df_to_excel(file_name='a_pcs', sheet='a_pcs', df=a_df_data_no_zero)
log(f'Air Data Size: {a_df_data.index.size}')
log(f'No Zero Air Data Size: {a_df_data_no_zero.index.size}')
# log(a_df_data.loc[a_df_data.Flight_ID.isin(['CSS68']),['sorted_sum']].agg(['sum']))
a_df_data_no_zero.sample(7)
Out[21]:
In [22]:
# land-side ('R') counterpart of the airside pipeline above; these names deliberately
# re-bind the globals used by merg_all_data below
land_model_list = ['R']
land_sort_parcel_left = ['apt', 'model', 'Flight ID', 'landing_time']
index_slip_name = ['dest_city', 'dest_apt', 'dest_model']
right_round_list = [[None,2]]
in_tabel_merge_on = ['index_num']
merg_data_on = [
    'apt', 'model', 'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model'
]
l_pcs = TableT(land_model_list)
# =================== sorted data ============================
def merg_all_data(cls_func, df_l, df_r, value_name, log_name):
    """Build the left and right tables for one parcel category and merge them.

    Relies on the module-level configuration (`land_sort_parcel_left`,
    `right_round_list`, `index_slip_name`, `in_tabel_merge_on`).

    :param cls_func: a TableT instance providing the table builders
    :param df_l: raw left (flight metadata) frame for the category
    :param df_r: raw right (OD matrix) frame for the category
    :param value_name: name to give the category's value column
    :param log_name: label used in the size log line
    :return: the merged DataFrame
    """
    left_tbl = cls_func.get_left_table(df=df_l, select_list=land_sort_parcel_left)
    right_tbl = cls_func.get_right_table(
        df=df_r, drop_col='model', round_list=right_round_list,
        index_slip_name=index_slip_name, column_index_data_name=value_name)
    merged = cls_func.get_merge_table(
        df_left=left_tbl, df_right=right_tbl,
        merge_on=in_tabel_merge_on, drop_cloum=in_tabel_merge_on)
    log(f'{log_name} Merge Data Size: {merged.index.size}')
    return merged
In [23]:
# ======================================== sorted data =================================
l_df_sorte_data = merg_all_data(l_pcs, df_sorted_left, df_sorted_right, 'sorted_sum', 'sorted')
l_df_sorte_data.tail(10),  # NOTE(review): trailing comma makes this a 1-tuple -- likely a typo
# ======================================= parcel data ==================================
l_df_parcel_data = merg_all_data(l_pcs, df_parcel_left, df_parcel_right, 'parcel_sum', 'Parcel')
l_df_parcel_data.tail(10)
# ======================================= small bag data ===============================
l_df_small_data = merg_all_data(l_pcs, df_small_left, df_small_right, 'small_sum', 'small')
l_df_small_data.tail(10)
# ======================================= irregular parcel data ========================
l_df_irregular_data = merg_all_data(l_pcs, df_irregular_left, df_irregular_right, 'irregular_sum', 'irregular')
# l_df_irregular_data.tail(10)
# ================================================= NC parcel data =====================
l_df_nc_data = merg_all_data(l_pcs, df_nc_left, df_nc_right, 'nc_sum', 'NC')
# l_df_nc_data.tail(10)
# ================================================= mail (IMS) parcel data =============
l_df_ims_data = merg_all_data(l_pcs, df_ims_left, df_ims_right, 'isb_sum', 'IMS')
l_df_ims_data.tail(2)
Out[23]:
In [24]:
# ================================================= merge all land data =========================
l_df_data = merg_data(l_df_sorte_data,
    [l_df_parcel_data,
     l_df_small_data, l_df_irregular_data, l_df_nc_data, l_df_ims_data
     ],
    'left', merg_data_on)
# keep only rows that carry sorted volume
l_df_data_no_zero = l_df_data.query('sorted_sum>0')
# df_to_excel(file_name='l_pcs', sheet='l_pcs', df=l_df_data_no_zero)
log(f'Land Data Size: {l_df_data.index.size}')
log(f'No Zero Land Data Size: {l_df_data_no_zero.index.size}')
# log(a_df_data.loc[a_df_data.Flight_ID.isin(['CSS68']),['sorted_sum']].agg(['sum']))
l_df_data_no_zero.query("isb_sum>0").head(7)
Out[24]:
In [25]:
# per-category value columns produced by the merges above
parcel_data = ['parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum']
# key columns + value columns for inspection
head_data = [
    'apt', 'model', 'Flight_ID', 'landing_time','dest_city','dest_apt', 'dest_model',
    'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
]
all_data = ['apt', 'model', 'Flight_ID', 'sorted_by_pcs_of_ULD', 'sorted_MHS_ULD',
            'landing_time', 'dest_city', 'dest_apt', 'dest_model',
            'sorted_sum', 'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum']
# precision control: rounding rule for values below 1
def right_back(dec, right_p):
    """Round a value below 1 to either 0 or 1.

    :param dec: the fractional value to round
    :param right_p: threshold; values strictly above it become 1, others 0
    :return: 1 or 0
    """
    return 1 if dec > right_p else 0
# precision control: rounding rule for values of 1 or more
def left_back(dec, left_p):
    """Round `dec` up or down depending on its fractional part.

    The fractional part (rounded to 2 decimals) is compared with `left_p`:
    at or above the threshold the value is rounded up, otherwise truncated.

    :param dec: the value to round
    :param left_p: threshold for rounding up
    :return: an int
    """
    whole = int(dec)
    frac = round(dec - whole, 2)
    return whole + 1 if frac >= left_p else whole
def creat_od_table():
    """Create the `i_od_parcel` MySQL table if it does not already exist.

    Uses the project's MySQLConfig engine (presumably from the tools star
    import -- TODO confirm).  The column layout mirrors the parcel output
    frames built below: ids, source/plate/ULD identifiers, parcel type,
    arrival time and destination fields.
    """
    db_eng = MetaData(bind=MySQLConfig.engine)
    machine_table_sche = \
        Table(
            "i_od_parcel",
            db_eng,
            Column("small_id", VARCHAR(length=10, )),
            Column("parcel_id", VARCHAR(length=20, )),
            Column("src_type", VARCHAR(length=5, )),
            Column("plate_num", VARCHAR(length=20, )),
            Column("uld_num", VARCHAR(length=20, )),
            Column("parcel_type", VARCHAR(length=10, )),
            Column("arrive_time", DateTime()),
            Column("dest_code", VARCHAR(length=10, )),
            Column("dest_apt", VARCHAR(length=10, )),
            Column("dest_type", VARCHAR(length=5, )),
        )
    # checkfirst: silently a no-op when the table already exists
    machine_table_sche.create(checkfirst=True)
In [26]:
# re-read the exported airside data from disk
file_name = "a_pcs"
file_dir = os.path.join(DEFAULT_DATA_PATH, file_name + '.xlsx')
# bug fix: the `sheetname` keyword was removed from pandas.read_excel;
# the current keyword is `sheet_name`
a_pcs_data2 = pd.read_excel(io=file_dir, sheet_name='a_pcs')
a_pcs_data3 = pd.read_excel(io=file_dir, sheet_name='a_pcs')
In [27]:
# airside totals per category before any rounding (baseline for later comparison)
f0 = a_pcs_data2.loc[:, parcel_data]
f0['sorted_sum'] = f0.loc[:,parcel_data].sum(axis=1)
f0_t = f0.sum().to_frame().T
f0_t
Out[27]:
In [28]:
# land-side totals per category before any rounding (baseline for later comparison)
l_f0 = l_df_data_no_zero.loc[:, parcel_data]
l_f0['sorted_sum'] = l_f0.loc[:,parcel_data].sum(axis=1)
l_f0_t = l_f0.sum().to_frame().T
l_f0_t
Out[28]:
In [29]:
# airside totals with small bags expanded, before rounding
# `.copy()` avoids mutating a view of a_pcs_data2 (chained-assignment warning)
f1 = a_pcs_data2.loc[:, parcel_data].copy()
# each small/ISB bag counts as 20 parcels; plain vectorised multiply
# replaces DataFrame.applymap, which is deprecated since pandas 2.1
f1.loc[:, ['small_sum', 'isb_sum']] = f1.loc[:, ['small_sum', 'isb_sum']] * 20
f1['sorted_sum'] = f1.loc[:, parcel_data].sum(axis=1)
f1_t = f1.sum().to_frame().T
In [30]:
# =============================== per-column rounding thresholds
# 'L' is used for values >= 1 (left_back), 'R' for values < 1 (right_back)
parcel_round = {'parcel_sum': {'L': 0.97, 'R': 0.01}, 'small_sum': {'L': 0.6, 'R': 0.01},
                'irregular_sum': {'L': 0.98, 'R': 0.2}, 'nc_sum': {'L': 0.98, 'R': 0.2},
                'isb_sum': {'L': 0.5, 'R': 0.01}}
# =============================== take the columns to round; expand small bags (x20)
# `.copy()` avoids mutating a view; vectorised multiply replaces deprecated applymap
f3 = a_pcs_data2.loc[:, parcel_data].copy()
f3.loc[:, ['small_sum', 'isb_sum']] = f3.loc[:, ['small_sum', 'isb_sum']] * 20
# ============================== apply precision rounding per column
for col in parcel_data:
    f3.loc[:, col] = f3.loc[:, col].apply(
        lambda x: left_back(x, parcel_round[col]['L']) if x >= 1 else right_back(x, parcel_round[col]['R']))
f3['sorted_sum'] = f3.loc[:, parcel_data].sum(axis=1)
# f3.tail(10)
# ========================= data analysis ==================
f3_t = f3.sum().to_frame().T; f3_t
# ============================== compare totals before vs after rounding
ft_parcel = pd.concat([f1_t, f3_t])
ft_parcel = ft_parcel.reset_index(drop=True).rename({0: 'befor', 1: 'after_round'})
ft_parcel
Out[30]:
In [31]:
# land-side totals with small bags expanded, before rounding
# `.copy()` avoids mutating a view; vectorised multiply replaces deprecated applymap
l_f1 = l_df_data_no_zero.loc[:, parcel_data].copy()
l_f1.loc[:, ['small_sum', 'isb_sum']] = l_f1.loc[:, ['small_sum', 'isb_sum']] * 20
l_f1['sorted_sum'] = l_f1.loc[:, parcel_data].sum(axis=1)
l_f1_t = l_f1.sum().to_frame().T
# =============================== per-column rounding thresholds (land side)
l_parcel_round = {'parcel_sum': {'L': 0.99, 'R': 0.05}, 'small_sum': {'L': 0.55, 'R': 0.3},
                  'irregular_sum': {'L': 0.99, 'R': 0.27}, 'nc_sum': {'L': 0.6, 'R': 0.08},
                  'isb_sum': {'L': 0.5, 'R': 0.4}}
# =============================== take columns to round; expand small bags (x20)
l_f3 = l_df_data_no_zero.loc[:, parcel_data].copy()
l_f3.loc[:, ['small_sum', 'isb_sum']] = l_f3.loc[:, ['small_sum', 'isb_sum']] * 20
# ============================== apply precision rounding per column
for col in parcel_data:
    l_f3.loc[:, col] = l_f3.loc[:, col].apply(
        lambda x: left_back(x, l_parcel_round[col]['L']) if x >= 1 else right_back(x, l_parcel_round[col]['R']))
l_f3['sorted_sum'] = l_f3.loc[:, parcel_data].sum(axis=1)
l_f3.tail(10)
# ========================= data analysis ==================
l_f3_t = l_f3.sum().to_frame().T; l_f3_t
# ============================== compare totals before vs after rounding
l_ft_parcel = pd.concat([l_f1_t, l_f3_t])
l_ft_parcel = l_ft_parcel.reset_index(drop=True).rename({0: 'befor', 1: 'after_round'})
l_ft_parcel
Out[31]:
In [32]:
# airside output frame: expand small bags and apply the rounding thresholds
opt_df = a_pcs_data3.copy()
# vectorised multiply replaces DataFrame.applymap (deprecated since pandas 2.1)
opt_df.loc[:, ['small_sum', 'isb_sum']] = opt_df.loc[:, ['small_sum', 'isb_sum']] * 20
# =========================== round each category column ==========
for col in parcel_data:
    opt_df.loc[:, col] = opt_df.loc[:, col].apply(
        lambda x: left_back(x, parcel_round[col]['L']) if x >= 1 else right_back(x, parcel_round[col]['R']))
opt_df['sorted_sum'] = opt_df.loc[:, parcel_data].sum(axis=1)
opt_df.head(2)
Out[32]:
In [33]:
# land-side output frame: expand small bags and apply the rounding thresholds
l_opt_df = l_df_data_no_zero.copy()
# vectorised multiply replaces DataFrame.applymap (deprecated since pandas 2.1)
l_opt_df.loc[:, ['small_sum', 'isb_sum']] = l_opt_df.loc[:, ['small_sum', 'isb_sum']] * 20
# =========================== round each category column ==========
for col in parcel_data:
    l_opt_df.loc[:, col] = l_opt_df.loc[:, col].apply(
        lambda x: left_back(x, l_parcel_round[col]['L']) if x >= 1 else right_back(x, l_parcel_round[col]['R']))
l_opt_df['sorted_sum'] = l_opt_df.loc[:, parcel_data].sum(axis=1)
l_opt_df.head(2)
Out[33]:
In [34]:
# columns kept fixed while melting the per-category sums into long format
base_col = ['model', 'Flight_ID', 'sorted_by_pcs_of_ULD', 'sorted_MHS_ULD', 'landing_time', 'dest_city', 'dest_apt', 'dest_model']
melt_col = [
    #'sorted_sum',
    'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
]
opt_melt_df = df_melt_data_format(df=opt_df, base_col=base_col, melt_col=melt_col, melt_name='parcel_type', value_name='num')
# 'parcel_sum' -> 'parcel', etc.
opt_melt_df.parcel_type = opt_melt_df.parcel_type.str.replace('_sum', '')
opt_melt_df = opt_melt_df.query('num>0')
# transpose so each flight/destination row becomes a column we can iterate over
tt = opt_melt_df.T
# expand the data to one row per physical parcel
def map_col(series, key):
    # repeat the whole row `series[key]` times (num must be an integer)
    yield from [series]*series[key]
# build the repeated rows according to the parcel count
# NOTE(review): row-by-row Series expansion is slow and flattens dtypes;
# opt_melt_df.loc[opt_melt_df.index.repeat(opt_melt_df.num)] would do the same vectorised
temp = []
for iter_col in tt.items():
    for col in map_col(iter_col[1], 'num'):
        temp.append(col)
opt_pca_data_a = pd.DataFrame(data=temp)
opt_pca_data_a = opt_pca_data_a.reset_index(drop=True)
opt_pca_data_a.head(2)
# add an ObjectId as parcel id (disabled)
# opt_pca_data_a['id_parcel'] = pd.Series(data=[str(bson.objectid.ObjectId()) for _ in range(opt_pca_data_a.index.size)])
Out[34]:
In [35]:
# total number of expanded airside parcels
opt_pca_data_a.index.size
Out[35]:
In [36]:
# land side: fixed columns for the melt (no ULD columns on land data)
base_col = ['model', 'Flight_ID', 'landing_time', 'dest_city', 'dest_apt', 'dest_model']
melt_col = [
    #'sorted_sum',
    'parcel_sum', 'small_sum', 'irregular_sum', 'nc_sum', 'isb_sum'
]
l_opt_melt_df = df_melt_data_format(df=l_opt_df, base_col=base_col, melt_col=melt_col, melt_name='parcel_type', value_name='num')
l_opt_melt_df.parcel_type = l_opt_melt_df.parcel_type.str.replace('_sum', '')
l_opt_melt_df = l_opt_melt_df.query('num>0')
# transpose so each row becomes a column we can iterate over
l_tt = l_opt_melt_df.T
# expand to one row per physical parcel
# NOTE(review): redefines map_col from the airside cell (same signature) -- consider one shared def
def map_col(series, key):
    # repeat the whole row `series[key]` times (num must be an integer)
    yield from [series]*series[key]
# build the repeated rows according to the parcel count
l_temp = []
for iter_col in l_tt.items():
    for col in map_col(iter_col[1], 'num'):
        l_temp.append(col)
l_opt_pca_data = pd.DataFrame(data=l_temp)
l_opt_pca_data = l_opt_pca_data.reset_index(drop=True)
# add an ObjectId as parcel id (disabled)
# opt_pca_data_a['id_parcel'] = pd.Series(data=[str(bson.objectid.ObjectId()) for _ in range(opt_pca_data_a.index.size)])
In [37]:
# spot-check one land flight's small-bag parcels
l_opt_pca_data.query("Flight_ID=='CSSLocalE18' & parcel_type=='small'").sample(10) #.num.count()
Out[37]:
In [38]:
# INF174 has zero configured ULDs in the sheet; the real configuration is 2
opt_pca_data_a.loc[opt_pca_data_a.Flight_ID=='INF174', ['sorted_MHS_ULD']] = 2
# assign each parcel to a random ULD slot in 1..sorted_MHS_ULD
# NOTE(review): no RNG seed anywhere in this notebook -- runs are not reproducible
opt_pca_data_a['id_uld'] = opt_pca_data_a.sorted_MHS_ULD.map(lambda x: np.random.randint(1, int(x)+1))
# opt_pca_data_a.query("Flight_ID=='CSS1' & parcel_type=='small'").groupby(['id_uld']).Flight_ID.count()
opt_pca_data_a.head(7)
Out[38]:
In [39]:
# arc simulation dump (xz-compressed pickle)
# NOTE(review): unpickling executes arbitrary code -- only load trusted files
arc_df = pd.read_pickle(path='data\\arc.xz', compression='xz')
In [40]:
arc_col_select = ['ID', 'Origin', 'Destination', 'TypeIn', 'TypeOut', 'FltIn', 'FltOut',
                  'ULDIn', 'ULDOut', 'AMIT']
# ================================ airside rows only; count distinct ULDs per inbound flight
selec_arc_df = arc_df.query("TypeIn == 'A'").loc[:, arc_col_select]
flyid_uld_df = selec_arc_df.groupby(by=['TypeIn', 'FltIn']).ULDIn.apply(set).apply(len).to_frame()
flyid_uld_df = flyid_uld_df.reset_index()
# bug fix: `rename_axis` with a dict mapper was deprecated in pandas 0.21 and
# later removed; `rename(columns=...)` is the supported call
flyid_uld_df = flyid_uld_df.rename(columns={'ULDIn': 'ULD_CON'})
flyid_uld_df
# ================================ distinct parcel count per (flight, ULD, arrival time)
uld_amit_par_df = selec_arc_df.groupby(by=['TypeIn', 'FltIn', 'ULDIn', 'AMIT']).ID.apply(set).apply(len).to_frame()
uld_amit_par_df = uld_amit_par_df.reset_index()
uld_amit_par_df = uld_amit_par_df.rename(columns={'ID': 'PAR_NUM'})
# ================================ attach the per-flight ULD count to every row
uld_amit_par_df = pd.merge(left=uld_amit_par_df, right=flyid_uld_df, on=['TypeIn', 'FltIn'], how='left')
# tidy the column order
uld_amit_par_df = uld_amit_par_df.loc[:,['FltIn', 'ULDIn', 'AMIT', 'PAR_NUM', 'ULD_CON']]
uld_amit_par_df.head(2)
# df_to_excel(df=uld_amit_par_df, file_name='uld_con', sheet='uld_con')
Out[40]:
In [41]:
arc_col_select = ['ID', 'FltIn', 'ALDT']
# land-side rows: distinct parcel count per (flight, actual landing time)
l_selec_arc_df = arc_df.query("TypeIn == 'L'").loc[:, arc_col_select]
l_arc_landtime_df = l_selec_arc_df.groupby(by=[ 'FltIn', 'ALDT']).ID.apply(set).apply(len).to_frame()
l_arc_landtime_df.tail(2)
Out[41]:
In [42]:
uld_id_df = flyid_uld_df.T
def map_col(uld_id_df):
    """For every flight column, yield ULD_CON copies of the row, each tagged
    with a sequential `id_uld` in 1..ULD_CON."""
    # bug fix: DataFrame.iteritems was removed in pandas 2.0; `items` is the replacement
    for col in uld_id_df.items():
        for i in range(1, col[1]['ULD_CON'] + 1):
            col[1]['id_uld'] = i
            yield col[1].copy()
col_df = pd.DataFrame(data=list(map_col(uld_id_df))).reset_index(drop=True)
# NOTE(review): assumes the expansion order of col_df lines up row-for-row with
# uld_amit_par_df -- confirm the two frames really have matching length/order
uld_amit_par_df['id_uld'] = col_df.id_uld
# bug fix: dict-mapper `rename_axis` was deprecated/removed; use rename(columns=...)
uld_match_table = uld_amit_par_df.rename(columns={'FltIn': 'Flight_ID', 'ULDIn': 'ULD_ID', 'AMIT': 'arrive_time'})
uld_match_table.head(2)
# df_to_excel(df=uld_amit_par_df, file_name='uld_amit_par_df', sheet='uld_amit_par_df')
# df_to_excel(df=uld_amit_par_df, file_name='uld_amit_par_df', sheet='uld_amit_par_df')
Out[42]:
In [43]:
# spot-check: ISB parcels on flight CSS100
opt_pca_data_a.loc[opt_pca_data_a.Flight_ID.isin(['CSS100']) & opt_pca_data_a.parcel_type.isin(['isb']),:].head(2)
Out[43]:
In [44]:
# attach the real ULD ids/arrival times to every expanded airside parcel
merg_col = ['Flight_ID', 'id_uld']
a_par_data_df = opt_pca_data_a.merge(right=uld_match_table, on=merg_col, how='left')
a_par_output_data_df = a_par_data_df.loc[:, [
    'model', 'Flight_ID', 'ULD_ID', 'arrive_time', 'parcel_type', 'dest_city', 'dest_apt', 'dest_model']
]
# bug fix: dict-mapper `rename_axis` was deprecated/removed in pandas; use rename(columns=...)
a_par_output_data_df = a_par_output_data_df.rename(columns={
    'model': 'src_type', 'Flight_ID': 'plate_num', 'ULD_ID': 'uld_num', 'dest_city': 'dest_code', 'dest_model': 'dest_type'})
# a_par_output_data_df.loc[a_par_output_data_df.ULD_ID.isnull(),:]
# df_to_pickle(df=a_par_output_data_df, file_name='parcel_data_erzhou_airside.pkl')
# df_to_excel(df=a_par_output_data_df, file_name='parcel_data_erzhou_airside', sheet='parcel_data_erzhou_airside')
# strip pandas' duplicate-column suffix (e.g. 'INF.1' -> 'INF'); raw string +
# regex=True because Series.str.replace stopped treating patterns as regex by default (pandas 2.0)
a_par_output_data_df.dest_type = a_par_output_data_df.dest_type.str.replace(r'\.[0-9]$', '', regex=True)
a_par_output_data_df.head(2)
Out[44]:
In [47]:
# count small/ISB parcels per (plate, ULD, arrival, type)
a_small_data_con = a_par_output_data_df.query("parcel_type == 'small' | parcel_type == 'isb'"
    ).groupby(['plate_num','uld_num', 'arrive_time', 'parcel_type']
    ).parcel_type.count().to_frame().copy()
# bug fix: dict-mapper `rename_axis` was deprecated/removed in pandas; rename the
# count column (still the only column at this point) via rename(columns=...)
a_small_data_con = a_small_data_con.rename(columns={'parcel_type': 'con'})
a_small_data_con = a_small_data_con.reset_index()
# 20 parcels per small bag
a_small_data_con['sb_sum'] = a_small_data_con.con.map(lambda x : round(x/20, 2))
a_small_data_con.query("plate_num=='CSS100' & uld_num == 'ULD40657472' & parcel_type == 'isb'")
Out[47]:
In [48]:
# land side: count small/ISB parcels per (flight, landing time, type)
l_small_data_con = l_opt_pca_data.query("parcel_type == 'small' | parcel_type == 'isb'"
    ).groupby(['Flight_ID', 'landing_time', 'parcel_type']
    ).model.count().to_frame().copy() #.tail(5)
l_small_data_con = l_small_data_con.reset_index()
# bug fix: dict-mapper `rename_axis` was deprecated/removed in pandas; use rename(columns=...)
l_small_data_con = l_small_data_con.rename(columns={'model': 'par_num'})
# 20 parcels per small bag
l_small_data_con['small_num'] = l_small_data_con.par_num.map(lambda x : round(x/20, 2))
l_small_data_con.query("Flight_ID=='CSSLocalE18'")
Out[48]:
In [49]:
def _round_one(df, col_list, round_map):
    """Apply the project's precision rounding to each column in `col_list`.

    Values >= 1 go through `left_back` with the column's 'L' threshold,
    values < 1 through `right_back` with its 'R' threshold.  Mutates `df`
    in place and returns it for chaining.
    """
    for name in col_list:
        thresholds = round_map[name]
        df.loc[:, name] = df.loc[:, name].apply(
            lambda val: left_back(val, thresholds['L']) if val >= 1 else right_back(val, thresholds['R']))
    return df
# near-zero thresholds: any positive fraction rounds up, i.e. a ceiling on bag counts
a_small_data_con_round = _round_one(a_small_data_con, ['sb_sum'], {'sb_sum': {'L': 0.00001, 'R': 0.000001}})
small_bag_map = a_small_data_con_round.loc[:, ['uld_num', 'parcel_type', 'sb_sum']]
small_bag_map.head(2)
Out[49]:
In [50]:
# land side: round small-bag counts to whole bags (near-zero thresholds act like a ceiling)
l_small_data_con_round = _round_one(l_small_data_con, ['small_num'], {'small_num': {'L': 0.00001, 'R': 0.000001}})
l_small_bag_map = l_small_data_con_round.loc[:, ['Flight_ID', 'parcel_type', 'small_num']]
l_small_bag_map.query("Flight_ID=='CSSLocalE18'").head(10)
Out[50]:
In [55]:
# first synthetic numeric id for the airside parcel rows
SMALL_ID = 1000_0001
a_par_small_bag_df = a_par_output_data_df.merge(small_bag_map, how='left', on=['uld_num', 'parcel_type'])
a_par_small_bag_df.sb_sum = a_par_small_bag_df.sb_sum.fillna(value=0)
# randomly assign each small/ISB parcel to one of its ULD's bags (0 = not bagged)
a_par_small_bag_df['sb_id'] = a_par_small_bag_df.sb_sum.map(lambda x : np.random.randint(1, int(x)+1) if x >0 else x)
a_par_small_bag_df.sb_id = a_par_small_bag_df.sb_id.apply(int)
# bagged parcels get parcel_id = <uld><type initial><bag#>
a_par_small_bag_df['parcel_id'] = a_par_small_bag_df.sb_id.mask(a_par_small_bag_df.sb_id > 0,
    a_par_small_bag_df.uld_num.apply(str)+a_par_small_bag_df.parcel_type.str.get(0)+a_par_small_bag_df.sb_id.apply(str))
a_par_small_bag_data = a_par_small_bag_df.loc[:,[
    'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]
# sequential numeric id per row
a_par_small_bag_data['small_id'] = pd.Series(data=np.arange(SMALL_ID, SMALL_ID+a_par_small_bag_data.index.size))
# unbagged parcels fall back to their own small_id as parcel_id
a_par_small_bag_data['parcel_id'] = a_par_small_bag_data.parcel_id.mask(a_par_small_bag_data.parcel_id == 0,
    a_par_small_bag_data.small_id)
a_par_small_bag_data
# final column order matches the i_od_parcel table schema
a_par_small_bag_data = a_par_small_bag_data.loc[:, [
    'small_id', 'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]
df_to_excel(df=a_par_small_bag_data, file_name='i_od_parcel_air')
a_par_small_bag_data.sample(5)
# a_par_small_bag_data.to_sql('i_od_parcel', MySQLConfig.engine, index=False, index_label='small_id', if_exists='append')
Out[55]:
In [53]:
# date stamps glued onto the HH:MM:SS landing times in the next cell
DATE_END = '2025-02-08 '
DATE_BEGIN = '2025-02-07 '
# first synthetic numeric id for the land-side parcel rows
L_SMALL_ID = 2000_0001
def counter_rand(map_num, cho_data, cho_counter, end_cho):
    """Randomly draw one value from `cho_data` per element of `map_num`,
    retiring a value once it has been drawn `end_cho` times.

    NOTE(review): not called anywhere in this notebook view.

    :param map_num: iterable that only controls how many draws are made
    :param cho_data: mutable list of candidate values (mutated in place!)
    :param cho_counter: per-value draw counter; presumably a Counter/defaultdict,
        a plain dict would raise KeyError -- confirm with callers
    :param end_cho: maximum number of times a single value may be drawn
    :return: generator of drawn values
    """
    choise_data = cho_data
    for _ in map_num:
        try:
            # np.random.choice raises ValueError once choise_data is empty
            cho_num = np.random.choice(choise_data)
            cho_counter[cho_num] += 1
            yield cho_num
            if cho_counter[cho_num] >= end_cho:
                choise_data.remove(cho_num)
        except ValueError:
            # message (Chinese): "the counter cut-off is too small!"
            raise ValueError('计数器的截止数太小!')
# land side: attach bag counts and build the i_od_parcel-shaped output
l_par_small_bag_df = l_opt_pca_data.merge(l_small_bag_map, how='left', on=['Flight_ID', 'parcel_type'])
l_par_small_bag_df.small_num = l_par_small_bag_df.small_num.fillna(value=0)
# randomly assign each small/ISB parcel to one of the flight's bags (0 = not bagged)
l_par_small_bag_df['sb_id'] = l_par_small_bag_df.small_num.map(lambda x : np.random.randint(1, int(x)+1) if x >0 else x)
l_par_small_bag_df.sb_id = l_par_small_bag_df.sb_id.apply(int)
l_par_small_bag_df['small_id'] = pd.Series(data=np.arange(L_SMALL_ID, L_SMALL_ID+l_par_small_bag_df.index.size))
# bagged parcels get parcel_id = <flight><type initial><bag#>; loose ones keep small_id
l_par_small_bag_df['parcel_id'] = l_par_small_bag_df.sb_id.mask(
    l_par_small_bag_df.sb_id > 0,
    l_par_small_bag_df.Flight_ID.apply(str)+l_par_small_bag_df.parcel_type.str.get(0)+l_par_small_bag_df.sb_id.apply(str))
l_par_small_bag_df['parcel_id'] = l_par_small_bag_df.parcel_id.mask(l_par_small_bag_df.parcel_id == 0, l_par_small_bag_df.small_id)
# bug fix: dict-mapper `rename_axis` was deprecated/removed in pandas; use rename(columns=...)
l_par_small_bag_df = l_par_small_bag_df.rename(columns={
    'model': 'src_type', 'Flight_ID': 'plate_num', 'landing_time': 'arrive_time', 'dest_city': 'dest_code', 'dest_model': 'dest_type'})
# NOTE(review): no 'uld_num' column is visible on the land-side frames; on modern
# pandas this .loc selection raises KeyError -- confirm the column exists upstream
l_par_small_bag_data = l_par_small_bag_df.loc[:, [
    'small_id', 'parcel_id', 'src_type', 'plate_num', 'uld_num', 'arrive_time', 'parcel_type', 'dest_code', 'dest_apt', 'dest_type']]
# flights landing in the 00:xx hour belong to the next calendar day
# bug fix: `pd.datetime` was removed in pandas 2.0; use the stdlib datetime module
l_par_small_bag_data.arrive_time = l_par_small_bag_data.arrive_time.map(
    lambda x: DATE_END+x
    if datetime.datetime.strptime(x, '%H:%M:%S').hour == 0
    else DATE_BEGIN+x)
# strip the duplicate-column suffix; raw string + regex=True because str.replace
# stopped treating patterns as regex by default (pandas 2.0)
l_par_small_bag_data.dest_type = l_par_small_bag_data.dest_type.str.replace(r'\.[0-9]$', '', regex=True)
l_par_small_bag_data.sample(10)
Out[53]:
In [ ]: