param_rule 数据清洗


In [1]:
from dist_time import *
import pandas as pd
from collections.abc import Iterator, Iterable, Generator
from collections import defaultdict
import re
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, Text, DateTime, Float, VARCHAR
from sqlalchemy import Table, MetaData, Column
import itertools as it

class MySQLConfig:
    HOST = '10.0.149.62'
    USER = "root"
    PASS = "root123"
    DB = "cm_erzhou_v1_1"
    CHARSET = 'utf8'

    engine = create_engine(
        f'mysql+pymysql://{USER}:{PASS}@{HOST}/{DB}?charset={CHARSET}',
        isolation_level="READ UNCOMMITTED")

CREATE_TABLE_DB_ENG = MetaData(bind=MySQLConfig.engine)    
    
def creat_table(db, table_name, col_list):
    machine_table_sche = \
    Table(
        table_name,
        db,
        *col_list
    )
    machine_table_sche.create(checkfirst=True)

def to_mysql(table_name: str, data: pd.DataFrame):
    
        try:
            data.to_sql(name=table_name,
                        con=MySQLConfig.engine,
                        if_exists='append',
                        index=0)
            print(f"mysql write table {table_name} succeed!")
            
        except Exception as exc:
            print(f"mysql write table {table_name} failed, error: {exc}.")
            raise Exception
    
    
def dict_to_dataframe(pd: pd, *, parameter_list: list=None):
   
    data = defaultdict(list)
    for paramer in parameter_list:
        data['process_time'].extend(list(paramer.values()))
        data ['parameter_name'].extend(list(paramer.keys()))
        
    return pd.DataFrame(data)


def df_to_excel(file_name, sheet, df):
    file = f"d:/Git/git.jupyter/jupyter_app/ipython/py36_erzhou_input/dist_time/out/{file_name}.xlsx"
    df.to_excel(file, sheet_name=sheet, index=False)
    
    
def check_dic_value(data: dict=None):
    type_dic_value = set()
    if data is None:
        return
    else:
        for _,v in data.items():
            if isinstance(v, list):
                type_dic_value.add('L')
            elif isinstance(v, str):
                type_dic_value.add('S')
            elif isinstance(v, int):
                type_dic_value.add('I')
            else:
                raise ValueError('字典值类型不是字符或列表形式!')
            if len(type_dic_value) !=1:
                raise ValueError('字典值类型既有字符也有列表!')
        return {type_dic_value.pop(): data}

    
def dict_list_to_dataframe(pd: pd.DataFrame, parameter_list: list=None, colum_name: list=None):
    if parameter_list:
        re_list = []
        for data in parameter_list:
#             print(data[1])
            ckeck_data = check_dic_value(data[0])
            for k, _ in ckeck_data.items():
                if k == 'L':
                    re_data = it.chain(
                        *[i for  i in map(
                            lambda x, y : [[x, v, data[1]] for v in y if  isinstance(y, list)], 
                            ckeck_data[k].keys(), ckeck_data[k].values())
                         ]
                    )
                    re_list.extend(re_data)
#                     return pd.DataFrame(list(re_data), columns=['equipment_port_last', 'equipment_port_next'])
                elif k == 'S':
                    re_data = [[k, v, data[1]] for k, v in ckeck_data[k].items()]
                    re_list.extend(re_data)
                elif k == 'I':
                    re_data = [[k, v, data[1]] for k, v in ckeck_data[k].items()]
                    re_list.extend(re_data)
                    
        return pd.DataFrame(re_list, columns=colum_name)
    else:
        return parameter_list

    
def double_list_to_list_tuple(dic_data: dict=None):
    list_tuple = []
    for k, v in dic_data.items():
        lst = list(v)
        if isinstance(lst[0], list):
            if isinstance(lst[1], list):
                
                list_tuple.extend([(k, x, y) for x, y in  it.zip_longest(lst[0], lst[1])])
            elif isinstance(lst[1], str):
                
                list_tuple.extend([(k, x, y) for x, y in  [(e, lst[1]) for e in lst[0]]])
        else:
            list_tuple.extend([(k, x, y) for x, y in  [(e, 'equal') for e in lst]])
#             raise ValueError(f'权值数据异常,请核实:{k} 的权值!')

    return list_tuple
        

def dict_double_list_to_dataframe(pd: pd.DataFrame, parameter_list: list=None, colum_name: list=None):
    if parameter_list:
        re_list = []
        for data in parameter_list:
            ckeck_data = check_dic_value(data[0])
            for k, _ in ckeck_data.items():
                if k == 'L':
                    weight_data = double_list_to_list_tuple(ckeck_data[k])
                    re_data = [(x, y, z, data[1]) for x, y , z in weight_data]
                    re_list.extend(re_data)
                elif k == 'S':
                    re_data = [[k, v, data[1]] for k, v in ckeck_data[k].items()]
                    re_list.extend(re_data)
                elif k == 'I':
                    re_data = [[k, v, data[1]] for k, v in ckeck_data[k].items()]
                    re_list.extend(re_data)
                    
        return pd.DataFrame(re_list, 
                            columns=colum_name
                           )
    else:
        return parameter_list

配置信息表


In [26]:
PARAM_DICT = {
    'ULD_Truck_Capacity':[
        (R_Reload_Truck_Capacity, 'truck_capacity'),  # 卡车容量配置表
        (uld_capacity, 'uld_capacity'),  # uld容量配置表
    ],
    'City_Airport_Code': [
        (city_apt, 'city_airport_code'),  # 城市编码-机场编码表
    ],
    'S_Road_City2Apt': [
        (S_Road_city2apt, 's_road_city2apt'),  #
    ],
    'Flight_ID_Unload_des': [
        (flight_unload, 'flight_unload'),  # 飞机卸货区字典
    ],
    'Secondary_Reload_Sort': [
        (R_Reload_R_Secondary, 'secondary_reload'),  # Secondary_Reload_Sort- R
        (D_Reload_D_Secondary, 'secondary_reload'),  # Secondary_Reload_Sort- D
        (I_Reload_I_Secondary, 'secondary_reload'),  # Secondary_Reload_Sort- I
        
    ],
    'S_Secondary_Reload_Sort': [
        (S_Secondary_apt, 'small_secondary_reload'),  # small_secondary_reload sort- S
    ],
    'Reload_Setting': [
        (R_Secondary_rule, 'reload'),  # reload- R
        (D_Secondary_rule, 'reload'),  # reload- D
        (I_Secondary_rule, 'reload'),  # reload- I
    ],
    'S_Reload_Setting': [
        (S_Secondary_rule, 'small_reload'),  # reload- S
    ],
    'apt_reload': [
        (apt_reload, 'reload_ot'),  # 目的地编码与装货口对应表
    ]
}

# -----------------------------------db eng for creatting tables--------------------
#  uld_truck_capacity_table
# uld_truck_capacity_table = [
#     Column("dest_zone_code", VARCHAR(length=32, )),
#     Column("equipment_capacity", VARCHAR(length=32, )),
#     Column("parameter_name", VARCHAR(length=32, ))
# ]
# city_code_table = [
    
#     Column("city_code", VARCHAR(length=32, )),
#     Column("airport_code", VARCHAR(length=32, )),
#     Column("parameter_name", VARCHAR(length=32, ))
# ]
# s_road_city2apt_table = [
#     Column("city_code", VARCHAR(length=32, )),
#     Column("dest_zone_code", VARCHAR(length=32, )),
#     Column("parameter_name", VARCHAR(length=32, ))
# ]
# flight_unload_setting_table = [
#     Column("flight_id", VARCHAR(length=32, )),
#     Column("unload_id", VARCHAR(length=32, )),
#     Column("parameter_name", VARCHAR(length=32, ))
# ]
# secondary_reload_sort_table = [
#     Column("dest_zone_code", VARCHAR(length=32, )),
#     Column("dest_type", VARCHAR(length=32, )),
#     Column("equipment_id", VARCHAR(length=32, )),
#     Column("allocate_weight", VARCHAR(length=32, )),
#     Column("reload_type", VARCHAR(length=32, ))
# ]
# reload_setting_table = [
#     Column("dest_zone_code", VARCHAR(length=32, )),
#     Column("dest_type", VARCHAR(length=32, )),
#     Column("equipment_id", VARCHAR(length=32, )),
#     Column("equipment_port", VARCHAR(length=32, )),
#     Column("allocate_weight", VARCHAR(length=32, )),
#     Column("reload_type", VARCHAR(length=32, ))
# ]
# apt_reload_table = [
#     Column("dest_zone_code", VARCHAR(length=32, )),
#     Column("equipment_port", VARCHAR(length=32, )),
#     Column("allocate_weight", VARCHAR(length=32, )),
#     Column("reload_type", VARCHAR(length=32, ))
# ]


# ---------------------------------------------excel db insert methods ----------------------------------------------------------
# df_to_excel(file_name='', sheet='', df=)
# creat_table(db=CREATE_TABLE_DB_ENG, table_name='', col_list=)
# to_mysql(table_name='', data=)

ULD_Truck_Capacity 表单


In [4]:
capacity_colum_name = ['dest_zone_code', 'equipment_capacity', 'parameter_name']
truck_uld_capacity_df = dict_list_to_dataframe(pd, parameter_list=PARAM_DICT['ULD_Truck_Capacity'], colum_name=capacity_colum_name)
truck_uld_capacity_df.equipment_capacity = truck_uld_capacity_df.equipment_capacity.apply(str)
# df_to_excel(file_name='i_truck_uld_capacity', sheet='capacity', df=truck_uld_capacity_df)
# creat_table(db=CREATE_TABLE_DB_ENG, table_name='i_truck_uld_capacity', col_list=uld_truck_capacity_table)
# to_mysql(table_name='i_truck_uld_capacity', data=truck_uld_capacity_df)
truck_uld_capacity_df.sample(10)


Out[4]:
dest_zone_code equipment_capacity parameter_name
108 TYN 302 uld_capacity
33 717W 2285 truck_capacity
102 SYM 157 uld_capacity
82 LGG 352 uld_capacity
51 DEL 245 uld_capacity
21 552W 2285 truck_capacity
68 iICN 281 uld_capacity
104 TNA 372 uld_capacity
70 iKUL 239 uld_capacity
28 451WA 2285 truck_capacity

City_Airport_Code 表单


In [5]:
city_code_colum_name = ['city_code', 'airport_code', 'parameter_name']
city_code_df = dict_list_to_dataframe(pd, parameter_list=PARAM_DICT['City_Airport_Code'], colum_name=city_code_colum_name)

city_code_df.sample(10)


Out[5]:
city_code airport_code parameter_name
308 7312 CSX city_airport_code
247 872 KMG city_airport_code
12 88 KMG city_airport_code
29 354 TYN city_airport_code
165 692 SYM city_airport_code
255 883 SYM city_airport_code
286 952 INC city_airport_code
288 954 INC city_airport_code
21 318 SJW city_airport_code
99 513 WUX city_airport_code

S_Road_City2Apt 表单


In [6]:
s_road_city2apt_colum_name = ['city_code', 'dest_zone_code', 'parameter_name']
s_road_city2apt_df = dict_list_to_dataframe(pd, parameter_list=PARAM_DICT['S_Road_City2Apt'], colum_name=s_road_city2apt_colum_name)

s_road_city2apt_df.sample(10)


Out[6]:
city_code dest_zone_code parameter_name
239 377 377W s_road_city2apt
180 855 745W s_road_city2apt
225 973 377W s_road_city2apt
117 543 371XA s_road_city2apt
109 511 025WA s_road_city2apt
123 554 552W s_road_city2apt
223 970 377W s_road_city2apt
208 930 710W s_road_city2apt
243 592 020W s_road_city2apt
290 575 551W s_road_city2apt

flight unload setting 表单


In [7]:
flight_id_unload_des_colum_name = ['flight_id', 'unload_id', 'parameter_name']
flight_id_unload_des_df = dict_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['Flight_ID_Unload_des'], colum_name=flight_id_unload_des_colum_name)
flight_id_unload_des_df.sample(10)


Out[7]:
flight_id unload_id parameter_name
104 172 Unload2 flight_unload
142 97 Unload18 flight_unload
45 16 Unload2 flight_unload
154 98 Unload18 flight_unload
39 55 Unload8 flight_unload
163 New33 Unload18 flight_unload
165 174-1 Unload18 flight_unload
103 116 Unload2 flight_unload
33 149 Unload7 flight_unload
9 133 Unload11 flight_unload

secondary_reload_sort 表单

  • R, D, I

In [25]:
secondary_reload_sort_colum_name = ['dest_zone_code', 'equipment_id', 'allocate_weight', 'reload_type']
secondary_reload_sort_df = dict_double_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['Secondary_Reload_Sort'], colum_name=secondary_reload_sort_colum_name)
secondary_reload_sort_df['dest_type'], secondary_reload_sort_df['equipment_id'] = secondary_reload_sort_df.equipment_id.str.split(
    '_', 1).str
secondary_reload_sort_df = secondary_reload_sort_df.loc[:, [
    'dest_zone_code', 'dest_type', 'equipment_id', 'allocate_weight',  'reload_type']]


secondary_reload_sort_df.tail(10)


Out[25]:
dest_zone_code dest_type equipment_id allocate_weight reload_type
760 iNRT I Secondary10_1 equal secondary_reload
761 iNRT I Secondary10_2 equal secondary_reload
762 RGN I Secondary1_1 equal secondary_reload
763 RGN I Secondary1_2 equal secondary_reload
764 RIO I Secondary1_1 equal secondary_reload
765 RIO I Secondary1_2 equal secondary_reload
766 SCL I Secondary1_1 equal secondary_reload
767 SCL I Secondary1_2 equal secondary_reload
768 SGN I Secondary1_1 equal secondary_reload
769 SGN I Secondary1_2 equal secondary_reload

s_secondary_reload_sort 表单

  • Small

In [9]:
s_secondary_reload_sort_colum_name = ['dest_zone_code', 'equipment_id', 'allocate_weight', 'reload_type']
s_secondary_reload_sort_df = dict_double_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['S_Secondary_Reload_Sort'], colum_name=s_secondary_reload_sort_colum_name)
s_secondary_reload_sort_df.equipment_id = s_secondary_reload_sort_df.equipment_id.str.replace('^[A-Z]_', '')
s_secondary_reload_sort_df['dest_type'], s_secondary_reload_sort_df['dest_zone_code'] = \
s_secondary_reload_sort_df.dest_zone_code.str.split('-', 1).str
s_secondary_reload_sort_df = s_secondary_reload_sort_df.loc[:, [
    'dest_zone_code', 'dest_type', 'equipment_id', 'allocate_weight',  'reload_type']]

s_secondary_reload_sort_df.tail(10)#.index.size


Out[9]:
dest_zone_code dest_type equipment_id allocate_weight reload_type
2030 28 D Secondary1_3 equal small_secondary_reload
2031 455 D Secondary1_1 equal small_secondary_reload
2032 455 D Secondary1_2 equal small_secondary_reload
2033 455 D Secondary1_3 equal small_secondary_reload
2034 573 D Secondary1_1 equal small_secondary_reload
2035 573 D Secondary1_2 equal small_secondary_reload
2036 573 D Secondary1_3 equal small_secondary_reload
2037 7112l D Secondary1_1 equal small_secondary_reload
2038 7112l D Secondary1_2 equal small_secondary_reload
2039 7112l D Secondary1_3 equal small_secondary_reload

concat secondary_reload_sort


In [24]:
secondary_reload_sort_concat = pd.concat([secondary_reload_sort_df, s_secondary_reload_sort_df], axis=0)
secondary_reload_sort_concat = secondary_reload_sort_concat.reset_index(drop=True)

secondary_reload_sort_concat.sample(10)


Out[24]:
dest_zone_code dest_type equipment_id allocate_weight reload_type
2333 916 R Secondary13_1 equal small_secondary_reload
2132 751 R Secondary14_1 equal small_secondary_reload
903 454 D Secondary9_2 equal small_secondary_reload
727 HAN I Secondary6_2 equal secondary_reload
1308 437 D Secondary5_2 equal small_secondary_reload
261 555 D Secondary14_2 equal secondary_reload
1677 591 R Secondary15_2 equal small_secondary_reload
1129 354 D Secondary8_3 equal small_secondary_reload
536 598 D Secondary6_1 equal secondary_reload
1977 354 R Secondary14_2 equal small_secondary_reload

reload_setting 表单

  • R、 D、 I

In [35]:
reload_setting_colum_name = ['dest_zone_code', 'equipment_port', 'allocate_weight', 'reload_type']
reload_setting_df = dict_double_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['Reload_Setting'], colum_name=reload_setting_colum_name)
reload_setting_df['dest_type'], reload_setting_df['equipment_port'] = reload_setting_df.equipment_port.str.split('_', 1).str
reload_setting_df['equipment_id'], reload_setting_df['dest_zone_code'] = \
reload_setting_df.dest_zone_code.str.split('-', 1).str
reload_setting_df.equipment_id = reload_setting_df.equipment_id.str.replace('^[A-Z]_', '')
reload_setting_df = reload_setting_df.loc[:, ['dest_zone_code', 'dest_type', 'equipment_id', 'equipment_port', 'allocate_weight', 
                                                  'reload_type']]
reload_setting_df.sample(10)


Out[35]:
dest_zone_code dest_type equipment_id equipment_port allocate_weight reload_type
1724 iCGN I Secondary3_1 Secondary3_1_ot11 equal reload
1160 22 D Secondary5_2 Secondary5_2_ot9 equal reload
1222 592 D Secondary6_1 Secondary6_1_ot32 equal reload
2021 AMS I Secondary7_1 Secondary7_1_ot20 equal reload
1865 iSIN I Secondary5_1 Secondary5_1_ot14 equal reload
887 812 D Secondary2_1 Secondary2_1_ot19 equal reload
1862 JNB I Secondary5_1 Secondary5_1_ot11 equal reload
950 750 D Secondary2_2 Secondary2_2_ot8 equal reload
2230 iICN I Secondary9_2 Secondary9_2_ot29 equal reload
562 21 D Secondary13_1 Secondary13_1_ot17 equal reload

small_reload_setting 表单


In [37]:
s_reload_setting_colum_name = ['dest_zone_code', 'equipment_port', 'allocate_weight', 'reload_type']
s_reload_setting_df = dict_double_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['S_Reload_Setting'], colum_name=s_reload_setting_colum_name)
_, s_reload_setting_df['equipment_port'] = s_reload_setting_df.equipment_port.str.split('_', 1).str
s_reload_setting_df['equipment_id'], s_reload_setting_df['dest_type'], s_reload_setting_df['dest_zone_code'] = \
s_reload_setting_df.dest_zone_code.str.split('-', 2).str
s_reload_setting_df.equipment_id = s_reload_setting_df.equipment_id.str.replace('^[A-Z]_', '')
s_reload_setting_df = s_reload_setting_df.loc[:, ['dest_zone_code', 'dest_type', 'equipment_id', 'equipment_port', 'allocate_weight', 
                                                  'reload_type']]
s_reload_setting_df.sample(10)


Out[37]:
dest_zone_code dest_type equipment_id equipment_port allocate_weight reload_type
8758 851 D Secondary6_2 Secondary6_2_ot176 equal small_reload
7315 372 D Secondary4_2 Secondary4_2_ot144 equal small_reload
570 7112l D Secondary1_3 Secondary1_3_ot103 equal small_reload
10600 753 D Secondary9_1 Secondary9_1_ot163 equal small_reload
5552 LGG I Secondary16_3 Secondary16_3_ot38 equal small_reload
10725 459 D Secondary9_1 Secondary9_1_ot66 equal small_reload
1911 7112l R Secondary11_3 Secondary11_3_ot40 equal small_reload
3903 471 R Secondary14_2 Secondary14_2_ot31 equal small_reload
2424 435 R Secondary12_2 Secondary12_2_ot155 equal small_reload
1430 7112l R Secondary11_1 Secondary11_1_ot27 equal small_reload

concat_reload_setting

  • reload配置表汇总

In [63]:
reload_setting_concat = pd.concat([reload_setting_df, s_reload_setting_df], axis=0)

reload_setting_concat = reload_setting_concat.reset_index(drop=True)

r_i_d_reload_setting_concat = reload_setting_concat[reload_setting_concat.reload_type=='reload']
s_reload_setting_concat = reload_setting_concat[reload_setting_concat.reload_type=='small_reload']
# r_i_d_reload_setting_concat_con = r_i_d_reload_setting_concat.groupby(
#     ['equipment_id', 'dest_type']).equipment_port.apply(set).apply(len).to_frame().reset_index()
# df_to_excel(file_name='r_i_d_reload_setting_concat_con', sheet='r_i_d_reload_setting_concat_con', df=r_i_d_reload_setting_concat_con)
# r_i_d_reload_setting_concat_con #.sample(10)

s_reload_setting_concat_con = s_reload_setting_concat.groupby(
    ['equipment_id', ]).equipment_port.apply(set).apply(len).to_frame().reset_index()
df_to_excel(file_name='s_reload_setting_concat_con', sheet='s_reload_setting_concat_con', df=s_reload_setting_concat_con)
s_reload_setting_concat_con
# s_reload_setting_concat.index.size


Out[63]:
equipment_id equipment_port
0 Secondary10_1 234
1 Secondary10_2 234
2 Secondary10_3 234
3 Secondary11_1 234
4 Secondary11_2 234
5 Secondary11_3 234
6 Secondary12_1 234
7 Secondary12_2 234
8 Secondary12_3 234
9 Secondary13_1 234
10 Secondary13_2 234
11 Secondary13_3 234
12 Secondary14_1 234
13 Secondary14_2 234
14 Secondary14_3 234
15 Secondary15_1 234
16 Secondary15_2 234
17 Secondary15_3 234
18 Secondary16_1 234
19 Secondary16_2 234
20 Secondary16_3 234
21 Secondary1_1 234
22 Secondary1_2 234
23 Secondary1_3 234
24 Secondary2_1 234
25 Secondary2_2 234
26 Secondary2_3 234
27 Secondary3_1 234
28 Secondary3_2 234
29 Secondary3_3 234
30 Secondary4_1 234
31 Secondary4_2 234
32 Secondary4_3 234
33 Secondary5_1 234
34 Secondary5_2 234
35 Secondary5_3 234
36 Secondary6_1 234
37 Secondary6_2 234
38 Secondary6_3 234
39 Secondary7_1 234
40 Secondary7_2 234
41 Secondary7_3 234
42 Secondary8_1 234
43 Secondary8_2 234
44 Secondary8_3 234
45 Secondary9_1 234
46 Secondary9_2 234
47 Secondary9_3 234

In [ ]:

apt_reload 表单


In [375]:
apt_reload_colum_name = ['dest_zone_code', 'equipment_port', 'allocate_weight', 'reload_type']
apt_reload_df = dict_double_list_to_dataframe(
    pd, parameter_list=PARAM_DICT['apt_reload'], colum_name=apt_reload_colum_name)
apt_reload_df.tail(10)


Out[375]:
dest_zone_code equipment_port allocate_weight reload_type
1110 SCL Reload16_ot13 equal reload_ot
1111 SCL Reload16_ot14 equal reload_ot
1112 SCL Reload16_ot15 equal reload_ot
1113 SCL Reload16_ot16 equal reload_ot
1114 SGN Reload16_ot1 equal reload_ot
1115 SGN Reload16_ot2 equal reload_ot
1116 SGN Reload16_ot3 equal reload_ot
1117 SGN Reload16_ot4 equal reload_ot
1118 SGN Reload16_ot5 equal reload_ot
1119 SGN Reload16_ot6 equal reload_ot