In [1]:
import re
import requests
import pandas as pd
In [2]:
# Fetch the station list script and build a mapping from station name to
# station code.
url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8993'
# NOTE(review): verify=False disables TLS certificate verification. It is kept
# as the original had it -- confirm whether it is still needed and drop it if not.
response = requests.get(url, verify=False, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty mapping.
response.raise_for_status()
# The pattern captures a run of CJK characters followed by '|' and a run of
# uppercase letters. The backslash is doubled so that '\|' is not an invalid
# string escape; the compiled pattern is unchanged.
stations = dict(re.findall(u'([\u4e00-\u9fa5]+)\\|([A-Z]+)', response.text))
In [3]:
# Fetch the raw query result for a departure station, arrival station and date.
def get_raw_data(from_station, to_station, date, stations, timeout=10):
    """Query the 12306 left-ticket endpoint and return the decoded JSON body.

    Args:
        from_station: Departure station name; must be a key of ``stations``.
        to_station: Arrival station name; must be a key of ``stations``.
        date: Travel date, sent unchanged as ``leftTicketDTO.train_date``
            (this notebook passes a 'YYYY-MM-DD' string).
        stations: Mapping from station name to station code.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``response.json()`` decodes from the reply.

    Raises:
        KeyError: If either station name is not in ``stations``.
        requests.HTTPError: If the server answers with an error status.
    """
    # An ordered list (not a dict) keeps the query parameters in the same
    # order the hand-built URL used; requests takes care of the encoding.
    params = [
        ('leftTicketDTO.train_date', date),
        ('leftTicketDTO.from_station', stations[from_station]),
        ('leftTicketDTO.to_station', stations[to_station]),
        ('purpose_codes', 'ADULT'),
    ]
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept as the original had it -- confirm whether it is still needed.
    r = requests.get(
        'https://kyfw.12306.cn/otn/leftTicket/queryA',
        params=params,
        verify=False,
        timeout=timeout,
    )
    # Surface HTTP errors here rather than as a confusing JSON decode failure.
    r.raise_for_status()
    return r.json()
# Query inputs: departure station, arrival station and travel date.
from_station, to_station, date = '哈尔滨', '南京', '2017-01-20'
# Keep only the 'data' entry of the decoded response.
result = get_raw_data(
    from_station=from_station,
    to_station=to_station,
    date=date,
    stations=stations,
)['data']
In [4]:
# Deprecated: superseded by the list-returning reshape_train_data defined in a
# later cell.
def reshape_train_data(train):
    """Pick the display fields out of one train record, keyed by column label.

    Args:
        train: Mapping holding one train's fields (the 'queryLeftNewDTO'
            entry of a query result item).

    Returns:
        dict: Column label -> value. Plain fields are ``train.get(key)``, so
        a missing key gives ``None``. The three paired columns (stations,
        full route, times) are 'first->second', or ``None`` when either part
        is missing.
    """
    def joined(first_key, second_key):
        # Every field is read with .get(), so a missing part must not blow
        # up with a TypeError from None + str; report None like the plain
        # fields do.
        first, second = train.get(first_key), train.get(second_key)
        if first is None or second is None:
            return None
        return first + '->' + second

    return {
        '车次': train.get('station_train_code'),
        '车站': joined('from_station_name', 'to_station_name'),
        '全程': joined('start_station_name', 'end_station_name'),
        '时间': joined('start_time', 'arrive_time'),
        '发售时间': train.get('sale_time'),
        '商务座': train.get('swz_num'),
        '特等座': train.get('tz_num'),
        '一等座': train.get('zy_num'),
        '二等座': train.get('ze_num'),
        '高级软卧': train.get('gr_num'),
        '软卧': train.get('rw_num'),
        '硬卧': train.get('yw_num'),
        '软座': train.get('rz_num'),
        '硬座': train.get('yz_num'),
        '无座': train.get('wz_num'),
    }
# Try the function on one sample record -- the second entry of `result`, so
# this needs at least two results -- and show the keys of the reshaped dict.
train1 = result[1]['queryLeftNewDTO']
reshape_train_data(train1).keys()
Out[4]:
In [5]:
def reshape_train_data(train):
    """Flatten one train record into a row of display values.

    Args:
        train: Mapping holding one train's fields (the 'queryLeftNewDTO'
            entry of a query result item).

    Returns:
        list: 15 values in the order train code, 'from->to' stations,
        'start->end' full route, 'start->arrive' times, sale time, then the
        swz, tz, zy, ze, gr, rw, yw, rz, yz and wz seat counts. A missing
        plain field is ``None``; a paired value is ``None`` when either of
        its parts is missing.
    """
    def joined(first_key, second_key):
        # Every field is read with .get(), so a missing part must not blow
        # up with a TypeError from None + str; report None like the plain
        # fields do.
        first, second = train.get(first_key), train.get(second_key)
        if first is None or second is None:
            return None
        return first + '->' + second

    # Fields copied through unchanged, in output order.
    plain_fields = (
        'sale_time',
        'swz_num', 'tz_num', 'zy_num', 'ze_num',
        'gr_num', 'rw_num', 'yw_num',
        'rz_num', 'yz_num', 'wz_num',
    )
    row = [
        train.get('station_train_code'),
        joined('from_station_name', 'to_station_name'),
        joined('start_station_name', 'end_station_name'),
        joined('start_time', 'arrive_time'),
    ]
    row.extend(train.get(key) for key in plain_fields)
    return row
# Try the function on one sample record -- the second entry of `result`, so
# this needs at least two results -- and show the reshaped row.
train1 = result[1]['queryLeftNewDTO']
reshape_train_data(train1)
Out[5]:
In [6]:
# Assemble the reshaped rows into a DataFrame indexed by train code.
result_dict = {}
for entry in result:
    train_temp = entry['queryLeftNewDTO']
    # Keyed by train code, so a repeated code keeps only its last record.
    result_dict[train_temp['station_train_code']] = reshape_train_data(train_temp)
# Build the frame once from rows with explicit labels. Unlike transposing a
# dict-built frame and renaming its columns afterwards, this also works when
# the query returned no trains (it gives an empty table with the right columns).
result_table = pd.DataFrame(
    list(result_dict.values()),
    index=list(result_dict.keys()),
    columns=['车次', '车站', '全程', '时间', '发售时间', '商务座', '特等座', '一等座', '二等座', '高级软卧', '软卧', '硬卧', '软座', '硬座', '无座'],
)
In [7]:
# Show the assembled table as this cell's output.
result_table
Out[7]: