In [1]:
import requests
from bs4 import BeautifulSoup
from collections import defaultdict
In [2]:
# Position of each category block among the landing page's
# <div class="category"> elements (this is the `index` argument that
# get_category_items expects):
#   0: ths_category
#   1: zjh_category
#   2: concept_category
#   3: region_category
base_url = 'http://basic.10jqka.com.cn'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the real landing page.
base_res = requests.get(base_url, timeout=10)
base_res.raise_for_status()
base_soup = BeautifulSoup(base_res.text, 'lxml')
In [7]:
def get_category_items(base_soup, index, timeout=10):
    """Scrape every plate (and its member stocks) from one category block.

    Args:
        base_soup: Parsed landing page. The ``index``-th
            ``<div class="category">`` element is selected from it.
        index: Position of the category block to scrape.
        timeout: Seconds to wait for each per-plate HTTP request before
            giving up (passed straight to ``requests.get``).

    Returns:
        A ``(result, stock_codes)`` tuple:

        * ``result`` is a list with one dict per ``<a>`` in the block:
          ``{'code': <anchor name attr>, 'name': <anchor title attr>,
          'stocks': [{'code', 'name', 'url'}, ...]}``.
        * ``stock_codes`` is the set of all suffixed stock codes seen across
          the plates of this block.

        Stock codes starting with ``0`` or ``3`` get a ``.XSHE`` suffix and
        codes starting with ``6`` get ``.XSHG``; anything else is skipped.

    Raises:
        IndexError: If the landing page has no category block at ``index`` or
            a plate page has no ``c_content clearfix`` div.
        KeyError: If an anchor lacks an expected attribute.
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status for a plate page.
    """
    result = []
    stock_codes = set()
    category_block = base_soup.find_all('div', {'class': 'category'})[index]
    for category_item in category_block.find_all('a'):
        category_name = category_item.attrs['title']
        category_code = category_item.attrs['name']
        # hrefs on the page are site-relative, so prefix the module-level base_url.
        category_url = base_url + category_item.attrs['href']
        category_data = {'code': category_code, 'name': category_name, 'stocks': []}

        category_res = requests.get(category_url, timeout=timeout)
        # Fail loudly instead of scraping an error page into an empty plate.
        category_res.raise_for_status()
        category_soup = BeautifulSoup(category_res.text, 'lxml')

        stock_block = category_soup.find_all('div', {'class': 'c_content clearfix'})[0]
        for stock_item in stock_block.find_all('a'):
            stock_name = stock_item.attrs['title']
            stock_href = stock_item.attrs['href']
            # Drop the first and last character of the href to get the bare
            # code (presumably the href looks like '/600000/' -- TODO confirm).
            stock_code = stock_href[1:-1]
            if stock_code.startswith(('0', '3')):
                stock_code += '.XSHE'
            elif stock_code.startswith('6'):
                stock_code += '.XSHG'
            else:
                # Codes with any other leading character are ignored.
                continue
            category_data['stocks'].append({
                'code': stock_code,
                'name': stock_name,
                'url': base_url + stock_href,
            })
            stock_codes.add(stock_code)
        result.append(category_data)
    return result, stock_codes
# Category block 0 is the ths_category listing and block 2 the concept_category
# listing. Note that each *_stock_list is actually a set of stock codes.
ths_industry_category, ths_industry_stock_list = get_category_items(base_soup, index=0)
ths_concept_category, ths_concept_stock_list = get_category_items(base_soup, index=2)
In [10]:
# Invert the scraped plates into a lookup: stock code -> names of every plate
# that lists the stock (industry plates first, then concept plates).
stock_categories = {}
categories = [ths_industry_category, ths_concept_category]
for plate_group in categories:
    for plate in plate_group:
        for member in plate['stocks']:
            # setdefault creates the list the first time a code is seen.
            stock_categories.setdefault(member['code'], []).append(plate['name'])
In [46]:
# Spot-check: show the plate names collected for stock 603955.XSHG.
# stock_categories is a plain dict, so this raises KeyError if that code was
# not found in any scraped plate.
stock_categories['603955.XSHG']