In [1]:
import requests
from bs4 import BeautifulSoup
from collections import defaultdict

In [2]:
'''
0: ths_category
1: zjh_category
2: concept_category
3: region_category
'''

base_url = 'http://basic.10jqka.com.cn'
base_res = requests.get(base_url)
base_soup = BeautifulSoup(base_res.text, 'lxml')

In [7]:
def get_category_items(base_soup, index):
    result = []
    stock_list = []
    for category_item in base_soup.findAll("div", { "class" : "category" })[index].find_all('a'):
        category_name = category_item.attrs['title']
        category_code =  category_item.attrs['name']
        category_url = base_url + category_item.attrs['href']
        category_data = {'code': category_code, 'name': category_name, 'stocks': []}
        # print('{} {}: {}'.format(category_name, category_code, category_url))
        category_res = requests.get(category_url)
        category_soup = BeautifulSoup(category_res.text, 'lxml')
        for stock_item in category_soup.findAll("div", { "class" : "c_content clearfix" })[0].find_all('a'):
            stock_name = stock_item.attrs['title']
            stock_code = stock_item.attrs['href'][1:-1]
            if stock_code.startswith('0') or stock_code.startswith('3'):
                stock_code += '.XSHE'
            elif stock_code.startswith('6'):
                stock_code += '.XSHG'
            else:
                continue
            stock_url = base_url + stock_item.attrs['href']
            category_data['stocks'].append({'code': stock_code, 'name': stock_name, 'url': stock_url})
            stock_list.append(stock_code)
            # print('{} {}: {}'.format(stock_name, stock_code, stock_url))
        result.append(category_data)
    return result, set(stock_list)
            
ths_industry_category, ths_industry_stock_list = get_category_items(base_soup, 0)
ths_concept_category, ths_concept_stock_list = get_category_items(base_soup, 2)

In [10]:
stock_categories = {}

categories = [ths_industry_category, ths_concept_category] 
for category in categories:
    for plate in category:
        for stock in plate['stocks']:
            if stock['code'] in stock_categories:
                stock_categories[stock['code']].append(plate['name'])
            else:
                stock_categories[stock['code']] = [plate['name']]

In [46]:
stock_categories['603955.XSHG']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-46-722e17169366> in <module>()
----> 1 stock_categories['603955.XSHG']

KeyError: '603955.XSHG'