In [3]:
import requests
from bs4 import BeautifulSoup as bs
# Scrape the PTT politician-nickname list page and collect its table cells
# into `a`, a list of {'name': ..., 'nickname': ...} dicts. A later cell
# iterates over `a` and reads each entry's 'name'.
a = []
url = "http://zh.pttpedia.wikia.com/wiki/PTT%E6%94%BF%E6%B2%BB%E4%BA%BA%E7%89%A9%E7%B6%BD%E8%99%9F%E5%88%97%E8%A1%A8"
res = requests.get(url)
# Fail loudly on an HTTP error instead of scraping the error page.
res.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed on this machine.
soup = bs(res.text, "html.parser")
sort = soup.select("td")
# The first 148 <td> cells are consumed as alternating (name, nickname)
# pairs. Slicing plus zip yields the same pairs as indexing i / i+1 over
# range(0, 148, 2), but stops early instead of raising IndexError when the
# page contains fewer cells than expected.
for name_cell, nickname_cell in zip(sort[0:148:2], sort[1:148:2]):
    a.append({'name': name_cell.text, 'nickname': nickname_cell.text})
In [5]:
# Print one "<name>|0x0080|101" line per entry in `a`.
# Relies on `a` already holding dicts with a 'name' key (filled by an
# earlier cell); running this cell on a fresh kernel raises NameError.
for entry in a:
    # A parenthesised single-argument print behaves the same as the Python 2
    # print statement and is also valid on Python 3.
    print(entry["name"].strip() + "|0x0080|101")
In [14]:
import requests
from bs4 import BeautifulSoup as bs
# Scrape the PTT entertainer-nickname list page and print one
# "<text>|0x0080|101" line per <li> found inside #mw-content-text.
a = []  # not used below; kept so this cell still resets the shared list
url = "http://zh.pttpedia.wikia.com/wiki/PTT%E8%97%9D%E4%BA%BA%E7%B6%BD%E8%99%9F%E5%88%97%E8%A1%A8"
res = requests.get(url)
# Fail loudly on an HTTP error instead of scraping the error page.
res.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = bs(res.text, "html.parser")
sort = soup.select("#mw-content-text li")
for item in sort:
    # Only the first three characters of the stripped item text are kept
    # (presumably the length of a typical name -- confirm against the page).
    # Parenthesised print is valid on both Python 2 and Python 3.
    print(item.text.strip()[0:3] + "|0x0080|101")
In [15]:
import requests
from bs4 import BeautifulSoup as bs
# Scrape the PTT athlete-nickname list page and print one
# "<text>|0x0080|101" line per <li> found inside #mw-content-text.
a = []  # not used below; kept so this cell still resets the shared list
url = "http://zh.pttpedia.wikia.com/wiki/PTT%E9%81%8B%E5%8B%95%E5%93%A1%E7%B6%BD%E8%99%9F%E5%88%97%E8%A1%A8"
res = requests.get(url)
# Fail loudly on an HTTP error instead of scraping the error page.
res.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = bs(res.text, "html.parser")
sort = soup.select("#mw-content-text li")
for item in sort:
    # Only the first three characters of the stripped item text are kept
    # (presumably the length of a typical name -- confirm against the page).
    # Parenthesised print is valid on both Python 2 and Python 3.
    print(item.text.strip()[0:3] + "|0x0080|101")
In [19]:
import requests
from bs4 import BeautifulSoup as bs
# Scrape the HBR Taiwan 2016 CEO list page and print one
# "<heading text>|0x0080|101" line for each of the first 50 <h2> elements.
a = []  # not used below; kept so this cell still resets the shared list
url = "https://www.hbrtaiwan.com/event/2016twceo/ceoList.php"
res = requests.get(url)
# Fail loudly on an HTTP error instead of scraping the error page.
res.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = bs(res.text, "html.parser")
sort = soup.select("h2")
# Slicing never raises, so a page with fewer than 50 headings is handled.
for heading in sort[0:50]:
    # Parenthesised print is valid on both Python 2 and Python 3.
    print(heading.text.strip() + "|0x0080|101")
In [ ]: