In [1]:
import requests, lxml.html
import pandas as pd
# Let a cell show up to 100 characters before pandas truncates it in rendered
# output, so longer string values stay readable when a frame is displayed.
pd.options.display.max_colwidth = 100
In [2]:
# Fetch page 1 of the "a" artist directory. The timeout keeps the cell from
# hanging on a stalled connection, and raise_for_status() surfaces an HTTP
# error instead of letting an error page be parsed into an empty result.
response = requests.get("http://songmeanings.com/artist/directory/a/?page=%d" % 1, timeout=30)
response.raise_for_status()
doc = lxml.html.fromstring(response.content)

# Resolve every href against the URL that was actually fetched, so relative,
# root-relative and protocol-relative links all come out as full URLs
# (prefixing "http://" by hand is only correct for protocol-relative hrefs).
doc.make_links_absolute(response.url)

rows = []
# NOTE(review): this selector only matches rows whose id is exactly "fan-".
# If the page uses ids such as "fan-123", it would need `tr[id^=fan-]` — confirm
# against the live markup.
for artist in doc.cssselect("tr[id=fan-]"):
    anchors = artist.cssselect('a')
    if not anchors:
        # A row without a link has no artist URL to record; skip it rather
        # than failing the whole cell with an IndexError.
        continue
    # Column 1: the row's full text; column 2: the first link in the row.
    row = [artist.text_content(), anchors[0].get('href')]
    rows.append(row)

# One row per directory entry; preview the first few.
df = pd.DataFrame(rows, columns=['artist', 'link'])
df.head(5)
Out[2]: