scrape_songmeanings2



In [1]:
import requests, lxml.html
import pandas as pd
# Let DataFrame cells show up to 100 characters before pandas truncates them.
pd.options.display.max_colwidth = 100

In [2]:
# Download one page of the SongMeanings artist directory (letter "a") and
# collect each listed artist's name and link into a DataFrame.
page = 1
response = requests.get(
    "http://songmeanings.com/artist/directory/a/?page=%d" % page,
    timeout=30,  # requests has no default timeout; without one a stalled server hangs the cell
)
# Fail loudly on an HTTP error rather than parsing an error page into an empty table.
response.raise_for_status()
doc = lxml.html.fromstring(response.content)

rows = []
for artist in doc.cssselect("tr[id=fan-]"):
    # Only anchors that actually carry an href are usable; a row without one
    # is skipped instead of raising IndexError/AttributeError mid-scrape.
    links = artist.cssselect('a[href]')
    if not links:
        continue
    # Leading slashes are stripped from the href and an explicit "http://"
    # scheme is prepended (presumably the hrefs are protocol-relative,
    # e.g. "//songmeanings.com/..." -- confirm against the live markup).
    link = "http://" + links[0].get('href').lstrip('/')
    rows.append([artist.text_content(), link])

df = pd.DataFrame(rows, columns=['artist', 'link'])
df.head(5)


Out[2]:
artist link
0 A http://songmeanings.com/artist/view/songs/6024/
1 A B http://songmeanings.com/artist/view/songs/137439043394/
2 A B & The Sea http://songmeanings.com/artist/view/songs/137438996227/
3 A Backward Glance On A Travel Road http://songmeanings.com/artist/view/songs/137439060610/
4 A Balladeer http://songmeanings.com/artist/view/songs/137438972714/