In [1]:
from pymongo import MongoClient
# Open a MongoDB connection. No host/port is passed, so pymongo's defaults
# apply (per the pymongo docs that is localhost:27017).
client = MongoClient()
# Database that holds everything this notebook collects.
db = client["lyrical_classifier"]
# Collection receiving one document per song; the scraping loop upserts into it.
song_coll = db["songs"]
In [2]:
import requests
# Artist whose tracks are collected. Later cells read this name too
# (artist filter, lyrics lookup, stored "artist" field).
artist_name = "Kendrick Lamar"

# Ask the Spotify search endpoint for up to 50 US-market tracks matching the
# artist name. The query is passed via `params` so requests URL-encodes it;
# formatting it into the URL by hand breaks for names containing characters
# such as '&', '#' or '+'.
r = requests.get(
    "https://api.spotify.com/v1/search",
    params={
        "q": artist_name,
        "type": "track",
        "market": "US",
        "limit": 50,
    },
)
# Surface HTTP errors here instead of as a confusing KeyError on
# r.json()["tracks"] in the next cell.
r.raise_for_status()
In [3]:
# Keep only the search hits whose first-listed artist equals artist_name,
# compared case-insensitively.
matching_artist = list(
    candidate
    for candidate in r.json()["tracks"]["items"]
    if artist_name.lower() == candidate["artists"][0]["name"].lower()
)

# Collapse tracks that share a title, keeping the first one in search order.
seen_songs = set()
songs = []
for track in matching_artist:
    if track["name"] in seen_songs:
        continue  # title already collected
    seen_songs.add(track["name"])
    songs.append(track)
In [ ]:
from bs4 import BeautifulSoup
# For every de-duplicated track: find its lyrics page through the AZLyrics
# search, scrape the lyrics text, and upsert one document per Spotify track id
# into song_coll. Failures are per-track and best-effort: the error is printed
# and the loop moves on to the next track.
for track in songs:
    try:
        # Non-ASCII characters in the title are replaced with '?'.
        song_name = track["name"].encode("ascii", "replace")
        spotify_id = track["id"]
        album_name = track["album"]["name"]

        # Search AZLyrics for "<artist> <song>". Uses the notebook-wide
        # artist_name (previously a hard-coded "Kendrick Lamar", which went
        # stale as soon as artist_name was changed) and passes the query via
        # `params` so titles containing '&', '#', etc. are URL-encoded.
        search_song = requests.get(
            "http://search.azlyrics.com/search.php",
            params={
                "q": "{artist_name} {song_name}".format(
                    artist_name=artist_name, song_name=song_name
                )
            },
        )
        soup = BeautifulSoup(search_song.text, 'html.parser')
        # First link inside a table row is taken as the best match; raises
        # IndexError (handled below) when the search returned no rows.
        lyrics_url = soup.select("tr a")[0]['href']
        print(lyrics_url)

        # The lyrics page is requested with a browser-like User-Agent.
        lyrics_page = requests.get(
            lyrics_url,
            headers={"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:39.0) Gecko/20100101 Firefox/39.0"},
        )
        lyrics_soup = BeautifulSoup(lyrics_page.content, 'html.parser')
        # NOTE(review): the lyrics are assumed to sit in the 8th <div> of the
        # main column; this index depends on the page layout - confirm if the
        # scrape starts returning the wrong text.
        lyrics = lyrics_soup.select(".col-xs-12.col-lg-8.text-center div")[7].get_text()

        # Upsert keyed on spotify_id: replaces the existing document for this
        # track or inserts a new one. Non-ASCII characters are dropped from
        # the stored lyrics.
        # NOTE(review): Collection.update is deprecated in pymongo 3.x and
        # removed in 4.x (replace_one is the successor); kept as-is so the
        # notebook still runs on the pymongo version it was written for.
        song_coll.update({
            "spotify_id": spotify_id
        }, {
            "spotify_id": spotify_id,
            "artist": artist_name,
            "song": song_name,
            "album": album_name,
            "lyrics": lyrics.encode('ascii', 'ignore')
        }, upsert=True)
    except StandardError as e:
        # Deliberate best-effort: report the problem and continue with the
        # next track (StandardError is the Python 2 base of ordinary errors).
        print(e)