In [52]:
import pandas as pd
In [53]:
# Source of the lyrics dataset:
#   http://kaylinwalker.com/50-years-of-pop-music/
# The CSV is decoded as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
df = pd.read_csv(
    '../data/billboard_lyrics_1964-2015.csv',
    encoding='ISO-8859-1',
)
In [54]:
# Take the column at positional index 4 (the lyrics column) directly instead of
# walking every row of df.values, and drop missing entries while they are still
# real NaN values rather than comparing their string form to 'nan'.
lyrics_column = df.iloc[:, 4].dropna()

# One whitespace-trimmed string per song.
songs = [str(lyric).strip() for lyric in lyrics_column]

# Join everything into a single string, separating songs with a vertical bar.
# NOTE: '|' is not escaped, so a lyric that itself contains '|' cannot be told
# apart from a song boundary when the corpus is split again.
corpus = '|'.join(songs)

# Save the corpus. The encoding is set explicitly so the file does not depend
# on the platform's locale default (which can fail on non-ASCII lyrics).
with open('../data/lyrics.txt', 'w', encoding='utf-8') as f:
    f.write(corpus)