In [52]:
import pandas as pd

In [53]:
# Load the Billboard lyrics CSV; the dataset comes from
# http://kaylinwalker.com/50-years-of-pop-music/
# The file is decoded as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
df = pd.read_csv(
    '../data/billboard_lyrics_1964-2015.csv',
    encoding='ISO-8859-1',
)

In [54]:
# Build a single text corpus from the lyrics and write it to disk.

# The lyrics live in the fifth column (positional index 4) of the frame.
# NOTE(review): selecting by column label would be more robust than by
# position -- confirm the header name in the CSV before switching.
lyrics_column = df.iloc[:, 4]

# Drop truly missing entries (NaN) up front instead of stringifying them and
# comparing against the text 'nan', which would also discard a real lyric
# that happened to read "nan". Remaining values are coerced to str and
# stripped of surrounding whitespace.
songs = [str(lyric).strip() for lyric in lyrics_column.dropna()]

# Join into one string, separating songs with a vertical bar character.
corpus = '|'.join(songs)

# Save the file. The encoding is given explicitly so the output bytes do not
# depend on the platform's default locale encoding (the input was decoded
# from ISO-8859-1, so non-ASCII characters can be present).
with open('../data/lyrics.txt', 'w', encoding='utf-8') as f:
    f.write(corpus)