In [3]:
%load_ext watermark
%watermark -d -v -a 'Sebastian Raschka' -p scikit-learn,numpy


Sebastian Raschka 06/12/2014 

CPython 3.4.2
IPython 2.3.1

scikit-learn 0.15.2
numpy 1.9.1

Initialize

Initialize number of mood labels


In [1]:
import pickle

num_moodlab = 1200
pickle.dump(num_moodlab, open('num_moodlab.p','wb'))

Initialize and pickle pandas data frame


In [2]:
import pandas as pd
import re

df = pd.read_pickle('all_data.p')
df = df[['artist', 'title', 'lyrics', 'mood']]
df.index = range(1200)
df.tail()


Out[2]:
artist title lyrics mood
1195 prince escape {B-side of Glam Slam}\nSnare drum pounds on th... happy
1196 cavo over again Well I will rise\nThe morning comes\nNothing e... sad
1197 afi summer shudder Listen when I say, when I say it's real\nReal ... happy
1198 vitamin c girls against boys Imagine a world where the girls, girls rule th... happy
1199 richard burton camelot Each evening, from December to December\nBefor... happy

In [4]:
df['mood'] = df['mood'].astype(object)
for i in df.index:
    #df.ix[i]['mood'] = [df.ix[i]['mood']]
    df.ix[i]['lyrics'] = unicode(df.ix[i]['lyrics'], errors='ignore')
    df.ix[i]['artist'] = unicode(df.ix[i]['artist'].lower(), errors='ignore')
    df.ix[i]['title'] = re.sub(r'\([^)]*\)', '', df.ix[i]['title']).strip()
    df.ix[i]['title'] = unicode(df.ix[i]['title'].lower(), errors='ignore')

In [5]:
df.tail()


Out[5]:
artist title lyrics mood
1195 prince escape {B-side of Glam Slam}\nSnare drum pounds on th... happy
1196 cavo over again Well I will rise\nThe morning comes\nNothing e... sad
1197 afi summer shudder Listen when I say, when I say it's real\nReal ... happy
1198 vitamin c girls against boys Imagine a world where the girls, girls rule th... happy
1199 richard burton camelot Each evening, from December to December\nBefor... happy

In [7]:
pd.to_pickle(df, 'all_data.p')

Initialize sqlite3 database


In [4]:
import sqlite3

conn = sqlite3.connect('./all_data.sqlite')

df.to_sql(name='moodtable', con=conn, flavor='sqlite', index=False)

conn.close()

Test writing to and reading from sqlite3 database


In [6]:
artist = u'testar'
title = u'testit'
lyr = u'testlys'
mood = u'somemood'

In [7]:
conn = sqlite3.connect('./all_data.sqlite')
cursor = conn.cursor()

sql = "SELECT mood FROM moodtable WHERE artist=? AND title=?"
cursor.execute(sql, [(artist), (title)])
cur =  cursor.fetchone()

if cur:
    cur = cur[0]
    cur += u',test'

    sql = "UPDATE moodtable SET mood=? WHERE artist=? AND title=?"
    cursor.execute(sql, [(cur), (artist), (title)])

else:
    sql = "INSERT INTO moodtable VALUES (?,?,?,?)"
    cursor.execute(sql, [(artist), (title), (lyr), (mood)])
    
conn.commit()
conn.close()

Test reading number of rows in sqlite database


In [5]:
import sqlite3

conn = sqlite3.connect('./all_data.sqlite')
cursor = conn.cursor()

sql = "SELECT COUNT(*) FROM moodtable"
cursor.execute(sql)
result = cursor.fetchone()
print result[0]

conn.close()


1200

In [ ]: