In [1]:
import pandas as pd
import numpy as np
import itertools
In [2]:
# Shell escape: list the contents of the current working directory.
!ls
In [3]:
# Load the tab-separated file into (user, artist, freq) rows.
# Each line carries three double-quoted fields: a user, a ";"-separated list
# of artists, and a ";"-separated list of frequencies in matching positions.
rows = []
with open("artistToGaguiel.csv") as fh:
    for line in fh:
        line = line.rstrip("\n")
        if not line:
            # Skip blank lines (e.g. at end of file); they would otherwise
            # fail the three-way unpacking below.
            continue
        user, artists, freqs = line.split("\t")
        user = user.strip('"')
        artists = artists.strip('"').split(";")
        freqs = freqs.strip('"').split(";")
        # Pair each artist with the frequency at the same position; zip stops
        # at the shorter of the two lists.
        for art, freq in zip(artists, freqs):
            rows.append((user, art, freq))

# Transform data into a dataframe
df = pd.DataFrame(rows, columns=["user", "artist", "freq"])
# The frequencies were read as text; convert them to numbers so that sorting
# by "freq" is numeric rather than lexicographic ("99" would sort above "1000").
# pd.to_numeric raises ValueError if a value cannot be parsed as a number.
df["freq"] = pd.to_numeric(df["freq"])

# Verify data was loaded properly
df.head()
Out[3]:
In [4]:
# Ten rows for "the beatles" with the highest freq first.
# DataFrame.sort was removed from pandas; sort_values is its replacement.
# NOTE: the ordering is numeric only when the "freq" column has a numeric
# dtype; string values would be ordered lexicographically.
df[df["artist"] == "the beatles"].sort_values("freq", ascending=False).head(10)
Out[4]:
In [5]:
# Artist that appears in the most rows: count rows per artist, order the
# counts descending, and keep the top one.
# DataFrame.sort was removed from pandas; sort_values is its replacement.
(
    df[["artist", "freq"]]
    .groupby("artist", sort=False)
    .count()
    .sort_values("freq", ascending=False)
    .head(1)
)
Out[5]:
In [6]:
# Two users with the most artist rows: count rows per user, order the counts
# descending, and keep the top two.
# DataFrame.sort was removed from pandas; sort_values is its replacement.
(
    df[["user", "artist"]]
    .groupby("user")
    .count()
    .sort_values("artist", ascending=False)
    .head(2)
)
Out[6]: