In [1]:
#%pprint ON  # (disabled) IPython pretty printing
#import pdb  # (disabled) debugger
# NOTE: `import graphlab` only works when the notebook runs in an environment
# where GraphLab is available.
import graphlab as gl
gl.canvas.set_target('ipynb') # use IPython Notebook output for GraphLab Canvas
A simple intro to ML discussing its origin from robots.
Data -> ML Method -> My curve is better -> write a paper
Data -> ML Method -> Intelligence
For this Coursera course we will use the SFrame and graphlab libraries for Python. The first is free; the second is a commercial package, which I got free for a year. Its main advantage over plain Python is that it can process massive datasets, since it is able to cache data on the HDD. Let's see how it stacks up.
Let's quickly process the example from the course.
In [ ]:
# Load the example CSV into an SFrame.
data = gl.SFrame('people-example.csv')
In [4]:
# Peek at the last rows of the SFrame.
data.tail()
Out[4]:
In [6]:
# Visual summary of the SFrame (presumably rendered through GraphLab Canvas,
# which the first cell targets at the notebook -- confirm).
data.show()
In [16]:
# Display the 'age' column using the 'Categorical' view.
data['age'].show(view='Categorical')
# Everything else looks pretty much like pandas.
# print(...) with a single argument produces the same output on Python 2 and
# is also valid on Python 3, unlike the bare print statement.
print(data['age'].mean())
print(data['age'].max())
print(data['Country'])
In [19]:
# Build a 'Full Name' column by joining the first and last names with a space.
data['Full Name'] = data['First Name'] + ' ' + data['Last Name']
# Bare expression so the notebook displays the updated SFrame.
data
Out[19]:
In [20]:
def transform_country(country):
    """Return the long-form name for a country value.

    The abbreviation 'USA' is mapped to 'United States'; every other value
    is returned unchanged.
    """
    if country == 'USA':
        return 'United States'
    return country
In [21]:
# Quick check of the helper: a value other than 'USA' passes through
# unchanged, while 'USA' is expanded to 'United States'.
# The print(...) call form works on both Python 2 and Python 3.
print(transform_country('Brazil'))
print(transform_country('USA'))
In [23]:
# Run the 'Country' column through transform_country and overwrite the column
# with the result.
data['Country'] = data['Country'].apply(transform_country)
# Bare expression so the notebook displays the updated SFrame.
data
Out[23]:
This is the same logic as a lambda function; see the example below.
In [24]:
a = 5
# A lambda is an anonymous, single-expression function; this one squares its argument.
square = lambda x: x*x
# Last expression of the cell, so the notebook shows the result of squaring `a`.
square(a)
Out[24]:
In [58]:
import pandas as pd
import pylab
import matplotlib.pyplot as plt
import matplotlib
# Switch matplotlib to the 'ggplot' style sheet.
matplotlib.style.use('ggplot')
#import seaborn as sns
import numpy as np
%matplotlib inline
In [43]:
# Load the same CSV file with pandas, to compare against the SFrame workflow.
df = pd.read_csv('people-example.csv')
# Peek at the last rows of the DataFrame.
df.tail()
Out[43]:
In [59]:
# Plot df as a horizontal, cumulative histogram with the legend hidden.
df.plot(kind="hist", orientation='horizontal', cumulative=True,legend=False)
# Summary statistics; as the last expression, this is what the cell displays.
df.describe()
Out[59]:
In [49]:
# Make it look like R: a quick structural overview of a frame.
def Rstr(df):
    """Return ``(df.shape, uniques)`` for the given frame.

    ``uniques`` is the result of applying, over the frame, a function that
    wraps each column's unique values in a one-element list.
    """
    def _wrapped_uniques(column):
        # One-element list around the unique values, as in the one-line form.
        return [column.unique()]

    return df.shape, df.apply(_wrapped_uniques)
# Inspect the pandas frame: its shape plus the unique values per column.
Rstr(df)
Out[49]:
In [53]:
# Series.apply returns a new Series rather than modifying the column in place,
# so the result must be assigned back for the transformation to take effect.
df['Country'] = df['Country'].apply(transform_country)
# Build a 'Full Name' column by joining the first and last names with a space.
df['Full Name'] = df['First Name'] + ' ' + df['Last Name']
# Bare expression so the notebook displays the updated DataFrame.
df
Out[53]: