In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Location of the 2013.csv file on donnees.ville.montreal.qc.ca
# (fetched over plain HTTP when the next cell reads it).
url = "http://donnees.ville.montreal.qc.ca/storage/f/2014-01-20T20%3A48%3A50.296Z/2013.csv"

In [4]:
# Download and parse the CSV. The 'Date' column becomes the index and is
# parsed as dates written day-first (DD/MM/YYYY).
df = pd.read_csv(
    url,
    index_col='Date',
    parse_dates=True,
    dayfirst=True,
)

In [5]:
# Peek at the first two rows to check the parsed index and columns.
df.head(n=2)


Out[5]:
Unnamed: 1 Berri1 CSC Mais1 Mais2 Parc PierDup Rachel1 Totem_Laurier
Date
2013-01-01 00:00 0 0 1 0 6 0 1 0
2013-01-02 00:00 69 0 13 0 18 0 2 0

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()


Out[6]:
Berri1 CSC Mais1 Mais2 Parc PierDup Rachel1 Totem_Laurier
count 261.000000 261.000000 261.000000 261.000000 261.000000 261.000000 261.000000 261.000000
mean 2743.390805 1221.858238 1757.590038 3224.130268 1669.425287 1152.885057 3084.425287 1858.793103
std 2247.957848 1070.037364 1458.793882 2589.514354 1363.738862 1208.848429 2380.255540 1434.899574
min 0.000000 0.000000 1.000000 0.000000 6.000000 0.000000 0.000000 0.000000
25% 392.000000 12.000000 236.000000 516.000000 222.000000 12.000000 451.000000 340.000000
50% 2771.000000 1184.000000 1706.000000 3178.000000 1584.000000 818.000000 3111.000000 2087.000000
75% 4767.000000 2168.000000 3158.000000 5812.000000 3068.000000 2104.000000 5338.000000 3168.000000
max 6803.000000 3330.000000 4716.000000 7684.000000 4103.000000 4841.000000 8555.000000 4293.000000

In [7]:
# Line plot of the Berri1 and PierDup columns against the Date index.
df.loc[:, ['Berri1', 'PierDup']].plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f254aa105d0>

In [9]:
# `interact` lives in the ipywidgets package since IPython 4; the old
# IPython.html.widgets location is kept only as a fallback for legacy installs.
try:
    from ipywidgets import interact
except ImportError:
    from IPython.html.widgets import interact


@interact
def plot(n=(1, 30)):
    """Plot the n-point rolling mean of df['Berri1'].

    Parameters
    ----------
    n : int
        Rolling window size. The (1, 30) default is read by `interact`
        as the range of an integer slider.
    """
    series = df['Berri1']
    # pd.rolling_mean() was removed from pandas; Series.rolling() is its
    # replacement. Keep the old call only for pandas versions without it.
    if hasattr(series, 'rolling'):
        smoothed = series.rolling(n).mean()
    else:
        smoothed = pd.rolling_mean(series, n)
    # The first n-1 points have no complete window and come back as NaN.
    smoothed.dropna().plot()
    # Fixed y-range so the curve does not rescale as the slider moves.
    plt.ylim(0, 8000)
    plt.show()



In [10]:
import random
# Display floats with three decimal places in subsequent cell outputs.
%precision 3


Out[10]:
u'%.3f'

In [13]:
# Seed the generator so the sample lists (and everything derived from them)
# are identical on every Restart & Run All.
SEED = 0
random.seed(SEED)

n = 1000000
# Two lists of n uniform floats in [0, 1), drawn one after the other.
x = [random.random() for _ in range(n)]
y = [random.random() for _ in range(n)]
x[:3], y[:3]


Out[13]:
([0.304, 0.291, 0.593], [0.174, 0.704, 0.607])

In [15]:
%%timeit
# Pure-Python baseline: element-wise sum of the two lists, one index at a time.
z = [x[i] + y[i] for i in range(n)]


1 loops, best of 3: 111 ms per loop

In [16]:
# Convert both Python lists to NumPy arrays, then show the first three
# values of xa as a sanity check.
xa, ya = np.array(x), np.array(y)
xa[:3]


Out[16]:
array([ 0.304,  0.291,  0.593])

In [19]:
# Same element-wise sum, done as a single vectorized NumPy addition.
%timeit za = xa + ya


100 loops, best of 3: 3.06 ms per loop

In [20]:
# Pairwise |x_i - y_j| over the first 1000 samples of each array: the
# (1000, 1) column broadcasts against the (1000,) row to give 1000 x 1000.
da = np.abs(xa[:1000, np.newaxis] - ya[:1000])
da


Out[20]:
array([[ 0.13 ,  0.4  ,  0.303, ...,  0.193,  0.522,  0.333],
       [ 0.117,  0.413,  0.316, ...,  0.18 ,  0.535,  0.345],
       [ 0.419,  0.111,  0.014, ...,  0.482,  0.233,  0.044],
       ..., 
       [ 0.078,  0.608,  0.512, ...,  0.015,  0.73 ,  0.541],
       [ 0.738,  0.208,  0.304, ...,  0.801,  0.086,  0.275],
       [ 0.552,  0.021,  0.118, ...,  0.614,  0.1  ,  0.089]])

In [22]:
# Pure-Python baseline: 1000 x 1000 absolute differences via a nested comprehension.
%timeit [abs(x[i] - y[j]) for i in range(1000) for j in range(1000)]
# NumPy version: the same differences computed with broadcasting.
%timeit np.abs(xa[:1000,None] - ya[:1000])


1 loops, best of 3: 130 ms per loop
100 loops, best of 3: 4.39 ms per loop

In [30]:
# Small example of what indexing with None does to an array's shape.
test1 = [1, 2, 3, 4]
atest1 = np.array(test1)
# None inserts a new axis: shape (4,) becomes (4, 1), one element per row.
atest1[:, None]


Out[30]:
array([[1],
       [2],
       [3],
       [4]])

In [31]:
# Signed differences between the first 13 values of xa (as a column) and
# the first 3 values of ya (as a row): broadcasting yields a 13 x 3 array.
xa[:13, np.newaxis] - ya[:3]


Out[31]:
array([[ 0.13 , -0.4  , -0.303],
       [ 0.117, -0.413, -0.316],
       [ 0.419, -0.111, -0.014],
       [-0.024, -0.555, -0.458],
       [ 0.69 ,  0.16 ,  0.257],
       [-0.038, -0.568, -0.471],
       [ 0.617,  0.086,  0.183],
       [-0.127, -0.658, -0.561],
       [ 0.026, -0.504, -0.407],
       [-0.115, -0.646, -0.549],
       [ 0.083, -0.447, -0.351],
       [-0.166, -0.696, -0.599],
       [-0.031, -0.561, -0.465]])

In [33]:
from IPython.core.magic import (register_line_magic, register_cell_magic)
import pandas as pd
# The StringIO module only exists on Python 2; fall back to io.StringIO so
# the cell also runs on Python 3.
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

@register_cell_magic
def csv(line, cell):
    """Cell magic ``%%csv``: parse the cell body as CSV text.

    Parameters
    ----------
    line : str
        Text following ``%%csv`` on the magic line (unused).
    cell : str
        The cell contents, interpreted as CSV.

    Returns
    -------
    pandas.DataFrame
        The parsed table, which becomes the cell's output.
    """
    # Wrap the cell text in an in-memory file-like buffer so that
    # pandas' read_csv can parse it.
    sio = StringIO(cell)
    return pd.read_csv(sio)

In [34]:
%%csv
col1,col2,col3
0,1,2
3,4,5
6,7,8


Out[34]:
col1 col2 col3
0 0 1 2
1 3 4 5
2 6 7 8

In [36]:
# Build the frame explicitly instead of reading it from `_` (the last cell
# output): `_` changes after every cell that displays a value, so
# re-running this cell would silently bind `mydef` to something else.
# `csv` is the function registered as the %%csv magic; the text below is
# the same table that was passed to %%csv.
mydef = csv('', (
    'col1,col2,col3\n'
    '0,1,2\n'
    '3,4,5\n'
    '6,7,8\n'
))
mydef.describe()


Out[36]:
col1 col2 col3
count 3.0 3.0 3.0
mean 3.0 4.0 5.0
std 3.0 3.0 3.0
min 0.0 1.0 2.0
25% 1.5 2.5 3.5
50% 3.0 4.0 5.0
75% 4.5 5.5 6.5
max 6.0 7.0 8.0

In [ ]: