notebook.community

Edit and run



In [2]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:

    
# Location of the input file (2013.csv) on donnees.ville.montreal.qc.ca.
# NOTE(review): plain-HTTP link with a timestamped storage path — confirm it
# still resolves before relying on a fresh run.
url = "http://donnees.ville.montreal.qc.ca/storage/f/2014-01-20T20%3A48%3A50.296Z/2013.csv"



In [4]:

    
# Download the CSV and load it into a DataFrame indexed by its 'Date' column.
# The index is parsed as dates, reading the day before the month.
df = pd.read_csv(
    url,
    index_col='Date',
    parse_dates=True,
    dayfirst=True,
)



In [5]:

    
# Preview the first two rows to check that the file was parsed as expected.
df.head(n=2)









    Out[5]:






  
    
      
      Unnamed: 1
      Berri1
      CSC
      Mais1
      Mais2
      Parc
      PierDup
      Rachel1
      Totem_Laurier
    
    
      Date
      
      
      
      
      
      
      
      
      
    
  
  
    
      2013-01-01
      00:00
      0
      0
      1
      0
      6
      0
      1
      0
    
    
      2013-01-02
      00:00
      69
      0
      13
      0
      18
      0
      2
      0



In [6]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
df.describe()









    Out[6]:






  
    
      
      Berri1
      CSC
      Mais1
      Mais2
      Parc
      PierDup
      Rachel1
      Totem_Laurier
    
  
  
    
      count
      261.000000
      261.000000
      261.000000
      261.000000
      261.000000
      261.000000
      261.000000
      261.000000
    
    
      mean
      2743.390805
      1221.858238
      1757.590038
      3224.130268
      1669.425287
      1152.885057
      3084.425287
      1858.793103
    
    
      std
      2247.957848
      1070.037364
      1458.793882
      2589.514354
      1363.738862
      1208.848429
      2380.255540
      1434.899574
    
    
      min
      0.000000
      0.000000
      1.000000
      0.000000
      6.000000
      0.000000
      0.000000
      0.000000
    
    
      25%
      392.000000
      12.000000
      236.000000
      516.000000
      222.000000
      12.000000
      451.000000
      340.000000
    
    
      50%
      2771.000000
      1184.000000
      1706.000000
      3178.000000
      1584.000000
      818.000000
      3111.000000
      2087.000000
    
    
      75%
      4767.000000
      2168.000000
      3158.000000
      5812.000000
      3068.000000
      2104.000000
      5338.000000
      3168.000000
    
    
      max
      6803.000000
      3330.000000
      4716.000000
      7684.000000
      4103.000000
      4841.000000
      8555.000000
      4293.000000



In [7]:

    
# Line plot of the 'Berri1' and 'PierDup' columns against the date index.
df.loc[:, ['Berri1', 'PierDup']].plot()









    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f254aa105d0>



In [9]:

    
from IPython.html.widgets import interact
@interact
def plot(n=(1, 30)):
    """Plot the rolling mean of the 'Berri1' column over a window of ``n`` rows.

    Decorating with ``interact`` builds a widget from the ``(1, 30)`` default,
    so the function is called again with a new ``n`` whenever the control
    changes.

    Parameters
    ----------
    n : int
        Rolling-window length, in rows of ``df``.
    """
    # Series.rolling(n).mean() is the replacement for the module-level
    # pd.rolling_mean, which was deprecated in pandas 0.18 and later removed.
    # The first n - 1 entries of the result are NaN and are dropped.
    df['Berri1'].rolling(n).mean().dropna().plot()
    # Keep the y-axis at 0-8000 for every window size.
    plt.ylim(0, 8000)
    plt.show()



In [10]:

    
import random
# Display floats with three decimal places in cell outputs (format '%.3f').
%precision 3









    Out[10]:





u'%.3f'



In [13]:

    
# Build two Python lists of n pseudo-random floats in [0, 1).
# The generator is seeded first so that a fresh "Restart & Run All" yields
# the same numbers every time.
random.seed(0)
n = 1000000
x = [random.random() for _ in range(n)]
y = [random.random() for _ in range(n)]
# Show the first three values of each list.
x[:3], y[:3]









    Out[13]:





([0.304, 0.291, 0.593], [0.174, 0.704, 0.607])



In [15]:

    
%%timeit
# Pure-Python baseline: element-wise sum of the two lists via an index loop.
z = [x[i] + y[i] for i in range(n)]









    



1 loops, best of 3: 111 ms per loop



In [16]:

    
# Turn both Python lists into NumPy arrays, then peek at the first three
# values of the first one.
xa, ya = np.array(x), np.array(y)
xa[:3]









    Out[16]:





array([ 0.304,  0.291,  0.593])



In [19]:

    
# The same element-wise sum, done as a single vectorized NumPy addition.
%timeit za = xa + ya









    



100 loops, best of 3: 3.06 ms per loop



In [20]:

    
# |xa[i] - ya[j]| for the first 1000 entries of each array. The extra axis
# turns the xa slice into a column, so the subtraction broadcasts to a
# 1000 x 1000 matrix.
da = np.abs(xa[:1000, np.newaxis] - ya[:1000])
da









    Out[20]:





array([[ 0.13 ,  0.4  ,  0.303, ...,  0.193,  0.522,  0.333],
       [ 0.117,  0.413,  0.316, ...,  0.18 ,  0.535,  0.345],
       [ 0.419,  0.111,  0.014, ...,  0.482,  0.233,  0.044],
       ..., 
       [ 0.078,  0.608,  0.512, ...,  0.015,  0.73 ,  0.541],
       [ 0.738,  0.208,  0.304, ...,  0.801,  0.086,  0.275],
       [ 0.552,  0.021,  0.118, ...,  0.614,  0.1  ,  0.089]])



In [22]:

    
# Pure-Python version: all 1000 x 1000 pairwise absolute differences in a nested comprehension.
%timeit [abs(x[i] - y[j]) for i in range(1000) for j in range(1000)]
# NumPy version of the same computation, using broadcasting.
%timeit np.abs(xa[:1000,None] - ya[:1000])









    



1 loops, best of 3: 130 ms per loop
100 loops, best of 3: 4.39 ms per loop



In [30]:

    
# Small example of what indexing with [:, None] does: a 1-D array of length 4
# gains a trailing axis and becomes a 4 x 1 column.
test1 = [1, 2, 3, 4]
atest1 = np.array(test1)
# Only the last expression of a cell is displayed, so it is the sole bare
# expression kept here.
atest1[:, None]









    Out[30]:





array([[1],
       [2],
       [3],
       [4]])



In [31]:

    
# Broadcasting a 13 x 1 column against a length-3 row gives a 13 x 3 matrix
# of signed differences.
xa[:13, np.newaxis] - ya[:3]









    Out[31]:





array([[ 0.13 , -0.4  , -0.303],
       [ 0.117, -0.413, -0.316],
       [ 0.419, -0.111, -0.014],
       [-0.024, -0.555, -0.458],
       [ 0.69 ,  0.16 ,  0.257],
       [-0.038, -0.568, -0.471],
       [ 0.617,  0.086,  0.183],
       [-0.127, -0.658, -0.561],
       [ 0.026, -0.504, -0.407],
       [-0.115, -0.646, -0.549],
       [ 0.083, -0.447, -0.351],
       [-0.166, -0.696, -0.599],
       [-0.031, -0.561, -0.465]])



In [33]:

    
from IPython.core.magic import (register_line_magic, register_cell_magic)
import pandas as pd
# StringIO lives in different modules on Python 2 and Python 3. Try the
# Python 2 location first and fall back, so this cell runs on either.
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

@register_cell_magic
def csv(line, cell):
    """Cell magic ``%%csv``: parse the body of the cell as CSV text.

    Parameters
    ----------
    line : str
        Text following ``%%csv`` on the magic line; not used.
    cell : str
        The cell body, handed to pandas as CSV content.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # Wrap the cell text in an in-memory, file-like buffer so that
    # pandas' read_csv can parse it directly.
    return pd.read_csv(StringIO(cell))



In [34]:

    
%%csv
col1,col2,col3
0,1,2
3,4,5
6,7,8



In [36]:

    
# NOTE(review): `_` is IPython's "most recent output" variable, so this only
# picks up the DataFrame from the %%csv cell if that cell produced the last
# output before this line runs. It is fragile under re-execution — confirm
# the intended source, or bind the DataFrame to a name explicitly.
mydef = _
# Summary statistics of whatever `_` held.
mydef.describe()



In [ ]:

	Berri1	CSC	Mais1	Mais2	Parc	PierDup	Rachel1	Totem_Laurier
count	261.000000	261.000000	261.000000	261.000000	261.000000	261.000000	261.000000	261.000000
mean	2743.390805	1221.858238	1757.590038	3224.130268	1669.425287	1152.885057	3084.425287	1858.793103
std	2247.957848	1070.037364	1458.793882	2589.514354	1363.738862	1208.848429	2380.255540	1434.899574
min	0.000000	0.000000	1.000000	0.000000	6.000000	0.000000	0.000000	0.000000
25%	392.000000	12.000000	236.000000	516.000000	222.000000	12.000000	451.000000	340.000000
50%	2771.000000	1184.000000	1706.000000	3178.000000	1584.000000	818.000000	3111.000000	2087.000000
75%	4767.000000	2168.000000	3158.000000	5812.000000	3068.000000	2104.000000	5338.000000	3168.000000
max	6803.000000	3330.000000	4716.000000	7684.000000	4103.000000	4841.000000	8555.000000	4293.000000