numpy testing

Experimenting with numpy while following the Udemy course data-analysis-in-python-with-pandas. The original source code is available at https://github.com/anabranch/data_analysis_with_python_and_pandas and the numpy documentation is at http://www.numpy.org/


In [14]:
# Make print() a function so the same cell syntax works under Python 2.
from __future__ import print_function
import sys
# Report the interpreter version that produced the outputs in this notebook.
print("Python version is {pv}".format(pv=sys.version))
import numpy as np
# Report the numpy version as well.
print("numpy version is {npv}".format(npv=np.__version__))


Python version is 2.7.11 |Anaconda 2.3.0 (64-bit)| (default, Dec  6 2015, 18:08:32) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
numpy version is 1.10.4

In [35]:
# Build the integers 0..19 and report a few summary statistics.
npa = np.arange(20)
print("Our numpy array content is : {my_arr}".format(my_arr=npa))
print("And the mean of this array  is {mean}".format(mean=npa.mean()))

# One template shared by both extremes, so the two messages cannot drift apart.
# The text ends right after the index: there is no opening bracket to close.
extreme_report = "the {kind} value of this array  is {value} at index {pos}"
# min()/max() give the extreme value; argmin()/argmax() give its position.
print(extreme_report.format(kind="minimum", value=npa.min(), pos=npa.argmin()))
print(extreme_report.format(kind="maximum", value=npa.max(), pos=npa.argmax()))


Our numpy array content is : [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
And the mean of this array  is 9.5
the minimum value of this array  is 0 at index 0]
the maximum value of this array  is 19 at index 19]

In [17]:
np2 = np.arange(20000)

Now let's see why numpy is great by comparing the performance of filtering with a list comprehension against numpy boolean selection.


In [20]:
%timeit [x for x in np2 if x % 2 == 0] # list comprehension


100 loops, best of 3: 6.65 ms per loop

In [19]:
%timeit np2[np2 % 2 == 0]  # numpy boolean selection is really MUCH FASTER


1000 loops, best of 3: 317 µs per loop

In [23]:
npa[npa > 10]


Out[23]:
array([11, 12, 13, 14, 15, 16, 17, 18, 19])

In [25]:
# Boolean masks are combined element-wise with & (and) and | (or).
# Each comparison needs its own parentheses because & binds tighter than > and <.
npa[(npa > 10) & (npa < 15)]


Out[25]:
array([11, 12, 13, 14])

In [26]:
type(npa)


Out[26]:
numpy.ndarray

In [27]:
type(npa[0])


Out[27]:
numpy.int64

In [31]:
# Rebinds np2 (previously np.arange(20000)) to a two-element array built from
# float literals, then shows the type numpy uses for one of its elements.
np2 = np.array([1.0,2.0])
type(np2[0])


Out[31]:
numpy.float64

In [32]:
np2.dtype


Out[32]:
dtype('float64')

In [55]:
# Seed the legacy global generator so the draws below are reproducible.
np.random.seed(10)
# randint replaces the deprecated random_integers. Its upper bound is
# exclusive, so 101 keeps the draws in the closed interval [0, 100].
npr = np.random.randint(0, 101, 2*3)

In [56]:
npr.shape


Out[56]:
(6,)

In [57]:
# Rearrange the 6 values as 2 rows by 3 columns and show the resulting shape.
npr.reshape(2,3).shape


Out[57]:
(2, 3)

In [63]:
npr.reshape(2,3)


Out[63]:
array([[  9, 100,  15],
       [ 64,  28,  89]])

In [58]:
# Draw a 3x2 array of integers from the closed interval [0, 10].
# randint replaces the deprecated random_integers; its upper bound is
# exclusive, hence 11.
np3x2 = np.random.randint(0, 11, (3, 2))

In [59]:
np3x2.shape


Out[59]:
(3, 2)

In [60]:
np3x2


Out[60]:
array([[ 0,  1],
       [10,  8],
       [ 9,  0]])

In [70]:
# Row index 1 is the second row; the explicit ":" selects every column of it.
print("second row is : {arr}".format(arr=np3x2[1, :]))
# Summing that row adds up its column values.
print("Sum of values of second row is : {arr_sum}".format(arr_sum=np3x2[1, :].sum()))


second row is : [10  8]
Sum of values of second row is : 18

In [62]:
np3x2[:,1]


Out[62]:
array([1, 8, 0])

In [72]:
np3x2[:,1]


Out[72]:
array([1, 8, 0])

In [73]:
np3x2


Out[73]:
array([[ 0,  1],
       [10,  8],
       [ 9,  0]])

In [74]:
# NOTE(review): `ar` is evaluated here before any visible cell assigns it,
# so this cell raises NameError; the assignment only appears in the next cell.
ar


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-74-33cf8b7b9d40> in <module>()
----> 1 ar

NameError: name 'ar' is not defined

In [75]:
ar = np.arange(12)
ar


Out[75]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [77]:
# Draw 12 integers from the closed interval [1, 12].
# randint replaces the deprecated random_integers(12, size=12), which treated
# its single positional argument as the inclusive maximum with a minimum of 1.
# randint's upper bound is exclusive, hence (1, 13).
ar2 = np.random.randint(1, 13, size=12)
ar2


Out[77]:
array([ 2,  9,  5,  2,  4,  7,  6,  4, 10,  7, 10,  2])

In [78]:
np.concatenate((ar,ar2))


Out[78]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  2,  9,  5,  2,  4,
        7,  6,  4, 10,  7, 10,  2])

In [80]:
np.vstack((ar,ar2))


Out[80]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [ 2,  9,  5,  2,  4,  7,  6,  4, 10,  7, 10,  2]])

In [ ]: