In [1]:
import numpy as np
From lists
In [2]:
ary = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ary
Out[2]:
With linspace, always useful for plotting data
In [3]:
np.linspace(0, 1, 101)
Out[3]:
If you prefer the stepsize
In [4]:
np.arange(0, 1, 0.01)
Out[4]:
Many more ways:
In [ ]:
np.ones(10)
np.zeros(10)
np.loadtxt('filename.txt', ...)
np.genfromtxt('filename.txt', ...)
...
In [6]:
ary.shape
Out[6]:
In [7]:
ary = np.array([[0, 1., 2., 3., 4., 5], [5, 6, 7, 8, 9, 10], [11, 12, 13, 14, 15, 16]], dtype='float')
ary
Out[7]:
In [8]:
ary.shape
Out[8]:
No problem to change the shape
In [9]:
# Reshape via ndarray.reshape instead of assigning to ``ary.shape``:
# the NumPy documentation discourages setting the shape attribute in place.
# The element count (18) is unchanged, only the layout becomes 6 rows x 3 columns.
ary = ary.reshape(6, 3)
ary
Out[9]:
Number of all elements in the array
In [10]:
ary.size
Out[10]:
You can do all the nice indexing and slicing.
In [11]:
ary = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [12]:
ary[2]
Out[12]:
In [13]:
ary[3:-3]
Out[13]:
In [14]:
ary[:-1:2]
Out[14]:
More fun on higher dimension
In [15]:
ary = np.array([[0, 1., 2., 3., 4.], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19], [20, 21, 22, 23, 24]])
ary
Out[15]:
In [16]:
ary[2, 2]
Out[16]:
In [17]:
ary[-1, [0, 2, 4]]
Out[17]:
Slicing the center
In [18]:
ary[1:-1, 1:-1]
Out[18]:
Slicing more advanced
In [19]:
ary[::2, ::2]
Out[19]:
In [20]:
ary[2]
Out[20]:
In [21]:
ary = np.array([1., 2., 3., 4., 5, 6, 7, 8, 9])
ary.dtype
Out[21]:
In [22]:
ary = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
ary.dtype
Out[22]:
In [23]:
# Request extended precision through np.longdouble rather than the string
# 'float128': the float128 name is only defined on platforms whose C
# ``long double`` is stored in 128 bits, whereas np.longdouble always exists
# (and reports as float128 on those platforms).
ary = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.longdouble)
ary.dtype
Out[23]:
Custom dtype, more on that later
In [24]:
# Structured dtype: every record holds an integer field, a float field and an
# arbitrary Python object (used here for a tuple of spectrum values).
dt = [
    ('b-field', 'int'),
    ('temp', 'float'),
    ('spectrum', 'object'),
]

# One tuple per record, in the field order declared by ``dt``.
records = [
    (1, 4.2, (333., 400., 600, 234.)),
    (2, 1.5, (12, 22., 23., 221)),
    (4, 0.3, (212., 21., 21., 21.)),
]
ary = np.array(records, dtype=dt)
In [25]:
ary
Out[25]:
In [26]:
ary[0][1]
Out[26]:
In [27]:
ary[2][1]
Out[27]:
Number of bytes of one element
In [28]:
ary = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
ary.itemsize
Out[28]:
In [29]:
# np.longdouble is the portable spelling of the extended-precision float;
# the string 'float128' is not defined on every platform. The item size
# shown below is therefore platform dependent.
ary = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.longdouble)
ary.itemsize
Out[29]:
Bytes of all elements
In [30]:
ary.nbytes
Out[30]:
is equal to
In [31]:
ary.itemsize * ary.size
Out[31]:
Keep in mind that the total array object is, of course, a little bit bigger than that
In [32]:
from sys import getsizeof
getsizeof(ary)
Out[32]:
Operators act elementwise on NumPy arrays
In [33]:
ary = np.array([0, 1, 2, 3, 4, 5])
In [34]:
ary + 10
Out[34]:
In [35]:
ary - 33
Out[35]:
In [36]:
ary * 3
Out[36]:
In [37]:
ary / 2
Out[37]:
In [38]:
ary ** 2
Out[38]:
NumPy includes a number of mathematical functions, the so-called universal functions (ufuncs).
Ufuncs act elementwise on arrays
In [39]:
x = np.linspace(0, 2 * np.pi, 10)
x
Out[39]:
In [40]:
np.sin(x)
Out[40]:
In [41]:
import matplotlib.pyplot as plt
%matplotlib inline
In [42]:
x = np.linspace(0, 2 * np.pi, 100)
plt.plot(x, np.sin(x))
Out[42]:
In [43]:
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)
dx = (x[:-1] - x[1:])
dy = (y[:-1] - y[1:])
In [44]:
plt.plot(x, y, x[1:], dy / dx)
plt.plot(x, np.cos(x), ls='', marker='o', markevery=3)
Out[44]:
Too hard to remember? Use np.diff() instead; it also works for higher orders and higher-dimensional arrays
In [45]:
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)
plt.plot(x, y)
plt.plot(x[1:], np.diff(y) / np.diff(x))
Out[45]:
This does not only work for the numpy universal functions.
In [2]:
def parabola(x, a=1, b=0, c=0):
    """Evaluate ``a * (x - b)**2 - c`` at ``x``.

    ``x`` may be a plain number or a NumPy array; arrays are evaluated
    elementwise. ``b`` is subtracted from ``x`` before squaring, ``a``
    scales the squared term and ``c`` is subtracted from the result.
    """
    shifted = x - b
    return a * shifted ** 2 - c
In [47]:
x = np.linspace(-6, 6, 1001)
plt.plot(x, parabola(x, a=2, b=2, c=3))
plt.plot(x, parabola(x, a=2, b=-2, c=3))
plt.grid()
plt.ylim(-3, 25)
Out[47]:
It still handles normal python types
In [48]:
parabola(4)
Out[48]:
In [49]:
parabola(3.14)
Out[49]:
But you cannot pass a list, tuple, ... directly into it
In [50]:
parabola([1,2,3,4])
But you can always transform it into a numpy array
In [51]:
np.exp([1,2,34])
Out[51]:
You can extend your function's behaviour by adding a simple line
In [7]:
def parabola(x, a=1, b=0, c=0):
    """Evaluate ``a * (x - b)**2 - c`` for scalars and array-likes.

    Parameters
    ----------
    x : scalar or array-like
        Points at which to evaluate; lists and tuples are converted to a
        NumPy array first, existing arrays are used without copying.
    a, b, c : scalar, optional
        Scale factor, horizontal shift and subtracted offset.

    Returns
    -------
    numpy.ndarray
        The elementwise result (a 0-d array for scalar input).
    """
    # np.asarray copies only when it has to. The former
    # ``np.array(x, copy=False)`` raises ValueError on NumPy >= 2.0 whenever
    # a copy is unavoidable -- which is the case for every list or tuple.
    x = np.asarray(x)
    return a * (x - b)**2 - c
In [26]:
data = np.array([1, 2, 3, 4, 5])
Define a boolean mask
In [27]:
mask = np.array([True, True, False, True, False])
Select the items where the mask is True
In [29]:
data[mask]
Out[29]:
Why would I want to do that?
In [44]:
import matplotlib.pyplot as plt
%matplotlib inline
In [165]:
# One sine curve sampled on [-2*pi, 2*pi] (np.linspace defaults to 50 points).
x = np.linspace(-2 * np.pi, 2 * np.pi)
y = np.sin(x)

plt.figure(figsize=(12, 3))
plt.plot(x, y, lw=2)

# Boolean mask: every sample on the positive half-waves.
mask = y > 0
plt.plot(x[mask], y[mask], ls='', marker='o', color='red', ms=8)

# Masks combine elementwise with & : negative samples right of the origin.
mask = (y < 0) & (x > 0)
plt.plot(x[mask], y[mask], ls='', marker='D', color='green', ms=8)

# Axis limits only need to be set once per figure.
plt.xlim(-2 * np.pi, 2 * np.pi)
plt.ylim(-1.1, 1.1)
Out[165]:
In [164]:
# Create some random spikes: roughly 5 % of the samples are replaced by an
# integer drawn from [-10, 10); the rest is uniform noise shifted by -0.5.
y = []
for _ in range(1000):
    r = np.random.random()
    if r > 0.95:
        y.append(np.random.randint(-10, 10))
    else:
        y.append(r - 0.5)
y = np.array(y)
x = np.arange(y.size)

# Plot the raw data
plt.figure(figsize=(12, 3))
plt.plot(x, y)

# Threshold in the same units as y: the standard deviation. The variance
# used before has units of y**2 and is not comparable to y itself.
threshold = np.std(y)

# Find all positive spikes
m_pos = y > threshold
plt.plot(x[m_pos], y[m_pos], ls='', marker='o')

# Find all negative spikes
m_neg = y < -threshold
plt.plot(x[m_neg], y[m_neg], ls='', marker='o')
plt.ylim(-12, 12)
Out[164]:
A few words about Speed.
You don't have to understand the following timing in detail. The message is:
In [53]:
def py_pow(data):
    """Square every element of ``data`` with a plain Python loop.

    Returns the squares as a list so the result can be inspected and
    compared with the vectorised version, instead of being thrown away.
    The explicit loop is kept on purpose: it is the slow reference for the
    timing comparison and gives the line profiler separate lines to report.
    """
    squared = []
    for point in data:
        squared.append(point ** 2)
    return squared
def np_pow(data):
    """Square every element of the array ``data`` in one vectorised step.

    Returns the squared array so the result is usable and can be checked,
    rather than being computed and discarded.
    """
    return data ** 2
Time the functions using IPython %timeit magic
In [54]:
data = np.random.random(100000)
%timeit py_pow(data)
%timeit np_pow(data)
Let's take a more detailed look with the line_profiler. You have to install it first!
In [55]:
%load_ext line_profiler
In [56]:
%lprun -f py_pow py_pow(data)
In [57]:
%load_ext Cython
In [58]:
%%cython
# Cython version of the squaring loop used in the timing comparison.
# ``data`` is declared as a one-dimensional typed memoryview of C doubles.
def cy_pow(double[:] data):
    # Loop index declared as a C long.
    cdef long i
    for i in range(len(data)):
        # The square is computed and discarded; the function returns nothing.
        data[i] ** 2
In [59]:
data = np.random.random(100000)
%timeit cy_pow(data)