NumPy Primer


In [ ]:
import numpy as np

1-D

(Generate) Some Data


In [ ]:
# Sample data: twenty country names stored as a 1-D NumPy array of strings.
countries = np.array(
    [
        'Afghanistan', 'Albania', 'Algeria', 'Angola',
        'Argentina', 'Armenia', 'Australia', 'Austria',
        'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
        'Barbados', 'Belarus', 'Belgium', 'Belize',
        'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina',
    ]
)

# Number of elements in the array (shown as the cell output).
countries.size

In [ ]:
# IPython/Jupyter help syntax (not plain Python): a leading `?` displays the help for np.arange.
?np.arange

In [ ]:
# IPython/Jupyter help syntax (not plain Python): a trailing `?` displays the docstring of np.arange.
np.arange?

In [ ]:
# arange(start, stop, step): every second integer from 1 up to, but excluding, countries.size.
values = np.arange(1, countries.size, step=2)
values

In [ ]:
# arange with only a stop value: one integer per country, counting from 0.
values = np.arange(0, countries.size)
values

In [ ]:
# IPython/Jupyter help syntax (not plain Python): a trailing `?` displays the docstring of np.linspace.
np.linspace?

In [ ]:
# One evenly spaced value per country, running from 0 to 95.
# This is the `values` array used by the cells that follow.
values = np.linspace(start=0, stop=95, num=countries.size)
values

Basic Operations


In [ ]:
# Accessing elements: a single integer index returns one element (indices start at 0).
print(countries[0], countries[3], sep='\n')

In [ ]:
# Slicing: [start:stop] includes start and excludes stop; an omitted bound
# extends the slice to that end of the array.
print(
    countries[0:3],   # explicit start and stop
    countries[:3],    # start omitted
    countries[17:],   # stop omitted
    countries[:],     # both omitted: the whole array
    sep='\n',
)

In [ ]:
# Element types: every array has one dtype shared by all of its elements.
print(
    countries.dtype,
    values.dtype,
    np.array([0, 1, 2, 3]).dtype,                  # integers
    np.array([1.0, 1.5, 2.0, 2.5]).dtype,          # floats
    np.array([True, False, True]).dtype,           # booleans
    np.array(['AL', 'AK', 'AZ', 'AR', 'CA']).dtype,  # strings
    sep='\n',
)

In [ ]:
# Looping 1: iterating directly over the array yields its elements in order.
for country in countries:
    print(
        'Country {}'.format(country)
    )

In [ ]:
# Looping 2: iterate over positions so the same index can address
# both `countries` and `values`.
for i in range(countries.size):
    country = countries[i]
    print('Country {} -> {}'.format(country, values[i]))

In [ ]:
# Looping 3: enumerate() hands out the position and the element together,
# so only `values` still needs to be indexed.
for i, country in enumerate(countries):
    print('Country {} -> {}'.format(country, values[i]))

Statistical Functions


In [ ]:
# Summary statistics of `values`, one per output line.
print(
    values.mean(),  # arithmetic mean
    values.std(),   # standard deviation
    values.max(),   # largest element
    values.sum(),   # sum of all elements
    sep='\n',
)

In [ ]:
# argmax gives the position of the largest value; the same position
# selects the matching country.
i = np.argmax(values)
countries[i], values[i]

In [ ]:
# argmin gives the position of the smallest value; the same position
# selects the matching country.
i = np.argmin(values)
countries[i], values[i]

Index Arrays


In [ ]:
# Indexing with a boolean array keeps the elements whose mask entry is True.
a = np.array([1, 2, 3, 4])
b = np.array([True, True, False, False])  # mask: keep the first two elements

print(
    a[b]
)

In [ ]:
# The boolean mask can also be built inline instead of being named first.
print(
    a[
        np.array([True, False, True, False])
    ]
)

In [ ]:
# Comparing an array with a scalar yields a boolean array, which can then
# be used as a mask on another array of the same length.
a = np.array([1, 2, 3, 4, 5])
b = np.array([1, 2, 3, 2, 1])

print(
    b == 2,     # the mask itself
    a[b == 2],  # elements of `a` at the positions where `b` equals 2
    sep='\n',
)

In [ ]:
# values/countries, where value>20
# The mask is computed from `values` and applied to both arrays.
print(
    values[values > 20],
    countries[values > 20],
    sep='\n',
)

In [ ]:
# values/countries, where value>20 and value<80
# Two element-wise conditions are combined into a single mask.
print(
    values[np.logical_and(values > 20, values < 80)],
    countries[np.logical_and(values > 20, values < 80)],
    sep='\n',
)

In [ ]:
# sum of all values, where value>20 and value<80
print(
    np.sum(values[np.logical_and(values > 20, values < 80)])
)

In [ ]:
# also sum of all values, where value>20 and value<80
# (De Morgan's law: A & B == not(notA | notB)), so the mask is built by
# negating "value<=20 or value>=80".
print(
    values[
        np.logical_not(np.logical_or(values <= 20, values >= 80))
    ].sum()
)

2-D


In [ ]:
# Start from the integers 0..49 in a flat array, then reshape them
# into 10 rows of 5 columns.
values_1d = np.arange(50)
print("1-D: {}".format(values_1d))
values_2d = np.reshape(values_1d, (10, 5))
values_2d

In [ ]:
# Accessing elements: a 2-D array is indexed as [row, column].
print(
    values_2d[1, 3],      # single element at row 1, column 3
    values_2d[1:3, 3:5],  # sub-array: rows 1-2, columns 3-4
    values_2d[1, :],      # every column of row 1
    sep='\n',
)

In [ ]:
# Vectorized operations on rows or columns: the addition is element-wise.
print(
    values_2d[0, :] + values_2d[1, :],  # row 0 + row 1
    values_2d[:, 0] + values_2d[:, 1],  # column 0 + column 1
    sep='\n',
)

In [ ]:
# Vectorized operations on entire arrays: `+` adds two equally shaped
# arrays element by element.
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
b = np.array([
    [1, 1, 1],
    [2, 2, 2],
    [3, 3, 3],
])
a + b

In [ ]:
# IPython/Jupyter help syntax (not plain Python): a trailing `?` displays the docstring of np.sum.
np.sum?

In [ ]:
# NumPy axis argument
# `axis` names the dimension that the reduction collapses:
#   axis=0 collapses the rows    -> one result per column
#   axis=1 collapses the columns -> one result per row
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

print(a.sum())        # no axis: sum of all elements -> 45
print(a.sum(axis=0))  # sum down each column -> [12 15 18]
print(a.sum(axis=1))  # sum across each row  -> [ 6 15 24]