In [391]:
import numpy as np

In [392]:
### Data Processing Using Arrays

In [393]:
points = np.arange(-5, 5 ,0.01)

In [394]:
xs, ys = np.meshgrid(points, points)

In [395]:
xs, ys, xs.shape, ys.shape


Out[395]:
(array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        ..., 
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]]),
 array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
        [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
        [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
        ..., 
        [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
        [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
        [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]]),
 (1000, 1000),
 (1000, 1000))

In [396]:
nx, ny = (3, 2)

In [397]:
x = np.linspace(0, 1, nx)
y = np.linspace(0, 1, ny)
x, y


Out[397]:
(array([ 0. ,  0.5,  1. ]), array([ 0.,  1.]))

In [398]:
xv, yv = np.meshgrid(x ,y)
xv, yv


Out[398]:
(array([[ 0. ,  0.5,  1. ],
        [ 0. ,  0.5,  1. ]]), array([[ 0.,  0.,  0.],
        [ 1.,  1.,  1.]]))

In [399]:
import matplotlib.pyplot as plt

In [400]:
z = np.sqrt(xs ** 2 + ys ** 2)
z.shape


Out[400]:
(1000, 1000)

In [401]:
%matplotlib inline

In [402]:
plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")


Out[402]:
<matplotlib.text.Text at 0x10923668>

In [403]:
### Expressing Conditional Logic as Array Operations

In [404]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])

In [405]:
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

In [406]:
cond = np.array([True, False, True, True, False])

In [407]:
result = [(x if c else y) for x,y,c in zip(xarr, yarr, cond)]
result


Out[407]:
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

In [408]:
result = np.where(cond, xarr, yarr)

In [409]:
result


Out[409]:
array([ 1.1,  2.2,  1.3,  1.4,  2.5])

In [410]:
arr = np.random.randn(4,4)
arr


Out[410]:
array([[ 0.89719068, -0.59082067,  0.08638771, -0.40744453],
       [-1.43860789,  0.64118068, -0.1174959 ,  0.29982887],
       [-0.91917278, -1.16851849, -2.2336861 ,  0.59057909],
       [-0.05189616,  0.78495099,  2.31168333,  0.05792402]])

In [411]:
np.where(arr > 0, 2, -2)


Out[411]:
array([[ 2, -2,  2, -2],
       [-2,  2, -2,  2],
       [-2, -2, -2,  2],
       [-2,  2,  2,  2]])

In [412]:
np.where(arr > 0, 2, arr)


Out[412]:
array([[ 2.        , -0.59082067,  2.        , -0.40744453],
       [-1.43860789,  2.        , -0.1174959 ,  2.        ],
       [-0.91917278, -1.16851849, -2.2336861 ,  2.        ],
       [-0.05189616,  2.        ,  2.        ,  2.        ]])

In [413]:
cond1 = np.array([True, False, True, True, False])
cond2 = np.array([True, True, False, True, True])

In [414]:
np.where(cond1&cond2, 0, np.where(cond1, 1, np.where(cond2, 2, 3)))


Out[414]:
array([0, 2, 1, 0, 2])

In [415]:
1 * (cond1&~cond2) + 2 * (cond2&~cond1) + 3 * ~(cond1|cond2)


Out[415]:
array([0, 2, 1, 0, 2])

In [416]:
cond.any(),cond.all()


Out[416]:
(True, False)

In [417]:
### Mathematical and Statistical Methods

In [418]:
arr = np.random.randn(5, 4)

In [419]:
arr.mean(), np.mean(arr)


Out[419]:
(0.078735775455800766, 0.078735775455800766)

In [420]:
arr.sum(), np.sum(arr)


Out[420]:
(1.5747155091160154, 1.5747155091160154)

In [421]:
arr.mean(1), arr.mean(axis = 1)


Out[421]:
(array([-0.0590002 , -0.11319965, -0.01009554,  0.58240973, -0.00643546]),
 array([-0.0590002 , -0.11319965, -0.01009554,  0.58240973, -0.00643546]))

In [422]:
arr = np.array([[0 ,1, 2], [3, 4, 5], [6, 7, 8]])

In [423]:
arr.cumsum(0), arr.cumsum(1)


Out[423]:
(array([[ 0,  1,  2],
        [ 3,  5,  7],
        [ 9, 12, 15]], dtype=int32), array([[ 0,  1,  3],
        [ 3,  7, 12],
        [ 6, 13, 21]], dtype=int32))

In [424]:
arr.cumprod(0), arr.cumprod(1)


Out[424]:
(array([[ 0,  1,  2],
        [ 0,  4, 10],
        [ 0, 28, 80]], dtype=int32), array([[  0,   0,   0],
        [  3,  12,  60],
        [  6,  42, 336]], dtype=int32))

In [425]:
np.argmax(arr), np.argmin(arr)


Out[425]:
(8, 0)

In [426]:
np.std(arr), np.var(arr)


Out[426]:
(2.5819888974716112, 6.666666666666667)

In [427]:
### Sorting

In [428]:
arr = np.random.randn(8)
arr


Out[428]:
array([-0.77807854, -0.16904977,  1.51955449,  0.52692534, -0.03035838,
        2.91852312,  0.72604482,  1.8828415 ])

In [429]:
arr.sort()
arr


Out[429]:
array([-0.77807854, -0.16904977, -0.03035838,  0.52692534,  0.72604482,
        1.51955449,  1.8828415 ,  2.91852312])

In [430]:
arr = np.random.randn(8, 3)
sort_arr = np.sort(np.sort(arr, 0), 1)
sort_arr


Out[430]:
array([[-2.52373145, -1.03623257, -0.66977315],
       [-0.87224341, -0.62378542, -0.56482426],
       [-0.54323473, -0.46871958, -0.26920322],
       [-0.387282  , -0.02238543,  0.37969673],
       [ 0.20922251,  0.43673629,  0.61680093],
       [ 0.63805486,  0.69349725,  0.69511149],
       [ 0.7048379 ,  0.7496964 ,  0.92688127],
       [ 1.76120777,  1.91741846,  1.95971845]])

In [431]:
np.sort(np.sort(arr, 0), 1) == np.sort(np.sort(arr, 1), 0)


Out[431]:
array([[ True,  True, False],
       [ True, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False,  True]], dtype=bool)

In [432]:
arr1 = np.array([[3,1],[2,4]])
arr2 = np.array([[3,1],[2,4]])

In [433]:
arr1.sort(0);arr1.sort(1)
arr2.sort(1);arr2.sort(0)
arr1, arr2


Out[433]:
(array([[1, 2],
        [3, 4]]), array([[1, 3],
        [2, 4]]))

In [434]:
### Unique and other set logic

In [435]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)


Out[435]:
array(['Bob', 'Joe', 'Will'], 
      dtype='<U4')

In [436]:
sorted(set(names))


Out[436]:
['Bob', 'Joe', 'Will']

In [437]:
values = np.array([6, 0, 0, 3, 2, 5, 6])

np.in1d(values, [2, 3, 6])


Out[437]:
array([ True, False, False,  True,  True, False,  True], dtype=bool)

In [438]:
'''
unique(x) Compute the sorted, unique elements in x
intersect1d(x, y) Compute the sorted, common elements in x and y
union1d(x, y) Compute the sorted union of elements
in1d(x, y) Compute a boolean array indicating whether each element of x is contained in y
setdiff1d(x, y) Set difference, elements in x that are not in y
setxor1d(x, y) Set symmetric differences; elements that are in either of the arrays, but not both
'''


Out[438]:
'\nunique(x) Compute the sorted, unique elements in x\nintersect1d(x, y) Compute the sorted, common elements in x and y\nunion1d(x, y) Compute the sorted union of elements\nin1d(x, y) Compute a boolean array indicating whether each element of x is contained in y\nsetdiff1d(x, y) Set difference, elements in x that are not in y\nsetxor1d(x, y) Set symmetric differences; elements that are in either of the arrays, but not both\n'

In [439]:
### Linear Algebra

In [440]:
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])

In [441]:
x.dot(y)


Out[441]:
array([[  28.,   64.],
       [  67.,  181.]])

In [442]:
np.dot(x,y)


Out[442]:
array([[  28.,   64.],
       [  67.,  181.]])

In [443]:
np.dot(x, np.ones(3))


Out[443]:
array([  6.,  15.])

In [444]:
from numpy.linalg import inv, qr

In [445]:
X = np.random.randn(5, 5)
X


Out[445]:
array([[ 0.83446316,  0.60659635,  1.92971243,  1.16415872,  1.01773482],
       [-0.09882387, -0.91500098,  0.6454825 , -0.95251268,  0.53225447],
       [-0.58681906,  0.30716669,  1.00049562,  1.6247212 ,  0.89281885],
       [-0.23930454, -0.58032391, -0.38393602, -0.65411855,  0.70856148],
       [ 0.6126028 ,  0.53907545, -0.21219374, -0.82998365,  0.09879829]])

In [446]:
mat = X.T.dot(X)
mat


Out[446]:
array([[ 1.48300039,  0.88546825,  0.92126211, -0.23975559,  0.16370178],
       [ 0.88546825,  1.92691548,  0.99567715,  2.00896153,  0.04664964],
       [ 0.92126211,  0.99567715,  5.33386227,  3.68444472,  2.90775118],
       [-0.23975559,  2.00896153,  3.68444472,  6.01900882,  1.58292326],
       [ 0.16370178,  0.04664964,  2.90775118,  1.58292326,  2.62802497]])

In [447]:
inv(mat)


Out[447]:
array([[ 5.29862098, -4.21827419, -2.7645008 ,  3.05839458,  0.96143069],
       [-4.21827419,  4.24645247,  2.07963857, -2.7359063 , -0.46571029],
       [-2.7645008 ,  2.07963857,  2.18407211, -1.8313333 , -1.17819995],
       [ 3.05839458, -2.7359063 , -1.8313333 ,  2.17039862,  0.57703086],
       [ 0.96143069, -0.46571029, -1.17819995,  0.57703086,  1.28493986]])

In [448]:
mat.dot(inv(mat))


Out[448]:
array([[  1.00000000e+00,  -9.02056208e-16,   3.88578059e-16,
         -1.52655666e-16,   2.22044605e-16],
       [ -7.35522754e-16,   1.00000000e+00,   1.56819002e-15,
         -1.80411242e-15,   2.08166817e-16],
       [  8.88178420e-16,  -1.55431223e-15,   1.00000000e+00,
          1.99840144e-15,   0.00000000e+00],
       [  5.10702591e-15,   4.21884749e-15,   1.99840144e-15,
          1.00000000e+00,  -4.44089210e-16],
       [  4.44089210e-16,  -8.88178420e-16,   0.00000000e+00,
          2.22044605e-16,   1.00000000e+00]])

In [449]:
q, r = qr(mat)
q, r


Out[449]:
(array([[-0.74937736,  0.16743263,  0.19537466, -0.40918057,  0.45254821],
        [-0.44743741, -0.44993692,  0.43456329,  0.60038132, -0.21921118],
        [-0.46552447, -0.07124301, -0.6766628 , -0.11303769, -0.55458213],
        [ 0.12115129, -0.87347545, -0.15503482, -0.35292374,  0.2716101 ],
        [-0.08272041,  0.03857664, -0.53951381,  0.57860249,  0.60482492]]),
 array([[-1.97897678, -1.74970661, -3.41307473, -1.83614571, -1.52279464],
        [ 0.        , -2.54264816, -3.77984371, -6.40293238, -1.48200157],
        [ 0.        ,  0.        , -5.13653928, -3.4541128 , -3.57857562],
        [ 0.        ,  0.        ,  0.        , -0.32060256,  0.59426911],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.4707029 ]]))

In [450]:
q.T, inv(q)


Out[450]:
(array([[-0.74937736, -0.44743741, -0.46552447,  0.12115129, -0.08272041],
        [ 0.16743263, -0.44993692, -0.07124301, -0.87347545,  0.03857664],
        [ 0.19537466,  0.43456329, -0.6766628 , -0.15503482, -0.53951381],
        [-0.40918057,  0.60038132, -0.11303769, -0.35292374,  0.57860249],
        [ 0.45254821, -0.21921118, -0.55458213,  0.2716101 ,  0.60482492]]),
 array([[-0.74937736, -0.44743741, -0.46552447,  0.12115129, -0.08272041],
        [ 0.16743263, -0.44993692, -0.07124301, -0.87347545,  0.03857664],
        [ 0.19537466,  0.43456329, -0.6766628 , -0.15503482, -0.53951381],
        [-0.40918057,  0.60038132, -0.11303769, -0.35292374,  0.57860249],
        [ 0.45254821, -0.21921118, -0.55458213,  0.2716101 ,  0.60482492]]))

In [451]:
A = np.array([[1, 0.5], [0.5, 1]])

In [452]:
# eigenvalue 1.5, 0.5
for i in range(10):
    q, r = qr(A)
    A = r.dot(q)
q, r = qr(A)
A, q ,r


Out[452]:
(array([[  1.50000000e+00,   1.69350878e-05],
        [  1.69350878e-05,   5.00000000e-01]]),
 array([[ -1.00000000e+00,  -1.12900585e-05],
        [ -1.12900585e-05,   1.00000000e+00]]),
 array([[ -1.50000000e+00,  -2.25801171e-05],
        [  0.00000000e+00,   5.00000000e-01]]))

In [453]:
value, vector = np.linalg.eig(np.array([[1, 0.5], [0.5, 1]]))
value, vector


Out[453]:
(array([ 1.5,  0.5]), array([[ 0.70710678, -0.70710678],
        [ 0.70710678,  0.70710678]]))

In [454]:
np.linalg.det(vector)


Out[454]:
0.99999999999999978

In [455]:
np.array([[1, 0.5], [0.5, 1]]).dot(vector[:,1]), 0.5*(vector[:,1])


Out[455]:
(array([-0.35355339,  0.35355339]), array([-0.35355339,  0.35355339]))

In [456]:
np.array([[1, 0.5], [0.5, 1]]).dot(vector[:,0]), 1.5*(vector[:,0])


Out[456]:
(array([ 1.06066017,  1.06066017]), array([ 1.06066017,  1.06066017]))

In [457]:
value, vector = np.linalg.eig(np.array([[1.5, 0], [0, 0.5]]))
value, vector


Out[457]:
(array([ 1.5,  0.5]), array([[ 1.,  0.],
        [ 0.,  1.]]))

In [458]:
np.linalg.det(vector)


Out[458]:
1.0

In [459]:
np.diag(np.array([[1, 0.5], [0.5, 1]]))


Out[459]:
array([ 1.,  1.])

In [460]:
np.diag(np.array([1, 1]))


Out[460]:
array([[1, 0],
       [0, 1]])

In [461]:
# Compute the sum of the diagonal elements
np.trace(np.array([[1, 0.5], [0.5, 1]]))


Out[461]:
2.0

In [462]:
# Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``:
a = np.array([[3,1], [1,2]])
b = np.array([9,8])
x = np.linalg.solve(a, b)
x


Out[462]:
array([ 2.,  3.])

In [463]:
x = np.array([0, 1, 2, 3])
y = np.array([-1, 0.2, 0.9, 2.1])
A = np.vstack([x, np.ones(len(x))]).T
A


Out[463]:
array([[ 0.,  1.],
       [ 1.,  1.],
       [ 2.,  1.],
       [ 3.,  1.]])

In [464]:
p = np.linalg.lstsq(A, y)[0]
m, c


Out[464]:
(0.99999999999999989, -0.94999999999999962)

In [465]:
import matplotlib.pyplot as plt
plt.plot(x, y, 'o', label='Original data', markersize=10)
plt.plot(x, m*x+c, 'r', label='Fitted line')
plt.legend()
plt.show()



In [466]:
### Random Number Generation

In [467]:
samples = np.random.normal(size = (4, 4))
samples


Out[467]:
array([[ 0.20311695,  0.10223108, -0.42848758, -0.23839667],
       [-0.29014384,  1.01447127,  0.92825456, -0.39243235],
       [-0.15414798, -0.07406847,  0.16185351, -0.09082215],
       [ 0.30937909, -0.70471676, -0.87019931,  0.25925802]])

In [468]:
from random import normalvariate

In [469]:
N = 1000000

In [470]:
%timeit samples = [normalvariate(0, 1) for _ in range(0, N)]


1 loop, best of 3: 2.44 s per loop

In [471]:
%timeit np.random.normal(size=N)


10 loops, best of 3: 70 ms per loop

In [472]:
'''
seed Seed the random number generator
permutation Return a random permutation of a sequence, or return a permuted range
shuffle Randomly permute a sequence in place
rand Draw samples from a uniform distribution
randint Draw random integers from a given low-to-high range
randn Draw samples from a normal distribution with mean 0 and standard deviation 1 (MATLAB-like interface)
binomial Draw samples a binomial distribution
normal Draw samples from a normal (Gaussian) distribution
beta Draw samples from a beta distribution
chisquare Draw samples from a chi-square distribution
gamma Draw samples from a gamma distribution
uniform Draw samples from a uniform [0, 1) distribution
'''


Out[472]:
'\nseed Seed the random number generator\npermutation Return a random permutation of a sequence, or return a permuted range\nshuffle Randomly permute a sequence in place\nrand Draw samples from a uniform distribution\nrandint Draw random integers from a given low-to-high range\nrandn Draw samples from a normal distribution with mean 0 and standard deviation 1 (MATLAB-like interface)\nbinomial Draw samples a binomial distribution\nnormal Draw samples from a normal (Gaussian) distribution\nbeta Draw samples from a beta distribution\nchisquare Draw samples from a chi-square distribution\ngamma Draw samples from a gamma distribution\nuniform Draw samples from a uniform [0, 1) distribution\n'

In [473]:
# Built-in method 
import random
position = 0
walk = [position]
steps = 1000
for i in range(0, steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

In [474]:
# numpy method
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()

In [475]:
walk, walk.min(), walk.max()


Out[475]:
(array([ 1,  0,  1,  0, -1, -2, -1, -2, -3, -4, -5, -6, -7, -8, -7, -8, -7,
        -6, -5, -4, -3, -4, -5, -4, -3, -2, -1, -2, -3, -2, -1,  0, -1,  0,
         1,  2,  3,  4,  5,  6,  7,  8,  7,  6,  5,  6,  7,  8,  9, 10,  9,
        10, 11, 12, 11, 12, 11, 12, 13, 14, 13, 12, 11, 12, 13, 14, 13, 14,
        15, 14, 15, 14, 15, 16, 15, 16, 15, 14, 15, 16, 15, 16, 17, 18, 19,
        18, 17, 18, 19, 18, 19, 20, 19, 18, 19, 18, 17, 18, 19, 18, 17, 18,
        17, 16, 17, 18, 19, 18, 17, 18, 17, 16, 17, 18, 17, 16, 15, 16, 15,
        16, 17, 16, 15, 14, 13, 14, 15, 16, 17, 16, 17, 18, 19, 20, 19, 20,
        21, 22, 23, 22, 21, 22, 21, 22, 21, 22, 23, 22, 21, 22, 23, 22, 21,
        22, 23, 24, 23, 24, 23, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        33, 34, 35, 34, 33, 32, 33, 34, 33, 32, 31, 30, 31, 32, 31, 30, 31,
        30, 29, 28, 27, 28, 29, 30, 31, 30, 31, 30, 29, 28, 29, 30, 29, 28,
        27, 26, 27, 28, 29, 28, 29, 28, 29, 30, 29, 30, 31, 32, 31, 32, 31,
        32, 33, 34, 35, 34, 33, 34, 35, 34, 35, 34, 33, 32, 33, 32, 31, 32,
        33, 34, 35, 34, 33, 34, 35, 36, 37, 36, 37, 38, 37, 38, 37, 36, 35,
        34, 33, 34, 35, 36, 35, 34, 35, 34, 35, 36, 35, 36, 37, 38, 39, 38,
        39, 40, 39, 40, 39, 40, 41, 42, 43, 42, 41, 40, 41, 42, 41, 40, 41,
        40, 39, 40, 39, 40, 39, 40, 39, 40, 41, 42, 41, 42, 43, 44, 43, 42,
        43, 44, 43, 42, 43, 44, 45, 46, 45, 44, 43, 42, 43, 42, 41, 42, 43,
        42, 41, 40, 41, 40, 39, 38, 37, 36, 35, 36, 37, 36, 35, 34, 35, 36,
        35, 36, 35, 34, 33, 32, 33, 34, 35, 34, 33, 32, 33, 32, 33, 34, 33,
        32, 31, 30, 31, 30, 31, 30, 31, 32, 33, 34, 33, 32, 31, 30, 31, 30,
        29, 28, 27, 28, 27, 26, 27, 28, 29, 28, 29, 28, 27, 26, 25, 26, 25,
        24, 23, 24, 25, 26, 27, 26, 25, 26, 27, 26, 25, 26, 27, 26, 27, 26,
        27, 28, 27, 26, 27, 26, 25, 26, 25, 24, 25, 26, 25, 24, 25, 26, 27,
        26, 25, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 33, 34, 35, 34,
        33, 34, 33, 34, 33, 32, 33, 34, 33, 32, 31, 32, 33, 34, 35, 36, 35,
        36, 37, 38, 39, 40, 39, 38, 37, 38, 39, 38, 37, 36, 37, 38, 39, 40,
        39, 38, 37, 38, 37, 36, 35, 36, 37, 38, 37, 36, 35, 36, 35, 34, 33,
        34, 35, 36, 37, 36, 35, 36, 37, 38, 39, 40, 39, 38, 39, 40, 41, 42,
        41, 42, 41, 42, 41, 42, 43, 42, 43, 44, 43, 44, 45, 44, 43, 42, 41,
        40, 39, 38, 37, 36, 37, 38, 37, 38, 37, 36, 37, 36, 35, 36, 35, 34,
        35, 34, 33, 34, 33, 34, 35, 34, 33, 34, 33, 32, 31, 32, 31, 32, 33,
        32, 31, 30, 29, 30, 29, 28, 27, 28, 29, 28, 29, 28, 29, 30, 31, 32,
        31, 30, 31, 30, 31, 30, 29, 30, 29, 30, 29, 30, 29, 30, 31, 32, 31,
        32, 33, 32, 33, 34, 35, 36, 37, 36, 37, 38, 37, 38, 37, 36, 35, 36,
        37, 36, 35, 34, 35, 36, 35, 34, 33, 34, 33, 32, 33, 34, 35, 34, 35,
        36, 37, 38, 37, 38, 39, 38, 37, 38, 39, 40, 39, 38, 37, 36, 35, 34,
        33, 34, 35, 36, 37, 38, 37, 38, 39, 40, 41, 42, 41, 42, 41, 40, 39,
        38, 39, 38, 37, 36, 37, 38, 37, 36, 35, 34, 33, 32, 31, 30, 31, 32,
        33, 34, 35, 36, 37, 38, 37, 38, 39, 38, 37, 36, 37, 38, 37, 36, 37,
        36, 35, 36, 35, 36, 35, 34, 35, 34, 33, 34, 35, 34, 35, 36, 35, 34,
        35, 36, 37, 36, 35, 34, 33, 32, 33, 32, 33, 34, 33, 32, 33, 32, 31,
        30, 31, 32, 31, 32, 31, 30, 31, 30, 31, 32, 33, 32, 33, 32, 33, 34,
        33, 34, 35, 34, 35, 36, 37, 36, 35, 34, 35, 36, 37, 36, 35, 34, 33,
        32, 31, 32, 33, 34, 33, 34, 35, 34, 33, 34, 33, 34, 35, 34, 33, 34,
        35, 34, 33, 32, 33, 34, 33, 34, 33, 34, 35, 36, 35, 34, 33, 32, 33,
        34, 33, 32, 31, 32, 33, 34, 35, 34, 35, 34, 35, 36, 35, 34, 33, 34,
        33, 34, 35, 36, 37, 38, 37, 38, 39, 38, 37, 36, 35, 34, 35, 34, 35,
        36, 37, 36, 37, 38, 37, 36, 35, 36, 37, 38, 37, 38, 37, 36, 37, 38,
        39, 38, 37, 38, 39, 38, 39, 38, 37, 38, 37, 36, 37, 36, 35, 36, 35,
        36, 35, 36, 35, 36, 35, 34, 33, 32, 31, 30, 29, 30, 31, 32, 33, 34,
        33, 32, 33, 32, 31, 32, 31, 32, 31, 32, 33, 34, 33, 34, 33, 34, 35,
        36, 35, 36, 37, 36, 35, 36, 37, 38, 39, 40, 39, 38, 39, 40, 39, 40,
        39, 38, 39, 40, 41, 40, 41, 42, 41, 42, 41, 40, 41, 42, 41, 40, 41,
        42, 43, 44, 45, 46, 47, 46, 45, 46, 47, 48, 47, 46, 45, 44, 43, 42,
        41, 42, 43, 44, 45, 46, 45, 46, 47, 48, 47, 48, 49, 50, 51, 52, 51,
        50, 51, 52, 53, 54, 55, 54, 53, 52, 51, 50, 49, 50, 51, 52, 53, 52,
        53, 54, 55, 56, 57, 56, 57, 56, 55, 54, 53, 54, 53, 54], dtype=int32),
 -8,
 57)

In [478]:
(np.abs(walk) >= 10).argmax()


Out[478]:
49

In [480]:
nwalks = 5000; nsteps = 1000

In [481]:
draws = np.random.randint(0, 2, size=(nwalks, nsteps))

In [482]:
steps = np.where(draws > 0, 1, -1)

In [483]:
walks = steps.cumsum(1)

In [484]:
walks


Out[484]:
array([[ -1,   0,   1, ...,  -2,  -3,  -2],
       [  1,   0,  -1, ...,   8,   9,   8],
       [ -1,  -2,  -3, ...,   4,   3,   4],
       ..., 
       [  1,   2,   3, ..., -46, -47, -48],
       [  1,   0,   1, ...,  44,  43,  42],
       [ -1,   0,   1, ..., -14, -13, -14]], dtype=int32)

In [485]:
walks.max(), walks.min()


Out[485]:
(119, -125)

In [486]:
hits30 = (np.abs(walks) >= 30).any(1)

In [487]:
hits30


Out[487]:
array([ True,  True, False, ...,  True,  True,  True], dtype=bool)

In [488]:
hits30.sum()


Out[488]:
3406

In [491]:
walks[hits30].shape


Out[491]:
(3406, 1000)

In [494]:
crossing_times = np.abs(walks[hits30] >= 30).argmax(1)

In [495]:
crossing_times


Out[495]:
array([  0, 183,   0, ...,   0, 453,   0], dtype=int64)

In [498]:
crossing_times.shape


Out[498]:
(3406,)

In [499]:
crossing_times.mean()


Out[499]:
258.87228420434525

In [ ]: