In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
%matplotlib inline

In [2]:
from numpy.random import randn

In [3]:
# Draw six standard-normal samples and order them ascending in place.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# values will differ on every run.
arr = np.random.randn(6)
arr.sort(axis=-1)
arr


Out[3]:
array([-0.66252951, -0.0956943 , -0.01814607,  0.27970656,  0.29199334,
        0.57142159])

In [4]:
# Fresh 3x5 array of standard-normal samples (rebinds `arr`).
arr = np.random.randn(3, 5)
arr


Out[4]:
array([[  1.06456385e-01,  -1.45108574e-01,   2.23752910e-03,
         -7.93935233e-01,  -1.25261759e+00],
       [ -3.86344202e-02,  -8.90350295e-01,  -1.28592908e-01,
          4.70966622e-01,   2.60679866e+00],
       [ -1.63683063e+00,  -1.52783083e+00,   2.17446836e+00,
          5.90761795e-01,   1.23646359e+00]])

In [5]:
# Sort the first column in place: basic slicing hands back a view, so
# sorting the view reorders the values stored in `arr` itself.
first_col = arr[:, 0]
first_col.sort()
arr


Out[5]:
array([[ -1.63683063e+00,  -1.45108574e-01,   2.23752910e-03,
         -7.93935233e-01,  -1.25261759e+00],
       [ -3.86344202e-02,  -8.90350295e-01,  -1.28592908e-01,
          4.70966622e-01,   2.60679866e+00],
       [  1.06456385e-01,  -1.52783083e+00,   2.17446836e+00,
          5.90761795e-01,   1.23646359e+00]])

In [6]:
# Five unsorted standard-normal samples, used below to contrast np.sort
# (returns a copy) with the in-place ndarray.sort.
arr = np.random.randn(5)
arr


Out[6]:
array([ 0.52812602, -0.47370947, -0.71279049, -0.96333218, -0.01685095])

In [7]:
# np.sort returns a new, sorted array; `arr` itself is not modified
# (the following cell displays it in its original order).
np.sort(arr)


Out[7]:
array([-0.96333218, -0.71279049, -0.47370947, -0.01685095,  0.52812602])

In [8]:
# Redisplay `arr` to confirm it still has its original, unsorted order.
arr


Out[8]:
array([ 0.52812602, -0.47370947, -0.71279049, -0.96333218, -0.01685095])

In [9]:
# New 3x5 block of standard-normal samples for the axis-wise sort below.
arr = np.random.randn(3, 5)
arr


Out[9]:
array([[ 0.07547164,  1.40813197, -0.41124863,  0.97566277, -0.84580314],
       [-1.80792429,  0.24792603,  0.55171647,  0.3270743 , -0.76419124],
       [-2.119253  , -0.36267734,  0.14620263, -0.33330987,  0.13864297]])

In [10]:
# Sort along axis 1 in place: each row is ordered ascending independently,
# so values never move between rows.
arr.sort(axis=1)
arr


Out[10]:
array([[-0.84580314, -0.41124863,  0.07547164,  0.97566277,  1.40813197],
       [-1.80792429, -0.76419124,  0.24792603,  0.3270743 ,  0.55171647],
       [-2.119253  , -0.36267734, -0.33330987,  0.13864297,  0.14620263]])

In [11]:
# Reverse the column order with a negative-step slice. Because each row was
# sorted ascending in the previous cell, every row now reads descending.
arr[:, ::-1]


Out[11]:
array([[ 1.40813197,  0.97566277,  0.07547164, -0.41124863, -0.84580314],
       [ 0.55171647,  0.3270743 ,  0.24792603, -0.76419124, -1.80792429],
       [ 0.14620263,  0.13864297, -0.33330987, -0.36267734, -2.119253  ]])

In [12]:
# argsort yields the positions that would put `values` in ascending order
# rather than the sorted values themselves.
values = np.array([5, 0, 1, 3, 2])
indexer = np.argsort(values)
indexer


Out[12]:
array([1, 2, 4, 3, 0], dtype=int64)

In [14]:
# Indexing with the argsort result reproduces the sorted array.
values[indexer]


Out[14]:
array([0, 1, 2, 3, 5])

In [16]:
# Build a 3x5 random array, then overwrite its first row with `values`
# (the integer entries are stored as floats in the float array).
arr = np.random.randn(3, 5)
arr[0, :] = values
arr


Out[16]:
array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [ 1.99951353, -0.53906493,  0.69579683,  0.03258102,  0.11506788],
       [-0.7260661 ,  0.2464319 , -1.63473778, -0.73729395, -0.51551055]])

In [17]:
# Reorder the columns so the first row ends up ascending; the same column
# permutation is applied to every row.
col_order = np.argsort(arr[0])
arr[:, col_order]


Out[17]:
array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [-0.53906493,  0.69579683,  0.11506788,  0.03258102,  1.99951353],
       [ 0.2464319 , -1.63473778, -0.51551055, -0.73729395, -0.7260661 ]])

In [18]:
# Sort people by last name, breaking ties by first name.
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
# np.lexsort uses the LAST key in the tuple as the primary sort key, so
# last_name is primary and first_name only breaks ties.
sorter = np.lexsort((first_name, last_name))
# zip() is a lazy iterator on Python 3, so a bare zip(...) would display as
# "<zip object ...>" instead of the pairs. Materialise it as a list, and use
# .tolist() so the tuples hold plain Python strings on every NumPy version.
sorted_names = list(zip(last_name[sorter].tolist(), first_name[sorter].tolist()))
sorted_names


Out[18]:
[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

In [19]:
# Demonstrate a stable sort: with kind='mergesort', entries sharing the same
# key keep their original relative order (first, second, third).
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = np.argsort(key, kind='mergesort')
indexer


Out[19]:
array([2, 3, 4, 0, 1], dtype=int64)

In [20]:
# Pick the elements of `values` at the positions in `indexer`; equal keys
# stay in their original relative order because the argsort was stable.
values.take(indexer)


Out[20]:
array(['1:first', '1:second', '1:third', '2:first', '2:second'], 
      dtype='|S8')

In [21]:
# searchsorted does a binary search on an already-sorted array and reports
# the index at which the value would be inserted to keep it sorted.
arr = np.array([0, 1, 7, 12, 15])
np.searchsorted(arr, 9)


Out[21]:
3

In [22]:
# Passing a sequence returns one insertion index per value; 16 is larger
# than every element, so its index equals len(arr).
arr.searchsorted([0, 8, 11, 16])


Out[22]:
array([0, 3, 3, 5], dtype=int64)

In [23]:
# With repeated values the default side ('left') returns the index of the
# first element in each run of equal values.
arr = np.array([0] * 3 + [1] * 4)
np.searchsorted(arr, [0, 1])


Out[23]:
array([0, 3], dtype=int64)

In [24]:
# side='right' returns the index just past the last element of each run of
# equal values instead of the index of the first one.
arr.searchsorted([0, 1], side='right')


Out[24]:
array([3, 7], dtype=int64)

In [25]:
# Fifty uniform samples from [0, 10000), rounded down to whole numbers
# (still stored as floats).
raw = np.random.uniform(low=0, high=10000, size=50)
data = np.floor(raw)
data


Out[25]:
array([ 9986.,  3677.,  9889.,  3336.,  9837.,  7351.,   137.,   585.,
        7422.,  1782.,  2792.,  8997.,  1590.,   964.,  9309.,  7001.,
        4902.,  3578.,  2276.,  9510.,  1883.,  2554.,   103.,  2502.,
        8483.,  2172.,  6500.,   479.,  1694.,  9716.,  2895.,  3034.,
        4348.,  5333.,  6565.,  1070.,  8722.,  2138.,  6676.,  2649.,
         972.,  9377.,  3012.,  8127.,  9651.,  9553.,  7732.,  4135.,
        2381.,  2343.])

In [27]:
# Ascending bin edges used to bucket the samples in the next cells.
bins = np.array([0, 100, 1000, 5000, 10000])
# `data` is only redisplayed here; it is not modified.
data


Out[27]:
array([ 9986.,  3677.,  9889.,  3336.,  9837.,  7351.,   137.,   585.,
        7422.,  1782.,  2792.,  8997.,  1590.,   964.,  9309.,  7001.,
        4902.,  3578.,  2276.,  9510.,  1883.,  2554.,   103.,  2502.,
        8483.,  2172.,  6500.,   479.,  1694.,  9716.,  2895.,  3034.,
        4348.,  5333.,  6565.,  1070.,  8722.,  2138.,  6676.,  2649.,
         972.,  9377.,  3012.,  8127.,  9651.,  9553.,  7732.,  4135.,
        2381.,  2343.])

In [29]:
# Label each sample with the position at which it would be inserted into
# `bins` to keep the edges ordered, i.e. the index of its interval.
labels = np.searchsorted(bins, data)
labels


Out[29]:
array([4, 3, 4, 3, 4, 4, 2, 2, 4, 3, 3, 4, 3, 2, 4, 4, 3, 3, 3, 4, 3, 3, 2,
       3, 4, 3, 4, 2, 3, 4, 3, 3, 3, 4, 4, 3, 4, 3, 4, 3, 2, 4, 3, 4, 4, 4,
       4, 3, 3, 3], dtype=int64)

In [30]:
# Group the samples by their bin label and average each group; only labels
# that actually occur in `labels` appear in the result.
Series(data).groupby(labels).mean()


Out[30]:
2     540.000000
3    2727.956522
4    8368.428571
dtype: float64

In [31]:
# np.digitize computes the same kind of bin index as the searchsorted call
# above. NOTE(review): with its default right=False, a value exactly equal
# to a bin edge is placed in the bin above that edge, whereas
# bins.searchsorted(data) (default side='left') returns the edge's own
# index. The two outputs agree in the recorded run because no sample equals
# an edge.
np.digitize(data, bins)


Out[31]:
array([4, 3, 4, 3, 4, 4, 2, 2, 4, 3, 3, 4, 3, 2, 4, 4, 3, 3, 3, 4, 3, 3, 2,
       3, 4, 3, 4, 2, 3, 4, 3, 3, 3, 4, 4, 3, 4, 3, 4, 3, 2, 4, 3, 4, 4, 4,
       4, 3, 3, 3], dtype=int64)

In [33]:
# 4x4 array of standard-normal samples scaled by a factor of ten.
X = 10.0 * np.random.randn(4, 4)
X


Out[33]:
array([[  4.45557471,  -2.3522518 ,   3.29241388,  -3.63930622],
       [  3.01906633,  17.58707481,   9.83978684, -12.16680305],
       [ -7.41403321,   3.5149984 ,   2.58014277,   0.43401077],
       [ -6.92100309,  -5.45322914,  15.57843171,   4.24318544]])

In [34]:
# Indexing a column with an integer drops that axis: the result is 1-D.
X[:, 0]


Out[34]:
array([ 4.45557471,  3.01906633, -7.41403321, -6.92100309])

In [36]:
y = X[:, :1] # slicing (instead of integer indexing) yields a two-dimensional result
# X is redisplayed unchanged; y is shown in the next cell.
X


Out[36]:
array([[  4.45557471,  -2.3522518 ,   3.29241388,  -3.63930622],
       [  3.01906633,  17.58707481,   9.83978684, -12.16680305],
       [ -7.41403321,   3.5149984 ,   2.58014277,   0.43401077],
       [ -6.92100309,  -5.45322914,  15.57843171,   4.24318544]])

In [37]:
# y keeps two dimensions: a 4x1 column holding the first column of X.
y


Out[37]:
array([[ 4.45557471],
       [ 3.01906633],
       [-7.41403321],
       [-6.92100309]])

In [38]:
# Quadratic form y^T X y on plain ndarrays; X.dot(y) is evaluated first and
# then multiplied by the 1x4 row y.T, giving a 1x1 array.
y.T.dot(X.dot(y))


Out[38]:
array([[ 1955.48360634]])

In [39]:
# Wrap X in np.matrix so that `*` performs matrix multiplication below.
# NOTE(review): NumPy's documentation discourages np.matrix for new code;
# confirm whether this section should keep using it.
Xm = np.matrix(X)
# Integer column indexing on a matrix keeps two dimensions (ym is displayed
# as 4x1 in the next cell), unlike X[:, 0] on the ndarray.
ym = Xm[:, 0]
Xm


Out[39]:
matrix([[  4.45557471,  -2.3522518 ,   3.29241388,  -3.63930622],
        [  3.01906633,  17.58707481,   9.83978684, -12.16680305],
        [ -7.41403321,   3.5149984 ,   2.58014277,   0.43401077],
        [ -6.92100309,  -5.45322914,  15.57843171,   4.24318544]])

In [40]:
# ym is a 4x1 matrix: the first column of Xm.
ym


Out[40]:
matrix([[ 4.45557471],
        [ 3.01906633],
        [-7.41403321],
        [-6.92100309]])

In [41]:
# Same quadratic form as the np.dot version above; on np.matrix operands
# `*` is matrix multiplication, so the recorded value matches.
ym.T * Xm * ym


Out[41]:
matrix([[ 1955.48360634]])

In [42]:
# Xm.I is the matrix inverse; multiplying it by X gives the identity up to
# floating-point round-off (off-diagonal entries around 1e-16).
Xm.I * X


Out[42]:
matrix([[  1.00000000e+00,   1.94289029e-16,   1.11022302e-16,
          -1.11022302e-16],
        [  2.77555756e-17,   1.00000000e+00,   5.55111512e-17,
          -2.63677968e-16],
        [  0.00000000e+00,  -5.55111512e-17,   1.00000000e+00,
           0.00000000e+00],
        [  1.11022302e-16,   1.11022302e-16,   4.44089210e-16,
           1.00000000e+00]])

In [43]:
# Create (mode='w+') a file-backed array named 'mymmap' in the current
# working directory. 10000 x 10000 float64 values is 800,000,000 bytes,
# so expect a file of that size; a freshly created memmap reads as zeros.
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))
mmap


Out[43]:
memmap([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [44]:
# Slicing a memmap returns a view onto the same file-backed data.
section = mmap[:5]
# Assigning through the view fills the first five rows of mmap.
section[:] = np.random.randn(5, 10000)
# Explicitly write pending changes out to the file on disk.
mmap.flush()
mmap


Out[44]:
memmap([[-1.41147797,  1.23117177, -1.86859027, ..., -0.79980231,
        -2.24514314,  1.23074082],
       [ 0.44013742, -0.30032517, -1.25904478, ...,  0.0916274 ,
         0.4481306 , -1.47859898],
       [ 0.08378055,  0.00412298,  0.29392474, ...,  0.03898427,
        -0.41463042,  0.90307623],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [45]:
# Drop the name bound to the memmap before reopening the file below.
# NOTE(review): `section` from the previous cell still refers to the same
# mapping, so the underlying map may stay alive until it is released too.
del mmap

In [46]:
# Reopen the existing file. dtype and shape must be given again because this
# call has no other way to know the layout of 'mymmap'. `mode` is omitted,
# so np.memmap's default mode applies. The previously written rows are read
# back from disk.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap


Out[46]:
memmap([[-1.41147797,  1.23117177, -1.86859027, ..., -0.79980231,
        -2.24514314,  1.23074082],
       [ 0.44013742, -0.30032517, -1.25904478, ...,  0.0916274 ,
         0.4481306 , -1.47859898],
       [ 0.08378055,  0.00412298,  0.29392474, ...,  0.03898427,
        -0.41463042,  0.90307623],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [48]:
# Two arrays with the same shape and contents but different memory layouts:
# row-major ('C') and column-major ('F').
dims = (1000, 1000)
arr_c = np.ones(dims, order='C')
arr_f = np.ones(dims, order='F')
arr_c.flags


Out[48]:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [49]:
# Layout flags of the column-major array: F_CONTIGUOUS is set, C_CONTIGUOUS is not.
arr_f.flags


Out[49]:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [50]:
# Individual flags can be read as attributes of the flags object.
arr_f.flags.f_contiguous


Out[50]:
True

In [51]:
# Time a sum over axis 1 (one total per row) on the C-ordered array.
# NOTE(review): in the recorded run this was slower than the F-ordered case
# below (1.28 ms vs 828 µs); timings are machine- and version-dependent.
%timeit arr_c.sum(1)


1000 loops, best of 3: 1.28 ms per loop

In [52]:
# Same axis-1 sum on the Fortran-ordered array, for comparison with the
# C-ordered timing above.
%timeit arr_f.sum(1)


1000 loops, best of 3: 828 µs per loop

In [53]:
# copy('C') returns a new array laid out in C order; the copy owns its data
# and reports C_CONTIGUOUS.
arr_f.copy('C').flags


Out[53]:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [54]:
# Taking the first 50 rows of the C-ordered array still yields a contiguous
# view (the recorded result is True).
arr_c[:50].flags.contiguous


Out[54]:
True

In [55]:
# Taking the first 50 columns gives a view that is neither C- nor
# F-contiguous and does not own its data (OWNDATA is False).
arr_c[:, :50].flags


Out[55]:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [ ]: