In [2]:
# 假设产生一个行为序列
import numpy as np
x = np.random.choice([0,1],size=20)

In [10]:
# 如果我们想根据序列中前一个值来预测后一个值, 当然在更复杂的版本里可以用前2个值来预测后一个值
x_t = x[:-1]
x_t1 = x[1:]
print x_t
print x_t1
print x, type(x)


[1 1 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 0 0]
[1 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0]
[1 1 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0] <type 'numpy.ndarray'>

In [4]:
# 写成一个预测函数,预测当前一个取xt时,后一个取1的概率
from __future__ import division
def predict_func(x, xt):
    x_t = x[:-1]
    x_t1 = x[1:]
    # p_11:表示前一个取1时,后一个取1的概率
    # p_01:表示前一个取0时,后一个取1的概率
    p_11 = np.sum((x_t==1) & (x_t1==1))/np.sum(x_t==1) 
    p_01 = np.sum((x_t==0) & (x_t1==1))/np.sum(x_t==0) 
    res = [p_01,p_11]
    return res[xt]

In [46]:
x= np.array([1,0,0,0,0,0,0,0,0,0,0,1,0,0])
predict_func(x,0)


Out[46]:
0.090909090909090912

In [6]:
predict_func(x,0)  # 前一个值为0的条件下,后一个值为1的概率


Out[6]:
0.375

In [73]:
# 有时候单个样本的数量比较少,此时可以加上全局的概率做加权修正
def predict_func2(x, xt, prior):
    x_t = x[:-1]
    x_t1 = x[1:]
    n = len(x)
    # p_11:表示前一个取1时,后一个取1的概率
    # p_01:表示前一个取0时,后一个取1的概率
    p_11 = np.sum((x_t==1) & (x_t1==1))/np.sum(x_t==1) 
    p_01 = np.sum((x_t==0) & (x_t1==1))/np.sum(x_t==0) 
    p_11 = n/(n+1) *p_11 + 1/(n+1) * prior[1]
    p_01 = n/(n+1) *p_01 + 1/(n+1) * prior[0]
    res = [p_01,p_11]
    return res[xt]

In [74]:
# 如果根据所有人的数据,算出p11=0.6, p01= 0.3
prior = [0.3, 0.6]
predict_func2(x,0,prior)


Out[74]:
0.88333333333333341

In [79]:
x = np.random.choice([0,1],size=5)
print x
predict_func2(x,0,prior)


[1 0 0 1 1]
Out[79]:
0.46666666666666667

In [66]:
# 更复杂的版本,根据前两个值来预测后一个值
x = np.random.choice([0,1],size=30)
print x


[1 0 1 1 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 0 0]

In [67]:
x_t = [tuple(x[i:i+2]) for i in range(len(x)-2)]
x_t1 = x[2:]
print x_t
print x_t1


[(1, 0), (0, 1), (1, 1), (1, 0), (0, 0), (0, 0), (0, 1), (1, 0), (0, 0), (0, 0), (0, 0), (0, 1), (1, 0), (0, 1), (1, 1), (1, 0), (0, 0), (0, 1), (1, 1), (1, 0), (0, 1), (1, 0), (0, 1), (1, 0), (0, 0), (0, 0), (0, 1), (1, 0)]
[1 1 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 0 0]

In [71]:
def predict_func_dpre(x, xt):
    x_t = [tuple(x[i:i+2]) for i in range(len(x)-2)]
    x_t1 = x[2:]
    xt_11 = np.sum(np.array(x_t) == (1,1),axis=1) == 2
    p_111 = np.sum(xt_11 & (x_t1==1))/np.sum(xt_11) 
    xt_10 = np.sum(np.array(x_t) == (1,0),axis=1) == 2
    p_101 = np.sum(xt_10 & (x_t1==1))/np.sum(xt_10) 
    xt_01 = np.sum(np.array(x_t) == (0,1),axis=1) == 2
    p_011 = np.sum(xt_01 & (x_t1==1))/np.sum(xt_01) 
    xt_00 = np.sum(np.array(x_t) == (0,0),axis=1) == 2
    print xt_00
    p_001 = np.sum(xt_00 & (x_t1==1))/np.sum(xt_00) 
    res = np.array([p_001,p_011,p_101, p_111]).reshape(2,2)
    return res[xt]

In [72]:
predict_func_dpre(x,(0,1)) # 当前两个值是(0,1)条件时,后面取值为1的概率


[False  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
/opt/data/.pyenv/versions/279iPy/lib/python2.7/site-packages/IPython/kernel/__main__.py:5: RuntimeWarning: invalid value encountered in long_scalars
/opt/data/.pyenv/versions/279iPy/lib/python2.7/site-packages/IPython/kernel/__main__.py:9: RuntimeWarning: invalid value encountered in long_scalars
Out[72]:
nan

In [70]:
x= np.array([1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0])
predict_func_dpre(x,(0,1))


/opt/data/.pyenv/versions/279iPy/lib/python2.7/site-packages/IPython/kernel/__main__.py:5: RuntimeWarning: invalid value encountered in long_scalars
/opt/data/.pyenv/versions/279iPy/lib/python2.7/site-packages/IPython/kernel/__main__.py:9: RuntimeWarning: invalid value encountered in long_scalars
Out[70]:
nan

In [ ]:
# 上述方法是讲顺序的,也可以不讲顺序,即认为 (0,1) = (1,0)