In [2]:
# Generate a synthetic binary behaviour sequence (20 draws from {0, 1}).
import numpy as np
# Seed the global NumPy RNG so that "Restart & Run All" reproduces the same
# draws here and in every later cell that calls np.random.choice.
np.random.seed(42)
x = np.random.choice([0,1],size=20)
In [10]:
# Goal: predict each value of the sequence from the value just before it
# (a more elaborate variant can condition on the two preceding values instead).
x_t = x[:-1]   # predecessors: every element except the last
x_t1 = x[1:]   # successors: every element except the first
# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3, unlike the bare print statement which is a SyntaxError on Python 3.
print(x_t)
print(x_t1)
print("%s %s" % (x, type(x)))
In [4]:
# 写成一个预测函数,预测当前一个取xt时,后一个取1的概率
from __future__ import division
def predict_func(x, xt):
    """Estimate P(next value == 1 | previous value == xt) from a 0/1 sequence.

    Parameters
    ----------
    x : array-like
        Observed sequence of 0/1 values. Lists and tuples are accepted; the
        input is converted with ``np.asarray`` so element-wise comparison works.
    xt : int
        Conditioning value of the previous element; used to index the result
        list ``[p_01, p_11]`` (0 -> p_01, 1 -> p_11).

    Returns
    -------
    float
        Fraction of positions whose previous value equals ``xt`` and whose
        next value is 1. ``nan`` when the conditioning value never occurs as a
        predecessor (including sequences shorter than two elements).
    """
    seq = np.asarray(x)
    prev_vals = seq[:-1]
    next_vals = seq[1:]

    def _rate(state):
        # Empirical P(next == 1 | prev == state); nan if `state` was never seen.
        mask = prev_vals == state
        total = np.sum(mask)
        if total == 0:
            # Avoid 0/0 (RuntimeWarning) and make the "no data" result explicit.
            return np.nan
        # float() keeps this a true division even without
        # `from __future__ import division` on Python 2.
        return np.sum(mask & (next_vals == 1)) / float(total)

    # p_01: previous value 0 -> next value 1; p_11: previous value 1 -> next value 1.
    res = [_rate(0), _rate(1)]
    return res[xt]
In [46]:
# Hand-built example sequence; query the estimate for "previous value is 0".
x= np.array([1,0,0,0,0,0,0,0,0,0,0,1,0,0])
predict_func(x,0)
Out[46]:
In [6]:
predict_func(x,0) # probability that the next value is 1 given that the previous value is 0
Out[6]:
In [73]:
# 有时候单个样本的数量比较少,此时可以加上全局的概率做加权修正
def predict_func2(x, xt, prior):
    """Estimate P(next == 1 | previous == xt), shrunk towards a global prior.

    The empirical transition rate is blended with ``prior`` using the weights
    ``n / (n + 1)`` for the data and ``1 / (n + 1)`` for the prior, where
    ``n = len(x)``.

    Parameters
    ----------
    x : array-like
        Observed sequence of 0/1 values (lists are accepted).
    xt : int
        Conditioning value of the previous element; indexes ``[p_01, p_11]``.
    prior : indexable
        ``prior[0]`` is the global estimate of p_01 and ``prior[1]`` the global
        estimate of p_11.

    Returns
    -------
    float
        The blended estimate. When the conditioning value never occurs as a
        predecessor there is no empirical rate to blend, so the prior value
        itself is returned instead of nan.
    """
    seq = np.asarray(x)
    prev_vals = seq[:-1]
    next_vals = seq[1:]
    n = len(seq)
    # float() keeps these true divisions regardless of the Python 2
    # `from __future__ import division` state.
    data_weight = n / float(n + 1)
    prior_weight = 1 / float(n + 1)

    def _smoothed(state):
        # Prior-weighted P(next == 1 | prev == state).
        mask = prev_vals == state
        total = np.sum(mask)
        if total == 0:
            # No observation for this context: 0/0 would poison the blend
            # with nan, so rely on the prior alone.
            return prior[state]
        empirical = np.sum(mask & (next_vals == 1)) / float(total)
        return data_weight * empirical + prior_weight * prior[state]

    # p_01: previous value 0 -> next value 1; p_11: previous value 1 -> next value 1.
    res = [_smoothed(0), _smoothed(1)]
    return res[xt]
In [74]:
# Suppose the data pooled over all users gives p11 = 0.6 and p01 = 0.3.
# The list is indexed by the previous value: prior[0] is p01, prior[1] is p11.
prior = [0.3, 0.6]
predict_func2(x,0,prior)
Out[74]:
In [79]:
# Try the prior-weighted estimate on a very short random sequence (5 samples).
x = np.random.choice([0,1],size=5)
# print(...) with one argument works on both Python 2 and Python 3.
print(x)
predict_func2(x,0,prior)
Out[79]:
In [66]:
# More elaborate variant: predict the next value from the two preceding values.
x = np.random.choice([0,1],size=30)
# print(...) with one argument works on both Python 2 and Python 3.
print(x)
In [67]:
# Context pairs (x[i], x[i+1]) for every position that still has a successor.
x_t = list(zip(x[:-2], x[1:-1]))
# Successor x[i+2] belonging to each context pair.
x_t1 = x[2:]
# print(...) with one argument works on both Python 2 and Python 3.
print(x_t)
print(x_t1)
In [71]:
def predict_func_dpre(x, xt):
    """Estimate P(next == 1 | two preceding values == xt) from a 0/1 sequence.

    Parameters
    ----------
    x : array-like
        Observed sequence of 0/1 values (lists are accepted).
    xt : tuple of int
        Ordered pair ``(a, b)`` of the two preceding values. It indexes a
        2x2 table whose entry ``[a, b]`` is the estimate for the context
        "value two steps back is ``a``, value one step back is ``b``".

    Returns
    -------
    float
        Fraction of occurrences of the context ``xt`` that are followed by 1.
        ``nan`` when the context never occurs (including sequences shorter
        than three elements).
    """
    seq = np.asarray(x)
    # Aligned views: (first[i], second[i]) is the context of successor[i].
    first = seq[:-2]
    second = seq[1:-1]
    successor = seq[2:]

    def _rate(a, b):
        # Empirical P(next == 1 | context == (a, b)); nan if never observed.
        mask = (first == a) & (second == b)
        total = np.sum(mask)
        if total == 0:
            # Avoid 0/0 (RuntimeWarning) and make the "no data" result explicit.
            return np.nan
        # float() keeps this a true division on Python 2 as well.
        return np.sum(mask & (successor == 1)) / float(total)

    # Row index = older value, column index = more recent value.
    res = np.array([[_rate(0, 0), _rate(0, 1)],
                    [_rate(1, 0), _rate(1, 1)]])
    return res[xt]
In [72]:
predict_func_dpre(x,(0,1)) # probability that the next value is 1 given that the two preceding values are (0, 1)
Out[72]:
In [70]:
# Edge case: a single 1 followed only by 0s, so the pair (0, 1) never occurs in this sequence.
x= np.array([1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0])
predict_func_dpre(x,(0,1))
Out[70]:
In [ ]:
# 上述方法是讲顺序的,也可以不讲顺序,即认为 (0,1) = (1,0)