In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from numpy.linalg import inv, qr, svd, solve, lstsq
import seaborn as sns
%matplotlib inline



In [2]:
df = pd.read_csv('data.csv')

In [3]:
df.head()


Out[3]:
x1 x2 x3 y
0 0.511942 0.412118 0.540302 3.135134
1 -0.385495 -0.756802 0.753902 1.189656
2 -0.016638 0.989358 0.540302 2.404029
3 0.786973 0.141120 1.000000 6.359970
4 0.395976 0.656987 0.540302 2.882558

In [4]:
df.describe()


Out[4]:
x1 x2 x3 y
count 1000.000000 1000.000000 1000.000000 1000.000000
mean 0.030942 0.196471 0.059228 2.073943
std 1.024607 0.656093 0.722516 3.700031
min -3.026895 -0.958924 -0.989992 -8.108243
25% -0.646609 -0.279415 -0.653644 -0.418985
50% 0.018040 0.412118 0.283662 2.094814
75% 0.678942 0.841471 0.753902 4.745579
max 3.862828 0.989358 1.000000 12.421685

Linear regression

Solving directly with the closed-form solution


In [5]:
df['x4'] = 1

In [6]:
X = df.iloc[:, [0, 1, 2, 4]].values  # x1, x2, x3 and the constant column x4

In [7]:
y = df.y.values

$y = Xw$
$w = (X^T X)^{-1} X^T y$


In [8]:
inv_XX_T = inv(X.T.dot(X))

In [9]:
w = inv_XX_T.dot(X.T).dot(df.y.values)

In [10]:
w


Out[10]:
array([ 2.97396653, -0.54139002,  0.97132913,  2.03076198])

Results

w1 = 2.97396653
w2 = -0.54139002
w3 = 0.97132913
b = 2.03076198


In [11]:
qr(inv_XX_T)


Out[11]:
(array([[-0.99932291, -0.02064871, -0.0030824 ,  0.03029612],
        [ 0.02599347, -0.98046617,  0.03167325,  0.19237264],
        [ 0.00526495, -0.02072455, -0.99808801,  0.05799222],
        [ 0.02550186,  0.1944999 ,  0.05298706,  0.9791383 ]]),
 array([[ -9.54420192e-04,   7.38324873e-05,   1.32685044e-05,
           3.97413182e-05],
        [  0.00000000e+00,  -2.37176114e-03,  -1.12197714e-04,
           6.67128272e-04],
        [  0.00000000e+00,   0.00000000e+00,  -1.91987064e-03,
           1.66696794e-04],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           9.79138303e-04]]))
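
Note that the factorization above is of the inverse Gram matrix. For least squares, QR is more commonly applied to X itself, which avoids forming X^T X explicitly. A minimal sketch using the qr and solve imported at the top (w_qr is an illustrative name, not part of the original notebook):

Q, R = qr(X)                  # reduced QR: Q is 1000x4, R is 4x4 upper triangular
w_qr = solve(R, Q.T.dot(y))   # back-substitute R w = Q^T y
print w_qr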

In [12]:
X.shape


Out[12]:
(1000, 4)

In [312]:
#solve(X, y)  # solve() only works for a square coefficient matrix
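
Since solve() needs a square coefficient matrix, two alternatives that avoid the explicit inverse: solve the 4x4 normal equations, or let lstsq (imported at the top) handle the rectangular 1000x4 system directly. A minimal sketch (w_ne, w_ls and the other names are illustrative):

w_ne = solve(X.T.dot(X), X.T.dot(y))   # normal equations: the 4x4 system is square
w_ls, res, rank, sv = lstsq(X, y)      # least-squares solution of the rectangular system
print w_ne
print w_ls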

Solving with gradient descent

  • Choose the scaling of the objective function sensibly: multiply it by a suitable constant up front.
  • Check that the gradient computation is correct (a numerical check is sketched after the grad_f cell below).

In [118]:
def f(w,X,y):
    # objective: mean squared error over the 1000 samples, divided by 2
    return ((X.dot(w)-y)**2/(2*1000)).sum()

In [119]:
def grad_f(w,X,y):
    # gradient of f with respect to w, averaged over the 1000 samples
    return (X.dot(w) - y).dot(X)/1000
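
A quick way to verify the gradient, as noted above: compare grad_f with a central-difference approximation of f at a random point. A minimal sketch (numerical_grad, w_test and the step h are illustrative names, not cells from the original notebook):

def numerical_grad(w, X, y, h=1e-6):
    # central-difference approximation of the gradient of f
    g = np.zeros_like(w)
    for i in range(len(w)):
        e = np.zeros_like(w)
        e[i] = h
        g[i] = (f(w + e, X, y) - f(w - e, X, y)) / (2 * h)
    return g

w_test = np.random.randn(4)
print abs(numerical_grad(w_test, X, y) - grad_f(w_test, X, y)).max()  # should be very small if grad_f is correct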

In [120]:
w0 = np.array([100.0,100.0,100.0,100.0])

In [121]:
epsilon = 1e-10

In [122]:
alpha = 0.1

In [123]:
check_condition = 1

In [243]:
while  check_condition > epsilon:
    w0 += -alpha*grad_f(w0,X,y)
    check_condition = abs(grad_f(w0,X,y)).sum()
print w0


[ 2.97396671 -0.5414066   0.97132728  2.03076759]

Solving with stochastic gradient descent

  • Stochastic gradient descent
  • A fixed step size is used
    • Starting from 0.1, the required precision could never be reached
  • So a condition was added to shrink the step size (for comparison, a 1/t decay schedule is sketched after the result below).

In [411]:
def cost_function(w,X,y):
    # per-sample squared error (not averaged)
    return (X.dot(w)-y)**2/2

In [412]:
def grad_cost_f(w,X,y):
    # gradient of the per-sample cost with respect to w
    return (np.dot(X, w) - y)*X

In [462]:
w0 = np.array([1.0, 1.0, 1.0, 1.0])

In [477]:
epsilon = 1e-3

In [478]:
alpha = 0.01

In [479]:
# Generate a random index ordering, used to sample the data points in random order.
random_index = np.arange(1000)
np.random.shuffle(random_index)

In [480]:
cost_value = np.inf  # initialize the objective value

In [481]:
while abs(grad_f(w0,X,y)).sum() > epsilon:
    for i in range(1000):
        w0 += -alpha*grad_cost_f(w0,X[random_index[i]],y[random_index[i]])
    
    # Check how the objective is changing; if the improvement falls below the threshold,
    # shrink the step size and continue with the smaller alpha.
    difference = cost_value - f(w0, X, y) 
    if difference < 1e-10:
        alpha *= 0.9
    cost_value = f(w0, X, y)
print w0


[ 2.97376767 -0.54075842  0.97217986  2.03067711]
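
For reference, a common alternative to the conditional shrinking above is a preset 1/t decay of the step size per epoch. A minimal sketch reusing grad_cost_f, X, y and random_index from the cells above (w_decay, alpha0 and n_epochs are illustrative values, not the method used in this notebook):

w_decay = np.array([1.0, 1.0, 1.0, 1.0])
alpha0 = 0.01
n_epochs = 50
for t in range(n_epochs):
    alpha_t = alpha0 / (1.0 + t)       # step size shrinks every epoch
    for i in random_index:
        w_decay += -alpha_t * grad_cost_f(w_decay, X[i], y[i])
print w_decay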
