In [1]:
import scipy as sp
data = sp.genfromtxt("web_traffic.tsv", delimiter="\t")
print(data.shape)
x = data[:,0]
y = data[:,1]


(743, 2)

In [2]:
# избавляемся от Nan значений
x = x[~sp.isnan(y)]
y = y[~sp.isnan(y)]

In [6]:
%matplotlib inline

import matplotlib.pyplot as plt
plt.scatter(x,y)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w*7*24 for w in range(10)],
['week %i'%w for w in range(10)])
plt.autoscale(tight=True)
plt.grid()
plt.show()



In [8]:
def error(f, x, y): #f - function
   return sp.sum((f(x)-y)**2)

In [29]:
# Linear aproximation
fp1, residuals, rank, sv, rcond = sp.polyfit(x, y, 100, full=True)
print("Model parameters: %s" % fp1)
f1 = sp.poly1d(fp1)
print("error", error(f1, x, y))


Model parameters: [  0.00000000e+000  -0.00000000e+000  -0.00000000e+000   0.00000000e+000
  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000
   0.00000000e+000  -0.00000000e+000   0.00000000e+000  -0.00000000e+000
   0.00000000e+000  -0.00000000e+000  -0.00000000e+000   0.00000000e+000
  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000   0.00000000e+000
  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000
  -0.00000000e+000  -0.00000000e+000  -0.00000000e+000   0.00000000e+000
  -0.00000000e+000   0.00000000e+000   0.00000000e+000  -0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000  -6.72781136e-140
   1.19710322e-136  -4.65667888e-135  -4.66523655e-131  -2.54268646e-128
   2.25987084e-126   1.39021354e-122   1.16121276e-119   4.32307708e-117
  -1.50810185e-114  -3.76728274e-111  -3.20575066e-108  -1.48597979e-105
   6.29996490e-104   8.53771081e-100   9.11105056e-097   5.56899187e-094
   1.31784528e-091  -1.52916593e-088  -2.41434736e-085  -1.84996561e-082
  -7.30101590e-080   2.08161061e-077   6.26367457e-074   5.61015925e-071
   2.52805581e-068  -4.42355581e-066  -1.85493908e-062  -1.62953030e-059
  -5.76681477e-057   3.54996575e-054   6.50767788e-051   3.79797724e-048
  -4.36953149e-046  -2.48075863e-042  -1.57887934e-039   3.46014155e-037
   1.09222607e-033   3.34480061e-031  -4.93811084e-028  -2.97883150e-025
   2.64881881e-022   1.18825931e-019  -2.02262247e-016   1.03634111e-013
  -2.90580233e-011   4.79824883e-009  -4.21170030e-007   5.99313171e-006
   2.59140843e-003  -2.60163354e-001   1.03501534e+001  -1.60099233e+002
   2.14971948e+003]
('error', 109452410.24731606)

In [23]:
%matplotlib inline

import matplotlib.pyplot as plt
plt.scatter(x,y)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w*7*24 for w in range(10)],
['week %i'%w for w in range(10)])

print (x[-1])
fx = sp.linspace(0,x[-1], 1000) # generate X-values for plotting
plt.plot(fx, f1(fx), linewidth=4)
plt.legend(["d=%i" % f1.order], loc="upper left")

plt.autoscale(tight=True)
plt.grid()
plt.show()


743.0

In [ ]: