In [1]:
import numpy as np
In [2]:
# Toy input values for the hand-computed regression below.
x = np.array([1, 2, 3])
In [3]:
x
Out[3]:
In [6]:
# Toy target values paired element-by-element with x.
y = np.array([2.0, 3.9, 6.1])
In [7]:
y
Out[7]:
In [9]:
x.mean()
Out[9]:
In [10]:
y.mean()
Out[10]:
In [11]:
# Center x by subtracting its mean.
xc = x - np.mean(x)
In [12]:
xc
Out[12]:
In [13]:
# Center y by subtracting its mean.
yc = y - np.mean(y)
In [14]:
yc
Out[14]:
In [31]:
# Element-wise square of the centered x values.
xx = np.square(xc)
In [34]:
xx
Out[34]:
In [20]:
# Element-wise product of the centered x and y values.
xy = np.multiply(xc, yc)
In [21]:
xy
Out[21]:
In [24]:
xx.sum()
Out[24]:
In [26]:
xy.sum()
Out[26]:
In [32]:
# Slope of the centered model: sum(xc * yc) / sum(xc * xc).
a = np.sum(xy) / np.sum(xx)
In [35]:
a # slope
Out[35]:
In [37]:
import pandas as pd
In [39]:
# Load the dataset; the relative path is resolved against the kernel's working
# directory. Later cells read its 'x' and 'y' columns.
df = pd.read_csv('sample.csv')
In [40]:
# Show only the frame's dimensions: printing the whole DataFrame floods the
# output as plain text, and the first rows are previewed with head() right after.
df.shape
In [42]:
df.head(5)
Out[42]:
In [46]:
# Extract the data: column 'x' is the input and column 'y' is the target.
# (This rebinds the names x / y that held the toy arrays earlier.)
x, y = df['x'], df['y']
In [47]:
import matplotlib.pyplot as plt
In [50]:
# Scatter plot with x on the horizontal axis and y on the vertical axis.
fig, ax = plt.subplots()
ax.scatter(x, y)
# Label the axes so the figure can be read on its own.
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
In [51]:
df.describe()
Out[51]:
In [52]:
x.mean()
Out[52]:
In [53]:
df.mean()
Out[53]:
In [55]:
# Center every column by subtracting its own mean (aligned on column labels).
df_c = df.sub(df.mean(), axis='columns')
In [56]:
df_c.head(5)
Out[56]:
In [58]:
df_c.describe()
Out[58]:
In [59]:
# From here on x and y refer to the mean-centered columns, replacing the
# raw Series bound to the same names earlier.
x, y = df_c['x'], df_c['y']
In [61]:
# Scatter plot of the data; x and y are expected to be the mean-centered
# columns taken from df_c in the preceding cell.
fig, ax = plt.subplots()
ax.scatter(x, y)
# Axis labels distinguish this figure from the raw-data scatter plot.
ax.set_xlabel('x (centered)')
ax.set_ylabel('y (centered)')
plt.show()
In [63]:
xx = x * x # * is the element-wise product
In [65]:
# Element-wise product of the centered x and y Series.
xy = x.mul(y)
In [68]:
# Slope of the centered model: sum(x * y) / sum(x * x).
s_xy = xy.sum()
s_xx = xx.sum()
a = s_xy / s_xx
In [69]:
a
Out[69]:
In [73]:
# Overlay the fitted line on the data. The line is drawn as a * x with no
# intercept term, i.e. on the centered scale.
fig, ax = plt.subplots()
ax.scatter(x, y, label='y')  # observed values
ax.plot(x, a * x, label='y_hat', color='red')  # predicted values
# Label the axes so the figure can be read on its own.
ax.set_xlabel('x (centered)')
ax.set_ylabel('y (centered)')
ax.legend()
plt.show()
In [74]:
x_new = 40 # a room of 40 square meters
In [75]:
# Column means of the raw (uncentered) data; used to center new inputs and
# to shift centered predictions back to the original scale.
mean = df.mean(axis=0)
In [76]:
mean['x']
Out[76]:
In [77]:
# Centering: express the new input relative to the mean of column 'x'.
xc = x_new - mean['x']
In [78]:
xc
Out[78]:
In [83]:
# Simple linear regression: prediction on the centered scale (slope times centered input).
yc = a * xc
In [84]:
yc
Out[84]:
In [89]:
# Shift the centered prediction back to the original scale by adding the mean of y.
y_hat = mean['y'] + a * xc
In [90]:
y_hat
Out[90]:
In [94]:
def predict(x, a=10069.022519284063, xm=37.62222, ym=121065.0):
    """Predict y for x using a simple linear regression fitted on centered data.

    The input is centered with ``xm``, multiplied by the slope ``a`` and then
    shifted by ``ym``: ``y_hat = a * (x - xm) + ym``.

    Parameters
    ----------
    x : number or array-like supporting arithmetic
        Input value(s) on the original (uncentered) scale.
    a : float, optional
        Slope of the fitted line.
    xm : float, optional
        Mean of x used for centering.
    ym : float, optional
        Mean of y added back to the centered prediction.

    Returns
    -------
    The predicted value(s), with the same shape as ``x``.

    Notes
    -----
    The defaults are the constants that were hard-coded in the original body;
    presumably they were copied from the printed values of ``a``,
    ``mean['x']`` and ``mean['y']`` (TODO confirm). The default ``xm`` looks
    truncated, so pass the exact fitted values, e.g.
    ``predict(40, a=a, xm=mean['x'], ym=mean['y'])``, for full precision or
    after refitting on different data.
    """
    # Centering
    xc = x - xm
    # Predicted value
    y_hat = a * xc + ym
    return y_hat
In [93]:
# Print the slope and the two column means, one value per line.
print(a, mean['x'], mean['y'], sep='\n')
In [96]:
# Predicted value
predict(40)
Out[96]:
In [98]:
# Extrapolation range
for x_val in (25, 10):
    print(predict(x_val))
In [99]:
predict(50)
Out[99]:
In [ ]: