In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import pyper
import matplotlib.pyplot as plt
In [2]:
# 関数定義
# 因子分析
def factanal(data, factors, rotation='none'):
    """Fit a factor-analysis model with R's ``factanal`` and summarise it.

    The data frame is handed to an R session through PypeR, R's
    ``factanal`` is run on it, and the fitted object is pulled back into
    Python a single time to build two summary tables.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations in rows, observed variables in columns. Its
        correlation matrix (``data.corr()``) is used for the variance
        denominator.
    factors : int
        Number of factors to extract; also the number of "factorN"
        columns produced in both returned tables.
    rotation : str, optional
        Interpolated verbatim into the R call as its ``rotation``
        argument (default ``'none'``).

    Returns
    -------
    result_var : pandas.DataFrame
        One column per factor ("factor1", "factor2", ...) and three rows:
        the sum of squared loadings, that sum divided by the trace of
        ``corr(data) - diag(uniquenesses)``, and the running total of
        that ratio. All values are rounded to 3 decimals.
    result : pandas.DataFrame
        Loadings rounded to 3 decimals, indexed by ``data.columns``, one
        column per factor.

    Notes
    -----
    The R call requests ``scores='regression'`` but the scores are not
    read back or returned by this function.
    """
    # Pass a real boolean; the string 'True' only worked by being truthy.
    r = pyper.R(use_pandas=True)
    r.assign("data", data)
    r("result <- factanal(data, factors=%s, rotation='%s', scores='regression')" % (factors, rotation))

    # Fetch the fitted object from R exactly once. Each r.get() is a full
    # round trip to the R session, so it must not be repeated per factor.
    fit = pd.Series(r.get("result"))
    uniquenesses = fit["uniquenesses"]
    loadings = fit["loadings"]
    rounded_loadings = np.round(loadings, 3)

    # Trace of the correlation matrix with the uniquenesses removed from
    # its diagonal; used as the denominator of the proportion row.
    Rstar = np.trace(data.corr() - np.diag(uniquenesses))

    result_var = pd.DataFrame(index=["固有値", "寄与率", "累積寄与率"])
    result = pd.DataFrame(index=data.columns)
    Cumulative_Var = 0
    for i in range(factors):
        name = "factor%s" % (i + 1)
        column = loadings.T[i]  # loadings of every variable on factor i

        result[name] = rounded_loadings.T[i]

        SS_loadings = np.dot(column, column.T)
        Proportion_Var = SS_loadings / Rstar
        Cumulative_Var += Proportion_Var
        result_var[name] = [round(SS_loadings, 3),
                            round(Proportion_Var, 3),
                            round(Cumulative_Var, 3)]
    return result_var, result
# 棒グラフのプロット
def factplot(data, factors, result):
    """Draw one bar chart of loadings per factor, stacked vertically.

    Parameters
    ----------
    data : pandas.DataFrame
        Only ``data.columns`` is used: it is printed after plotting so the
        bar positions can be matched to variable names.
    factors : int
        Number of factors to plot; columns "factor1" .. "factor<factors>"
        are read from ``result``.
    result : pandas.DataFrame
        Loadings table with one "factorN" column per factor and one row
        per observed variable.

    Returns
    -------
    None
        The figure is drawn on the current matplotlib figure.
    """
    # One bar per row of the loadings table. The previous hard-coded
    # range(9) only worked for data sets with exactly nine variables.
    positions = range(len(result))
    for i in range(factors):
        number = i + 1
        plt.subplot(factors, 1, number)
        plt.bar(positions, result["factor%s" % number])
        plt.title('Factor%s' % number)
    plt.tight_layout()
    print(data.columns)
In [3]:
# Load the data
data = pd.read_csv("data/tab47.csv")
# Show the first rows as the cell output
data.head()
Out[3]:
In [4]:
# No rotation: 3-factor fit using factanal's default rotation='none'
result_nvar, result_n = factanal(data, 3)
In [5]:
# Per-factor summary table (first value returned by factanal)
result_nvar
Out[5]:
In [6]:
# Loadings table (second value returned by factanal)
result_n
Out[6]:
In [7]:
# Bar chart of the unrotated loadings, one subplot per factor
factplot(data, 3, result_n)
In [8]:
# Varimax rotation: same 3-factor fit with rotation='varimax'
result_vvar, result_v = factanal(data, 3, 'varimax')
In [9]:
# Per-factor summary table (first value returned by factanal)
result_vvar
Out[9]:
In [10]:
# Loadings table (second value returned by factanal)
result_v
Out[10]:
In [11]:
# Bar chart of the varimax-rotated loadings, one subplot per factor
factplot(data, 3, result_v)
In [12]:
# Promax rotation: same 3-factor fit with rotation='promax'
result_pvar, result_p = factanal(data, 3, 'promax')
In [13]:
# Per-factor summary table (first value returned by factanal)
result_pvar
Out[13]:
In [14]:
# Loadings table (second value returned by factanal)
result_p
Out[14]:
In [15]:
# Bar chart of the promax-rotated loadings, one subplot per factor
factplot(data, 3, result_p)