Dataset Package


In [ ]:
# statsmodels ships datasets that are used in its examples, model tests, and tutorials
import statsmodels.api as sm

# Index of all available datasets, listing each dataset name (item) and its package:
# https://vincentarelbundock.github.io/Rdatasets/datasets.csv

In [ ]:
# Directory where downloaded datasets are stored (~/statsmodels_data).
# This call is not the last expression of the cell, so its return value would
# be silently discarded; print it so the path is actually shown.
print(sm.datasets.get_data_home())

# cache=True saves the downloaded data under the data-home directory, so
# later calls reuse the cached copy instead of downloading again.
duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData", cache=True)

In [ ]:
# Type of the returned container (statsmodels.datasets.utils.Dataset) followed
# by the type of the table it stores in `.data`.
print(*map(type, (duncan_prestige, duncan_prestige.data)))
# __doc__ holds the text describing what this dataset contains.
print(duncan_prestige.__doc__)

In [ ]:
# statsmodels模块内置的sample数据statsmodels/datasets
data_1 = sm.datasets.longley.load()
data_2 = sm.datasets.longley.load_pandas()

# ?
print(type(data_1), "\n", type(data_2))

# data_1.data: numpy.recarray 记录数组, 可以使用"."的方式访问属性
print(type(data_1.data), "\n", type(data_2.data))

In [ ]:
# Show the complete table stored on the dataset returned by load().
data_1.data

In [ ]:
# exog = exogenous (external factors); endog = endogenous (internal factor).
# Show the endog column name, the exog column names, and all column names.
print(data_1.endog_name, data_1.exog_name, data_1.names, sep=" \n ")
# Then the endog data itself.
print(data_1.endog)

In [ ]:
# Compare the container types of `exog` as produced by the two loaders.
print(type(data_1.exog), type(data_2.exog))
# Works (per the original note): first five rows via (row, column) slicing.
data_1.exog[:5, :]
# Fails for data_2 (per the original note): load_pandas() presumably returns
# exog as a pandas DataFrame, and DataFrame[...] does not accept a
# (row, column) slice tuple — TODO confirm. `.iloc` is the positional equivalent:
# data_2.exog[:5, :]        # raises
# data_2.exog.iloc[:5, :]   # positional slice on a DataFrame
# data_2.exog

In [ ]:
# Works (per the original note): the load() result exposes `raw_data`.
print(type(data_1.raw_data))
# Reportedly fails for the load_pandas() result; `raw_data` may not be set on
# that object in the statsmodels version used here — TODO confirm.
# print(type(data_2.raw_data))