Samples of the available datasets


In [1]:
from pydataset import data

In [2]:
# Load the 'Boston' sample dataset through pydataset's data() helper,
# then preview its first rows as the cell output.
boston = data('Boston')
boston.head()


Out[2]:
crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0
2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6
3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7
4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4
5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2

In [3]:
# Load the 'Oil' sample dataset and preview its first rows.
oil = data('Oil')
oil.head()


Out[3]:
dur size waterd gasres operator p vardp p97 varp97 p98 varp98
1 86 235 126 1140 2576 2.1834 1.8700 2.0480 3.298 2.2091 3.905
2 227 105 91 0 16000 1.3894 2.4000 2.0047 4.622 2.0542 4.818
3 17 70 76 0 584 0.9321 0.0070 0.9076 0.178 0.9056 0.179
4 12 96 85 0 16175 0.9893 0.0070 0.8993 0.150 0.8939 0.155
5 99 70 140 0 2445 2.2432 1.9576 2.0662 3.258 2.2089 3.833

In [4]:
# Load the 'Airline' sample dataset and preview its first rows.
air = data('Airline')
air.head()


Out[4]:
airline year cost output pf lf
1 1 1 1140640 0.952757 106650 0.534487
2 1 2 1215690 0.986757 110307 0.532328
3 1 3 1309570 1.091980 110574 0.547736
4 1 4 1511530 1.175780 121974 0.540846
5 1 5 1676730 1.160170 196606 0.591167

In [6]:
# Load the 'Housing' sample dataset and preview its first rows.
housing = data('Housing')
housing.head()


Out[6]:
price lotsize bedrooms bathrms stories driveway recroom fullbase gashw airco garagepl prefarea
1 42000 5850 3 1 2 yes no yes no no 1 no
2 38500 4000 2 1 1 yes no no no no 0 no
3 49500 3060 3 1 1 yes no no no no 0 no
4 60500 6650 3 1 2 yes yes no no no 0 no
5 61000 6360 2 1 1 yes no no no no 0 no

In [9]:
# Load the 'Journals' sample dataset and preview its first rows.
# It gets its own name instead of reusing `housing`, so the variable name
# matches its contents and the Housing frame is not silently overwritten.
journals = data('Journals')
journals.head()


Out[9]:
title pub society libprice pages charpp citestot date1 oclc field
1 Asian-Pacific Economic Literature Blackwell no 123 440 3822 21 1986 14 General
2 South African Journal of Economic History So Afr ec history assn no 20 309 1782 22 1986 59 Ec History
3 Computational Economics Kluwer no 443 567 2924 22 1987 17 Specialized
4 MOCT-MOST Economic Policy in Transitional Economics Kluwer no 276 520 3234 22 1991 2 Area Studies
5 Journal of Socio-Economics Elsevier no 295 791 3024 24 1972 96 Interdisciplinary

In [10]:
# Load the 'Tobacco' sample dataset and preview its first rows.
# It gets its own name instead of reusing `housing`, so the variable name
# describes the data it actually holds.
tobacco = data('Tobacco')
tobacco.head()


Out[10]:
occupation region nkids nkids2 nadults lnx stobacco salcohol age
1 bluecol flanders 1 0 2 14.19054 0 0.000000 2
2 inactself flanders 0 0 3 13.90857 0 0.002285 3
3 whitecol flanders 0 0 1 13.97461 0 0.012875 2
4 bluecol flanders 1 0 2 13.76281 0 0.005907 2
5 inactself flanders 2 0 1 13.80800 0 0.021981 2

If you are not sure of a dataset's exact name, or whether it exists at all, pass an approximate name and pydataset will suggest close matches:


In [11]:
# 'ancombe' is a deliberately inexact name: rather than a dataset, the call
# prints a "Did you mean:" list of similarly named datasets.
data('ancombe')


Did you mean:
anscombe, Anscombe, income, acme, newcomb, cancer, OME, voteincome, cane, sanction, brambles