Datasets: Proceedings of Machine Learning Research Volumes

5th March 2017

Neil Lawrence

This data set collection comes from the rebranding of JMLR W&CP as PMLR. As part of moving the website, we've made it convenient to download the current set of proceedings.


In [1]:
import pods
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
# calling without arguments downloads all volumes
# NOTE(review): the trailing `?` is IPython help syntax, not plain Python, so
# this cell appears to display the documentation for `pods.datasets.pmlr`
# rather than perform a download -- confirm.
data = pods.datasets.pmlr?

In [5]:
# Fetch the PMLR data set with the default arguments. The alternative call
# `pmlr(refresh_data=False)` is kept here for reference only; its effect is
# not shown in this notebook.
data = pods.datasets.pmlr()

In [7]:
# Show the author column of the 'Y' table: one list of author names per paper.
data['Y'].author


Out[7]:
0       [Cedric Archambeau, Dan Cornford, Manfred Oppe...
1                           [Simon Rogers, Mark Girolami]
2                            [Tobias Jung, Daniel Polani]
3             [Kim S. Pedersen, Marco Loog, Pieter Dorst]
4                          [Jarno Vanhatalo, Aki Vehtari]
5                           [Marina Meila, Xiaotong Shen]
6         [Douglas Aberdeen, Olivier Buffet, Owen Thomas]
7       [Sameer Agarwal, Josh Wills, Lawrence Cayton, ...
8                               [Amr Ahmed, Eric P. Xing]
9                [Yonatan Amit, Ofer Dekel, Yoram Singer]
10      [Avleen S. Bijral, Markus Breitenbach, Greg Gr...
11      [Edwin V. Bonilla, Felix V. Agakov, Christophe...
12                         [Julie Carreau, Yoshua Bengio]
13           [Miguel A. Carreira-Perpiñan, Zhengdong Lu]
14      [James Cook, Ilya Sutskever, Andriy Mnih, Geof...
15                            [Timothee Cour, Jianbo Shi]
16                                        [Hal Daume III]
17                                [Ricky Der, Daniel Lee]
18         [Gregory Druck, Mukund Narasimhan, Paul Viola]
19                           [Daniel Eaton, Kevin Murphy]
20                                         [Michael Fink]
21                    [Peter V. Gehler, Olivier Chapelle]
22                       [Amir Globerson, Tommi Jaakkola]
23                           [Amir Globerson, Sam Roweis]
24                          [Vibhav Gogate, Rina Dechter]
25      [Andrew B. Goldberg, Xiaojin Zhu, Stephen Wright]
26            [Amit Gruber, Yair Weiss, Michal Rosen-Zvi]
27                        [Sudipto Guha, Andrew McGregor]
28      [David R. Hardoon, John Shawe-Taylor, Antti Aj...
29               [Katherine A. Heller, Zoubin Ghahramani]
                              ...                        
3899          [Hongyu Yang, Cynthia Rudin, Margo Seltzer]
3900                  [Haishan Ye, Luo Luo, Zhihua Zhang]
3901                   [Jianbo Ye, James Z. Wang, Jia Li]
3902    [Ian En-Hsu Yen, Wei-Cheng Lee, Sung-En Chang,...
3903                        [Jaehong Yoon, Sung Ju Hwang]
3904       [Manzil Zaheer, Amr Ahmed, Alexander J. Smola]
3905    [Manzil Zaheer, Satwik Kottur, Amr Ahmed, José...
3906         [Friedemann Zenke, Ben Poole, Surya Ganguli]
3907    [Yizhe Zhang, Changyou Chen, Zhe Gan, Ricardo ...
3908    [Yizhe Zhang, Zhe Gan, Kai Fan, Zhi Chen, Rica...
3909    [Weizhong Zhang, Bin Hong, Wei Liu, Jieping Ye...
3910    [Chenzi Zhang, Shuguang Hu, Zhihao Gavin Tang,...
3911    [Hantian Zhang, Jerry Li, Kaan Kara, Dan Alist...
3912    [Yuchen Zhang, Percy Liang, Martin J. Wainwright]
3913    [Wenpeng Zhang, Peilin Zhao, Wenwu Zhu, Steven...
3914                           [Teng Zhang, Zhi-Hua Zhou]
3915                      [He Zhao, Lan Du, Wray Buntine]
3916    [Liang Zhao, Siyu Liao, Yanzhi Wang, Zhe Li, J...
3917         [Shengjia Zhao, Jiaming Song, Stefano Ermon]
3918    [Mingmin Zhao, Shichao Yue, Dina Katabi, Tommi...
3919                         [Shuai Zheng, James T. Kwok]
3920    [Shuxin Zheng, Qi Meng, Taifeng Wang, Wei Chen...
3921                 [Kai Zheng, Wenlong Mou, Liwei Wang]
3922    [Kai Zhong, Zhao Song, Prateek Jain, Peter L. ...
3923            [Chaoxu Zhou, Wenbo Gao, Donald Goldfarb]
3924                    [Yichi Zhou, Jialian Li, Jun Zhu]
3925    [Hao Henry Zhou, Yilin Zhang, Vamsi K. Ithapu,...
3926    [Rongda Zhu, Lingxiao Wang, Chengxiang Zhai, Q...
3927    [Julian Georg Zilly, Rupesh Kumar Srivastava, ...
3928    [Masrour Zoghi, Tomas Tunys, Mohammad Ghavamza...
Name: author, Length: 3929, dtype: object

In [15]:
# Shorthand for the 'Y' table, whose `author` column is explored in this notebook.
df = data['Y']

In [16]:
# This call fails (see the traceback below): the hash-based membership test
# used by `isin` hits cells that are Python lists, which are unhashable
# ("TypeError: unhashable type: 'list'").
# NOTE(review): a per-row test such as
# `df.author.apply(lambda names: 'Dan Cornford' in names)` is presumably what
# is wanted here -- confirm.
df.isin({'author': ['Dan Cornford']})


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
TypeError: unhashable type: 'list'

The above exception was the direct cause of the following exception:

SystemError                               Traceback (most recent call last)
<ipython-input-16-dd156f7d07b3> in <module>()
----> 1 df.isin({'author': ['Dan Cornford']})

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in isin(self, values)
   5456             values = defaultdict(list, values)
   5457             return concat((self.iloc[:, [i]].isin(values[col])
-> 5458                            for i, col in enumerate(self.columns)), axis=1)
   5459         elif isinstance(values, Series):
   5460             if not values.index.is_unique:

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    204                        keys=keys, levels=levels, names=names,
    205                        verify_integrity=verify_integrity,
--> 206                        copy=copy)
    207     return op.get_result()
    208 

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    234             objs = [objs[k] for k in keys]
    235         else:
--> 236             objs = list(objs)
    237 
    238         if len(objs) == 0:

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in <genexpr>(.0)
   5456             values = defaultdict(list, values)
   5457             return concat((self.iloc[:, [i]].isin(values[col])
-> 5458                            for i, col in enumerate(self.columns)), axis=1)
   5459         elif isinstance(values, Series):
   5460             if not values.index.is_unique:

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in isin(self, values)
   5475             return DataFrame(
   5476                 algorithms.isin(self.values.ravel(),
-> 5477                                 values).reshape(self.shape), self.index,
   5478                 self.columns)
   5479 

~/anaconda3/lib/python3.6/site-packages/pandas/core/algorithms.py in isin(comps, values)
    424             comps = comps.astype(object)
    425 
--> 426     return f(comps, values)
    427 
    428 

~/anaconda3/lib/python3.6/site-packages/pandas/core/algorithms.py in <lambda>(x, y)
    402     # work-around for numpy < 1.8 and comparisions on py3
    403     # faster for larger cases to use np.in1d
--> 404     f = lambda x, y: htable.ismember_object(x, values)
    405     if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
    406         f = lambda x, y: np.in1d(x, y)

pandas/_libs/hashtable_func_helper.pxi in pandas._libs.hashtable.ismember_object()

SystemError: <built-in method view of numpy.ndarray object at 0x1090e1da0> returned a result with an error set

In [13]:
import pandas as pd

In [14]:
# Sanity check: DataFrame.isin with a dict works as expected when the cells
# are plain scalars (two integer columns, three rows).
df2 = pd.DataFrame(
    data={'A': [1, 2, 3], 'B': [1, 4, 7]},
)
# Per-column membership: column 'A' is tested against [1, 3] and column 'B'
# against [4, 7, 12]; the result is a boolean frame of the same shape.
df2.isin(values={'A': [1, 3], 'B': [4, 7, 12]})


Out[14]:
A B
0 True False
1 False True
2 True True

In [19]:
# Expand the author list stored at index 10 into a Series, one name per entry.
pd.Series(df.author[10])


Out[19]:
0      Avleen S. Bijral
1    Markus Breitenbach
2           Greg Grudic
dtype: object

In [ ]: