Piazza question:

Can a column and a row have the same index label? Will that affect how data is retrieved?

Something along the lines of:

df = DataFrame([1,2,3,4],index=['a','b','c','d'],columns=['a'])

In this case, if we use df['a'], what value would be retrieved? I believe the selection syntax does not let us specify whether the label refers to a row or a column.

Let's try it....


In [1]:
from pandas import DataFrame

# One-column frame whose only column label ('a') deliberately matches one of
# its row labels, so we can see which axis each indexing form consults.
df = DataFrame({'a': [1, 2, 3, 4]}, index=list('abcd'))

In [2]:
# column a -- plain [] indexing on a DataFrame looks up a *column* label, so
# this returns the whole column 'a' (all four rows), not the row labelled 'a'.
df['a']


Out[2]:
a    1
b    2
c    3
d    4
Name: a, dtype: int64

In [3]:
# index a -- to select by *row* label, use the label-based .loc indexer.
# (.ix, used in older pandas, was deprecated in 0.20 and removed in 1.0.)
# https://pandas.pydata.org/docs/user_guide/indexing.html#different-choices-for-indexing

df.loc['a']


Out[3]:
a    1
Name: a, dtype: int64

In [4]:
# index b -- row label lookup via .loc (.ix no longer exists in pandas >= 1.0)

df.loc['b']


Out[4]:
a    2
Name: b, dtype: int64

In [5]:
# column b -- doesn't exist. [] only searches the column labels, so the row
# label 'b' is not considered and this raises KeyError.

df['b']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-5-f0cb03be78bd> in <module>()
      1 # column b -- doesn't exist
      2 
----> 3 df['b']

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1656             return self._getitem_multilevel(key)
   1657         else:
-> 1658             return self._getitem_column(key)
   1659 
   1660     def _getitem_column(self, key):

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1663         # get column
   1664         if self.columns.is_unique:
-> 1665             return self._get_item_cache(key)
   1666 
   1667         # duplicate columns & possible reduce dimensionaility

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1003         res = cache.get(item)
   1004         if res is None:
-> 1005             values = self._data.get(item)
   1006             res = self._box_item_values(item, values)
   1007             cache[item] = res

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item)
   2871                 return self.get_for_nan_indexer(indexer)
   2872 
-> 2873             _, block = self._find_block(item)
   2874             return block.get(item)
   2875         else:

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/internals.pyc in _find_block(self, item)
   3183 
   3184     def _find_block(self, item):
-> 3185         self._check_have(item)
   3186         for i, block in enumerate(self.blocks):
   3187             if item in block:

/Users/raymondyee/anaconda/envs/ipython-dev/lib/python2.7/site-packages/pandas/core/internals.pyc in _check_have(self, item)
   3190     def _check_have(self, item):
   3191         if item not in self.items:
-> 3192             raise KeyError('no item named %s' % com.pprint_thing(item))
   3193 
   3194     def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,

KeyError: u'no item named b'