Piazza question:

Can a column and a row have the same label? Will that affect how data is retrieved?

Something along these lines:

df = DataFrame([1,2,3,4],index=['a','b','c','d'],columns=['a'])

In this case, if we use df['a'], what value would be retrieved? I believe the selection syntax doesn't specify whether the label refers to a row or a column.

Let's try it....


In [2]:
from pandas import DataFrame

# One column labelled 'a' and four rows labelled 'a'..'d', so the label 'a'
# exists on both axes.
df = DataFrame([1, 2, 3, 4], index=list('abcd'), columns=['a'])

In [3]:
# show the whole frame: column header 'a', row labels 'a'..'d'
df


Out[3]:
a
a 1
b 2
c 3
d 4

4 rows × 1 columns


In [4]:
# column a -- plain [] with a label selects the column named 'a',
# even though a row is also labelled 'a'
df['a']


Out[4]:
a    1
b    2
c    3
d    4
Name: a, dtype: int64

In [5]:
# row label a -- rows are selected by label through .loc
# (.ix was deprecated in pandas 0.20 and removed in 1.0; .loc is the
# label-based replacement and returns the same row here)
# https://pandas.pydata.org/docs/user_guide/indexing.html

df.loc['a']


Out[5]:
a    1
Name: a, dtype: int64

In [6]:
# row label b -- label-based row lookup via .loc
# (.ix was removed in pandas 1.0; .loc returns the same row here)

df.loc['b']


Out[6]:
a    2
Name: b, dtype: int64

In [7]:
# column b -- doesn't exist ('b' is only a row label), so plain []
# raises KeyError instead of falling back to the row

df['b']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-7-f0cb03be78bd> in <module>()
      1 # column b -- doesn't exist
      2 
----> 3 df['b']

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1633             return self._getitem_multilevel(key)
   1634         else:
-> 1635             return self._getitem_column(key)
   1636 
   1637     def _getitem_column(self, key):

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1640         # get column
   1641         if self.columns.is_unique:
-> 1642             return self._get_item_cache(key)
   1643 
   1644         # duplicate columns & possible reduce dimensionaility

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
    981         res = cache.get(item)
    982         if res is None:
--> 983             values = self._data.get(item)
    984             res = self._box_item_values(item, values)
    985             cache[item] = res

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item)
   2752                 return self.get_for_nan_indexer(indexer)
   2753 
-> 2754             _, block = self._find_block(item)
   2755             return block.get(item)
   2756         else:

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/internals.pyc in _find_block(self, item)
   3063 
   3064     def _find_block(self, item):
-> 3065         self._check_have(item)
   3066         for i, block in enumerate(self.blocks):
   3067             if item in block:

/Library/Frameworks/Python.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/internals.pyc in _check_have(self, item)
   3070     def _check_have(self, item):
   3071         if item not in self.items:
-> 3072             raise KeyError('no item named %s' % com.pprint_thing(item))
   3073 
   3074     def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,

KeyError: u'no item named b'