In [1]:
import warnings
from copy import copy

import pandas as pd
from IPython.display import display

In [2]:
# Base names of the binary operators that the wrappers proxy. Each entry is
# expanded into its forward (``__x__``), reflected (``__rx__``) and in-place
# (``__ix__``) dunder form when a wrapper class installs its overloads.
METHODS = [
    'add',
    'sub',
    'mul',
    'floordiv',
    'div',
    'truediv',
    'mod',
    'divmod',
    'pow',
    'lshift',
    'rshift',
    'and',
    'or',
    'xor',
]


def overload_public_attribute(instance, name):
    """
    Expose ``name`` on ``instance`` as a read-only proxy to ``self.data``.

    :param instance: object (in this notebook, a wrapper class) that
        receives the new property.
    :param name: attribute name, both on ``instance`` and on the wrapped
        ``data`` object.
    """
    def _read_from_data(self, *args, **kwargs):
        """
        Return the current value of the proxied attribute.
        """
        wrapped = self.data
        return getattr(wrapped, name)

    # Looked up on every access, so the value always reflects the current data.
    setattr(instance, name, property(_read_from_data))

def overload_public_method(instance, name):
    """
    Install a proxy for the public pandas method ``name`` on ``instance``.

    The proxy forwards the call to ``self.data`` and records it as a step.

    :param instance: object (in this notebook, a wrapper class) that
        receives the proxy.
    :param name: name of the method to forward to ``self.data``.
    """
    def __target__(self, *args, **kwargs):
        """
        Forward the call to ``self.data`` and register it in the history.

        :return: a new ``SkDataFrame``/``SkDataSerie`` holding the result and
            the extended history, or ``None`` for a truthy ``inplace``.
        :raises Exception: if the result is neither a DataFrame nor a Series.
        """
        if kwargs.get('inplace'):
            getattr(self.data, name)(*args, **kwargs)
            # Rebind instead of appending: ``steps`` may still be the list
            # defined at class level, which every instance would share.
            self.steps = list(self.steps) + ['%s(*%s)' % (name, args)]
            return None

        # Call the underlying method exactly once and reuse its result.
        _data = getattr(self.data, name)(*args, **kwargs)

        if isinstance(_data, pd.DataFrame):
            _SkDataObject = SkDataFrame
        elif isinstance(_data, pd.Series):
            _SkDataObject = SkDataSerie
        else:
            raise Exception('Data Type not supported yet.')

        return _SkDataObject(
            _data,
            list(self.steps) + ['%s(*%s)' % (name, args)]
        )

    setattr(instance, name, __target__)
    
    
def overload_private_method(instance, name, register_step=True):
    """
    Install a proxy for the dunder method ``name`` on ``instance``.

    :param instance: object (in this notebook, a wrapper class) that
        receives the proxy.
    :param name: full dunder name to forward to ``self.data``,
        e.g. ``'__add__'``.
    :param register_step: when false the raw result of the wrapped call is
        returned and nothing is recorded (used for the repr hooks).
    """
    def __target__(self, *args, **kwargs):
        """
        Forward the call to ``self.data`` and wrap the result.

        :return: the raw result when ``register_step`` is false; otherwise a
            new wrapper carrying the extended history, or ``NotImplemented``
            when the wrapped object declines the operation.
        """
        if not register_step:
            return getattr(self.data, name)(*args, **kwargs)

        # pandas does not know the wrapper types: hand it the wrapped data.
        if args and isinstance(args[0], SkDataObject):
            args = (args[0].data,) + tuple(args[1:])

        result = getattr(self.data, name)(*args, **kwargs)

        # Propagate ``NotImplemented`` untouched so Python can still fall
        # back to the other operand's reflected method.
        if result is NotImplemented:
            return NotImplemented

        # A Series result needs the Series wrapper; everything else keeps
        # going through SkDataFrame as before.
        if isinstance(result, pd.Series):
            wrapper = SkDataSerie
        else:
            wrapper = SkDataFrame

        return wrapper(
            result,
            list(self.steps) + ['%s(*%s)' % (name, args)]
        )
    setattr(instance, name, __target__)

    
def overload_private_imethod(instance, name):
    """
    Install a proxy for the in-place dunder method ``name`` on ``instance``.

    :param instance: object (in this notebook, a wrapper class) that
        receives the proxy.
    :param name: full dunder name to forward to ``self.data``,
        e.g. ``'__iadd__'``.
    """
    def __target__(self, *args, **kwargs):
        """
        Apply the in-place operation to ``self.data`` and record the step.

        :return: ``self``, or ``NotImplemented`` when the wrapped object
            declines the operation.
        """
        # pandas does not know the wrapper types: hand it the wrapped data.
        if args and isinstance(args[0], SkDataObject):
            args = (args[0].data,) + tuple(args[1:])

        result = getattr(self.data, name)(*args, **kwargs)

        # Let Python fall back to the regular binary operator.
        if result is NotImplemented:
            return NotImplemented

        # An in-place dunder returns the object to keep, which is not
        # guaranteed to be the one it was called on. ``None`` is tolerated
        # for objects that mutate themselves without returning anything.
        if result is not None:
            self.data = result

        # Rebind instead of appending: ``steps`` may still be the list
        # defined at class level, which every instance would share.
        self.steps = list(self.steps) + ['%s(*%s)' % (name, args)]
        return self
    setattr(instance, name, __target__)

In [3]:
_df = pd.DataFrame()
_se = pd.Series()

PANDAS_DATAFRAME_OBJECTS = [
    attr for attr in dir(_df) 
    if not attr.startswith('_')
]

PANDAS_DATAFRAME_ATTRIBUTES = [
    attr for attr in PANDAS_DATAFRAME_OBJECTS
    if not callable(getattr(_df, attr))
]

PANDAS_DATAFRAME_METHODS = [
    attr for attr in PANDAS_DATAFRAME_OBJECTS 
    if callable(getattr(_df, attr))
]

PANDAS_SERIES_OBJECTS = [
    attr for attr in dir(_se) 
    if not attr.startswith('_')
]

PANDAS_SERIES_ATTRIBUTES = [
    attr for attr in PANDAS_SERIES_OBJECTS
    if not callable(getattr(_se, attr))
]

PANDAS_SERIES_METHODS = [
    attr for attr in PANDAS_SERIES_OBJECTS 
    if callable(getattr(_se, attr))
]


/home/xmn/miniconda3/envs/skdata/lib/python3.6/site-packages/ipykernel_launcher.py:11: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  # This is added back by InteractiveShellApp.init_path()
/home/xmn/miniconda3/envs/skdata/lib/python3.6/site-packages/ipykernel_launcher.py:26: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated

In [4]:
class SkDataObject:
    """
    Common base class of the pandas wrappers.

    It carries no behaviour of its own; it only gives the wrappers a shared
    type that ``isinstance`` checks can recognise.
    """

class SkDataFrame(SkDataObject):
    """
    Wrapper around a ``pandas.DataFrame`` that records the operations applied.

    ``data`` holds the wrapped DataFrame and ``steps`` the textual history of
    the calls that produced it.
    """
    data = None
    # Class-level default only; ``__init__`` gives every instance its own list.
    steps = []

    def __new__(cls, *args, **kwds):
        """
        Create the instance, installing the pandas proxies on first use.
        """
        # The proxies live on the class, so they are installed once per class
        # instead of being rebuilt for every new instance.
        if not cls.__dict__.get('_overloads_installed', False):
            overload_public_attribute(cls, 'values')
            overload_private_method(cls, '__repr__', register_step=False)
            overload_private_method(cls, '_repr_html_', register_step=False)

            for method in METHODS:
                overload_private_method(cls, '__%s__' % method)
                overload_private_method(cls, '__r%s__' % method)
                overload_private_imethod(cls, '__i%s__' % method)

            for method in PANDAS_DATAFRAME_METHODS:
                overload_public_method(cls, method)

            cls._overloads_installed = True

        return super(SkDataFrame, cls).__new__(cls)

    def __init__(self, *args, **kwargs):
        """
        Wrap an existing DataFrame or build one from the given arguments.

        :param args: either ``(dataframe,)`` / ``(dataframe, steps)`` to wrap
            an existing DataFrame, or the positional arguments of
            ``pandas.DataFrame``.
        :param kwargs: keyword arguments of ``pandas.DataFrame``; only used
            when a new DataFrame is built.
        """
        if args and isinstance(args[0], pd.DataFrame):
            self.data = args[0]
            # Always bind an instance-level list so that the history is never
            # shared between wrappers through the class attribute.
            self.steps = list(args[1]) if len(args) > 1 else []
        else:
            self.data = pd.DataFrame(*args, **kwargs)
            self.steps = []

In [5]:
class SkDataSerie(SkDataObject):
    """
    Wrapper around a ``pandas.Series`` that records the operations applied.

    ``data`` holds the wrapped Series and ``steps`` the textual history of
    the calls that produced it.
    """
    data = None
    # Class-level default only; ``__init__`` gives every instance its own list.
    steps = []

    def __new__(cls, *args, **kwds):
        """
        Create the instance, installing the pandas proxies on first use.
        """
        # The proxies live on the class, so they are installed once per class
        # instead of being rebuilt for every new instance.
        if not cls.__dict__.get('_overloads_installed', False):
            overload_public_attribute(cls, 'values')
            overload_private_method(cls, '__repr__', register_step=False)
            # NOTE(review): ``_repr_html_`` is deliberately not proxied here,
            # presumably because a Series has no HTML repr - confirm.

            for method in METHODS:
                overload_private_method(cls, '__%s__' % method)
                overload_private_method(cls, '__r%s__' % method)
                overload_private_imethod(cls, '__i%s__' % method)

            # Proxy the Series API (not the DataFrame one) on a Series wrapper.
            for method in PANDAS_SERIES_METHODS:
                overload_public_method(cls, method)

            cls._overloads_installed = True

        return super(SkDataSerie, cls).__new__(cls)

    def __init__(self, *args, **kwargs):
        """
        Wrap an existing Series or build one from the given arguments.

        :param args: either ``(series,)`` / ``(series, steps)`` to wrap an
            existing Series, or the positional arguments of ``pandas.Series``.
        :param kwargs: keyword arguments of ``pandas.Series``; only used when
            a new Series is built.
        """
        if args and isinstance(args[0], pd.Series):
            self.data = args[0]
            # Always bind an instance-level list so that the history is never
            # shared between wrappers through the class attribute.
            self.steps = list(args[1]) if len(args) > 1 else []
        else:
            # A Series wrapper must hold a Series, not a DataFrame.
            self.data = pd.Series(*args, **kwargs)
            self.steps = []

In [6]:
# Build a wrapper from a plain dict (forwarded to pd.DataFrame) and show it
# together with its id.
skdf = SkDataFrame({'a': [1,5]})
skdf, id(skdf)


Out[6]:
(   a
 0  1
 1  5, 140155953694480)

In [7]:
# Public pandas methods are forwarded to the wrapped data; a Series result
# comes back wrapped in SkDataSerie.
skdf.sum()


Out[7]:
a    6
dtype: int64

In [8]:
# sum() returned a new object, so nothing was recorded on skdf itself.
skdf.steps


Out[8]:
[]

In [9]:
# Arithmetic goes through the proxied operator and returns a new wrapper.
(skdf / 2)


Out[9]:
a
0 0.5
1 2.5

In [10]:
# An in-place operator keeps the same wrapper and records the step on it.
skdf += 1
print(skdf.steps)
print(skdf)


['__iadd__(*(1,))']
   a
0  2
1  6

In [11]:
# A binary operator yields a new wrapper whose history extends a copy of
# skdf's; skdf.steps itself is left unchanged.
print(skdf + 1)
print((skdf + 1).steps)
skdf.steps


   a
0  3
1  7
['__iadd__(*(1,))', '__add__(*(1,))']
Out[11]:
['__iadd__(*(1,))']

In [12]:
# `1 + skdf` is handled by __radd__ and the trailing `+ 1` by __add__; both
# steps end up in the history of the result.
skdf_test = 1 + skdf + 1
display(skdf_test)
skdf_test.steps


a
0 4
1 8
Out[12]:
['__iadd__(*(1,))', '__radd__(*(1,))', '__add__(*(1,))']

In [13]:
# The source wrapper still only carries its own in-place step.
skdf.steps


Out[13]:
['__iadd__(*(1,))']

In [14]:
# Rebinding skdf to the result carries the accumulated history along.
skdf = 1 + skdf + 1
print(skdf.steps)
print(skdf)


['__iadd__(*(1,))', '__radd__(*(1,))', '__add__(*(1,))']
   a
0  4
1  8

In [15]:
# Subtraction is proxied the same way and returns a new wrapper.
skdf - 1


Out[15]:
a
0 3
1 7

In [16]:
# A wrapper used as the right-hand operand is unwrapped to its pandas data
# before the in-place add is applied.
skdf += skdf

In [17]:
# The last recorded step embeds the repr of its operand, here a whole frame.
print(skdf.steps)
skdf


['__iadd__(*(1,))', '__radd__(*(1,))', '__add__(*(1,))', '__iadd__(*(    a\n0   8\n1  16,))']
Out[17]:
a
0 8
1 16