In [1]:
from IPython.display import display
from copy import copy
import pandas as pd
In [2]:
# Base names of the binary operators that get forwarded to the wrapped pandas
# object. Each entry is expanded into '__<name>__', '__r<name>__' and
# '__i<name>__' when the overloads are installed.
METHODS = (
    'add sub mul floordiv div truediv mod '
    'divmod pow lshift rshift and or xor'
).split()
def overload_public_attribute(instance, name):
    """Expose ``self.data.<name>`` as a read-only property called ``name``.

    Parameters
    ----------
    instance
        Object that receives the property via ``setattr``. It has to be a
        class for the property to take effect on attribute access.
    name : str
        Attribute looked up on ``self.data`` each time the property is read.

    Returns
    -------
    None
    """
    def _forward(self, *args, **kwargs):
        """Return attribute ``name`` of the wrapped ``self.data`` object."""
        return getattr(self.data, name)

    setattr(instance, name, property(_forward))
def overload_public_method(instance, name):
    """Install a step-recording forwarder for the public method ``name``.

    The installed method calls ``getattr(self.data, name)`` with the caller's
    arguments exactly once.

    * Without a truthy ``inplace`` keyword the result is wrapped in
      ``SkDataFrame`` (for a ``pd.DataFrame``) or ``SkDataSerie`` (for a
      ``pd.Series``) together with a copy of ``self.steps`` extended by the
      new step string.
    * With a truthy ``inplace`` keyword the call is executed for its side
      effect, the step is appended to ``self.steps`` and ``None`` is returned.

    Parameters
    ----------
    instance
        Class that receives the forwarder via ``setattr``.
    name : str
        Name of the method to look up on ``self.data``.

    Raises
    ------
    Exception
        Raised by the installed method when a non-inplace call returns
        something other than a ``pd.DataFrame`` or ``pd.Series``.
    """
    def __target__(self, *args, **kwargs):
        """Forward the call to ``self.data`` and record it as a step."""
        method = getattr(self.data, name)

        if kwargs.get('inplace'):
            method(*args, **kwargs)
            self.steps.append('%s(*%s)' % (name, args))
            return None

        # Call the underlying method only once: a second call would repeat
        # the work and breaks methods with side effects (e.g. ``pop``).
        _data = method(*args, **kwargs)

        if isinstance(_data, pd.DataFrame):
            _SkDataObject = SkDataFrame
        elif isinstance(_data, pd.Series):
            _SkDataObject = SkDataSerie
        else:
            raise Exception('Data Type not supported yet.')

        return _SkDataObject(
            _data,
            list(self.steps) + ['%s(*%s)' % (name, args)]
        )

    setattr(instance, name, __target__)
def overload_private_method(instance, name, register_step=True):
    """Install a forwarder for the dunder/private method ``name``.

    Parameters
    ----------
    instance
        Class that receives the forwarder via ``setattr``.
    name : str
        Method name looked up on ``self.data`` (e.g. ``'__add__'``).
    register_step : bool, default True
        When false the raw result of the wrapped call is returned untouched
        (used for ``__repr__``-style methods). When true the result is wrapped
        in a new object of the caller's own type with an extended step list.

    Notes
    -----
    * If the first positional argument is a ``SkDataObject`` it is replaced by
      its ``.data`` before the call, so two wrappers can be combined.
    * The result is wrapped with ``type(self)`` so a series wrapper yields a
      series wrapper instead of always producing ``SkDataFrame``.
    * ``NotImplemented`` is passed through unchanged so Python can still try
      the reflected operation.
    """
    def __target__(self, *args, **kwargs):
        """Forward the call to ``self.data``; optionally record a step."""
        if not register_step:
            return getattr(self.data, name)(*args, **kwargs)

        if args and isinstance(args[0], SkDataObject):
            # Unwrap the other operand so pandas sees the raw object.
            args = (args[0].data,) + tuple(args[1:])

        result = getattr(self.data, name)(*args, **kwargs)
        if result is NotImplemented:
            return result

        return type(self)(
            result,
            list(self.steps) + ['%s(*%s)' % (name, args)]
        )

    setattr(instance, name, __target__)
def overload_private_imethod(instance, name):
    """Install a forwarder for the in-place dunder method ``name``.

    The installed method calls ``getattr(self.data, name)`` with the caller's
    arguments, keeps the object returned by that call as the new
    ``self.data``, appends the step string to ``self.steps`` and returns
    ``self`` so that ``obj += x`` keeps the same wrapper.

    Parameters
    ----------
    instance
        Class that receives the forwarder via ``setattr``.
    name : str
        In-place method name looked up on ``self.data`` (e.g. ``'__iadd__'``).

    Notes
    -----
    * A leading ``SkDataObject`` argument is replaced by its ``.data``.
    * ``NotImplemented`` is returned unchanged, and no step is recorded, so
      Python can fall back to the non in-place operators.
    """
    def __target__(self, *args, **kwargs):
        """Apply the in-place operation to ``self.data`` and log the step."""
        if args and isinstance(args[0], SkDataObject):
            # Unwrap the other operand so the wrapped object sees raw data.
            args = (args[0].data,) + tuple(args[1:])

        result = getattr(self.data, name)(*args, **kwargs)
        if result is NotImplemented:
            return result

        # In-place dunders return the object to bind. Honour it so wrapped
        # types that return a new object are not silently left unchanged.
        if result is not None:
            self.data = result

        self.steps.append('%s(*%s)' % (name, args))
        return self

    setattr(instance, name, __target__)
In [3]:
_df = pd.DataFrame()
_se = pd.Series()
PANDAS_DATAFRAME_OBJECTS = [
attr for attr in dir(_df)
if not attr.startswith('_')
]
PANDAS_DATAFRAME_ATTRIBUTES = [
attr for attr in PANDAS_DATAFRAME_OBJECTS
if not callable(getattr(_df, attr))
]
PANDAS_DATAFRAME_METHODS = [
attr for attr in PANDAS_DATAFRAME_OBJECTS
if callable(getattr(_df, attr))
]
PANDAS_SERIES_OBJECTS = [
attr for attr in dir(_se)
if not attr.startswith('_')
]
PANDAS_SERIES_ATTRIBUTES = [
attr for attr in PANDAS_SERIES_OBJECTS
if not callable(getattr(_se, attr))
]
PANDAS_SERIES_METHODS = [
attr for attr in PANDAS_SERIES_OBJECTS
if callable(getattr(_se, attr))
]
In [4]:
class SkDataObject:
    """Marker base class for the pandas wrapper types.

    It defines no behaviour of its own; the overload helpers use
    ``isinstance(x, SkDataObject)`` to recognise wrapped operands.
    """
class SkDataFrame(SkDataObject):
    """Wrapper around a ``pd.DataFrame`` that logs the operations applied.

    Attributes
    ----------
    data : pd.DataFrame
        The wrapped frame.
    steps : list of str
        Step strings recorded by the installed overloads. Every instance owns
        its own list.
    """
    # Class-level fallbacks only; __init__ always sets per-instance values so
    # the mutable list below is never shared between instances.
    data = None
    steps = []

    def __new__(cls, *args, **kwds):
        """Create the instance, installing the forwarding overloads once."""
        # Install the overloads only the first time this class is
        # instantiated; they are class attributes, so repeating the work for
        # every new wrapper (including each arithmetic result) is redundant.
        if '_sk_overloaded' not in cls.__dict__:
            overload_public_attribute(cls, 'values')

            overload_private_method(cls, '__repr__', register_step=False)
            overload_private_method(cls, '_repr_html_', register_step=False)

            for method in METHODS:
                overload_private_method(cls, '__%s__' % method)
                overload_private_method(cls, '__r%s__' % method)
                overload_private_imethod(cls, '__i%s__' % method)

            for method in PANDAS_DATAFRAME_METHODS:
                overload_public_method(cls, method)

            cls._sk_overloaded = True

        return super(SkDataFrame, cls).__new__(cls)

    def __init__(self, *args, **kwargs):
        """Wrap an existing frame or build one from DataFrame arguments.

        ``SkDataFrame(df)`` or ``SkDataFrame(df, steps)`` wraps ``df`` as is
        and copies ``steps`` when given. Any other arguments are forwarded to
        ``pd.DataFrame(*args, **kwargs)``.
        """
        if args and isinstance(args[0], pd.DataFrame):
            self.data = args[0]
            self.steps = list(args[1]) if len(args) > 1 else []
        else:
            self.data = pd.DataFrame(*args, **kwargs)
            # Fresh list per instance: in-place operators append to it.
            self.steps = []
In [5]:
class SkDataSerie(SkDataObject):
    """Wrapper around a ``pd.Series`` that logs the operations applied.

    Attributes
    ----------
    data : pd.Series
        The wrapped series.
    steps : list of str
        Step strings recorded by the installed overloads. Every instance owns
        its own list.
    """
    # Class-level fallbacks only; __init__ always sets per-instance values so
    # the mutable list below is never shared between instances.
    data = None
    steps = []

    def __new__(cls, *args, **kwds):
        """Create the instance, installing the forwarding overloads once."""
        # Install the overloads only the first time this class is
        # instantiated; they are class attributes, so repeating the work for
        # every new wrapper is redundant.
        if '_sk_overloaded' not in cls.__dict__:
            overload_public_attribute(cls, 'values')

            overload_private_method(cls, '__repr__', register_step=False)
            # '_repr_html_' is deliberately not forwarded here (it was left
            # commented out); presumably a Series has no HTML repr - confirm.

            for method in METHODS:
                overload_private_method(cls, '__%s__' % method)
                overload_private_method(cls, '__r%s__' % method)
                overload_private_imethod(cls, '__i%s__' % method)

            # Forward the Series API, not the DataFrame one.
            for method in PANDAS_SERIES_METHODS:
                overload_public_method(cls, method)

            cls._sk_overloaded = True

        return super(SkDataSerie, cls).__new__(cls)

    def __init__(self, *args, **kwargs):
        """Wrap an existing series or build one from Series arguments.

        ``SkDataSerie(se)`` or ``SkDataSerie(se, steps)`` wraps ``se`` as is
        and copies ``steps`` when given. Any other arguments are forwarded to
        ``pd.Series(*args, **kwargs)``.
        """
        if args and isinstance(args[0], pd.Series):
            self.data = args[0]
            self.steps = list(args[1]) if len(args) > 1 else []
        else:
            # Build a Series (not a DataFrame) so ``data`` matches the class.
            self.data = pd.Series(*args, **kwargs)
            # Fresh list per instance: in-place operators append to it.
            self.steps = []
In [6]:
# Build a wrapper from a plain dict; a non-DataFrame first argument is
# forwarded to pd.DataFrame by SkDataFrame.__init__.
skdf = SkDataFrame({'a': [1,5]})
# Show the wrapper together with its id() so later cells can be compared
# against this object.
skdf, id(skdf)
Out[6]:
In [7]:
# Public pandas method called through the wrapper; the call is forwarded to
# the wrapped frame's own `sum`.
skdf.sum()
Out[7]:
In [8]:
# Inspect the step log of the original wrapper after the non-inplace call.
skdf.steps
Out[8]:
In [9]:
# Binary operator on the wrapper: `/` goes through the forwarded
# `__truediv__` and produces a new wrapper.
(skdf / 2)
Out[9]:
In [10]:
# In-place operator: `+=` goes through the forwarded `__iadd__`, which
# appends the step to this wrapper's own log and returns the same wrapper.
skdf += 1
print(skdf.steps)
# Rich display instead of print() so the frame renders like other outputs.
display(skdf)
In [11]:
# Non in-place addition returns a new wrapper with an extended step log.
# Compute it once and reuse it for both outputs.
skdf_plus_one = skdf + 1
display(skdf_plus_one)
print(skdf_plus_one.steps)
# Step log of the original wrapper, shown for comparison.
skdf.steps
Out[11]:
In [12]:
# `1 + skdf` uses the reflected `__radd__`; the trailing `+ 1` then uses
# `__add__` on the intermediate wrapper, so two steps are appended.
skdf_test = 1 + skdf + 1
display(skdf_test)
# Step log carried by the chained result.
skdf_test.steps
Out[12]:
In [13]:
# Step log of the original wrapper after the chained expression above.
skdf.steps
Out[13]:
In [14]:
# Rebind `skdf` to the chained result. NOTE: re-running this cell applies the
# operation again, so it is not idempotent.
skdf = 1 + skdf + 1
print(skdf.steps)
# Rich display instead of print() so the frame renders like other outputs.
display(skdf)
In [15]:
# Subtraction through the forwarded `__sub__`; the result is only displayed,
# `skdf` is not rebound.
skdf - 1
Out[15]:
In [16]:
# In-place addition with another wrapper as operand: the operand is replaced
# by its `.data` before the call is forwarded to the wrapped `__iadd__`.
skdf += skdf
In [17]:
# Final state: print the accumulated step log, then show the wrapper itself
# as the cell's output.
print(skdf.steps)
skdf
Out[17]: