In [2]:
%%bash
# Stage every change (additions, modifications, deletions); ':/' is git's
# pathspec for the top of the working tree, so this works from any subdirectory.
git add --all :/
# Record the staged changes as a single commit with the message below.
git commit -a -m 'fixed bug that prevented overwriting old records'
In [3]:
!date #last update
In [4]:
# Change work directory to project root
# Later cells use relative paths (e.g. 'ruxitools', 'tests', 'dist'), so the
# kernel's working directory is moved before anything else runs.
import gitpath #pip install git+https://github.com/ruxi/python-gitpath.git
import os.path
rootpath = gitpath.root()  # presumably the top-level directory of the enclosing git repo -- confirm against gitpath
os.chdir(rootpath)
os.getcwd()  # last expression of the cell: displays the new working directory
Out[4]:
#update pypi
rm -r dist # remove old source files
python setup.py sdist # make source distribution
python setup.py bdist_wheel # make build distribution with *.whl file
twine upload dist/* # pip install twine
In [10]:
%ls dist
XyDB is a database-like container for derivative data
The intended use case of XyDB is to store derivative data in a database-like
container and bind it as an attribute to the source data. It solves the
problem of namespace pollution by confining intermediate data forms to
the original dataset in a logical and structured manner. The limitation
of this object is that it exists in memory only. For more persistent storage
solutions, it's recommended to use an actual database library such as
blaze, mongoDB, or SQLite. Conversely, the advantage is that residual information
is not left over after a session.
keys (list): list keywords for all records (names for intermediate data configurations)
push (func): Adds record to database
pull (func): Pulls record from database (ducktyped)
Records are accessible via attributes by keyname
Returns dictionary records
pull.<config keyword>
show (func): Show record from database. (ducktyped)
Records are accessible via attributes by keyname
Returns namedtuple objects based on db records.
show.<config keyword>.<attribute name>
Structure of the repository according to The Hitchhiker's Guide to Python
|-- LICENSE
|-- README.md
|-- setup.py
|-- requirements.txt
|-- Makefile
|-- .gitignore
|-- docs/
|-- notebooks/
|-- ruxitools/
|-- __init__.py
|-- xydb/
|-- __init__.py
|-- XyDB.py
|-- test/
|-- __init__.py
|-- test_XyDB.py
Some resources on documentation conventions
Programs:
pip install sphinxcontrib-napoleon
Guides:
Guides on how to write unit tests:
In [13]:
import os.path
# Make sure the package and test directories are present.
# exist_ok=True makes this a no-op for directories that already exist,
# so the cell can safely be re-run.
folders = ['ruxitools', 'tests']
for folder in folders:
    os.makedirs(folder, exist_ok=True)
In [14]:
!tree | grep -v __pycache__ | grep -v .cpython #hides grep'd keywords
In [ ]:
# %load ruxitools/__init__.py
In [ ]:
# %load ruxitools/xydb.py
#!/usr/bin/env python
__author__ = "github.com/ruxi"
__copyright__ = "Copyright 2016, ruxitools"
__email__ = "ruxi.github@gmail.com"
__license__ = "MIT"
__status__ = "Development"
__version__ = "0.1"
from collections import namedtuple
class XyDB(object):
    """XyDB is a database-like container for intermediate data

    The intended use case of XyDB is to store intermediate data in a
    database-like container and bind it as an attribute to the source data.
    It solves the problem of namespace pollution by confining intermediate
    data forms to the original dataset in a logical and structured manner.
    The limitation of this object is that it exists in memory only. For more
    persistent storage solutions, it's recommended to use an actual database
    library such as blaze, mongoDB, or SQLite. Conversely, the advantage is
    that residual information is not left over after a session.

    Example:
        Define a namedtuple for input validation, then assign an XyDB
        instance as an attribute of your source data object, usually a
        pandas dataframe.

            from collections import namedtuple
            from ruxitools.xydb import XyDB

            # define input validation schema
            input_val = namedtuple("data", ['key', 'desc', 'X', 'y'])

            # define data
            myData = pd.DataFrame()

            # attach the container
            myData.Xy = XyDB(input_val, verbose=True)

            # add data to DB
            myRecord = dict(key='config1'
                            , desc='dummydata'
                            , X=[0, 1, 0]
                            , y=['a', 'b', 'a'])
            myData.Xy.push(**myRecord)

            # show data
            myData.Xy.show.config1.desc
    """

    def __init__(self, schema=None, verbose=True, welcome=True):
        """
        Arguments:
            schema (default: None | NamedTuple):
                Accepts a NamedTuple subclass with a "key" field
                which is used for input validation when records
                are "push"ed. When None, the fields are inferred
                from the keyword arguments of every push.
            verbose (default: True | boolean)
                If false, suppresses print commands, including the
                welcome message.
            welcome (default: True | boolean)
                If false, suppresses printing of the class docstring
                upon initialization.

        Raises:
            ValueError: if a schema is given that has no 'key' attribute.
        """
        self._db = {}
        # Plain function objects accept arbitrary attributes, so they are used
        # as lightweight namespaces for `show.<key>` / `pull.<key>` access.
        self._show = lambda: None
        self._pull = lambda: None
        self._verbose = verbose

        # The welcome message is a print command too, so verbose=False
        # silences it as documented.
        if welcome and verbose:
            print(self.__doc__)

        # Input validation (optional) can be spec'd out by a NamedTuple,
        # which must expose a 'key' field.
        self._schema = False if schema is None else schema
        if self._schema and not hasattr(self._schema, "key"):
            raise ValueError("namedtuple must have 'key' as a field")

    def push(self, key, *args, **kwargs):
        """Adds a record to the database, replacing any record with the same key.

        Arguments:
            key (str): name under which the record is stored and exposed
                via `show.<key>` and `pull.<key>`.
            *args, **kwargs: remaining record fields. With a schema they are
                passed to the schema constructor; without one they are passed
                to `dict()` and a namedtuple is generated from the result.

        Raises:
            TypeError: if `key` is not a string, or if the fields do not
                match the schema.
        """
        if not isinstance(key, str):
            raise TypeError('key must be string')

        if self._schema:  # user-defined schema validates the fields
            self._input_validator = self._schema
            record = self._input_validator(key, *args, **kwargs)
        else:  # the schema is inferred from every push
            entry_dict = dict(*args, key=key, **kwargs)
            self._input_validator = namedtuple('Data', list(entry_dict.keys()))
            record = self._input_validator(**entry_dict)

        self._db[record.key] = record

        if self._verbose:
            print('Record added {}'.format(record.key))

        # Only the record just written can have changed, so bind it alone
        # instead of rebinding every stored record on each push.
        self._bind_record(record.key)

    def _bind_record(self, key):
        """Exposes the stored record `key` on the show/pull namespaces."""
        record = self._db[key]
        # self.show.<key> = namedtuple
        setattr(self._show, key, record)
        # self.pull.<key> = dict
        setattr(self._pull, key, record._asdict())

    def _update(self):
        """Rebinds dynamic attribute access on self.show & self.pull for all records."""
        for key in self.keys:
            self._bind_record(key)

    @property
    def db(self):
        """Intermediate data accessible by keyword. Returns a dict of namedtuples."""
        return self._db

    @property
    def keys(self):
        """
        List configuration keywords

        Returns:
            dict_keys: view over the keys of all stored records
        """
        return self.db.keys()

    @property
    def show(self):
        """
        Show record from database. Accessible by attribute via keyname

        Returns:
            namedtuple objects

        Usage:
            show.<config keyword>.<attribute name>
        """
        return self._show

    @property
    def pull(self):
        """
        Pull record from database. Accessible by attribute via keyname

        Returns:
            dictionary

        Usage:
            pull.<config keyword>
        """
        return self._pull
In [ ]:
# %load tests/test_xydb.py
__author__ = "github.com/ruxi"
__copyright__ = "Copyright 2016, ruxitools"
__email__ = "ruxi.github@gmail.com"
__license__ = "MIT"
__status__ = "Development"
__version__ = "0.1"
import unittest
import collections
from ruxitools.xydb import XyDB
class TestXydb(unittest.TestCase):
    """Unit tests for XyDB: pushing records, attribute access and properties."""

    ############
    #  set-up  #
    ############

    def dummycase(self):
        """Returns a well-formed record."""
        key = 'dummy0'
        desc = 'test case'
        X = [1, 2, 3, 4]
        y = ['a', 'b', 'c', 'd']
        return dict(key=key, desc=desc, X=X, y=y)

    def badcase_nokey(self):
        """Returns a record without the mandatory 'key' field."""
        desc = 'test case'
        X = [1, 2, 3, 4]
        return dict(desc=desc, X=X)

    def badcase_KeyNotStr(self):
        """Returns a record whose 'key' is not a string."""
        key = [1, 2, 3, 4]
        X = "x is a str"
        return dict(key=key, X=X)

    def mockschema(self):
        """Returns a namedtuple class matching the fields of dummycase()."""
        input_validation = collections.namedtuple("Xy", ['key', 'desc', 'X', 'y'])
        return input_validation

    def push_record_noschema(self, record):
        xy = XyDB(verbose=False)
        xy.push(**record)
        return xy

    def push_record_w_schema(self, record, schema):
        xy = XyDB(schema=schema, verbose=False)
        xy.push(**record)
        return xy

    ###########
    #  TESTS  #
    ###########

    def test_positive_control(self):
        self.assertTrue(True)

    def test_init_args(self):
        """every supported constructor call succeeds"""
        XyDB()
        XyDB(verbose=False)
        XyDB(verbose=True)

    def test_PushRecord_NoSchema(self):
        record = self.dummycase()
        xy = self.push_record_noschema(record)
        self.assertIn(record['key'], xy.db)

    def test_PushRecord_WithSchema(self):
        record = self.dummycase()
        schema = self.mockschema()
        xy = self.push_record_w_schema(record=record, schema=schema)
        self.assertEqual(xy.db[record['key']], schema(**record))

    def test_PushRecord_NoKey(self):
        """negative test: push() without its required 'key' argument"""
        record = self.badcase_nokey()
        with self.assertRaises(TypeError):
            self.push_record_noschema(record)

    def test_PushRecord_KeyNotStr(self):
        """negative test: a non-string key is rejected"""
        record = self.badcase_KeyNotStr()
        # Matching on the message pins the key-type check specifically,
        # rather than any unrelated error raised along the way.
        with self.assertRaisesRegex(Exception, 'key must be string'):
            self.push_record_noschema(record)

    def test_ShowRecord(self):
        record = self.dummycase()
        xy = self.push_record_noschema(record)
        shown = getattr(xy.show, record['key'])
        self.assertEqual(shown.desc, record['desc'])
        self.assertEqual(shown.X, record['X'])

    def test_ShowRecord_NonExistKey(self):
        """negative test: unknown keys are not exposed on show"""
        record = self.dummycase()
        key = record['key'] + "spike"
        xy = self.push_record_noschema(record)
        # The lookup under test is the attribute access on xy.show,
        # which fails with AttributeError for an unknown key.
        with self.assertRaises(AttributeError):
            getattr(xy.show, key)

    def test_PullRecord(self):
        record = self.dummycase()
        xy = self.push_record_noschema(record)
        pulled = getattr(xy.pull, record['key'])
        self.assertEqual(dict(pulled), record)

    def test_PullRecord_NonExistKey(self):
        """negative test: unknown keys are not exposed on pull"""
        record = self.dummycase()
        key = record['key'] + "spike"
        xy = self.push_record_noschema(record)
        with self.assertRaises(AttributeError):
            getattr(xy.pull, key)

    def test_keys_NoRecords(self):
        """is dict_keys returned"""
        xy = XyDB()
        self.assertIsInstance(
            xy.keys
            , type({}.keys())
            , "Expecting dict_keys, instead got {}".format(type(xy.keys))
        )
        self.assertEqual(len(xy.keys), 0)

    def test_keys_WithRecords(self):
        record = self.dummycase()
        xy = XyDB()
        xy.push(**record)
        self.assertEqual(list(xy.keys), [record['key']])

    def test_db_IsDict(self):
        record = self.dummycase()
        xy = self.push_record_noschema(record)
        self.assertIsInstance(xy.db, dict)

    def test_otherattributes(self):
        """_update() can be called directly and leaves records accessible"""
        record = self.dummycase()
        schema = self.mockschema()
        xy = self.push_record_w_schema(record, schema)
        xy._update()
        self.assertEqual(getattr(xy.show, record['key']).desc, record['desc'])
if __name__ == '__main__':
unittest.main()
In [18]:
!nosetests --tests=tests --with-coverage #conda install nose, coverage
In [20]:
!coverage report -mi #conda install nose, coverage
Format based on minimal example
In [ ]:
# %load setup.py
from setuptools import setup, find_packages
import sys
# Fail early with a readable message on unsupported interpreters.
if sys.version_info[:2] < (3, 5):
    sys.exit("ruxitools requires python 3.5 or higher")

# defining variables
install_requires = []  # no runtime dependencies at present
tests_require = [
    'mock'
    , 'nose'
]

# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
classifier = [
    "Programming Language :: Python",
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Natural Language :: English',
    'Operating System :: Unix',
    'Programming Language :: Python :: 3 :: Only'
]

# A single space-separated string; a trailing comma here would silently
# turn it into a 1-tuple.
keywords = 'ruxi tools ruxitools xydb intermediate data containers'

# setup
setup(
    name='ruxitools'
    , version="0.2.6"
    , description="Misc general use functions. XyDB: container for intermediate data."
    , url="http://github.com/ruxi/tools"
    , author="ruxi"
    , author_email="ruxi.github@gmail.com"
    , license="MIT"
    , packages=find_packages()  # ['ruxitools']
    , python_requires=">=3.5"  # lets pip refuse unsupported interpreters up front
    , install_requires=install_requires
    , tests_require=tests_require
    , test_suite='nose.collector'
    , classifiers=classifier
    , keywords=keywords
)
Docs on python wheels (needed for pip)
Recommended way to register and upload
python setup.py register # Not recommended, but did it this way. See guide
Create source distribution
python setup.py sdist
Create build distribution (python wheels for pip)
python setup.py bdist_wheel
Upload distribution
twine upload dist/* # pip install twine
All together
python setup.py sdist
python setup.py bdist_wheel
twine upload dist/*
In [ ]:
# %load README.md
# ruxitools
Miscellaneous tools.
# Installation
method1:
pip install -e "git+https://github.com/ruxi/tools.git#egg=ruxitools"
method2:
git clone https://github.com/ruxi/tools.git
cd tools
python setup.py install
python setup.py test
# Modules
## XyDB: a container for intermediate data
XyDB is used to organize intermediate data by attaching it to the source dataset.
It solves the problem of namespace pollution, especially if many intermediate
datasets are derived from the source.
Usage:
```python
from ruxitools.xydb import XyDB
# attach container to source data
mydata.Xy = XyDB()
# store intermediate info & documentation into the containers
mydata.Xy.push(
    key="config1" # keyword (must be a string)
    , X=[mydata*2] # intermediate data
    , desc = "multiply by 2" # description of operation
)
# To retrieve intermediate data as a dict:
mydata.Xy.pull.config1
# To retrieve intermediate data as attributes:
mydata.Xy.show.config1.desc
# To show keys
mydata.Xy.keys
```
# TODO:
requirements.txt - not sure if it works
In [ ]:
# %load MANIFEST.in
include README.md
include LICENSE
In [ ]:
# %load LICENSE
MIT License
Copyright (c) 2016 github.com/ruxi
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
python setup.py test
In [32]:
#!python setup.py test
In [ ]:
# %load .travis.yml
os: linux
language: python
python:
- 3.5
# command to install dependencies
install:
- "pip install -r requirements.txt"
- "pip install ."
# command to run tests
script: nosetests
In [ ]: