Goulib.table

"mini pandas.DataFrame" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more.


In [1]:
from Goulib.notebook import *
from Goulib.table import *

In [2]:
small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data
small


Out[2]:
123
45

In [3]:
#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)
from Goulib.image import Image
lena=Image('../tests/data/lena.png').resize((128,128))
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])


Out[3]:
complexcontent
123
45
$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$

In [4]:
# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)
t=Table('../tests/data/test.xls')

In [5]:
print(t.titles) #Tables have optional column headers


['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']

In [6]:
t.setcol('Total',None)
Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation
#notice the Order Dates are messy because of Excel representaion of dates


Out[6]:
OrderDateRégionRepItemUnitésCostTotal
41061EastJonesPencil951.99 
1/23/2012CentralKivellBinder5019.99 
41154CentralJardinePencil364.99 
2/26/2012CentralGillPen2719.99 
3/15/2012WestSorvinoPencil562.99 

In [7]:
t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title


Out[7]:
(4.99, 4.99)

In [8]:
print(t[:5,'Cost']) # indexing supports slices too


[1.99, 19.99, 4.99, 19.99, 2.99]

In [9]:
# handle the mess in Excel Cell types ...
t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...
t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats


Out[9]:
True

In [10]:
# math between columns is still a bit tedious...
from Goulib.math2 import vecmul
t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))

In [11]:
#it's easy to make a "total" line from columns
#with a list of reduce-like functions applied to each column
from Goulib.stats import avg
from Goulib.itertools2 import count_unique
t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])
t.footer #result is stored in a separated footer field


Out[11]:
[datetime.date(2013, 12, 21),
 3,
 11,
 5,
 2121,
 20.308604651162796,
 19627.88000000001]

In [12]:
h(t.html(start=5,stop=10)) # a way to shorten long tables


OrderDateRégionRepItemUnitésCostTotal
.....................
2012-01-04EastJonesBinder604.99299.40
2012-04-18CentralAndrewsPencil751.99149.25
2012-05-05CentralJardinePencil904.99449.10
2012-05-22WestThompsonPencil321.9963.68
2012-08-06EastJonesBinder608.99539.40
.....................
2013-12-213115212120.3119627.88

In [13]:
t.sort('Total',reverse=True) # Tables can be sorted by column easily
h(t.html(stop=5)) # show only the 5 lines with highest total


OrderDateRégionRepItemUnitésCostTotal
2013-04-12CentralJardineBinder9419.991879.06
2012-07-29EastParentBinder8119.991619.19
2013-01-02CentralSmithBinder8715.001305.00
2012-12-29EastParentPen Set7415.991183.26
2013-10-14WestThompsonBinder5719.991139.43
.....................
2013-12-213115212120.3119627.88

In [14]:
region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support
region['East'] # isn't it nice ?


Out[14]:
OrderDateRepItemUnitésCostTotal
2012-07-29ParentBinder8119.991619.19
2012-12-29ParentPen Set7415.991183.26
2012-10-22JonesPen648.99575.36
2012-08-06JonesBinder608.99539.40
2013-04-27HowardPen964.99479.04
2013-04-07JonesPen Set624.99309.38
2012-08-11ParentPen1519.99299.85
2012-01-04JonesBinder604.99299.40
2012-09-18JonesPen Set1615.99255.84
2012-06-01JonesPencil951.99189.05
2012-08-15JonesPencil354.99174.65
2012-12-07HowardBinder291.9957.71
2013-02-18JonesBinder44.9919.96

In [15]:
#a row can be extracted as a dict where column titles are keys
t.rowasdict(1)


Out[15]:
OrderedDict([('OrderDate', datetime.date(2013, 1, 2)),
             ('Région', 'Central'),
             ('Rep', 'Smith'),
             ('Item', 'Binder'),
             ('Unités', 87),
             ('Cost', 15.0),
             ('Total', 1305.0)])

In [16]:
t.json()[:250]+'...' #rowasdict is handy to build json representation


Out[16]:
'[{"OrderDate": "2013-04-12", "R\\u00e9gion": "Central", "Rep": "Jardine", "Item": "Binder", "Unit\\u00e9s": 94, "Cost": 19.99, "Total": 1879.06}, {"OrderDate": "2013-01-02", "R\\u00e9gion": "Central", "Rep": "Smith", "Item": "Binder", "Unit\\u00e9s": 87,...'

In [17]:
from Goulib.math2 import *
from Goulib.itertools2 import *
res=Table(t) #copy
s=len(res)
for i in range(s-1):
    line=res[i]
    d=[hamming(line,res[j]) for j in range(i+1,s)]
    j=index_min(d)[0]+i
    res[i+1],res[j]=res[j],res[i+1] #swap
res


Out[17]:
OrderDateRégionRepItemUnitésCostTotal
2013-04-12CentralJardineBinder9419.991879.06
2013-07-08CentralKivellPen Set4223.951005.90
2012-01-23CentralKivellBinder5019.99999.50
2013-05-31CentralGillBinder808.99719.20
2012-02-26CentralGillPen2719.99539.73
2012-06-25CentralMorganPencil904.99449.10
2012-11-25CentralKivellPen Set964.99479.04
2013-06-17CentralKivellDesk5125.00625.00
2012-01-09CentralSmithDesk2125.00250.00
2012-05-10CentralMorganBinder288.99251.72
2013-07-21CentralMorganPen Set5512.49686.95
2013-01-02CentralSmithBinder8715.001305.00
2012-05-05CentralJardinePencil904.99449.10
2013-03-24CentralJardinePen Set504.99249.50
2013-01-15CentralGillBinder468.99413.54
2012-04-18CentralAndrewsPencil751.99149.25
2013-12-21CentralAndrewsBinder284.99139.72
2013-05-14CentralGillPencil531.2968.37
2012-12-12CentralSmithPencil671.2986.43
2013-10-31CentralAndrewsPencil141.2918.06
2012-09-02CentralJardinePencil364.99179.64
2013-11-17CentralJardineBinder114.9954.89
2012-08-11EastParentPen1519.99299.85
2013-10-09CentralGillPencil71.299.03
2013-04-07EastJonesPen Set624.99309.38
2013-10-04CentralAndrewsPencil661.99131.34
2012-09-18EastJonesPen Set1615.99255.84
2012-07-29EastParentBinder8119.991619.19
2013-04-27EastHowardPen964.99479.04
2012-08-06EastJonesBinder608.99539.40
2012-01-04EastJonesBinder604.99299.40
2012-10-22EastJonesPen648.99575.36
2012-06-01EastJonesPencil951.99189.05
2012-08-15EastJonesPencil354.99174.65
2012-12-07EastHowardBinder291.9957.71
2013-02-18EastJonesBinder44.9919.96
2013-10-14WestThompsonBinder5719.991139.43
2013-08-24WestSorvinoDesk3275.00825.00
2012-03-15WestSorvinoPencil562.99167.44
2013-09-27WestSorvinoPen761.99151.24
2013-07-03WestSorvinoBinder719.99139.93
2012-05-22WestThompsonPencil321.9963.68
2012-12-29EastParentPen Set7415.991183.26
2013-12-213115212120.3119627.88

In [18]:
hamming(t[1],t[2])


Out[18]:
6

In [19]:
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])


Out[19]:
complexcontent
123
45
$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$

In [ ]: