Goulib.table

"mini pandas.DataFrame" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more.



In [1]:

    
from Goulib.notebook import *
from Goulib.table import *



In [2]:

    
small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data
small









    Out[2]:




1 2 3
4 5



In [3]:

    
#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)
from Goulib.image import Image
lena=Image('../tests/data/lena.png').resize((128,128))
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])









    Out[3]:





complex content

1 2 3
4 5

$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$



In [4]:

    
# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)
t=Table('../tests/data/test.xls')



In [5]:

    
print(t.titles) #Tables have optional column headers









    



['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']



In [6]:

    
t.setcol('Total',None)
Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation
#notice the Order Dates are messy because of Excel representaion of dates









    Out[6]:





OrderDate Région Rep Item Unités Cost Total

41061 East Jones Pencil 95 1.99  
1/23/2012 Central Kivell Binder 50 19.99  
41154 Central Jardine Pencil 36 4.99  
2/26/2012 Central Gill Pen 27 19.99  
3/15/2012 West Sorvino Pencil 56 2.99



In [7]:

    
t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title









    Out[7]:





(4.99, 4.99)



In [8]:

    
print(t[:5,'Cost']) # indexing supports slices too









    



[1.99, 19.99, 4.99, 19.99, 2.99]



In [9]:

    
# handle the mess in Excel Cell types ...
t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...
t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats









    Out[9]:





True



In [10]:

    
# math between columns is still a bit tedious...
from Goulib.math2 import vecmul
t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))



In [11]:

    
#it's easy to make a "total" line from columns
#with a list of reduce-like functions applied to each column
from Goulib.stats import avg
from Goulib.itertools2 import count_unique
t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])
t.footer #result is stored in a separated footer field









    Out[11]:





[datetime.date(2013, 12, 21),
 3,
 11,
 5,
 2121,
 20.308604651162796,
 19627.88000000001]



In [12]:

    
h(t.html(start=5,stop=10)) # a way to shorten long tables









    





OrderDate Région Rep Item Unités Cost Total

... ... ... ... ... ... ...
2012-01-04 East Jones Binder 60 4.99 299.40
2012-04-18 Central Andrews Pencil 75 1.99 149.25
2012-05-05 Central Jardine Pencil 90 4.99 449.10
2012-05-22 West Thompson Pencil 32 1.99 63.68
2012-08-06 East Jones Binder 60 8.99 539.40
... ... ... ... ... ... ...

2013-12-21 3 11 5 2121 20.31 19627.88



In [13]:

    
t.sort('Total',reverse=True) # Tables can be sorted by column easily
h(t.html(stop=5)) # show only the 5 lines with highest total









    





OrderDate Région Rep Item Unités Cost Total

2013-04-12 Central Jardine Binder 94 19.99 1879.06
2012-07-29 East Parent Binder 81 19.99 1619.19
2013-01-02 Central Smith Binder 87 15.00 1305.00
2012-12-29 East Parent Pen Set 74 15.99 1183.26
2013-10-14 West Thompson Binder 57 19.99 1139.43
... ... ... ... ... ... ...

2013-12-21 3 11 5 2121 20.31 19627.88



In [14]:

    
region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support
region['East'] # isn't it nice ?









    Out[14]:





OrderDate Rep Item Unités Cost Total

2012-07-29 Parent Binder 81 19.99 1619.19
2012-12-29 Parent Pen Set 74 15.99 1183.26
2012-10-22 Jones Pen 64 8.99 575.36
2012-08-06 Jones Binder 60 8.99 539.40
2013-04-27 Howard Pen 96 4.99 479.04
2013-04-07 Jones Pen Set 62 4.99 309.38
2012-08-11 Parent Pen 15 19.99 299.85
2012-01-04 Jones Binder 60 4.99 299.40
2012-09-18 Jones Pen Set 16 15.99 255.84
2012-06-01 Jones Pencil 95 1.99 189.05
2012-08-15 Jones Pencil 35 4.99 174.65
2012-12-07 Howard Binder 29 1.99 57.71
2013-02-18 Jones Binder 4 4.99 19.96



In [15]:

    
#a row can be extracted as a dict where column titles are keys
t.rowasdict(1)









    Out[15]:





OrderedDict([('OrderDate', datetime.date(2013, 1, 2)),
             ('Région', 'Central'),
             ('Rep', 'Smith'),
             ('Item', 'Binder'),
             ('Unités', 87),
             ('Cost', 15.0),
             ('Total', 1305.0)])



In [16]:

    
t.json()[:250]+'...' #rowasdict is handy to build json representation









    Out[16]:





'[{"OrderDate": "2013-04-12", "R\\u00e9gion": "Central", "Rep": "Jardine", "Item": "Binder", "Unit\\u00e9s": 94, "Cost": 19.99, "Total": 1879.06}, {"OrderDate": "2013-01-02", "R\\u00e9gion": "Central", "Rep": "Smith", "Item": "Binder", "Unit\\u00e9s": 87,...'



In [17]:

    
from Goulib.math2 import *
from Goulib.itertools2 import *
res=Table(t) #copy
s=len(res)
for i in range(s-1):
    line=res[i]
    d=[hamming(line,res[j]) for j in range(i+1,s)]
    j=index_min(d)[0]+i
    res[i+1],res[j]=res[j],res[i+1] #swap
res









    Out[17]:





OrderDate Région Rep Item Unités Cost Total

2013-04-12 Central Jardine Binder 94 19.99 1879.06
2013-07-08 Central Kivell Pen Set 42 23.95 1005.90
2012-01-23 Central Kivell Binder 50 19.99 999.50
2013-05-31 Central Gill Binder 80 8.99 719.20
2012-02-26 Central Gill Pen 27 19.99 539.73
2012-06-25 Central Morgan Pencil 90 4.99 449.10
2012-11-25 Central Kivell Pen Set 96 4.99 479.04
2013-06-17 Central Kivell Desk 5 125.00 625.00
2012-01-09 Central Smith Desk 2 125.00 250.00
2012-05-10 Central Morgan Binder 28 8.99 251.72
2013-07-21 Central Morgan Pen Set 55 12.49 686.95
2013-01-02 Central Smith Binder 87 15.00 1305.00
2012-05-05 Central Jardine Pencil 90 4.99 449.10
2013-03-24 Central Jardine Pen Set 50 4.99 249.50
2013-01-15 Central Gill Binder 46 8.99 413.54
2012-04-18 Central Andrews Pencil 75 1.99 149.25
2013-12-21 Central Andrews Binder 28 4.99 139.72
2013-05-14 Central Gill Pencil 53 1.29 68.37
2012-12-12 Central Smith Pencil 67 1.29 86.43
2013-10-31 Central Andrews Pencil 14 1.29 18.06
2012-09-02 Central Jardine Pencil 36 4.99 179.64
2013-11-17 Central Jardine Binder 11 4.99 54.89
2012-08-11 East Parent Pen 15 19.99 299.85
2013-10-09 Central Gill Pencil 7 1.29 9.03
2013-04-07 East Jones Pen Set 62 4.99 309.38
2013-10-04 Central Andrews Pencil 66 1.99 131.34
2012-09-18 East Jones Pen Set 16 15.99 255.84
2012-07-29 East Parent Binder 81 19.99 1619.19
2013-04-27 East Howard Pen 96 4.99 479.04
2012-08-06 East Jones Binder 60 8.99 539.40
2012-01-04 East Jones Binder 60 4.99 299.40
2012-10-22 East Jones Pen 64 8.99 575.36
2012-06-01 East Jones Pencil 95 1.99 189.05
2012-08-15 East Jones Pencil 35 4.99 174.65
2012-12-07 East Howard Binder 29 1.99 57.71
2013-02-18 East Jones Binder 4 4.99 19.96
2013-10-14 West Thompson Binder 57 19.99 1139.43
2013-08-24 West Sorvino Desk 3 275.00 825.00
2012-03-15 West Sorvino Pencil 56 2.99 167.44
2013-09-27 West Sorvino Pen 76 1.99 151.24
2013-07-03 West Sorvino Binder 7 19.99 139.93
2012-05-22 West Thompson Pencil 32 1.99 63.68
2012-12-29 East Parent Pen Set 74 15.99 1183.26

2013-12-21 3 11 5 2121 20.31 19627.88



In [18]:

    
hamming(t[1],t[2])









    Out[18]:





6



In [19]:

    
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])









    Out[19]:





complex content

1 2 3
4 5

$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$



In [ ]:

OrderDate	Région	Rep	Item	Unités	Cost
41061	East	Jones	Pencil	95	1.99
1/23/2012	Central	Kivell	Binder	50	19.99
41154	Central	Jardine	Pencil	36	4.99
2/26/2012	Central	Gill	Pen	27	19.99
3/15/2012	West	Sorvino	Pencil	56	2.99

OrderDate	Région	Rep	Item	Unités	Cost	Total
...	...	...	...	...	...	...
2012-01-04	East	Jones	Binder	60	4.99	299.40
2012-04-18	Central	Andrews	Pencil	75	1.99	149.25
2012-05-05	Central	Jardine	Pencil	90	4.99	449.10
2012-05-22	West	Thompson	Pencil	32	1.99	63.68
2012-08-06	East	Jones	Binder	60	8.99	539.40
...	...	...	...	...	...	...
2013-12-21	3	11	5	2121	20.31	19627.88

OrderDate	Région	Rep	Item	Unités	Cost	Total
2013-04-12	Central	Jardine	Binder	94	19.99	1879.06
2012-07-29	East	Parent	Binder	81	19.99	1619.19
2013-01-02	Central	Smith	Binder	87	15.00	1305.00
2012-12-29	East	Parent	Pen Set	74	15.99	1183.26
2013-10-14	West	Thompson	Binder	57	19.99	1139.43
...	...	...	...	...	...	...
2013-12-21	3	11	5	2121	20.31	19627.88