"mini pandas.DataFrame" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more.
In [1]:
from Goulib.notebook import *
from Goulib.table import *
In [2]:
# Tables can be constructed from any tabular data: here a list holding
# a list and a tuple of different lengths, wrapped in Table twice.
small = Table(
    Table([
        [1, 2, 3],
        (4, 5),
    ])
)
small
Out[2]:
In [3]:
# Table cells can contain other tables, LaTeX expressions, images...
# and more (soon...)
from Goulib.image import Image

lena = Image('../tests/data/lena.png').resize((128, 128))
Table(
    [
        [small, r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],
        [lena, lena],
    ],
    titles=['complex', 'content'],
)
Out[3]:
In [4]:
# Tables can be read from .csv, HTML, JSON and Excel files
# (reading Excel requires xlrd, see http://www.python-excel.org/)
t = Table('../tests/data/test.xls')
In [5]:
# Tables have optional column headers, available as .titles
print(t.titles)
In [6]:
# Set the 'Total' column to None for now; it is computed in a later cell.
t.setcol('Total', None)

# Demonstrates indexing lines, construction from the result, and the
# default HTML representation.
# Notice the order dates are messy because of the Excel representation of dates.
Table(t[:5], titles=t.titles)
Out[6]:
In [7]:
# Cells can be accessed by (row, column index) or by (row, column title)
(t[2, 5], t[2, 'Cost'])
Out[7]:
In [8]:
# Indexing supports slices too
print(t[:5, 'Cost'])
In [9]:
# Handle the mess in Excel cell types.

# Convert the column to dates, using several possible formats.
t.to_date('OrderDate', fmt=['%m/%d/%Y', 'Excel'])

# Apply a function to a column: here, force 'Cost' to contain floats.
t.applyf('Cost', float)
Out[9]:
In [10]:
# Math between columns is still a bit tedious: 'Total' is set to the
# result of vecmul applied to the 'Cost' and 'Unités' columns.
from Goulib.math2 import vecmul

t.setcol(
    'Total',
    vecmul(t.col('Cost'), t.col('Unités')),
)
In [11]:
# A "total" line is easy to make from the columns: pass a list of
# reduce-like functions, one applied to each column.
from Goulib.stats import avg
from Goulib.itertools2 import count_unique

t.total([
    max,
    count_unique,
    count_unique,
    count_unique,
    sum,
    avg,
    sum,
])
t.footer  # the result is stored in a separate footer field
Out[11]:
In [12]:
# Passing start/stop to html() is a way to shorten long tables
h(t.html(start=5, stop=10))
In [13]:
# Tables can be sorted by column easily
t.sort('Total', reverse=True)

# Show only the 5 lines with the highest total
h(t.html(stop=5))
In [14]:
# groupby gives a dictionary of subtables grouped by a column.
# Notice the Unicode support in the column title.
region = t.groupby(u'Région')
region['East']  # isn't it nice ?
Out[14]:
In [15]:
# A row can be extracted as a dict whose keys are the column titles
t.rowasdict(1)
Out[15]:
In [16]:
# rowasdict is handy to build the JSON representation;
# only the first 250 characters are shown here.
t.json()[:250] + '...'
Out[16]:
In [17]:
# Greedy reordering of the rows: for each row in turn, find the remaining
# row with the smallest `hamming` distance to it and move that row right
# after it.
from Goulib.math2 import *
from Goulib.itertools2 import *

res = Table(t)  # copy
s = len(res)
for i in range(s - 1):
    line = res[i]
    # d[k] is the distance between row i and row i+1+k
    d = [hamming(line, res[j]) for j in range(i + 1, s)]
    # index_min(d)[0] is used as the position of the smallest distance in d;
    # d starts at row i+1, so that position must be offset by i+1 (not i)
    # to get back to a row index of res.
    j = index_min(d)[0] + i + 1
    res[i + 1], res[j] = res[j], res[i + 1]  # swap
res
Out[17]:
In [18]:
# `hamming` applied to two individual rows of the table
hamming(t[1], t[2])
Out[18]:
In [19]:
# Table mixing a nested table, a LaTeX expression and images in its cells
Table(
    [
        [small, r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],
        [lena, lena],
    ],
    titles=['complex', 'content'],
)
Out[19]:
In [ ]: