In [1]:
# Put the parent directory at the front of the module search path so the
# latest development version is picked up; comment these two lines out to
# use the installed package instead.
import sys
sys.path.insert(0, '../')
In [2]:
# import libraries
import raccoon as rc
In [3]:
# empty DataFrame: no data, columns or index are passed in
df = rc.DataFrame()
df
Out[3]:
In [4]:
# with columns and indexes but no data
df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])
df
Out[4]:
In [5]:
# with data: a dict keyed by column name, plus explicit index and columns lists
df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])
df
Out[5]:
In [6]:
# print the DataFrame through its own print() method
df.print()
In [7]:
# print the DataFrame through the built-in print() function
print(df)
In [8]:
# columns: read the current column names
df.columns
Out[8]:
In [9]:
# the column names can be replaced by assigning a new list
df.columns = ['first', 'second']
print(df)
In [10]:
# columns can be renamed with a dict() that maps each current name to its new name
df.rename_columns({'second': 'b', 'first': 'a'})
df.columns
Out[10]:
In [11]:
# index: read the current index values
df.index
Out[11]:
In [12]:
# indexes can be any unique (non-repeating) values
df.index = ['apple', 'pear', 7.7]
df.print()
In [13]:
# put back the integer index the DataFrame was created with
df.index = [10, 11, 12]
print(df)
In [14]:
# the index can also have a name, by default it is "index"
df.index_name
Out[14]:
In [15]:
# the index name can be changed by assignment
df.index_name = 'units'
df.index_name
Out[15]:
In [16]:
# data is a shallow copy, be careful on how this is used
# (the index name is first set back to 'index')
df.index_name = 'index'
df.data
Out[16]:
In [17]:
# call select_index() with a single index value
df.select_index(11)
Out[17]:
In [18]:
# set a single cell: index 10, column 'a'
df.set(10, 'a', 100)
print(df)
In [19]:
# set a value outside current range creates a new row and/or column. Can also use [] for setting
df[13, 'c'] = 9
df.print()
In [20]:
# set an entire column to a single value
df['b'] = 55
print(df)
In [21]:
# set a subset of a column, selected by a list of index values
df[[10, 12], 'b'] = 66
print(df)
In [22]:
# using a boolean list to select rows; two True entries are paired with two values
df.set([True, False, True, False], 'b', [88, 99])
print(df)
In [23]:
# setting with slices
df[12:13, 'a'] = 33
print(df)
In [24]:
# a slice can also be assigned a list of values
# NOTE(review): three values for 10:12 suggests the slice end is inclusive -- confirm
df[10:12, 'c'] = [1, 2, 3]
print(df)
In [25]:
# append a row, DANGEROUS as there is no validation checking, but can be used for speed
# the row is given as an index value plus a dict of column name -> value
df.append_row(14, {'a': 44, 'c': 100, 'd': 99})
print(df)
In [26]:
# append several rows at once (list of index values, dict of column name -> list of values); again use caution
df.append_rows([15, 16], {'a': [55, 56], 'd': [100,101]})
print(df)
In [27]:
# get a single cell by index value and column name
df[10, 'a']
Out[27]:
In [28]:
# get an entire column
df['c'].print()
In [29]:
# get a list of columns
df[['a', 'c']].print()
In [30]:
# get a subset of the index for one column
df[[11, 12, 13], 'b'].print()
In [31]:
# get using slices of the index
df[11:13, 'b'].print()
In [32]:
# get a matrix: a slice of the index combined with a list of columns
df[10:11, ['a', 'c']].print()
In [33]:
# get a column, return as a list
df.get(columns='a', as_list=True)
Out[33]:
In [34]:
# get a row and return as a dictionary
df.get_columns(index=13, columns=['a', 'b'], as_dict=True)
Out[34]:
In [35]:
# get a single cell by location
# NOTE(review): locations look like positional offsets rather than index values -- confirm
df.get_location(2, 'a')
Out[35]:
In [36]:
# get an entire row when columns is None (no columns argument is passed)
print(df.get_location(2))
In [37]:
# restrict to a list of columns and ask for the result as a dict
print(df.get_location(0, ['b', 'c'], as_dict=True))
In [38]:
# a negative location is accepted (presumably counted from the end -- confirm)
df.get_location(-1).print()
In [39]:
# get several locations at once
df.get_locations(locations=[0, 2]).print()
In [40]:
# set one column to a single value at several locations
df.set_locations(locations=[0, 2], column='a', values=-9)
df.print()
In [41]:
# head(2): presumably the first two rows -- confirm
df.head(2).print()
In [42]:
# tail(2): presumably the last two rows -- confirm
df.tail(2).print()
In [43]:
# delete rows, given as a list of index values
df.delete_rows([10, 13])
print(df)
In [44]:
# delete a column by name
df.delete_columns('b')
print(df)
In [45]:
# return a dict
df.to_dict()
Out[45]:
In [46]:
# exclude the index
df.to_dict(index=False)
Out[46]:
In [47]:
# return an OrderedDict()
df.to_dict(ordered=True)
Out[47]:
In [48]:
# return a list of just one column
df['c'].to_list()
Out[48]:
In [49]:
# convert to JSON; the result is kept in `string` for the next cell
string = df.to_json()
print(string)
In [50]:
# construct a new DataFrame from the JSON string produced above
df_from_json = rc.DataFrame.from_json(string)
print(df_from_json)
In [51]:
# new DataFrame whose index is passed in descending order
df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])
print(df)
In [52]:
# sort by index. Sorts are inplace
df.sort_index()
print(df)
In [53]:
# sort by column
df.sort_columns('b')
print(df)
In [54]:
# sort by column in reverse order
df.sort_columns('b', reverse=True)
print(df)
In [55]:
# sorting with a key function is available, see tests for examples
In [56]:
# two DataFrames that share column 'b' but have different index values
df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])
df1.print()
In [57]:
df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])
print(df2)
In [58]:
# append df2 onto df1; the return value is not used and df1 itself is printed afterwards
df1.append(df2)
print(df1)
In [59]:
# small DataFrame for the math examples (no index is passed)
df = rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})
In [60]:
# test a column for equality against a value
df.equality('a', value=3)
Out[60]:
In [61]:
# all math methods can operate on a subset of the index
df.equality('b', indexes=[1, 2], value=2)
Out[61]:
In [62]:
# add two columns
df.add('a', 'b')
Out[62]:
In [63]:
# subtract
df.subtract('b', 'a')
Out[63]:
In [64]:
# multiply; the third argument [0, 2] is presumably the subset of indexes -- confirm
df.multiply('a', 'b', [0, 2])
Out[64]:
In [65]:
# divide
df.divide('b', 'a')
Out[65]:
In [66]:
# build a DataFrame whose index values are 3-element tuples
tuples = [
    ('a', 1, 3),
    ('a', 1, 4),
    ('a', 2, 3),
    ('b', 1, 4),
    ('b', 2, 1),
    ('b', 3, 3),
]
df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)
print(df)
The select_index method works with tuple indexes by allowing None to act as a wildcard in any position of the compare tuple.
In [67]:
# constrain only the first tuple position; None marks positions left unconstrained
# (assumed to be the wildcard -- confirm)
compare = ('a', None, None)
df.select_index(compare)
Out[67]:
In [68]:
# constrain the first and third positions; the second argument selects the result form
# NOTE(review): 'boolean' presumably returns a boolean mask -- confirm
compare = ('a', None, 3)
df.select_index(compare, 'boolean')
Out[68]:
In [69]:
# constrain only the second position
# NOTE(review): 'value' presumably returns the matching index values -- confirm
compare = (None, 2, None)
df.select_index(compare, 'value')
Out[69]:
In [70]:
# constrain only the third position
compare = (None, None, 3)
df.select_index(compare, 'value')
Out[70]:
In [71]:
# no position constrained
compare = (None, None, None)
df.select_index(compare)
Out[71]:
In [72]:
# DataFrame created without an explicit index
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])
print(df)
In [73]:
# reset_index() with default arguments, then display the DataFrame
df.reset_index()
df
Out[73]:
In [74]:
# DataFrame with a string index and a custom index name
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
print(df)
In [75]:
# reset_index() with default arguments on the named index
df.reset_index()
print(df)
In [76]:
# tuple index values paired with a tuple of index names, one name per tuple position
df = rc.DataFrame(
    {'a': [1, 2, 3], 'b': [4, 5, 6]},
    columns=['a', 'b'],
    index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')],
    index_name=('melo', 'helo', 'gelo'),
)
print(df)
In [77]:
# reset_index() with default arguments
df.reset_index()
print(df)
In [78]:
# DataFrame with a string index and a custom index name
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
print(df)
In [79]:
# reset_index() with drop=True
df.reset_index(drop=True)
print(df)
In [80]:
df = rc.DataFrame({'a': [1, 2, 'c'], 'b': [5, 6, 'd']}, index=[1, 2, 3])
In [81]:
for row in df.iterrows():
print(row)
In [82]:
for row in df.itertuples():
print(row)
In [83]:
# create a DataFrame with sort=True; the index is deliberately passed out of order
df = rc.DataFrame({'a': [3, 5, 4], 'b': [6, 8, 7]}, index=[12, 15, 14], sort=True)
When sort=True is passed on initialization, the data is sorted by index from the start.
In [84]:
# print the DataFrame that was created with sort=True
df.print()
In [85]:
# set a cell at index 16, which is not among the index values given at creation
df[16, 'b'] = 9
print(df)
In [86]:
# set several columns of one row from a dict of column name -> value;
# index 13 is also not present before this call
df.set(indexes=13, values={'a': 3.5, 'b': 6.5})
print(df)