Example Usage for Series


In [1]:
# Put the parent directory first on sys.path so the development copy there is imported;
# comment this line out to use the installed package instead.
import sys; sys.path.insert(0, '../')

In [2]:
# import libraries
import raccoon as rc

Initialize


In [3]:
# empty Series
srs = rc.Series()
srs


Out[3]:
object id: 1924106416536
data:
[]
index:
[]

In [4]:
# with indexes but no data
srs = rc.Series(index=[1, 2, 3])
srs


Out[4]:
object id: 1924104590056
data:
[None, None, None]
index:
[1, 2, 3]

In [5]:
# with data
srs = rc.Series(data=[4, 5, 6], index=[10, 11, 12])
srs


Out[5]:
object id: 1924106418216
data:
[4, 5, 6]
index:
[10, 11, 12]

Print


In [6]:
srs.print()


  index    value
-------  -------
     10        4
     11        5
     12        6

In [7]:
print(srs)


  index    value
-------  -------
     10        4
     11        5
     12        6

Setters and Getters


In [8]:
# data_name
srs.data_name


Out[8]:
'value'

In [9]:
srs.data_name = 'new_data'
print(srs)


  index    new_data
-------  ----------
     10           4
     11           5
     12           6

In [10]:
# index
srs.index


Out[10]:
[10, 11, 12]

In [11]:
# index values must be unique, but they can be of any type (strings and floats here)
srs.index = ['apple', 'pear', 7.7]
srs.print()


index      new_data
-------  ----------
apple             4
pear              5
7.7               6

In [12]:
srs.index = [10, 11, 12]
print(srs)


  index    new_data
-------  ----------
     10           4
     11           5
     12           6

In [13]:
# the index can also have a name; by default it is "index"
srs.index_name


Out[13]:
'index'

In [14]:
srs.index_name = 'units'
srs.index_name


Out[14]:
'units'

In [15]:
# restore the default index name before continuing
srs.index_name = 'index'
# data returns a shallow copy, so be careful how the result is used
srs.data


Out[15]:
[4, 5, 6]

Select Index


In [16]:
srs.select_index(11)


Out[16]:
[False, True, False]

Set Values


In [17]:
# set a single cell
srs.set(10, 100)
print(srs)


  index    new_data
-------  ----------
     10         100
     11           5
     12           6

In [18]:
# setting a value for an index that is not yet in the Series creates a new row;
# [] can also be used for setting
srs[13] = 9
srs.print()


  index    new_data
-------  ----------
     10         100
     11           5
     12           6
     13           9

In [19]:
# set a subset of rows
srs[[10, 12]] = 66
print(srs)


  index    new_data
-------  ----------
     10          66
     11           5
     12          66
     13           9

In [20]:
# using boolean list
srs.set([True, False, True, False], [88, 99])
print(srs)


  index    new_data
-------  ----------
     10          88
     11           5
     12          99
     13           9

In [21]:
# setting with slices
srs[12:13] = 33
print(srs)


  index    new_data
-------  ----------
     10          88
     11           5
     12          33
     13          33

In [22]:
srs[10:12] = [1, 2, 3]
print(srs)


  index    new_data
-------  ----------
     10           1
     11           2
     12           3
     13          33

In [23]:
# set a location
srs.set_location(1, 22)
print(srs)


  index    new_data
-------  ----------
     10           1
     11          22
     12           3
     13          33

In [24]:
# set multiple locations
srs.set_locations([0, 2], [11, 27])
print(srs)


  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33

In [25]:
# append a row. DANGEROUS, as there is no validation checking, but it can be used for speed
srs.append_row(14, 99)
print(srs)


  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99

In [26]:
# append multiple rows, again no sort check
srs.append_rows([15, 16], [100, 110])
print(srs)


  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99
     15         100
     16         110

Get Values


In [27]:
# get a single cell
srs[10]


Out[27]:
11

In [28]:
# get subset of the index
srs[[11, 12, 13]].print()


  index    new_data
-------  ----------
     11          22
     12          27
     13          33

In [29]:
# get using slices
srs[11:13].print()


  index    new_data
-------  ----------
     11          22
     12          27
     13          33

In [30]:
# return as a list
srs.get([11, 12, 13], as_list=True)


Out[30]:
[22, 27, 33]

Set and Get by Location

Locations are positions within the index rather than index values: integers from 0 to len(index) - 1. Negative locations count back from the end, so -1 is the last row.


In [31]:
print(srs.get_location(2))


{'index': 12, 'new_data': 27}

In [32]:
srs.get_location(-1)


Out[32]:
{'index': 16, 'new_data': 110}

In [33]:
srs.get_locations(locations=[0, 2]).print()


  index    new_data
-------  ----------
     10          11
     12          27

In [34]:
srs.get_locations(locations=[0, 2], as_list=True)


Out[34]:
[11, 27]

In [35]:
# negative locations count back from the end: -1 is the last row, -2 the one before it
srs.set_locations([-1, -2], values=[10, 9])
print(srs)


  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99
     15           9
     16          10

Head and Tail


In [36]:
srs.head(2).print()


  index    new_data
-------  ----------
     10          11
     11          22

In [37]:
srs.tail(2).print()


  index    new_data
-------  ----------
     15           9
     16          10

Delete rows


In [38]:
srs.delete([10, 13])
print(srs)


  index    new_data
-------  ----------
     11          22
     12          27
     14          99
     15           9
     16          10

Convert


In [39]:
# return a dict
srs.to_dict()


Out[39]:
{'index': [11, 12, 14, 15, 16], 'new_data': [22, 27, 99, 9, 10]}

In [40]:
# exclude the index
srs.to_dict(index=False)


Out[40]:
{'new_data': [22, 27, 99, 9, 10]}

In [41]:
# return an OrderedDict()
srs.to_dict(ordered=True)


Out[41]:
OrderedDict([('index', [11, 12, 14, 15, 16]),
             ('new_data', [22, 27, 99, 9, 10])])

Sort by Index


In [42]:
srs = rc.Series([6, 7, 8, 9], index=[25, 24, 23, 22])
print(srs)


  index    value
-------  -------
     25        6
     24        7
     23        8
     22        9

In [43]:
# sort by index. Sorts are inplace
srs.sort_index()
print(srs)


  index    value
-------  -------
     22        9
     23        8
     24        7
     25        6

Math Methods


In [44]:
srs = rc.Series([1, 2, 3])

In [45]:
# test for equality
srs.equality(value=3)


Out[45]:
[False, False, True]

In [46]:
# all math methods can operate on a subset of the index
srs.equality(indexes=[1, 2], value=2)


Out[46]:
[True, False]

Multi-Index

Raccoon does not have true hierarchical multi-index capabilities like Pandas, but it attempts to mimic some of them by using tuples as the index. Raccoon does not check that the index tuples are all the same length, nor does it perform any other integrity checking.


In [47]:
tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]
srs = rc.Series([1, 2, 3, 4, 5, 6], index=tuples)
print(srs)


index          value
-----------  -------
('a', 1, 3)        1
('a', 1, 4)        2
('a', 2, 3)        3
('b', 1, 4)        4
('b', 2, 1)        5
('b', 3, 3)        6

The select_index method works with tuples by treating None as a wildcard: a None in the comparison tuple matches any value in that position.


In [48]:
compare = ('a', None, None)
srs.select_index(compare)


Out[48]:
[True, True, True, False, False, False]

In [49]:
compare = ('a', None, 3)
srs.select_index(compare, 'boolean')


Out[49]:
[True, False, True, False, False, False]

In [50]:
compare = (None, 2, None)
srs.select_index(compare, 'value')


Out[50]:
[('a', 2, 3), ('b', 2, 1)]

In [51]:
compare = (None, None, 3)
srs.select_index(compare, 'value')


Out[51]:
[('a', 1, 3), ('a', 2, 3), ('b', 3, 3)]

In [52]:
compare = (None, None, None)
srs.select_index(compare)


Out[52]:
[True, True, True, True, True, True]

Reset Index


In [53]:
srs = rc.Series([1, 2, 3], index=[9, 10, 11])
print(srs)


  index    value
-------  -------
      9        1
     10        2
     11        3

In [54]:
srs.reset_index()
srs


Out[54]:
object id: 1924106640744
data:
[1, 2, 3]
index:
[0, 1, 2]

In [55]:
srs = rc.Series([1, 2, 3], index=[9, 10, 11], index_name='new name')
print(srs)


  new name    value
----------  -------
         9        1
        10        2
        11        3

In [56]:
srs.reset_index()
print(srs)


  index    value
-------  -------
      0        1
      1        2
      2        3

Sorted Series

A Series is set to sorted by default if no index is given at initialization. If an index is given at initialization, then the parameter sort must be set to True for the Series to be sorted.


In [57]:
srs = rc.Series([3, 5, 4], index=[12, 15, 14], sort=True)

When sort=True is passed at initialization, the data is sorted by index to start.


In [58]:
srs.print()


  index    value
-------  -------
     12        3
     14        4
     15        5

In [59]:
srs[16] = 9
print(srs)


  index    value
-------  -------
     12        3
     14        4
     15        5
     16        9

In [60]:
# setting an index that does not exist yet inserts the row at its sorted position
srs.set(indexes=13, values=3.5)
print(srs)


  index    value
-------  -------
     12      3
     13      3.5
     14      4
     15      5
     16      9