notebook.community

Edit and run



In [16]:

    
from collections import defaultdict


users = [[0, "Hero", 0],
         [1, "Dunn", 2],
         [2, "Sue", 3],
         [3, "Chi", 3]]


class Table(object):
    """ This is a toy table class, meant to mimick a table
        in a relational database. The same sorts of logic
        are implemented, but in memory.
        No mind was paid to performance or utility, this is
        a toy model meant to demonstrate database features.
    """
    
    def __init__(self, columns):
        self.columns = columns
        self.rows = []
    
    def __repr__(self):
        """ pretty representation of the table: columns then rows """
        return str(self.columns) + "\n" + "\n".join(map(str, self.rows))
    
    def insert(self, row_values):
        if len(row_values) != len(self.columns):
            raise TypeError("wrong number of elements")
        row_dict = dict(zip(self.columns, row_values))
        self.rows.append(row_dict)
        
    def update(self, updates, predicate):
        for row in self.rows:
            if predicate(row):
                for column, new_value in updates.items():
                    row[column] = new_value
    
    def delete(self, predicate=lambda row: True):
        """ delete all rows matching predicate
            or all rows if no predicate supplied
        """
        self.rows = [row for row in self.rows if not(predicate(row))]

    def select(self, keep_columns=None, additional_columns=None):
        if keep_columns is None:
            keep_columns = self.columns
        
        if additional_columns is None:
            additional_columns = {}
        
        result_table = Table(keep_columns + list(additional_columns.keys()))
        
        for row in self.rows:
            new_row = [row[column] for column in keep_columns]
            for column_name, calc in additional_columns.iterms():
                new_row.append(calc(row))
            result_table.insert(new_row)
        
        return result_table

    def where(self, predicate=lambda row: True):
        """ return only the rows that satisfy the supplied predicate """
        where_table = Table(self.columns)
        where_table.rows = filter(predicate, self.rows)
        return where_table
    
    def limit(self, num_rows):
        """ return only the first num_rows rows """
        limit_table = Table(self.columns)
        limit_table.rows = self.rows[:num_rows]
        return limit_table
    
    def group_by(self, group_by_columns, aggregates, having=None):
        grouped_rows = defaultdict(list)
        
        # populate groups
        for row in self.rows:
            key = tuple(row[column] for column in group_by_columns)
            grouped_rows[key].append(row)
        
        # resultant table consists of group_by and aggregates
        result_table = Table(group_by_columns + list(aggregates.keys()))
        
        for key, rows in grouped_rows.items():
            if having is None or having(rows):
                new_row = list(key)
                for aggregate_name, aggregate_fn in aggregates.items():
                    new_row.append(aggregate_fn(rows))
                result_table.insert(new_row)
        
        return result_table
    
    def join(self, other_table, left_join=False):
        """ join two tables based on whatever columns they have in common """
        join_on_columns = [c for c in self.columns if c in other_table.columns]
        additional_columns = [c for c in other_table.columns if c not in join_on_columns]
        
        # all columns from left table and additional columns from right table
        join_table = Table(self.columns + additional_columns)
        
        for row in self.rows:
            def is_join(other_row):
                return all(other_row[c] == row[c] for c in join_on_columns)
            
            othter_rows = other_table.where(is_join).rows
            
            # for each row that machines, this one produces a resultant row
            for other_row in other_rows:
                join_table.insert([row[c] for c in self.columns] +
                                  [other_row[c] for c in additional_columns])
                
            # if no rows match and it's a left join, output with None
            if left_join and not other_rows:
                join_table.insert([row[c] for c in self.columns] +
                                  [None for c in additional_columns])
        
        return join_table



In [11]:

    
users = Table(["user_id", "name", "num_friends"])
users.insert([0, "Hero", 0])
users.insert([1, "Dunn", 2])
users.insert([2, "Sue", 3])
users.insert([3, "Chi", 3])
users.insert([4, "Thor", 3])
users.insert([5, "Clive", 2])
users.insert([6, "Hicks", 3])
users.insert([7, "Devin", 2])
users.insert([8, "Kate", 2])
users.insert([9, "Klein", 3])
users.insert([10, "Jen", 1])


print(users)









    



['user_id', 'name', 'num_friends']
{'num_friends': 0, 'name': 'Hero', 'user_id': 0}
{'num_friends': 2, 'name': 'Dunn', 'user_id': 1}
{'num_friends': 3, 'name': 'Sue', 'user_id': 2}
{'num_friends': 3, 'name': 'Chi', 'user_id': 3}
{'num_friends': 3, 'name': 'Thor', 'user_id': 4}
{'num_friends': 2, 'name': 'Clive', 'user_id': 5}
{'num_friends': 3, 'name': 'Hicks', 'user_id': 6}
{'num_friends': 2, 'name': 'Devin', 'user_id': 7}
{'num_friends': 2, 'name': 'Kate', 'user_id': 8}
{'num_friends': 3, 'name': 'Klein', 'user_id': 9}
{'num_friends': 1, 'name': 'Jen', 'user_id': 10}



In [12]:

    
users.update({'num_friends': 3}, lambda row: row['user_id'] == 1)



In [13]:

    
users.delete(lambda row: row["user_id"] == 1)
users.delete()



In [14]:

    
def name_length(row): return len(row["name"])


users.select(keep_columns=[],
             additional_columns = {"name_length": name_length})









    Out[14]:





['name_length']



In [15]:

    
def min_user_id(rows): return min(row['user_id'] for row in rows)

stats_by_length = users.select(additional_columns={'name_length': name_length}) \
                       .group_by(group_by_columns=['name_length'],
                                 aggregates={'min_user_id': min_user_id, 'num_users': len})



In [ ]: