Exercise 1: Populations of humans


In [20]:
class Human():
    '''Plain record describing one person.

    The constructor arguments are stored unchanged as attributes of the
    same names (``name``, ``age``, ``sex``, ``height``, ``weight``).
    '''

    def __init__(self, name, height, weight, age, sex):
        self.name, self.age, self.sex = name, age, sex
        self.height, self.weight = height, weight

    def __repr__(self):
        # The name is not part of the textual representation.
        shown = (self.sex, self.age, self.weight, self.height)
        return 'Sex: {}, Age: {}, Weight: {}, Height: {}'.format(*shown)
        
    
class Population():
    '''A collection of humans together with simple summary statistics.

    Attributes:
        humans: list of objects exposing ``age``, ``weight`` and ``height``.
        sex: truthy for a male population, falsy for a female one,
            ``None`` when the population is not restricted to one sex.
        description: free-text description of the population.
        age_range: ``(lower, upper)`` pair used as a half-open interval
            ``lower <= age < upper``, or ``None`` for no restriction.
    '''

    def __init__(self, humans, sex=None, description=None, age_range=None):
        self.humans = humans
        self.sex = sex
        self.description = description
        self.age_range = age_range

    @classmethod
    def from_csv(cls, file, height_col, weight_col, age_col, sex_col,
                 delimiter=None, skip_lines=None,  name_col=None,
                 age_range=None, description=None, sex=None):
        '''Build a population from a delimited text file.

        Args:
            file: path of the file to read.
            height_col, weight_col, age_col, sex_col: zero-based column
                indices; the sex column must hold an integer (0 is
                interpreted as female, anything else as male).
            delimiter: field separator passed to ``str.split``; ``None``
                splits on runs of whitespace.
            skip_lines: indices of lines to drop before parsing
                (e.g. ``[0]`` for a header line).
            name_col: optional column index holding the name; when omitted
                the row number among the parsed lines is used as the name.
            age_range: optional ``(lower, upper)`` pair; only rows with
                ``lower <= age < upper`` are kept.
            description: stored unchanged on the population.
            sex: optional filter; only rows whose sex equals this value
                are kept. ``None`` keeps both sexes.

        Returns:
            A new instance of ``cls`` holding the matching humans.

        Raises:
            IndexError: if ``skip_lines`` or a column index is out of range.
            ValueError: if a numeric field cannot be parsed.
        '''
        with open(file) as myfile:
            lines = myfile.readlines()

        if skip_lines:
            # Pop from the back so earlier indices stay valid.
            for index in sorted(set(skip_lines), reverse=True):
                lines.pop(index)

        humans = []
        for row_number, line in enumerate(lines):
            if not line.strip():
                # Tolerate blank lines, e.g. at the end of the file.
                continue

            fields = [field.strip() for field in line.split(delimiter)]

            # Compare against None: column 0 is a valid name column.
            if name_col is not None:
                name = fields[name_col]
            else:
                name = str(row_number)

            height = float(fields[height_col])
            weight = float(fields[weight_col])
            age = float(fields[age_col])
            gender = bool(int(fields[sex_col]))

            # Filters are applied only when the caller supplied them.
            if sex is not None and sex != gender:
                continue
            if age_range is not None and \
                    not age_range[0] <= age < age_range[1]:
                continue

            humans.append(Human(name, height, weight, age, gender))

        return cls(humans, sex, description, age_range)

    def __len__(self):
        return len(self.humans)

    def __str__(self):
        ske = ('Gender: {} \n' +
               'Min Age: {:5.2f}; Max Age: {:5.2f}\nSize: {:3d}\t'+
               'Height: {:6.2f}\tWeight: {:6.2f}\tAge: {:5.2f}')
        if self.sex is None:
            # No sex restriction was set, so do not claim one.
            gen = 'unspecified'
        elif self.sex:
            gen = 'male'
        else:
            gen = 'female'

        return ske.format(gen, self.min_age(), self.max_age(),
                          len(self), self.height(),
                          self.weight(), self.age())

    def add(self, human):
        '''Append a human to the population.'''
        self.humans.append(human)

    @staticmethod
    def _mean(values):
        '''Return the arithmetic mean of a non-empty list.'''
        return sum(values) / len(values)

    def _summarise(self, attribute, reducer):
        '''Apply reducer to one attribute of all humans; 0 if empty.'''
        values = [getattr(h, attribute) for h in self.humans]
        if not values:
            return 0
        return reducer(values)

    def max_age(self):
        '''Return the highest age in the population (0 if empty).'''
        return self._summarise('age', max)

    def min_age(self):
        '''Return the lowest age in the population (0 if empty).'''
        return self._summarise('age', min)

    def max_weight(self):
        '''Return the highest weight in the population (0 if empty).'''
        return self._summarise('weight', max)

    # Original (misspelled) name kept so existing callers keep working.
    max_weigt = max_weight

    def min_weight(self):
        '''Return the lowest weight in the population (0 if empty).'''
        return self._summarise('weight', min)

    def max_height(self):
        '''Return the greatest height in the population (0 if empty).'''
        return self._summarise('height', max)

    # Original (misspelled) name kept so existing callers keep working.
    max_heigt = max_height

    def min_height(self):
        '''Return the smallest height in the population (0 if empty).'''
        return self._summarise('height', min)

    def age(self):
        '''Return the average age in the population'''
        return self._summarise('age', self._mean)

    def weight(self):
        '''Return the average weight in the population'''
        return self._summarise('weight', self._mean)

    def height(self):
        '''Return the average height in the population'''
        return self._summarise('height', self._mean)

The data used here (`Howell1.csv`) comes from the GitHub repository accompanying the book: McElreath (2016), Statistical Rethinking, CRC Press.


In [23]:
# Build one population per sex and per ten-year age bracket
# (0-10, 10-20, ..., 90-100) from the Howell1 data set.
populations = []
for sex in [False, True]:
    for age_range in [(x, x+10) for x in range(0,91,10)]:
        # Adjacent string literals are joined without extra whitespace;
        # a backslash continuation inside the literal would embed the
        # indentation of the following line in the description.
        description = ('This population contains humans with age '
                       'between {} and {}').format(*age_range)

        p = Population.from_csv(
            'Howell1.csv', height_col=0, weight_col=1,
            age_col=2, sex_col=3, delimiter=';', skip_lines=[0],
            sex=sex, age_range=age_range, description=description)

        populations.append(p)

#for popu in populations:
#    print(popu)
#    print()


Gender: female 
Min Age:  0.00; Max Age:  9.00
Size:  62	Height:  90.46	Weight:  12.22	Age:  4.01

Gender: female 
Min Age: 10.00; Max Age: 19.00
Size:  48	Height: 136.86	Weight:  30.03	Age: 14.83

Gender: female 
Min Age: 20.00; Max Age: 29.00
Size:  49	Height: 151.03	Weight:  43.01	Age: 24.54

Gender: female 
Min Age: 30.00; Max Age: 39.00
Size:  39	Height: 149.11	Weight:  42.41	Age: 34.47

Gender: female 
Min Age: 40.00; Max Age: 49.50
Size:  37	Height: 149.50	Weight:  41.43	Age: 44.58

Gender: female 
Min Age: 50.00; Max Age: 58.00
Size:  22	Height: 150.06	Weight:  43.28	Age: 53.55

Gender: female 
Min Age: 60.00; Max Age: 69.00
Size:  20	Height: 146.49	Weight:  39.14	Age: 64.64

Gender: female 
Min Age: 70.00; Max Age: 79.00
Size:   8	Height: 147.80	Weight:  35.71	Age: 74.03

Gender: female 
Min Age: 83.00; Max Age: 85.60
Size:   2	Height: 150.18	Weight:  42.68	Age: 84.30

Gender: female 
Min Age:  0.00; Max Age:  0.00
Size:   0	Height:   0.00	Weight:   0.00	Age:  0.00

Gender: male 
Min Age:  0.00; Max Age:  9.00
Size:  58	Height:  95.16	Weight:  13.73	Age:  4.17

Gender: male 
Min Age: 10.00; Max Age: 19.00
Size:  39	Height: 137.91	Weight:  30.09	Age: 14.33

Gender: male 
Min Age: 20.00; Max Age: 29.00
Size:  37	Height: 160.55	Weight:  48.85	Age: 25.00

Gender: male 
Min Age: 30.00; Max Age: 39.00
Size:  38	Height: 161.35	Weight:  50.39	Age: 34.20

Gender: male 
Min Age: 41.00; Max Age: 49.00
Size:  40	Height: 161.27	Weight:  50.16	Age: 43.42

Gender: male 
Min Age: 50.00; Max Age: 59.00
Size:  22	Height: 161.55	Weight:  47.92	Age: 53.77

Gender: male 
Min Age: 60.00; Max Age: 68.00
Size:  12	Height: 157.80	Weight:  45.45	Age: 64.08

Gender: male 
Min Age: 71.00; Max Age: 79.30
Size:   7	Height: 156.57	Weight:  42.01	Age: 74.96

Gender: male 
Min Age: 81.75; Max Age: 88.00
Size:   4	Height: 153.19	Weight:  45.10	Age: 83.79

Gender: male 
Min Age:  0.00; Max Age:  0.00
Size:   0	Height:   0.00	Weight:   0.00	Age:  0.00

Exercise 2: Graph data structure


In [6]:
import numpy


class NodeExistsError(Exception):
    '''Raised when a node to be added is already part of the graph.'''

class NodeDoesNotExistsError(Exception):
    '''Raised when a node to be removed is not part of the graph.'''

class EdgeExistsError(Exception):
    '''Raised when an edge to be added is already part of the graph.'''

class EdgeDoesNotExistsError(Exception):
    '''Intended for operations that refer to an edge missing from the graph.'''
    
class Graph():
    '''Undirected graph with weighted edges.

    Attributes:
        nodes: set of node names; every name is stored as ``str``.
        edges: list of ``(endpoints, weight)`` tuples where ``endpoints``
            is the set of the two node names joined by the edge.
    '''

    def __init__(self, weighted_edges=None):
        '''Create a graph from ``(node1, node2, weight)`` triples.

        Args:
            weighted_edges: iterable of triples, or ``None`` for an
                empty graph. Node names are converted with ``str``.
        '''
        # Materialise once: the input is traversed twice below and may
        # be a one-shot iterator.
        if weighted_edges is None:
            weighted_edges = []
        else:
            weighted_edges = list(weighted_edges)

        self.edges = [(set([str(x[0]), str(x[1])]), x[2])
                      for x in weighted_edges]
        self.nodes = set([str(x[0]) for x in weighted_edges] +
                         [str(x[1]) for x in weighted_edges])

    def add_node(self, name):
        '''Add a node.

        Raises:
            NodeExistsError: if a node with this name already exists.
        '''
        name = str(name)
        if name in self.nodes:
            raise NodeExistsError
        self.nodes.add(name)

    def del_node(self, name):
        '''Remove a node together with all edges attached to it.

        Raises:
            NodeDoesNotExistsError: if there is no node with this name.
        '''
        name = str(name)
        if name not in self.nodes:
            raise NodeDoesNotExistsError
        self.nodes.remove(name)
        # Drop incident edges so no edge refers to a missing node.
        self.edges = [edge for edge in self.edges if name not in edge[0]]

    def add_edge(self, n1, n2, weight):
        '''Add an edge between n1 and n2; unknown endpoints become nodes.

        Raises:
            EdgeExistsError: if the two nodes are already joined.
        '''
        endpoints = set([str(n1), str(n2)])
        if endpoints in [x[0] for x in self.edges]:
            raise EdgeExistsError
        self.edges.append((endpoints, weight))
        # Mirrors __init__, which also derives the nodes from the edges.
        self.nodes.update(endpoints)

    def del_edge(self, n1, n2, weight=None):
        '''Remove the edge between n1 and n2.

        Args:
            n1, n2: names of the two endpoints.
            weight: when given, only an edge with this weight is removed;
                ``None`` matches any weight.

        Raises:
            EdgeDoesNotExistsError: if no matching edge exists.
        '''
        endpoints = set([str(n1), str(n2)])
        for edge in self.edges:
            if edge[0] == endpoints and \
                    (weight is None or edge[1] == weight):
                self.edges.remove(edge)
                return
        raise EdgeDoesNotExistsError

    def change_edge(self, w_edge1, w_edge2):
        '''Replace edge w_edge1 by w_edge2.

        Both arguments are ``(node1, node2, weight)`` tuples. If adding
        the new edge fails, the removal of the old one is undone and the
        error is re-raised.
        '''
        snapshot = list(self.edges)
        self.del_edge(*w_edge1)
        try:
            self.add_edge(*w_edge2)
        except Exception:
            self.edges = snapshot
            raise

    def is_connected(self, start):
        '''Return True if every node can be reached from start.

        A graph without nodes counts as connected. A start name that is
        not a node of a non-empty graph yields False.
        '''
        if not self.nodes:
            return True

        # Adjacency map built once instead of scanning all edges per node.
        neighbours = {}
        for edge in self.edges:
            for node in edge[0]:
                neighbours.setdefault(node, set()).update(
                    edge[0].difference(set([node])))

        lifo = [str(start)]
        visited = set()
        while lifo:
            node = lifo.pop()
            if node in visited:
                continue
            visited.add(node)
            lifo += neighbours.get(node, set()).difference(visited)

        # Decide after the search so the last visited node is counted.
        return self.nodes.issubset(visited)

    def find_mst(self):
        '''Return a minimum spanning tree computed with Kruskal's algorithm.

        Returns:
            List of ``(endpoints, weight)`` tuples in the same format as
            ``self.edges``. For a disconnected graph this is a minimum
            spanning forest; for a graph without edges it is empty.
        '''
        parent = dict((node, node) for node in self.nodes)

        def find(node):
            # Root of the component of node, with path halving.
            parent.setdefault(node, node)
            while parent[node] != node:
                parent[node] = parent[parent[node]]
                node = parent[node]
            return node

        tree = []
        for endpoints, weight in sorted(self.edges, key=lambda e: e[1]):
            if len(endpoints) != 2:
                # A self-loop can never be part of a spanning tree.
                continue
            first, second = endpoints
            root_first, root_second = find(first), find(second)
            if root_first != root_second:
                parent[root_first] = root_second
                tree.append((set(endpoints), weight))
        return tree

In [7]:
# Example graph given as (node, node, weight) triples, grouped by the
# first node of each edge.
weighted_edges = [
    (1, 2, 2), (1, 4, 1), (1, 5, 3),
    (2, 3, 4), (2, 4, 2), (2, 5, 3), (2, 6, 1),
    (3, 5, 5), (3, 6, 3),
    (4, 5, 4), (4, 7, 2),
    (5, 6, 1), (5, 7, 3), (5, 8, 7),
    (6, 8, 3),
]

g = Graph(weighted_edges)

# Kept as the last expression so the notebook displays the result.
g.is_connected('1')


Out[7]:
True

Exercise 3: The problem of the n queens