In [41]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

Simple

simple 1-1 correspondance


In [42]:
keyEN = ['red', 'yellow', 'green', 'blue', 'black']
keyFR1 = ['rouge', 'jaune', 'vert', 'bleu', 'noir']
keyFR2 = ['jaune', 'vert', 'bleu', 'noir', 'rouge']
keyDE = ['gelb', 'gruen', 'blau', 'schwartz', 'rot']

In [43]:
dataENFR = pd.DataFrame({'keyEN' : keyEN, 'keyFR' : keyFR1})
dataENFR


Out[43]:
keyEN keyFR
0 red rouge
1 yellow jaune
2 green vert
3 blue bleu
4 black noir

In [44]:
dataFRDE = pd.DataFrame({'keyFR' : keyFR2, 'keyDE' : keyDE})
dataFRDE


Out[44]:
keyDE keyFR
0 gelb jaune
1 gruen vert
2 blau bleu
3 schwartz noir
4 rot rouge

In [45]:
simpleMerge = pd.merge(dataENFR, dataFRDE, on='keyFR', how='outer')
simpleMerge


Out[45]:
keyEN keyFR keyDE
0 red rouge rot
1 yellow jaune gelb
2 green vert gruen
3 blue bleu blau
4 black noir schwartz

Complex

1-n correspondance


In [69]:
users = ['Tom', 'Tom', 'Tom',
         'Bill', 'Bill', 'Bill', 'Bill',
         'Jack',
         'Bob',
         'Jim']
sessionsUsers = ['sessionTom1', 'sessionTom2', 'sessionTom3',
                 'sessionBill1', 'sessionBill2', 'sessionBill3', 'sessionBill4', 
                 'sessionJack',
                 'sessionBob',
                 'sessionJim']
sessionsChapters = [
                 'sessionTom1', 'sessionTom1', 'sessionTom1',
                 'sessionTom2', 'sessionTom2',
                 'sessionTom3',
    
                 'sessionBill1',
                 'sessionBill2', 'sessionBill2',
                 'sessionBill3', 'sessionBill3', 'sessionBill3',
                 'sessionBill4', 'sessionBill4', 'sessionBill4', 'sessionBill4',
                 
                 'sessionJack', 'sessionJack', 'sessionJack',
                 
                 'sessionBob',
                 
                 'sessionJim']

chaptersSessions = ['1', '2', '3',
                    '1', '2',
                    '1',
                    
                    '1',
                    '2', '3',
                    '4', '5', '6',
                    '5', '6', '5', '6',
                    
                    '9', '10', '11',
                    
                    '10',
                    '1']

times = 100 * np.random.rand(len(chaptersSessions))
times.sort()
times


Out[69]:
array([  1.920556  ,   2.05149291,   4.01622859,   6.01695117,
         9.94325926,  33.46910107,  35.03250999,  39.35289105,
        45.2546713 ,  60.67957719,  62.2058968 ,  63.21452902,
        65.01748642,  73.47538142,  79.52493287,  82.95773425,
        84.57350829,  87.3415436 ,  91.10738857,  98.42042455,  99.00165259])

In [47]:
dataUsers = pd.DataFrame({'users' : users, 'sessions' : sessionsUsers})
#dataUsers

In [70]:
dataChapters = pd.DataFrame({'sessions' : sessionsChapters, 'chapters' : chaptersSessions, 'times' : times})
#dataChapters

In [71]:
complexMerge = pd.merge(dataUsers, dataChapters, on='sessions', how='outer')
complexMerge


Out[71]:
sessions users chapters times
0 sessionTom1 Tom 1 1.920556
1 sessionTom1 Tom 2 2.051493
2 sessionTom1 Tom 3 4.016229
3 sessionTom2 Tom 1 6.016951
4 sessionTom2 Tom 2 9.943259
5 sessionTom3 Tom 1 33.469101
6 sessionBill1 Bill 1 35.032510
7 sessionBill2 Bill 2 39.352891
8 sessionBill2 Bill 3 45.254671
9 sessionBill3 Bill 4 60.679577
10 sessionBill3 Bill 5 62.205897
11 sessionBill3 Bill 6 63.214529
12 sessionBill4 Bill 5 65.017486
13 sessionBill4 Bill 6 73.475381
14 sessionBill4 Bill 5 79.524933
15 sessionBill4 Bill 6 82.957734
16 sessionJack Jack 9 84.573508
17 sessionJack Jack 10 87.341544
18 sessionJack Jack 11 91.107389
19 sessionBob Bob 10 98.420425
20 sessionJim Jim 1 99.001653

In [74]:
usersChapters = complexMerge.drop('sessions', 1)

In [82]:
usersChapters.groupby('users').max()


Out[82]:
chapters times
users
Bill 6 82.957734
Bob 10 98.420425
Jack 9 91.107389
Jim 1 99.001653
Tom 3 33.469101