In [1]:
import pandas as pd
import numpy as np
import glob # to find all files in folder
from datetime import datetime
from datetime import date, time
from dateutil.parser import parse
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None  # default='warn'

import requests
from bs4 import BeautifulSoup

Collect the data

Get the select options


In [2]:
# Identifier of the ISA public report that is queried throughout this notebook.
reportModel = 133685247
# URL of the report's filter form. The id is taken from reportModel so that the
# form and the later data requests can never point at different reports.
full_form_url = 'http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.filter?ww_i_reportModel=' + str(reportModel)
r = requests.get(full_form_url)
# Parsed filter form; its <select> menus are inspected in the following cells.
soup = BeautifulSoup(r.text, 'html.parser')

In [3]:
# List the `name` attribute of every drop-down (<select>) in the filter form.
select = soup.find_all('select')
select_name = [menu['name'] for menu in select]
select_name


Out[3]:
['ww_x_UNITE_ACAD',
 'ww_x_PERIODE_ACAD',
 'ww_x_PERIODE_PEDAGO',
 'ww_x_HIVERETE']

In [4]:
# The <select> element of each menu, in the same order as select_name.
select_fields = [soup.find('select', attrs={'name': menu_name}) for menu_name in select_name]

In [5]:
# <option> entries of the first menu (ww_x_UNITE_ACAD); the Informatique
# section is looked up among them in the next cell.
unite_acad_select = select_fields[0]
unite_acad_options = unite_acad_select.find_all('option')

In [6]:
# {option value: label} restricted to the option labelled 'Informatique'.
unite_acad_informatique = dict(
    (opt['value'], opt.text)
    for opt in unite_acad_options
    if opt.text == 'Informatique'
)
unite_acad_informatique


Out[6]:
{'249847': 'Informatique'}

In [7]:
# Academic periods as {option value: 'YYYY-YYYY'}.
# The 'null' placeholder is filtered out first so that its label is never
# parsed as a year; only periods starting in 2007 or later are kept.
period_acad = {
    opt['value']: opt.text
    for opt in select_fields[1].find_all('option')
    if opt['value'] != 'null'
    if int(opt.text.partition('-')[0]) >= 2007
}
period_acad


Out[7]:
{'123455150': '2011-2012',
 '123456101': '2012-2013',
 '213637754': '2013-2014',
 '213637922': '2014-2015',
 '213638028': '2015-2016',
 '355925344': '2016-2017',
 '39486325': '2010-2011',
 '978181': '2007-2008',
 '978187': '2008-2009',
 '978195': '2009-2010'}

In [8]:
# Pedagogic periods as {option value: label}; options with an empty label are skipped.
option = select_fields[2].find_all('option')
period_pedago = dict((opt['value'], opt.text) for opt in option if opt.text)
period_pedago


Out[8]:
{'2063602308': 'Mise à niveau',
 '2226616': 'Stage automne 4ème année',
 '2226626': 'Stage printemps 4ème année',
 '2226768': 'Bachelor semestre 5b',
 '2226785': 'Bachelor semestre 6b',
 '2227132': 'Stage printemps master',
 '2230106': 'Master semestre 1',
 '2230128': 'Master semestre 3',
 '2230140': 'Master semestre 4',
 '2335667': 'Mineur semestre 1',
 '2335676': 'Mineur semestre 2',
 '249108': 'Bachelor semestre 1',
 '249114': 'Bachelor semestre 2',
 '249127': 'Projet Master automne',
 '2754553': 'Semestre printemps',
 '3781783': 'Projet Master printemps',
 '942120': 'Bachelor semestre 5',
 '942155': 'Bachelor semestre 3',
 '942163': 'Bachelor semestre 4',
 '942175': 'Bachelor semestre 6',
 '942192': 'Master semestre 2',
 '953137': 'Stage automne 3ème année',
 '953159': 'Semestre automne',
 '983606': 'Stage printemps 3ème année'}

In [9]:
# Autumn/spring semester choices as {option value: label}, without the 'null' placeholder.
option = select_fields[3].find_all('option')
hiverEte = dict((opt['value'], opt.text) for opt in option if opt['value'] != 'null')
hiverEte


Out[9]:
{'2936286': "Semestre d'automne", '2936295': 'Semestre de printemps'}

Collect


In [10]:
def _rows_by_table(tr_tags, periode_acad_label, periode_pedago_label):
    """Group the student rows of a parsed report page by table title.

    Each <tr> is classified by its number of <th> cells:
    exactly one -> table title, more than one -> column header row (ignored),
    none -> student row.

    Returns a dict mapping each table title to a list of rows, where a row is
    the text of every <td> except the last one, followed by the table title,
    ``periode_acad_label`` and ``periode_pedago_label``.
    """
    data = {}

    # Fallback title, used only if student rows show up before any title row.
    current_table = 't1'

    for tr in tr_tags:
        th = tr.find_all('th')
        if len(th) == 1:
            # Title row: the title is the first line of the cell's text.
            current_table = th[0].text.split('\n')[0]
            data[current_table] = []
        elif len(th) > 1:
            # Column header row: ignored, the caller supplies the column names.
            continue
        else:
            td_tags = tr.find_all('td')
            if not td_tags:
                # A <tr> without any cell carries no student; keeping it would
                # produce a row that does not match the expected column count.
                continue
            # Drop the last <td> (always empty) and turn non-breaking spaces
            # into regular spaces.
            student = [td.text.replace('\xa0', ' ') for td in td_tags[:-1]]
            student.extend([current_table, periode_acad_label, periode_pedago_label])
            # setdefault registers the fallback title on first use instead of
            # raising KeyError when no title row has been seen yet.
            data.setdefault(current_table, []).append(student)

    return data


def collect_dataframe(t_unite_acad, t_periode_acad, t_periode_pedago, t_hiver_ete, final_headers):
    """Request one ISA report page and return its student tables as DataFrames.

    The four filter arguments are (key, description) tuples, e.g.
    ('2936286', "Semestre d'automne"). The key is sent as the query parameter;
    the descriptions of ``t_periode_acad`` and ``t_periode_pedago`` are appended
    to every student row.

    Parameters
    ----------
    t_unite_acad : tuple
        Value for ``ww_x_UNITE_ACAD``.
    t_periode_acad : tuple
        Value for ``ww_x_PERIODE_ACAD``.
    t_periode_pedago : tuple
        Value for ``ww_x_PERIODE_PEDAGO``.
    t_hiver_ete : tuple
        Value for ``ww_x_HIVERETE``.
    final_headers : list of str
        Column names of the returned frames: the scraped columns followed by
        the three appended ones (table title, academic period, pedagogic period).

    Returns
    -------
    list of pandas.DataFrame
        One frame per table found in the page; empty list if there is none.

    Notes
    -----
    Reads the notebook-level ``reportModel``. pandas raises if a row does not
    have exactly ``len(final_headers)`` values.
    """
    print("collect_dataframe: input: "+str(t_unite_acad)+" & "+str(t_periode_acad)+" &"+str(t_periode_pedago)+" & "+str(t_hiver_ete))

    # Send the request
    params = {
        'ww_x_GPS': -1,
        'ww_i_reportModel': reportModel,
        'ww_i_reportModelXsl': 133685270,
        'ww_x_UNITE_ACAD': t_unite_acad[0],
        'ww_x_PERIODE_ACAD': t_periode_acad[0],
        'ww_x_PERIODE_PEDAGO': t_periode_pedago[0],
        'ww_x_HIVERETE': t_hiver_ete[0],
    }
    url = 'http://isa.epfl.ch/imoniteur_ISAP/!GEDPUBLICREPORTS.html'
    r = requests.get(url, params=params)
    soupe = BeautifulSoup(r.text, 'html.parser')

    # A page may contain several tables; every one of them is made of <tr> rows.
    data = _rows_by_table(soupe.find_all('tr'), t_periode_acad[1], t_periode_pedago[1])

    # One DataFrame per table
    dframes = [pd.DataFrame(rows, columns=final_headers) for rows in data.values()]

    return dframes

Make one request for each combination of (unite_acad, periode_acad, periode_pedago). We ignore the `ww_x_HIVERETE` filter because it is redundant.


In [11]:
# Cartesian product of the three filters; every element is a triple of
# (key, label) pairs: (academic unit, academic period, pedagogic period).
perm_list = list(itertools.product(
    unite_acad_informatique.items(),
    period_acad.items(),
    period_pedago.items(),
))

In [12]:
# Get the data: one request per filter combination, all resulting tables
# gathered in a single flat list.
# Column names: the columns scraped from the page followed by the three values
# collect_dataframe appends to each row (title, periode_acad, periode_pedago).
header = ['Civilité', 'Nom_Prénom', 'Orientation_Bachelor', 'Orientation_Master', 'Spécialisation', 'Filière_opt.', 'Mineur', 'Statut', 'Type_Echange', 'Ecole_Echange', 'No_Sciper', 'title', 'periode_acad', 'periode_pedago']
dframes = []
for (ua, pa, pp) in perm_list:
    # The semester filter is sent as 'null', i.e. left unset.
    res = collect_dataframe(ua, pa, pp, ('null', 'null'), header)
    # extend() adds the frames directly, without building a throwaway list.
    dframes.extend(res)


collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637922', '2014-2015') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123455150', '2011-2012') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978181', '2007-2008') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('355925344', '2016-2017') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213638028', '2015-2016') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('39486325', '2010-2011') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978187', '2008-2009') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('213637754', '2013-2014') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('123456101', '2012-2013') &('942163', 'Bachelor semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2230128', 'Master semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2063602308', 'Mise à niveau') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2230140', 'Master semestre 4') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2335676', 'Mineur semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2226616', 'Stage automne 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('249114', 'Bachelor semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('942120', 'Bachelor semestre 5') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2227132', 'Stage printemps master') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2754553', 'Semestre printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('942192', 'Master semestre 2') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2226626', 'Stage printemps 4ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2226785', 'Bachelor semestre 6b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('942155', 'Bachelor semestre 3') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('249108', 'Bachelor semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('942175', 'Bachelor semestre 6') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2226768', 'Bachelor semestre 5b') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('953159', 'Semestre automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2230106', 'Master semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('3781783', 'Projet Master printemps') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('983606', 'Stage printemps 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('953137', 'Stage automne 3ème année') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('249127', 'Projet Master automne') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('2335667', 'Mineur semestre 1') & ('null', 'null')
collect_dataframe: input: ('249847', 'Informatique') & ('978195', '2009-2010') &('942163', 'Bachelor semestre 4') & ('null', 'null')

In [13]:
# Combine every frame held in `dframes` into one table.
# The row index of each individual frame is kept as-is (no re-indexing).
all_data = pd.concat(list(dframes))

In [14]:
# Write the combined data to a CSV file (disabled; uncomment the line below to enable)
#all_data.to_csv('all_data.csv')

In [15]:
# Number of rows in the combined table
all_data.shape[0]


Out[15]:
8942

Note that the 'Mineur semestre X' periods are always empty. That is why they don't appear here.


In [16]:
# Distinct values found in the 'periode_pedago' column, in order of first appearance
all_data.loc[:, 'periode_pedago'].unique()


Out[16]:
array(['Master semestre 3', 'Bachelor semestre 2', 'Bachelor semestre 5',
       'Semestre printemps', 'Master semestre 2', 'Bachelor semestre 3',
       'Bachelor semestre 1', 'Bachelor semestre 6', 'Semestre automne',
       'Master semestre 1', 'Projet Master automne', 'Bachelor semestre 4',
       'Projet Master printemps'], dtype=object)

In [ ]: