This notebook was used to conduct the various analysis on the different graphs created.


In [1]:
# Useful starting lines
%matplotlib inline

import numpy as np
import scipy
import scipy.sparse as sp
import matplotlib.pyplot as plt
import pandas as pd
import re
import networkx as nx
import itertools
import pygsp
import pickle
import os
from tqdm import tqdm
from pygsp import graphs, filters, plotting

plt.rcParams['figure.figsize'] = (10, 5)
plotting.BACKEND = 'matplotlib'

%load_ext autoreload
%autoreload 2


2018-01-21 19:55:08,285:[WARNING](pygsp.graphs.nngraphs.nngraph.<module>): Cannot import pyflann (used for faster kNN computations): Traceback (most recent call last):
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\pygsp\graphs\nngraphs\nngraph.py", line 14, in <module>
    import pyflann as pfl
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\pyflann\__init__.py", line 27, in <module>
    from pyflann.index import *
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\pyflann\index.py", line 27, in <module>
    from pyflann.bindings.flann_ctypes import *
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\pyflann\bindings\__init__.py", line 30, in <module>
    from pyflann.bindings.flann_ctypes import *
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\pyflann\bindings\flann_ctypes.py", line 171, in <module>
    raise ImportError('Cannot load dynamic library. Did you compile FLANN?')
ImportError: Cannot load dynamic library. Did you compile FLANN?

1. Data retrieval

1.1 Load necessary dataframes

there are different steps:

  • With AR section;
  • Without AR section;
  • With only STI faculty

In [3]:
enrolAR=pd.read_pickle("../data/cleanedAR_enrol_initial.pickle")
coursesAR=pd.read_pickle("../data/cleanedAR_courses_initial.pickle")
enrol=pd.read_pickle("../data/cleaned_enrol.pickle")
courses=pd.read_pickle("../data/cleaned_courses.pickle")
enrolSTI=pd.read_pickle("../data/cleaned_enrol_STI.pickle")
coursesSTI=pd.read_pickle("../data/cleaned_courses_STI.pickle")

1.2 Load all graphs pickle


In [4]:
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','students_graph_with_AR.pkl'), 'rb')
weight_student_AR = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','students_graph_without_AR.pkl'), 'rb')
weight_student = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','students_graph_STI.pkl'), 'rb')
weight_student_STI = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','section_graph_with_AR.pkl'), 'rb')
weight_section_AR = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','prof_graph_STI.pkl'), 'rb')
weight_prof_STI = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','assistants_graph_STI.pkl'), 'rb')
weight_assistants_STI = pickle.load(pkl_file)
pkl_file.close()

pkl_file = open(os.path.join(os.getcwd(), 'Graphs','topics_graph.pkl'), 'rb')
weight_topics_STI = pickle.load(pkl_file)
pkl_file.close()

## courses are linked if one is a requirement of the other
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_course_to_req_graph_STI.pkl'), 'rb')
weight_req_diff_level_STI = pickle.load(pkl_file)
pkl_file.close()

## courses are linked if they share the same requirements
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_same_course_graph_STI.pkl'), 'rb')
weight_req_similar_STI = pickle.load(pkl_file)
pkl_file.close()

## courses are linked if they are the requirements of the same course
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_course_same_req_graph_STI.pkl'), 'rb')
weight_req_same_level_STI = pickle.load(pkl_file)
pkl_file.close()


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-4-29cf224abfc2> in <module>()
----> 1 pkl_file = open(os.path.join(os.getcwd(), 'Graphs','students_graph_with_AR.pkl'), 'rb')
      2 weight_student_AR = pickle.load(pkl_file)
      3 pkl_file.close()
      4 
      5 pkl_file = open(os.path.join(os.getcwd(), 'Graphs','students_graph_without_AR.pkl'), 'rb')

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Thomas\\Documents\\EPFL\\Semestre_9\\NTDS\\NTDS_Project\\Graphs\\students_graph_with_AR.pkl'

important lists


In [5]:
StudentsAR=enrolAR['PersonID'].unique()
CoursesAR=enrolAR['CourseCodes'].unique()
Students=enrol['PersonID'].unique() # The different students
Courses=enrol['CourseCodes'].unique() # The different courses : USED in the the next part !!!
StudentsSTI=enrolSTI['PersonID'].unique() # The different students
CoursesSTI=enrolSTI['CourseCodes'].unique()

In [6]:
courses_index_dicoAR=dict(zip(CoursesAR, np.arange(len(CoursesAR))))
courses_index_dico=dict(zip(Courses, np.arange(len(Courses)))) # dictionnary to link a course code to a number, USED in the following part !!!
courses_index_dicoSTI=dict(zip(CoursesSTI, np.arange(len(CoursesSTI))))

1.3 Study Plans recuparation


In [7]:
dump=coursesAR['StudyPlans'].str.replace(' -','').str.replace(r'^ ','').str.replace(r' $','').str.replace(' ;',';').str.replace('; ',';')
dump=dump.str.split(';',expand=True)
StudyPlans=[]
for i in range(10):
    StudyPlans+=dump[i].unique().tolist()
StudyPlans = sorted(list(set(list(filter(None,StudyPlans)))))
# remove elements beggining with ED, hors plan, autre and digital humanities (SHS)
StudyPlansDisplay=[elem for elem in StudyPlans if elem[:2] !='ED' and elem[:2]!='Ho' and elem[:2]!='Au'] 
StudyPlansDisplay=[elem for elem in StudyPlansDisplay if 'ineur' not in elem and 'inor' not in elem]+["minor"]

1.4 Section name per course


In [8]:
courseSection=enrolAR.CourseSection.unique().tolist()
courseSection.remove('ETH')
courseSection.remove('HEP')

In [9]:
courses_section_dico={}
for course in CoursesAR:
    indiv_studyplan=coursesAR.loc[course,'StudyPlans']
    indiv_studyplan=re.sub(' ?; ?',';',indiv_studyplan.replace(' -',''))
    indiv_studyplan=re.sub('^ ','',indiv_studyplan)
    indiv_studyplan=re.sub(' $','',indiv_studyplan)
    courses_section_dico[course]=[elem for elem in indiv_studyplan.split(';') if elem[:2] !='ED' and elem[:2]!='Ho' and elem[:2]!='Au']

In [10]:
courses_sectionName_dico={}
grouped_df=enrolAR.groupby("CourseCodes")
for key, values in grouped_df.groups.items():
    code=enrolAR.ix[values[0]].CourseSection
    if(code!='ETH' and code!='HEP'):
        courses_sectionName_dico[key]=enrolAR.ix[values[0]].CourseSection


C:\Users\Thomas\Anaconda3\lib\site-packages\ipykernel_launcher.py:4: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.

2. Graph analysis

2.1 Course labels

retrieve StudyPlan label for each course


In [11]:
# With AR
id_courses_sections_labelAR=np.zeros((len(courses_index_dicoAR),9))
for course in CoursesAR:
    for i in range(9):
        try:
            id_courses_sections_labelAR[courses_index_dicoAR[course],i]=StudyPlansDisplay.index(courses_section_dico[course][i])
        except IndexError:
            id_courses_sections_labelAR[courses_index_dicoAR[course],i]=-1
        except ValueError:
            if('ineur' in courses_section_dico[course][i] or 'inor' in courses_section_dico[course][i]):
                id_courses_sections_labelAR[courses_index_dicoAR[course],i]=StudyPlansDisplay.index('minor')

In [12]:
# Without AR
id_courses_sections_label=np.zeros((len(courses_index_dico),9))
for course in Courses:
    for i in range(9):
        try:
            id_courses_sections_label[courses_index_dico[course],i]=StudyPlansDisplay.index(courses_section_dico[course][i])
        except IndexError:
            id_courses_sections_label[courses_index_dico[course],i]=-1
        except ValueError:
            if('ineur' in courses_section_dico[course][i] or 'inor' in courses_section_dico[course][i]):
                id_courses_sections_label[courses_index_dico[course],i]=StudyPlansDisplay.index('minor')

In [13]:
# STI
id_courses_sections_labelSTI=np.zeros((len(courses_index_dicoSTI),9))
for course in CoursesSTI:
    for i in range(9):
        try:
            id_courses_sections_labelSTI[courses_index_dicoSTI[course],i]=StudyPlansDisplay.index(courses_section_dico[course][i])
        except IndexError:
            id_courses_sections_labelSTI[courses_index_dicoSTI[course],i]=-1
        except ValueError:
            if('ineur' in courses_section_dico[course][i] or 'inor' in courses_section_dico[course][i]):
                id_courses_sections_labelSTI[courses_index_dicoSTI[course],i]=StudyPlansDisplay.index('minor')

dict of faculties codes


In [14]:
faculties={
    'CDM': [StudyPlansDisplay.index('IF'), StudyPlansDisplay.index('MTEE')], 
    'ENAC': [StudyPlansDisplay.index('AR'), StudyPlansDisplay.index('GC'), StudyPlansDisplay.index('MES'), StudyPlansDisplay.index('SIE')], 
    'SB': [StudyPlansDisplay.index('CGC'), StudyPlansDisplay.index('MA'), StudyPlansDisplay.index('PH')], 
    'STI': [StudyPlansDisplay.index('EL'), StudyPlansDisplay.index('EL MNIS'), StudyPlansDisplay.index('GM'), StudyPlansDisplay.index('MT'), StudyPlansDisplay.index('MX')], 
    'IC': [StudyPlansDisplay.index('IN'), StudyPlansDisplay.index('SC')], 
    'SV':[StudyPlansDisplay.index('SV')],
    #'Minors': [2, 10, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
    #'True engineers': [2, 3, 4, 6, 9, 10, 13, 15, 31]
}

retrieve SectionCodes label for each course


In [15]:
id_courses_code_labelAR=np.zeros((len(courses_index_dicoAR),))
for course in CoursesAR:
    try:
        id_courses_code_labelAR[courses_index_dicoAR[course]]=courseSection.index(courses_sectionName_dico[course])
    except:
        id_courses_code_labelAR[courses_index_dicoAR[course]]=-1

In [16]:
id_courses_code_label=np.zeros((len(courses_index_dico),))
for course in Courses:
    try:
        id_courses_code_label[courses_index_dico[course]]=courseSection.index(courses_sectionName_dico[course])
    except:
        id_courses_code_label[courses_index_dico[course]]=-1

In [17]:
id_courses_code_labelSTI=np.zeros((len(courses_index_dicoSTI),))
for course in CoursesSTI:
    try:
        id_courses_code_labelSTI[courses_index_dicoSTI[course]]=courseSection.index(courses_sectionName_dico[course])
    except:
        id_courses_code_labelSTI[courses_index_dicoSTI[course]]=-1

2.2 Laplacian eigenmaps

2.2.1 Graph Student - all sections


In [18]:
G=graphs.Graph(weight_student_AR)
laplacian=G.compute_laplacian("normalized")
G.compute_fourier_basis(recompute=True)
G.set_coordinates(G.U[:,1:3])
G.plot(vertex_size=10)#show_edges=True, vertex_size=10)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-ceaff64c58ab> in <module>()
----> 1 G=graphs.Graph(weight_student_AR)
      2 laplacian=G.compute_laplacian("normalized")
      3 G.compute_fourier_basis(recompute=True)
      4 G.set_coordinates(G.U[:,1:3])
      5 G.plot(vertex_size=10)#show_edges=True, vertex_size=10)

NameError: name 'weight_student_AR' is not defined

In [19]:
## Study plan
for i in range(len(StudyPlansDisplay)):
    plt.figure()
    G.plot_signal(id_courses_sections_labelAR[:,0], vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/student_"+StudyPlansDisplay[i]+"_studyPlan_wAR", highlight=np.where(np.isin(id_courses_sections_labelAR,[i]).any(axis=1)))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c226af2c88>
<matplotlib.figure.Figure at 0x1c226b4c6a0>
<matplotlib.figure.Figure at 0x1c226bc9320>
<matplotlib.figure.Figure at 0x1c226c76b38>
<matplotlib.figure.Figure at 0x1c226dfac18>
<matplotlib.figure.Figure at 0x1c226e8ec18>
<matplotlib.figure.Figure at 0x1c226d41f60>
<matplotlib.figure.Figure at 0x1c22adf8b70>
<matplotlib.figure.Figure at 0x1c223c4a8d0>
<matplotlib.figure.Figure at 0x1c22aff74a8>
<matplotlib.figure.Figure at 0x1c22c0150f0>
<matplotlib.figure.Figure at 0x1c22afb8ac8>
<matplotlib.figure.Figure at 0x1c22c0079b0>
<matplotlib.figure.Figure at 0x1c22c178748>
<matplotlib.figure.Figure at 0x1c22c39af28>
<matplotlib.figure.Figure at 0x1c22c46e828>
<matplotlib.figure.Figure at 0x1c22c538898>
<matplotlib.figure.Figure at 0x1c22c584be0>
<matplotlib.figure.Figure at 0x1c22d6b15f8>
<matplotlib.figure.Figure at 0x1c22d671278>

In [17]:
# faculty
for fac,ids in faculties.items():
    plt.figure()
    G.plot_signal(id_courses_sections_labelAR[:,0], vertex_size=10,plot_name=fac, save_as="Graphs/image/student_"+fac+"_faculty_wAR" , highlight=np.where(np.isin(id_courses_sections_labelAR,ids).any(axis=1)))


<matplotlib.figure.Figure at 0x1c21e05df60>
<matplotlib.figure.Figure at 0x1c223a12d30>
<matplotlib.figure.Figure at 0x1c223a7a4a8>
<matplotlib.figure.Figure at 0x1c223b46d30>
<matplotlib.figure.Figure at 0x1c223c15978>
<matplotlib.figure.Figure at 0x1c2233cab38>

In [31]:
# sections
for i in range(len(courseSection)):
    plt.figure()
    G.plot_signal(id_courses_code_labelAR, vertex_size=10,plot_name=courseSection[i], 
                  save_as="Graphs/image/student_"+courseSection[i]+"_section_wAR", 
                  highlight=np.where(id_courses_code_labelAR==i))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c22d8dcf28>
<matplotlib.figure.Figure at 0x1c22d8fea20>
<matplotlib.figure.Figure at 0x1c22d8fb9b0>
<matplotlib.figure.Figure at 0x1c22db39668>
<matplotlib.figure.Figure at 0x1c22dc18550>
<matplotlib.figure.Figure at 0x1c22dd704e0>
<matplotlib.figure.Figure at 0x1c22de40be0>
<matplotlib.figure.Figure at 0x1c22ded3a20>
<matplotlib.figure.Figure at 0x1c22df2e940>
<matplotlib.figure.Figure at 0x1c22df09940>
<matplotlib.figure.Figure at 0x1c22f07f358>
<matplotlib.figure.Figure at 0x1c22f216828>
<matplotlib.figure.Figure at 0x1c22f2aa3c8>
<matplotlib.figure.Figure at 0x1c22c42bf60>
<matplotlib.figure.Figure at 0x1c22d76ebe0>
<matplotlib.figure.Figure at 0x1c22f41fa20>
<matplotlib.figure.Figure at 0x1c22f559d30>
<matplotlib.figure.Figure at 0x1c22f56ac18>

In [23]:
def showAndSaveData(weightMatrix,graphName):
    G1=nx.from_numpy_matrix(weightMatrix)
    # Giant component
    Gcc=sorted(nx.connected_component_subgraphs(G1), key = len, reverse=True)[0]
    print("size of giant component: "+str(len(Gcc)))
    # weight distribution
    plt.hist(list(nx.degree(G1).values()))
    plt.title("weight distribution")
    plt.savefig("Graphs/image/"+graphName+"_weightDistrib")
    plt.figure()
    # weight matrix
    plt.title("weight matrix")
    plt.spy(weightMatrix)
    plt.savefig("Graphs/image/"+graphName+"_weightMatrix")

In [39]:
showAndSaveData(weight_student_AR,"student_AR")


size of giant component: 798

2.2.2 Graph Student - without AR


In [49]:
G=graphs.Graph(weight_student)
laplacian=G.compute_laplacian("normalized")
G.compute_fourier_basis(recompute=True)
G.set_coordinates(G.U[:,1:3])
G.plot(vertex_size=10)#show_edges=True, vertex_size=10)



In [50]:
## Study plan
for i in range(len(StudyPlansDisplay)):
    plt.figure()
    G.plot_signal(id_courses_sections_label[:,0], vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/student_"+StudyPlansDisplay[i]+"_studyPlan", highlight=np.where(np.isin(id_courses_sections_label,[i]).any(axis=1)))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c31858ceb8>
<matplotlib.figure.Figure at 0x1c316fe5a58>
<matplotlib.figure.Figure at 0x1c21f38cef0>
<matplotlib.figure.Figure at 0x1c21ffa2e80>
<matplotlib.figure.Figure at 0x1c22006ea90>
<matplotlib.figure.Figure at 0x1c220130d30>
<matplotlib.figure.Figure at 0x1c220404ef0>
<matplotlib.figure.Figure at 0x1c2204d09b0>
<matplotlib.figure.Figure at 0x1c220563f28>
<matplotlib.figure.Figure at 0x1c3189876d8>
<matplotlib.figure.Figure at 0x1c3194b3588>
<matplotlib.figure.Figure at 0x1c31897d400>
<matplotlib.figure.Figure at 0x1c319697588>
<matplotlib.figure.Figure at 0x1c319712c50>
<matplotlib.figure.Figure at 0x1c319823860>
<matplotlib.figure.Figure at 0x1c319907748>
<matplotlib.figure.Figure at 0x1c319950470>
<matplotlib.figure.Figure at 0x1c31bdab4e0>
<matplotlib.figure.Figure at 0x1c31bd29198>
<matplotlib.figure.Figure at 0x1c31bf03e48>

In [51]:
# faculty
for fac,ids in faculties.items():
    plt.figure()
    G.plot_signal(id_courses_sections_label[:,0], vertex_size=10,plot_name=fac, save_as="Graphs/image/student_"+fac+"_faculty" , highlight=np.where(np.isin(id_courses_sections_label,ids).any(axis=1)))


<matplotlib.figure.Figure at 0x1c2325aa7b8>
<matplotlib.figure.Figure at 0x1c31c14ecf8>
<matplotlib.figure.Figure at 0x1c31d209f60>
<matplotlib.figure.Figure at 0x1c31d2f70f0>
<matplotlib.figure.Figure at 0x1c31d37b7f0>
<matplotlib.figure.Figure at 0x1c31d448e10>

In [52]:
# sections
for i in range(len(courseSection)):
    plt.figure()
    G.plot_signal(id_courses_code_label, vertex_size=10,plot_name=courseSection[i], 
                  save_as="Graphs/image/student_"+courseSection[i]+"_section", 
                  highlight=np.where(id_courses_code_label==i))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c31d2bb978>
<matplotlib.figure.Figure at 0x1c31d5fd908>
<matplotlib.figure.Figure at 0x1c31d6e86a0>
<matplotlib.figure.Figure at 0x1c31d707898>
<matplotlib.figure.Figure at 0x1c31d872518>
<matplotlib.figure.Figure at 0x1c31d9394a8>
<matplotlib.figure.Figure at 0x1c31da2f278>
<matplotlib.figure.Figure at 0x1c31da55588>
<matplotlib.figure.Figure at 0x1c31eb6b278>
<matplotlib.figure.Figure at 0x1c31ec35cc0>
<matplotlib.figure.Figure at 0x1c31ebb6f60>
<matplotlib.figure.Figure at 0x1c31ed64c18>
<matplotlib.figure.Figure at 0x1c31eeb43c8>
<matplotlib.figure.Figure at 0x1c31ef15668>
<matplotlib.figure.Figure at 0x1c31f07eac8>
<matplotlib.figure.Figure at 0x1c31f12a278>
<matplotlib.figure.Figure at 0x1c3201b4390>
<matplotlib.figure.Figure at 0x1c320294668>

In [53]:
showAndSaveData(weight_student,"student")


size of giant component: 682

2.2.3 Graph Student - master STI


In [21]:
G=graphs.Graph(weight_student_STI)
laplacian=G.compute_laplacian("normalized")
G.compute_fourier_basis(recompute=True)
G.set_coordinates(G.U[:,1:3])
G.plot(vertex_size=10,show_edges=False)



In [25]:
showAndSaveData(weight_student_STI,"student_STI_weight")


size of giant component: 196

In [47]:
## Study plan
for i in tqdm(range(len(StudyPlansDisplay))):
    plt.figure()
    G.plot_signal(id_courses_sections_labelSTI[:,0],show_edges=False, vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/student_"+StudyPlansDisplay[i]+"_studyPlan_wSTI", highlight=np.where(np.isin(id_courses_sections_labelSTI,[i]).any(axis=1)))


 50%|█████████████████████████████████████████                                         | 10/20 [00:02<00:02,  4.22it/s]c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.31it/s]
<matplotlib.figure.Figure at 0x1c215580908>
<matplotlib.figure.Figure at 0x1c271298128>
<matplotlib.figure.Figure at 0x1c2ccc191d0>
<matplotlib.figure.Figure at 0x1c31579af98>
<matplotlib.figure.Figure at 0x1c315973e80>
<matplotlib.figure.Figure at 0x1c31597a9e8>
<matplotlib.figure.Figure at 0x1c315686b00>
<matplotlib.figure.Figure at 0x1c315c61ac8>
<matplotlib.figure.Figure at 0x1c31580a2e8>
<matplotlib.figure.Figure at 0x1c31574a668>
<matplotlib.figure.Figure at 0x1c315ebf128>
<matplotlib.figure.Figure at 0x1c315a438d0>
<matplotlib.figure.Figure at 0x1c315655fd0>
<matplotlib.figure.Figure at 0x1c315853438>
<matplotlib.figure.Figure at 0x1c3157ddb38>
<matplotlib.figure.Figure at 0x1c3157ddc50>
<matplotlib.figure.Figure at 0x1c317182160>
<matplotlib.figure.Figure at 0x1c3182c2390>
<matplotlib.figure.Figure at 0x1c31718a9b0>
<matplotlib.figure.Figure at 0x1c3184662e8>

In [58]:
# faculty
for fac,ids in faculties.items():
    plt.figure()
    G.plot_signal(id_courses_sections_labelSTI[:,0],show_edges=False, vertex_size=10,plot_name=fac, save_as="Graphs/image/student_"+fac+"_faculty_STI" , highlight=np.where(np.isin(id_courses_sections_labelSTI,ids).any(axis=1)))


<matplotlib.figure.Figure at 0x1c3695a5320>
<matplotlib.figure.Figure at 0x1c3695cb358>
<matplotlib.figure.Figure at 0x1c36937a9e8>
<matplotlib.figure.Figure at 0x1c369662898>
<matplotlib.figure.Figure at 0x1c3696f18d0>
<matplotlib.figure.Figure at 0x1c369680668>

In [56]:
# sections
for i in range(len(courseSection)):
    plt.figure()
    G.plot_signal(id_courses_code_labelSTI,show_edges=False, vertex_size=10,plot_name=courseSection[i], 
                  save_as="Graphs/image/student_"+courseSection[i]+"_section_STI", 
                  highlight=np.where(id_courses_code_labelSTI==i))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c337a21fd0>
<matplotlib.figure.Figure at 0x1c358939b00>
<matplotlib.figure.Figure at 0x1c3665899e8>
<matplotlib.figure.Figure at 0x1c3667ea1d0>
<matplotlib.figure.Figure at 0x1c366597668>
<matplotlib.figure.Figure at 0x1c3667c7940>
<matplotlib.figure.Figure at 0x1c36673e860>
<matplotlib.figure.Figure at 0x1c366bd24a8>
<matplotlib.figure.Figure at 0x1c3665feb00>
<matplotlib.figure.Figure at 0x1c3665199e8>
<matplotlib.figure.Figure at 0x1c366c200f0>
<matplotlib.figure.Figure at 0x1c366650ac8>
<matplotlib.figure.Figure at 0x1c366e12cf8>
<matplotlib.figure.Figure at 0x1c366c776d8>
<matplotlib.figure.Figure at 0x1c368f5e2b0>
<matplotlib.figure.Figure at 0x1c368fa5710>
<matplotlib.figure.Figure at 0x1c3690dc1d0>
<matplotlib.figure.Figure at 0x1c3691bd470>

In [57]:
showAndSaveData(weight_student_STI,"student_STI")


size of giant component: 196

2.2.4 Graph sections - all sections


In [59]:
H=graphs.Graph(weight_section_AR)
H.compute_laplacian("normalized")
H.compute_fourier_basis(recompute=True)
H.set_coordinates(H.U[:,1:3])
H.plot(vertex_size=10)



In [60]:
## Study plan
for i in tqdm(range(len(StudyPlansDisplay))):
    plt.figure()
    H.plot_signal(id_courses_sections_labelAR[:,0],show_edges=False, vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/section_"+StudyPlansDisplay[i]+"_studyPlan_wAR", highlight=np.where(np.isin(id_courses_sections_labelAR,[i]).any(axis=1)))


 50%|█████████████████████████████████████████                                         | 10/20 [00:03<00:03,  2.84it/s]c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  2.97it/s]
<matplotlib.figure.Figure at 0x1c36a911390>
<matplotlib.figure.Figure at 0x1c36adc07b8>
<matplotlib.figure.Figure at 0x1c36adc0d30>
<matplotlib.figure.Figure at 0x1c36affc048>
<matplotlib.figure.Figure at 0x1c36b100dd8>
<matplotlib.figure.Figure at 0x1c36b194160>
<matplotlib.figure.Figure at 0x1c36b207da0>
<matplotlib.figure.Figure at 0x1c36c7d2a90>
<matplotlib.figure.Figure at 0x1c36b2071d0>
<matplotlib.figure.Figure at 0x1c36b204e80>
<matplotlib.figure.Figure at 0x1c36c98f400>
<matplotlib.figure.Figure at 0x1c36cbe38d0>
<matplotlib.figure.Figure at 0x1c36cc7ab00>
<matplotlib.figure.Figure at 0x1c36dd634a8>
<matplotlib.figure.Figure at 0x1c36de085f8>
<matplotlib.figure.Figure at 0x1c36ccdf320>
<matplotlib.figure.Figure at 0x1c36dd89320>
<matplotlib.figure.Figure at 0x1c36e0ce780>
<matplotlib.figure.Figure at 0x1c36df6ed30>
<matplotlib.figure.Figure at 0x1c36e159208>

In [61]:
H.set_coordinates('spring')
H.plot(vertex_size=10)



In [62]:
## Study plan
for i in tqdm(range(len(StudyPlansDisplay))):
    plt.figure()
    H.plot_signal(id_courses_sections_labelAR[:,0],show_edges=False, vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/section_"+StudyPlansDisplay[i]+"_studyPlan_wAR_spring", highlight=np.where(np.isin(id_courses_sections_labelAR,[i]).any(axis=1)))


 50%|█████████████████████████████████████████                                         | 10/20 [00:03<00:03,  3.08it/s]c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  2.96it/s]
<matplotlib.figure.Figure at 0x1c36af95be0>
<matplotlib.figure.Figure at 0x1c36f49f9b0>
<matplotlib.figure.Figure at 0x1c36f486898>
<matplotlib.figure.Figure at 0x1c36f5ab978>
<matplotlib.figure.Figure at 0x1c36f49fb70>
<matplotlib.figure.Figure at 0x1c36f7d2080>
<matplotlib.figure.Figure at 0x1c36f8a1c88>
<matplotlib.figure.Figure at 0x1c36f8177f0>
<matplotlib.figure.Figure at 0x1c36fa2ea58>
<matplotlib.figure.Figure at 0x1c36f8e5d68>
<matplotlib.figure.Figure at 0x1c36fc1ed68>
<matplotlib.figure.Figure at 0x1c36fb983c8>
<matplotlib.figure.Figure at 0x1c370d27390>
<matplotlib.figure.Figure at 0x1c370e75358>
<matplotlib.figure.Figure at 0x1c370eacd30>
<matplotlib.figure.Figure at 0x1c370ea6978>
<matplotlib.figure.Figure at 0x1c3710f0d68>
<matplotlib.figure.Figure at 0x1c371129ac8>
<matplotlib.figure.Figure at 0x1c3710686d8>
<matplotlib.figure.Figure at 0x1c37103be48>

In [63]:
showAndSaveData(weight_section_AR,"section_AR")


size of giant component: 798

2.2.5 Graphs profs - Master STI


In [64]:
I=graphs.Graph(weight_prof_STI)
I.compute_laplacian("normalized")
#I.compute_fourier_basis(recompute=True)
I.set_coordinates()#H.U[:,1:3])
I.plot(vertex_size=10)#show_edges=True, vertex_size=10)


c:\python35\lib\site-packages\pygsp\graphs\graph.py:606: RuntimeWarning: divide by zero encountered in power
  d = np.power(self.W.sum(1), -0.5)

In [65]:
## Study plan
for i in tqdm(range(len(StudyPlansDisplay))):
    plt.figure()
    I.plot_signal(id_courses_sections_labelSTI[:,0],show_edges=False, vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/profs_"+StudyPlansDisplay[i]+"_studyPlan_wSTI", highlight=np.where(np.isin(id_courses_sections_labelSTI,[i]).any(axis=1)))


 50%|█████████████████████████████████████████                                         | 10/20 [00:02<00:02,  3.61it/s]c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.70it/s]
<matplotlib.figure.Figure at 0x1c237a13860>
<matplotlib.figure.Figure at 0x1c3740f6f98>
<matplotlib.figure.Figure at 0x1c373ce43c8>
<matplotlib.figure.Figure at 0x1c372c7b860>
<matplotlib.figure.Figure at 0x1c372c866d8>
<matplotlib.figure.Figure at 0x1c3728336a0>
<matplotlib.figure.Figure at 0x1c3729dc9e8>
<matplotlib.figure.Figure at 0x1c372b58208>
<matplotlib.figure.Figure at 0x1c372b361d0>
<matplotlib.figure.Figure at 0x1c372b31748>
<matplotlib.figure.Figure at 0x1c373d34f60>
<matplotlib.figure.Figure at 0x1c37400a4e0>
<matplotlib.figure.Figure at 0x1c3740b8358>
<matplotlib.figure.Figure at 0x1c374157908>
<matplotlib.figure.Figure at 0x1c374231b70>
<matplotlib.figure.Figure at 0x1c3763693c8>
<matplotlib.figure.Figure at 0x1c37642ed68>
<matplotlib.figure.Figure at 0x1c3764d7860>
<matplotlib.figure.Figure at 0x1c37640cef0>
<matplotlib.figure.Figure at 0x1c37656f630>

In [67]:
# faculty
for fac,ids in faculties.items():
    plt.figure()
    I.plot_signal(id_courses_sections_labelSTI[:,0],show_edges=False, vertex_size=10,plot_name=fac, save_as="Graphs/image/profs_"+fac+"_faculty_STI" , highlight=np.where(np.isin(id_courses_sections_labelSTI,ids).any(axis=1)))


<matplotlib.figure.Figure at 0x1c376928b00>
<matplotlib.figure.Figure at 0x1c376bfae80>
<matplotlib.figure.Figure at 0x1c376e8e048>
<matplotlib.figure.Figure at 0x1c377ed3e10>
<matplotlib.figure.Figure at 0x1c377f99780>
<matplotlib.figure.Figure at 0x1c378081fd0>

In [68]:
# sections
for i in range(len(courseSection)):
    plt.figure()
    I.plot_signal(id_courses_code_labelSTI,show_edges=False, vertex_size=10,plot_name=courseSection[i], 
                  save_as="Graphs/image/profs_"+courseSection[i]+"_section_STI", 
                  highlight=np.where(id_courses_code_labelSTI==i))


c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
<matplotlib.figure.Figure at 0x1c372536160>
<matplotlib.figure.Figure at 0x1c3780d2be0>
<matplotlib.figure.Figure at 0x1c3780543c8>
<matplotlib.figure.Figure at 0x1c3783d45c0>
<matplotlib.figure.Figure at 0x1c3784fbf60>
<matplotlib.figure.Figure at 0x1c3785975c0>
<matplotlib.figure.Figure at 0x1c37867c6a0>
<matplotlib.figure.Figure at 0x1c3785ee7f0>
<matplotlib.figure.Figure at 0x1c378834a90>
<matplotlib.figure.Figure at 0x1c3786d3ef0>
<matplotlib.figure.Figure at 0x1c379a164a8>
<matplotlib.figure.Figure at 0x1c379a91e80>
<matplotlib.figure.Figure at 0x1c379a16dd8>
<matplotlib.figure.Figure at 0x1c379bca0b8>
<matplotlib.figure.Figure at 0x1c37896e518>
<matplotlib.figure.Figure at 0x1c379df4978>
<matplotlib.figure.Figure at 0x1c379c99d30>
<matplotlib.figure.Figure at 0x1c37afe15c0>

In [69]:
showAndSaveData(weight_prof_STI,"section_AR")


size of giant component: 16

In [ ]:
# show list of courses in main component

2.2.6 Graph Assistants - Mater STI


In [71]:
J=graphs.Graph(weight_assistants_STI)
J.compute_laplacian("normalized")
#I.compute_fourier_basis(recompute=True)
J.set_coordinates()#H.U[:,1:3])
J.plot(vertex_size=10)#show_edges=True, vertex_size=10)


c:\python35\lib\site-packages\pygsp\graphs\graph.py:606: RuntimeWarning: divide by zero encountered in power
  d = np.power(self.W.sum(1), -0.5)

In [72]:
## Study plan
for i in tqdm(range(len(StudyPlansDisplay))):
    plt.figure()
    J.plot_signal(id_courses_sections_labelSTI[:,0],show_edges=False, vertex_size=10,plot_name=StudyPlansDisplay[i] , save_as="Graphs/image/assistants_"+StudyPlansDisplay[i]+"_studyPlan_wSTI", highlight=np.where(np.isin(id_courses_sections_labelSTI,[i]).any(axis=1)))


 50%|█████████████████████████████████████████                                         | 10/20 [00:02<00:02,  3.68it/s]c:\python35\lib\site-packages\matplotlib\pyplot.py:524: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.84it/s]
<matplotlib.figure.Figure at 0x1c37b267668>
<matplotlib.figure.Figure at 0x1c37b2fce80>
<matplotlib.figure.Figure at 0x1c37b3fc2b0>
<matplotlib.figure.Figure at 0x1c37b4ef9e8>
<matplotlib.figure.Figure at 0x1c37b350b38>
<matplotlib.figure.Figure at 0x1c37b652710>
<matplotlib.figure.Figure at 0x1c37b65b7b8>
<matplotlib.figure.Figure at 0x1c37b82b588>
<matplotlib.figure.Figure at 0x1c37c89c1d0>
<matplotlib.figure.Figure at 0x1c37c978da0>
<matplotlib.figure.Figure at 0x1c37c851400>
<matplotlib.figure.Figure at 0x1c37c904c18>
<matplotlib.figure.Figure at 0x1c37cc07588>
<matplotlib.figure.Figure at 0x1c37cc8be80>
<matplotlib.figure.Figure at 0x1c37cdab400>
<matplotlib.figure.Figure at 0x1c37cc07710>
<matplotlib.figure.Figure at 0x1c37df65908>
<matplotlib.figure.Figure at 0x1c37e045f28>
<matplotlib.figure.Figure at 0x1c37df99f60>
<matplotlib.figure.Figure at 0x1c37e222278>

In [73]:
showAndSaveData(weight_assistants_STI,"ASSISTANT_AR")


size of giant component: 4

2.2.7 Requirements Graphs


In [48]:
## courses are linked if one is a requirement of the other
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_course_to_req_graph_STI.pkl'), 'rb')
weight_req_diff_level_STI = pickle.load(pkl_file)
pkl_file.close()

## courses are linked if they share the same requirements
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_same_course_graph_STI.pkl'), 'rb')
weight_req_similar_STI = pickle.load(pkl_file)
pkl_file.close()

## courses are linked if they are the requirements of the same course
pkl_file = open(os.path.join(os.getcwd(), 'Graphs','req_course_same_req_graph_STI.pkl'), 'rb')
weight_req_same_level_STI = pickle.load(pkl_file)
pkl_file.close()

In [49]:
#weight_req_diff_level_STI = np.fill_diagonal(weight_req_diff_level_STI,0)
weight_req_diff_level_STI = weight_req_diff_level_STI
L=graphs.Graph(weight_req_diff_level_STI)
#L.compute_laplacian("normalized")
#L.compute_fourier_basis(recompute=True)
L.set_coordinates()#L.U[:,1:3])
L.plot(vertex_size=10)#show_edges=True, vertex_size=10)


2018-01-21 20:11:28,667:[WARNING](pygsp.graphs.graph.check_weights): The main diagonal of the weight matrix is not 0!

In [52]:
showAndSaveData(weight_req_diff_level_STI,"requirement_linked_weight")


size of giant component: 8

2.2.7.2 same requirement graph


In [50]:
M=graphs.Graph(weight_req_similar_STI)
M.compute_laplacian("normalized")
#M.compute_fourier_basis(recompute=True)
M.set_coordinates()#M.U[:,1:3])
M.plot(vertex_size=10)#show_edges=True, vertex_size=10)


C:\Users\Thomas\Anaconda3\lib\site-packages\pygsp\graphs\graph.py:606: RuntimeWarning: divide by zero encountered in power
  d = np.power(self.W.sum(1), -0.5)

In [53]:
showAndSaveData(weight_req_similar_STI,"req_same_course_graph_STI")


size of giant component: 73

2.2.8.3 is required graph


In [51]:
N=graphs.Graph(weight_req_same_level_STI)
N.compute_laplacian("normalized")
#N.compute_fourier_basis(recompute=True)
N.set_coordinates()#N.U[:,1:3])
N.plot(vertex_size=10)#show_edges=True, vertex_size=10)


C:\Users\Thomas\Anaconda3\lib\site-packages\pygsp\graphs\graph.py:606: RuntimeWarning: divide by zero encountered in power
  d = np.power(self.W.sum(1), -0.5)

In [54]:
showAndSaveData(weight_req_same_level_STI,"req_course_same_req_graph_STI")


size of giant component: 6

In [ ]: