In this notebook we attempt to build the course graph as a linear combination of feature graphs. The idea is to use a grid search to find the combination of weights that works best. The quality of a combination is judged by comparing the output of heat diffusion from a starting node (the course of interest) against the estimated probability that each other course is taken, given that the course of interest was taken.
In [1]:
    
%matplotlib inline
import os
import pandas as pd
import numpy as np
import pickle
from pygsp import graphs, filters, plotting
from scipy.spatial import distance
import matplotlib.pyplot as plt
import itertools
from tqdm import tqdm
plt.rcParams['figure.figsize'] = (17, 5)
plotting.BACKEND = 'matplotlib'
do_prints = False
random = True
plt.rcParams['figure.figsize'] = (17, 5)
plotting.BACKEND = 'matplotlib'
%matplotlib inline
    
    
In [2]:
    
def _load_graph(filename):
    """Load one pickled adjacency matrix from the local 'Graphs' folder.

    Uses a context manager so the file handle is always closed, replacing
    the repeated open/load/close boilerplate.
    """
    path = os.path.join(os.getcwd(), 'Graphs', filename)
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)

# One (n_courses x n_courses) similarity matrix per feature.
students_graph = _load_graph('students_graph_STI.pkl')
assistants_graph = _load_graph('assistants_graph_STI.pkl')
prof_graph = _load_graph('prof_graph_STI.pkl')
sections_graph = _load_graph('section_graph_STI.pkl')
#topics_graph = _load_graph('topics_graph.pkl')
course_same_req_graph = _load_graph('req_course_same_req_graph_STI.pkl')
course_to_req_graph = _load_graph('req_course_to_req_graph_STI.pkl')
same_course_req_graph = _load_graph('req_same_course_graph_STI.pkl')

if do_prints:
    print("students ", np.shape(students_graph))
    print("assistants ", np.shape(assistants_graph))
    print("prof ", np.shape(prof_graph))
    print("sections ", np.shape(sections_graph))
    #print("topics ", np.shape(topics_graph))
    print("course same req ", np.shape(course_same_req_graph))
    print("course to req ", np.shape(course_to_req_graph))
    print("same course req ", np.shape(same_course_req_graph))

# All feature graphs must share the same shape so they can be combined linearly.
assert np.shape(students_graph) == np.shape(assistants_graph)
assert np.shape(assistants_graph) == np.shape(prof_graph)
assert np.shape(prof_graph) == np.shape(sections_graph)
#assert np.shape(sections_graph) == np.shape(topics_graph)
#assert np.shape(topics_graph) == np.shape(course_same_req_graph)
assert np.shape(sections_graph) == np.shape(course_same_req_graph)
assert np.shape(course_same_req_graph) == np.shape(course_to_req_graph)
assert np.shape(course_to_req_graph) == np.shape(same_course_req_graph)
    
In [3]:
    
# Course metadata (indexed by course code); the index gives the node ordering.
courses = pd.read_pickle("../data/cleaned_courses_STI.pickle")
full_courses_list = list(courses.index)
    
In [4]:
    
# All feature graphs, in the order matching the weight vector used later.
weight_matrices = [students_graph, assistants_graph, prof_graph, sections_graph,
                   course_same_req_graph, course_to_req_graph, same_course_req_graph]
for i, mat in enumerate(weight_matrices):
    # Remove self-loops before normalization.
    np.fill_diagonal(mat, 0)
    # Scale each matrix into [0, 1] so the feature weights are comparable.
    # Compute the max once (the original computed it twice and never used
    # the first result) and guard against an all-zero matrix, which would
    # otherwise produce NaNs via 0/0.
    max_val = np.max(mat)
    if max_val > 0:
        weight_matrices[i] = mat / max_val
    
In [5]:
    
def create_graph(mat):
    """Build a pygsp graph from the adjacency matrix `mat`.

    The normalized Laplacian and the Fourier basis are precomputed so the
    graph is ready for spectral filtering (heat diffusion).
    """
    graph = graphs.Graph(mat)
    graph.compute_laplacian("normalized")
    graph.compute_fourier_basis()
    return graph
    
In [6]:
    
def heat_diffusion(G, courses, tau):
    """Diffuse heat from the given courses over the graph G.

    A unit impulse is placed on each node whose course code appears in
    `courses` (note: this parameter shadows the module-level `courses`
    DataFrame), then a heat kernel of scale `tau` is applied.
    Returns the filtered signal.
    """
    heat_kernel = filters.Heat(G, tau)

    # Build the source signal: 1 on the courses of interest, 0 elsewhere.
    signal = np.zeros(G.N)
    course_codes = np.asarray(full_courses_list)
    for course in courses:
        signal[course_codes == course] = 1

    return heat_kernel.filter(signal)
    
In [7]:
    
def diffusion(weight_mat, list_loved_courses, n_result_courses, tau_filter):
    """Recommend courses by heat diffusion from the loved courses.

    Parameters
    ----------
    weight_mat : combined (n x n) adjacency matrix of the course graph.
    list_loved_courses : list of course codes (e.g. "EE-535") used as sources.
    n_result_courses : number of course titles to return (clipped to n).
    tau_filter : heat-kernel scale passed to the diffusion filter.

    Returns the list of course titles ordered by decreasing diffusion value,
    or None (after printing an error) if a loved course is unknown.
    Side effect: plots the diffused signal on the graph, highlighting sources.
    """
    course_codes = np.asarray(full_courses_list)

    # Indices of the loved courses, used to highlight them in the plot.
    NODE = []
    for code in list_loved_courses:
        matches = np.where(course_codes == code)[0]
        if len(matches) == 0:
            # Keep the original best-effort contract: report and bail out.
            print("ERROR! Course loved is not in the list of the courses.")
            return
        NODE.append(matches[0])

    # Create the graph and run the diffusion on it.
    G_diffusion = create_graph(weight_mat)
    filtered_signals = heat_diffusion(G_diffusion, list_loved_courses, tau_filter)

    # Plot the diffusion with the source nodes highlighted.
    G_diffusion.set_coordinates("spring")
    G_diffusion.plot_signal(filtered_signals, vertex_size=50, highlight=NODE)

    # Top-n selection: a stable argsort on the negated signal keeps the first
    # occurrence on ties, matching the original repeated index(max()) loop,
    # but in O(n log n) instead of O(n * n_result_courses).
    n_result_courses = min(n_result_courses, len(filtered_signals))
    top_nodes = np.argsort(-filtered_signals, kind='stable')[:n_result_courses]

    courses_list = []
    for node in top_nodes:
        course_code = full_courses_list[node]
        courses_list.append(
            courses[courses.index.str.endswith(course_code)].CourseTitleFR.tolist()[0])
    return courses_list
    
In [20]:
    
# One weight per feature graph, in the order of `weight_matrices`.
weight_different_graph = [0.2,0,0,0,0,0,1] # [0.5?,0,0,0 -same students-,0,$,1]
# Combine the feature graphs linearly with the weights above.
diffusion_graph = sum(w * mat for w, mat in zip(weight_different_graph, weight_matrices))
recommanded_courses = diffusion(diffusion_graph,["EE-535", "EE-420"],7,4)
    
    
In [21]:
    
# Display the recommended course titles computed in the previous cell.
recommanded_courses
    
    Out[21]:
In [ ]: