In [23]:
    
import job_rec_helpers
from time import time
from my_util import *
from job_rec_helpers import *
    
In [44]:
    
# Re-import job_rec_helpers so edits made to the module on disk take effect in
# the running kernel, then refresh the names previously star-imported from it.
# NOTE(review): bare `reload` is a builtin only on Python 2; on Python 3 it
# lives in importlib.
reload(job_rec_helpers)
from job_rec_helpers import *
    
In [2]:
    
# Filesystem layout used by the notebook: cleaned input data lives under
# DATA_DIR; everything produced by the analysis goes under RES_DIR, which is
# split into aggregated tables (AGG_DIR) and figures (FIG_DIR).
DATA_DIR = 'D:/larc_projects/job_analytics/data/clean/'
RES_DIR = 'd:/larc_projects/job_analytics/results/'
# Both result sub-directories hang directly off RES_DIR.
AGG_DIR, FIG_DIR = (RES_DIR + sub_dir for sub_dir in ('agg/', 'figs/'))
    
In [ ]:
    
# Load full_apps.csv from DATA_DIR into the `apps` DataFrame.
# NOTE(review): `pd` is not imported in any visible cell — presumably it comes
# in through one of the star-imports (my_util / job_rec_helpers); confirm.
apps = pd.read_csv(DATA_DIR + 'full_apps.csv')
    
In [10]:
    
# Sanity check on the loaded data: report (rows, columns), then preview the
# first rows as the cell's output.
# print(...) with a single argument produces the same text on Python 2 and 3
# and matches the call form already used by the other print cells.
print(apps.shape)
apps.head()
    
    
    Out[10]:
In [11]:
    
# Keep only the rows flagged job_title_is_number == False, then report the
# (rows, columns) that remain.
# NOTE: `apps` is rebound to the filtered frame; later cells see only the
# filtered rows.
apps = apps.query('job_title_is_number == False')
# print(...) with a single argument produces the same text on Python 2 and 3.
print(apps.shape)
    
    
Here we use raw counts (the total number of times) rather than frequencies.
In [32]:
    
# Build the two lookup indices from the filtered `apps` frame: one over the
# 'uid' column (users) and one over the 'job_title' column (items).
# NOTE(review): mkUserIndex / mkItemIndex are not defined in this notebook —
# presumably star-imported from job_rec_helpers; their return type is not
# visible here.
index_of_users = mkUserIndex(df=apps, user_col='uid')
index_of_items = mkItemIndex(df=apps, item_col='job_title')
    
In [16]:
    
# Report how many distinct users and job titles appear in `apps`.
# The counts are taken directly from the 'uid' and 'job_title' columns (the
# same columns the user/item indices are built on), so this cell depends only
# on `apps` and not on names assigned outside the visible notebook.
# NOTE(review): nunique() ignores missing values — confirm that matches the
# intended definition of "user" / "job title".
print('# users: %d' % apps['uid'].nunique())
print('# job titles: %d' % apps['job_title'].nunique())
    
    
In [18]:
    
# Read user_apply_job.mtx (a Matrix Market file) from DATA_DIR.
# NOTE(review): only `mmread` is used from this star-import in the visible
# cells; an explicit `from scipy.io import mmread` in the top import cell would
# avoid silently shadowing other notebook names — confirm nothing else relies
# on the star-import before narrowing it.
from scipy.io import *
user_apply_job = mmread(DATA_DIR + 'user_apply_job.mtx')
    
In [19]:
    
# Pass the loaded matrix to printInfo for inspection.
# NOTE(review): printInfo is defined outside this notebook (presumably my_util
# or job_rec_helpers); what it reports is not visible here.
printInfo(user_apply_job)
    
    
In [20]:
    
# Load apps_by_job_emp.csv from AGG_DIR and show its (rows, columns) as the
# cell's output.
apps_by_job_emp = pd.read_csv(AGG_DIR + 'apps_by_job_emp.csv')
apps_by_job_emp.shape
    
    Out[20]:
In [28]:
    
# Add a combined 'job_emp' label of the form
# "<job_title> at <organisation_name_ep>" so each (job title, employer) pair
# can be used as a single item key. The column is added to apps_by_job_emp
# itself; the head() call previews the result.
apps_by_job_emp['job_emp'] = apps_by_job_emp['job_title'] + ' at ' + apps_by_job_emp['organisation_name_ep']
apps_by_job_emp.head()
    
    Out[28]:
In [33]:
    
# Rebuild the item index, this time over the 'job_emp' column of
# apps_by_job_emp.
# NOTE: this rebinds `index_of_items`, replacing the index that was built from
# apps['job_title'] earlier in the notebook.
index_of_items = mkItemIndex(df=apps_by_job_emp, item_col='job_emp')
    
In [45]:
    
# Build the user x item structure from apps_by_job_emp, with 'uid' as the user
# column, 'job_emp' as the item column and 'n_apply' as the rating column.
# NOTE(review): index_of_users was built from `apps`, not from
# apps_by_job_emp — this assumes both frames cover the same uids; confirm.
user_apply_job_emp = buildUserItemMat(
    df=apps_by_job_emp,
    index_of_users=index_of_users,
    index_of_items=index_of_items,
    user_col='uid',
    item_col='job_emp',
    rating_col='n_apply',
)
    
    
In [ ]: