In [23]:
import job_rec_helpers
from time import time
from my_util import *
from job_rec_helpers import *
In [44]:
# Re-execute the job_rec_helpers module so that changes made to its source are
# picked up, then repeat the star import so the names bound in this notebook
# refer to the reloaded definitions rather than the stale ones.
reload(job_rec_helpers)
from job_rec_helpers import *
In [2]:
# Global vars
# All locations are derived from a single project root so the drive letter and
# base folder are spelled once and cannot drift apart between constants.
PROJECT_DIR = 'D:/larc_projects/job_analytics/'
DATA_DIR = PROJECT_DIR + 'data/clean/'   # inputs read by this notebook (CSV / .mtx)
RES_DIR = PROJECT_DIR + 'results/'       # root of all result folders
AGG_DIR = RES_DIR + 'agg/'               # aggregated tables, e.g. apps_by_job_emp.csv
FIG_DIR = RES_DIR + 'figs/'              # figures folder
In [ ]:
# Load the applications table (full_apps.csv) from DATA_DIR into a DataFrame.
# NOTE(review): `pd` is not imported explicitly in the cells shown; presumably it
# arrives through one of the star imports (my_util / job_rec_helpers) -- confirm.
apps = pd.read_csv(DATA_DIR + 'full_apps.csv')
In [10]:
# Sanity check after loading: print the (rows, columns) tuple, then let the
# cell's last expression display the first rows.
# The parenthesised print matches the print(...) calls used later in this
# notebook and produces identical output under Python 2 and Python 3.
print(apps.shape)
apps.head()
Out[10]:
In [11]:
# Keep only the rows whose job_title_is_number flag is False
# (presumably titles that are not just a number -- confirm against the cleaning step).
# Note that this rebinds `apps`, so the unfiltered table is no longer available.
apps = apps.query('job_title_is_number == False')
# Parenthesised print: same output on Python 2, valid syntax on Python 3, and
# consistent with the print(...) calls used elsewhere in this notebook.
print(apps.shape)
Here we use the total number of times a user applied to a job (the raw count), not the frequency.
In [32]:
# Build lookup indices from `apps`: users keyed on the 'uid' column and items
# keyed on the 'job_title' column.
# mkUserIndex / mkItemIndex are project helpers (presumably star-imported from
# job_rec_helpers or my_util); their return types are not visible here.
index_of_users = mkUserIndex(df=apps, user_col='uid')
index_of_items = mkItemIndex(df=apps, item_col='job_title')
In [16]:
# Distinct ids, taken from the same `apps` columns ('uid', 'job_title') that the
# user and item indices are built from. Defining them here keeps this cell
# runnable on a fresh kernel instead of relying on names left over in memory.
user_ids = apps['uid'].unique()
item_ids = apps['job_title'].unique()

print('# users: %d' % len(user_ids))
print('# job titles: %d' % len(item_ids))
In [18]:
# NOTE(review): in the cells shown, this star import is needed only for mmread;
# it also sits mid-notebook rather than in the import cell at the top. Consider
# `from scipy.io import mmread` there once no other cell depends on scipy.io names.
from scipy.io import *
# Read the matrix stored in Matrix Market format at DATA_DIR/user_apply_job.mtx
# (the name suggests users x job titles -- confirm against how the file was written).
user_apply_job = mmread(DATA_DIR + 'user_apply_job.mtx')
In [19]:
# printInfo is a project helper (definition not visible here); presumably it
# prints summary information about the loaded matrix -- confirm.
printInfo(user_apply_job)
In [20]:
# Load apps_by_job_emp.csv from AGG_DIR. Later cells read its 'uid',
# 'job_title', 'organisation_name_ep' and 'n_apply' columns.
apps_by_job_emp = pd.read_csv(AGG_DIR + 'apps_by_job_emp.csv')
# Last expression of the cell: displays the (rows, columns) tuple.
apps_by_job_emp.shape
Out[20]:
In [28]:
# Combine job title and employer into one "<job_title> at <organisation_name_ep>"
# label, stored in a new 'job_emp' column of the same frame.
job_title_col = apps_by_job_emp['job_title']
employer_col = apps_by_job_emp['organisation_name_ep']
apps_by_job_emp['job_emp'] = job_title_col + ' at ' + employer_col

# Preview the frame with the new column.
apps_by_job_emp.head()
Out[28]:
In [33]:
# Build the item index over the combined 'job_emp' labels of apps_by_job_emp.
# This rebinds index_of_items, so the 'job_title'-based index created from
# `apps` earlier in the notebook is no longer reachable under this name.
index_of_items = mkItemIndex(df=apps_by_job_emp, item_col='job_emp')
In [45]:
# Build the user x job_emp matrix from apps_by_job_emp, passing 'n_apply' as the
# rating column. buildUserItemMat is a project helper; its return type is not
# visible here.
# NOTE(review): index_of_users is built from `apps`, not from apps_by_job_emp --
# confirm that every 'uid' in apps_by_job_emp is present in that index.
user_apply_job_emp = buildUserItemMat(
    df=apps_by_job_emp,
    index_of_users=index_of_users,
    index_of_items=index_of_items,
    user_col='uid',
    item_col='job_emp',
    rating_col='n_apply'
)
In [ ]: