notebook.community

Edit and run



In [23]:

    
import job_rec_helpers

from time import time
from my_util import *
from job_rec_helpers import *



In [44]:

    
# Development aid: re-execute the job_rec_helpers module so edits made to it on
# disk are picked up, then re-run the star import so the names already bound in
# this notebook point at the refreshed definitions. The order matters: the
# reload has to happen before the star import.
# NOTE(review): `reload` is used as a builtin here (Python 2 style).
reload(job_rec_helpers)
from job_rec_helpers import *



In [2]:

    
# Global vars
# Folder the input files of this notebook are read from.
DATA_DIR = 'D:/larc_projects/job_analytics/data/clean/'
# Root folder for results; the aggregate and figure folders hang off it.
# NOTE(review): the drive letter is lower-case here but upper-case in DATA_DIR;
# both literals are kept exactly as they were.
RES_DIR = 'd:/larc_projects/job_analytics/results/'
AGG_DIR, FIG_DIR = (RES_DIR + sub_dir for sub_dir in ('agg/', 'figs/'))



In [ ]:

    
# Read full_apps.csv from DATA_DIR into the `apps` DataFrame.
# NOTE(review): `pd` is never imported explicitly in this notebook; presumably
# it arrives through one of the star imports in the first cell - TODO confirm.
apps = pd.read_csv(DATA_DIR + 'full_apps.csv')



In [10]:

    
# Quick look at the loaded applications: (rows, columns) followed by the first
# rows. The parenthesised print emits the same text as the old print statement
# under Python 2 and also parses under Python 3, matching the print(...) calls
# used further down in this notebook.
print(apps.shape)
apps.head()









    



(1506897, 12)






    Out[10]:






  
    
      
      uid
      job_id
      job_title
      apply_date
      reg_no_uen_ep
      employer_creation_date
      organisation_name_ep
      ssic_code_ep
      ssic_description_ep
      ssic_group_ep
      third_party_entity_ep
      job_title_is_number
    
  
  
    
      0
      7
      JOB-2015-0223128
      housekeeping supervisor
      2015-07-01
      52865867X
      Jul 8, 2014
      THE FULLERTON HOTEL
      55101
      Hotels with restaurant
      Accommodation and Food Service Activities
      Y
      False
    
    
      1
      21073
      JOB-2015-0223128
      housekeeping supervisor
      2015-05-07
      52865867X
      Jul 8, 2014
      THE FULLERTON HOTEL
      55101
      Hotels with restaurant
      Accommodation and Food Service Activities
      Y
      False
    
    
      2
      46634
      JOB-2015-0223128
      housekeeping supervisor
      2015-05-01
      52865867X
      Jul 8, 2014
      THE FULLERTON HOTEL
      55101
      Hotels with restaurant
      Accommodation and Food Service Activities
      Y
      False
    
    
      3
      100427
      JOB-2015-0223128
      housekeeping supervisor
      2015-07-24
      52865867X
      Jul 8, 2014
      THE FULLERTON HOTEL
      55101
      Hotels with restaurant
      Accommodation and Food Service Activities
      Y
      False
    
    
      4
      39
      JOB-2014-0134411
      account assistant
      2015-06-16
      200203771R
      Jul 5, 2014
      THE SHICHIDA METHOD (S) PTE. LTD.
      82999
      Other business support services activities nec...
      Administrative and Support Service Activities
      N
      False



In [11]:

    
# Keep only the rows whose job_title_is_number flag is False, then report the
# remaining (rows, columns). In the recorded run the shape is the same as
# before the filter, i.e. no row was dropped.
# The parenthesised print behaves identically under Python 2 and also parses
# under Python 3, consistent with the print(...) calls used later on.
apps = apps.query('job_title_is_number == False')
print(apps.shape)









    



(1506897, 12)

Applicant-apply-Job matrix

Jobs are considered at job title level.
Each entry $e_{u,j}$ of the matrix is either the total number of times applicant $u$ applied for job title $j$ or the corresponding frequency.

Here we use the total number of times rather than the frequency.



In [32]:

    
# Build the lookup structures for the job-title-level matrix from `apps`:
# one over the values of the 'uid' column (users) and one over the values of
# the 'job_title' column (items).
# mkUserIndex / mkItemIndex are not defined in this notebook; presumably they
# come from the job_rec_helpers / my_util star imports - TODO confirm what
# kind of object they return.
index_of_users = mkUserIndex(df=apps, user_col='uid')
index_of_items = mkItemIndex(df=apps, item_col='job_title')



In [16]:

    
# Report how many distinct applicants and job titles are in `apps`.
# The previous version read len(user_ids) / len(item_ids), but no cell of this
# notebook defines those names, so the cell failed with a NameError on a fresh
# kernel. Counting the distinct values of the columns the indices are built
# from ('uid' and 'job_title') needs nothing beyond `apps` itself.
# Note that nunique() ignores missing values.
print('# users: %d' % apps['uid'].nunique())
print('# job titles: %d' % apps['job_title'].nunique())









    



# users: 68144
# job titles: 5794



In [18]:

    
# Load the previously saved user-apply-job matrix from its Matrix Market file
# in DATA_DIR.
# NOTE(review): this import sits mid-notebook and is a star import; it would
# read better in the top import cell. Left unchanged because other cells may
# rely on further scipy.io names.
from scipy.io import *
user_apply_job = mmread(DATA_DIR + 'user_apply_job.mtx')



In [19]:

    
# Summarise the loaded matrix. In the recorded run this prints its dimensions,
# the number of non-zero entries and the largest entry.
# printInfo is not defined in this notebook; presumably it comes from one of
# the star-imported helper modules - TODO confirm.
# NOTE(review): the recorded matrix has 5829 columns while the job-title count
# printed earlier is 5794; the saved file may come from a different run.
printInfo(user_apply_job)









    



Dims of user-apply-job matrix: (68144, 5829)
# non-zero entries: 775480
Max entry: 582

Applicant-apply-(Job, Employer) matrix



In [20]:

    
# Read apps_by_job_emp.csv from AGG_DIR and show its (rows, columns).
# Judging by the displayed head, each row carries a uid, a job title, an
# employer (reg_no_uen_ep / organisation_name_ep) and an n_apply count.
apps_by_job_emp = pd.read_csv(AGG_DIR + 'apps_by_job_emp.csv')
apps_by_job_emp.shape









    Out[20]:





(1352961, 5)



In [28]:

    
# Give every row a single composite item key of the form
# "<job title> at <employer name>", stored in the new 'job_emp' column, then
# preview the result.
apps_by_job_emp['job_emp'] = (
    apps_by_job_emp['job_title']
    + ' at '
    + apps_by_job_emp['organisation_name_ep']
)
apps_by_job_emp.head()









    Out[28]:






  
    
      
      uid
      job_title
      reg_no_uen_ep
      organisation_name_ep
      n_apply
      job_emp
    
  
  
    
      0
      103204
      Analyst
      196800306E
      DBS BANK LTD.
      132
      Analyst at DBS BANK LTD.
    
    
      1
      103204
      Information Technology Specialist
      196800306E
      DBS BANK LTD.
      90
      Information Technology Specialist at DBS BANK ...
    
    
      2
      112664
      Research Assistant
      200604346E
      NATIONAL UNIVERSITY OF SINGAPORE
      90
      Research Assistant at NATIONAL UNIVERSITY OF S...
    
    
      3
      108289
      Call Centre Agent
      199907051E
      CREDIT MANAGEMENT CONSULTANCY (ASIA) PTE LTD
      72
      Call Centre Agent at CREDIT MANAGEMENT CONSULT...
    
    
      4
      76182
      Information Technology Specialist
      196800306E
      DBS BANK LTD.
      64
      Information Technology Specialist at DBS BANK ...



In [33]:

    
# Rebuild the item index over the composite 'job_emp' keys.
# NOTE(review): this rebinds `index_of_items`, which an earlier cell built
# from the 'job_title' column of `apps`; after this cell the name refers to
# (job, employer) items only.
index_of_items = mkItemIndex(df=apps_by_job_emp, item_col='job_emp')



In [45]:

    
# Build the applicant-by-(job, employer) matrix from apps_by_job_emp: users are
# taken from 'uid', items from 'job_emp', and the n_apply column supplies the
# entry values. buildUserItemMat is not defined in this notebook; presumably
# it comes from job_rec_helpers - TODO confirm the type of the returned matrix.
user_apply_job_emp = buildUserItemMat(df=apps_by_job_emp, 
                                      index_of_users=index_of_users, index_of_items=index_of_items, 
                                      user_col='uid', item_col='job_emp', rating_col='n_apply')









    



# users in index: 68144
# items in index: 89071
Mapping user ids to internal user indices...
Mapping item ids to internal item indices...
User-Item matrix built



In [ ]:

	uid	job_id	job_title	apply_date	reg_no_uen_ep	employer_creation_date	organisation_name_ep	ssic_code_ep	ssic_description_ep	ssic_group_ep	third_party_entity_ep	job_title_is_number
0	7	JOB-2015-0223128	housekeeping supervisor	2015-07-01	52865867X	Jul 8, 2014	THE FULLERTON HOTEL	55101	Hotels with restaurant	Accommodation and Food Service Activities	Y	False
1	21073	JOB-2015-0223128	housekeeping supervisor	2015-05-07	52865867X	Jul 8, 2014	THE FULLERTON HOTEL	55101	Hotels with restaurant	Accommodation and Food Service Activities	Y	False
2	46634	JOB-2015-0223128	housekeeping supervisor	2015-05-01	52865867X	Jul 8, 2014	THE FULLERTON HOTEL	55101	Hotels with restaurant	Accommodation and Food Service Activities	Y	False
3	100427	JOB-2015-0223128	housekeeping supervisor	2015-07-24	52865867X	Jul 8, 2014	THE FULLERTON HOTEL	55101	Hotels with restaurant	Accommodation and Food Service Activities	Y	False
4	39	JOB-2014-0134411	account assistant	2015-06-16	200203771R	Jul 5, 2014	THE SHICHIDA METHOD (S) PTE. LTD.	82999	Other business support services activities nec...	Administrative and Support Service Activities	N	False

	uid	job_title	reg_no_uen_ep	organisation_name_ep	n_apply	job_emp
0	103204	Analyst	196800306E	DBS BANK LTD.	132	Analyst at DBS BANK LTD.
1	103204	Information Technology Specialist	196800306E	DBS BANK LTD.	90	Information Technology Specialist at DBS BANK ...
2	112664	Research Assistant	200604346E	NATIONAL UNIVERSITY OF SINGAPORE	90	Research Assistant at NATIONAL UNIVERSITY OF S...
3	108289	Call Centre Agent	199907051E	CREDIT MANAGEMENT CONSULTANCY (ASIA) PTE LTD	72	Call Centre Agent at CREDIT MANAGEMENT CONSULT...
4	76182	Information Technology Specialist	196800306E	DBS BANK LTD.	64	Information Technology Specialist at DBS BANK ...