In [1]:
import ja_helpers as ja_helpers
from ja_helpers import *
In [2]:
# Project directory layout. Every path below is derived from HOME_DIR.
# NOTE(review): HOME_DIR is an absolute, machine-specific location; adjust it
# when running the notebook elsewhere.
HOME_DIR = 'd:/larc_projects/job_analytics/'
DATA_DIR = HOME_DIR + 'data/clean/'   # cleaned input data and cached artefacts
RES_DIR = HOME_DIR + 'results/'       # analysis outputs
In [5]:
# Read the skill index table (skill_index.csv under DATA_DIR) into a DataFrame.
skill_df = pd.read_csv('{}skill_index.csv'.format(DATA_DIR))
In [ ]:
# Build the matrix returned by buildDocSkillMat for the job-description docs
# (presumably a sparse document-by-skill matrix -- confirm in ja_helpers).
# NOTE(review): jd_docs is not defined in the cells shown here; make sure it is
# loaded before this cell when running on a fresh kernel.
doc_skill = buildDocSkillMat(jd_docs, skill_df, folder=DATA_DIR)

# Persist the matrix as doc_skill.mtx so it does not have to be rebuilt.
# The handle is opened in binary mode: assuming mmwrite is scipy.io.mmwrite
# (re-exported by ja_helpers -- TODO confirm), it writes bytes, so a text-mode
# handle ('w') fails on Python 3. Binary mode works on both Python 2 and 3.
with open(DATA_DIR + 'doc_skill.mtx', 'wb') as f:
    mmwrite(f, doc_skill)
In [ ]:
# Run getSkills4Docs over the 'doc' column of doc_index using the doc_skill
# matrix built above.
# NOTE(review): doc_index and skills are not defined in the cells shown here;
# they must already exist in the kernel before this cell runs.
extracted_skill_df = getSkills4Docs(
    docs=doc_index['doc'],
    doc_term=doc_skill,
    skills=skills
)
In [ ]:
# Attach the extracted skills to the document index, matching rows on the
# index of both frames (pandas' default inner join).
df = doc_index.merge(
    extracted_skill_df,
    left_index=True,
    right_index=True
)
In [ ]:
# Quick sanity check of the merged frame: its dimensions, then the first rows.
print(df.shape)
df.head(5)
In [ ]:
# Save the merged frame as doc_index.csv so the skill extraction above does
# not need to be repeated in later sessions.
df.to_csv('{}doc_index.csv'.format(DATA_DIR))
In [9]:
# Pick up edits made to the ja_helpers module without restarting the kernel:
# reload the module object first, then repeat the star import so the names in
# the notebook namespace are rebound to the freshly loaded definitions.
# NOTE(review): a bare reload() is only a builtin on Python 2 -- confirm the
# interpreter version, or that ja_helpers exports a reload function.
reload(ja_helpers)
from ja_helpers import *
In [8]:
# Read the SF framework file (SF/pst.csv under DATA_DIR) so that each framework
# can be handled like a document, then display the loaded frame.
pst_docs = pd.read_csv('{}SF/pst.csv'.format(DATA_DIR))
pst_docs
Out[8]:
In [10]:
# Apply the same buildDocSkillMat step used for the job documents to the SF
# framework docs; folder=None is passed here instead of a data directory.
pst_skill = buildDocSkillMat(
    pst_docs,
    skill_df,
    folder=None
)
In [11]:
# Persist pst_skill as pst_skill.mtx under DATA_DIR.
# The handle is opened in binary mode: assuming mmwrite is scipy.io.mmwrite
# (re-exported by ja_helpers -- TODO confirm), it writes bytes, so a text-mode
# handle ('w') fails on Python 3. Binary mode works on both Python 2 and 3.
with open(DATA_DIR + 'pst_skill.mtx', 'wb') as f:
    mmwrite(f, pst_skill)