Set up



In [1]:

    
# Load needed modules and functions
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from pylab import figure, show

import pandas as pd
from pandas import DataFrame, Series
from sklearn.neighbors import NearestNeighbors



In [2]:

    
#set up path to the data files
import os
data_folder = os.path.join(os.pardir, "data")



In [3]:

    
import glob 
file_names = glob.glob(data_folder + "/*")
#file_names = glob.glob(data_folder + "\\\*")



In [4]:

    
import re
p = re.compile('data/(.*).txt')
#p = re.compile('data\\\(.*).txt')
name_list = []
for name in file_names:
    frame_name = p.findall(name)[0]
    frame_name = frame_name.lower().replace(" ","_")
    frame_name = frame_name.replace(",","")
    name_list.append(frame_name)
    frame = pd.read_table(name, sep= '\t')
    #reformat column names
    columns = frame.columns
    columns = [x.lower().replace("*","").replace("-","_").replace(" ","_") for x in columns]
    frame.columns = columns
    #create a variable named the frame_name
    vars()[frame_name] = frame









    



/Users/agswigart/anaconda/envs/myenv/lib/python2.7/site-packages/pandas/io/parsers.py:1070: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

Functions



In [5]:

    
#function that calculates the number of features available in a dataframe (the # rows divided by # of jobs)
def feature(dataframe):
    return len(dataframe)/len(dataframe.onet_soc_code.unique())

Data Filtering

Abilities



In [6]:

    
#In abilities, we only want to keep the rows where scale_id == 'IM'
abilities_final = abilities[abilities.scale_id == 'IM']



In [7]:

    
len(abilities_final)









    Out[7]:





47996



In [8]:

    
feature(abilities_final)









    Out[8]:





52

Knowledge



In [9]:

    
#In knowledge, we only want to keep the rows where scale_id == 'IM'
knowledge_final = knowledge[knowledge.scale_id == 'IM']



In [10]:

    
len(knowledge_final)









    Out[10]:





30459



In [11]:

    
feature(knowledge_final)









    Out[11]:





33

Interests



In [12]:

    
#In interests, we only want to keep rows where scale_id == 'OI'
interests_final = interests[interests.scale_id == 'OI']



In [13]:

    
len(interests_final)









    Out[13]:





5844



In [14]:

    
feature(interests_final)









    Out[14]:





6



In [15]:

    
interests_final['domain'] = 'Interests'
interests_final.head()









    Out[15]:






  
    
      
      onet_soc_code
      element_id
      element_name
      scale_id
      data_value
      date
      domain_source
      domain
    
  
  
    
      0
       11-1011.00
       1.B.1.a
           Realistic
       OI
       1.33
       06/2008
       Analyst
       Interests
    
    
      1
       11-1011.00
       1.B.1.b
       Investigative
       OI
       2.00
       06/2008
       Analyst
       Interests
    
    
      2
       11-1011.00
       1.B.1.c
            Artistic
       OI
       2.67
       06/2008
       Analyst
       Interests
    
    
      3
       11-1011.00
       1.B.1.d
              Social
       OI
       3.67
       06/2008
       Analyst
       Interests
    
    
      4
       11-1011.00
       1.B.1.e
        Enterprising
       OI
       7.00
       06/2008
       Analyst
       Interests
    
  

5 rows × 8 columns



In [16]:

    
interests_pt = interests_final.pivot_table('data_value',
                                           rows = 'onet_soc_code',
                                           cols = ['domain', 'element_name'],
                                           aggfunc = 'sum')
interests_pt.head()









    Out[16]:






  
    
      domain
      Interests
    
    
      element_name
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
    
    
      onet_soc_code
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       2.67
       5.33
       7
       2.00
       1.33
       3.67
    
    
      11-1011.03
       2.67
       4.33
       7
       4.33
       1.00
       2.33
    
    
      11-1021.00
       1.00
       3.67
       7
       1.33
       1.33
       3.33
    
    
      11-1031.00
       3.67
       3.00
       7
       3.67
       1.00
       4.67
    
    
      11-2011.00
       5.33
       4.67
       7
       2.00
       1.67
       2.33
    
  

5 rows × 6 columns

Job Zones



In [17]:

    
#we do not need to do anything to job_zones
job_zones_final = job_zones



In [18]:

    
len(job_zones_final)









    Out[18]:





924



In [19]:

    
feature(job_zones_final)









    Out[19]:





1



In [20]:

    
job_zones_final.head()









    Out[20]:






  
    
      
      onet_soc_code
      job_zone
      date
      domain_source
    
  
  
    
      0
       11-1011.00
       5
       06/2006
       Analyst
    
    
      1
       11-1011.03
       5
       07/2013
       Analyst
    
    
      2
       11-1021.00
       3
       06/2008
       Analyst
    
    
      3
       11-1031.00
       4
       06/2008
       Analyst
    
    
      4
       11-2011.00
       4
       06/2010
       Analyst
    
  

5 rows × 4 columns



In [21]:

    
job_zones_final['domain'] = 'Job_Zones'
job_zones_final['element_name'] = 'job_zone'
job_zones_pt = job_zones_final.pivot_table('job_zone',
                                           rows = 'onet_soc_code',
                                           cols = ['domain', 'element_name'],
                                           aggfunc = 'sum')
job_zones_pt.head()









    Out[21]:






  
    
      domain
      Job_Zones
    
    
      element_name
      job_zone
    
    
      onet_soc_code
      
    
  
  
    
      11-1011.00
       5
    
    
      11-1011.03
       5
    
    
      11-1021.00
       3
    
    
      11-1031.00
       4
    
    
      11-2011.00
       4
    
  

5 rows × 1 columns

Skills



In [22]:

    
#for skills, we only want to keep rows where scale_id == "IM"
skills_final = skills[skills.scale_id == 'IM']



In [23]:

    
len(skills_final)









    Out[23]:





32305



In [24]:

    
feature(skills_final)









    Out[24]:





35



In [25]:

    
skills_final.head()









    Out[25]:






  
    
      
      onet_soc_code
      element_id
      element_name
      scale_id
      data_value
      n
      standard_error
      lower_ci_bound
      upper_ci_bound
      recommend_suppress
      not_relevant
      date
      domain_source
    
  
  
    
      0
       11-1011.00
       2.A.1.a
       Reading Comprehension
       IM
       4.38
       8
       0.18
       4.02
       4.73
       N
       n/a
       06/2010
       Analyst
    
    
      2
       11-1011.00
       2.A.1.b
            Active Listening
       IM
       4.38
       8
       0.18
       4.02
       4.73
       N
       n/a
       06/2010
       Analyst
    
    
      4
       11-1011.00
       2.A.1.c
                     Writing
       IM
       4.12
       8
       0.23
       3.68
       4.57
       N
       n/a
       06/2010
       Analyst
    
    
      6
       11-1011.00
       2.A.1.d
                    Speaking
       IM
       4.38
       8
       0.18
       4.02
       4.73
       N
       n/a
       06/2010
       Analyst
    
    
      8
       11-1011.00
       2.A.1.e
                 Mathematics
       IM
       3.00
       8
       0.19
       2.63
       3.37
       N
       n/a
       06/2010
       Analyst
    
  

5 rows × 13 columns



In [26]:

    
skills_final['domain'] = 'Skills'
skills_pt = skills_final.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
skills_pt.head()









    Out[26]:






  
    
      domain
      Skills
      
    
    
      element_name
      Active Learning
      Active Listening
      Complex Problem Solving
      Coordination
      Critical Thinking
      Equipment Maintenance
      Equipment Selection
      Installation
      Instructing
      Judgment and Decision Making
      Learning Strategies
      Management of Financial Resources
      Management of Material Resources
      Management of Personnel Resources
      Mathematics
      Monitoring
      Negotiation
      Operation Monitoring
      Operation and Control
      Operations Analysis
      
    
    
      onet_soc_code
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       4.00
       4.38
       4.50
       4.25
       4.38
       1
       1.00
       1.00
       3.25
       4.50
       3.38
       4.12
       3.62
       4.25
       3.00
       4.12
       4.00
       2.25
       1.88
       3.50
      ...
    
    
      11-1011.03
       3.50
       3.88
       4.00
       3.62
       4.00
       1
       1.12
       1.00
       3.25
       3.75
       3.38
       2.62
       2.38
       3.38
       2.75
       3.62
       2.88
       2.25
       1.62
       2.62
      ...
    
    
      11-1021.00
       3.50
       4.00
       3.50
       3.62
       3.88
       1
       1.25
       1.12
       3.12
       3.50
       2.75
       2.88
       3.12
       3.38
       2.25
       3.75
       3.50
       2.88
       2.12
       3.25
      ...
    
    
      11-2011.00
       3.25
       4.00
       3.50
       3.50
       3.75
       1
       1.25
       1.00
       2.88
       3.75
       2.75
       2.88
       2.50
       3.12
       3.00
       3.25
       3.38
       1.62
       1.25
       2.88
      ...
    
    
      11-2021.00
       3.50
       3.88
       3.38
       3.50
       3.88
       1
       1.00
       1.00
       3.12
       3.62
       3.00
       2.88
       2.38
       3.38
       2.75
       3.62
       3.25
       2.00
       1.00
       3.50
      ...
    
  

5 rows × 35 columns

Work Activities



In [27]:

    
#for work activities, we only want to keep rows where scale_id == 'IM'
work_activities_final = work_activities[work_activities.scale_id == 'IM']



In [28]:

    
len(work_activities_final)









    Out[28]:





37843



In [29]:

    
feature(work_activities_final)









    Out[29]:





41



In [30]:

    
work_activities_final['domain'] = 'Work_Activities'
work_activities_pt = work_activities_final.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
work_activities_pt.head()









    Out[30]:






  
    
      domain
      Work_Activities
      
    
    
      element_name
      Analyzing Data or Information
      Assisting and Caring for Others
      Coaching and Developing Others
      Communicating with Persons Outside Organization
      Communicating with Supervisors, Peers, or Subordinates
      Controlling Machines and Processes
      Coordinating the Work and Activities of Others
      Developing Objectives and Strategies
      Developing and Building Teams
      Documenting/Recording Information
      Drafting, Laying Out, and Specifying Technical Devices, Parts, and Equipment
      Establishing and Maintaining Interpersonal Relationships
      Estimating the Quantifiable Characteristics of Products, Events, or Information
      Evaluating Information to Determine Compliance with Standards
      Getting Information
      Guiding, Directing, and Motivating Subordinates
      Handling and Moving Objects
      Identifying Objects, Actions, and Events
      Inspecting Equipment, Structures, or Material
      Interacting With Computers
      
    
    
      onet_soc_code
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       4.19
       2.22
       3.91
       4.62
       4.75
       1.32
       4.00
       4.63
       4.55
       2.19
       1.22
       4.64
       2.68
       3.50
       4.75
       4.02
       1.48
       3.64
       1.49
       3.28
      ...
    
    
      11-1011.03
       3.85
       2.23
       3.64
       4.46
       4.58
       1.36
       3.96
       4.31
       4.12
       3.44
       2.56
       4.28
       3.31
       4.12
       4.48
       3.88
       1.62
       3.62
       2.36
       4.32
      ...
    
    
      11-1021.00
       3.49
       3.08
       3.41
       3.83
       3.74
       1.99
       4.09
       3.22
       3.56
       3.29
       2.70
       3.82
       3.36
       3.54
       4.37
       3.48
       2.53
       3.65
       3.36
       3.68
      ...
    
    
      11-2011.00
       2.81
       2.10
       2.68
       4.56
       4.28
       2.22
       3.06
       3.68
       3.27
       3.30
       1.70
       4.04
       3.07
       2.50
       4.38
       2.72
       1.86
       3.36
       2.13
       4.08
      ...
    
    
      11-2021.00
       3.52
       2.40
       3.54
       4.60
       4.58
       1.32
       3.96
       4.04
       4.24
       2.84
       1.92
       4.40
       3.16
       2.84
       4.52
       3.60
       1.52
       3.84
       1.84
       4.08
      ...
    
  

5 rows × 41 columns

Work Context



In [31]:

    
#in work context, we only want to keep rows where scale_id == 'CX' or 'CT'
work_context_final = work_context[(work_context['scale_id'] == 'CX') | (work_context['scale_id'] == 'CT')]



In [32]:

    
len(work_context_final)









    Out[32]:





52592



In [33]:

    
feature(work_context_final)









    Out[33]:





56



In [34]:

    
work_context_final_CX = work_context_final[work_context_final['scale_id'] == 'CX']
work_context_final_CT = work_context_final[work_context_final['scale_id'] == 'CT']



In [35]:

    
work_context_final_CX['domain'] = 'Work_Context'
work_context_CX_pt = work_context_final_CX.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
work_context_CX_pt.head()









    Out[35]:






  
    
      domain
      Work_Context
      
    
    
      element_name
      Consequence of Error
      Contact With Others
      Coordinate or Lead Others
      Cramped Work Space, Awkward Positions
      Deal With External Customers
      Deal With Physically Aggressive People
      Deal With Unpleasant or Angry People
      Degree of Automation
      Electronic Mail
      Exposed to Contaminants
      Exposed to Disease or Infections
      Exposed to Hazardous Conditions
      Exposed to Hazardous Equipment
      Exposed to High Places
      Exposed to Minor Burns, Cuts, Bites, or Stings
      Exposed to Radiation
      Exposed to Whole Body Vibration
      Extremely Bright or Inadequate Lighting
      Face-to-Face Discussions
      Freedom to Make Decisions
      
    
    
      onet_soc_code
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       3.55
       4.84
       4.32
       1.47
       3.83
       2.07
       3.92
       1.80
       5.00
       1.49
       1.01
       1.47
       1.49
       1.32
       1.34
       1.00
       1.44
       1.35
       5.00
       4.92
      ...
    
    
      11-1011.03
       2.35
       4.38
       4.12
       1.38
       3.73
       1.04
       2.38
       1.72
       4.96
       1.65
       1.12
       1.23
       1.17
       1.50
       1.15
       1.08
       1.08
       1.42
       4.76
       4.42
      ...
    
    
      11-1021.00
       3.04
       4.76
       4.20
       1.32
       4.48
       1.60
       3.39
       2.32
       4.26
       2.11
       1.68
       1.40
       1.68
       1.76
       2.19
       1.01
       1.15
       1.79
       4.60
       4.80
      ...
    
    
      11-2011.00
       2.06
       4.65
       4.12
       1.53
       3.89
       1.29
       2.73
       2.56
       5.00
       1.12
       1.17
       1.03
       1.11
       1.12
       1.31
       1.00
       1.03
       1.39
       4.56
       4.15
      ...
    
    
      11-2021.00
       2.40
       4.64
       3.72
       1.21
       4.00
       1.12
       2.56
       2.08
       5.00
       1.16
       1.00
       1.04
       1.12
       1.17
       1.04
       1.00
       1.00
       1.20
       4.84
       4.20
      ...
    
  

5 rows × 55 columns



In [36]:

    
work_context_final_CT['domain'] = 'Work_Context_Time'
work_context_CT_pt = work_context_final_CT.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
work_context_CT_pt.head()









    Out[36]:






  
    
      domain
      Work_Context_Time
    
    
      element_name
      Duration of Typical Work Week
      Work Schedules
    
    
      onet_soc_code
      
      
    
  
  
    
      11-1011.00
       2.91
       1.00
    
    
      11-1011.03
       2.77
       1.35
    
    
      11-1021.00
       2.67
       1.37
    
    
      11-2011.00
       2.51
       1.04
    
    
      11-2021.00
       2.68
       1.28
    
  

5 rows × 2 columns

Work Styles



In [37]:

    
#in work styles, we can keep everythin
work_styles_final = work_styles



In [38]:

    
len(work_styles_final)









    Out[38]:





14752



In [39]:

    
feature(work_styles_final)









    Out[39]:





16



In [40]:

    
work_styles_final['domain'] = 'Work_Styles'
work_styles_pt = work_styles_final.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
work_styles_pt.head()









    Out[40]:






  
    
      domain
      Work_Styles
    
    
      element_name
      Achievement/Effort
      Adaptability/Flexibility
      Analytical Thinking
      Attention to Detail
      Concern for Others
      Cooperation
      Dependability
      Independence
      Initiative
      Innovation
      Integrity
      Leadership
      Persistence
      Self Control
      Social Orientation
      Stress Tolerance
    
    
      onet_soc_code
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       4.66
       4.48
       4.24
       4.26
       3.95
       4.42
       4.67
       4.63
       4.79
       4.22
       4.85
       4.84
       4.61
       4.28
       4.02
       4.75
    
    
      11-1011.03
       4.19
       4.23
       4.31
       4.12
       3.48
       4.32
       4.23
       4.27
       4.60
       4.38
       4.58
       4.64
       4.31
       4.00
       3.35
       4.08
    
    
      11-1021.00
       4.07
       4.21
       4.22
       4.52
       3.96
       4.26
       4.73
       3.96
       4.36
       3.88
       4.36
       4.50
       4.24
       4.38
       3.56
       4.35
    
    
      11-2011.00
       4.30
       4.54
       4.16
       4.70
       3.93
       4.40
       4.74
       4.08
       4.71
       4.51
       4.66
       4.23
       4.23
       4.42
       3.99
       4.39
    
    
      11-2021.00
       4.24
       4.24
       3.84
       4.48
       3.72
       4.44
       4.56
       4.20
       4.32
       4.08
       4.40
       4.36
       4.28
       4.04
       3.88
       4.20
    
  

5 rows × 16 columns

Work Values



In [41]:

    
#in work values, we want to only keep rows where scale_id == 'EX'
work_values_final = work_values[work_values.scale_id == 'EX']



In [42]:

    
len(work_values_final)









    Out[42]:





5844



In [43]:

    
feature(work_values_final)









    Out[43]:





6



In [44]:

    
work_values_final['domain'] = 'Work_Values'
work_values_pt = work_values_final.pivot_table('data_value',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')
work_values_pt.head()









    Out[44]:






  
    
      domain
      Work_Values
    
    
      element_name
      Achievement
      Independence
      Recognition
      Relationships
      Support
      Working Conditions
    
    
      onet_soc_code
      
      
      
      
      
      
    
  
  
    
      11-1011.00
       6.33
       7.00
       7.00
       5.00
       5.33
       6.33
    
    
      11-1011.03
       6.67
       6.67
       6.00
       5.00
       3.33
       6.33
    
    
      11-1021.00
       5.33
       6.00
       5.67
       6.33
       4.67
       6.00
    
    
      11-1031.00
       5.33
       5.00
       5.00
       5.67
       4.00
       4.33
    
    
      11-2011.00
       5.33
       5.33
       5.33
       5.00
       4.00
       5.33
    
  

5 rows × 6 columns



In [45]:

    
occupation_data['element_name'] = "title"
occupation_data['domain'] = 'Occupation'
occ_data_pt = occupation_data.pivot_table('title',
                                     rows = 'onet_soc_code',
                                     cols = ['domain', 'element_name'],
                                     aggfunc = 'sum')


#combined_df = combined_df.rename(columns=lambda x: x.replace(' ', '_'))
occ_data_pt.Occupation.title = occ_data_pt.Occupation.title.apply(lambda x: x.replace(' ', '_'))
occ_data_pt.Occupation.title = occ_data_pt.Occupation.title.apply(lambda x: x.replace('/', '_'))
occ_data_pt.Occupation.title = occ_data_pt.Occupation.title.apply(lambda x: x.replace(',', '_'))


occ_data_pt.tail()
len(set(occ_data_pt.index))









    



/Users/agswigart/anaconda/envs/myenv/lib/python2.7/site-packages/pandas/core/generic.py:1830: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead
  self[name] = value






    Out[45]:





1110



In [46]:

    
domain_pt_list = [interests_pt, job_zones_pt, skills_pt, work_activities_pt, work_context_CX_pt, work_context_CT_pt, work_styles_pt, work_values_pt]

combined_df = pd.concat(domain_pt_list, axis=1)

combined_df = pd.merge(occ_data_pt, combined_df, left_index = True, right_index = True)

combined_df.head()









    Out[46]:






  
    
      domain
      Occupation
      Interests
      Job_Zones
      Skills
      
    
    
      element_name
      title
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
      job_zone
      Active Learning
      Active Listening
      Complex Problem Solving
      Coordination
      Critical Thinking
      Equipment Maintenance
      Equipment Selection
      Installation
      Instructing
      Judgment and Decision Making
      Learning Strategies
      Management of Financial Resources
      
    
  
  
    
      11-1011.00
                          Chief_Executives
       2.67
       5.33
       7
       2.00
       1.33
       3.67
       5
       4.00
       4.38
       4.5
       4.25
       4.38
        1
       1.00
       1.00
       3.25
       4.50
       3.38
       4.12
      ...
    
    
      11-1011.03
             Chief_Sustainability_Officers
       2.67
       4.33
       7
       4.33
       1.00
       2.33
       5
       3.50
       3.88
       4.0
       3.62
       4.00
        1
       1.12
       1.00
       3.25
       3.75
       3.38
       2.62
      ...
    
    
      11-1021.00
           General_and_Operations_Managers
       1.00
       3.67
       7
       1.33
       1.33
       3.33
       3
       3.50
       4.00
       3.5
       3.62
       3.88
        1
       1.25
       1.12
       3.12
       3.50
       2.75
       2.88
      ...
    
    
      11-1031.00
                               Legislators
       3.67
       3.00
       7
       3.67
       1.00
       4.67
       4
        NaN
        NaN
       NaN
        NaN
        NaN
      NaN
        NaN
        NaN
        NaN
        NaN
        NaN
        NaN
      ...
    
    
      11-2011.00
       Advertising_and_Promotions_Managers
       5.33
       4.67
       7
       2.00
       1.67
       2.33
       4
       3.25
       4.00
       3.5
       3.50
       3.75
        1
       1.25
       1.00
       2.88
       3.75
       2.75
       2.88
      ...
    
  

5 rows × 163 columns



In [47]:

    
# Remove spaces in element names
combined_df = combined_df.rename(columns=lambda x: x.replace(' ', '_'))

# combined_df.fillna(0, inplace=True)



In [48]:

    
combined_df.head()









    Out[48]:






  
    
      domain
      Occupation
      Interests
      Job_Zones
      Skills
      
    
    
      element_name
      title
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
      job_zone
      Active_Learning
      Active_Listening
      Complex_Problem_Solving
      Coordination
      Critical_Thinking
      Equipment_Maintenance
      Equipment_Selection
      Installation
      Instructing
      Judgment_and_Decision_Making
      Learning_Strategies
      Management_of_Financial_Resources
      
    
  
  
    
      11-1011.00
                          Chief_Executives
       2.67
       5.33
       7
       2.00
       1.33
       3.67
       5
       4.00
       4.38
       4.5
       4.25
       4.38
        1
       1.00
       1.00
       3.25
       4.50
       3.38
       4.12
      ...
    
    
      11-1011.03
             Chief_Sustainability_Officers
       2.67
       4.33
       7
       4.33
       1.00
       2.33
       5
       3.50
       3.88
       4.0
       3.62
       4.00
        1
       1.12
       1.00
       3.25
       3.75
       3.38
       2.62
      ...
    
    
      11-1021.00
           General_and_Operations_Managers
       1.00
       3.67
       7
       1.33
       1.33
       3.33
       3
       3.50
       4.00
       3.5
       3.62
       3.88
        1
       1.25
       1.12
       3.12
       3.50
       2.75
       2.88
      ...
    
    
      11-1031.00
                               Legislators
       3.67
       3.00
       7
       3.67
       1.00
       4.67
       4
        NaN
        NaN
       NaN
        NaN
        NaN
      NaN
        NaN
        NaN
        NaN
        NaN
        NaN
        NaN
      ...
    
    
      11-2011.00
       Advertising_and_Promotions_Managers
       5.33
       4.67
       7
       2.00
       1.67
       2.33
       4
       3.25
       4.00
       3.5
       3.50
       3.75
        1
       1.25
       1.00
       2.88
       3.75
       2.75
       2.88
      ...
    
  

5 rows × 163 columns



In [49]:

    
def normalize(series):
    maximum = series.max()
    minimum = series.min()
    return [(item - minimum) / (maximum - minimum) for item in series]



In [50]:

    
normed_df = combined_df.copy()
normed_df.iloc[:,1:] = normed_df.iloc[:,1:].apply(normalize)
normed_df.head()









    Out[50]:






  
    
      domain
      Occupation
      Interests
      Job_Zones
      Skills
      
    
    
      element_name
      title
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
      job_zone
      Active_Learning
      Active_Listening
      Complex_Problem_Solving
      Coordination
      Critical_Thinking
      Equipment_Maintenance
      Equipment_Selection
      Installation
      Instructing
      Judgment_and_Decision_Making
      Learning_Strategies
      Management_of_Financial_Resources
      
    
  
  
    
      11-1011.00
                          Chief_Executives
       0.278333
       0.721667
       1
       0.166667
       0.055000
       0.445000
       1.00
       0.949367
       0.800
       1.0
       1.000000
       0.852
        0
       0.000000
       0.000000
       0.621547
       1.0
       0.693252
       1.000000
      ...
    
    
      11-1011.03
             Chief_Sustainability_Officers
       0.278333
       0.555000
       1
       0.555000
       0.000000
       0.221667
       1.00
       0.738397
       0.600
       0.8
       0.720000
       0.700
        0
       0.040000
       0.000000
       0.621547
       0.7
       0.693252
       0.519231
      ...
    
    
      11-1021.00
           General_and_Operations_Managers
       0.000000
       0.445000
       1
       0.055000
       0.055000
       0.388333
       0.50
       0.738397
       0.648
       0.6
       0.720000
       0.652
        0
       0.083333
       0.038462
       0.585635
       0.6
       0.500000
       0.602564
      ...
    
    
      11-1031.00
                               Legislators
       0.445000
       0.333333
       1
       0.445000
       0.000000
       0.611667
       0.75
            NaN
         NaN
       NaN
            NaN
         NaN
      NaN
            NaN
            NaN
            NaN
       NaN
            NaN
            NaN
      ...
    
    
      11-2011.00
       Advertising_and_Promotions_Managers
       0.721667
       0.611667
       1
       0.166667
       0.111667
       0.221667
       0.75
       0.632911
       0.648
       0.6
       0.666667
       0.600
        0
       0.083333
       0.000000
       0.519337
       0.7
       0.500000
       0.602564
      ...
    
  

5 rows × 163 columns

Visualizing the Features



In [51]:

    
from math import floor,ceil
def draw_histogram(domain_frame):
    fig, axes = plt.subplots(nrows=int((ceil(float(len(domain_frame.columns))/3.0))), ncols=3, figsize = (12,len(domain_frame.columns)))
    plt.subplots_adjust(hspace = 0.4)
    for i,column_name in enumerate(domain_frame.columns):
        row = int(floor(i/3))
        column = i % 3
        domain_frame[column_name].hist(bins=10, ax=axes[row,column]); axes[row,column].set_title(column_name); axes[row,column].set_ylim([0,500])



In [52]:

    
# draw_histogram(combined_df.Job_Zones)
# normed_df.Interests.Investigative.hist(bins=10)
draw_histogram(normed_df.Interests)



In [53]:

    
draw_histogram(normed_df.Skills)



In [54]:

    
draw_histogram(normed_df.Work_Activities)



In [55]:

    
draw_histogram(normed_df.Work_Context)



In [56]:

    
draw_histogram(normed_df.Work_Context_Time)









    



---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-56-ab67bc215f7f> in <module>()
----> 1 draw_histogram(normed_df.Work_Context_Time)

<ipython-input-51-e444ff717fe3> in draw_histogram(domain_frame)
      6         row = int(floor(i/3))
      7         column = i % 3
----> 8         domain_frame[column_name].hist(bins=10, ax=axes[row,column]); axes[row,column].set_title(column_name); axes[row,column].set_ylim([0,500])

IndexError: too many indices



In [57]:

    
draw_histogram(normed_df.Work_Styles)



In [58]:

    
draw_histogram(normed_df.Work_Values)

Correlation between Features



In [59]:

    
corr_df = normed_df.iloc[:,1:].corr()



In [60]:

    
corr_df.index = corr_df.index.droplevel(0)
corr_df.head()









    Out[60]:






  
    
      domain
      Interests
      Job_Zones
      Skills
      
    
    
      element_name
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
      job_zone
      Active_Learning
      Active_Listening
      Complex_Problem_Solving
      Coordination
      Critical_Thinking
      Equipment_Maintenance
      Equipment_Selection
      Installation
      Instructing
      Judgment_and_Decision_Making
      Learning_Strategies
      Management_of_Financial_Resources
      Management_of_Material_Resources
      
    
    
      element_name
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Artistic
       1.000000
      -0.401212
       0.012268
       0.201901
      -0.388635
       0.311364
       0.427659
       0.360623
       0.292641
       0.245882
       0.188951
       0.240592
      -0.339151
      -0.250277
      -0.179429
       0.314698
       0.259137
       0.340943
       0.084552
       0.085508
      ...
    
    
      Conventional
      -0.401212
       1.000000
       0.267182
      -0.149493
      -0.137886
      -0.239040
      -0.177788
      -0.163082
      -0.000384
      -0.126095
      -0.092104
      -0.112272
      -0.098427
      -0.161965
      -0.064627
      -0.253891
      -0.144256
      -0.230510
       0.052377
      -0.044017
      ...
    
    
      Enterprising
       0.012268
       0.267182
       1.000000
      -0.311084
      -0.550015
       0.191897
       0.139388
       0.237040
       0.446623
       0.199139
       0.506449
       0.266822
      -0.420746
      -0.455691
      -0.310320
       0.184611
       0.304429
       0.169215
       0.412286
       0.261025
      ...
    
    
      Investigative
       0.201901
      -0.149493
      -0.311084
       1.000000
      -0.050547
       0.061720
       0.643559
       0.572187
       0.360374
       0.617767
       0.114631
       0.588151
      -0.064329
       0.040445
       0.045447
       0.382763
       0.495419
       0.439133
       0.151122
       0.167033
      ...
    
    
      Realistic
      -0.388635
      -0.137886
      -0.550015
      -0.050547
       1.000000
      -0.569939
      -0.536989
      -0.500476
      -0.664910
      -0.345526
      -0.425227
      -0.435737
       0.663616
       0.683996
       0.421077
      -0.419841
      -0.409970
      -0.453541
      -0.203513
      -0.025298
      ...
    
  

5 rows × 162 columns



In [61]:

    
corr_df.columns = corr_df.columns.droplevel(0)
corr_df.head()









    Out[61]:






  
    
      element_name
      Artistic
      Conventional
      Enterprising
      Investigative
      Realistic
      Social
      job_zone
      Active_Learning
      Active_Listening
      Complex_Problem_Solving
      Coordination
      Critical_Thinking
      Equipment_Maintenance
      Equipment_Selection
      Installation
      Instructing
      Judgment_and_Decision_Making
      Learning_Strategies
      Management_of_Financial_Resources
      Management_of_Material_Resources
      
    
    
      element_name
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Artistic
       1.000000
      -0.401212
       0.012268
       0.201901
      -0.388635
       0.311364
       0.427659
       0.360623
       0.292641
       0.245882
       0.188951
       0.240592
      -0.339151
      -0.250277
      -0.179429
       0.314698
       0.259137
       0.340943
       0.084552
       0.085508
      ...
    
    
      Conventional
      -0.401212
       1.000000
       0.267182
      -0.149493
      -0.137886
      -0.239040
      -0.177788
      -0.163082
      -0.000384
      -0.126095
      -0.092104
      -0.112272
      -0.098427
      -0.161965
      -0.064627
      -0.253891
      -0.144256
      -0.230510
       0.052377
      -0.044017
      ...
    
    
      Enterprising
       0.012268
       0.267182
       1.000000
      -0.311084
      -0.550015
       0.191897
       0.139388
       0.237040
       0.446623
       0.199139
       0.506449
       0.266822
      -0.420746
      -0.455691
      -0.310320
       0.184611
       0.304429
       0.169215
       0.412286
       0.261025
      ...
    
    
      Investigative
       0.201901
      -0.149493
      -0.311084
       1.000000
      -0.050547
       0.061720
       0.643559
       0.572187
       0.360374
       0.617767
       0.114631
       0.588151
      -0.064329
       0.040445
       0.045447
       0.382763
       0.495419
       0.439133
       0.151122
       0.167033
      ...
    
    
      Realistic
      -0.388635
      -0.137886
      -0.550015
      -0.050547
       1.000000
      -0.569939
      -0.536989
      -0.500476
      -0.664910
      -0.345526
      -0.425227
      -0.435737
       0.663616
       0.683996
       0.421077
      -0.419841
      -0.409970
      -0.453541
      -0.203513
      -0.025298
      ...
    
  

5 rows × 162 columns



In [62]:

    
corr_pairs_list = []
for i in range(len(corr_df.index)):
    row_name = corr_df.index[i]
    for j in range(i + 1, len(corr_df.columns)):
        column_name = corr_df.columns[j]
        corr_pairs_list.append([row_name,column_name, corr_df.ix[i,j]])



In [63]:

    
corr_pairs_df = DataFrame(corr_pairs_list)



In [64]:

    
corr_pairs_df.sort(2)









    Out[64]:






  
    
      
      0
      1
      2
    
  
  
    
      12446
                                      Spend_Time_Sitting
                                     Spend_Time_Standing
      -0.967267
    
    
      12448
                                      Spend_Time_Sitting
                          Spend_Time_Walking_and_Running
      -0.835168
    
    
      12265
                 Spend_Time_Bending_or_Twisting_the_Body
                                      Spend_Time_Sitting
      -0.795219
    
    
      8911 
                  Performing_General_Physical_Activities
                                      Spend_Time_Sitting
      -0.782129
    
    
      10586
                                         Electronic_Mail
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.773786
    
    
      7753 
                             Handling_and_Moving_Objects
                                      Spend_Time_Sitting
      -0.751062
    
    
      5861 
                                                 Writing
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.717948
    
    
      8051 
                              Interacting_With_Computers
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.717089
    
    
      4486 
                                   Reading_Comprehension
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.713159
    
    
      7717 
                             Handling_and_Moving_Objects
                                         Electronic_Mail
      -0.711717
    
    
      10990
          Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings
                                      Spend_Time_Sitting
      -0.711399
    
    
      10593
                                         Electronic_Mail
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.705413
    
    
      12376
       Spend_Time_Kneeling,_Crouching,_Stooping,_or_C...
                                      Spend_Time_Sitting
      -0.703470
    
    
      10592
                                         Electronic_Mail
                                     Spend_Time_Standing
      -0.699675
    
    
      668  
                                               Realistic
                                                Speaking
      -0.697539
    
    
      5868 
                                                 Writing
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.695551
    
    
      11738
                     Indoors,_Environmentally_Controlled
                           Very_Hot_or_Cold_Temperatures
      -0.688135
    
    
      5133 
                                                Speaking
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.687152
    
    
      5797 
                                                 Writing
                             Handling_and_Moving_Objects
      -0.683033
    
    
      11098
                         Exposed_to_Whole_Body_Vibration
                     Indoors,_Environmentally_Controlled
      -0.680449
    
    
      4422 
                                   Reading_Comprehension
                             Handling_and_Moving_Objects
      -0.670271
    
    
      10561
                                         Electronic_Mail
          Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings
      -0.667508
    
    
      11663
                         In_an_Open_Vehicle_or_Equipment
                     Indoors,_Environmentally_Controlled
      -0.667292
    
    
      4493 
                                   Reading_Comprehension
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.665916
    
    
      641  
                                               Realistic
                                        Active_Listening
      -0.664910
    
    
      8618 
       Operating_Vehicles,_Mechanized_Devices,_or_Equ...
                     Indoors,_Environmentally_Controlled
      -0.660575
    
    
      7687 
                             Handling_and_Moving_Objects
                              Interacting_With_Computers
      -0.660367
    
    
      8057 
                              Interacting_With_Computers
                                     Spend_Time_Standing
      -0.659657
    
    
      12340
                 Spend_Time_Keeping_or_Regaining_Balance
                                      Spend_Time_Sitting
      -0.655549
    
    
      1380 
                                        Active_Listening
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.652968
    
    
      8875 
                  Performing_General_Physical_Activities
                                         Electronic_Mail
      -0.650777
    
    
      7999 
                              Interacting_With_Computers
                  Performing_General_Physical_Activities
      -0.650413
    
    
      6325 
         Communicating_with_Persons_Outside_Organization
                   Pace_Determined_by_Speed_of_Equipment
      -0.650114
    
    
      1066 
                                                job_zone
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.644585
    
    
      10660
                                 Exposed_to_Contaminants
                                      Spend_Time_Sitting
      -0.644513
    
    
      11832
                                       Letters_and_Memos
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.643299
    
    
      10844
                          Exposed_to_Hazardous_Equipment
                     Indoors,_Environmentally_Controlled
      -0.641733
    
    
      10594
                                         Electronic_Mail
                          Spend_Time_Walking_and_Running
      -0.639322
    
    
      674  
                                               Realistic
                                                 Writing
      -0.638714
    
    
      686  
                                               Realistic
       Establishing_and_Maintaining_Interpersonal_Rel...
      -0.634594
    
    
      8026 
                              Interacting_With_Computers
          Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings
      -0.629980
    
    
      10556
                                         Electronic_Mail
                                 Exposed_to_Contaminants
      -0.629270
    
    
      5126 
                                                Speaking
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.627783
    
    
      12296
                 Spend_Time_Bending_or_Twisting_the_Body
                                             Recognition
      -0.626270
    
    
      1373 
                                        Active_Listening
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.623239
    
    
      5867 
                                                 Writing
                                     Spend_Time_Standing
      -0.621393
    
    
      663  
                                               Realistic
                                   Reading_Comprehension
      -0.620490
    
    
      5062 
                                                Speaking
                             Handling_and_Moving_Objects
      -0.619313
    
    
      12453
                                      Spend_Time_Sitting
       Wear_Common_Protective_or_Safety_Equipment_suc...
      -0.617690
    
    
      4434 
                                   Reading_Comprehension
                  Performing_General_Physical_Activities
      -0.617219
    
    
      8053 
                              Interacting_With_Computers
                 Spend_Time_Keeping_or_Regaining_Balance
      -0.616856
    
    
      10973
          Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings
                     Indoors,_Environmentally_Controlled
      -0.616442
    
    
      1002 
                                                job_zone
                             Handling_and_Moving_Objects
      -0.616243
    
    
      4492 
                                   Reading_Comprehension
                                     Spend_Time_Standing
      -0.615121
    
    
      11825
                                       Letters_and_Memos
                 Spend_Time_Bending_or_Twisting_the_Body
      -0.615008
    
    
      5836 
                                                 Writing
          Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings
      -0.614475
    
    
      12294
                 Spend_Time_Bending_or_Twisting_the_Body
                                             Achievement
      -0.614046
    
    
      5051 
                                                Speaking
                      Controlling_Machines_and_Processes
      -0.613475
    
    
      4097 
                                              Persuasion
       Spend_Time_Using_Your_Hands_to_Handle,_Control...
      -0.612960
    
    
      5809 
                                                 Writing
                  Performing_General_Physical_Activities
      -0.611421
    
    
      
      ...
      ...
      ...
    
  

13041 rows × 3 columns



In [65]:

    
corr_pairs_df.sort(2, ascending=False)









    Out[65]:






  
    
      
      0
      1
      2
    
  
  
    
      1884 
                                Equipment_Maintenance
                                               Repairing
       0.976774
    
    
      5166 
                                     Systems_Analysis
                                      Systems_Evaluation
       0.943016
    
    
      13027
                                          Achievement
                                             Recognition
       0.919637
    
    
      4405 
                                Reading_Comprehension
                                                 Writing
       0.916252
    
    
      2032 
                                  Equipment_Selection
                                               Repairing
       0.909113
    
    
      1866 
                                Equipment_Maintenance
                                     Equipment_Selection
       0.905916
    
    
      3588 
                                 Operation_Monitoring
                                   Operation_and_Control
       0.901321
    
    
      2311 
                                          Instructing
                                     Learning_Strategies
       0.896822
    
    
      10922
                               Exposed_to_High_Places
        Spend_Time_Climbing_Ladders,_Scaffolds,_or_Poles
       0.894912
    
    
      1928 
                                Equipment_Maintenance
          Repairing_and_Maintaining_Mechanical_Equipment
       0.892552
    
    
      13037
                                          Recognition
                                      Working_Conditions
       0.889523
    
    
      7696 
                          Handling_and_Moving_Objects
                  Performing_General_Physical_Activities
       0.888681
    
    
      4569 
                                            Repairing
          Repairing_and_Maintaining_Mechanical_Equipment
       0.887249
    
    
      1286 
                                     Active_Listening
                                                Speaking
       0.886562
    
    
      13030
                                          Achievement
                                      Working_Conditions
       0.885633
    
    
      3453 
                                          Negotiation
                                              Persuasion
       0.884341
    
    
      1414 
                              Complex_Problem_Solving
                                       Critical_Thinking
       0.876100
    
    
      1419 
                              Complex_Problem_Solving
                            Judgment_and_Decision_Making
       0.873171
    
    
      2745 
                    Management_of_Financial_Resources
                        Management_of_Material_Resources
       0.868843
    
    
      3603 
                                 Operation_Monitoring
                                         Troubleshooting
       0.865452
    
    
      1720 
                                    Critical_Thinking
                            Judgment_and_Decision_Making
       0.863481
    
    
      1893 
                                Equipment_Maintenance
                                         Troubleshooting
       0.863022
    
    
      4273 
                             Quality_Control_Analysis
                                         Troubleshooting
       0.861049
    
    
      4534 
                                            Repairing
                                         Troubleshooting
       0.860817
    
    
      1109 
                                      Active_Learning
                                       Critical_Thinking
       0.858779
    
    
      2041 
                                  Equipment_Selection
                                         Troubleshooting
       0.857719
    
    
      3746 
                                Operation_and_Control
                      Controlling_Machines_and_Processes
       0.857358
    
    
      1440 
                              Complex_Problem_Solving
                                      Systems_Evaluation
       0.853486
    
    
      6601 
       Coordinating_the_Work_and_Activities_of_Others
                           Developing_and_Building_Teams
       0.848110
    
    
      12821
                                   Achievement/Effort
                                             Persistence
       0.847364
    
    
      5930 
                        Analyzing_Data_or_Information
                                  Processing_Information
       0.844796
    
    
      2076 
                                  Equipment_Selection
          Repairing_and_Maintaining_Mechanical_Equipment
       0.844682
    
    
      12953
                                           Initiative
                                             Persistence
       0.841958
    
    
      12481
                                  Spend_Time_Standing
                          Spend_Time_Walking_and_Running
       0.841053
    
    
      1114 
                                      Active_Learning
                            Judgment_and_Decision_Making
       0.840695
    
    
      6513 
                   Controlling_Machines_and_Processes
          Repairing_and_Maintaining_Mechanical_Equipment
       0.839191
    
    
      5694 
                                      Troubleshooting
          Repairing_and_Maintaining_Mechanical_Equipment
       0.837864
    
    
      13026
                                          Achievement
                                            Independence
       0.837742
    
    
      1107 
                                      Active_Learning
                                 Complex_Problem_Solving
       0.837309
    
    
      13034
                                         Independence
                                      Working_Conditions
       0.835889
    
    
      1439 
                              Complex_Problem_Solving
                                        Systems_Analysis
       0.835821
    
    
      1100 
                                             job_zone
                                             Achievement
       0.835461
    
    
      8020 
                           Interacting_With_Computers
                                         Electronic_Mail
       0.835297
    
    
      1115 
                                      Active_Learning
                                     Learning_Strategies
       0.835023
    
    
      11913
                         Outdoors,_Exposed_to_Weather
                                   Outdoors,_Under_Cover
       0.834886
    
    
      1102 
                                             job_zone
                                             Recognition
       0.832792
    
    
      5830 
                                              Writing
                                         Electronic_Mail
       0.831127
    
    
      11388
                         Frequency_of_Decision_Making
       Impact_of_Decisions_on_Co-workers_or_Company_R...
       0.830485
    
    
      4455 
                                Reading_Comprehension
                                         Electronic_Mail
       0.829915
    
    
      1865 
                                    Critical_Thinking
                                      Working_Conditions
       0.829546
    
    
      12897
                                   Concern_for_Others
                                      Social_Orientation
       0.828631
    
    
      12263
              Spend_Time_Bending_or_Twisting_the_Body
       Spend_Time_Kneeling,_Crouching,_Stooping,_or_C...
       0.824567
    
    
      1862 
                                    Critical_Thinking
                                             Recognition
       0.824394
    
    
      1256 
                                      Active_Learning
                                             Recognition
       0.823616
    
    
      6174 
                       Coaching_and_Developing_Others
                            Training_and_Teaching_Others
       0.823240
    
    
      11615
                  In_an_Enclosed_Vehicle_or_Equipment
                            Outdoors,_Exposed_to_Weather
       0.822343
    
    
      13031
                                         Independence
                                             Recognition
       0.821812
    
    
      8906 
               Performing_General_Physical_Activities
                 Spend_Time_Bending_or_Twisting_the_Body
       0.821150
    
    
      1139 
                                      Active_Learning
                                                 Writing
       0.821125
    
    
      2476 
                         Judgment_and_Decision_Making
                                      Systems_Evaluation
       0.820151
    
    
      
      ...
      ...
      ...
    
  

13041 rows × 3 columns



In [ ]:

    
normed_df_subset = normed_df.domain[['Interests', 'Skills', 'Knowledge']]
#corr_df_2 = normed_df.iloc[:,1:].corr()

	onet_soc_code	element_id	element_name	scale_id	data_value	date	domain_source	domain
0	11-1011.00	1.B.1.a	Realistic	OI	1.33	06/2008	Analyst	Interests
1	11-1011.00	1.B.1.b	Investigative	OI	2.00	06/2008	Analyst	Interests
2	11-1011.00	1.B.1.c	Artistic	OI	2.67	06/2008	Analyst	Interests
3	11-1011.00	1.B.1.d	Social	OI	3.67	06/2008	Analyst	Interests
4	11-1011.00	1.B.1.e	Enterprising	OI	7.00	06/2008	Analyst	Interests

domain	Interests
element_name	Artistic	Conventional	Enterprising	Investigative	Realistic	Social
onet_soc_code
11-1011.00	2.67	5.33	7	2.00	1.33	3.67
11-1011.03	2.67	4.33	7	4.33	1.00	2.33
11-1021.00	1.00	3.67	7	1.33	1.33	3.33
11-1031.00	3.67	3.00	7	3.67	1.00	4.67
11-2011.00	5.33	4.67	7	2.00	1.67	2.33

	onet_soc_code	job_zone	date	domain_source
0	11-1011.00	5	06/2006	Analyst
1	11-1011.03	5	07/2013	Analyst
2	11-1021.00	3	06/2008	Analyst
3	11-1031.00	4	06/2008	Analyst
4	11-2011.00	4	06/2010	Analyst

domain	Job_Zones
element_name	job_zone
onet_soc_code
11-1011.00	5
11-1011.03	5
11-1021.00	3
11-1031.00	4
11-2011.00	4

	onet_soc_code	element_id	element_name	scale_id	data_value	n	standard_error	lower_ci_bound	upper_ci_bound	recommend_suppress	not_relevant	date	domain_source
0	11-1011.00	2.A.1.a	Reading Comprehension	IM	4.38	8	0.18	4.02	4.73	N	n/a	06/2010	Analyst
2	11-1011.00	2.A.1.b	Active Listening	IM	4.38	8	0.18	4.02	4.73	N	n/a	06/2010	Analyst
4	11-1011.00	2.A.1.c	Writing	IM	4.12	8	0.23	3.68	4.57	N	n/a	06/2010	Analyst
6	11-1011.00	2.A.1.d	Speaking	IM	4.38	8	0.18	4.02	4.73	N	n/a	06/2010	Analyst
8	11-1011.00	2.A.1.e	Mathematics	IM	3.00	8	0.19	2.63	3.37	N	n/a	06/2010	Analyst

domain	Skills
element_name	Active Learning	Active Listening	Complex Problem Solving	Coordination	Critical Thinking	Equipment Maintenance	Equipment Selection	Installation	Instructing	Judgment and Decision Making	Learning Strategies	Management of Financial Resources	Management of Material Resources	Management of Personnel Resources	Mathematics	Monitoring	Negotiation	Operation Monitoring	Operation and Control	Operations Analysis
onet_soc_code
11-1011.00	4.00	4.38	4.50	4.25	4.38	1	1.00	1.00	3.25	4.50	3.38	4.12	3.62	4.25	3.00	4.12	4.00	2.25	1.88	3.50	...
11-1011.03	3.50	3.88	4.00	3.62	4.00	1	1.12	1.00	3.25	3.75	3.38	2.62	2.38	3.38	2.75	3.62	2.88	2.25	1.62	2.62	...
11-1021.00	3.50	4.00	3.50	3.62	3.88	1	1.25	1.12	3.12	3.50	2.75	2.88	3.12	3.38	2.25	3.75	3.50	2.88	2.12	3.25	...
11-2011.00	3.25	4.00	3.50	3.50	3.75	1	1.25	1.00	2.88	3.75	2.75	2.88	2.50	3.12	3.00	3.25	3.38	1.62	1.25	2.88	...
11-2021.00	3.50	3.88	3.38	3.50	3.88	1	1.00	1.00	3.12	3.62	3.00	2.88	2.38	3.38	2.75	3.62	3.25	2.00	1.00	3.50	...

domain	Work_Activities
element_name	Analyzing Data or Information	Assisting and Caring for Others	Coaching and Developing Others	Communicating with Persons Outside Organization	Communicating with Supervisors, Peers, or Subordinates	Controlling Machines and Processes	Coordinating the Work and Activities of Others	Developing Objectives and Strategies	Developing and Building Teams	Documenting/Recording Information	Drafting, Laying Out, and Specifying Technical Devices, Parts, and Equipment	Establishing and Maintaining Interpersonal Relationships	Estimating the Quantifiable Characteristics of Products, Events, or Information	Evaluating Information to Determine Compliance with Standards	Getting Information	Guiding, Directing, and Motivating Subordinates	Handling and Moving Objects	Identifying Objects, Actions, and Events	Inspecting Equipment, Structures, or Material	Interacting With Computers
onet_soc_code
11-1011.00	4.19	2.22	3.91	4.62	4.75	1.32	4.00	4.63	4.55	2.19	1.22	4.64	2.68	3.50	4.75	4.02	1.48	3.64	1.49	3.28	...
11-1011.03	3.85	2.23	3.64	4.46	4.58	1.36	3.96	4.31	4.12	3.44	2.56	4.28	3.31	4.12	4.48	3.88	1.62	3.62	2.36	4.32	...
11-1021.00	3.49	3.08	3.41	3.83	3.74	1.99	4.09	3.22	3.56	3.29	2.70	3.82	3.36	3.54	4.37	3.48	2.53	3.65	3.36	3.68	...
11-2011.00	2.81	2.10	2.68	4.56	4.28	2.22	3.06	3.68	3.27	3.30	1.70	4.04	3.07	2.50	4.38	2.72	1.86	3.36	2.13	4.08	...
11-2021.00	3.52	2.40	3.54	4.60	4.58	1.32	3.96	4.04	4.24	2.84	1.92	4.40	3.16	2.84	4.52	3.60	1.52	3.84	1.84	4.08	...

domain	Work_Context
element_name	Consequence of Error	Contact With Others	Coordinate or Lead Others	Cramped Work Space, Awkward Positions	Deal With External Customers	Deal With Physically Aggressive People	Deal With Unpleasant or Angry People	Degree of Automation	Electronic Mail	Exposed to Contaminants	Exposed to Disease or Infections	Exposed to Hazardous Conditions	Exposed to Hazardous Equipment	Exposed to High Places	Exposed to Minor Burns, Cuts, Bites, or Stings	Exposed to Radiation	Exposed to Whole Body Vibration	Extremely Bright or Inadequate Lighting	Face-to-Face Discussions	Freedom to Make Decisions
onet_soc_code
11-1011.00	3.55	4.84	4.32	1.47	3.83	2.07	3.92	1.80	5.00	1.49	1.01	1.47	1.49	1.32	1.34	1.00	1.44	1.35	5.00	4.92	...
11-1011.03	2.35	4.38	4.12	1.38	3.73	1.04	2.38	1.72	4.96	1.65	1.12	1.23	1.17	1.50	1.15	1.08	1.08	1.42	4.76	4.42	...
11-1021.00	3.04	4.76	4.20	1.32	4.48	1.60	3.39	2.32	4.26	2.11	1.68	1.40	1.68	1.76	2.19	1.01	1.15	1.79	4.60	4.80	...
11-2011.00	2.06	4.65	4.12	1.53	3.89	1.29	2.73	2.56	5.00	1.12	1.17	1.03	1.11	1.12	1.31	1.00	1.03	1.39	4.56	4.15	...
11-2021.00	2.40	4.64	3.72	1.21	4.00	1.12	2.56	2.08	5.00	1.16	1.00	1.04	1.12	1.17	1.04	1.00	1.00	1.20	4.84	4.20	...

domain	Work_Context_Time
element_name	Duration of Typical Work Week	Work Schedules
onet_soc_code
11-1011.00	2.91	1.00
11-1011.03	2.77	1.35
11-1021.00	2.67	1.37
11-2011.00	2.51	1.04
11-2021.00	2.68	1.28

domain	Work_Styles
element_name	Achievement/Effort	Adaptability/Flexibility	Analytical Thinking	Attention to Detail	Concern for Others	Cooperation	Dependability	Independence	Initiative	Innovation	Integrity	Leadership	Persistence	Self Control	Social Orientation	Stress Tolerance
onet_soc_code
11-1011.00	4.66	4.48	4.24	4.26	3.95	4.42	4.67	4.63	4.79	4.22	4.85	4.84	4.61	4.28	4.02	4.75
11-1011.03	4.19	4.23	4.31	4.12	3.48	4.32	4.23	4.27	4.60	4.38	4.58	4.64	4.31	4.00	3.35	4.08
11-1021.00	4.07	4.21	4.22	4.52	3.96	4.26	4.73	3.96	4.36	3.88	4.36	4.50	4.24	4.38	3.56	4.35
11-2011.00	4.30	4.54	4.16	4.70	3.93	4.40	4.74	4.08	4.71	4.51	4.66	4.23	4.23	4.42	3.99	4.39
11-2021.00	4.24	4.24	3.84	4.48	3.72	4.44	4.56	4.20	4.32	4.08	4.40	4.36	4.28	4.04	3.88	4.20

domain	Work_Values
element_name	Achievement	Independence	Recognition	Relationships	Support	Working Conditions
onet_soc_code
11-1011.00	6.33	7.00	7.00	5.00	5.33	6.33
11-1011.03	6.67	6.67	6.00	5.00	3.33	6.33
11-1021.00	5.33	6.00	5.67	6.33	4.67	6.00
11-1031.00	5.33	5.00	5.00	5.67	4.00	4.33
11-2011.00	5.33	5.33	5.33	5.00	4.00	5.33

domain	Occupation	Interests						Job_Zones	Skills
element_name	title	Artistic	Conventional	Enterprising	Investigative	Realistic	Social	job_zone	Active Learning	Active Listening	Complex Problem Solving	Coordination	Critical Thinking	Equipment Maintenance	Equipment Selection	Installation	Instructing	Judgment and Decision Making	Learning Strategies	Management of Financial Resources
11-1011.00	Chief_Executives	2.67	5.33	7	2.00	1.33	3.67	5	4.00	4.38	4.5	4.25	4.38	1	1.00	1.00	3.25	4.50	3.38	4.12	...
11-1011.03	Chief_Sustainability_Officers	2.67	4.33	7	4.33	1.00	2.33	5	3.50	3.88	4.0	3.62	4.00	1	1.12	1.00	3.25	3.75	3.38	2.62	...
11-1021.00	General_and_Operations_Managers	1.00	3.67	7	1.33	1.33	3.33	3	3.50	4.00	3.5	3.62	3.88	1	1.25	1.12	3.12	3.50	2.75	2.88	...
11-1031.00	Legislators	3.67	3.00	7	3.67	1.00	4.67	4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
11-2011.00	Advertising_and_Promotions_Managers	5.33	4.67	7	2.00	1.67	2.33	4	3.25	4.00	3.5	3.50	3.75	1	1.25	1.00	2.88	3.75	2.75	2.88	...

element_name	Artistic	Conventional	Enterprising	Investigative	Realistic	Social	job_zone	Active_Learning	Active_Listening	Complex_Problem_Solving	Coordination	Critical_Thinking	Equipment_Maintenance	Equipment_Selection	Installation	Instructing	Judgment_and_Decision_Making	Learning_Strategies	Management_of_Financial_Resources	Management_of_Material_Resources
element_name
Artistic	1.000000	-0.401212	0.012268	0.201901	-0.388635	0.311364	0.427659	0.360623	0.292641	0.245882	0.188951	0.240592	-0.339151	-0.250277	-0.179429	0.314698	0.259137	0.340943	0.084552	0.085508	...
Conventional	-0.401212	1.000000	0.267182	-0.149493	-0.137886	-0.239040	-0.177788	-0.163082	-0.000384	-0.126095	-0.092104	-0.112272	-0.098427	-0.161965	-0.064627	-0.253891	-0.144256	-0.230510	0.052377	-0.044017	...
Enterprising	0.012268	0.267182	1.000000	-0.311084	-0.550015	0.191897	0.139388	0.237040	0.446623	0.199139	0.506449	0.266822	-0.420746	-0.455691	-0.310320	0.184611	0.304429	0.169215	0.412286	0.261025	...
Investigative	0.201901	-0.149493	-0.311084	1.000000	-0.050547	0.061720	0.643559	0.572187	0.360374	0.617767	0.114631	0.588151	-0.064329	0.040445	0.045447	0.382763	0.495419	0.439133	0.151122	0.167033	...
Realistic	-0.388635	-0.137886	-0.550015	-0.050547	1.000000	-0.569939	-0.536989	-0.500476	-0.664910	-0.345526	-0.425227	-0.435737	0.663616	0.683996	0.421077	-0.419841	-0.409970	-0.453541	-0.203513	-0.025298	...

	0	1	2
12446	Spend_Time_Sitting	Spend_Time_Standing	-0.967267
12448	Spend_Time_Sitting	Spend_Time_Walking_and_Running	-0.835168
12265	Spend_Time_Bending_or_Twisting_the_Body	Spend_Time_Sitting	-0.795219
8911	Performing_General_Physical_Activities	Spend_Time_Sitting	-0.782129
10586	Electronic_Mail	Spend_Time_Bending_or_Twisting_the_Body	-0.773786
7753	Handling_and_Moving_Objects	Spend_Time_Sitting	-0.751062
5861	Writing	Spend_Time_Bending_or_Twisting_the_Body	-0.717948
8051	Interacting_With_Computers	Spend_Time_Bending_or_Twisting_the_Body	-0.717089
4486	Reading_Comprehension	Spend_Time_Bending_or_Twisting_the_Body	-0.713159
7717	Handling_and_Moving_Objects	Electronic_Mail	-0.711717
10990	Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings	Spend_Time_Sitting	-0.711399
10593	Electronic_Mail	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.705413
12376	Spend_Time_Kneeling,_Crouching,_Stooping,_or_C...	Spend_Time_Sitting	-0.703470
10592	Electronic_Mail	Spend_Time_Standing	-0.699675
668	Realistic	Speaking	-0.697539
5868	Writing	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.695551
11738	Indoors,_Environmentally_Controlled	Very_Hot_or_Cold_Temperatures	-0.688135
5133	Speaking	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.687152
5797	Writing	Handling_and_Moving_Objects	-0.683033
11098	Exposed_to_Whole_Body_Vibration	Indoors,_Environmentally_Controlled	-0.680449
4422	Reading_Comprehension	Handling_and_Moving_Objects	-0.670271
10561	Electronic_Mail	Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings	-0.667508
11663	In_an_Open_Vehicle_or_Equipment	Indoors,_Environmentally_Controlled	-0.667292
4493	Reading_Comprehension	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.665916
641	Realistic	Active_Listening	-0.664910
8618	Operating_Vehicles,_Mechanized_Devices,_or_Equ...	Indoors,_Environmentally_Controlled	-0.660575
7687	Handling_and_Moving_Objects	Interacting_With_Computers	-0.660367
8057	Interacting_With_Computers	Spend_Time_Standing	-0.659657
12340	Spend_Time_Keeping_or_Regaining_Balance	Spend_Time_Sitting	-0.655549
1380	Active_Listening	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.652968
8875	Performing_General_Physical_Activities	Electronic_Mail	-0.650777
7999	Interacting_With_Computers	Performing_General_Physical_Activities	-0.650413
6325	Communicating_with_Persons_Outside_Organization	Pace_Determined_by_Speed_of_Equipment	-0.650114
1066	job_zone	Spend_Time_Bending_or_Twisting_the_Body	-0.644585
10660	Exposed_to_Contaminants	Spend_Time_Sitting	-0.644513
11832	Letters_and_Memos	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.643299
10844	Exposed_to_Hazardous_Equipment	Indoors,_Environmentally_Controlled	-0.641733
10594	Electronic_Mail	Spend_Time_Walking_and_Running	-0.639322
674	Realistic	Writing	-0.638714
686	Realistic	Establishing_and_Maintaining_Interpersonal_Rel...	-0.634594
8026	Interacting_With_Computers	Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings	-0.629980
10556	Electronic_Mail	Exposed_to_Contaminants	-0.629270
5126	Speaking	Spend_Time_Bending_or_Twisting_the_Body	-0.627783
12296	Spend_Time_Bending_or_Twisting_the_Body	Recognition	-0.626270
1373	Active_Listening	Spend_Time_Bending_or_Twisting_the_Body	-0.623239
5867	Writing	Spend_Time_Standing	-0.621393
663	Realistic	Reading_Comprehension	-0.620490
5062	Speaking	Handling_and_Moving_Objects	-0.619313
12453	Spend_Time_Sitting	Wear_Common_Protective_or_Safety_Equipment_suc...	-0.617690
4434	Reading_Comprehension	Performing_General_Physical_Activities	-0.617219
8053	Interacting_With_Computers	Spend_Time_Keeping_or_Regaining_Balance	-0.616856
10973	Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings	Indoors,_Environmentally_Controlled	-0.616442
1002	job_zone	Handling_and_Moving_Objects	-0.616243
4492	Reading_Comprehension	Spend_Time_Standing	-0.615121
11825	Letters_and_Memos	Spend_Time_Bending_or_Twisting_the_Body	-0.615008
5836	Writing	Exposed_to_Minor_Burns,_Cuts,_Bites,_or_Stings	-0.614475
12294	Spend_Time_Bending_or_Twisting_the_Body	Achievement	-0.614046
5051	Speaking	Controlling_Machines_and_Processes	-0.613475
4097	Persuasion	Spend_Time_Using_Your_Hands_to_Handle,_Control...	-0.612960
5809	Writing	Performing_General_Physical_Activities	-0.611421
	...	...	...

	0	1	2
1884	Equipment_Maintenance	Repairing	0.976774
5166	Systems_Analysis	Systems_Evaluation	0.943016
13027	Achievement	Recognition	0.919637
4405	Reading_Comprehension	Writing	0.916252
2032	Equipment_Selection	Repairing	0.909113
1866	Equipment_Maintenance	Equipment_Selection	0.905916
3588	Operation_Monitoring	Operation_and_Control	0.901321
2311	Instructing	Learning_Strategies	0.896822
10922	Exposed_to_High_Places	Spend_Time_Climbing_Ladders,_Scaffolds,_or_Poles	0.894912
1928	Equipment_Maintenance	Repairing_and_Maintaining_Mechanical_Equipment	0.892552
13037	Recognition	Working_Conditions	0.889523
7696	Handling_and_Moving_Objects	Performing_General_Physical_Activities	0.888681
4569	Repairing	Repairing_and_Maintaining_Mechanical_Equipment	0.887249
1286	Active_Listening	Speaking	0.886562
13030	Achievement	Working_Conditions	0.885633
3453	Negotiation	Persuasion	0.884341
1414	Complex_Problem_Solving	Critical_Thinking	0.876100
1419	Complex_Problem_Solving	Judgment_and_Decision_Making	0.873171
2745	Management_of_Financial_Resources	Management_of_Material_Resources	0.868843
3603	Operation_Monitoring	Troubleshooting	0.865452
1720	Critical_Thinking	Judgment_and_Decision_Making	0.863481
1893	Equipment_Maintenance	Troubleshooting	0.863022
4273	Quality_Control_Analysis	Troubleshooting	0.861049
4534	Repairing	Troubleshooting	0.860817
1109	Active_Learning	Critical_Thinking	0.858779
2041	Equipment_Selection	Troubleshooting	0.857719
3746	Operation_and_Control	Controlling_Machines_and_Processes	0.857358
1440	Complex_Problem_Solving	Systems_Evaluation	0.853486
6601	Coordinating_the_Work_and_Activities_of_Others	Developing_and_Building_Teams	0.848110
12821	Achievement/Effort	Persistence	0.847364
5930	Analyzing_Data_or_Information	Processing_Information	0.844796
2076	Equipment_Selection	Repairing_and_Maintaining_Mechanical_Equipment	0.844682
12953	Initiative	Persistence	0.841958
12481	Spend_Time_Standing	Spend_Time_Walking_and_Running	0.841053
1114	Active_Learning	Judgment_and_Decision_Making	0.840695
6513	Controlling_Machines_and_Processes	Repairing_and_Maintaining_Mechanical_Equipment	0.839191
5694	Troubleshooting	Repairing_and_Maintaining_Mechanical_Equipment	0.837864
13026	Achievement	Independence	0.837742
1107	Active_Learning	Complex_Problem_Solving	0.837309
13034	Independence	Working_Conditions	0.835889
1439	Complex_Problem_Solving	Systems_Analysis	0.835821
1100	job_zone	Achievement	0.835461
8020	Interacting_With_Computers	Electronic_Mail	0.835297
1115	Active_Learning	Learning_Strategies	0.835023
11913	Outdoors,_Exposed_to_Weather	Outdoors,_Under_Cover	0.834886
1102	job_zone	Recognition	0.832792
5830	Writing	Electronic_Mail	0.831127
11388	Frequency_of_Decision_Making	Impact_of_Decisions_on_Co-workers_or_Company_R...	0.830485
4455	Reading_Comprehension	Electronic_Mail	0.829915
1865	Critical_Thinking	Working_Conditions	0.829546
12897	Concern_for_Others	Social_Orientation	0.828631
12263	Spend_Time_Bending_or_Twisting_the_Body	Spend_Time_Kneeling,_Crouching,_Stooping,_or_C...	0.824567
1862	Critical_Thinking	Recognition	0.824394
1256	Active_Learning	Recognition	0.823616
6174	Coaching_and_Developing_Others	Training_and_Teaching_Others	0.823240
11615	In_an_Enclosed_Vehicle_or_Equipment	Outdoors,_Exposed_to_Weather	0.822343
13031	Independence	Recognition	0.821812
8906	Performing_General_Physical_Activities	Spend_Time_Bending_or_Twisting_the_Body	0.821150
1139	Active_Learning	Writing	0.821125
2476	Judgment_and_Decision_Making	Systems_Evaluation	0.820151
	...	...	...