Replication of AFQT scores

Construction of AFQT Raw Scores

Apart from the ASVAB variables and AFQT_1, no missing values are expected. We then construct the AFQT raw scores as described online. Note that we need to adjust the numerical operations score (see NLSY Attachment 106, Table A); the percentiles for the raw AFQT score are taken from Table B.

Exclusion of respondents

36 participants with possibly altered testing conditions, as indicated by the variable R0614800, are excluded from the sample. The reason for excluding these respondents is stated in the Profile of American Youth - 1980 Nationwide Administration of the ASVAB on page 18:

Thirty-six cases were dropped from this final sample because test procedures had been altered for these individuals due to language problems (e.g., non-English speaking respondents) or physical and mental handicaps (e.g., blindness, cerebral palsy, and mental retardation).

In addition, variable R0614800 provides indicators for three other conditions, for which NLSY support could offer only limited explanations because an unpublished NORC field report is no longer available (original correspondence):

Altered testing condition Explanation
COMP-CONVERTED REFUSAL Participants were hesitant to take the ASVAB but were later convinced by instructors.
COMP-PROBLEM REPORTED No explanation.
COMP-SPANISH INSTR. CARDS The test was taken with Spanish instructions.
COMP-PROCEDURES ALTERED Testing procedures were altered for the participants due to language problems, physical or mental handicaps

In [ ]:
import os

import numpy as np
import pandas as pd

In [1]:
# Read the raw extract; the first row holds the column names and no column
# is used as the index.
afqt_path = os.path.join('..', 'data', 'external', 'afqt', 'afqt.csv')
afqt = pd.read_csv(afqt_path, index_col=False, header=0)

# Map the variable codes used in the extract to descriptive column names.
column_labels = {
    'R0000100': 'IDENTIFIER',
    'R0614800': 'ASVAB_NORMAL_ALTERED_TESTING',
    'R0615100': 'ASVAB_ARITHMETIC_REASONING',
    'R0615200': 'ASVAB_WORD_KNOWLEDGE',
    'R0615300': 'ASVAB_PARAGRAPH_COMPREHENSION',
    'R0615400': 'ASVAB_NUMERICAL_OPERATIONS',
    'R0618200': 'AFQT_1',
}

afqt = afqt.rename(columns=column_labels)

In [3]:
# Just making sure that every row carries an identifier.
assert not afqt['IDENTIFIER'].isnull().any()

# Negative entries in the ASVAB variables and in AFQT_1 are treated as
# missing values and replaced by NaN.
for label in [i for i in afqt if i.startswith('ASVAB')] + ['AFQT_1']:
    cond = afqt[label] < 0
    afqt.loc[cond, label] = np.nan

# Adjust the score of ``numerical operation`` according to the NLSY
# attachment 106. Scores of 48 and above are set to 50 first; the remaining
# scores are translated with the lookup table below.
# NOTE(review): the table has no entries for 4, 5 and 6, so these scores pass
# through unchanged -- confirm against Table A.
afqt.loc[afqt['ASVAB_NUMERICAL_OPERATIONS'] >= 48,
         'ASVAB_NUMERICAL_OPERATIONS'] = 50
adjust_no = {
    0: 0, 1: 0, 2: 1, 3: 2, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 14,
    13: 15, 14: 16, 15: 17, 16: 18, 17: 19, 18: 21, 19: 22, 20: 23, 21: 24,
    22: 25, 23: 26, 24: 27, 25: 28, 26: 29, 27: 30, 28: 31, 29: 33, 30: 34,
    31: 35, 32: 36, 33: 37, 34: 38, 35: 39, 36: 39, 37: 40, 38: 41, 39: 42,
    40: 43, 41: 44, 42: 45, 43: 46, 44: 47, 45: 48, 46: 49, 47: 49
}
# Assign the result back to the frame. Calling ``replace(..., inplace=True)``
# on the Series returned by attribute access is a chained in-place update that
# does not reach the DataFrame once pandas' Copy-on-Write is active.
afqt['ASVAB_NUMERICAL_OPERATIONS'] = (
    afqt['ASVAB_NUMERICAL_OPERATIONS'].replace(adjust_no))

# Now we construct the AFQT raw score based on the instructions from the
# NLSY website: the three unweighted subtests plus half of the adjusted
# numerical operations score. Any missing component yields a missing score.
afqt['AFQT_RAW'] = (
    afqt['ASVAB_ARITHMETIC_REASONING']
    + afqt['ASVAB_WORD_KNOWLEDGE']
    + afqt['ASVAB_PARAGRAPH_COMPREHENSION']
    + 0.5 * afqt['ASVAB_NUMERICAL_OPERATIONS']
)

# Match ``AFQT_RAW`` to percentile of distribution.
#
# Each bin is a tuple ``(lower, upper, percentile)`` that covers raw scores
# with ``lower < AFQT_RAW <= upper``. The bins are applied in list order and
# some of the generated bins overlap, so a later bin overwrites an earlier one
# wherever both match. Do not reorder the list.

# Upper edges for percentiles 1-17; each bin starts where the previous ends.
upper_edges_1_to_17 = [
    23.5, 27, 29.5, 32, 34, 36.5, 38, 40, 41, 42.5, 44, 45.5, 47, 48.5,
    49.5, 51, 52.5,
]
# Upper edges for percentiles 29-49; the first of these bins starts at 63.5.
upper_edges_29_to_49 = [
    64, 65, 65.5, 66.5, 67, 67.5, 68.5, 69, 69.5, 70.5, 71, 71.5, 72, 73,
    73.5, 74, 74.5, 75.5, 76, 76.5, 77.5,
]

percentile_bins = []

# Percentile 1 has no lower bound.
lower = -np.inf
for percentile, upper in enumerate(upper_edges_1_to_17, 1):
    percentile_bins.append((lower, upper, percentile))
    lower = upper

# Percentiles 18-28 are one raw-score point wide each.
for i in range(18, 29):
    percentile_bins.append((i + 34.5, i + 35.5, i))

lower = 63.5
for percentile, upper in enumerate(upper_edges_29_to_49, 29):
    percentile_bins.append((lower, upper, percentile))
    lower = upper

# From percentile 50 onwards the lower edges advance in steps of 0.5 while
# the generated upper edges run ahead; the excess is trimmed by later bins.
# NOTE(review): the bin of percentile 61, (83, 89], is completely covered by
# the bins of percentiles 62 and above, so 61 is never present in the result
# -- confirm against Table B.
for i, j in enumerate(range(50, 62), 1):
    percentile_bins.append((j + 28 - 0.5 * i, j + 28, j))
for i, j in enumerate(range(62, 94), 1):
    percentile_bins.append((j + 21.5 - 0.5 * i, j + 21.5, j))
percentile_bins.append((99, 100, 94))
for i, j in enumerate(range(95, 98), 1):
    percentile_bins.append((j + 5.5 - 0.5 * i, j + 5.5, j))
percentile_bins.append((101.5, 102.5, 98))
percentile_bins.append((102.5, 105, 99))

# Start from an all-missing column so that rows without a raw score (or with
# a raw score outside every bin) stay NaN.
afqt['AFQT_PERCENTILES'] = np.nan
for lower, upper, percentile in percentile_bins:
    in_bin = (afqt['AFQT_RAW'] > lower) & (afqt['AFQT_RAW'] <= upper)
    afqt.loc[in_bin, 'AFQT_PERCENTILES'] = percentile

# Set AFQT scores to NaN for respondents with altered testing conditions
afqt.loc[
    afqt['ASVAB_NORMAL_ALTERED_TESTING'] == 67, 'AFQT_PERCENTILES'] = np.nan

As a test, we compare the percentiles of our measure to AFQT_1. Both should be identical.


In [4]:
# ``Series.equals`` considers NaNs in the same location equal. A plain
# conditional is used instead of ``assert`` so that the comparison is still
# carried out when Python runs with assertions disabled (``-O``).
if not afqt['AFQT_PERCENTILES'].equals(afqt['AFQT_1']):
    print('The two colums are not equal at this point.')