In [2]:
import os
import time
import multiprocessing

import gensim
from sklearn.manifold import TSNE
import seaborn as sns

import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline

In [3]:
path = 'data'
comp_info = path + os.sep + 'comp_info.tsv'

# {compound_id: [compound_name, CAS_number]}
def load_compounds(path):
    compounds = {}
    compounds_list = []
    with open(path, 'r') as f:
        for line in f:
            if line[0] == '#':
                pass
            else:
                line_split = line.rstrip().split('\t')
                compounds_id = line_split[0]
                compounds_list = line_split[1:]
                compounds[compounds_id] = compounds_list
    return compounds

In [4]:
compounds = load_compounds(comp_info)
compounds


Out[4]:
{'344': ['s-methyl_3-methylbutanethioate', '23747-45-7'],
 '0': ['jasmone', '488-10-8'],
 '346': ['4-(2,6,6-trimethyl-cyclohexa-1,3-dienyl)but-2-en-4-one',
  '23696-85-7'],
 '347': ['cinnamic_acid', '621-82-9'],
 '340': ['1-methylnaphthalene', '90-12-0'],
 '341': ['5-ethyl-3-hydroxy-4-methyl-2(5h)-furanone', '698-10-2'],
 '342': ['p-menthane-3,8-diol', '42822-86-6'],
 '343': ['isopropyl_myristate', '110-27-0'],
 '810': ['tannic_acid', '1401-55-4'],
 '811': ['myristic_acid', '544-63-8'],
 '812': ['2,2,3-trimethylcyclopent-3-en-1-yl_acetaldehyde', '4501-58-0'],
 '813': ['d-octalactone', '698-76-0'],
 '348': ['limonene_(d-,l-,_and_dl-)',
  '5989-27-5,7705-14-8,5989-54-8,5989-27-5'],
 '349': ['guaiene', '88-84-6'],
 '816': ['5-_and_6-decenoic_acid', '85392-03-6,85392-04-7'],
 '817': ['2-octen-4-one', '4643-27-0'],
 '595': ['2-octanone', '111-13-7'],
 '719': ['propionic_acid', '79-09-4'],
 '718': ['caryophyllene_alcohol', '4586-22-5'],
 '717': ['phenethyl_formate', '104-62-1'],
 '716': ['methyl_o-methoxybenzoate', '606-45-1'],
 '715': ['d-fenchone', '4695-62-9'],
 '714': ['bis-(methylthio)methane', '1618-26-4'],
 '713': ['geranyl_isobutyrate', '2345-26-8'],
 '712': ['2-propionylpyrrole', '1073-26-3'],
 '711': ['g-ionone', '79-76-5'],
 '710': ['phenethyl_propionate', '122-70-3'],
 '915': ['d,l-methionine', '59-51-8'],
 '914': ['pyrrolidine', '123-75-1'],
 '606': ['phenethyl_acetate', '103-45-7'],
 '917': ['furfuryl_isovalerate', '13678-60-9'],
 '594': ['4-hydroxy-5-methyl-3(2h)-furanone', '19322-27-1'],
 '736': ['2-methylundecanal', '110-41-8'],
 '916': ['2-acetyl-3-methylpyrazine', '23787-80-6'],
 '1061': ['methyl-2-methylbutyrate', '868-57-5'],
 '911': ['2-acetylpyridine', '1122-62-9'],
 '1063': ['butyl_ethyl_disulfide', '63986-03-8'],
 '1062': ['phosphoric_acid', '7664-38-2'],
 '1065': ['benzaldehyde_propylene_glycol_acetal', '2568-25-4'],
 '1064': ['phenol', '108-95-2'],
 '619': ['lauryl_alcohol', '112-53-8'],
 '910': ['ethyl_oleate', '111-62-6'],
 '913': ['n-butyric_acid', '107-92-6'],
 '298': ['formic_acid', '64-18-6'],
 '299': ['nonanoic_acid', '112-05-0'],
 '296': ['methyl-3-methylthiopropionate', '13532-18-8'],
 '297': ['p-a-dimethylbenzyl_alcohol', '536-50-5'],
 '294': ['2-pentenal', '764-39-6'],
 '295': ['eugenyl_methyl_ether', '93-15-2'],
 '292': ['1-octanol', '111-87-5'],
 '293': ['mintlactone', '13341-72-5'],
 '290': ['ethyl_thioacetate', '625-60-5'],
 '291': ['p-isopropylacetophenone', '645-13-6'],
 '591': ['butter_acids', '85536-25-0'],
 '590': ['methyl_1-propenyl_disulfide', '5905-47-5'],
 '593': ['ethyl_3-methylpentanoate', '5870-68-8'],
 '592': ['10-hydroxymethylene-2-pinene', '128-50-7'],
 '199': ['2-undecenal', '2463-77-6'],
 '198': ['valeric_acid', '109-52-4'],
 '597': ['d-hexalactone', '823-22-3'],
 '596': ['linalyl_isovalerate', '1118-27-0'],
 '195': ['l-histidine', '71-00-1'],
 '194': ['2,3-diethylpyrazine', '15707-24-1'],
 '197': ['2,6-dimethylthiophenol', '118-72-9'],
 '196': ['undecanal', '112-44-7'],
 '191': ['butyl_hexanoate', '626-82-4'],
 '190': ['(e,e)-2,4-decadien-1-ol', '18409-21-7'],
 '193': ['4,5-dihydro-3-(2h)thiophenone', '1003-04-9'],
 '192': ['2-ethylbutyric_acid', '88-09-5'],
 '270': ['2-(methylthio)ethanol', '5271-38-5'],
 '271': ['4-acetoxy-2,5-dimethyl-3(2h)-furanone', '4166-20-5'],
 '272': ['2,4-nonadienal', '6750-03-4'],
 '273': ['terpinyl_acetate', '80-26-2'],
 '274': ['3-hexanone', '589-38-8'],
 '275': ['lauric_acid', '143-07-7'],
 '276': ['4-heptenal_(cis-_and_trans-)', '6728-31-0'],
 '277': ['2,5-xylenol', '95-87-4'],
 '278': ['isoborneol', '124-76-5'],
 '279': ['3-methylcyclohexanone', '591-24-2'],
 '738': ['vanillin_acetate', '881-68-5'],
 '1067': ['1,4-cineole', '470-67-7'],
 '524': ['octanoic_acid', '124-07-2'],
 '525': ['3-methylbutyl_2-methylbutanoate', '27625-35-0'],
 '526': ['dl-phenylalanine', '150-30-1'],
 '527': ['2-ethyl-4-methylthiazole', '15679-12-6'],
 '520': ['trithioacetone', '828-26-2'],
 '521': ['dihydro-b-ionone', '17283-81-7'],
 '522': ['linalool_oxide', '1365-19-1'],
 '523': ['3-mercapto-3-methylbutyl_formate', '50746-10-6'],
 '1014': ['2-pentyl_acetate', '626-38-0'],
 '1015': ['nerol', '106-25-2'],
 '599': ['methyl_sulfide', '75-18-3'],
 '1017': ['geranyl_hexanoate', '10032-02-7'],
 '528': ["2,2'-(dithiodimethylene)-difuran", '4437-20-1'],
 '529': ['diethyl_sulfide', '352-93-2'],
 '994': ['theaspirane', '36431-72-8'],
 '1013': ['n-valeraldehyde', '110-62-3'],
 '1025': ['l-phenylalanine', '63-91-2'],
 '449': ['(+/?)_heptan-3-yl_butyrate', '39026-94-3'],
 '448': ['ethyl_3-phenylpropionate', '2021-28-5'],
 '443': ['2-pentyl_butyrate', '60415-61-4'],
 '442': ['nerolidol', '7212-44-4'],
 '441': ['geranyl_propionate', '105-90-8'],
 '440': ['2-hydroxyacetophenone', '582-24-1'],
 '447': ['diisopropyl_disulfide', '4253-89-8'],
 '446': ['prenylthiol', '5287-45-6'],
 '445': ['2-methylpentanal', '123-15-9'],
 '444': ['isopropyl_propionate', '637-78-5'],
 '108': ['isobutyraldehyde', '78-84-2'],
 '109': ['5-methyl-2,3-hexanedione', '13706-86-0'],
 '102': ['furfuryl_methyl_ether', '13679-46-4'],
 '103': ['dl-(3-amino-3-carboxypropyl)dimethylsulfonium_chloride',
  '1115-84-0'],
 '100': ['(z)-8-tetradecenal', '169054-69-7'],
 '101': ['b-caryophyllene', '87-44-5'],
 '106': ['isobornyl_isovalerate', '7779-73-9'],
 '107': ['methyl-2-pyrrolyl_ketone', '1072-83-9'],
 '104': ['ethyl-3-methylthiopropionate', '13327-56-5'],
 '105': ['pyruvic_acid', '127-17-3'],
 '902': ['2-nonenal', '2463-53-8'],
 '903': ['4-acetyl-2-methylpyrimidine', '67860-38-2'],
 '39': ['hexyl_isovalerate', '10032-13-0'],
 '38': ['4-hydroxy-3-methoxybenzoic_acid', '121-34-6'],
 '906': ['bornyl_acetate', '76-49-3'],
 '907': ['2-phenylpropionaldehyde', '93-53-8'],
 '904': ['thiazole', '288-47-1'],
 '905': ['3-methylbutyl-2-methylpropanoate', '2050-01-3'],
 '33': ['2,6,6-trimethyl-1,2-cyclohexen-1-carboxaldehyde', '977045-71-8'],
 '32': ['(+/?)-(2,6,6-trimethyl-2-hydroxycyclohexylidene)_ace-tic_acid_g-lactone',
  '15356-74-8'],
 '31': ['(z)-3-hexenyl_valerate', '35852-46-1'],
 '30': ['terpinyl_propionate', '80-27-3'],
 '37': ["4'-methylacetophenone", '122-00-9'],
 '36': ['3-methyl-1,2,4-trithiane', '43040-01-3'],
 '35': ['nonyl_acetate', '143-13-5'],
 '34': ['2-acetylfuran', '1192-62-7'],
 '641': ['3-methyl-1-cyclopentadecanone', '541-91-3'],
 '640': ['g-heptalactone', '105-21-5'],
 '643': ['2,6-dimethoxyphenol', '91-10-1'],
 '642': ['carveol', '99-48-9'],
 '645': ['2-hexenal', '505-57-7'],
 '644': ['isopropyl_butyrate', '638-11-9'],
 '438': ['3,7,11-trimethyl-2,6,10-dodecatrienal', '19317-11-4'],
 '439': ['hexyl_hexanoate', '6378-65-0'],
 '436': ['n-octyl_formate', '112-32-3'],
 '437': ['hexyl_trans-2-hexenoate', '.'],
 '434': ['linalyl_acetate', '115-95-7'],
 '435': ['1-hexadecanol', '36653-82-4'],
 '432': ['dihydro-b-ionol', '3293-47-8'],
 '433': ['isopulegol', '89-79-2'],
 '430': ['2-isobutyl-3-methoxypyrazine', '24683-00-9'],
 '431': ['9,12-octadecadienoic_acid_(48%)_plus_9,12,15-octadeca-_trienoinc_acid_(52%)_(methyl_esters)',
  '99999-08-0'],
 '1002': ['4,5-dimethyl-3-hydroxy-2,5-dihydrofuran-2-one', '28664-35-9'],
 '339': ['propyl_heptanoate', '7778-87-2'],
 '338': ['furfuryl_alcohol', '98-00-0'],
 '335': ['p-menth-1-en-3-ol', '491-04-3'],
 '334': ['cinnamyl_benzoate', '5320-75-2'],
 '337': ['2-methylpropyl-3-methylbutyrate', '589-59-3'],
 '336': ['methyl_phenyl_sulfide', '100-68-5'],
 '331': ['p-menth-8-en-2-one', '3792-53-8'],
 '330': ['3-methylthiopropyl_isothiocyanate', '505-79-3'],
 '333': ['isobutyl_acetate', '110-19-0'],
 '332': ['isobutyl_benzoate', '120-50-3'],
 '744': ['ethyl_heptanoate', '106-30-9'],
 '1000': ['furfuryl_mercaptan', '98-02-2'],
 '745': ['cis-2-nonen-1-ol', '41453-56-9'],
 '854': ['diethyl_malonate', '105-53-3'],
 '818': ['2-methyltetrahydrofuran-3-one', '3188-00-9'],
 '856': ['heptyl_formate', '112-23-2'],
 '857': ['benzyl_butyrate', '103-37-7'],
 '850': ['pyrazine', '290-37-9'],
 '851': ['o-(methylthio)phenol', '1073-29-6'],
 '852': ['isoamyl_nonanoate', '7779-70-6'],
 '345': ['g-nonalactone', '104-61-0'],
 '858': ['5-methyl-2-phenyl-2-hexenal', '21834-92-4'],
 '859': ['2-methyl-1-butanethiol', '1878-18-8'],
 '1081': ['allyl_2-furoate', '4208-49-5'],
 '748': ['ethyl_octadecanoate', '111-61-5'],
 '6': ['ethyl-3-hydroxybutyrate', '5405-41-4'],
 '900': ['potassium_sorbate', '590-00-1'],
 '848': ['4-hydroxy-4-methyl-5-hexenoic_acid_gamma-lactone', '1073-11-6'],
 '99': ['(+/?)-ethyl_3-acetoxy-2-methylbutyrate', '139564-43-5'],
 '98': ['furfuryl_thioacetate', '13678-68-7'],
 '844': ['cinnamyl_butyrate', '103-61-7'],
 '1085': ['benzyl_mercaptan', '100-53-8'],
 '91': ['methyl_valerate', '624-24-8'],
 '90': ['menthol', '89-78-1'],
 '93': ['g-hexalactone', '695-06-7'],
 '92': ['diphenyl_ether', '101-84-8'],
 '95': ['tolualdehydes,_mixed_o-,_m-,_p-', '1334-78-7'],
 '94': ['p-methoxybenzaldehyde', '123-11-5'],
 '97': ['3-hexanol', '623-37-0'],
 '96': ['propenyl_propyl_disulfide', '5905-46-4'],
 '814': ['4-(2-furyl)-3-buten-2-one', '623-15-4'],
 '1030': ['methyl_acetate', '79-20-9'],
 '815': ['isoamyl_phenylacetate', '102-19-2'],
 '1098': ['2-ethyl-4-hydroxy-5-methyl-3(2h)-furanone', '27538-10-9'],
 '1066': ['butyl_heptanoate', '5454-28-4'],
 '740': ['n-octyl_isovalerate', '7786-58-5'],
 '741': ['2-heptylfuran', '3777-71-7'],
 '742': ['methyl_mercaptan', '74-93-1'],
 '743': ['10-undecenoic_acid', '112-38-9'],
 '559': ['4-methyl-2-pentanone', '108-10-1'],
 '558': ['dipropyl_trisulfide', '6028-61-1'],
 '746': ['hydroxycitronellol', '107-74-4'],
 '747': ['3-phenylpropionic_acid', '501-52-0'],
 '555': ['2-octenal', '2363-89-5'],
 '554': ['n-nonanal', '124-19-6'],
 '557': ['2-ethylbutyraldehyde', '97-96-1'],
 '556': ['2,5-dimethylpyrazine', '123-32-0'],
 '551': ['sodium_acetate', '127-09-3'],
 '550': ['2-ethylpyrazine', '13925-00-3'],
 '553': ['propylene_glycol', '57-55-6'],
 '552': ['trans-2-nonen-1-ol', '31502-14-4'],
 '238': ['a-pinene', '80-56-8'],
 '239': ['2-ethyl-4,5-dimethyloxazole', '53833-30-0'],
 '234': ['2-nonanol', '628-99-9'],
 '235': ['hexanal', '66-25-1'],
 '236': ['propyl_isovalerate', '557-00-6'],
 '237': ['methyl_cyclohexanecarboxylate', '4630-82-4'],
 '230': ['methyl_phenyl_disulfide', '14173-25-2'],
 '231': ['methoxypyrazine', '3149-28-8'],
 '232': ['(z)-3-hexenyl_propionate', '33467-74-2'],
 '233': ['(z)-3-hexenyl_isobutyrate', '41519-23-7'],
 '1050': ['2,3-heptanedione', '977043-66-5'],
 '1051': ['3-decen-2-one', '10519-33-2'],
 '1052': ['linalyl_isobutyrate', '78-35-3'],
 '1053': ['2-methyl-3-pentenoic_acid', '37674-63-8'],
 '1054': ['2-heptenal', '2463-63-0'],
 '1055': ['neryl_isobutyrate', '2345-24-6'],
 '1056': ['hydroxycitronellal_dimethyl_acetal', '107-75-5'],
 '1057': ['ethyl_2-methylbutyrate', '7452-79-1'],
 '1058': ['phenethyl_octanoate', '5457-70-5'],
 '1059': ['propyl_thioacetate', '2307-10-0'],
 '992': ['2-pentanethiol', '2084-19-7'],
 '1': ['5-methylhexanoic_acid', '628-46-6'],
 '614': ['o-methylanisole', '578-58-5'],
 '146': ['citronellyl_formate', '105-85-1'],
 '147': ['isopentylamine', '107-85-7'],
 '144': ['isoamyl_propionate', '105-68-0'],
 '145': ['1,3-propanedithiol', '109-80-8'],
 '142': ['5-methylquinoxaline', '13708-12-8'],
 '143': ['4-[(2,6,6)-trimethyl-cyclohex-1-enyl]-but-2-en-4-one', '35044-68-9'],
 '140': ['cyclohexyl_acetate', '622-45-7'],
 '141': ['neryl_formate', '2142-94-1'],
 '612': ['verbenol', '473-67-6'],
 '613': ['diallyl_trisulfide', '2050-87-5'],
 '610': ['hexyl_acetate', '142-92-7'],
 '611': ['benzyl_alcohol', '100-51-6'],
 '616': ['hexyl_formate', '629-33-4'],
 '617': ['4-methyl-5-vinylthiazole', '1759-28-0'],
 '148': ['benzyl_benzoate', '120-51-4'],
 '149': ['isobutyl_formate', '542-55-2'],
 '1007': ['2-decenal', '3913-71-1'],
 '912': ['indole', '120-72-9'],
 '1006': ['1-phenyl-1-propanol', '93-54-9'],
 '951': ['farnesol', '4602-84-0'],
 '1005': ['cis-3-hexenyl_formate', '33467-73-1'],
 '1082': ['nonyl_alcohol', '143-08-8'],
 '1004': ['propionaldehyde', '123-38-6'],
 '948': ['2-ethyl-6-methylpyrazine', '13925-03-6'],
 '949': ['benzyl_isovalerate', '103-38-8'],
 '946': ['1,3,5-undecatriene_(a_mixture_of_1,3(e),5(z)-_and_______________1,3(e),5(e)-isomers)',
  '16356-11-9'],
 '947': ['3-methyl-2-buten-1-ol', '556-82-1'],
 '944': ['acetaldehyde_diisoamyl_acetal', '13002-09-0'],
 '945': ['a-damascone', '43052-87-5'],
 '942': ['3,5-dimethyl-1,2-cyclopentadione', '13494-07-0'],
 '943': ['(e,e)-3,5-octadien-2-one', '30086-02-3'],
 '940': ['2-acetyl-3-ethylpyrazine', '32974-92-8'],
 '941': ['3-decanone', '928-80-3'],
 '768': ['butyl_acetate', '123-86-4'],
 '689': ['2,4,5-trimethyl_thiazole', '13623-11-5'],
 '688': ['acetophenone', '98-86-2'],
 '685': ['ethyl_propyl_disulfide', '30453-31-7'],
 '684': ['d,l-valine', '516-06-3'],
 '687': ['ethyl_trans-2-decenoate', '7367-88-6'],
 '686': ['trimethylamine', '75-50-3'],
 '681': ['cadinene', '29350-73-0'],
 '680': ['neryl_acetate', '141-12-8'],
 '683': ['trans-2-octen-1-yl_acetate', '3913-80-2'],
 '682': ['cis-3-hexenyl_butyrate', '16491-36-4'],
 '623': ['2-methoxy-4-propylphenol', '2785-87-7'],
 '819': ['1,2-ethanedithiol', '540-63-6'],
 '1103': ['1-octen-3-yl_butyrate', '16491-54-6'],
 '622': ['benzyl_butyl_ether', '588-67-0'],
 '133': ['cis-4-hexenal', '4634-89-3'],
 '132': ['citronellal', '106-23-0'],
 '131': ['carvyl_acetate', '97-42-7'],
 '130': ['zingerone', '122-48-5'],
 '137': ['methyl-3-hexenoate', '2396-78-3'],
 '136': ['isoamyl_alcohol', '123-51-3'],
 '135': ['2-methylbutyl-3-methylbutanoate', '2445-77-4'],
 '134': ['methyl_(methylthio)acetate', '16630-66-3'],
 '494': ['dihydroxyacetone', '96-26-4'],
 '495': ['6,7-dihydro-2,3-dimethyl-5h-cyclopentapyrazine', '38917-63-4'],
 '139': ['quinoline', '91-22-5'],
 '138': ['citronellyl_valerate', '7540-53-6'],
 '490': ['hydrogen_sulfide', '7783-06-4'],
 '491': ['anisyl_acetate', '104-21-2'],
 '492': ['4-mercapto-4-methyl-2-pentanone', '19872-52-7'],
 '493': ['2,5-diethyltetrahydrofuran', '41239-48-9'],
 '24': ['lauryl_acetate', '112-66-3'],
 '25': ['4-methoxy-2-methyl-2-butanethiol', '94087-83-9'],
 '26': ['4-heptanone', '123-19-3'],
 '27': ['benzothiazole', '95-16-9'],
 '20': ['lactic_acid', '598-82-3'],
 '21': ['linalyl_butyrate', '78-36-4'],
 '22': ['methylsulfinylmethane', '67-68-5'],
 '23': ['ethyl_2,4,7-decatrienoate', '78417-28-4'],
 '927': ['skatole', '83-34-1'],
 '28': ['p-menthan-2-ol', '499-69-4'],
 '29': ['w-6-hexadecenlactone', '7779-50-2'],
 '407': ['isobutyl_cinnamate', '122-67-8'],
 '406': ['2-tridecanone', '593-08-8'],
 '405': ['levulinic_acid', '123-76-2'],
 '404': ['nootkatone', '4674-50-4'],
 '403': ['sulfur_dioxide', '7446-09-5'],
 '402': ['methyl_nicotinate', '93-60-7'],
 '401': ['isoamyl_laurate', '6309-51-9'],
 '400': ['heptyl_alcohol', '111-70-6'],
 '933': ['methyl_2-methyl-3-furyl_disulfide', '65505-17-1'],
 '932': ['isopropyl_2-methylbutyrate', '66576-71-4'],
 '931': ['1-octen-3-yl_acetate', '2442-10-6'],
 '930': ['isobornyl_acetate', '125-12-2'],
 '937': ['furfuryl_acetate', '623-17-6'],
 '629': ['linalyl_octanoate', '10024-64-3'],
 '409': ['l-lysine', '56-87-1'],
 '408': ['4,5-dimethyl_thiazole', '3581-91-7'],
 '1069': ['myrtenol', '515-00-4'],
 '628': ['p-mentha-1,3-diene', '99-86-5'],
 '1028': ['taurine', '107-35-7'],
 '758': ['isoquinoline', '119-65-3'],
 '1018': ['undecanoic_acid', '112-37-8'],
 '379': ['3-butylidenephthalide', '551-08-6'],
 '378': ['thujan-4-ol', '546-79-2'],
 '829': ['isopropyl_isobutyrate', '617-50-5'],
 '828': ['glycine', '56-40-6'],
 '1060': ['ethyl_myristate', '124-06-1'],
 '371': ['2,8-dithianon-4-en-4-carboxaldehyde', '59902-01-1'],
 '370': ['p-isopropylbenzyl_alcohol', '536-60-7'],
 '373': ['pyruvaldehyde', '78-98-8'],
 '372': ['3-oxododecanoic_acid_glyceride', '91052-70-9'],
 '375': ['benzyl_acetate', '140-11-4'],
 '374': ['coumarin_(prohibited)', '91-64-5'],
 '377': ['octyl_2-furoate', '39251-88-2'],
 '376': ['cyclohexaneethyl_acetate', '21722-83-8'],
 '1019': ['cis-5-octen-1-ol', '64275-73-6'],
 '708': ['3-hydroxy-2-oxopropionic_acid', '1113-60-6'],
 '709': ['isoamyl_isovalerate', '659-70-1'],
 '704': ['alpha-terpineol', '10482-56-1'],
 '705': ['a-methylbenzyl_acetate', '93-92-5'],
 '706': ['4-methyloctanoic_acid', '54947-74-9'],
 '707': ['quinine', '.'],
 '700': ['estragole', '140-67-0'],
 '618': ['cis-_and_trans-menthone-8-thioacetate', '57129-12-1'],
 '702': ['2-ethyl_(3_or_5_or_6)-methoxypyrazine_(85%)_plus_2-methyl_(3_or_5_or_6)-methoxypyrazine_(13%)',
  '977044-47-5'],
 '703': ['furfural', '98-01-1'],
 '393': ['d-camphor', '464-49-3'],
 '392': ['diallyl_polysulfides', '72869-75-1'],
 '88': ['valencene', '4630-07-3'],
 '89': ['methyl_2-methylpentanoate', '2177-77-7'],
 '397': ['6-methyl-5-hepten-2-one', '110-93-0'],
 '396': ['2-acetyl-3,_(5_or_6)-dimethylpyrazine,_mixture_of_isomers',
  '977043-63-2'],
 '395': ['ethyl_laurate', '106-33-2'],
 '394': ['3-propylidenephthalide', '17369-59-4'],
 '82': ['4-(methylthio)-2-oxobutanoic_acid', '583-92-6'],
 '83': ['4-ethyloctanoic_acid', '16493-80-4'],
 '80': ['methyl_2-hydroxy-4-methylpentanoate', '40348-72-9'],
 '81': ['isoamyl_octanoate', '2035-99-6'],
 '86': ['ethyl_butyrate', '105-54-4'],
 '87': ['difurfuryl_ether', '4437-22-3'],
 '84': ['2,3-dimethylpyrazine', '5910-89-4'],
 '85': ['2-methyl-4-propyl-1,3-oxathiane', '67715-80-4'],
 '797': ['aconitic_acid', '499-12-7'],
 '796': ['2-acetoxy-3-butanone', '4906-24-5'],
 '795': ['2-methyl-3-(p-isopropylphenyl)-propionaldehyde', '103-95-7'],
 '794': ['6,10-dimethyl-5,9-undecadien-2-one', '689-67-8'],
 '793': ['a-phellandrene', '99-83-2'],
 '792': ['propyl_propionate', '106-36-5'],
 '791': ['menthone', '89-80-5'],
 '790': ['cinnamyl_cinnamate', '122-69-0'],
 '799': ['1-phenyl-1,2-propanedione', '579-07-7'],
 '798': ['hexyl_alcohol', '111-27-3'],
 '7': ['cyclohexyl_butyrate', '1551-44-6'],
 '601': ['1-(methylthio)-2-butanone', '13678-58-5'],
 '607': ['benzyl_salicylate', '118-58-1'],
 '586': ['linalool', '78-70-6'],
 '587': ['isoeugenol', '97-54-1'],
 '584': ['2,6-dimethylpyridine', '108-48-5'],
 '585': ['6-hydroxydihydrotheaspirane', '65620-50-0'],
 '582': ['terpinyl_butyrate', '80-26-6'],
 '583': ['undecyl_alcohol', '112-42-5'],
 '580': ['2,6,6-trimethyl-6-vinyltetrahydropyran', '7392-19-0'],
 '581': ['g-octalactone', '104-50-7'],
 '588': ['methyl_trans-2-octenoate', '2396-85-'],
 '589': ["disodium_5'-guanylate", '5550-12-9'],
 '245': ['(2-furyl)-2-propanone', '6975-60-6'],
 '244': ['ethyl_trans-2-octenoate', '7367-82-0'],
 '247': ['2-phenyl-2-butenal', '4411-89-6'],
 '246': ['1-hydroxy-2-butanone', '5077-67-8'],
 '241': ['g-decalactone', '706-14-9'],
 '240': ['anisyl_alcohol', '105-13-5'],
 '243': ['hexanoic_acid', '142-62-1'],
 '242': ["2,2'-(thiodimethylene)_difuran", '13678-67-6'],
 '615': ['a,a-dimethylphenethyl_alcohol', '100-86-7'],
 '249': ['pyridine', '110-86-1'],
 '248': ['tetrahydrofurfuryl_alcohol', '97-99-4'],
 '924': ['bornyl_formate', '7492-41-3'],
 '970': ['4-propenyl-2,6-dimethoxyphenol', '20675-95-0'],
 '925': ['3-hydroxy-2-pentanone', '3142-66-3'],
 '519': ['methyl_benzoate', '93-58-3'],
 '518': ['piperine', '94-62-2'],
 '926': ['2,6-dimethylpyrazine', '108-50-9'],
 '1009': ['3,7-dimethyl-1-octanol', '106-21-8'],
 '1008': ['sclareolide', '.'],
 '511': ['citronellyl_acetate', '150-84-5'],
 '510': ['2-propylpyridine', '622-39-9'],
 '513': ['1-hexanethiol', '111-31-9'],
 '512': ['ethyl_nonanoate', '123-29-5'],
 '515': ['4-hexen-1-ol', '6126-50-7'],
 '514': ['4,5,6,7-tetrahydro-3,6-dimethylbenzofuran', '494-90-6'],
 '517': ['4-ethylbenzaldehyde', '4748-78-1'],
 '516': ['2-methyl-2-octenal', '49576-57-0'],
 '458': ['eugenyl_acetate', '93-28-7'],
 '459': ['d-neomenthol', '2216-52-6'],
 '621': ['isobutyl_heptanoate', '7779-80-8'],
 '620': ['safrole', '94-59-7'],
 '627': ['(z)-4-hydroxy-6-dodecenoic_acid_lactone', '18679-18-0'],
 '626': ['2-hydroxy-4-methyl_benzaldehyde', '698-27-1'],
 '625': ['thaumatin_b-recombinant', '53859-34-3'],
 '624': ['octyl_2-methylbutyrate', '29811-50-5'],
 '450': ['decyl_butyrate', '5454-09-1'],
 '451': ['(+/?)2-mercapto-2-methylpentan-1-ol', '258823-39-1'],
 '452': ['beta-cyclodextrin', '7585-39-9'],
 '453': ['heptyl_acetate', '112-06-1'],
 '454': ['3-phenylpropyl_propionate', '122-74-7'],
 '455': ['n-(4-hydroxy-3-methoxybenzyl)-8-methyl-6-nonena-mide', '404-86-4'],
 '456': ['methyl_nonanoate', '1731-84-6'],
 '457': ['ethyl_cyclohexanecarboxylate', '3289-28-9'],
 '1084': ['a-amylcinnamaldehyde', '122-40-7'],
 '979': ['isopulegone', '29606-79-9'],
 '179': ['ethyl_isobutyrate', '97-62-1'],
 '178': ['2,4-dimethylanisole', '6738-23-4'],
 '177': ['b-pinene', '127-91-3'],
 '176': ['dimethyl_succinate', '106-65-0'],
 '175': ['allyl_sulfide', '592-88-1'],
 '174': ['methyl_butyrate', '623-42-7'],
 '173': ['acetylpyrazine', '22047-25-2'],
 '172': ['ethyl_methyl_disulfide', '20333-39-5'],
 '171': ['camphene', '79-92-5'],
 '170': ['2-methylheptanoic_acid', '1188-02-9'],
 '977': ['linalyl_benzoate', '126-64-7'],
 '656': ['3-heptanone', '106-35-4'],
 '975': ['amyl_octanoate', '638-25-5'],
 '974': ['acetaldehyde', '75-07-0'],
 '973': ['2-hexen-1-yl_acetate', '2497-18-9'],
 '972': ['3-octen-2-one', '1669-44-9'],
 '971': ['methyl_hexanoate', '106-70-7'],
 '657': ['n-octanal', '124-13-0'],
 '1080': ['4-hydroxy-2,3-dimethyl-2,4-nonadienoic_acid_gamma-lactone',
  '774-64-1'],
 '654': ['methyl_ethyl_trisulfide', '31499-71-5'],
 '253': ['ethyl_3-hexenoate', '2396-83-0'],
 '978': ['3-octanone', '106-68-3'],
 '182': ['p-methoxycinnamaldehyde', '1963-36-6'],
 '183': ['4-methyl-3-penten-2-one', '141-79-7'],
 '180': ['phenethyl-2-methylbutyrate', '24817-51-4'],
 '181': ['p-methyl_diphenyl', '644-08-6'],
 '186': ['terpinyl_formate', '2153-26-6'],
 '187': ['4-propyl-2,6-dimethoxyphenol', '6766-82-1'],
 '184': ['5-ethyl-2-methylpyridine', '104-90-5'],
 '185': ['isobutyl_isobutyrate', '97-85-8'],
 '886': ['p,a,a-trimethylbenzyl_alcohol', '1197-01-9'],
 '652': ['1,2,5,6-tetrahydrocuminic_acid', '56424-87-4'],
 '188': ['furfuryl_methyl_sulfide', '1438-91-1'],
 '189': ['2-ethyl-3,5(6)-dimethylpyrazine', '27043-05-6'],
 '658': ['2,4-dimethylbenzaldehyde', '15764-16-6'],
 '653': ['acetic_anhydride', '108-24-7'],
 '650': ['borneol', '507-70-0'],
 '651': ['6-methyl-3,5-heptadien-2-one', '1604-28-0'],
 '1089': ['3-phenyl-1-propanol', '122-97-4'],
 '764': ['2-methylbutyraldehyde', '96-17-3'],
 '1088': ['2-formyl-6,6-dimethyl_bicyclo_[3.1.1]_hept-2-ene', '564-94-3'],
 '11': ['octyl_propionate', '142-60-9'],
 '10': ['2-(1-methylpropyl)thiazole', '18277-27-5'],
 '13': ['l-_and_dl-alanine', '302-72-7'],
 '12': ['2_or_5_or_6-methoxy-3-methylpyrazine_(mixture_of_isomers)',
  '68378-13-2'],
 '15': ['2-trans-3,7-dimethylocta-2,6-dienyl-2-ethyl_butanoate', '73019-14-4'],
 '14': ['2-propionyl-2-thiazoline', '29926-42-9'],
 '17': ['(z)-3_&_(e)-2-hexenyl_propionate_(mixture)', '33467-74-2'],
 '16': ['10-undecenal', '112-45-8'],
 '19': ['3,5-diethyl-2-methylpyrazine', '18138-05-1'],
 '18': ['(+/?)-1-phenylethylmercaptan', '6263-65-6'],
 '863': ['p-mentha-8-thiol-3-one', '38462-22-5'],
 '862': ['heptyl_butyrate', '5870-93-9'],
 '865': ['propyl_butyrate', '105-66-8'],
 '864': ['2(4)-isobutyl-4(2),6-dimethyldihydro-4h-1,3,5-dithiazine',
  '977161-98-0'],
 '867': ['methyl_phenylacetate', '101-41-7'],
 '866': ['propyl_isobutyrate', '644-49-5'],
 '884': ['dihydrocarveol', '619-01-2'],
 '938': ['p-mentha-1,8-dien-7-yl-acetate', '15111-96-3'],
 '659': ['(+/?)-dihydrofarnesol', '1335-48-4'],
 '883': ['(z)-4-dodecenal', '21944-98-9'],
 '753': ['2,3,5,6-tetramethylpyrazine', '1124-11-4'],
 '881': ['geranyl_tiglate', '7785-33-7'],
 '880': ['g-undecalactone', '104-67-6'],
 '887': ['3-phenylpropionaldehyde', '104-53-0'],
 '831': ['methyl_benzyl_disulfide', '699-10-5'],
 '885': ['2-methoxy-4-methylphenol', '93-51-6'],
 '752': ['ethyl_cis-4-heptenoate', '39924-27-1'],
 '928': ['4-methyl-2-phenyl-2-pentenal', '26643-91-4'],
 '62': ['trans,_trans-2,4-hexadienal', '142-83-6'],
 '888': ['hexyl_propionate', '2445-76-3'],
 '1012': ['p-cymene', '99-87-6'],
 '1032': ['diethyl_tartrate', '87-91-2'],
 '950': ['w-pentadecalactone', '106-02-5'],
 '756': ['3-octanol', '589-98-0'],
 '929': ['isophorone', '78-59-1'],
 '809': ['isobutyl_hexanoate', '105-79-3'],
 '322': ['acetone', '67-64-1'],
 '323': ['2,6-dimethyl-4-heptanol', '108-82-7'],
 '320': ['methyl_2-hexenoate', '2396-77-2'],
 '321': ['methyl-4-methylvalerate', '2412-80-8'],
 '326': ['methyl_anisate', '121-98-2'],
 '327': ['fenchyl_acetate', '13851-11-1'],
 '324': ['anisole', '100-66-3'],
 '325': ['p-mentha-1,8-dien-7-ol', '536-59-4'],
 '328': ['butyl_formate', '592-84-7'],
 '329': ['nonyl_octanoate', '7786-48-3'],
 '759': ['cuminaldehyde', '122-03-2'],
 '201': ['amyl_butyrate', '540-18-1'],
 '200': ['butyl_sulfide', '544-40-1'],
 '203': ['1-octen-3-ol', '3391-86-4'],
 '202': ['resorcinol', '108-46-3'],
 '205': ['delta-tetradecalactone', '2721-22-4'],
 '204': ['p-cresol', '106-44-5'],
 '207': ['1-p-menthene-8-thiol', '71159-90-5'],
 '206': ['phenylacetaldehyde_dimethyl_acetal', '101-48-4'],
 '209': ['2-acetyl-5-methylfuran', '1193-79-9'],
 '208': ['ethylene_oxide', '75-21-8'],
 '779': ['2-trans,_6-trans-nonadienal', '17587-33-6'],
 '778': ['methyl_heptanoate', '106-73-0'],
 '889': ['rhodinol', '6812-78-8'],
 '77': ['isobutyl_2-butenoate', '589-66-2'],
 '76': ['2-trans-6-cis-dodecadienal', '21662-13-5'],
 '75': ['3,7-dimethyl-1,3,6-octatriene', '13877-91-3'],
 '74': ['(z)-3-hexenyl_(e)-2-butenoate', '65405-80-3'],
 '73': ['g-dodecalactone', '2305-05-7'],
 '72': ['l-tyrosine', '60-18-4'],
 '71': ['methyl_thiobutyrate', '2432-51-1'],
 '70': ['2-propionylpyrroline', '133447-37-7'],
 '655': ['3-(methylthio)propyl_acetate', '16630-55-0'],
 '79': ['methyl_citronellate', '2270-60-2'],
 '78': ['2,6-dimethyl-5-heptenal', '106-72-9'],
 '2': ['l-glutamine', '56-85-9'],
 '1042': ['l-arginine', '74-79-3'],
 '1041': ['ethyl_acetoacetate', '141-97-9'],
 '1040': ['phenethyl_alcohol', '60-12-8'],
 '1047': ['2-methylcyclohexanone', '583-60-8'],
 '1043': ['2-methoxy-3_(5_and_6)-isopropylpyrazine', '25773-40-4'],
 '1045': ['heptanal', '111-71-7'],
 '1044': ['5-isopropenyl-2-methyl-2-vinyltetrahydrofuran', '13679-86-2'],
 '1049': ['isopropyl_alcohol', '67-63-0'],
 '1048': ['p-a-dimethyl_styrene', '1195-32-0'],
 '805': ['isobutyl_phenylacetate', '102-13-6'],
 '804': ['methyl_laurate', '111-82-0'],
 '669': ['methyl_furoate', '611-13-2'],
 '668': ['cis-3-hexenyl_lactate', '61931-81-5'],
 '667': ['2-ethyl-3-methylpyrazine', '15707-23-0'],
 '666': ['2-methyl-5-thiomethylfuran', '13678-59-6'],
 '665': ['1-amino-2-propanol', '78-96-6'],
 '664': ['amyl_formate', '638-49-3'],
 '663': ['2-tridecenal', '7774-82-5'],
 '662': ['2,4,5-trimethyl-d-3-oxazoline', '22694-96-8'],
 '661': ['1-octen-3-one', '4312-99-6'],
 '660': ['butyl_isovalerate', '109-19-3'],
 '769': ['cis-6-nonenal', '2277-19-2'],
 '692': ['5-hydroxy-2,4-decadienoic_acid_d-lactone', '.'],
 '693': ['3-ethyl-2,6-dimethylpyrazine', '13925-07-0'],
 '690': ['4-hydroxy-2,5-dimethyl-3(2h)-furanone', '3658-77-3'],
 '691': ['3-methylcrotonic_acid', '541-47-9'],
 '696': ['biphenyl', '92-52-4'],
 '697': ['2-methyltetrahydrothiophen-3-one', '13679-85-1'],
 '694': ['disodium_succinate', '150-90-3'],
 '695': ['1-ethyl-2-acetylpyrrole', '39741-41-8'],
 '698': ['b-ionol', '22029-76-1'],
 '699': ['sodium_citrate', '68-04-2'],
 '542': ['benzyl_ethyl_ether', '539-30-0'],
 '543': ['cinnamyl_alcohol', '104-54-1'],
 '540': ['pulegone', '89-82-7'],
 '541': ['b-ionone', '14901-07-6'],
 '546': ['butyl_laurate', '106-18-3'],
 '547': ['hexyl_isobutyrate', '2349-07-7'],
 '544': ['5-methyl-3-hexen-2-one', '5166-53-0'],
 '545': ['theobromine', '83-67-0'],
 '8': ['methyl_dihydrojasmonate', '24851-98-7'],
 '548': ['1-methyl-2-acetylpyrrole', '932-16-1'],
 '549': ['isovaleric_acid', '503-74-2'],
 '68': ['4-methyl-2,3-pentanedione', '7493-58-5'],
 '598': ['benzaldehyde_dimethyl_acetal', '1125-88-8'],
 '995': ['diacetyl', '431-03-8'],
 '869': ['methyl_linoleate_(48%)_methyl_linolenate_(52%)_mix-ture',
  '977136-80-3'],
 '997': ['2-pentadecanone', '2345-28-0'],
 '996': ['citronellol', '26489-01-0'],
 '991': ['butyl_butyrate', '109-21-7'],
 '990': ['5-methylfurfural', '620-02-0'],
 '993': ['butyl_phenylacetate', '122-43-0'],
 '868': ['allyl_heptanoate', '142-19-8'],
 '999': ['allyl_hexanoate', '123-68-2'],
 '998': ['(e)-7-methyl-3-octen-2-one', '.'],
 '120': ['butyl_salicylate', '2052-14-4'],
 '121': ['a-methylbenzyl_alcohol', '98-85-1'],
 '122': ['1,6-hexanedithiol', '1191-43-1'],
 '123': ['dehydrodihydroionone', '20483-36-7'],
 '124': ['5-hydroxy-2-decenoic_acid_d-lactone', '51154-96-2'],
 '125': ['allyl_methyl_trisulfide', '34135-85-8'],
 '126': ['3-methyl-2-butenal', '107-86-8'],
 '127': ['p-ethoxybenzaldehyde', '10031-82-0'],
 '128': ['4-decenoic_acid', '26303-90-2'],
 '129': ['butter_starter_distillate', '977019-27-4'],
 '765': ['benzyl_isobutyrate', '103-28-6'],
 '1016': ['isopropyl_benzoate', '939-48-0'],
 '1010': ['a-campholenic_alcohol', '1901-38-8'],
 '1011': ['carvacrol', '499-75-2'],
 '414': ['p-propylphenol', '645-56-7'],
 '415': ['a-ionone', '127-41-3'],
 '416': ['2-propylpyrazine', '18138-03-9'],
 '417': ['1-ethoxy-3-methyl-2-butene', '22094-00-4'],
 '410': ['2-trans,_4-trans-decadienal', '25152-84-5'],
 '411': ['3-oxotetradecanoic_acid_glyceride', '91052-73-2'],
 '412': ['methyl_phenethyl_ether', '3558-60-9'],
 '413': ['nona-2-trans,-6-cis-dienal', '557-48-2'],
 '920': ['p-vinylphenol', '2628-17-3'],
 '498': ['ethyl_decanoate', '110-38-3'],
 '922': ['2-ethyl-1-hexanol', '104-76-7'],
 '923': ['isobornyl_propionate', '2756-56-1'],
 '418': ['4-hydroxy-3,5-dimethoxybenzaldehyde', '134-96-3'],
 '419': ['ethyl_alcohol', '64-17-5'],
 '776': ['methyl_ethyl_sulfide', '625-80-9'],
 '499': ['phenoxyacetic_acid', '122-59-8'],
 '319': ['3,4-dimethyl-1,2-cyclopentanedione', '13494-06-9'],
 '318': ['2,6-dimethyl-10-methylene-2,6,11-dodecatrienal', '60066-88-8'],
 '313': ['n-furfuryl_pyrrole', '1438-94-4'],
 '312': ['ethyl_trans-2-hexenoate', '27829-72-7'],
 '311': ['2-octanol', '123-96-6'],
 '310': ['p-tolyl_acetate', '140-39-6'],
 '317': ['ethyl_acetate', '141-78-6'],
 '316': ['methyl_propionate', '554-12-1'],
 '315': ['butyl_stearate', '123-95-5'],
 '314': ['4-(1,1-dimethyl)_ethyl_phenol', '98-54-4'],
 '861': ['maltol', '118-71-8'],
 '921': ['citral_(neral)', '5392-40-5'],
 '496': ['menthyl_isovalerate', '16409-46-4'],
 '832': ['fumaric_acid', '110-17-8'],
 '833': ['isobutyl_angelate', '7779-81-9'],
 '830': ['isopulegyl_acetate', '57576-09-7'],
 '497': ['3-(methylthio)_hexyl_acetate', '51755-85-2'],
 '836': ['2-methoxy-4-vinylphenol', '7786-61-0'],
 '837': ['adipic_acid', '124-04-9'],
 '834': ['2,2-dimethyl-5-(1-methylpropen-1-yl)-__________________________________tetrahydrofuran',
  '7416-35-5'],
 '835': ['ethyl_acrylate', '140-88-5'],
 '838': ['2-trans-4-cis-7-cis-tridecatrienal', '13552-96-0'],
 '839': ['ethyl_p-anisate', '94-30-4'],
 '808': ['2-acetyl-2-thiazoline', '29926-41-8'],
 '3': ['1-methyl-3-methoxy-4-isopropylbenzene', '1076-56-8'],
 '725': ['3-methyl-2-oxobutanoic_acid', '759-05-7'],
 '368': ['9-octadecenal', '5090-41-5'],
 '369': ['4-methylpentanoic_acid', '646-07-1'],
 '366': ['hexyl_octanoate', '1117-55-1'],
 '367': ['2-methylbutyl-2-methyl_butyrate', '2445-78-5'],
 '364': ['trans,_trans-2,4-dodecadienal', '21662-16-8'],
 '365': ['3-(2-methylpropyl)pyridine', '14159-61-6'],
 '362': ['glycerol_tributyrate', '60-01-5'],
 '363': ['2-methyl-5-vinylpyrazine', '13925-08-1'],
 '360': ['piperonyl_acetate', '326-61-4'],
 '361': ['terpinolene', '586-62-9'],
 '959': ['phenethyl_hexanoate', '6290-37-5'],
 '952': ['cis-3-octen-1-ol', '20125-84-2'],
 '882': ['2-propionylthiazole', '43039-98-1'],
 '380': ['2-oxobutyric_acid', '600-18-0'],
 '381': ['4-hexene-3-one', '2497-21-4'],
 '382': ['(e)-3-(z)-6-nonadien-1-ol', '56805-23-3'],
 '383': ['2,3-dimethylbenzofuran', '3782-00-1'],
 '384': ['2-methylbutyric_acid', '116-53-0'],
 '385': ['1,5,5,9-tetramethyl-13-oxatricyclo-(8.3.0.0(4,9))________tridecane',
  '6790-58-5'],
 '386': ['o-methoxybenzaldehyde', '135-02-4'],
 '387': ['(e)-2-hexenyl_hexanoate', '53398-86-0'],
 '388': ['allyl_mercaptan', '870-23-5'],
 '389': ['2-isopropyl-4-methylthiazole', '15679-13-7'],
 '784': ['geranyl_acetate', '105-87-3'],
 '785': ['phenethyl_benzoate', '94-47-3'],
 '786': ['1-penten-3-ol', '616-25-1'],
 '787': ['isobutyl_butyrate', '539-90-2'],
 '780': ['methyl_furfuryl_disulfide', '57500-00-2'],
 '781': ['4-ethyl-2,6-dimethoxyphenol', '14059-92-8'],
 '782': ['2-pentylpyridine', '2294-76-0'],
 '783': ['3-hydroxy-4-phenylbutan-2-one', '5355-63-5'],
 '788': ['diethyl_succinate', '123-25-1'],
 '789': ['nonyl_isovalerate', '7786-47-2'],
 '860': ['ethyl_3-hydroxyhexanoate', '2305-25-1'],
 '605': ['m-dimethoxybenzene', '151-10-0'],
 '579': ['l-aspartic_acid', '8021-39-4'],
 '578': ['2,2,6-trimethylcyclohexanone', '2408-37-9'],
 '604': ["2'-aminoacetophenone", '551-93-9'],
 '573': ['hexyl_butyrate', '2639-63-6'],
 '572': ['ascorbic_acid', '50-81-7'],
 '571': ['linalyl_formate', '115-99-1'],
 '570': ['triethyl_citrate', '77-93-0'],
 '577': ['butyl_anthranilate', '7756-96-9'],
 '576': ['trans-anethole', '4180-23-8'],
 '575': ['o-vinylanisole', '612-15-7'],
 '574': ['2-isobutyl_thiazole', '18640-74-9'],
 '60': ['2-ethylhexanethiol', '7341-17-5'],
 '61': ['geranyl_butyrate', '106-29-6'],
 '258': ['methyl_3-hydroxyhexanoate', '21188-58-9'],
 '259': ['monosodium_glutamate', '142-47-2'],
 '64': ['2-dodecenal', '4826-62-4'],
 '65': ['n-butyl_valerate', '591-68-4'],
 '66': ['rhodinyl_formate', '141-09-3'],
 '67': ['(e)-2-decenoic_acid', '334-49-6'],
 '252': ['propyl_formate', '110-74-7'],
 '69': ['hydroxynonanoic_acid,_d-lactone', '3301-94-8'],
 '250': ['hydroxycitronellal', '107-75-5'],
 '251': ['isoamyl_benzoate', '94-46-2'],
 '256': ['myristaldehyde', '124-25-4'],
 '257': ['2,4-dihydroxybenzoic_acid', '89-86-1'],
 '254': ['trans-2-methyl-2-butenoic_acid', '80-59-1'],
 '255': ['a-hexyl_cinnamaldehyde', '101-86-0'],
 '603': ['isopropyl_formate', '625-55-8'],
 '602': ['eucalyptol', '470-82-6'],
 '939': ['glycerol', '56-81-5'],
 '731': ['bisabolene', '495-62-5'],
 '730': ['lauric_aldehyde', '112-54-9'],
 '733': ['ethyl_propyl_trisulfide', '31499-70-4'],
 '732': ['(z)(z)-3,6-nonadien-1-ol', '76649-25-7'],
 '735': ['p-tolyl-3-methyl_butyrate', '55066-56-3'],
 '734': ['phenylacetaldehyde', '122-78-1'],
 '508': ['diethyl_malate', '7554-12-3'],
 '509': ['6-undecanone', '927-49-1'],
 '506': ['benzyl_tiglate', '5837-78-5'],
 '507': ['1-butanethiol', '109-79-5'],
 '504': ['p-menth-8-en-1-ol', '138-87-4'],
 '505': ['ethyl_propionate', '105-37-3'],
 '502': ['heptyl_isobutyrate', '2349-13-5'],
 '503': ['veratraldehyde', '120-14-9'],
 '500': ['(z)-4-propenylphenol', '85960-81-2'],
 '501': ['4-methylnonanoic_acid', '45019-28-1'],
 '630': ['amyl_alcohol', '71-41-0'],
 '631': ['3-methyl-1-pentanol', '589-35-5'],
 '632': ['2(10)-pinen-3-ol', '5947-36-4'],
 '633': ['methyl_propyl_trisulfide', '17619-36-2'],
 '469': ['3-oxohexadecanoic_acid_glyceride', '91052-71-0'],
 '468': ['decyl_acetate', '112-17-4'],
 '636': ['benzyl_methyl_sulfide', '766-92-7'],
 '637': ['o-propylphenol', '644-35-9'],
 '465': ['d-ribose', '50-69-1'],
 '464': ['3-methyl-2-cyclohexen-1-one', '1193-18-6'],
 '467': ['citric_acid', '77-92-9'],
 '466': ['m-cresol', '108-39-4'],
 '461': ['4-carvomenthenol', '562-74-3'],
 '460': ['2-methylvaleric_acid', '97-61-0'],
 '463': ['propyl_benzoate', '2315-68-6'],
 '462': ['propiophenone', '93-55-0'],
 '901': ['2,3,5-trithiahexane', '42474-44-2'],
 '168': ['ethyl_2-(methylthio)acetate', '4455-13-4'],
 '169': ['2-(2-butyl)-4,5-dimethyl-3-thiazoline', '65894-82-8'],
 '164': ['neryl_butyrate', '999-40-6'],
 '165': ['eugenol', '97-53-0'],
 '166': ['1-decanol', '112-30-1'],
 '167': ['l-menthyl_acetate', '16409-45-3'],
 '160': ['2-methyl-3-furanthiol', '28588-74-1'],
 '161': ['p-dimethoxybenzene', '150-78-7'],
 '162': ['p-mentha-1,4(8)-dien-3-one', '491-09-8'],
 '163': ['4-methyl-2-pentenal', '5362-56-1'],
 '964': ['p-methylanisole', '104-93-8'],
 '965': ['piperonal', '120-57-0'],
 '966': ['erythrobic_acid', '89-65-6'],
 '967': ['geranyl_isovalerate', '109-20-6'],
 '960': ['2-acetylthiazole', '24295-03-2'],
 '961': ['2,3-octanedione', '585-25-1'],
 '962': ['4-methyl-5-thiazoleethanol', '137-00-8'],
 '963': ['benzophenone', '119-61-9'],
 '968': ['isopropenylpyrazine', '38713-41-6'],
 '969': ['3-heptanol', '589-82-2'],
 '936': ['benzyl_hexanoate', '6938-45-0'],
 '1106': ['ethyl_sorbate', '2396-84-1'],
 '1104': ['guaiacol', '90-05-1'],
 '1105': ['(+/?)-methyl_5-acetoxyhexanoate', '35234-22-1'],
 '1102': ['2-heptanol', '543-49-7'],
 '935': ['3-mercapto-3-methyl-1-butanol', '34300-94-2'],
 '1100': ['cis-3-hexenyl_benzoate', '25152-85-6'],
 '1101': ['hexyl_benzoate', '6789-88-4'],
 '934': ['o-methoxycinnamaldehyde', '1504-74-1'],
 '908': ['3-phenylpropyl_acetate', '122-72-5'],
 '909': ['2-heptanone', '110-43-0'],
 '1096': ['2-methyl-1,3-dithiolane', '5616-51-3'],
 '1090': ['methyl_jasmonate', '1211-29-6'],
 '1091': ['ethyl_benzoate', '93-89-0'],
 '600': ['propyl_alcohol', '71-23-8'],
 '878': ['ethyl_10-undecenoate', '692-86-4'],
 '879': ['ethyl_palmitate', '628-97-7'],
 '876': ['l-malic_acid', '97-67-6'],
 '877': ['acetal', '105-57-7'],
 '874': ['3-hexenyl_phenylacetate', '42436-07-7'],
 '875': ['3-nonanone', '925-78-0'],
 '872': ['methyl-cis-4-octenoate', '21063-71-8'],
 '873': ['ethyl_crotonate', '623-70-1'],
 '870': ['3-methyl-2,4-nonanedione', '113486-29-6'],
 '871': ['5h-5-methyl-6,7-dihydrocyclopenta(b)pyrazine', '23747-48-0'],
 '9': ['methyl_2-methylthiobutyrate', '42075-45-6'],
 '890': ['2,3,5-trimethylpyrazine', '14667-55-1'],
 '891': ['dehydromenthofurolactone', '75640-26-5'],
 '892': ['3-oxohexanoic_acid_glyceride', '91052-72-1'],
 '893': ['2,3-pentanedione', '600-14-6'],
 '894': ['isoeugenyl_methyl_ether', '93-16-3'],
 '647': ['3-methyl-2-butanol', '598-75-4'],
 '896': ['2,6,6-trimethylcyclohex-2-ene-1,4-dione', '1125-21-9'],
 '897': ['5-methyl-2-hepten-4-one', '81925-81-7'],
 '898': ['carvone', '6485-40-1'],
 '899': ['4-methylthio-2-butanone', '3407-39-7'],
 '1087': ['ethyl_pyruvate', '617-35-6'],
 '646': ['2-methyl-1-propanethiol', '513-44-0'],
 '1099': ['l-arabinose', '5328-37-0'],
 '649': ['4-ethylguaiacol', '2785-89-9'],
 '648': ['5-ethyl-2-hydroxy-3-methylcyclopent-2-en-1-one', '53263-58-4'],
 '1086': ['pyroligneous_acid,_extract', '8028-47-5'],
 '357': ['isobutyric_acid', '79-31-2'],
 '356': ['3-acetyl-2,5-dimethylthiophene', '2530-10-1'],
 '355': ['isoprenyl_acetate', '5205-07-2'],
 '354': ['citronellyl_isobutyrate', '97-89-2'],
 '353': ['5-isopropyl-2-methylpyrazine', '13925-05-8'],
 '352': ['2-undecanone', '112-12-9'],
 '351': ['3,6-dihydro-4-methyl-2-(2-methylpropen-1-yl)-2h-pyran', '1786-08-9'],
 '350': ['p-menth-1-ene-9-al', '29548-14-9'],
 '803': ['citronellyl_butyrate', '141-16-2'],
 '802': ['3-ethyl-2-hydroxy-4-methylcylcopent-2-en-1-one', '42348-12-9'],
 '801': ['sodium_diacetate', '126-96-5'],
 '800': ['ethyl_formate', '109-94-4'],
 '807': ['hexyl_phenylacetate', '5421-17-0'],
 '806': ['isobutyl_alcohol', '78-83-1'],
 '359': ['ethyl_levulinate', '539-88-8'],
 '358': ['decanoic_acid', '334-48-5'],
 '216': ['citronellyl_propionate', '141-14-0'],
 '217': ['3-(2-furyl)acrolein', '623-30-3'],
 '214': ['acetanisole', '100-06-1'],
 '215': ['benzoic_acid', '65-85-0'],
 '212': ['4-decenal', '30390-50-2'],
 '213': ['myrtenyl_acetate', '1079-01-2'],
 '210': ['6-methylquinoline', '91-62-3'],
 '211': ['2-ethyl-5-methylpyrazine', '13360-64-0'],
 '762': ['3-phenylpropyl_cinnamate', '122-68-9'],
 '763': ['n-butyl-2-methylbutyrate', '15706-73-7'],
 '760': ['linalyl_propionate', '144-39-8'],
 '761': ['3-hexen-1-ol', '928-96-1'],
 '766': ['methyl_myristate', '124-10-7'],
 '767': ['fenchyl_alcohol', '1632-73-1'],
 '218': ['d-decalactone', '705-86-2'],
 '219': ['a-ionol', '25312-34-9'],
 '957': ['methyl_octanoate', '111-11-5'],
 '956': ['cinnamyl_acetate', '103-54-8'],
 '1033': ['2-butanone', '78-93-3'],
 '1078': ['citronellic_acid', '502-47-6'],
 '1079': ['1-methyl-1-cyclopenten-3-one', '2758-18-1'],
 '1076': ['3,4-dimethoxy-1-vinylbenzene', '6380-23-0'],
 '1077': ['4-methyl-2-oxopentanoic_acid', '816-66-0'],
 '1074': ['2,6-dimethyl-4-heptanone', '108-83-8'],
 '1075': ['phenethyl_isobutyrate', '103-48-0'],
 '1072': ['s-methyl_thioacetate', '1534-08-3'],
 '1073': ['n-propyl_hexanoate', '626-77-7'],
 '1070': ['benzaldehyde', '100-52-7'],
 '1071': ['a-furfuryl_pentanoate', '36701-01-6'],
 '289': ['3-decanol', '1565-81-7'],
 '288': ['butyl_alcohol', '71-36-3'],
 '1003': ['polyarabinogalactan', '9036-66-2'],
 '4': ['methyl-3-phenylpropionate', '103-25-3'],
 '281': ['3-oxooctanoic_acid_glyceride', '91052-68-5'],
 '280': ['ethyl_lactate', '97-64-3'],
 '283': ['benzyl_formate', '104-57-4'],
 '282': ['vanillin,_natural', '121-33-5'],
 '285': ['3-(methylthio)_propionaldehyde', '3268-49-3'],
 '284': ['3,5-dimethyl-1,2,4-trithiolane', '23654-92-4'],
 '287': ['5-methyl-2-thiophenecarboxaldehyde', '13679-70-4'],
 '286': ['s-methyl_4-methylpentanethioate', '53966-59-9'],
 '1094': ['butyl_propionate', '590-01-2'],
 '1095': ['neryl_propionate', '105-91-9'],
 '1083': ['s-methyl_hexanethioate', '2432-77-1'],
 '1097': ['4-hydroxy-3-methyloctanoic_acid_lactone', '39212-23-2'],
 '678': ['5-hydroxy-7-decenoic_acid_d-lactone', '25524-95-2'],
 '679': ['isosafrole_(prohibited)', '120-58-1'],
 '1092': ['p-mentha-1,4-diene', '99-85-4'],
 '1093': ['ethyl_cis-4-octenoate', '34495-71-1'],
 '674': ['3-oxodecanoic_acid_glyceride', '91052-69-6'],
 '675': ['(e)-2-octen-1-ol', '18409-17-1'],
 '676': ['4-hydroxybenzaldehyde', '123-08-0'],
 '677': ['santalol,_a_and', '11031-45-1'],
 '670': ['decanal', '112-31-2'],
 '671': ['acetic_acid', '64-19-7'],
 '672': ['ethyl_tiglate', '5837-78-5'],
 '673': ['myrcene', '123-35-3'],
 '263': ['cis-6-nonen-1-ol', '35854-86-5'],
 '262': ['d-dodecalactone', '713-95-1'],
 '261': ['3,5,5-trimethyl-1-hexanol', '3452-97-9'],
 '260': ['d-piperitone', '6091-50-5'],
 '267': ['triacetin', '102-76-1'],
 '266': ['3-hepten-2-one', '1119-44-4'],
 '265': ['acetaldehyde_ethyl_(z)-3-hexenyl_acetal', '28069-74-1'],
 '264': ['3-methylthiobutyraldehyde', '16630-52-7'],
 '1031': ['3-hexenoic_acid', '4219-24-3'],
 '269': ['cis-3-hexen-1-yl_acetate', '3681-71-8'],
 '268': ['isoamyl_hexanoate', '2198-61-0'],
 '701': ['bornyl_isovalerate', '76-50-6'],
 '59': ['2-isobutyl-3-methylpyrazine', '13925-06-9'],
 '58': ['cis-3-hexenyl_isovalerate', '35154-45-1'],
 '55': ['isoamyl_cinnamate', '7779-65-9'],
 '54': ["disodium_5'-inosinate", '4691-65-0'],
 '57': ['dihydrocoumarin', '119-84-6'],
 '56': ['isoamyl_acetate', '123-92-2'],
 '51': ['pyroligneous_acid', '8030-97-5'],
 '50': ['isoamyl_formate', '110-45-2'],
 '53': ['methyl_p-hydroxybenzoate', '99-76-3'],
 '52': ['3-ethylpyridine', '536-78-7'],
 '537': ['erythro_and_threo-3-mercapto-2-methylbutan-1-ol', '227456-33-9'],
 '536': ['hexyl_2-methylbutyrate', '10032-15-2'],
 '535': ['rhodinyl_butyrate', '141-15-1'],
 '63': ['3-penten-2-one', '625-33-2'],
 '533': ['4-methylthiazole', '693-95-8'],
 '532': ['methyl_(e)-2-(z)-4-decadienoate', '4493-42-9'],
 '531': ['butylamine', '109-73-9'],
 '530': ['methyl-4-(methylthio)butyrate', '53053-51-3'],
 '539': ['3-mercapto-2-methylpentanal', '227456-28-2'],
 '538': ['phenethyl_butyrate', '103-52-6'],
 '987': ['4-phenyl-3-buten-2-one', '122-57-6'],
 '775': ['geranyl_formate', '105-86-2'],
 '988': ['ethyl_isovalerate', '108-64-5'],
 '989': ['1-p-menthen-9-yl_acetate', '17916-91-5'],
 '774': ['octyl_butyrate', '110-39-4'],
 '982': ['2-methyl-3-tetrahydrofuranthiol', '57124-87-5'],
 '983': ['2-methoxy-3-(1-methylpropyl)pyrazine', '24168-70-5'],
 '980': ['2-pentanol', '6032-29-7'],
 '981': ['2-hepten-4-one', '4643-25-8'],
 '986': ['phenethyl_isovalerate', '140-26-1'],
 '777': ['isoamyl_salicylate', '87-20-7'],
 '984': ['methyl_disulfide', '624-92-0'],
 '985': ['styrene', '100-42-5'],
 '115': ['3-methylpentanoic_acid', '105-43-1'],
 '114': ['2-methylbutyl_acetate', '624-41-9'],
 '117': ['allyl_isothiocyanate', '57-06-7'],
 '116': ['2,6,6-trimethylcyclohexa-1,3-dienyl_methanal', '116-26-7'],
 '111': ['ethyl_anthranilate', '87-25-2'],
 '110': ['cis-3-hexenyl_hexanoate', '31501-11-8'],
 '113': ['1-buten-1-yl_methyl_sulfide', '32951-19-2'],
 '112': ['3-methylbutanethiol', '541-31-4'],
 '771': ['2-hydroxybenzoic_acid', '69-72-7'],
 '119': ['2-hydroxy-3,5,5-trimethyl-1,2-cyclohexenone', '4883-60-7'],
 '118': ['2-nonanone', '821-55-6'],
 '770': ['2,2,4-trimethyl-1,3-oxacyclopentane', '1193-11-9'],
 '773': ['octyl_isobutyrate', '109-15-9'],
 '772': ['3-methylbutyraldehyde', '590-86-3'],
 '953': ['campholene_acetate', '1727-68-0'],
 '429': ['propyl_mercaptan', '107-03-9'],
 '428': ['1-(p-methoxyphenyl)-2-propanone', '122-84-9'],
 '534': ['amyl_hexanoate', '540-07-8'],
 '919': ['butyl_isobutyrate', '97-87-0'],
 '918': ['2-pentanone', '108-87-9'],
 '421': ['ethyl_salicylate', '118-61-6'],
 '420': ['ethyl-trans-2,_cis-4-decadienoate', '3025-30-7'],
 '423': ['vanillin', '121-33-5'],
 '422': ['paraldehyde', '123-63-7'],
 '425': ['4-methyl-2,6-dimethoxyphenol', '6638-05-7'],
 '424': ['2-pentylfuran', '3777-69-3'],
 '427': ['pyrrole', '109-97-7'],
 '426': ['thymol', '89-83-8'],
 '308': ['2,4-dimethylacetophenone', '89-74-7'],
 '309': ['ethyl_cis-4,7-octadienoate', '69925-33-3'],
 '855': ['2-methylheptan-3-one', '13019-20-0'],
 '300': ['cyclohexanecarboxylic_acid', '98-89-5'],
 '301': ['ethyl_octanoate', '106-32-1'],
 '302': ['2,5-dimethyl-4-methoxy-3(2h)-furanone', '4077-47-8'],
 '303': ['p-menthan-2-one', '499-70-7'],
 '304': ['ethyl_3-mercaptopropionate', '5466-06-8'],
 '305': ['1,2-dimethoxybenzene', '91-16-7'],
 '306': ['benzyl_propionate', '122-63-4'],
 '307': ['2-isopropyl-5-methyl-2-hexenal', '35158-25-9'],
 '895': ['1,1-dimethoxyethane', '534-15-6'],
 '825': ['d-undecalactone', '710-04-3'],
 '824': ['5-hydroxy-4-octanone', '496-77-5'],
 '827': ['2-methyl-3-butenal', '497-03-0'],
 '847': ['methyl_salicylate', '119-36-8'],
 '846': ['dihydro-a-ionone', '31499-72-6'],
 '845': ['1-methyl-2,3-cyclohexadione', '3008-43-3'],
 '826': ['cis-3-hexenyl-2-methylbutyrate', '53398-85-9'],
 '843': ['methyl_3-nonenoate', '13481-87-3'],
 '842': ['dl-isomenthone', '491-07-6'],
 '841': ['2,5_diethyl-3-methylpyrazine', '32736-91-7'],
 '840': ['2-isopropylphenol', '88-69-7'],
 '821': ['isoamyl_butyrate', '106-27-4'],
 '853': ['phenethylamine', '64-04-0'],
 '849': ['methyl_propyl_disulfide', '2179-60-4'],
 '820': ['3-methylthio-1-hexanol', '51755-66-9'],
 '823': ['l-glutamic_acid', '56-86-0'],
 '822': ['salicylaldehyde', '90-02-8'],
 '954': ['geraniol', '106-24-1'],
 '1034': ['a-propylphenethyl_alcohol', '705-73-7'],
 ...}

In [5]:
def compound2character(compounds):
    dict_comp_char = {}
    for comp_id in compounds:
        compound = compounds[comp_id][0]
        char_list = []
        for char in compound:
            char_list.append(char)
        dict_comp_char[compound] = char_list        
    return dict_comp_char

dict_compound2character = compound2character(compounds)

In [6]:
def read_corpus_char_level(dict_compound2character):
    for comp in dict_compound2character:
        #For training data, add tags
        compound = comp
        characters = dict_compound2character[comp]
        yield gensim.models.doc2vec.TaggedDocument(characters, [compound])

        
corpus = list(read_corpus_char_level(dict_compound2character))
corpus


Out[6]:
[TaggedDocument(words=['j', 'a', 's', 'm', 'o', 'n', 'e'], tags=['jasmone']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['5-methylhexanoic_acid']),
 TaggedDocument(words=['l', '-', 'g', 'l', 'u', 't', 'a', 'm', 'i', 'n', 'e'], tags=['l-glutamine']),
 TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['1-methyl-3-methoxy-4-isopropylbenzene']),
 TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', '-', '1', '-', 'o', 'l', '_', '(', 'r', 'a', 'c', 'e', 'm', 'i', 'c', ')'], tags=['3-mercapto-2-methylpentan-1-ol_(racemic)']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['butyl_isobutyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'j', 'a', 's', 'm', 'o', 'n', 'a', 't', 'e'], tags=['methyl_dihydrojasmonate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_2-methylthiobutyrate']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['octyl_propionate']),
 TaggedDocument(words=['2', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', 'i', 's', 'o', 'm', 'e', 'r', 's', ')'], tags=['2_or_5_or_6-methoxy-3-methylpyrazine_(mixture_of_isomers)']),
 TaggedDocument(words=['l', '-', '_', 'a', 'n', 'd', '_', 'd', 'l', '-', 'a', 'l', 'a', 'n', 'i', 'n', 'e'], tags=['l-_and_dl-alanine']),
 TaggedDocument(words=['p', '-', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['p-propyl_anisole']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['2-ethyl-1-hexanol']),
 TaggedDocument(words=['1', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['1-mercapto-2-propanone']),
 TaggedDocument(words=['1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['10-undecenal']),
 TaggedDocument(words=['(', 'z', ')', '-', '3', '_', '&', 'a', 'm', 'p', ';', '_', '(', 'e', ')', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e', '_', '(', 'm', 'i', 'x', 't', 'u', 'r', 'e', ')'], tags=['(z)-3_&_(e)-2-hexenyl_propionate_(mixture)']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', '1', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'e', 't', 'h', 'y', 'l', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['(+/?)-1-phenylethylmercaptan']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_trans-2-octenoate']),
 TaggedDocument(words=['3', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-acetylpyridine']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cyclohexaneethyl_acetate']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['linalyl_butyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', 's', 'u', 'l', 'f', 'i', 'n', 'y', 'l', 'm', 'e', 't', 'h', 'a', 'n', 'e'], tags=['methylsulfinylmethane']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '2', ',', '4', ',', '7', '-', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_2,4,7-decatrienoate']),
 TaggedDocument(words=['l', 'a', 'u', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['lauryl_acetate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['4-methoxy-2-methyl-2-butanethiol']),
 TaggedDocument(words=['4', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-heptanone']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['benzothiazole']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', '-', '2', '-', 'o', 'l'], tags=['p-menthan-2-ol']),
 TaggedDocument(words=['w', '-', '6', '-', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'e', 'n', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['w-6-hexadecenlactone']),
 TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['terpinyl_propionate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '8', '-', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['p-menth-8-en-1-ol']),
 TaggedDocument(words=['t', 'h', 'u', 'j', 'a', 'n', '-', '4', '-', 'o', 'l'], tags=['thujan-4-ol']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', '(', '2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', 'i', 'd', 'e', 'n', 'e', ')', '_', 'a', 'c', 'e', '-', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['(+/?)-(2,6,6-trimethyl-2-hydroxycyclohexylidene)_ace-tic_acid_g-lactone']),
 TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,6,6-trimethyl-1,2-cyclohexen-1-carboxaldehyde']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-acetylfuran']),
 TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['nonyl_acetate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'a', 'n', 'e'], tags=['3-methyl-1,2,4-trithiane']),
 TaggedDocument(words=['4', "'", '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=["4'-methylacetophenone"]),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-hydroxy-3-methoxybenzoic_acid']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['hexyl_isovalerate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methylpyrazine']),
 TaggedDocument(words=['m', 'o', 'n', 'o', 's', 'o', 'd', 'i', 'u', 'm', '_', 'g', 'l', 'u', 't', 'a', 'm', 'a', 't', 'e'], tags=['monosodium_glutamate']),
 TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(z)-3-hexenyl_isobutyrate']),
 TaggedDocument(words=['4', '-', 'a', 'l', 'l', 'y', 'l', '-', '2', ',', '_', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-allyl-2,_6-dimethoxyphenol']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['octyl_acetate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'a', 'l'], tags=['p-mentha-1,8-dien-7-al']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['benzyl_cinnamate']),
 TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['amyl_formate']),
 TaggedDocument(words=['p', '-', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-ethylphenol']),
 TaggedDocument(words=['b', 'u', 't', 'a', 'n', '-', '3', '-', 'o', 'n', 'e', '-', '2', '-', 'y', 'l', '_', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['butan-3-one-2-yl_butanoate']),
 TaggedDocument(words=['4', '-', '(', '1', ',', '1', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', ')', '_', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-(1,1-dimethyl)_ethyl_phenol']),
 TaggedDocument(words=['6', ',', '1', '0', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', ',', '9', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6,10-dimethyl-5,9-undecadien-2-one']),
 TaggedDocument(words=['p', 'y', 'r', 'o', 'l', 'i', 'g', 'n', 'e', 'o', 'u', 's', '_', 'a', 'c', 'i', 'd'], tags=['pyroligneous_acid']),
 TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-ethylpyridine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl_propionate']),
 TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', '5', "'", '-', 'i', 'n', 'o', 's', 'i', 'n', 'a', 't', 'e'], tags=["disodium_5'-inosinate"]),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoamyl_acetate']),
 TaggedDocument(words=['d', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['decanal']),
 TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-undecanone']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'd', 'o', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxododecanoic_acid_glyceride']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['geranyl_butyrate']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'h', 'e', 'x', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-hexadienal']),
 TaggedDocument(words=['3', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-penten-2-one']),
 TaggedDocument(words=['2', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-dodecenal']),
 TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['n-butyl_valerate']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-hydroxybenzaldehyde']),
 TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['rhodinyl_formate']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-hydroxy-3,5-dimethoxybenzaldehyde']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_isobutyrate']),
 TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', ',', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['hydroxynonanoic_acid,_d-lactone']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'i', 'n', 'e'], tags=['2-propionylpyrroline']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_thiobutyrate']),
 TaggedDocument(words=['l', '-', 't', 'y', 'r', 'o', 's', 'i', 'n', 'e'], tags=['l-tyrosine']),
 TaggedDocument(words=['g', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-dodecalactone']),
 TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', '(', 'e', ')', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['(z)-3-hexenyl_(e)-2-butenoate']),
 TaggedDocument(words=['3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '3', ',', '6', '-', 'o', 'c', 't', 'a', 't', 'r', 'i', 'e', 'n', 'e'], tags=['3,7-dimethyl-1,3,6-octatriene']),
 TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '6', '-', 'c', 'i', 's', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans-6-cis-dodecadienal']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_2-butenoate']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'p', 't', 'e', 'n', 'a', 'l'], tags=['2,6-dimethyl-5-heptenal']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 't', 'e'], tags=['methyl_citronellate']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isoamyl_octanoate']),
 TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-dimethylpyrazine']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_butyrate']),
 TaggedDocument(words=['d', 'i', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['difurfuryl_ether']),
 TaggedDocument(words=['v', 'a', 'l', 'e', 'n', 'c', 'e', 'n', 'e'], tags=['valencene']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-methylpentanoate']),
 TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'o', 'l'], tags=['menthol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl_valerate']),
 TaggedDocument(words=['d', 'i', 'p', 'h', 'e', 'n', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['diphenyl_ether']),
 TaggedDocument(words=['g', '-', 'h', 'e', 'x', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-hexalactone']),
 TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-methoxybenzaldehyde']),
 TaggedDocument(words=['t', 'o', 'l', 'u', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', 's', ',', '_', 'm', 'i', 'x', 'e', 'd', '_', 'o', '-', ',', '_', 'm', '-', ',', '_', 'p', '-'], tags=['tolualdehydes,_mixed_o-,_m-,_p-']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['propenyl_propyl_disulfide']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3-hexanol']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['furfuryl_thioacetate']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(+/?)-ethyl_3-acetoxy-2-methylbutyrate']),
 TaggedDocument(words=['1', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['1-penten-3-one']),
 TaggedDocument(words=['b', '-', 'c', 'a', 'r', 'y', 'o', 'p', 'h', 'y', 'l', 'l', 'e', 'n', 'e'], tags=['b-caryophyllene']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'e', 'n', 'o', 'l', 'o', 'n', 'e'], tags=['methylcyclopentenolone']),
 TaggedDocument(words=['d', 'l', '-', '(', '3', '-', 'a', 'm', 'i', 'n', 'o', '-', '3', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 's', 'u', 'l', 'f', 'o', 'n', 'i', 'u', 'm', '_', 'c', 'h', 'l', 'o', 'r', 'i', 'd', 'e'], tags=['dl-(3-amino-3-carboxypropyl)dimethylsulfonium_chloride']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl-3-methylthiopropionate']),
 TaggedDocument(words=['p', 'y', 'r', 'u', 'v', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['pyruvic_acid']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['isobornyl_isovalerate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isobutyl_butyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'y', 'r', 'r', 'o', 'l', 'y', 'l', '_', 'k', 'e', 't', 'o', 'n', 'e'], tags=['methyl-2-pyrrolyl_ketone']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['isobutyraldehyde']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['5-methyl-2,3-hexanedione']),
 TaggedDocument(words=['1', ',', '2', ',', '5', ',', '6', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'c', 'u', 'm', 'i', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['1,2,5,6-tetrahydrocuminic_acid']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['ethyl_anthranilate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['3-methylbutanethiol']),
 TaggedDocument(words=['1', '-', 'b', 'u', 't', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['1-buten-1-yl_methyl_sulfide']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['2-methylbutyl_acetate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methylpentanoic_acid']),
 TaggedDocument(words=['n', '-', '(', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'y', 'l', ')', '-', '8', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', 'a', '-', 'm', 'i', 'd', 'e'], tags=['n-(4-hydroxy-3-methoxybenzyl)-8-methyl-6-nonena-mide']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['allyl_isothiocyanate']),
 TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'n', 'e'], tags=['2-nonanone']),
 TaggedDocument(words=['a', 'd', 'i', 'p', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['adipic_acid']),
 TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', ',', '5', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', 'o', 'n', 'e'], tags=['2-hydroxy-3,5,5-trimethyl-1,2-cyclohexenone']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['butyl_salicylate']),
 TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['a-methylbenzyl_alcohol']),
 TaggedDocument(words=['1', ',', '6', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,6-hexanedithiol']),
 TaggedDocument(words=['d', 'e', 'h', 'y', 'd', 'r', 'o', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dehydrodihydroionone']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['2-methylundecanal']),
 TaggedDocument(words=['p', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['p-cresol']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isopropyl_hexanoate']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-phenylpropionic_acid']),
 TaggedDocument(words=['i', 's', 'o', 'q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['isoquinoline']),
 TaggedDocument(words=['4', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-decenoic_acid']),
 TaggedDocument(words=['2', ',', '2', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['2,2,6-trimethylcyclohexanone']),
 TaggedDocument(words=['z', 'i', 'n', 'g', 'e', 'r', 'o', 'n', 'e'], tags=['zingerone']),
 TaggedDocument(words=['c', 'a', 'r', 'v', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['carvyl_acetate']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 'l'], tags=['citronellal']),
 TaggedDocument(words=['c', 'i', 's', '-', '4', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['cis-4-hexenal']),
 TaggedDocument(words=['d', 'e', 'l', 't', 'a', '-', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['delta-tetradecalactone']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['isobutyl_cinnamate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl-3-hexenoate']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['citronellyl_valerate']),
 TaggedDocument(words=['q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['quinoline']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cyclohexyl_acetate']),
 TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['neryl_formate']),
 TaggedDocument(words=['b', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['b-ionone']),
 TaggedDocument(words=['4', '-', '[', '(', '2', ',', '6', ',', '6', ')', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', '-', '1', '-', 'e', 'n', 'y', 'l', ']', '-', 'b', 'u', 't', '-', '2', '-', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['4-[(2,6,6)-trimethyl-cyclohex-1-enyl]-but-2-en-4-one']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isoamyl_propionate']),
 TaggedDocument(words=['1', ',', '3', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,3-propanedithiol']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['citronellyl_formate']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'e', 'n', 't', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['isopentylamine']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['benzyl_benzoate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isobutyl_formate']),
 TaggedDocument(words=['3', ',', '4', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['3,4-hexanedione']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'a', 'l', 'm', 'i', 't', 'a', 't', 'e'], tags=['ethyl_palmitate']),
 TaggedDocument(words=['v', 'i', 't', 'a', 'm', 'i', 'n', '_', 'b', '1'], tags=['vitamin_b1']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 't', 'h', 'i', 'o', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['methyl_2-thiofuroate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isobutyl_propionate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methylthioacetaldehyde']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['methyl_cinnamate']),
 TaggedDocument(words=['(', 'd', '-', ',', '_', 'l', '-', ',', '_', 'd', 'l', '-', ',', '_', 'm', 'e', 's', 'o', ')', '_', 't', 'a', 'r', 't', 'a', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['(d-,_l-,_dl-,_meso)_tartaric_acid']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'y', 'l', '-', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['p-mentha-1,8-dien-7-yl-acetate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '_', 'o', 'x', 'i', 'd', 'e'], tags=['ethylene_oxide']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['ethyl_sorbate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'f', 'u', 'r', 'a', 'n', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-3-furanthiol']),
 TaggedDocument(words=['2', ',', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2,6-nonadien-1-ol']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '4', '(', '8', ')', '-', 'd', 'i', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['p-mentha-1,4(8)-dien-3-one']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['4-methyl-2-pentenal']),
 TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['neryl_butyrate']),
 TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'o', 'l'], tags=['eugenol']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'q', 'u', 'i', 'n', 'o', 'x', 'a', 'l', 'i', 'n', 'e'], tags=['5-methylquinoxaline']),
 TaggedDocument(words=['l', '-', 'm', 'e', 'n', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['l-menthyl_acetate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '2', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_2-(methylthio)acetate']),
 TaggedDocument(words=['2', '-', '(', '2', '-', 'b', 'u', 't', 'y', 'l', ')', '-', '4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-(2-butyl)-4,5-dimethyl-3-thiazoline']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylheptanoic_acid']),
 TaggedDocument(words=['c', 'a', 'm', 'p', 'h', 'e', 'n', 'e'], tags=['camphene']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['cis-3-hexenal']),
 TaggedDocument(words=['n', 'o', 'n', 'a', '-', '2', '-', 't', 'r', 'a', 'n', 's', ',', '-', '6', '-', 'c', 'i', 's', '-', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['nona-2-trans,-6-cis-dienal']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['acetylpyrazine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_butyrate']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['allyl_sulfide']),
 TaggedDocument(words=['d', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['dimethyl_succinate']),
 TaggedDocument(words=['b', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['b-pinene']),
 TaggedDocument(words=['f', 'e', 'n', 'c', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['fenchyl_alcohol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_isobutyrate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['phenethyl-2-methylbutyrate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['ethyl_cinnamate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 'p', 'h', 'e', 'n', 'y', 'l'], tags=['p-methyl_diphenyl']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['butyl_ethyl_disulfide']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-methyl-3-penten-2-one']),
 TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['5-ethyl-2-methylpyridine']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isobutyl_isobutyrate']),
 TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['terpinyl_formate']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '3', ',', '5', '(', '6', ')', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-3,5(6)-dimethylpyrazine']),
 TaggedDocument(words=['(', 'e', ',', 'e', ')', '-', '2', ',', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e,e)-2,4-decadien-1-ol']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['butyl_hexanoate']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-ethylbutyric_acid']),
 TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-diethylpyrazine']),
 TaggedDocument(words=['l', '-', 'h', 'i', 's', 't', 'i', 'd', 'i', 'n', 'e'], tags=['l-histidine']),
 TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['undecanal']),
 TaggedDocument(words=['v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['valeric_acid']),
 TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-undecenal']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['butyl_sulfide']),
 TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['amyl_butyrate']),
 TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-decen-2-one']),
 TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['1-octen-3-ol']),
 TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', '5', "'", '-', 'g', 'u', 'a', 'n', 'y', 'l', 'a', 't', 'e'], tags=["disodium_5'-guanylate"]),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['2-methylbutyl-3-methylbutanoate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['phenylacetaldehyde_dimethyl_acetal']),
 TaggedDocument(words=['1', '-', 'p', '-', 'm', 'e', 'n', 't', 'h', 'e', 'n', 'e', '-', '8', '-', 't', 'h', 'i', 'o', 'l'], tags=['1-p-menthene-8-thiol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_acetate']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-acetyl-5-methylfuran']),
 TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', 'q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['6-methylquinoline']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-5-methylpyrazine']),
 TaggedDocument(words=['4', '-', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['4-decenal']),
 TaggedDocument(words=['m', 'y', 'r', 't', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['myrtenyl_acetate']),
 TaggedDocument(words=['1', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['1-phenyl-1-propanol']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['cinnamyl_alcohol']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['citronellyl_propionate']),
 TaggedDocument(words=['3', '-', '(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', 'a', 'c', 'r', 'o', 'l', 'e', 'i', 'n'], tags=['3-(2-furyl)acrolein']),
 TaggedDocument(words=['d', '-', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-decalactone']),
 TaggedDocument(words=['a', '-', 'i', 'o', 'n', 'o', 'l'], tags=['a-ionol']),
 TaggedDocument(words=['2', ',', '6', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['2,6-xylenol']),
 TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['(z)-3-hexenyl_propionate']),
 TaggedDocument(words=['2', ',', '2', "'", '-', '(', 't', 'h', 'i', 'o', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', ')', '_', 'd', 'i', 'f', 'u', 'r', 'a', 'n'], tags=["2,2'-(thiodimethylene)_difuran"]),
 TaggedDocument(words=['o', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['o-cresol']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-ethylfuran']),
 TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['2-undecanol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['methyl_anthranilate']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2-hexen-1-ol']),
 TaggedDocument(words=['p', 'a', 'l', 'm', 'i', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['palmitic_acid']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['methoxypyrazine']),
 TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['a-methylcinnamaldehyde']),
 TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'l'], tags=['2-nonanol']),
 TaggedDocument(words=['h', 'e', 'x', 'a', 'n', 'a', 'l'], tags=['hexanal']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['propyl_isovalerate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'a', 't', 'e'], tags=['methyl_cyclohexanecarboxylate']),
 TaggedDocument(words=['a', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['a-pinene']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'o', 'x', 'a', 'z', 'o', 'l', 'e'], tags=['2-ethyl-4,5-dimethyloxazole']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_octanoate']),
 TaggedDocument(words=['2', ',', '5', '_', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,5_diethyl-3-methylpyrazine']),
 TaggedDocument(words=['h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['hexanoic_acid']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['acetanisole']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-octenoate']),
 TaggedDocument(words=['(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['(2-furyl)-2-propanone']),
 TaggedDocument(words=['1', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['1-hydroxy-2-butanone']),
 TaggedDocument(words=['2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['2-phenyl-2-butenal']),
 TaggedDocument(words=['p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['pyridine']),
 TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 'l'], tags=['hydroxycitronellal']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['isoamyl_benzoate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '3', '-', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['p-menth-3-en-1-ol']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['propyl_formate']),
 TaggedDocument(words=['1', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['1-decanol']),
 TaggedDocument(words=['p', 'r', 'e', 'n', 'y', 'l', 't', 'h', 'i', 'o', 'l'], tags=['prenylthiol']),
 TaggedDocument(words=['e', 'u', 'c', 'a', 'l', 'y', 'p', 't', 'o', 'l'], tags=['eucalyptol']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['butyraldehyde']),
 TaggedDocument(words=['m', 'y', 'r', 'i', 's', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['myristaldehyde']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxohexadecanoic_acid_glyceride']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_3-hydroxyhexanoate']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['cycloionone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_methyl_disulfide']),
 TaggedDocument(words=['3', ',', '5', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3,5,5-trimethyl-1-hexanol']),
 TaggedDocument(words=['d', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-dodecalactone']),
 TaggedDocument(words=['c', 'i', 's', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-6-nonen-1-ol']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-methylthiobutyraldehyde']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'e', 't', 'h', 'y', 'l', '_', '(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['acetaldehyde_ethyl_(z)-3-hexenyl_acetal']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-hepten-2-one']),
 TaggedDocument(words=['t', 'r', 'i', 'a', 'c', 'e', 't', 'i', 'n'], tags=['triacetin']),
 TaggedDocument(words=['2', ',', '3', '-', 'o', 'c', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['2,3-octanedione']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'a', 'm', 'm', 'a', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['4-hydroxy-4-methyl-5-hexenoic_acid_gamma-lactone']),
 TaggedDocument(words=['2', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'e', 't', 'h', 'a', 'n', 'o', 'l'], tags=['2-(methylthio)ethanol']),
 TaggedDocument(words=['4', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-acetoxy-2,5-dimethyl-3(2h)-furanone']),
 TaggedDocument(words=['2', ',', '4', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2,4-nonadienal']),
 TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['terpinyl_acetate']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['3-hexanone']),
 TaggedDocument(words=['l', 'a', 'u', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['lauric_acid']),
 TaggedDocument(words=['4', '-', 'h', 'e', 'p', 't', 'e', 'n', 'a', 'l', '_', '(', 'c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', ')'], tags=['4-heptenal_(cis-_and_trans-)']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_heptanoate']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['linalyl_isovalerate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['ethyl_lactate']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxooctanoic_acid_glyceride']),
 TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n', ',', '_', 'n', 'a', 't', 'u', 'r', 'a', 'l'], tags=['vanillin,_natural']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['hexyl_acetate']),
 TaggedDocument(words=['3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'o', 'l', 'a', 'n', 'e'], tags=['3,5-dimethyl-1,2,4-trithiolane']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'u', 't', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['benzyl_butyl_ether']),
 TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', ',', '_', '6', '-', 't', 'r', 'a', 'n', 's', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans,_6-trans-nonadienal']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['5-methyl-2-thiophenecarboxaldehyde']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'l'], tags=['3-methyl-1-pentanol']),
 TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['3-decanol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_thioacetate']),
 TaggedDocument(words=['1', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['1-octanol']),
 TaggedDocument(words=['m', 'i', 'n', 't', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['mintlactone']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['2-pentenal']),
 TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['eugenyl_methyl_ether']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl-3-methylthiopropionate']),
 TaggedDocument(words=['p', '-', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p-a-dimethylbenzyl_alcohol']),
 TaggedDocument(words=['c', 'a', 'r', 'y', 'o', 'p', 'h', 'y', 'l', 'l', 'e', 'n', 'e', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['caryophyllene_alcohol']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['3-methyl-2-butenal']),
 TaggedDocument(words=['n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['nonanoic_acid']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['cyclohexanecarboxylic_acid']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_octanoate']),
 TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['2,4-dimethylanisole']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isoamyl_formate']),
 TaggedDocument(words=['1', ',', '2', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['1,2-dimethoxybenzene']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['geranyl_isovalerate']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['2-isopropyl-5-methyl-2-hexenal']),
 TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['2,4-dimethylacetophenone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 's', '-', '4', ',', '7', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_cis-4,7-octadienoate']),
 TaggedDocument(words=['p', '-', 't', 'o', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['p-tolyl_acetate']),
 TaggedDocument(words=['2', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['2-octanol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-hexenoate']),
 TaggedDocument(words=['n', '-', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['n-furfuryl_pyrrole']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxohexanoic_acid_glyceride']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 't', 'e', 'a', 'r', 'a', 't', 'e'], tags=['butyl_stearate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_decanoate']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['propyl_butyrate']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '0', '-', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '-', '2', ',', '6', ',', '1', '1', '-', 'd', 'o', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['2,6-dimethyl-10-methylene-2,6,11-dodecatrienal']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'l'], tags=['3-heptanol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-hexenoate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl-4-methylvalerate']),
 TaggedDocument(words=['p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['pyrazine']),
 TaggedDocument(words=['c', 'a', 'd', 'i', 'n', 'e', 'n', 'e'], tags=['cadinene']),
 TaggedDocument(words=['a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['anisole']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'o', 'l'], tags=['p-mentha-1,8-dien-7-ol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'a', 'n', 'i', 's', 'a', 't', 'e'], tags=['methyl_anisate']),
 TaggedDocument(words=['f', 'e', 'n', 'c', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['fenchyl_acetate']),
 TaggedDocument(words=['t', 'h', 'y', 'm', 'o', 'l'], tags=['thymol']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-phenylpropionaldehyde']),
 TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['nonyl_octanoate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['3-methylthiopropyl_isothiocyanate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '8', '-', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['p-menth-8-en-2-one']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['isobutyl_benzoate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobutyl_acetate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '1', '-', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['p-menth-1-en-3-ol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_phenyl_sulfide']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-methylpropyl-3-methylbutyrate']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['furfuryl_alcohol']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['propyl_heptanoate']),
 TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'n', 'a', 'p', 'h', 't', 'h', 'a', 'l', 'e', 'n', 'e'], tags=['1-methylnaphthalene']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'o', 'u', 'm', 'a', 'r', 'i', 'n'], tags=['dihydrocoumarin']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', 'e', '-', '3', ',', '8', '-', 'd', 'i', 'o', 'l'], tags=['p-menthane-3,8-diol']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'm', 'y', 'r', 'i', 's', 't', 'a', 't', 'e'], tags=['isopropyl_myristate']),
 TaggedDocument(words=['s', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'a', 't', 'e'], tags=['s-methyl_3-methylbutanethioate']),
 TaggedDocument(words=['g', '-', 'n', 'o', 'n', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-nonalactone']),
 TaggedDocument(words=['4', '-', '(', '2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'y', 'l', ')', 'b', 'u', 't', '-', '2', '-', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['4-(2,6,6-trimethyl-cyclohexa-1,3-dienyl)but-2-en-4-one']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['cinnamic_acid']),
 TaggedDocument(words=['l', 'i', 'm', 'o', 'n', 'e', 'n', 'e', '_', '(', 'd', '-', ',', 'l', '-', ',', '_', 'a', 'n', 'd', '_', 'd', 'l', '-', ')'], tags=['limonene_(d-,l-,_and_dl-)']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['benzyl_propionate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '1', '-', 'e', 'n', 'e', '-', '9', '-', 'a', 'l'], tags=['p-menth-1-ene-9-al']),
 TaggedDocument(words=['3', ',', '6', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', '-', '2', 'h', '-', 'p', 'y', 'r', 'a', 'n'], tags=['3,6-dihydro-4-methyl-2-(2-methylpropen-1-yl)-2h-pyran']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-isobutyl-3-methylpyrazine']),
 TaggedDocument(words=['5', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['5-isopropyl-2-methylpyrazine']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['citronellyl_isobutyrate']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoprenyl_acetate']),
 TaggedDocument(words=['3', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'e'], tags=['3-acetyl-2,5-dimethylthiophene']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['isobutyric_acid']),
 TaggedDocument(words=['d', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['decanoic_acid']),
 TaggedDocument(words=['4', '-', 'p', 'r', 'o', 'p', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-propyl-2,6-dimethoxyphenol']),
 TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'o', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['piperonyl_acetate']),
 TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'o', 'l', 'e', 'n', 'e'], tags=['terpinolene']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methyl-5-vinylpyrazine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['methyl_salicylate']),
 TaggedDocument(words=['3', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-(2-methylpropyl)pyridine']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_octanoate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_nonanoate']),
 TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2,5-dimethylthiazole']),
 TaggedDocument(words=['9', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['9-octadecenal']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['3-phenyl-1-propanol']),
 TaggedDocument(words=['p', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p-isopropylbenzyl_alcohol']),
 TaggedDocument(words=['2', ',', '8', '-', 'd', 'i', 't', 'h', 'i', 'a', 'n', 'o', 'n', '-', '4', '-', 'e', 'n', '-', '4', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,8-dithianon-4-en-4-carboxaldehyde']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_benzyl_disulfide']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['furfuryl_methyl_sulfide']),
 TaggedDocument(words=['p', 'y', 'r', 'u', 'v', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['pyruvaldehyde']),
 TaggedDocument(words=['c', 'o', 'u', 'm', 'a', 'r', 'i', 'n', '_', '(', 'p', 'r', 'o', 'h', 'i', 'b', 'i', 't', 'e', 'd', ')'], tags=['coumarin_(prohibited)']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['benzyl_acetate']),
 TaggedDocument(words=['l', 'a', 'c', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['lactic_acid']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', '2', '-', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['octyl_2-furoate']),
 TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['(z)-3-hexenyl_valerate']),
 TaggedDocument(words=['3', '-', 'b', 'u', 't', 'y', 'l', 'i', 'd', 'e', 'n', 'e', 'p', 'h', 't', 'h', 'a', 'l', 'i', 'd', 'e'], tags=['3-butylidenephthalide']),
 TaggedDocument(words=['2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-oxobutyric_acid']),
 TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,3-dimethylbenzofuran']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylbutyric_acid']),
 TaggedDocument(words=['1', ',', '5', ',', '5', ',', '9', '-', 't', 'e', 't', 'r', 'a', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '3', '-', 'o', 'x', 'a', 't', 'r', 'i', 'c', 'y', 'c', 'l', 'o', '-', '(', '8', '.', '3', '.', '0', '.', '0', '(', '4', ',', '9', ')', ')', '_', '_', '_', '_', '_', '_', '_', '_', 't', 'r', 'i', 'd', 'e', 'c', 'a', 'n', 'e'], tags=['1,5,5,9-tetramethyl-13-oxatricyclo-(8.3.0.0(4,9))________tridecane']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-3-tetrahydrofuranthiol']),
 TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['(e)-2-hexenyl_hexanoate']),
 TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '(', '5', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['5-ethyl-3-hydroxy-4-methyl-2(5h)-furanone']),
 TaggedDocument(words=['3', '-', 'o', 'c', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-octyl_acetate']),
 TaggedDocument(words=['(', 'z', ')', '-', '8', '-', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['(z)-8-tetradecenal']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-octadienal']),
 TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '7', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-7-decenoic_acid_d-lactone']),
 TaggedDocument(words=['d', '-', 'c', 'a', 'm', 'p', 'h', 'o', 'r'], tags=['d-camphor']),
 TaggedDocument(words=['3', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'i', 'd', 'e', 'n', 'e', 'p', 'h', 't', 'h', 'a', 'l', 'i', 'd', 'e'], tags=['3-propylidenephthalide']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['ethyl_laurate']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', ',', '_', '(', '5', '_', 'o', 'r', '_', '6', ')', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', ',', '_', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', 'i', 's', 'o', 'm', 'e', 'r', 's'], tags=['2-acetyl-3,_(5_or_6)-dimethylpyrazine,_mixture_of_isomers']),
 TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6-methyl-5-hepten-2-one']),
 TaggedDocument(words=['g', '-', 'v', 'a', 'l', 'e', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-valerolactone']),
 TaggedDocument(words=['a', 'n', 'i', 's', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['anisyl_formate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_4-(methylthio)-butyrate']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['heptyl_alcohol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methoxy-3-(1-methylpropyl)pyrazine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'n', 'i', 'c', 'o', 't', 'i', 'n', 'a', 't', 'e'], tags=['methyl_nicotinate']),
 TaggedDocument(words=['s', 'u', 'l', 'f', 'u', 'r', '_', 'd', 'i', 'o', 'x', 'i', 'd', 'e'], tags=['sulfur_dioxide']),
 TaggedDocument(words=['n', 'o', 'o', 't', 'k', 'a', 't', 'o', 'n', 'e'], tags=['nootkatone']),
 TaggedDocument(words=['l', 'e', 'v', 'u', 'l', 'i', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['levulinic_acid']),
 TaggedDocument(words=['2', '-', 't', 'r', 'i', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-tridecanone']),
 TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4,5-dimethyl_thiazole']),
 TaggedDocument(words=['l', '-', 'l', 'y', 's', 'i', 'n', 'e'], tags=['l-lysine']),
 TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', ',', '_', '4', '-', 't', 'r', 'a', 'n', 's', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans,_4-trans-decadienal']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxotetradecanoic_acid_glyceride']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['methyl_phenethyl_ether']),
 TaggedDocument(words=['p', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-propylphenol']),
 TaggedDocument(words=['a', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['a-ionone']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['allyl_mercaptan']),
 TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-methoxycinnamaldehyde']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-propylpyrazine']),
 TaggedDocument(words=['1', '-', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'e'], tags=['1-ethoxy-3-methyl-2-butene']),
 TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['(e)-2-decenoic_acid']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['ethyl_alcohol']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['phenethyl_isovalerate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['4-methyl-2,3-pentanedione']),
 TaggedDocument(words=['p', 'a', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['paraldehyde']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-propionylthiazole']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-pentylfuran']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-methyl-2,6-dimethoxyphenol']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_butyrate']),
 TaggedDocument(words=['p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['pyrrole']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['cinnamyl_benzoate']),
 TaggedDocument(words=['c', 'a', 'm', 'p', 'h', 'o', 'l', 'e', 'n', 'e', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['campholene_acetate']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-isobutyl-3-methoxypyrazine']),
 TaggedDocument(words=['9', ',', '1', '2', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', '(', '4', '8', '%', ')', '_', 'p', 'l', 'u', 's', '_', '9', ',', '1', '2', ',', '1', '5', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', '-', '_', 't', 'r', 'i', 'e', 'n', 'o', 'i', 'n', 'c', '_', 'a', 'c', 'i', 'd', '_', '(', '5', '2', '%', ')', '_', '(', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 's', 't', 'e', 'r', 's', ')'], tags=['9,12-octadecadienoic_acid_(48%)_plus_9,12,15-octadeca-_trienoinc_acid_(52%)_(methyl_esters)']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'b', '-', 'i', 'o', 'n', 'o', 'l'], tags=['dihydro-b-ionol']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'o', 'l'], tags=['isopulegol']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['trans-2-hexenoic_acid']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['linalyl_acetate']),
 TaggedDocument(words=['1', '-', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['1-hexadecanol']),
 TaggedDocument(words=['n', '-', 'o', 'c', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['n-octyl_formate']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_trans-2-hexenoate']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_hexanoate']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'f', 'u', 'r', 'a', 'l'], tags=['5-methylfurfural']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['geranyl_propionate']),
 TaggedDocument(words=['n', 'e', 'r', 'o', 'l', 'i', 'd', 'o', 'l'], tags=['nerolidol']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['p-menthan-2-one']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'a', 'l'], tags=['2-methylpentanal']),
 TaggedDocument(words=['d', 'i', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diisopropyl_disulfide']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_3-phenylpropionate']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '_', 'h', 'e', 'p', 't', 'a', 'n', '-', '3', '-', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(+/?)_heptan-3-yl_butyrate']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['(+/?)_2-methyl-1-butanol']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-6-methylpyrazine']),
 TaggedDocument(words=['b', 'e', 't', 'a', '-', 'c', 'y', 'c', 'l', 'o', 'd', 'e', 'x', 't', 'r', 'i', 'n'], tags=['beta-cyclodextrin']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['hexyl_phenylacetate']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['heptyl_acetate']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['3-phenylpropyl_propionate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['3-mercapto-3-methylbutyl_formate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylhexanoic_acid']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_isobutyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_nonanoate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_cyclohexanecarboxylate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '1', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_1-propenyl_disulfide']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['phenethyl_isothiocyanate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylvaleric_acid']),
 TaggedDocument(words=['4', '-', 'c', 'a', 'r', 'v', 'o', 'm', 'e', 'n', 't', 'h', 'e', 'n', 'o', 'l'], tags=['4-carvomenthenol']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['propiophenone']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['propyl_benzoate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['3-methyl-2-cyclohexen-1-one']),
 TaggedDocument(words=['o', '-', 'v', 'i', 'n', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['o-vinylanisole']),
 TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 'm', 'a', 'l', 'a', 't', 'e'], tags=['diethyl_malate']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['citric_acid']),
 TaggedDocument(words=['d', 'e', 'c', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['decyl_acetate']),
 TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['vanillyl_ethyl_ether']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_propionate']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['propyl_disulfide']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_isobutyrate']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-isopropyl-4-methylthiazole']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-3-methylpyrazine']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cis-3-hexen-1-yl_acetate']),
 TaggedDocument(words=['t', 'h', 'a', 'u', 'm', 'a', 't', 'i', 'n', '_', 'b', '-', 'r', 'e', 'c', 'o', 'm', 'b', 'i', 'n', 'a', 'n', 't'], tags=['thaumatin_b-recombinant']),
 TaggedDocument(words=['1', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '1', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', 'e'], tags=['1,4-dimethyl-4-acetyl-1-cyclohexene']),
 TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2-undecen-1-ol']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['cyclohexanone']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', '3', ',', '5', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'o', 'l', 'a', 'n', 'e'], tags=['(+/?)-cis-_and_trans-3,5-diethyl-1,2,4-trithiolane']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_salicylate']),
 TaggedDocument(words=['g', '-', 'b', 'u', 't', 'y', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-butyrolactone']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-hydroxy-4-methylpentanoate']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['5-methyl-2-hepten-4-one']),
 TaggedDocument(words=['i', 's', 'o', 's', 'a', 'f', 'r', 'o', 'l', 'e', '_', '(', 'p', 'r', 'o', 'h', 'i', 'b', 'i', 't', 'e', 'd', ')'], tags=['isosafrole_(prohibited)']),
 TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'g', 'u', 'a', 'i', 'a', 'c', 'o', 'l'], tags=['4-ethylguaiacol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl-3-phenylpropionate']),
 TaggedDocument(words=['a', '-', 'i', 'r', 'o', 'n', 'e'], tags=['a-irone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_undecanoate']),
 TaggedDocument(words=['b', '-', 'i', 'o', 'n', 'o', 'l'], tags=['b-ionol']),
 TaggedDocument(words=['d', '-', 'o', 'c', 't', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-octalactone']),
 TaggedDocument(words=['4', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-mercapto-4-methyl-2-pentanone']),
 TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,5-diethyltetrahydrofuran']),
 TaggedDocument(words=['6', ',', '7', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', 'h', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['6,7-dihydro-2,3-dimethyl-5h-cyclopentapyrazine']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_2-methylbutyrate']),
 TaggedDocument(words=['3', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '_', 'h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-(methylthio)_hexyl_acetate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['phenoxyacetic_acid']),
 TaggedDocument(words=['d', 'e', 'c', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['decyl_butyrate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-methylnonanoic_acid']),
 TaggedDocument(words=['v', 'e', 'r', 'a', 't', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['veratraldehyde']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['furfuryl_mercaptan']),
 TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'o', 'n', 'e'], tags=['menthone']),
 TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'a', 'n', 'a', 'l'], tags=['2,6,6-trimethylcyclohexa-1,3-dienyl_methanal']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-hydroxybenzoic_acid']),
 TaggedDocument(words=['1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['1-butanethiol']),
 TaggedDocument(words=['6', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['6-undecanone']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['citronellyl_acetate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_octadecanoate']),
 TaggedDocument(words=['1', '-', 'h', 'e', 'x', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['1-hexanethiol']),
 TaggedDocument(words=['4', ',', '5', ',', '6', ',', '7', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', '-', '3', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['4,5,6,7-tetrahydro-3,6-dimethylbenzofuran']),
 TaggedDocument(words=['4', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['4-hexen-1-ol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-2-octenal']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isopropyl_propionate']),
 TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'i', 'n', 'e'], tags=['piperine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_benzoate']),
 TaggedDocument(words=['t', 'r', 'i', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['trithioacetone']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'b', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dihydro-b-ionone']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'o', 'o', 'l', '_', 'o', 'x', 'i', 'd', 'e'], tags=['linalool_oxide']),
 TaggedDocument(words=['o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['octanoic_acid']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['3-methylbutyl_2-methylbutanoate']),
 TaggedDocument(words=['p', 'o', 't', 'a', 's', 's', 'i', 'u', 'm', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['potassium_sorbate']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-ethyl-4-methylthiazole']),
 TaggedDocument(words=['2', ',', '2', "'", '-', '(', 'd', 'i', 't', 'h', 'i', 'o', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', ')', '-', 'd', 'i', 'f', 'u', 'r', 'a', 'n'], tags=["2,2'-(dithiodimethylene)-difuran"]),
 TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diethyl_sulfide']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl-4-(methylthio)butyrate']),
 TaggedDocument(words=['s', 'k', 'a', 't', 'o', 'l', 'e'], tags=['skatole']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4-methylthiazole']),
 TaggedDocument(words=['3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'd', 'i', 'o', 'n', 'e'], tags=['3,5-dimethyl-1,2-cyclopentadione']),
 TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['rhodinyl_butyrate']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_2-methylbutyrate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['phenethyl_butyrate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'a', 'l'], tags=['3-mercapto-2-methylpentanal']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['acetaldehyde']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['butyl_butyrate']),
 TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4,5-dimethyl-3-hydroxy-2,5-dihydrofuran-2-one']),
 TaggedDocument(words=['p', '-', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 't', 'y', 'r', 'e', 'n', 'e'], tags=['p-a-dimethyl_styrene']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['furfuryl_methyl_ether']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['5-methyl-3-hexen-2-one']),
 TaggedDocument(words=['t', 'h', 'e', 'o', 'b', 'r', 'o', 'm', 'i', 'n', 'e'], tags=['theobromine']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['butyl_laurate']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_isobutyrate']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['3-methylcyclohexanone']),
 TaggedDocument(words=['i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['isovaleric_acid']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethylpyrazine']),
 TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['sodium_acetate']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['trans-2-nonen-1-ol']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['2-hexenal']),
 TaggedDocument(words=['3', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '1', '-', 'v', 'i', 'n', 'y', 'l', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['3,4-dimethoxy-1-vinylbenzene']),
 TaggedDocument(words=['n', '-', 'n', 'o', 'n', 'a', 'n', 'a', 'l'], tags=['n-nonanal']),
 TaggedDocument(words=['2', '-', 'o', 'c', 't', 'e', 'n', 'a', 'l'], tags=['2-octenal']),
 TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,5-dimethylpyrazine']),
 TaggedDocument(words=['d', 'i', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['dipropyl_trisulfide']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-methyl-2-pentanone']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'f', 'a', 'r', 'n', 'e', 's', 'o', 'l'], tags=['(+/?)-dihydrofarnesol']),
 TaggedDocument(words=['c', 'a', 'r', 'v', 'a', 'c', 'r', 'o', 'l'], tags=['carvacrol']),
 TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diethyl_trisulfide']),
 TaggedDocument(words=['2', ',', '6', ',', '1', '0', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', ',', '1', '0', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', '-', '1', '4', '-', 'o', 'n', 'e'], tags=['2,6,10-trimethyl-2,6,10-pentadecatrien-14-one']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['propyl_mercaptan']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', 'n', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['methyl-n-methylanthranilate']),
 TaggedDocument(words=['c', 'a', 'f', 'f', 'e', 'i', 'n', 'e'], tags=['caffeine']),
 TaggedDocument(words=['(', '+', '/', '?', ')', '-', '2', '-', '(', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '2', '-', 'y', 'l', ')', 'p', 'r', 'o', 'p', 'i', '-', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['(+/?)-2-(5-methyl-5-vinyltetrahydrofuran-2-yl)propi-onaldehyde']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isopropyl_acetate']),
 TaggedDocument(words=['3', ',', '7', ',', '1', '1', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', ',', '1', '0', '-', 'd', 'o', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['3,7,11-trimethyl-2,6,10-dodecatrienal']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['benzyl_acetoacetate']),
 TaggedDocument(words=['t', 'r', 'i', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 't', 'r', 'a', 't', 'e'], tags=['triethyl_citrate']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['linalyl_formate']),
 TaggedDocument(words=['a', 's', 'c', 'o', 'r', 'b', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['ascorbic_acid']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_butyrate']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-isobutyl_thiazole']),
 TaggedDocument(words=['d', '-', 'r', 'i', 'b', 'o', 's', 'e'], tags=['d-ribose']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', 'a', 'n', 'e', 't', 'h', 'o', 'l', 'e'], tags=['trans-anethole']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['butyl_anthranilate']),
 TaggedDocument(words=['b', 'u', 't', 't', 'e', 'r', '_', 's', 't', 'a', 'r', 't', 'e', 'r', '_', 'd', 'i', 's', 't', 'i', 'l', 'l', 'a', 't', 'e'], tags=['butter_starter_distillate']),
 TaggedDocument(words=['l', '-', 'a', 's', 'p', 'a', 'r', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-aspartic_acid']),
 TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'p', 'y', 'r', 'a', 'n'], tags=['2,6,6-trimethyl-6-vinyltetrahydropyran']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['2-hepten-4-one']),
 TaggedDocument(words=['g', '-', 'o', 'c', 't', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-octalactone']),
 TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['terpinyl_butyrate']),
 TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['undecyl_alcohol']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2,6-dimethylpyridine']),
 TaggedDocument(words=['6', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 't', 'h', 'e', 'a', 's', 'p', 'i', 'r', 'a', 'n', 'e'], tags=['6-hydroxydihydrotheaspirane']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'o', 'o', 'l'], tags=['linalool']),
 TaggedDocument(words=['i', 's', 'o', 'e', 'u', 'g', 'e', 'n', 'o', 'l'], tags=['isoeugenol']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['cinnamaldehyde']),
 TaggedDocument(words=['g', 'u', 'a', 'i', 'e', 'n', 'e'], tags=['guaiene']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['3-methyl-1-butanethiol']),
 TaggedDocument(words=['b', 'u', 't', 't', 'e', 'r', '_', 'a', 'c', 'i', 'd', 's'], tags=['butter_acids']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isopulegyl_acetate']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-hydroxy-5-methyl-3(2h)-furanone']),
 TaggedDocument(words=['2', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-octanone']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'e', 'o', 'l'], tags=['isoborneol']),
 TaggedDocument(words=['d', '-', 'h', 'e', 'x', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-hexalactone']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['benzaldehyde_dimethyl_acetal']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_sulfide']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['propyl_alcohol']),
 TaggedDocument(words=['1', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['1-(methylthio)-2-butanone']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['trans-2-methyl-2-butenoic_acid']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isopropyl_formate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_phenylacetate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_ethyl_trisulfide']),
 TaggedDocument(words=['t', 'h', 'a', 'u', 'm', 'a', 't', 'i', 'n'], tags=['thaumatin']),
 TaggedDocument(words=['m', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['m-dimethoxybenzene']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['phenethyl_acetate']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['benzyl_salicylate']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-ethylbutyraldehyde']),
 TaggedDocument(words=['r', 'e', 's', 'o', 'r', 'c', 'i', 'n', 'o', 'l'], tags=['resorcinol']),
 TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['amyl_hexanoate']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['benzyl_formate']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['benzyl_alcohol']),
 TaggedDocument(words=['v', 'e', 'r', 'b', 'e', 'n', 'o', 'l'], tags=['verbenol']),
 TaggedDocument(words=['g', 'l', 'y', 'c', 'e', 'r', 'o', 'l', '_', 't', 'r', 'i', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['glycerol_tributyrate']),
 TaggedDocument(words=['a', ',', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['a,a-dimethylphenethyl_alcohol']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['hexyl_formate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4-methyl-5-vinylthiazole']),
 TaggedDocument(words=['c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', 'm', 'e', 'n', 't', 'h', 'o', 'n', 'e', '-', '8', '-', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cis-_and_trans-menthone-8-thioacetate']),
 TaggedDocument(words=['l', 'a', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['lauryl_alcohol']),
 TaggedDocument(words=['d', 'i', 'a', 'l', 'l', 'y', 'l', '_', 'p', 'o', 'l', 'y', 's', 'u', 'l', 'f', 'i', 'd', 'e', 's'], tags=['diallyl_polysulfides']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_heptanoate']),
 TaggedDocument(words=['2', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-phenylpropionaldehyde']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-propylphenol']),
 TaggedDocument(words=['f', 'a', 'r', 'n', 'e', 's', 'e', 'n', 'e'], tags=['farnesene']),
 TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-hydroxy-4-methyl_benzaldehyde']),
 TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '6', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['(z)-4-hydroxy-6-dodecenoic_acid_lactone']),
 TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['menthyl_isovalerate']),
 TaggedDocument(words=['o', 'l', 'e', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['oleic_acid']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'e'], tags=['p-mentha-1,3-diene']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['linalyl_octanoate']),
 TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['1-methyl-2-acetylpyrrole']),
 TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['amyl_alcohol']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-methylthio-2-butanone']),
 TaggedDocument(words=['2', '(', '1', '0', ')', '-', 'p', 'i', 'n', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['2(10)-pinen-3-ol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_propyl_trisulfide']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-2-pentenal']),
 TaggedDocument(words=['4', '-', '(', 'p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-(p-methoxyphenyl)-2-butanone']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['benzyl_methyl_sulfide']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['heptyl_isobutyrate']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['benzyl_tiglate']),
 TaggedDocument(words=['5', ',', '6', ',', '7', ',', '8', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'q', 'u', 'i', 'n', 'o', 'x', 'a', 'l', 'i', 'n', 'e'], tags=['5,6,7,8-tetrahydroquinoxaline']),
 TaggedDocument(words=['t', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', 'p', 'y', 'r', 'a', 'n'], tags=['tetrahydro-4-methyl-2-(2-methylpropen-1-yl)pyran']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['3-methyl-1-cyclopentadecanone']),
 TaggedDocument(words=['c', 'a', 'r', 'v', 'e', 'o', 'l'], tags=['carveol']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2,6-dimethoxyphenol']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_butyrate']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', 'e', 'n', 'e', '_', 'g', 'l', 'y', 'c', 'o', 'l'], tags=['propylene_glycol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-1-propanethiol']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['3-methyl-2-butanol']),
 TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', '-', '2', '-', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['5-ethyl-2-hydroxy-3-methylcyclopent-2-en-1-one']),
 TaggedDocument(words=['(', 'e', ')', '-', '3', '-', '(', 'z', ')', '-', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e)-3-(z)-6-nonadien-1-ol']),
 TaggedDocument(words=['b', 'o', 'r', 'n', 'e', 'o', 'l'], tags=['borneol']),
 TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', ',', '5', '-', 'h', 'e', 'p', 't', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6-methyl-3,5-heptadien-2-one']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'n', 'h', 'y', 'd', 'r', 'i', 'd', 'e'], tags=['acetic_anhydride']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_phenyl_disulfide']),
 TaggedDocument(words=['3', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-(methylthio)propyl_acetate']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-heptanone']),
 TaggedDocument(words=['n', '-', 'o', 'c', 't', 'a', 'n', 'a', 'l'], tags=['n-octanal']),
 TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['o-methylanisole']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['butyl_isovalerate']),
 TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['1-octen-3-one']),
 TaggedDocument(words=['2', ',', '4', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'd', '-', '3', '-', 'o', 'x', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2,4,5-trimethyl-d-3-oxazoline']),
 TaggedDocument(words=['2', '-', 't', 'r', 'i', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-tridecenal']),
 TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,4-dimethylbenzaldehyde']),
 TaggedDocument(words=['1', '-', 'a', 'm', 'i', 'n', 'o', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['1-amino-2-propanol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 't', 'h', 'i', 'o', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-methyl-5-thiomethylfuran']),
 TaggedDocument(words=['t', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['tetrahydrofurfuryl_alcohol']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['cis-3-hexenyl_lactate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['methyl_furoate']),
 TaggedDocument(words=['p', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['p-dimethoxybenzene']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['acetic_acid']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['ethyl_tiglate']),
 TaggedDocument(words=['m', 'y', 'r', 'c', 'e', 'n', 'e'], tags=['myrcene']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxodecanoic_acid_glyceride']),
 TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e)-2-octen-1-ol']),
 TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_2-methylbutyrate']),
 TaggedDocument(words=['s', 'a', 'n', 't', 'a', 'l', 'o', 'l', ',', '_', 'a', '_', 'a', 'n', 'd'], tags=['santalol,_a_and']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['butyl_lactate']),
 TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['neryl_acetate']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl_butyrate']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['trans-2-octen-1-yl_acetate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-methylbutyl-2-methyl_butyrate']),
 TaggedDocument(words=['d', ',', 'l', '-', 'v', 'a', 'l', 'i', 'n', 'e'], tags=['d,l-valine']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_propyl_disulfide']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['allyl_disulfide']),
 TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-diethyl-5-methylpyrazine']),
 TaggedDocument(words=['t', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['trimethylamine']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-decenoate']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['acetophenone']),
 TaggedDocument(words=['f', 'o', 'r', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['formic_acid']),
 TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-hydroxy-2,5-dimethyl-3(2h)-furanone']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoamyl_phenylacetate']),
 TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-2,4-decadienoic_acid_d-lactone']),
 TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['eugenyl_acetate']),
 TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['disodium_succinate']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', 't', 'r', 'a', 'n', 's', '-', '2', ',', '_', 'c', 'i', 's', '-', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl-trans-2,_cis-4-decadienoate']),
 TaggedDocument(words=['1', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['1-ethyl-2-acetylpyrrole']),
 TaggedDocument(words=['b', 'i', 'p', 'h', 'e', 'n', 'y', 'l'], tags=['biphenyl']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methyltetrahydrothiophen-3-one']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'n', 'o', 'n', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_3-nonenoate']),
 TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'c', 'i', 't', 'r', 'a', 't', 'e'], tags=['sodium_citrate']),
 TaggedDocument(words=['e', 's', 't', 'r', 'a', 'g', 'o', 'l', 'e'], tags=['estragole']),
 TaggedDocument(words=['o', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['o-propylphenol']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '_', '(', '3', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', ')', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', '8', '5', '%', ')', '_', 'p', 'l', 'u', 's', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '(', '3', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', ')', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', '1', '3', '%', ')'], tags=['2-ethyl_(3_or_5_or_6)-methoxypyrazine_(85%)_plus_2-methyl_(3_or_5_or_6)-methoxypyrazine_(13%)']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'a', 'l'], tags=['furfural']),
 TaggedDocument(words=['a', 'l', 'p', 'h', 'a', '-', 't', 'e', 'r', 'p', 'i', 'n', 'e', 'o', 'l'], tags=['alpha-terpineol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl_isovalerate']),
 TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['a-methylbenzyl_acetate']),
 TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'o', 'l'], tags=['rhodinol']),
 TaggedDocument(words=['q', 'u', 'i', 'n', 'i', 'n', 'e'], tags=['quinine']),
 TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'o', 'x', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-hydroxy-2-oxopropionic_acid']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['isoamyl_isovalerate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['phenethyl_propionate']),
 TaggedDocument(words=['g', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['g-ionone']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['2-propionylpyrrole']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['geranyl_isobutyrate']),
 TaggedDocument(words=['b', 'i', 's', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'm', 'e', 't', 'h', 'a', 'n', 'e'], tags=['bis-(methylthio)methane']),
 TaggedDocument(words=['d', '-', 'f', 'e', 'n', 'c', 'h', 'o', 'n', 'e'], tags=['d-fenchone']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_o-methoxybenzoate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['phenethyl_formate']),
 TaggedDocument(words=['l', 'a', 'u', 'r', 'i', 'c', '_', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['lauric_aldehyde']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['propionic_acid']),
 TaggedDocument(words=['m', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['m-cresol']),
 TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cyclohexyl_butyrate']),
 TaggedDocument(words=['2', ',', '3', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['2,3-pentanedione']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['methyl_sorbate']),
 TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '3', '-', '(', '2', 'h', ')', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['4,5-dihydro-3-(2h)thiophenone']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methyl-2-oxobutanoic_acid']),
 TaggedDocument(words=['2', "'", '-', 'a', 'm', 'i', 'n', 'o', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=["2'-aminoacetophenone"]),
 TaggedDocument(words=['(', 'e', ')', '-', '3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '5', ',', '7', '-', 'o', 'c', 't', 'a', 't', 'r', 'i', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['(e)-3,7-dimethyl-1,5,7-octatrien-3-ol']),
 TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-dodecadienal']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_3-mercaptopropionate']),
 TaggedDocument(words=['2', ',', '4', '-', 'p', 'e', 'n', 't', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2,4-pentadienal']),
 TaggedDocument(words=['5', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['5-isopropenyl-2-methyl-2-vinyltetrahydrofuran']),
 TaggedDocument(words=['b', 'i', 's', 'a', 'b', 'o', 'l', 'e', 'n', 'e'], tags=['bisabolene']),
 TaggedDocument(words=['(', 'z', ')', '(', 'z', ')', '-', '3', ',', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(z)(z)-3,6-nonadien-1-ol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_propyl_trisulfide']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['phenylacetaldehyde']),
 TaggedDocument(words=['p', '-', 't', 'o', 'l', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['p-tolyl-3-methyl_butyrate']),
 TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-2-decenoic_acid_d-lactone']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'j', 'a', 's', 'm', 'o', 'n', 'a', 't', 'e'], tags=['methyl_jasmonate']),
 TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['vanillin_acetate']),
 TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['undecen-1-ol']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-heptylfuran']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['methyl_mercaptan']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['phenethylamine']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_heptanoate']),
 TaggedDocument(words=['c', 'i', 's', '-', '2', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-2-nonen-1-ol']),
 TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'o', 'l'], tags=['hydroxycitronellol']),
 TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['(z)-4-dodecenal']),
 TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'a', 'l', ',', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['3-oxobutanal,_dimethyl_acetal']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_hexanoate']),
 TaggedDocument(words=['1', '-', '(', 'p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['1-(p-methoxyphenyl)-2-propanone']),
 TaggedDocument(words=['4', '-', '(', 'p', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-(p-hydroxyphenyl)-2-butanone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 's', '-', '4', '-', 'h', 'e', 'p', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_cis-4-heptenoate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'p', 't', 'a', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methylheptan-3-one']),
 TaggedDocument(words=['2', ',', '3', ',', '5', ',', '6', '-', 't', 'e', 't', 'r', 'a', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3,5,6-tetramethylpyrazine']),
 TaggedDocument(words=['2', '-', 's', 'e', 'c', '-', 'b', 'u', 't', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['2-sec-butylcyclohexanone']),
 TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['sodium_benzoate']),
 TaggedDocument(words=['3', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['3-octanol']),
 TaggedDocument(words=['3', '-', 'c', 'a', 'r', 'e', 'n', 'e'], tags=['3-carene']),
 TaggedDocument(words=['d', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-undecalactone']),
 TaggedDocument(words=['c', 'u', 'm', 'i', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['cuminaldehyde']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['linalyl_propionate']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['3-hexen-1-ol']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['3-phenylpropyl_cinnamate']),
 TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['n-butyl-2-methylbutyrate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methylbutyraldehyde']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['benzyl_isobutyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'm', 'y', 'r', 'i', 's', 't', 'a', 't', 'e'], tags=['methyl_myristate']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2,6-dimethylthiophenol']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['heptanoic_acid']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['butyl_acetate']),
 TaggedDocument(words=['d', '-', 'p', 'i', 'p', 'e', 'r', 'i', 't', 'o', 'n', 'e'], tags=['d-piperitone']),
 TaggedDocument(words=['c', 'i', 's', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', 'a', 'l'], tags=['cis-6-nonenal']),
 TaggedDocument(words=['2', ',', '2', ',', '4', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '3', '-', 'o', 'x', 'a', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'n', 'e'], tags=['2,2,4-trimethyl-1,3-oxacyclopentane']),
 TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-hydroxybenzoic_acid']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-methylbutyraldehyde']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-hexenoate']),
 TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2,4-dihydroxybenzoic_acid']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['geranyl_formate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_ethyl_sulfide']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['isoamyl_salicylate']),
 TaggedDocument(words=['2', ',', '5', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['2,5-xylenol']),
 TaggedDocument(words=['d', '-', 'n', 'e', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'l'], tags=['d-neomenthol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_furfuryl_disulfide']),
 TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-ethyl-2,6-dimethoxyphenol']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2-pentylpyridine']),
 TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'b', 'u', 't', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-hydroxy-4-phenylbutan-2-one']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['geranyl_acetate']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['phenethyl_benzoate']),
 TaggedDocument(words=['1', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['1-penten-3-ol']),
 TaggedDocument(words=['a', 'n', 'i', 's', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['anisyl_alcohol']),
 TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['diethyl_succinate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_p-hydroxybenzoate']),
 TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['nonyl_isovalerate']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['cinnamyl_cinnamate']),
 TaggedDocument(words=['n', '-', 'o', 'c', 't', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['n-octyl_isovalerate']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['propyl_propionate']),
 TaggedDocument(words=['a', '-', 'p', 'h', 'e', 'l', 'l', 'a', 'n', 'd', 'r', 'e', 'n', 'e'], tags=['a-phellandrene']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['isoamyl_alcohol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', '(', 'p', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methyl-3-(p-isopropylphenyl)-propionaldehyde']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '3', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-acetoxy-3-butanone']),
 TaggedDocument(words=['a', 'c', 'o', 'n', 'i', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['aconitic_acid']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['hexyl_alcohol']),
 TaggedDocument(words=['2', ',', '4', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2,4,5-trimethyl_thiazole']),
 TaggedDocument(words=['d', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['dimethyl_trisulfide']),
 TaggedDocument(words=['1', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', ',', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['1-phenyl-1,2-propanedione']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['ethyl_formate']),
 TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'd', 'i', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['sodium_diacetate']),
 TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'l', 'c', 'o', 'p', 'e', 'n', 't', '-', '2', '-', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['3-ethyl-2-hydroxy-4-methylcylcopent-2-en-1-one']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['citronellyl_butyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['methyl_laurate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobutyl_phenylacetate']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['isobutyl_alcohol']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['allyl_heptanoate']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-acetyl-2-thiazoline']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['butyl_alcohol']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_hexanoate']),
 TaggedDocument(words=['t', 'a', 'n', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['tannic_acid']),
 TaggedDocument(words=['m', 'y', 'r', 'i', 's', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['myristic_acid']),
 TaggedDocument(words=['2', ',', '2', ',', '3', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', '-', '3', '-', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,2,3-trimethylcyclopent-3-en-1-yl_acetaldehyde']),
 TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'g', 'e', 'n', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['hydrogen_sulfide']),
 TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n'], tags=['vanillin']),
 TaggedDocument(words=['4', '-', '(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', '-', '3', '-', 'b', 'u', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-(2-furyl)-3-buten-2-one']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['phenylacetic_acid']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'r', 'o', 't', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methylcrotonic_acid']),
 TaggedDocument(words=['5', '-', '_', 'a', 'n', 'd', '_', '6', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['5-_and_6-decenoic_acid']),
 TaggedDocument(words=['2', '-', 'o', 'c', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['2-octen-4-one']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methyltetrahydrofuran-3-one']),
 TaggedDocument(words=['1', ',', '2', '-', 'e', 't', 'h', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,2-ethanedithiol']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3-methylthio-1-hexanol']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isoamyl_butyrate']),
 TaggedDocument(words=['s', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['salicylaldehyde']),
 TaggedDocument(words=['l', '-', 'g', 'l', 'u', 't', 'a', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-glutamic_acid']),
 TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['5-hydroxy-4-octanone']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['hexyl_benzoate']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-pentanethiol']),
 TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['3-ethyl-2,6-dimethylpyrazine']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl-2-methylbutyrate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-3-butenal']),
 TaggedDocument(words=['g', 'l', 'y', 'c', 'i', 'n', 'e'], tags=['glycine']),
 TaggedDocument(words=['1', '0', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '-', '2', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['10-hydroxymethylene-2-pinene']),
 TaggedDocument(words=['4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', '2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-(methylthio)-2-oxobutanoic_acid']),
 TaggedDocument(words=['s', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'a', 't', 'e'], tags=['s-methyl_4-methylpentanethioate']),
 TaggedDocument(words=['f', 'u', 'm', 'a', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['fumaric_acid']),
 TaggedDocument(words=['2', ',', '2', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', '-', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,2-dimethyl-5-(1-methylpropen-1-yl)-__________________________________tetrahydrofuran']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-ethylhexanethiol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'r', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_acrylate']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-vinylphenol']),
 TaggedDocument(words=['2', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-(1-methylpropyl)thiazole']),
 TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '4', '-', 'c', 'i', 's', '-', '7', '-', 'c', 'i', 's', '-', 't', 'r', 'i', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans-4-cis-7-cis-tridecatrienal']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', '-', 'a', 'n', 'i', 's', 'a', 't', 'e'], tags=['ethyl_p-anisate']),
 TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-isopropylphenol']),
 TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-ethyloctanoic_acid']),
 TaggedDocument(words=['d', 'l', '-', 'i', 's', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'n', 'e'], tags=['dl-isomenthone']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cinnamyl_butyrate']),
 TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'd', 'i', 'o', 'n', 'e'], tags=['1-methyl-2,3-cyclohexadione']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'a', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dihydro-a-ionone']),
 TaggedDocument(words=['t', 'a', 'u', 'r', 'i', 'n', 'e'], tags=['taurine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_propyl_disulfide']),
 TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['2-ethyl-4-hydroxy-5-methyl-3(2h)-furanone']),
 TaggedDocument(words=['o', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['o-(methylthio)phenol']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isoamyl_nonanoate']),
 TaggedDocument(words=['1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['10-undecenoic_acid']),
 TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 'm', 'a', 'l', 'o', 'n', 'a', 't', 'e'], tags=['diethyl_malonate']),
 TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['bornyl_isovalerate']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['heptyl_formate']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['benzyl_butyrate']),
 TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['5-methyl-2-phenyl-2-hexenal']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-1-butanethiol']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-hydroxyhexanoate']),
 TaggedDocument(words=['m', 'a', 'l', 't', 'o', 'l'], tags=['maltol']),
 TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['heptyl_butyrate']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'l'], tags=['2,6-dimethyl-4-heptanol']),
 TaggedDocument(words=['2', '(', '4', ')', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '4', '(', '2', ')', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '4', 'h', '-', '1', ',', '3', ',', '5', '-', 'd', 'i', 't', 'h', 'i', 'a', 'z', 'i', 'n', 'e'], tags=['2(4)-isobutyl-4(2),6-dimethyldihydro-4h-1,3,5-dithiazine']),
 TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-ethylbenzaldehyde']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['propyl_isobutyrate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['methyl_phenylacetate']),
 TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['bornyl_formate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'l', 'i', 'n', 'o', 'l', 'e', 'a', 't', 'e', '_', '(', '4', '8', '%', ')', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'l', 'i', 'n', 'o', 'l', 'e', 'n', 'a', 't', 'e', '_', '(', '5', '2', '%', ')', '_', 'm', 'i', 'x', '-', 't', 'u', 'r', 'e'], tags=['methyl_linoleate_(48%)_methyl_linolenate_(52%)_mix-ture']),
 TaggedDocument(words=['g', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-undecalactone']),
 TaggedDocument(words=['5', 'h', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', ',', '7', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', '(', 'b', ')', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['5h-5-methyl-6,7-dihydrocyclopenta(b)pyrazine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', 'c', 'i', 's', '-', '4', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl-cis-4-octenoate']),
 TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['2,5-dimethyl-4-methoxy-3(2h)-furanone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'r', 'o', 't', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_crotonate']),
 TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-hexenyl_phenylacetate']),
 TaggedDocument(words=['3', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'n', 'e'], tags=['3-nonanone']),
 TaggedDocument(words=['l', '-', 'm', 'a', 'l', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-malic_acid']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['acetone']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l'], tags=['acetal']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_10-undecenoate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '8', '-', 't', 'h', 'i', 'o', 'l', '-', '3', '-', 'o', 'n', 'e'], tags=['p-mentha-8-thiol-3-one']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', '2', '-', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['allyl_2-furoate']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['geranyl_tiglate']),
 TaggedDocument(words=['3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['3,7-dimethyl-1-octanol']),
 TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-methylphenol']),
 TaggedDocument(words=['p', ',', 'a', ',', 'a', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p,a,a-trimethylbenzyl_alcohol']),
 TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['butyl_formate']),
 TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['hexyl_propionate']),
 TaggedDocument(words=['2', ',', '3', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3,5-trimethylpyrazine']),
 TaggedDocument(words=['d', 'e', 'h', 'y', 'd', 'r', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'f', 'u', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['dehydromenthofurolactone']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'a', 'r', 'v', 'e', 'o', 'l'], tags=['dihydrocarveol']),
 TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'o', 'c', 't', 'a', '-', '2', ',', '6', '-', 'd', 'i', 'e', 'n', 'y', 'l', '-', '2', '-', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['2-trans-3,7-dimethylocta-2,6-dienyl-2-ethyl_butanoate']),
 TaggedDocument(words=['i', 's', 'o', 'e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['isoeugenyl_methyl_ether']),
 TaggedDocument(words=['1', ',', '1', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'e', 't', 'h', 'a', 'n', 'e'], tags=['1,1-dimethoxyethane']),
 TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', '-', '2', '-', 'e', 'n', 'e', '-', '1', ',', '4', '-', 'd', 'i', 'o', 'n', 'e'], tags=['2,6,6-trimethylcyclohex-2-ene-1,4-dione']),
 TaggedDocument(words=['c', 'a', 'r', 'v', 'o', 'n', 'e'], tags=['carvone']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'a', 'r', 'v', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['dihydrocarvyl_acetate']),
 TaggedDocument(words=['d', 'l', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'l', 'a', 'n', 'i', 'n', 'e'], tags=['dl-phenylalanine']),
 TaggedDocument(words=['2', ',', '3', ',', '5', '-', 't', 'r', 'i', 't', 'h', 'i', 'a', 'h', 'e', 'x', 'a', 'n', 'e'], tags=['2,3,5-trithiahexane']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-methylpentanoate']),
 TaggedDocument(words=['4', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'm', 'i', 'd', 'i', 'n', 'e'], tags=['4-acetyl-2-methylpyrimidine']),
 TaggedDocument(words=['t', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['thiazole']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'a', 't', 'e'], tags=['3-methylbutyl-2-methylpropanoate']),
 TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['bornyl_acetate']),
 TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-phenylpropyl_acetate']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-heptanone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'l', 'e', 'a', 't', 'e'], tags=['ethyl_oleate']),
 TaggedDocument(words=['i', 'n', 'd', 'o', 'l', 'e'], tags=['indole']),
 TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['n-butyric_acid']),
 TaggedDocument(words=['p', 'y', 'r', 'r', 'o', 'l', 'i', 'd', 'i', 'n', 'e'], tags=['pyrrolidine']),
 TaggedDocument(words=['d', ',', 'l', '-', 'm', 'e', 't', 'h', 'i', 'o', 'n', 'i', 'n', 'e'], tags=['d,l-methionine']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-acetyl-3-methylpyrazine']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-pentanone']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl-3-hydroxybutyrate']),
 TaggedDocument(words=['p', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-vinylphenol']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'a', 'l', '_', '(', 'n', 'e', 'r', 'a', 'l', ')'], tags=['citral_(neral)']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-propionyl-2-thiazoline']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isobornyl_propionate']),
 TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-hydroxy-2-pentanone']),
 TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,6-dimethylpyrazine']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '(', 'e', ')', '-', '2', '-', '(', 'z', ')', '-', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_(e)-2-(z)-4-decadienoate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['4-methyl-2-phenyl-2-pentenal']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'h', 'o', 'r', 'o', 'n', 'e'], tags=['isophorone']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobornyl_acetate']),
 TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['1-octen-3-yl_acetate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'f', 'u', 'r', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_2-methyl-3-furyl_disulfide']),
 TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['o-methoxycinnamaldehyde']),
 TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['3-mercapto-3-methyl-1-butanol']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['benzyl_hexanoate']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['furfuryl_acetate']),
 TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['piperidine']),
 TaggedDocument(words=['g', 'l', 'y', 'c', 'e', 'r', 'o', 'l'], tags=['glycerol']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-acetyl-3-ethylpyrazine']),
 TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['3-decanone']),
 TaggedDocument(words=['(', 'e', ',', 'e', ')', '-', '3', ',', '5', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['(e,e)-3,5-octadien-2-one']),
 TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['acetaldehyde_diisoamyl_acetal']),
 TaggedDocument(words=['a', '-', 'd', 'a', 'm', 'a', 's', 'c', 'o', 'n', 'e'], tags=['a-damascone']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['benzoic_acid']),
 TaggedDocument(words=['1', ',', '3', ',', '5', '-', 'u', 'n', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'e', '_', '(', 'a', '_', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', '1', ',', '3', '(', 'e', ')', ',', '5', '(', 'z', ')', '-', '_', 'a', 'n', 'd', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '1', ',', '3', '(', 'e', ')', ',', '5', '(', 'e', ')', '-', 'i', 's', 'o', 'm', 'e', 'r', 's', ')'], tags=['1,3,5-undecatriene_(a_mixture_of_1,3(e),5(z)-_and_______________1,3(e),5(e)-isomers)']),
 TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['3-methyl-2-buten-1-ol']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['benzyl_isovalerate']),
 TaggedDocument(words=['w', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['w-pentadecalactone']),
 TaggedDocument(words=['f', 'a', 'r', 'n', 'e', 's', 'o', 'l'], tags=['farnesol']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-3-octen-1-ol']),
 TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'i', 'o', 'l'], tags=['geraniol']),
 TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['furfuryl_propionate']),
 TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cinnamyl_acetate']),
 TaggedDocument(words=['g', '-', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-decalactone']),
 TaggedDocument(words=['3', ',', '4', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['3,4-xylenol']),
 TaggedDocument(words=['p', '-', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-ethoxybenzaldehyde']),
 TaggedDocument(words=['a', '-', 'h', 'e', 'x', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['a-hexyl_cinnamaldehyde']),
 TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['phenethyl_hexanoate']),
 TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-acetylthiazole']),
 TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'n', 'g', 'e', 'l', 'a', 't', 'e'], tags=['isobutyl_angelate']),
 TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e', 'e', 't', 'h', 'a', 'n', 'o', 'l'], tags=['4-methyl-5-thiazoleethanol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['methyl_(methylthio)acetate']),
 TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['p-methylanisole']),
 TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'o', 'n', 'a', 'l'], tags=['piperonal']),
 TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['allyl_hexanoate']),
 TaggedDocument(words=['e', 'r', 'y', 't', 'h', 'r', 'o', 'b', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['erythrobic_acid']),
 TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['dihydroxyacetone']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['isopropenylpyrazine']),
 TaggedDocument(words=['3', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['3,4-dimethyl-1,2-cyclopentanedione']),
 TaggedDocument(words=['4', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-propenyl-2,6-dimethoxyphenol']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_hexanoate']),
 TaggedDocument(words=['3', '-', 'o', 'c', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-octen-2-one']),
 TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['2-hexen-1-yl_acetate']),
 TaggedDocument(words=['p', 'u', 'l', 'e', 'g', 'o', 'n', 'e'], tags=['pulegone']),
 TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['amyl_octanoate']),
 TaggedDocument(words=['d', 'i', 'a', 'l', 'l', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diallyl_trisulfide']),
 TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['linalyl_benzoate']),
 TaggedDocument(words=['3', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-octanone']),
 TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['benzophenone']),
 TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'o', 'n', 'e'], tags=['isopulegone']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'l'], tags=['2-pentanol']),
 TaggedDocument(words=['4', '-', 'h', 'e', 'x', 'e', 'n', 'e', '-', '3', '-', 'o', 'n', 'e'], tags=['4-hexene-3-one']),
 TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['o-methoxybenzaldehyde']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['isoamyl_laurate']),
 TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_disulfide']),
 TaggedDocument(words=['s', 't', 'y', 'r', 'e', 'n', 'e'], tags=['styrene']),
 TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl_isovalerate']),
 TaggedDocument(words=['4', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '3', '-', 'b', 'u', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-phenyl-3-buten-2-one']),
 TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['ethyl_isovalerate']),
 TaggedDocument(words=['1', '-', 'p', '-', 'm', 'e', 'n', 't', 'h', 'e', 'n', '-', '9', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['1-p-menthen-9-yl_acetate']),
 TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['2-hydroxyacetophenone']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-pentyl_butyrate']),
 TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['isoamyl_cinnamate']),
 TaggedDocument(words=['t', 'h', 'e', 'a', 's', 'p', 'i', 'r', 'a', 'n', 'e'], tags=['theaspirane']),
 TaggedDocument(words=['d', 'i', 'a', 'c', 'e', 't', 'y', 'l'], tags=['diacetyl']),
 TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'o', 'l'], tags=['citronellol']),
 TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-pentadecanone']),
 TaggedDocument(words=['(', 'e', ')', '-', '7', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'o', 'c', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['(e)-7-methyl-3-octen-2-one']),
 TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'e', 'n', 'a', 'l'], tags=['2-nonenal']),
 TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['(z)-4-propenylphenol']),
 TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['propyl_acetate']),
 TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2-propylpyridine']),
 ...]

In [7]:
def make_plot_simple(name, points, labels, publish):
    traces = []
    traces.append(go.Scattergl(
            x = points[:, 0],
            y = points[:, 1],
            mode = 'markers',
            marker = dict(
                color = sns.xkcd_rgb["black"],
                size = 8,
                opacity = 0.6,
                #line = dict(width = 1)
            ),
            text = labels,
            hoverinfo = 'text',
        )
        )
                  
    layout = go.Layout(
        xaxis=dict(
            autorange=True,
            showgrid=False,
            zeroline=True,
            showline=True,
            autotick=True,
            ticks='',
            showticklabels=False
        ),
        yaxis=dict(
            autorange=True,
            showgrid=False,
            zeroline=True,
            showline=True,
            autotick=True,
            ticks='',
            showticklabels=False
        )
        )
                  
    fig = go.Figure(data=traces, layout=layout)
    if publish:
        plotter = py.iplot
    else:
        plotter = offline.plot
    plotter(fig, filename=name + '.html')

In [8]:
"""
Train Doc2Vec Model

"""
time_start = time.time()

cores = multiprocessing.cpu_count()

#dm/m,d50,n5,w5,mc5,s0.001,t3
#model = gensim.models.doc2vec.Doc2Vec(size=50, min_count=5, iter=55)

# PV-DM w/ average
model = gensim.models.doc2vec.Doc2Vec(size=50, window=5, min_count=3, iter=100)
model.build_vocab(corpus, keep_raw_vocab=False)

print "Unique Character Count", len(model.wv.vocab)
print "Total Compoounds Count:", model.corpus_count

%time model.train(corpus, total_examples=model.corpus_count, epochs=model.iter)

print 'Doc2Vec training done! Time elapsed: {} seconds'.format(time.time()-time_start)


save_name = 'embeddings' + os.sep + 'embeddings_flavor_compounds_50dim.bin'
model.save_word2vec_format(save_name, doctag_vec=True, word_vec=False, prefix='*dt_', fvocab=None, binary=True)


Unique Character Count 46
Total Compoounds Count: 1107
CPU times: user 4.24 s, sys: 2.57 s, total: 6.81 s
Wall time: 5.05 s
Doc2Vec training done! Time elapsed: 5.10063791275 seconds

In [9]:
"""
TSNE of Doc2Vec

"""
time_start = time.time()
X = model.docvecs
tsne = TSNE(n_components=2)
X_tsne = tsne.fit_transform(X)

print 't-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start)


t-SNE done! Time elapsed: 5.66017103195 seconds

In [10]:
labels = []

for doc_id in range(0, len(model.docvecs)):
    labels.append(model.docvecs.index_to_doctag(doc_id))

make_plot_simple(name='compound2vec_char2',
          points=X_tsne, 
          labels=labels, 
          publish=False)

In [36]:
load_name = 'embeddings' + os.sep + 'embeddings_flavor_compounds_50dim.bin'
#char_embbeding = gensim.models.Word2Vec.load(load_name)

from gensim.models.keyedvectors import KeyedVectors
word_vectors = KeyedVectors.load_word2vec_format(load_name, binary=True)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-36-0aabed540d8b> in <module>()
      4 from gensim.models.keyedvectors import KeyedVectors
      5 word_vectors = KeyedVectors.load_word2vec_format(load_name, binary=True)
----> 6 print len(word_vectors)

TypeError: object of type 'KeyedVectors' has no len()

In [ ]: