In [2]:
import os
import time
import multiprocessing
import gensim
from sklearn.manifold import TSNE
import seaborn as sns
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
In [3]:
# Directory containing the input tables, and the compound-info TSV inside it.
path = 'data'
comp_info = os.path.join(path, 'comp_info.tsv')
# {compound_id: [compound_name, CAS_number]}
def load_compounds(path):
    """Read a tab-separated compound table into a dictionary.

    Every data row is split on tab characters: the first field becomes the
    dictionary key and the remaining fields are stored, in order, as a list.
    Lines whose first character is '#' are skipped, and so are blank or
    whitespace-only lines.

    Args:
        path: Location of the TSV file to read.

    Returns:
        dict mapping the first field of each row (str) to the list of the
        remaining fields (list of str). When the same key appears on several
        rows, the last row wins.
    """
    compounds = {}
    with open(path, 'r') as f:
        for line in f:
            # Skip comment lines and blank lines; a blank line would
            # otherwise be stored under the empty-string key.
            if line.startswith('#') or not line.strip():
                continue
            # rstrip() removes the newline together with any other trailing
            # whitespace, so trailing empty fields are not preserved.
            line_split = line.rstrip().split('\t')
            compounds[line_split[0]] = line_split[1:]
    return compounds
In [4]:
# Load the compound table from the TSV file and echo it as the cell output.
compounds = load_compounds(path=comp_info)
compounds
Out[4]:
{'344': ['s-methyl_3-methylbutanethioate', '23747-45-7'],
'0': ['jasmone', '488-10-8'],
'346': ['4-(2,6,6-trimethyl-cyclohexa-1,3-dienyl)but-2-en-4-one',
'23696-85-7'],
'347': ['cinnamic_acid', '621-82-9'],
'340': ['1-methylnaphthalene', '90-12-0'],
'341': ['5-ethyl-3-hydroxy-4-methyl-2(5h)-furanone', '698-10-2'],
'342': ['p-menthane-3,8-diol', '42822-86-6'],
'343': ['isopropyl_myristate', '110-27-0'],
'810': ['tannic_acid', '1401-55-4'],
'811': ['myristic_acid', '544-63-8'],
'812': ['2,2,3-trimethylcyclopent-3-en-1-yl_acetaldehyde', '4501-58-0'],
'813': ['d-octalactone', '698-76-0'],
'348': ['limonene_(d-,l-,_and_dl-)',
'5989-27-5,7705-14-8,5989-54-8,5989-27-5'],
'349': ['guaiene', '88-84-6'],
'816': ['5-_and_6-decenoic_acid', '85392-03-6,85392-04-7'],
'817': ['2-octen-4-one', '4643-27-0'],
'595': ['2-octanone', '111-13-7'],
'719': ['propionic_acid', '79-09-4'],
'718': ['caryophyllene_alcohol', '4586-22-5'],
'717': ['phenethyl_formate', '104-62-1'],
'716': ['methyl_o-methoxybenzoate', '606-45-1'],
'715': ['d-fenchone', '4695-62-9'],
'714': ['bis-(methylthio)methane', '1618-26-4'],
'713': ['geranyl_isobutyrate', '2345-26-8'],
'712': ['2-propionylpyrrole', '1073-26-3'],
'711': ['g-ionone', '79-76-5'],
'710': ['phenethyl_propionate', '122-70-3'],
'915': ['d,l-methionine', '59-51-8'],
'914': ['pyrrolidine', '123-75-1'],
'606': ['phenethyl_acetate', '103-45-7'],
'917': ['furfuryl_isovalerate', '13678-60-9'],
'594': ['4-hydroxy-5-methyl-3(2h)-furanone', '19322-27-1'],
'736': ['2-methylundecanal', '110-41-8'],
'916': ['2-acetyl-3-methylpyrazine', '23787-80-6'],
'1061': ['methyl-2-methylbutyrate', '868-57-5'],
'911': ['2-acetylpyridine', '1122-62-9'],
'1063': ['butyl_ethyl_disulfide', '63986-03-8'],
'1062': ['phosphoric_acid', '7664-38-2'],
'1065': ['benzaldehyde_propylene_glycol_acetal', '2568-25-4'],
'1064': ['phenol', '108-95-2'],
'619': ['lauryl_alcohol', '112-53-8'],
'910': ['ethyl_oleate', '111-62-6'],
'913': ['n-butyric_acid', '107-92-6'],
'298': ['formic_acid', '64-18-6'],
'299': ['nonanoic_acid', '112-05-0'],
'296': ['methyl-3-methylthiopropionate', '13532-18-8'],
'297': ['p-a-dimethylbenzyl_alcohol', '536-50-5'],
'294': ['2-pentenal', '764-39-6'],
'295': ['eugenyl_methyl_ether', '93-15-2'],
'292': ['1-octanol', '111-87-5'],
'293': ['mintlactone', '13341-72-5'],
'290': ['ethyl_thioacetate', '625-60-5'],
'291': ['p-isopropylacetophenone', '645-13-6'],
'591': ['butter_acids', '85536-25-0'],
'590': ['methyl_1-propenyl_disulfide', '5905-47-5'],
'593': ['ethyl_3-methylpentanoate', '5870-68-8'],
'592': ['10-hydroxymethylene-2-pinene', '128-50-7'],
'199': ['2-undecenal', '2463-77-6'],
'198': ['valeric_acid', '109-52-4'],
'597': ['d-hexalactone', '823-22-3'],
'596': ['linalyl_isovalerate', '1118-27-0'],
'195': ['l-histidine', '71-00-1'],
'194': ['2,3-diethylpyrazine', '15707-24-1'],
'197': ['2,6-dimethylthiophenol', '118-72-9'],
'196': ['undecanal', '112-44-7'],
'191': ['butyl_hexanoate', '626-82-4'],
'190': ['(e,e)-2,4-decadien-1-ol', '18409-21-7'],
'193': ['4,5-dihydro-3-(2h)thiophenone', '1003-04-9'],
'192': ['2-ethylbutyric_acid', '88-09-5'],
'270': ['2-(methylthio)ethanol', '5271-38-5'],
'271': ['4-acetoxy-2,5-dimethyl-3(2h)-furanone', '4166-20-5'],
'272': ['2,4-nonadienal', '6750-03-4'],
'273': ['terpinyl_acetate', '80-26-2'],
'274': ['3-hexanone', '589-38-8'],
'275': ['lauric_acid', '143-07-7'],
'276': ['4-heptenal_(cis-_and_trans-)', '6728-31-0'],
'277': ['2,5-xylenol', '95-87-4'],
'278': ['isoborneol', '124-76-5'],
'279': ['3-methylcyclohexanone', '591-24-2'],
'738': ['vanillin_acetate', '881-68-5'],
'1067': ['1,4-cineole', '470-67-7'],
'524': ['octanoic_acid', '124-07-2'],
'525': ['3-methylbutyl_2-methylbutanoate', '27625-35-0'],
'526': ['dl-phenylalanine', '150-30-1'],
'527': ['2-ethyl-4-methylthiazole', '15679-12-6'],
'520': ['trithioacetone', '828-26-2'],
'521': ['dihydro-b-ionone', '17283-81-7'],
'522': ['linalool_oxide', '1365-19-1'],
'523': ['3-mercapto-3-methylbutyl_formate', '50746-10-6'],
'1014': ['2-pentyl_acetate', '626-38-0'],
'1015': ['nerol', '106-25-2'],
'599': ['methyl_sulfide', '75-18-3'],
'1017': ['geranyl_hexanoate', '10032-02-7'],
'528': ["2,2'-(dithiodimethylene)-difuran", '4437-20-1'],
'529': ['diethyl_sulfide', '352-93-2'],
'994': ['theaspirane', '36431-72-8'],
'1013': ['n-valeraldehyde', '110-62-3'],
'1025': ['l-phenylalanine', '63-91-2'],
'449': ['(+/?)_heptan-3-yl_butyrate', '39026-94-3'],
'448': ['ethyl_3-phenylpropionate', '2021-28-5'],
'443': ['2-pentyl_butyrate', '60415-61-4'],
'442': ['nerolidol', '7212-44-4'],
'441': ['geranyl_propionate', '105-90-8'],
'440': ['2-hydroxyacetophenone', '582-24-1'],
'447': ['diisopropyl_disulfide', '4253-89-8'],
'446': ['prenylthiol', '5287-45-6'],
'445': ['2-methylpentanal', '123-15-9'],
'444': ['isopropyl_propionate', '637-78-5'],
'108': ['isobutyraldehyde', '78-84-2'],
'109': ['5-methyl-2,3-hexanedione', '13706-86-0'],
'102': ['furfuryl_methyl_ether', '13679-46-4'],
'103': ['dl-(3-amino-3-carboxypropyl)dimethylsulfonium_chloride',
'1115-84-0'],
'100': ['(z)-8-tetradecenal', '169054-69-7'],
'101': ['b-caryophyllene', '87-44-5'],
'106': ['isobornyl_isovalerate', '7779-73-9'],
'107': ['methyl-2-pyrrolyl_ketone', '1072-83-9'],
'104': ['ethyl-3-methylthiopropionate', '13327-56-5'],
'105': ['pyruvic_acid', '127-17-3'],
'902': ['2-nonenal', '2463-53-8'],
'903': ['4-acetyl-2-methylpyrimidine', '67860-38-2'],
'39': ['hexyl_isovalerate', '10032-13-0'],
'38': ['4-hydroxy-3-methoxybenzoic_acid', '121-34-6'],
'906': ['bornyl_acetate', '76-49-3'],
'907': ['2-phenylpropionaldehyde', '93-53-8'],
'904': ['thiazole', '288-47-1'],
'905': ['3-methylbutyl-2-methylpropanoate', '2050-01-3'],
'33': ['2,6,6-trimethyl-1,2-cyclohexen-1-carboxaldehyde', '977045-71-8'],
'32': ['(+/?)-(2,6,6-trimethyl-2-hydroxycyclohexylidene)_ace-tic_acid_g-lactone',
'15356-74-8'],
'31': ['(z)-3-hexenyl_valerate', '35852-46-1'],
'30': ['terpinyl_propionate', '80-27-3'],
'37': ["4'-methylacetophenone", '122-00-9'],
'36': ['3-methyl-1,2,4-trithiane', '43040-01-3'],
'35': ['nonyl_acetate', '143-13-5'],
'34': ['2-acetylfuran', '1192-62-7'],
'641': ['3-methyl-1-cyclopentadecanone', '541-91-3'],
'640': ['g-heptalactone', '105-21-5'],
'643': ['2,6-dimethoxyphenol', '91-10-1'],
'642': ['carveol', '99-48-9'],
'645': ['2-hexenal', '505-57-7'],
'644': ['isopropyl_butyrate', '638-11-9'],
'438': ['3,7,11-trimethyl-2,6,10-dodecatrienal', '19317-11-4'],
'439': ['hexyl_hexanoate', '6378-65-0'],
'436': ['n-octyl_formate', '112-32-3'],
'437': ['hexyl_trans-2-hexenoate', '.'],
'434': ['linalyl_acetate', '115-95-7'],
'435': ['1-hexadecanol', '36653-82-4'],
'432': ['dihydro-b-ionol', '3293-47-8'],
'433': ['isopulegol', '89-79-2'],
'430': ['2-isobutyl-3-methoxypyrazine', '24683-00-9'],
'431': ['9,12-octadecadienoic_acid_(48%)_plus_9,12,15-octadeca-_trienoinc_acid_(52%)_(methyl_esters)',
'99999-08-0'],
'1002': ['4,5-dimethyl-3-hydroxy-2,5-dihydrofuran-2-one', '28664-35-9'],
'339': ['propyl_heptanoate', '7778-87-2'],
'338': ['furfuryl_alcohol', '98-00-0'],
'335': ['p-menth-1-en-3-ol', '491-04-3'],
'334': ['cinnamyl_benzoate', '5320-75-2'],
'337': ['2-methylpropyl-3-methylbutyrate', '589-59-3'],
'336': ['methyl_phenyl_sulfide', '100-68-5'],
'331': ['p-menth-8-en-2-one', '3792-53-8'],
'330': ['3-methylthiopropyl_isothiocyanate', '505-79-3'],
'333': ['isobutyl_acetate', '110-19-0'],
'332': ['isobutyl_benzoate', '120-50-3'],
'744': ['ethyl_heptanoate', '106-30-9'],
'1000': ['furfuryl_mercaptan', '98-02-2'],
'745': ['cis-2-nonen-1-ol', '41453-56-9'],
'854': ['diethyl_malonate', '105-53-3'],
'818': ['2-methyltetrahydrofuran-3-one', '3188-00-9'],
'856': ['heptyl_formate', '112-23-2'],
'857': ['benzyl_butyrate', '103-37-7'],
'850': ['pyrazine', '290-37-9'],
'851': ['o-(methylthio)phenol', '1073-29-6'],
'852': ['isoamyl_nonanoate', '7779-70-6'],
'345': ['g-nonalactone', '104-61-0'],
'858': ['5-methyl-2-phenyl-2-hexenal', '21834-92-4'],
'859': ['2-methyl-1-butanethiol', '1878-18-8'],
'1081': ['allyl_2-furoate', '4208-49-5'],
'748': ['ethyl_octadecanoate', '111-61-5'],
'6': ['ethyl-3-hydroxybutyrate', '5405-41-4'],
'900': ['potassium_sorbate', '590-00-1'],
'848': ['4-hydroxy-4-methyl-5-hexenoic_acid_gamma-lactone', '1073-11-6'],
'99': ['(+/?)-ethyl_3-acetoxy-2-methylbutyrate', '139564-43-5'],
'98': ['furfuryl_thioacetate', '13678-68-7'],
'844': ['cinnamyl_butyrate', '103-61-7'],
'1085': ['benzyl_mercaptan', '100-53-8'],
'91': ['methyl_valerate', '624-24-8'],
'90': ['menthol', '89-78-1'],
'93': ['g-hexalactone', '695-06-7'],
'92': ['diphenyl_ether', '101-84-8'],
'95': ['tolualdehydes,_mixed_o-,_m-,_p-', '1334-78-7'],
'94': ['p-methoxybenzaldehyde', '123-11-5'],
'97': ['3-hexanol', '623-37-0'],
'96': ['propenyl_propyl_disulfide', '5905-46-4'],
'814': ['4-(2-furyl)-3-buten-2-one', '623-15-4'],
'1030': ['methyl_acetate', '79-20-9'],
'815': ['isoamyl_phenylacetate', '102-19-2'],
'1098': ['2-ethyl-4-hydroxy-5-methyl-3(2h)-furanone', '27538-10-9'],
'1066': ['butyl_heptanoate', '5454-28-4'],
'740': ['n-octyl_isovalerate', '7786-58-5'],
'741': ['2-heptylfuran', '3777-71-7'],
'742': ['methyl_mercaptan', '74-93-1'],
'743': ['10-undecenoic_acid', '112-38-9'],
'559': ['4-methyl-2-pentanone', '108-10-1'],
'558': ['dipropyl_trisulfide', '6028-61-1'],
'746': ['hydroxycitronellol', '107-74-4'],
'747': ['3-phenylpropionic_acid', '501-52-0'],
'555': ['2-octenal', '2363-89-5'],
'554': ['n-nonanal', '124-19-6'],
'557': ['2-ethylbutyraldehyde', '97-96-1'],
'556': ['2,5-dimethylpyrazine', '123-32-0'],
'551': ['sodium_acetate', '127-09-3'],
'550': ['2-ethylpyrazine', '13925-00-3'],
'553': ['propylene_glycol', '57-55-6'],
'552': ['trans-2-nonen-1-ol', '31502-14-4'],
'238': ['a-pinene', '80-56-8'],
'239': ['2-ethyl-4,5-dimethyloxazole', '53833-30-0'],
'234': ['2-nonanol', '628-99-9'],
'235': ['hexanal', '66-25-1'],
'236': ['propyl_isovalerate', '557-00-6'],
'237': ['methyl_cyclohexanecarboxylate', '4630-82-4'],
'230': ['methyl_phenyl_disulfide', '14173-25-2'],
'231': ['methoxypyrazine', '3149-28-8'],
'232': ['(z)-3-hexenyl_propionate', '33467-74-2'],
'233': ['(z)-3-hexenyl_isobutyrate', '41519-23-7'],
'1050': ['2,3-heptanedione', '977043-66-5'],
'1051': ['3-decen-2-one', '10519-33-2'],
'1052': ['linalyl_isobutyrate', '78-35-3'],
'1053': ['2-methyl-3-pentenoic_acid', '37674-63-8'],
'1054': ['2-heptenal', '2463-63-0'],
'1055': ['neryl_isobutyrate', '2345-24-6'],
'1056': ['hydroxycitronellal_dimethyl_acetal', '107-75-5'],
'1057': ['ethyl_2-methylbutyrate', '7452-79-1'],
'1058': ['phenethyl_octanoate', '5457-70-5'],
'1059': ['propyl_thioacetate', '2307-10-0'],
'992': ['2-pentanethiol', '2084-19-7'],
'1': ['5-methylhexanoic_acid', '628-46-6'],
'614': ['o-methylanisole', '578-58-5'],
'146': ['citronellyl_formate', '105-85-1'],
'147': ['isopentylamine', '107-85-7'],
'144': ['isoamyl_propionate', '105-68-0'],
'145': ['1,3-propanedithiol', '109-80-8'],
'142': ['5-methylquinoxaline', '13708-12-8'],
'143': ['4-[(2,6,6)-trimethyl-cyclohex-1-enyl]-but-2-en-4-one', '35044-68-9'],
'140': ['cyclohexyl_acetate', '622-45-7'],
'141': ['neryl_formate', '2142-94-1'],
'612': ['verbenol', '473-67-6'],
'613': ['diallyl_trisulfide', '2050-87-5'],
'610': ['hexyl_acetate', '142-92-7'],
'611': ['benzyl_alcohol', '100-51-6'],
'616': ['hexyl_formate', '629-33-4'],
'617': ['4-methyl-5-vinylthiazole', '1759-28-0'],
'148': ['benzyl_benzoate', '120-51-4'],
'149': ['isobutyl_formate', '542-55-2'],
'1007': ['2-decenal', '3913-71-1'],
'912': ['indole', '120-72-9'],
'1006': ['1-phenyl-1-propanol', '93-54-9'],
'951': ['farnesol', '4602-84-0'],
'1005': ['cis-3-hexenyl_formate', '33467-73-1'],
'1082': ['nonyl_alcohol', '143-08-8'],
'1004': ['propionaldehyde', '123-38-6'],
'948': ['2-ethyl-6-methylpyrazine', '13925-03-6'],
'949': ['benzyl_isovalerate', '103-38-8'],
'946': ['1,3,5-undecatriene_(a_mixture_of_1,3(e),5(z)-_and_______________1,3(e),5(e)-isomers)',
'16356-11-9'],
'947': ['3-methyl-2-buten-1-ol', '556-82-1'],
'944': ['acetaldehyde_diisoamyl_acetal', '13002-09-0'],
'945': ['a-damascone', '43052-87-5'],
'942': ['3,5-dimethyl-1,2-cyclopentadione', '13494-07-0'],
'943': ['(e,e)-3,5-octadien-2-one', '30086-02-3'],
'940': ['2-acetyl-3-ethylpyrazine', '32974-92-8'],
'941': ['3-decanone', '928-80-3'],
'768': ['butyl_acetate', '123-86-4'],
'689': ['2,4,5-trimethyl_thiazole', '13623-11-5'],
'688': ['acetophenone', '98-86-2'],
'685': ['ethyl_propyl_disulfide', '30453-31-7'],
'684': ['d,l-valine', '516-06-3'],
'687': ['ethyl_trans-2-decenoate', '7367-88-6'],
'686': ['trimethylamine', '75-50-3'],
'681': ['cadinene', '29350-73-0'],
'680': ['neryl_acetate', '141-12-8'],
'683': ['trans-2-octen-1-yl_acetate', '3913-80-2'],
'682': ['cis-3-hexenyl_butyrate', '16491-36-4'],
'623': ['2-methoxy-4-propylphenol', '2785-87-7'],
'819': ['1,2-ethanedithiol', '540-63-6'],
'1103': ['1-octen-3-yl_butyrate', '16491-54-6'],
'622': ['benzyl_butyl_ether', '588-67-0'],
'133': ['cis-4-hexenal', '4634-89-3'],
'132': ['citronellal', '106-23-0'],
'131': ['carvyl_acetate', '97-42-7'],
'130': ['zingerone', '122-48-5'],
'137': ['methyl-3-hexenoate', '2396-78-3'],
'136': ['isoamyl_alcohol', '123-51-3'],
'135': ['2-methylbutyl-3-methylbutanoate', '2445-77-4'],
'134': ['methyl_(methylthio)acetate', '16630-66-3'],
'494': ['dihydroxyacetone', '96-26-4'],
'495': ['6,7-dihydro-2,3-dimethyl-5h-cyclopentapyrazine', '38917-63-4'],
'139': ['quinoline', '91-22-5'],
'138': ['citronellyl_valerate', '7540-53-6'],
'490': ['hydrogen_sulfide', '7783-06-4'],
'491': ['anisyl_acetate', '104-21-2'],
'492': ['4-mercapto-4-methyl-2-pentanone', '19872-52-7'],
'493': ['2,5-diethyltetrahydrofuran', '41239-48-9'],
'24': ['lauryl_acetate', '112-66-3'],
'25': ['4-methoxy-2-methyl-2-butanethiol', '94087-83-9'],
'26': ['4-heptanone', '123-19-3'],
'27': ['benzothiazole', '95-16-9'],
'20': ['lactic_acid', '598-82-3'],
'21': ['linalyl_butyrate', '78-36-4'],
'22': ['methylsulfinylmethane', '67-68-5'],
'23': ['ethyl_2,4,7-decatrienoate', '78417-28-4'],
'927': ['skatole', '83-34-1'],
'28': ['p-menthan-2-ol', '499-69-4'],
'29': ['w-6-hexadecenlactone', '7779-50-2'],
'407': ['isobutyl_cinnamate', '122-67-8'],
'406': ['2-tridecanone', '593-08-8'],
'405': ['levulinic_acid', '123-76-2'],
'404': ['nootkatone', '4674-50-4'],
'403': ['sulfur_dioxide', '7446-09-5'],
'402': ['methyl_nicotinate', '93-60-7'],
'401': ['isoamyl_laurate', '6309-51-9'],
'400': ['heptyl_alcohol', '111-70-6'],
'933': ['methyl_2-methyl-3-furyl_disulfide', '65505-17-1'],
'932': ['isopropyl_2-methylbutyrate', '66576-71-4'],
'931': ['1-octen-3-yl_acetate', '2442-10-6'],
'930': ['isobornyl_acetate', '125-12-2'],
'937': ['furfuryl_acetate', '623-17-6'],
'629': ['linalyl_octanoate', '10024-64-3'],
'409': ['l-lysine', '56-87-1'],
'408': ['4,5-dimethyl_thiazole', '3581-91-7'],
'1069': ['myrtenol', '515-00-4'],
'628': ['p-mentha-1,3-diene', '99-86-5'],
'1028': ['taurine', '107-35-7'],
'758': ['isoquinoline', '119-65-3'],
'1018': ['undecanoic_acid', '112-37-8'],
'379': ['3-butylidenephthalide', '551-08-6'],
'378': ['thujan-4-ol', '546-79-2'],
'829': ['isopropyl_isobutyrate', '617-50-5'],
'828': ['glycine', '56-40-6'],
'1060': ['ethyl_myristate', '124-06-1'],
'371': ['2,8-dithianon-4-en-4-carboxaldehyde', '59902-01-1'],
'370': ['p-isopropylbenzyl_alcohol', '536-60-7'],
'373': ['pyruvaldehyde', '78-98-8'],
'372': ['3-oxododecanoic_acid_glyceride', '91052-70-9'],
'375': ['benzyl_acetate', '140-11-4'],
'374': ['coumarin_(prohibited)', '91-64-5'],
'377': ['octyl_2-furoate', '39251-88-2'],
'376': ['cyclohexaneethyl_acetate', '21722-83-8'],
'1019': ['cis-5-octen-1-ol', '64275-73-6'],
'708': ['3-hydroxy-2-oxopropionic_acid', '1113-60-6'],
'709': ['isoamyl_isovalerate', '659-70-1'],
'704': ['alpha-terpineol', '10482-56-1'],
'705': ['a-methylbenzyl_acetate', '93-92-5'],
'706': ['4-methyloctanoic_acid', '54947-74-9'],
'707': ['quinine', '.'],
'700': ['estragole', '140-67-0'],
'618': ['cis-_and_trans-menthone-8-thioacetate', '57129-12-1'],
'702': ['2-ethyl_(3_or_5_or_6)-methoxypyrazine_(85%)_plus_2-methyl_(3_or_5_or_6)-methoxypyrazine_(13%)',
'977044-47-5'],
'703': ['furfural', '98-01-1'],
'393': ['d-camphor', '464-49-3'],
'392': ['diallyl_polysulfides', '72869-75-1'],
'88': ['valencene', '4630-07-3'],
'89': ['methyl_2-methylpentanoate', '2177-77-7'],
'397': ['6-methyl-5-hepten-2-one', '110-93-0'],
'396': ['2-acetyl-3,_(5_or_6)-dimethylpyrazine,_mixture_of_isomers',
'977043-63-2'],
'395': ['ethyl_laurate', '106-33-2'],
'394': ['3-propylidenephthalide', '17369-59-4'],
'82': ['4-(methylthio)-2-oxobutanoic_acid', '583-92-6'],
'83': ['4-ethyloctanoic_acid', '16493-80-4'],
'80': ['methyl_2-hydroxy-4-methylpentanoate', '40348-72-9'],
'81': ['isoamyl_octanoate', '2035-99-6'],
'86': ['ethyl_butyrate', '105-54-4'],
'87': ['difurfuryl_ether', '4437-22-3'],
'84': ['2,3-dimethylpyrazine', '5910-89-4'],
'85': ['2-methyl-4-propyl-1,3-oxathiane', '67715-80-4'],
'797': ['aconitic_acid', '499-12-7'],
'796': ['2-acetoxy-3-butanone', '4906-24-5'],
'795': ['2-methyl-3-(p-isopropylphenyl)-propionaldehyde', '103-95-7'],
'794': ['6,10-dimethyl-5,9-undecadien-2-one', '689-67-8'],
'793': ['a-phellandrene', '99-83-2'],
'792': ['propyl_propionate', '106-36-5'],
'791': ['menthone', '89-80-5'],
'790': ['cinnamyl_cinnamate', '122-69-0'],
'799': ['1-phenyl-1,2-propanedione', '579-07-7'],
'798': ['hexyl_alcohol', '111-27-3'],
'7': ['cyclohexyl_butyrate', '1551-44-6'],
'601': ['1-(methylthio)-2-butanone', '13678-58-5'],
'607': ['benzyl_salicylate', '118-58-1'],
'586': ['linalool', '78-70-6'],
'587': ['isoeugenol', '97-54-1'],
'584': ['2,6-dimethylpyridine', '108-48-5'],
'585': ['6-hydroxydihydrotheaspirane', '65620-50-0'],
'582': ['terpinyl_butyrate', '80-26-6'],
'583': ['undecyl_alcohol', '112-42-5'],
'580': ['2,6,6-trimethyl-6-vinyltetrahydropyran', '7392-19-0'],
'581': ['g-octalactone', '104-50-7'],
'588': ['methyl_trans-2-octenoate', '2396-85-'],
'589': ["disodium_5'-guanylate", '5550-12-9'],
'245': ['(2-furyl)-2-propanone', '6975-60-6'],
'244': ['ethyl_trans-2-octenoate', '7367-82-0'],
'247': ['2-phenyl-2-butenal', '4411-89-6'],
'246': ['1-hydroxy-2-butanone', '5077-67-8'],
'241': ['g-decalactone', '706-14-9'],
'240': ['anisyl_alcohol', '105-13-5'],
'243': ['hexanoic_acid', '142-62-1'],
'242': ["2,2'-(thiodimethylene)_difuran", '13678-67-6'],
'615': ['a,a-dimethylphenethyl_alcohol', '100-86-7'],
'249': ['pyridine', '110-86-1'],
'248': ['tetrahydrofurfuryl_alcohol', '97-99-4'],
'924': ['bornyl_formate', '7492-41-3'],
'970': ['4-propenyl-2,6-dimethoxyphenol', '20675-95-0'],
'925': ['3-hydroxy-2-pentanone', '3142-66-3'],
'519': ['methyl_benzoate', '93-58-3'],
'518': ['piperine', '94-62-2'],
'926': ['2,6-dimethylpyrazine', '108-50-9'],
'1009': ['3,7-dimethyl-1-octanol', '106-21-8'],
'1008': ['sclareolide', '.'],
'511': ['citronellyl_acetate', '150-84-5'],
'510': ['2-propylpyridine', '622-39-9'],
'513': ['1-hexanethiol', '111-31-9'],
'512': ['ethyl_nonanoate', '123-29-5'],
'515': ['4-hexen-1-ol', '6126-50-7'],
'514': ['4,5,6,7-tetrahydro-3,6-dimethylbenzofuran', '494-90-6'],
'517': ['4-ethylbenzaldehyde', '4748-78-1'],
'516': ['2-methyl-2-octenal', '49576-57-0'],
'458': ['eugenyl_acetate', '93-28-7'],
'459': ['d-neomenthol', '2216-52-6'],
'621': ['isobutyl_heptanoate', '7779-80-8'],
'620': ['safrole', '94-59-7'],
'627': ['(z)-4-hydroxy-6-dodecenoic_acid_lactone', '18679-18-0'],
'626': ['2-hydroxy-4-methyl_benzaldehyde', '698-27-1'],
'625': ['thaumatin_b-recombinant', '53859-34-3'],
'624': ['octyl_2-methylbutyrate', '29811-50-5'],
'450': ['decyl_butyrate', '5454-09-1'],
'451': ['(+/?)2-mercapto-2-methylpentan-1-ol', '258823-39-1'],
'452': ['beta-cyclodextrin', '7585-39-9'],
'453': ['heptyl_acetate', '112-06-1'],
'454': ['3-phenylpropyl_propionate', '122-74-7'],
'455': ['n-(4-hydroxy-3-methoxybenzyl)-8-methyl-6-nonena-mide', '404-86-4'],
'456': ['methyl_nonanoate', '1731-84-6'],
'457': ['ethyl_cyclohexanecarboxylate', '3289-28-9'],
'1084': ['a-amylcinnamaldehyde', '122-40-7'],
'979': ['isopulegone', '29606-79-9'],
'179': ['ethyl_isobutyrate', '97-62-1'],
'178': ['2,4-dimethylanisole', '6738-23-4'],
'177': ['b-pinene', '127-91-3'],
'176': ['dimethyl_succinate', '106-65-0'],
'175': ['allyl_sulfide', '592-88-1'],
'174': ['methyl_butyrate', '623-42-7'],
'173': ['acetylpyrazine', '22047-25-2'],
'172': ['ethyl_methyl_disulfide', '20333-39-5'],
'171': ['camphene', '79-92-5'],
'170': ['2-methylheptanoic_acid', '1188-02-9'],
'977': ['linalyl_benzoate', '126-64-7'],
'656': ['3-heptanone', '106-35-4'],
'975': ['amyl_octanoate', '638-25-5'],
'974': ['acetaldehyde', '75-07-0'],
'973': ['2-hexen-1-yl_acetate', '2497-18-9'],
'972': ['3-octen-2-one', '1669-44-9'],
'971': ['methyl_hexanoate', '106-70-7'],
'657': ['n-octanal', '124-13-0'],
'1080': ['4-hydroxy-2,3-dimethyl-2,4-nonadienoic_acid_gamma-lactone',
'774-64-1'],
'654': ['methyl_ethyl_trisulfide', '31499-71-5'],
'253': ['ethyl_3-hexenoate', '2396-83-0'],
'978': ['3-octanone', '106-68-3'],
'182': ['p-methoxycinnamaldehyde', '1963-36-6'],
'183': ['4-methyl-3-penten-2-one', '141-79-7'],
'180': ['phenethyl-2-methylbutyrate', '24817-51-4'],
'181': ['p-methyl_diphenyl', '644-08-6'],
'186': ['terpinyl_formate', '2153-26-6'],
'187': ['4-propyl-2,6-dimethoxyphenol', '6766-82-1'],
'184': ['5-ethyl-2-methylpyridine', '104-90-5'],
'185': ['isobutyl_isobutyrate', '97-85-8'],
'886': ['p,a,a-trimethylbenzyl_alcohol', '1197-01-9'],
'652': ['1,2,5,6-tetrahydrocuminic_acid', '56424-87-4'],
'188': ['furfuryl_methyl_sulfide', '1438-91-1'],
'189': ['2-ethyl-3,5(6)-dimethylpyrazine', '27043-05-6'],
'658': ['2,4-dimethylbenzaldehyde', '15764-16-6'],
'653': ['acetic_anhydride', '108-24-7'],
'650': ['borneol', '507-70-0'],
'651': ['6-methyl-3,5-heptadien-2-one', '1604-28-0'],
'1089': ['3-phenyl-1-propanol', '122-97-4'],
'764': ['2-methylbutyraldehyde', '96-17-3'],
'1088': ['2-formyl-6,6-dimethyl_bicyclo_[3.1.1]_hept-2-ene', '564-94-3'],
'11': ['octyl_propionate', '142-60-9'],
'10': ['2-(1-methylpropyl)thiazole', '18277-27-5'],
'13': ['l-_and_dl-alanine', '302-72-7'],
'12': ['2_or_5_or_6-methoxy-3-methylpyrazine_(mixture_of_isomers)',
'68378-13-2'],
'15': ['2-trans-3,7-dimethylocta-2,6-dienyl-2-ethyl_butanoate', '73019-14-4'],
'14': ['2-propionyl-2-thiazoline', '29926-42-9'],
'17': ['(z)-3_&_(e)-2-hexenyl_propionate_(mixture)', '33467-74-2'],
'16': ['10-undecenal', '112-45-8'],
'19': ['3,5-diethyl-2-methylpyrazine', '18138-05-1'],
'18': ['(+/?)-1-phenylethylmercaptan', '6263-65-6'],
'863': ['p-mentha-8-thiol-3-one', '38462-22-5'],
'862': ['heptyl_butyrate', '5870-93-9'],
'865': ['propyl_butyrate', '105-66-8'],
'864': ['2(4)-isobutyl-4(2),6-dimethyldihydro-4h-1,3,5-dithiazine',
'977161-98-0'],
'867': ['methyl_phenylacetate', '101-41-7'],
'866': ['propyl_isobutyrate', '644-49-5'],
'884': ['dihydrocarveol', '619-01-2'],
'938': ['p-mentha-1,8-dien-7-yl-acetate', '15111-96-3'],
'659': ['(+/?)-dihydrofarnesol', '1335-48-4'],
'883': ['(z)-4-dodecenal', '21944-98-9'],
'753': ['2,3,5,6-tetramethylpyrazine', '1124-11-4'],
'881': ['geranyl_tiglate', '7785-33-7'],
'880': ['g-undecalactone', '104-67-6'],
'887': ['3-phenylpropionaldehyde', '104-53-0'],
'831': ['methyl_benzyl_disulfide', '699-10-5'],
'885': ['2-methoxy-4-methylphenol', '93-51-6'],
'752': ['ethyl_cis-4-heptenoate', '39924-27-1'],
'928': ['4-methyl-2-phenyl-2-pentenal', '26643-91-4'],
'62': ['trans,_trans-2,4-hexadienal', '142-83-6'],
'888': ['hexyl_propionate', '2445-76-3'],
'1012': ['p-cymene', '99-87-6'],
'1032': ['diethyl_tartrate', '87-91-2'],
'950': ['w-pentadecalactone', '106-02-5'],
'756': ['3-octanol', '589-98-0'],
'929': ['isophorone', '78-59-1'],
'809': ['isobutyl_hexanoate', '105-79-3'],
'322': ['acetone', '67-64-1'],
'323': ['2,6-dimethyl-4-heptanol', '108-82-7'],
'320': ['methyl_2-hexenoate', '2396-77-2'],
'321': ['methyl-4-methylvalerate', '2412-80-8'],
'326': ['methyl_anisate', '121-98-2'],
'327': ['fenchyl_acetate', '13851-11-1'],
'324': ['anisole', '100-66-3'],
'325': ['p-mentha-1,8-dien-7-ol', '536-59-4'],
'328': ['butyl_formate', '592-84-7'],
'329': ['nonyl_octanoate', '7786-48-3'],
'759': ['cuminaldehyde', '122-03-2'],
'201': ['amyl_butyrate', '540-18-1'],
'200': ['butyl_sulfide', '544-40-1'],
'203': ['1-octen-3-ol', '3391-86-4'],
'202': ['resorcinol', '108-46-3'],
'205': ['delta-tetradecalactone', '2721-22-4'],
'204': ['p-cresol', '106-44-5'],
'207': ['1-p-menthene-8-thiol', '71159-90-5'],
'206': ['phenylacetaldehyde_dimethyl_acetal', '101-48-4'],
'209': ['2-acetyl-5-methylfuran', '1193-79-9'],
'208': ['ethylene_oxide', '75-21-8'],
'779': ['2-trans,_6-trans-nonadienal', '17587-33-6'],
'778': ['methyl_heptanoate', '106-73-0'],
'889': ['rhodinol', '6812-78-8'],
'77': ['isobutyl_2-butenoate', '589-66-2'],
'76': ['2-trans-6-cis-dodecadienal', '21662-13-5'],
'75': ['3,7-dimethyl-1,3,6-octatriene', '13877-91-3'],
'74': ['(z)-3-hexenyl_(e)-2-butenoate', '65405-80-3'],
'73': ['g-dodecalactone', '2305-05-7'],
'72': ['l-tyrosine', '60-18-4'],
'71': ['methyl_thiobutyrate', '2432-51-1'],
'70': ['2-propionylpyrroline', '133447-37-7'],
'655': ['3-(methylthio)propyl_acetate', '16630-55-0'],
'79': ['methyl_citronellate', '2270-60-2'],
'78': ['2,6-dimethyl-5-heptenal', '106-72-9'],
'2': ['l-glutamine', '56-85-9'],
'1042': ['l-arginine', '74-79-3'],
'1041': ['ethyl_acetoacetate', '141-97-9'],
'1040': ['phenethyl_alcohol', '60-12-8'],
'1047': ['2-methylcyclohexanone', '583-60-8'],
'1043': ['2-methoxy-3_(5_and_6)-isopropylpyrazine', '25773-40-4'],
'1045': ['heptanal', '111-71-7'],
'1044': ['5-isopropenyl-2-methyl-2-vinyltetrahydrofuran', '13679-86-2'],
'1049': ['isopropyl_alcohol', '67-63-0'],
'1048': ['p-a-dimethyl_styrene', '1195-32-0'],
'805': ['isobutyl_phenylacetate', '102-13-6'],
'804': ['methyl_laurate', '111-82-0'],
'669': ['methyl_furoate', '611-13-2'],
'668': ['cis-3-hexenyl_lactate', '61931-81-5'],
'667': ['2-ethyl-3-methylpyrazine', '15707-23-0'],
'666': ['2-methyl-5-thiomethylfuran', '13678-59-6'],
'665': ['1-amino-2-propanol', '78-96-6'],
'664': ['amyl_formate', '638-49-3'],
'663': ['2-tridecenal', '7774-82-5'],
'662': ['2,4,5-trimethyl-d-3-oxazoline', '22694-96-8'],
'661': ['1-octen-3-one', '4312-99-6'],
'660': ['butyl_isovalerate', '109-19-3'],
'769': ['cis-6-nonenal', '2277-19-2'],
'692': ['5-hydroxy-2,4-decadienoic_acid_d-lactone', '.'],
'693': ['3-ethyl-2,6-dimethylpyrazine', '13925-07-0'],
'690': ['4-hydroxy-2,5-dimethyl-3(2h)-furanone', '3658-77-3'],
'691': ['3-methylcrotonic_acid', '541-47-9'],
'696': ['biphenyl', '92-52-4'],
'697': ['2-methyltetrahydrothiophen-3-one', '13679-85-1'],
'694': ['disodium_succinate', '150-90-3'],
'695': ['1-ethyl-2-acetylpyrrole', '39741-41-8'],
'698': ['b-ionol', '22029-76-1'],
'699': ['sodium_citrate', '68-04-2'],
'542': ['benzyl_ethyl_ether', '539-30-0'],
'543': ['cinnamyl_alcohol', '104-54-1'],
'540': ['pulegone', '89-82-7'],
'541': ['b-ionone', '14901-07-6'],
'546': ['butyl_laurate', '106-18-3'],
'547': ['hexyl_isobutyrate', '2349-07-7'],
'544': ['5-methyl-3-hexen-2-one', '5166-53-0'],
'545': ['theobromine', '83-67-0'],
'8': ['methyl_dihydrojasmonate', '24851-98-7'],
'548': ['1-methyl-2-acetylpyrrole', '932-16-1'],
'549': ['isovaleric_acid', '503-74-2'],
'68': ['4-methyl-2,3-pentanedione', '7493-58-5'],
'598': ['benzaldehyde_dimethyl_acetal', '1125-88-8'],
'995': ['diacetyl', '431-03-8'],
'869': ['methyl_linoleate_(48%)_methyl_linolenate_(52%)_mix-ture',
'977136-80-3'],
'997': ['2-pentadecanone', '2345-28-0'],
'996': ['citronellol', '26489-01-0'],
'991': ['butyl_butyrate', '109-21-7'],
'990': ['5-methylfurfural', '620-02-0'],
'993': ['butyl_phenylacetate', '122-43-0'],
'868': ['allyl_heptanoate', '142-19-8'],
'999': ['allyl_hexanoate', '123-68-2'],
'998': ['(e)-7-methyl-3-octen-2-one', '.'],
'120': ['butyl_salicylate', '2052-14-4'],
'121': ['a-methylbenzyl_alcohol', '98-85-1'],
'122': ['1,6-hexanedithiol', '1191-43-1'],
'123': ['dehydrodihydroionone', '20483-36-7'],
'124': ['5-hydroxy-2-decenoic_acid_d-lactone', '51154-96-2'],
'125': ['allyl_methyl_trisulfide', '34135-85-8'],
'126': ['3-methyl-2-butenal', '107-86-8'],
'127': ['p-ethoxybenzaldehyde', '10031-82-0'],
'128': ['4-decenoic_acid', '26303-90-2'],
'129': ['butter_starter_distillate', '977019-27-4'],
'765': ['benzyl_isobutyrate', '103-28-6'],
'1016': ['isopropyl_benzoate', '939-48-0'],
'1010': ['a-campholenic_alcohol', '1901-38-8'],
'1011': ['carvacrol', '499-75-2'],
'414': ['p-propylphenol', '645-56-7'],
'415': ['a-ionone', '127-41-3'],
'416': ['2-propylpyrazine', '18138-03-9'],
'417': ['1-ethoxy-3-methyl-2-butene', '22094-00-4'],
'410': ['2-trans,_4-trans-decadienal', '25152-84-5'],
'411': ['3-oxotetradecanoic_acid_glyceride', '91052-73-2'],
'412': ['methyl_phenethyl_ether', '3558-60-9'],
'413': ['nona-2-trans,-6-cis-dienal', '557-48-2'],
'920': ['p-vinylphenol', '2628-17-3'],
'498': ['ethyl_decanoate', '110-38-3'],
'922': ['2-ethyl-1-hexanol', '104-76-7'],
'923': ['isobornyl_propionate', '2756-56-1'],
'418': ['4-hydroxy-3,5-dimethoxybenzaldehyde', '134-96-3'],
'419': ['ethyl_alcohol', '64-17-5'],
'776': ['methyl_ethyl_sulfide', '625-80-9'],
'499': ['phenoxyacetic_acid', '122-59-8'],
'319': ['3,4-dimethyl-1,2-cyclopentanedione', '13494-06-9'],
'318': ['2,6-dimethyl-10-methylene-2,6,11-dodecatrienal', '60066-88-8'],
'313': ['n-furfuryl_pyrrole', '1438-94-4'],
'312': ['ethyl_trans-2-hexenoate', '27829-72-7'],
'311': ['2-octanol', '123-96-6'],
'310': ['p-tolyl_acetate', '140-39-6'],
'317': ['ethyl_acetate', '141-78-6'],
'316': ['methyl_propionate', '554-12-1'],
'315': ['butyl_stearate', '123-95-5'],
'314': ['4-(1,1-dimethyl)_ethyl_phenol', '98-54-4'],
'861': ['maltol', '118-71-8'],
'921': ['citral_(neral)', '5392-40-5'],
'496': ['menthyl_isovalerate', '16409-46-4'],
'832': ['fumaric_acid', '110-17-8'],
'833': ['isobutyl_angelate', '7779-81-9'],
'830': ['isopulegyl_acetate', '57576-09-7'],
'497': ['3-(methylthio)_hexyl_acetate', '51755-85-2'],
'836': ['2-methoxy-4-vinylphenol', '7786-61-0'],
'837': ['adipic_acid', '124-04-9'],
'834': ['2,2-dimethyl-5-(1-methylpropen-1-yl)-__________________________________tetrahydrofuran',
'7416-35-5'],
'835': ['ethyl_acrylate', '140-88-5'],
'838': ['2-trans-4-cis-7-cis-tridecatrienal', '13552-96-0'],
'839': ['ethyl_p-anisate', '94-30-4'],
'808': ['2-acetyl-2-thiazoline', '29926-41-8'],
'3': ['1-methyl-3-methoxy-4-isopropylbenzene', '1076-56-8'],
'725': ['3-methyl-2-oxobutanoic_acid', '759-05-7'],
'368': ['9-octadecenal', '5090-41-5'],
'369': ['4-methylpentanoic_acid', '646-07-1'],
'366': ['hexyl_octanoate', '1117-55-1'],
'367': ['2-methylbutyl-2-methyl_butyrate', '2445-78-5'],
'364': ['trans,_trans-2,4-dodecadienal', '21662-16-8'],
'365': ['3-(2-methylpropyl)pyridine', '14159-61-6'],
'362': ['glycerol_tributyrate', '60-01-5'],
'363': ['2-methyl-5-vinylpyrazine', '13925-08-1'],
'360': ['piperonyl_acetate', '326-61-4'],
'361': ['terpinolene', '586-62-9'],
'959': ['phenethyl_hexanoate', '6290-37-5'],
'952': ['cis-3-octen-1-ol', '20125-84-2'],
'882': ['2-propionylthiazole', '43039-98-1'],
'380': ['2-oxobutyric_acid', '600-18-0'],
'381': ['4-hexene-3-one', '2497-21-4'],
'382': ['(e)-3-(z)-6-nonadien-1-ol', '56805-23-3'],
'383': ['2,3-dimethylbenzofuran', '3782-00-1'],
'384': ['2-methylbutyric_acid', '116-53-0'],
'385': ['1,5,5,9-tetramethyl-13-oxatricyclo-(8.3.0.0(4,9))________tridecane',
'6790-58-5'],
'386': ['o-methoxybenzaldehyde', '135-02-4'],
'387': ['(e)-2-hexenyl_hexanoate', '53398-86-0'],
'388': ['allyl_mercaptan', '870-23-5'],
'389': ['2-isopropyl-4-methylthiazole', '15679-13-7'],
'784': ['geranyl_acetate', '105-87-3'],
'785': ['phenethyl_benzoate', '94-47-3'],
'786': ['1-penten-3-ol', '616-25-1'],
'787': ['isobutyl_butyrate', '539-90-2'],
'780': ['methyl_furfuryl_disulfide', '57500-00-2'],
'781': ['4-ethyl-2,6-dimethoxyphenol', '14059-92-8'],
'782': ['2-pentylpyridine', '2294-76-0'],
'783': ['3-hydroxy-4-phenylbutan-2-one', '5355-63-5'],
'788': ['diethyl_succinate', '123-25-1'],
'789': ['nonyl_isovalerate', '7786-47-2'],
'860': ['ethyl_3-hydroxyhexanoate', '2305-25-1'],
'605': ['m-dimethoxybenzene', '151-10-0'],
'579': ['l-aspartic_acid', '8021-39-4'],
'578': ['2,2,6-trimethylcyclohexanone', '2408-37-9'],
'604': ["2'-aminoacetophenone", '551-93-9'],
'573': ['hexyl_butyrate', '2639-63-6'],
'572': ['ascorbic_acid', '50-81-7'],
'571': ['linalyl_formate', '115-99-1'],
'570': ['triethyl_citrate', '77-93-0'],
'577': ['butyl_anthranilate', '7756-96-9'],
'576': ['trans-anethole', '4180-23-8'],
'575': ['o-vinylanisole', '612-15-7'],
'574': ['2-isobutyl_thiazole', '18640-74-9'],
'60': ['2-ethylhexanethiol', '7341-17-5'],
'61': ['geranyl_butyrate', '106-29-6'],
'258': ['methyl_3-hydroxyhexanoate', '21188-58-9'],
'259': ['monosodium_glutamate', '142-47-2'],
'64': ['2-dodecenal', '4826-62-4'],
'65': ['n-butyl_valerate', '591-68-4'],
'66': ['rhodinyl_formate', '141-09-3'],
'67': ['(e)-2-decenoic_acid', '334-49-6'],
'252': ['propyl_formate', '110-74-7'],
'69': ['hydroxynonanoic_acid,_d-lactone', '3301-94-8'],
'250': ['hydroxycitronellal', '107-75-5'],
'251': ['isoamyl_benzoate', '94-46-2'],
'256': ['myristaldehyde', '124-25-4'],
'257': ['2,4-dihydroxybenzoic_acid', '89-86-1'],
'254': ['trans-2-methyl-2-butenoic_acid', '80-59-1'],
'255': ['a-hexyl_cinnamaldehyde', '101-86-0'],
'603': ['isopropyl_formate', '625-55-8'],
'602': ['eucalyptol', '470-82-6'],
'939': ['glycerol', '56-81-5'],
'731': ['bisabolene', '495-62-5'],
'730': ['lauric_aldehyde', '112-54-9'],
'733': ['ethyl_propyl_trisulfide', '31499-70-4'],
'732': ['(z)(z)-3,6-nonadien-1-ol', '76649-25-7'],
'735': ['p-tolyl-3-methyl_butyrate', '55066-56-3'],
'734': ['phenylacetaldehyde', '122-78-1'],
'508': ['diethyl_malate', '7554-12-3'],
'509': ['6-undecanone', '927-49-1'],
'506': ['benzyl_tiglate', '5837-78-5'],
'507': ['1-butanethiol', '109-79-5'],
'504': ['p-menth-8-en-1-ol', '138-87-4'],
'505': ['ethyl_propionate', '105-37-3'],
'502': ['heptyl_isobutyrate', '2349-13-5'],
'503': ['veratraldehyde', '120-14-9'],
'500': ['(z)-4-propenylphenol', '85960-81-2'],
'501': ['4-methylnonanoic_acid', '45019-28-1'],
'630': ['amyl_alcohol', '71-41-0'],
'631': ['3-methyl-1-pentanol', '589-35-5'],
'632': ['2(10)-pinen-3-ol', '5947-36-4'],
'633': ['methyl_propyl_trisulfide', '17619-36-2'],
'469': ['3-oxohexadecanoic_acid_glyceride', '91052-71-0'],
'468': ['decyl_acetate', '112-17-4'],
'636': ['benzyl_methyl_sulfide', '766-92-7'],
'637': ['o-propylphenol', '644-35-9'],
'465': ['d-ribose', '50-69-1'],
'464': ['3-methyl-2-cyclohexen-1-one', '1193-18-6'],
'467': ['citric_acid', '77-92-9'],
'466': ['m-cresol', '108-39-4'],
'461': ['4-carvomenthenol', '562-74-3'],
'460': ['2-methylvaleric_acid', '97-61-0'],
'463': ['propyl_benzoate', '2315-68-6'],
'462': ['propiophenone', '93-55-0'],
'901': ['2,3,5-trithiahexane', '42474-44-2'],
'168': ['ethyl_2-(methylthio)acetate', '4455-13-4'],
'169': ['2-(2-butyl)-4,5-dimethyl-3-thiazoline', '65894-82-8'],
'164': ['neryl_butyrate', '999-40-6'],
'165': ['eugenol', '97-53-0'],
'166': ['1-decanol', '112-30-1'],
'167': ['l-menthyl_acetate', '16409-45-3'],
'160': ['2-methyl-3-furanthiol', '28588-74-1'],
'161': ['p-dimethoxybenzene', '150-78-7'],
'162': ['p-mentha-1,4(8)-dien-3-one', '491-09-8'],
'163': ['4-methyl-2-pentenal', '5362-56-1'],
'964': ['p-methylanisole', '104-93-8'],
'965': ['piperonal', '120-57-0'],
'966': ['erythrobic_acid', '89-65-6'],
'967': ['geranyl_isovalerate', '109-20-6'],
'960': ['2-acetylthiazole', '24295-03-2'],
'961': ['2,3-octanedione', '585-25-1'],
'962': ['4-methyl-5-thiazoleethanol', '137-00-8'],
'963': ['benzophenone', '119-61-9'],
'968': ['isopropenylpyrazine', '38713-41-6'],
'969': ['3-heptanol', '589-82-2'],
'936': ['benzyl_hexanoate', '6938-45-0'],
'1106': ['ethyl_sorbate', '2396-84-1'],
'1104': ['guaiacol', '90-05-1'],
'1105': ['(+/?)-methyl_5-acetoxyhexanoate', '35234-22-1'],
'1102': ['2-heptanol', '543-49-7'],
'935': ['3-mercapto-3-methyl-1-butanol', '34300-94-2'],
'1100': ['cis-3-hexenyl_benzoate', '25152-85-6'],
'1101': ['hexyl_benzoate', '6789-88-4'],
'934': ['o-methoxycinnamaldehyde', '1504-74-1'],
'908': ['3-phenylpropyl_acetate', '122-72-5'],
'909': ['2-heptanone', '110-43-0'],
'1096': ['2-methyl-1,3-dithiolane', '5616-51-3'],
'1090': ['methyl_jasmonate', '1211-29-6'],
'1091': ['ethyl_benzoate', '93-89-0'],
'600': ['propyl_alcohol', '71-23-8'],
'878': ['ethyl_10-undecenoate', '692-86-4'],
'879': ['ethyl_palmitate', '628-97-7'],
'876': ['l-malic_acid', '97-67-6'],
'877': ['acetal', '105-57-7'],
'874': ['3-hexenyl_phenylacetate', '42436-07-7'],
'875': ['3-nonanone', '925-78-0'],
'872': ['methyl-cis-4-octenoate', '21063-71-8'],
'873': ['ethyl_crotonate', '623-70-1'],
'870': ['3-methyl-2,4-nonanedione', '113486-29-6'],
'871': ['5h-5-methyl-6,7-dihydrocyclopenta(b)pyrazine', '23747-48-0'],
'9': ['methyl_2-methylthiobutyrate', '42075-45-6'],
'890': ['2,3,5-trimethylpyrazine', '14667-55-1'],
'891': ['dehydromenthofurolactone', '75640-26-5'],
'892': ['3-oxohexanoic_acid_glyceride', '91052-72-1'],
'893': ['2,3-pentanedione', '600-14-6'],
'894': ['isoeugenyl_methyl_ether', '93-16-3'],
'647': ['3-methyl-2-butanol', '598-75-4'],
'896': ['2,6,6-trimethylcyclohex-2-ene-1,4-dione', '1125-21-9'],
'897': ['5-methyl-2-hepten-4-one', '81925-81-7'],
'898': ['carvone', '6485-40-1'],
'899': ['4-methylthio-2-butanone', '3407-39-7'],
'1087': ['ethyl_pyruvate', '617-35-6'],
'646': ['2-methyl-1-propanethiol', '513-44-0'],
'1099': ['l-arabinose', '5328-37-0'],
'649': ['4-ethylguaiacol', '2785-89-9'],
'648': ['5-ethyl-2-hydroxy-3-methylcyclopent-2-en-1-one', '53263-58-4'],
'1086': ['pyroligneous_acid,_extract', '8028-47-5'],
'357': ['isobutyric_acid', '79-31-2'],
'356': ['3-acetyl-2,5-dimethylthiophene', '2530-10-1'],
'355': ['isoprenyl_acetate', '5205-07-2'],
'354': ['citronellyl_isobutyrate', '97-89-2'],
'353': ['5-isopropyl-2-methylpyrazine', '13925-05-8'],
'352': ['2-undecanone', '112-12-9'],
'351': ['3,6-dihydro-4-methyl-2-(2-methylpropen-1-yl)-2h-pyran', '1786-08-9'],
'350': ['p-menth-1-ene-9-al', '29548-14-9'],
'803': ['citronellyl_butyrate', '141-16-2'],
'802': ['3-ethyl-2-hydroxy-4-methylcylcopent-2-en-1-one', '42348-12-9'],
'801': ['sodium_diacetate', '126-96-5'],
'800': ['ethyl_formate', '109-94-4'],
'807': ['hexyl_phenylacetate', '5421-17-0'],
'806': ['isobutyl_alcohol', '78-83-1'],
'359': ['ethyl_levulinate', '539-88-8'],
'358': ['decanoic_acid', '334-48-5'],
'216': ['citronellyl_propionate', '141-14-0'],
'217': ['3-(2-furyl)acrolein', '623-30-3'],
'214': ['acetanisole', '100-06-1'],
'215': ['benzoic_acid', '65-85-0'],
'212': ['4-decenal', '30390-50-2'],
'213': ['myrtenyl_acetate', '1079-01-2'],
'210': ['6-methylquinoline', '91-62-3'],
'211': ['2-ethyl-5-methylpyrazine', '13360-64-0'],
'762': ['3-phenylpropyl_cinnamate', '122-68-9'],
'763': ['n-butyl-2-methylbutyrate', '15706-73-7'],
'760': ['linalyl_propionate', '144-39-8'],
'761': ['3-hexen-1-ol', '928-96-1'],
'766': ['methyl_myristate', '124-10-7'],
'767': ['fenchyl_alcohol', '1632-73-1'],
'218': ['d-decalactone', '705-86-2'],
'219': ['a-ionol', '25312-34-9'],
'957': ['methyl_octanoate', '111-11-5'],
'956': ['cinnamyl_acetate', '103-54-8'],
'1033': ['2-butanone', '78-93-3'],
'1078': ['citronellic_acid', '502-47-6'],
'1079': ['1-methyl-1-cyclopenten-3-one', '2758-18-1'],
'1076': ['3,4-dimethoxy-1-vinylbenzene', '6380-23-0'],
'1077': ['4-methyl-2-oxopentanoic_acid', '816-66-0'],
'1074': ['2,6-dimethyl-4-heptanone', '108-83-8'],
'1075': ['phenethyl_isobutyrate', '103-48-0'],
'1072': ['s-methyl_thioacetate', '1534-08-3'],
'1073': ['n-propyl_hexanoate', '626-77-7'],
'1070': ['benzaldehyde', '100-52-7'],
'1071': ['a-furfuryl_pentanoate', '36701-01-6'],
'289': ['3-decanol', '1565-81-7'],
'288': ['butyl_alcohol', '71-36-3'],
'1003': ['polyarabinogalactan', '9036-66-2'],
'4': ['methyl-3-phenylpropionate', '103-25-3'],
'281': ['3-oxooctanoic_acid_glyceride', '91052-68-5'],
'280': ['ethyl_lactate', '97-64-3'],
'283': ['benzyl_formate', '104-57-4'],
'282': ['vanillin,_natural', '121-33-5'],
'285': ['3-(methylthio)_propionaldehyde', '3268-49-3'],
'284': ['3,5-dimethyl-1,2,4-trithiolane', '23654-92-4'],
'287': ['5-methyl-2-thiophenecarboxaldehyde', '13679-70-4'],
'286': ['s-methyl_4-methylpentanethioate', '53966-59-9'],
'1094': ['butyl_propionate', '590-01-2'],
'1095': ['neryl_propionate', '105-91-9'],
'1083': ['s-methyl_hexanethioate', '2432-77-1'],
'1097': ['4-hydroxy-3-methyloctanoic_acid_lactone', '39212-23-2'],
'678': ['5-hydroxy-7-decenoic_acid_d-lactone', '25524-95-2'],
'679': ['isosafrole_(prohibited)', '120-58-1'],
'1092': ['p-mentha-1,4-diene', '99-85-4'],
'1093': ['ethyl_cis-4-octenoate', '34495-71-1'],
'674': ['3-oxodecanoic_acid_glyceride', '91052-69-6'],
'675': ['(e)-2-octen-1-ol', '18409-17-1'],
'676': ['4-hydroxybenzaldehyde', '123-08-0'],
'677': ['santalol,_a_and', '11031-45-1'],
'670': ['decanal', '112-31-2'],
'671': ['acetic_acid', '64-19-7'],
'672': ['ethyl_tiglate', '5837-78-5'],
'673': ['myrcene', '123-35-3'],
'263': ['cis-6-nonen-1-ol', '35854-86-5'],
'262': ['d-dodecalactone', '713-95-1'],
'261': ['3,5,5-trimethyl-1-hexanol', '3452-97-9'],
'260': ['d-piperitone', '6091-50-5'],
'267': ['triacetin', '102-76-1'],
'266': ['3-hepten-2-one', '1119-44-4'],
'265': ['acetaldehyde_ethyl_(z)-3-hexenyl_acetal', '28069-74-1'],
'264': ['3-methylthiobutyraldehyde', '16630-52-7'],
'1031': ['3-hexenoic_acid', '4219-24-3'],
'269': ['cis-3-hexen-1-yl_acetate', '3681-71-8'],
'268': ['isoamyl_hexanoate', '2198-61-0'],
'701': ['bornyl_isovalerate', '76-50-6'],
'59': ['2-isobutyl-3-methylpyrazine', '13925-06-9'],
'58': ['cis-3-hexenyl_isovalerate', '35154-45-1'],
'55': ['isoamyl_cinnamate', '7779-65-9'],
'54': ["disodium_5'-inosinate", '4691-65-0'],
'57': ['dihydrocoumarin', '119-84-6'],
'56': ['isoamyl_acetate', '123-92-2'],
'51': ['pyroligneous_acid', '8030-97-5'],
'50': ['isoamyl_formate', '110-45-2'],
'53': ['methyl_p-hydroxybenzoate', '99-76-3'],
'52': ['3-ethylpyridine', '536-78-7'],
'537': ['erythro_and_threo-3-mercapto-2-methylbutan-1-ol', '227456-33-9'],
'536': ['hexyl_2-methylbutyrate', '10032-15-2'],
'535': ['rhodinyl_butyrate', '141-15-1'],
'63': ['3-penten-2-one', '625-33-2'],
'533': ['4-methylthiazole', '693-95-8'],
'532': ['methyl_(e)-2-(z)-4-decadienoate', '4493-42-9'],
'531': ['butylamine', '109-73-9'],
'530': ['methyl-4-(methylthio)butyrate', '53053-51-3'],
'539': ['3-mercapto-2-methylpentanal', '227456-28-2'],
'538': ['phenethyl_butyrate', '103-52-6'],
'987': ['4-phenyl-3-buten-2-one', '122-57-6'],
'775': ['geranyl_formate', '105-86-2'],
'988': ['ethyl_isovalerate', '108-64-5'],
'989': ['1-p-menthen-9-yl_acetate', '17916-91-5'],
'774': ['octyl_butyrate', '110-39-4'],
'982': ['2-methyl-3-tetrahydrofuranthiol', '57124-87-5'],
'983': ['2-methoxy-3-(1-methylpropyl)pyrazine', '24168-70-5'],
'980': ['2-pentanol', '6032-29-7'],
'981': ['2-hepten-4-one', '4643-25-8'],
'986': ['phenethyl_isovalerate', '140-26-1'],
'777': ['isoamyl_salicylate', '87-20-7'],
'984': ['methyl_disulfide', '624-92-0'],
'985': ['styrene', '100-42-5'],
'115': ['3-methylpentanoic_acid', '105-43-1'],
'114': ['2-methylbutyl_acetate', '624-41-9'],
'117': ['allyl_isothiocyanate', '57-06-7'],
'116': ['2,6,6-trimethylcyclohexa-1,3-dienyl_methanal', '116-26-7'],
'111': ['ethyl_anthranilate', '87-25-2'],
'110': ['cis-3-hexenyl_hexanoate', '31501-11-8'],
'113': ['1-buten-1-yl_methyl_sulfide', '32951-19-2'],
'112': ['3-methylbutanethiol', '541-31-4'],
'771': ['2-hydroxybenzoic_acid', '69-72-7'],
'119': ['2-hydroxy-3,5,5-trimethyl-1,2-cyclohexenone', '4883-60-7'],
'118': ['2-nonanone', '821-55-6'],
'770': ['2,2,4-trimethyl-1,3-oxacyclopentane', '1193-11-9'],
'773': ['octyl_isobutyrate', '109-15-9'],
'772': ['3-methylbutyraldehyde', '590-86-3'],
'953': ['campholene_acetate', '1727-68-0'],
'429': ['propyl_mercaptan', '107-03-9'],
'428': ['1-(p-methoxyphenyl)-2-propanone', '122-84-9'],
'534': ['amyl_hexanoate', '540-07-8'],
'919': ['butyl_isobutyrate', '97-87-0'],
'918': ['2-pentanone', '108-87-9'],
'421': ['ethyl_salicylate', '118-61-6'],
'420': ['ethyl-trans-2,_cis-4-decadienoate', '3025-30-7'],
'423': ['vanillin', '121-33-5'],
'422': ['paraldehyde', '123-63-7'],
'425': ['4-methyl-2,6-dimethoxyphenol', '6638-05-7'],
'424': ['2-pentylfuran', '3777-69-3'],
'427': ['pyrrole', '109-97-7'],
'426': ['thymol', '89-83-8'],
'308': ['2,4-dimethylacetophenone', '89-74-7'],
'309': ['ethyl_cis-4,7-octadienoate', '69925-33-3'],
'855': ['2-methylheptan-3-one', '13019-20-0'],
'300': ['cyclohexanecarboxylic_acid', '98-89-5'],
'301': ['ethyl_octanoate', '106-32-1'],
'302': ['2,5-dimethyl-4-methoxy-3(2h)-furanone', '4077-47-8'],
'303': ['p-menthan-2-one', '499-70-7'],
'304': ['ethyl_3-mercaptopropionate', '5466-06-8'],
'305': ['1,2-dimethoxybenzene', '91-16-7'],
'306': ['benzyl_propionate', '122-63-4'],
'307': ['2-isopropyl-5-methyl-2-hexenal', '35158-25-9'],
'895': ['1,1-dimethoxyethane', '534-15-6'],
'825': ['d-undecalactone', '710-04-3'],
'824': ['5-hydroxy-4-octanone', '496-77-5'],
'827': ['2-methyl-3-butenal', '497-03-0'],
'847': ['methyl_salicylate', '119-36-8'],
'846': ['dihydro-a-ionone', '31499-72-6'],
'845': ['1-methyl-2,3-cyclohexadione', '3008-43-3'],
'826': ['cis-3-hexenyl-2-methylbutyrate', '53398-85-9'],
'843': ['methyl_3-nonenoate', '13481-87-3'],
'842': ['dl-isomenthone', '491-07-6'],
'841': ['2,5_diethyl-3-methylpyrazine', '32736-91-7'],
'840': ['2-isopropylphenol', '88-69-7'],
'821': ['isoamyl_butyrate', '106-27-4'],
'853': ['phenethylamine', '64-04-0'],
'849': ['methyl_propyl_disulfide', '2179-60-4'],
'820': ['3-methylthio-1-hexanol', '51755-66-9'],
'823': ['l-glutamic_acid', '56-86-0'],
'822': ['salicylaldehyde', '90-02-8'],
'954': ['geraniol', '106-24-1'],
'1034': ['a-propylphenethyl_alcohol', '705-73-7'],
...}
In [5]:
def compound2character(compounds):
    """Split every compound name into a list of its individual characters.

    Parameters
    ----------
    compounds : dict
        Mapping of compound id to a list whose first element is the
        compound name (the shape returned by ``load_compounds``).

    Returns
    -------
    dict
        ``{compound_name: [char, char, ...]}``. The result is keyed by the
        name, not by the id, so ids that share a name collapse into a single
        entry. Names are used verbatim: no cleanup (for example of HTML
        entities such as ``&amp;``) is performed.
    """
    dict_comp_char = {}
    for comp_fields in compounds.values():
        compound = comp_fields[0]
        # A str is already an iterable of characters, so list() does the
        # whole split without a manual append loop.
        dict_comp_char[compound] = list(compound)
    return dict_comp_char
# Character-level view of every compound name, keyed by the name itself.
dict_compound2character = compound2character(compounds)
In [6]:
def read_corpus_char_level(dict_compound2character):
    """Yield one gensim ``TaggedDocument`` per compound name.

    Parameters
    ----------
    dict_compound2character : dict
        Mapping of compound name to the list of its characters, as built by
        ``compound2character``.

    Yields
    ------
    gensim.models.doc2vec.TaggedDocument
        A document whose words are the characters of the name and whose
        single tag is the name itself. Documents are produced in the
        iteration order of the input dict.
    """
    for compound, characters in dict_compound2character.items():
        # For training data, add tags: the compound name is the only tag.
        yield gensim.models.doc2vec.TaggedDocument(characters, [compound])
# Materialise the generator into a list so the corpus can be iterated more than once.
corpus = list(read_corpus_char_level(dict_compound2character))
# Bare expression: the notebook renders the whole list as this cell's output.
corpus
Out[6]:
[TaggedDocument(words=['j', 'a', 's', 'm', 'o', 'n', 'e'], tags=['jasmone']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['5-methylhexanoic_acid']),
TaggedDocument(words=['l', '-', 'g', 'l', 'u', 't', 'a', 'm', 'i', 'n', 'e'], tags=['l-glutamine']),
TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['1-methyl-3-methoxy-4-isopropylbenzene']),
TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', '-', '1', '-', 'o', 'l', '_', '(', 'r', 'a', 'c', 'e', 'm', 'i', 'c', ')'], tags=['3-mercapto-2-methylpentan-1-ol_(racemic)']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['butyl_isobutyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'j', 'a', 's', 'm', 'o', 'n', 'a', 't', 'e'], tags=['methyl_dihydrojasmonate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_2-methylthiobutyrate']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['octyl_propionate']),
TaggedDocument(words=['2', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', 'i', 's', 'o', 'm', 'e', 'r', 's', ')'], tags=['2_or_5_or_6-methoxy-3-methylpyrazine_(mixture_of_isomers)']),
TaggedDocument(words=['l', '-', '_', 'a', 'n', 'd', '_', 'd', 'l', '-', 'a', 'l', 'a', 'n', 'i', 'n', 'e'], tags=['l-_and_dl-alanine']),
TaggedDocument(words=['p', '-', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['p-propyl_anisole']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['2-ethyl-1-hexanol']),
TaggedDocument(words=['1', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['1-mercapto-2-propanone']),
TaggedDocument(words=['1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['10-undecenal']),
TaggedDocument(words=['(', 'z', ')', '-', '3', '_', '&', 'a', 'm', 'p', ';', '_', '(', 'e', ')', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e', '_', '(', 'm', 'i', 'x', 't', 'u', 'r', 'e', ')'], tags=['(z)-3_&_(e)-2-hexenyl_propionate_(mixture)']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', '1', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'e', 't', 'h', 'y', 'l', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['(+/?)-1-phenylethylmercaptan']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_trans-2-octenoate']),
TaggedDocument(words=['3', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-acetylpyridine']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cyclohexaneethyl_acetate']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['linalyl_butyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', 's', 'u', 'l', 'f', 'i', 'n', 'y', 'l', 'm', 'e', 't', 'h', 'a', 'n', 'e'], tags=['methylsulfinylmethane']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '2', ',', '4', ',', '7', '-', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_2,4,7-decatrienoate']),
TaggedDocument(words=['l', 'a', 'u', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['lauryl_acetate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['4-methoxy-2-methyl-2-butanethiol']),
TaggedDocument(words=['4', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-heptanone']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['benzothiazole']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', '-', '2', '-', 'o', 'l'], tags=['p-menthan-2-ol']),
TaggedDocument(words=['w', '-', '6', '-', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'e', 'n', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['w-6-hexadecenlactone']),
TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['terpinyl_propionate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '8', '-', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['p-menth-8-en-1-ol']),
TaggedDocument(words=['t', 'h', 'u', 'j', 'a', 'n', '-', '4', '-', 'o', 'l'], tags=['thujan-4-ol']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', '(', '2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', 'i', 'd', 'e', 'n', 'e', ')', '_', 'a', 'c', 'e', '-', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['(+/?)-(2,6,6-trimethyl-2-hydroxycyclohexylidene)_ace-tic_acid_g-lactone']),
TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,6,6-trimethyl-1,2-cyclohexen-1-carboxaldehyde']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-acetylfuran']),
TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['nonyl_acetate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'a', 'n', 'e'], tags=['3-methyl-1,2,4-trithiane']),
TaggedDocument(words=['4', "'", '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=["4'-methylacetophenone"]),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-hydroxy-3-methoxybenzoic_acid']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['hexyl_isovalerate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methylpyrazine']),
TaggedDocument(words=['m', 'o', 'n', 'o', 's', 'o', 'd', 'i', 'u', 'm', '_', 'g', 'l', 'u', 't', 'a', 'm', 'a', 't', 'e'], tags=['monosodium_glutamate']),
TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(z)-3-hexenyl_isobutyrate']),
TaggedDocument(words=['4', '-', 'a', 'l', 'l', 'y', 'l', '-', '2', ',', '_', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-allyl-2,_6-dimethoxyphenol']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['octyl_acetate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'a', 'l'], tags=['p-mentha-1,8-dien-7-al']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['benzyl_cinnamate']),
TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['amyl_formate']),
TaggedDocument(words=['p', '-', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-ethylphenol']),
TaggedDocument(words=['b', 'u', 't', 'a', 'n', '-', '3', '-', 'o', 'n', 'e', '-', '2', '-', 'y', 'l', '_', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['butan-3-one-2-yl_butanoate']),
TaggedDocument(words=['4', '-', '(', '1', ',', '1', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', ')', '_', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-(1,1-dimethyl)_ethyl_phenol']),
TaggedDocument(words=['6', ',', '1', '0', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', ',', '9', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6,10-dimethyl-5,9-undecadien-2-one']),
TaggedDocument(words=['p', 'y', 'r', 'o', 'l', 'i', 'g', 'n', 'e', 'o', 'u', 's', '_', 'a', 'c', 'i', 'd'], tags=['pyroligneous_acid']),
TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-ethylpyridine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl_propionate']),
TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', '5', "'", '-', 'i', 'n', 'o', 's', 'i', 'n', 'a', 't', 'e'], tags=["disodium_5'-inosinate"]),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoamyl_acetate']),
TaggedDocument(words=['d', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['decanal']),
TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-undecanone']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'd', 'o', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxododecanoic_acid_glyceride']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['geranyl_butyrate']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'h', 'e', 'x', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-hexadienal']),
TaggedDocument(words=['3', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-penten-2-one']),
TaggedDocument(words=['2', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-dodecenal']),
TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['n-butyl_valerate']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-hydroxybenzaldehyde']),
TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['rhodinyl_formate']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-hydroxy-3,5-dimethoxybenzaldehyde']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_isobutyrate']),
TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', ',', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['hydroxynonanoic_acid,_d-lactone']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'i', 'n', 'e'], tags=['2-propionylpyrroline']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_thiobutyrate']),
TaggedDocument(words=['l', '-', 't', 'y', 'r', 'o', 's', 'i', 'n', 'e'], tags=['l-tyrosine']),
TaggedDocument(words=['g', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-dodecalactone']),
TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', '(', 'e', ')', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['(z)-3-hexenyl_(e)-2-butenoate']),
TaggedDocument(words=['3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '3', ',', '6', '-', 'o', 'c', 't', 'a', 't', 'r', 'i', 'e', 'n', 'e'], tags=['3,7-dimethyl-1,3,6-octatriene']),
TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '6', '-', 'c', 'i', 's', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans-6-cis-dodecadienal']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_2-butenoate']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'p', 't', 'e', 'n', 'a', 'l'], tags=['2,6-dimethyl-5-heptenal']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 't', 'e'], tags=['methyl_citronellate']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isoamyl_octanoate']),
TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-dimethylpyrazine']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_butyrate']),
TaggedDocument(words=['d', 'i', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['difurfuryl_ether']),
TaggedDocument(words=['v', 'a', 'l', 'e', 'n', 'c', 'e', 'n', 'e'], tags=['valencene']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-methylpentanoate']),
TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'o', 'l'], tags=['menthol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl_valerate']),
TaggedDocument(words=['d', 'i', 'p', 'h', 'e', 'n', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['diphenyl_ether']),
TaggedDocument(words=['g', '-', 'h', 'e', 'x', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-hexalactone']),
TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-methoxybenzaldehyde']),
TaggedDocument(words=['t', 'o', 'l', 'u', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', 's', ',', '_', 'm', 'i', 'x', 'e', 'd', '_', 'o', '-', ',', '_', 'm', '-', ',', '_', 'p', '-'], tags=['tolualdehydes,_mixed_o-,_m-,_p-']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['propenyl_propyl_disulfide']),
TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3-hexanol']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['furfuryl_thioacetate']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(+/?)-ethyl_3-acetoxy-2-methylbutyrate']),
TaggedDocument(words=['1', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['1-penten-3-one']),
TaggedDocument(words=['b', '-', 'c', 'a', 'r', 'y', 'o', 'p', 'h', 'y', 'l', 'l', 'e', 'n', 'e'], tags=['b-caryophyllene']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'e', 'n', 'o', 'l', 'o', 'n', 'e'], tags=['methylcyclopentenolone']),
TaggedDocument(words=['d', 'l', '-', '(', '3', '-', 'a', 'm', 'i', 'n', 'o', '-', '3', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 's', 'u', 'l', 'f', 'o', 'n', 'i', 'u', 'm', '_', 'c', 'h', 'l', 'o', 'r', 'i', 'd', 'e'], tags=['dl-(3-amino-3-carboxypropyl)dimethylsulfonium_chloride']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl-3-methylthiopropionate']),
TaggedDocument(words=['p', 'y', 'r', 'u', 'v', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['pyruvic_acid']),
TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['isobornyl_isovalerate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isobutyl_butyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'y', 'r', 'r', 'o', 'l', 'y', 'l', '_', 'k', 'e', 't', 'o', 'n', 'e'], tags=['methyl-2-pyrrolyl_ketone']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['isobutyraldehyde']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['5-methyl-2,3-hexanedione']),
TaggedDocument(words=['1', ',', '2', ',', '5', ',', '6', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'c', 'u', 'm', 'i', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['1,2,5,6-tetrahydrocuminic_acid']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['ethyl_anthranilate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['3-methylbutanethiol']),
TaggedDocument(words=['1', '-', 'b', 'u', 't', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['1-buten-1-yl_methyl_sulfide']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['2-methylbutyl_acetate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methylpentanoic_acid']),
TaggedDocument(words=['n', '-', '(', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'y', 'l', ')', '-', '8', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', 'a', '-', 'm', 'i', 'd', 'e'], tags=['n-(4-hydroxy-3-methoxybenzyl)-8-methyl-6-nonena-mide']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['allyl_isothiocyanate']),
TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'n', 'e'], tags=['2-nonanone']),
TaggedDocument(words=['a', 'd', 'i', 'p', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['adipic_acid']),
TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', ',', '5', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', 'o', 'n', 'e'], tags=['2-hydroxy-3,5,5-trimethyl-1,2-cyclohexenone']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['butyl_salicylate']),
TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['a-methylbenzyl_alcohol']),
TaggedDocument(words=['1', ',', '6', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,6-hexanedithiol']),
TaggedDocument(words=['d', 'e', 'h', 'y', 'd', 'r', 'o', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dehydrodihydroionone']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['2-methylundecanal']),
TaggedDocument(words=['p', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['p-cresol']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isopropyl_hexanoate']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-phenylpropionic_acid']),
TaggedDocument(words=['i', 's', 'o', 'q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['isoquinoline']),
TaggedDocument(words=['4', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-decenoic_acid']),
TaggedDocument(words=['2', ',', '2', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['2,2,6-trimethylcyclohexanone']),
TaggedDocument(words=['z', 'i', 'n', 'g', 'e', 'r', 'o', 'n', 'e'], tags=['zingerone']),
TaggedDocument(words=['c', 'a', 'r', 'v', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['carvyl_acetate']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 'l'], tags=['citronellal']),
TaggedDocument(words=['c', 'i', 's', '-', '4', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['cis-4-hexenal']),
TaggedDocument(words=['d', 'e', 'l', 't', 'a', '-', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['delta-tetradecalactone']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['isobutyl_cinnamate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl-3-hexenoate']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['citronellyl_valerate']),
TaggedDocument(words=['q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['quinoline']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cyclohexyl_acetate']),
TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['neryl_formate']),
TaggedDocument(words=['b', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['b-ionone']),
TaggedDocument(words=['4', '-', '[', '(', '2', ',', '6', ',', '6', ')', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', '-', '1', '-', 'e', 'n', 'y', 'l', ']', '-', 'b', 'u', 't', '-', '2', '-', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['4-[(2,6,6)-trimethyl-cyclohex-1-enyl]-but-2-en-4-one']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isoamyl_propionate']),
TaggedDocument(words=['1', ',', '3', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,3-propanedithiol']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['citronellyl_formate']),
TaggedDocument(words=['i', 's', 'o', 'p', 'e', 'n', 't', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['isopentylamine']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['benzyl_benzoate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isobutyl_formate']),
TaggedDocument(words=['3', ',', '4', '-', 'h', 'e', 'x', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['3,4-hexanedione']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'a', 'l', 'm', 'i', 't', 'a', 't', 'e'], tags=['ethyl_palmitate']),
TaggedDocument(words=['v', 'i', 't', 'a', 'm', 'i', 'n', '_', 'b', '1'], tags=['vitamin_b1']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 't', 'h', 'i', 'o', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['methyl_2-thiofuroate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isobutyl_propionate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methylthioacetaldehyde']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['methyl_cinnamate']),
TaggedDocument(words=['(', 'd', '-', ',', '_', 'l', '-', ',', '_', 'd', 'l', '-', ',', '_', 'm', 'e', 's', 'o', ')', '_', 't', 'a', 'r', 't', 'a', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['(d-,_l-,_dl-,_meso)_tartaric_acid']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'y', 'l', '-', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['p-mentha-1,8-dien-7-yl-acetate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '_', 'o', 'x', 'i', 'd', 'e'], tags=['ethylene_oxide']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['ethyl_sorbate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'f', 'u', 'r', 'a', 'n', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-3-furanthiol']),
TaggedDocument(words=['2', ',', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2,6-nonadien-1-ol']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '4', '(', '8', ')', '-', 'd', 'i', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['p-mentha-1,4(8)-dien-3-one']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['4-methyl-2-pentenal']),
TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['neryl_butyrate']),
TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'o', 'l'], tags=['eugenol']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'q', 'u', 'i', 'n', 'o', 'x', 'a', 'l', 'i', 'n', 'e'], tags=['5-methylquinoxaline']),
TaggedDocument(words=['l', '-', 'm', 'e', 'n', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['l-menthyl_acetate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '2', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_2-(methylthio)acetate']),
TaggedDocument(words=['2', '-', '(', '2', '-', 'b', 'u', 't', 'y', 'l', ')', '-', '4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-(2-butyl)-4,5-dimethyl-3-thiazoline']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylheptanoic_acid']),
TaggedDocument(words=['c', 'a', 'm', 'p', 'h', 'e', 'n', 'e'], tags=['camphene']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['cis-3-hexenal']),
TaggedDocument(words=['n', 'o', 'n', 'a', '-', '2', '-', 't', 'r', 'a', 'n', 's', ',', '-', '6', '-', 'c', 'i', 's', '-', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['nona-2-trans,-6-cis-dienal']),
TaggedDocument(words=['a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['acetylpyrazine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_butyrate']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['allyl_sulfide']),
TaggedDocument(words=['d', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['dimethyl_succinate']),
TaggedDocument(words=['b', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['b-pinene']),
TaggedDocument(words=['f', 'e', 'n', 'c', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['fenchyl_alcohol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_isobutyrate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['phenethyl-2-methylbutyrate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['ethyl_cinnamate']),
TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 'p', 'h', 'e', 'n', 'y', 'l'], tags=['p-methyl_diphenyl']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['butyl_ethyl_disulfide']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-methyl-3-penten-2-one']),
TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['5-ethyl-2-methylpyridine']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isobutyl_isobutyrate']),
TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['terpinyl_formate']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '3', ',', '5', '(', '6', ')', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-3,5(6)-dimethylpyrazine']),
TaggedDocument(words=['(', 'e', ',', 'e', ')', '-', '2', ',', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e,e)-2,4-decadien-1-ol']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['butyl_hexanoate']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-ethylbutyric_acid']),
TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-diethylpyrazine']),
TaggedDocument(words=['l', '-', 'h', 'i', 's', 't', 'i', 'd', 'i', 'n', 'e'], tags=['l-histidine']),
TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'a', 'n', 'a', 'l'], tags=['undecanal']),
TaggedDocument(words=['v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['valeric_acid']),
TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-undecenal']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['butyl_sulfide']),
TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['amyl_butyrate']),
TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-decen-2-one']),
TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['1-octen-3-ol']),
TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', '5', "'", '-', 'g', 'u', 'a', 'n', 'y', 'l', 'a', 't', 'e'], tags=["disodium_5'-guanylate"]),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['2-methylbutyl-3-methylbutanoate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['phenylacetaldehyde_dimethyl_acetal']),
TaggedDocument(words=['1', '-', 'p', '-', 'm', 'e', 'n', 't', 'h', 'e', 'n', 'e', '-', '8', '-', 't', 'h', 'i', 'o', 'l'], tags=['1-p-menthene-8-thiol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_acetate']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-acetyl-5-methylfuran']),
TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', 'q', 'u', 'i', 'n', 'o', 'l', 'i', 'n', 'e'], tags=['6-methylquinoline']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-5-methylpyrazine']),
TaggedDocument(words=['4', '-', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['4-decenal']),
TaggedDocument(words=['m', 'y', 'r', 't', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['myrtenyl_acetate']),
TaggedDocument(words=['1', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['1-phenyl-1-propanol']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['cinnamyl_alcohol']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['citronellyl_propionate']),
TaggedDocument(words=['3', '-', '(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', 'a', 'c', 'r', 'o', 'l', 'e', 'i', 'n'], tags=['3-(2-furyl)acrolein']),
TaggedDocument(words=['d', '-', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-decalactone']),
TaggedDocument(words=['a', '-', 'i', 'o', 'n', 'o', 'l'], tags=['a-ionol']),
TaggedDocument(words=['2', ',', '6', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['2,6-xylenol']),
TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['(z)-3-hexenyl_propionate']),
TaggedDocument(words=['2', ',', '2', "'", '-', '(', 't', 'h', 'i', 'o', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', ')', '_', 'd', 'i', 'f', 'u', 'r', 'a', 'n'], tags=["2,2'-(thiodimethylene)_difuran"]),
TaggedDocument(words=['o', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['o-cresol']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-ethylfuran']),
TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['2-undecanol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['methyl_anthranilate']),
TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2-hexen-1-ol']),
TaggedDocument(words=['p', 'a', 'l', 'm', 'i', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['palmitic_acid']),
TaggedDocument(words=['m', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['methoxypyrazine']),
TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['a-methylcinnamaldehyde']),
TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'l'], tags=['2-nonanol']),
TaggedDocument(words=['h', 'e', 'x', 'a', 'n', 'a', 'l'], tags=['hexanal']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['propyl_isovalerate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'a', 't', 'e'], tags=['methyl_cyclohexanecarboxylate']),
TaggedDocument(words=['a', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['a-pinene']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'o', 'x', 'a', 'z', 'o', 'l', 'e'], tags=['2-ethyl-4,5-dimethyloxazole']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_octanoate']),
TaggedDocument(words=['2', ',', '5', '_', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,5_diethyl-3-methylpyrazine']),
TaggedDocument(words=['h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['hexanoic_acid']),
TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['acetanisole']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-octenoate']),
TaggedDocument(words=['(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['(2-furyl)-2-propanone']),
TaggedDocument(words=['1', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['1-hydroxy-2-butanone']),
TaggedDocument(words=['2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['2-phenyl-2-butenal']),
TaggedDocument(words=['p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['pyridine']),
TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'a', 'l'], tags=['hydroxycitronellal']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['isoamyl_benzoate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '3', '-', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['p-menth-3-en-1-ol']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['propyl_formate']),
TaggedDocument(words=['1', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['1-decanol']),
TaggedDocument(words=['p', 'r', 'e', 'n', 'y', 'l', 't', 'h', 'i', 'o', 'l'], tags=['prenylthiol']),
TaggedDocument(words=['e', 'u', 'c', 'a', 'l', 'y', 'p', 't', 'o', 'l'], tags=['eucalyptol']),
TaggedDocument(words=['b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['butyraldehyde']),
TaggedDocument(words=['m', 'y', 'r', 'i', 's', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['myristaldehyde']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxohexadecanoic_acid_glyceride']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_3-hydroxyhexanoate']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['cycloionone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_methyl_disulfide']),
TaggedDocument(words=['3', ',', '5', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3,5,5-trimethyl-1-hexanol']),
TaggedDocument(words=['d', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-dodecalactone']),
TaggedDocument(words=['c', 'i', 's', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-6-nonen-1-ol']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-methylthiobutyraldehyde']),
TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'e', 't', 'h', 'y', 'l', '_', '(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['acetaldehyde_ethyl_(z)-3-hexenyl_acetal']),
TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-hepten-2-one']),
TaggedDocument(words=['t', 'r', 'i', 'a', 'c', 'e', 't', 'i', 'n'], tags=['triacetin']),
TaggedDocument(words=['2', ',', '3', '-', 'o', 'c', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['2,3-octanedione']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'a', 'm', 'm', 'a', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['4-hydroxy-4-methyl-5-hexenoic_acid_gamma-lactone']),
TaggedDocument(words=['2', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'e', 't', 'h', 'a', 'n', 'o', 'l'], tags=['2-(methylthio)ethanol']),
TaggedDocument(words=['4', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-acetoxy-2,5-dimethyl-3(2h)-furanone']),
TaggedDocument(words=['2', ',', '4', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2,4-nonadienal']),
TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['terpinyl_acetate']),
TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['3-hexanone']),
TaggedDocument(words=['l', 'a', 'u', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['lauric_acid']),
TaggedDocument(words=['4', '-', 'h', 'e', 'p', 't', 'e', 'n', 'a', 'l', '_', '(', 'c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', ')'], tags=['4-heptenal_(cis-_and_trans-)']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_heptanoate']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['linalyl_isovalerate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['ethyl_lactate']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxooctanoic_acid_glyceride']),
TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n', ',', '_', 'n', 'a', 't', 'u', 'r', 'a', 'l'], tags=['vanillin,_natural']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['hexyl_acetate']),
TaggedDocument(words=['3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'o', 'l', 'a', 'n', 'e'], tags=['3,5-dimethyl-1,2,4-trithiolane']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'u', 't', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['benzyl_butyl_ether']),
TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', ',', '_', '6', '-', 't', 'r', 'a', 'n', 's', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans,_6-trans-nonadienal']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['5-methyl-2-thiophenecarboxaldehyde']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'l'], tags=['3-methyl-1-pentanol']),
TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['3-decanol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_thioacetate']),
TaggedDocument(words=['1', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['1-octanol']),
TaggedDocument(words=['m', 'i', 'n', 't', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['mintlactone']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['2-pentenal']),
TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['eugenyl_methyl_ether']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl-3-methylthiopropionate']),
TaggedDocument(words=['p', '-', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p-a-dimethylbenzyl_alcohol']),
TaggedDocument(words=['c', 'a', 'r', 'y', 'o', 'p', 'h', 'y', 'l', 'l', 'e', 'n', 'e', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['caryophyllene_alcohol']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['3-methyl-2-butenal']),
TaggedDocument(words=['n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['nonanoic_acid']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['cyclohexanecarboxylic_acid']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_octanoate']),
TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['2,4-dimethylanisole']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isoamyl_formate']),
TaggedDocument(words=['1', ',', '2', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['1,2-dimethoxybenzene']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['geranyl_isovalerate']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['2-isopropyl-5-methyl-2-hexenal']),
TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['2,4-dimethylacetophenone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 's', '-', '4', ',', '7', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_cis-4,7-octadienoate']),
TaggedDocument(words=['p', '-', 't', 'o', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['p-tolyl_acetate']),
TaggedDocument(words=['2', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['2-octanol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-hexenoate']),
TaggedDocument(words=['n', '-', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['n-furfuryl_pyrrole']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxohexanoic_acid_glyceride']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 's', 't', 'e', 'a', 'r', 'a', 't', 'e'], tags=['butyl_stearate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_decanoate']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['propyl_butyrate']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '0', '-', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '-', '2', ',', '6', ',', '1', '1', '-', 'd', 'o', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['2,6-dimethyl-10-methylene-2,6,11-dodecatrienal']),
TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'l'], tags=['3-heptanol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-hexenoate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl-4-methylvalerate']),
TaggedDocument(words=['p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['pyrazine']),
TaggedDocument(words=['c', 'a', 'd', 'i', 'n', 'e', 'n', 'e'], tags=['cadinene']),
TaggedDocument(words=['a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['anisole']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '8', '-', 'd', 'i', 'e', 'n', '-', '7', '-', 'o', 'l'], tags=['p-mentha-1,8-dien-7-ol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'a', 'n', 'i', 's', 'a', 't', 'e'], tags=['methyl_anisate']),
TaggedDocument(words=['f', 'e', 'n', 'c', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['fenchyl_acetate']),
TaggedDocument(words=['t', 'h', 'y', 'm', 'o', 'l'], tags=['thymol']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-phenylpropionaldehyde']),
TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['nonyl_octanoate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['3-methylthiopropyl_isothiocyanate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '8', '-', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['p-menth-8-en-2-one']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['isobutyl_benzoate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobutyl_acetate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '1', '-', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['p-menth-1-en-3-ol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_phenyl_sulfide']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-methylpropyl-3-methylbutyrate']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['furfuryl_alcohol']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['propyl_heptanoate']),
TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'n', 'a', 'p', 'h', 't', 'h', 'a', 'l', 'e', 'n', 'e'], tags=['1-methylnaphthalene']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'o', 'u', 'm', 'a', 'r', 'i', 'n'], tags=['dihydrocoumarin']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', 'e', '-', '3', ',', '8', '-', 'd', 'i', 'o', 'l'], tags=['p-menthane-3,8-diol']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'm', 'y', 'r', 'i', 's', 't', 'a', 't', 'e'], tags=['isopropyl_myristate']),
TaggedDocument(words=['s', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'a', 't', 'e'], tags=['s-methyl_3-methylbutanethioate']),
TaggedDocument(words=['g', '-', 'n', 'o', 'n', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-nonalactone']),
TaggedDocument(words=['4', '-', '(', '2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'y', 'l', ')', 'b', 'u', 't', '-', '2', '-', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['4-(2,6,6-trimethyl-cyclohexa-1,3-dienyl)but-2-en-4-one']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['cinnamic_acid']),
TaggedDocument(words=['l', 'i', 'm', 'o', 'n', 'e', 'n', 'e', '_', '(', 'd', '-', ',', 'l', '-', ',', '_', 'a', 'n', 'd', '_', 'd', 'l', '-', ')'], tags=['limonene_(d-,l-,_and_dl-)']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['benzyl_propionate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', '-', '1', '-', 'e', 'n', 'e', '-', '9', '-', 'a', 'l'], tags=['p-menth-1-ene-9-al']),
TaggedDocument(words=['3', ',', '6', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', '-', '2', 'h', '-', 'p', 'y', 'r', 'a', 'n'], tags=['3,6-dihydro-4-methyl-2-(2-methylpropen-1-yl)-2h-pyran']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-isobutyl-3-methylpyrazine']),
TaggedDocument(words=['5', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['5-isopropyl-2-methylpyrazine']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['citronellyl_isobutyrate']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoprenyl_acetate']),
TaggedDocument(words=['3', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'e'], tags=['3-acetyl-2,5-dimethylthiophene']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['isobutyric_acid']),
TaggedDocument(words=['d', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['decanoic_acid']),
TaggedDocument(words=['4', '-', 'p', 'r', 'o', 'p', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-propyl-2,6-dimethoxyphenol']),
TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'o', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['piperonyl_acetate']),
TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'o', 'l', 'e', 'n', 'e'], tags=['terpinolene']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methyl-5-vinylpyrazine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['methyl_salicylate']),
TaggedDocument(words=['3', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['3-(2-methylpropyl)pyridine']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_octanoate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_nonanoate']),
TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2,5-dimethylthiazole']),
TaggedDocument(words=['9', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['9-octadecenal']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['3-phenyl-1-propanol']),
TaggedDocument(words=['p', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p-isopropylbenzyl_alcohol']),
TaggedDocument(words=['2', ',', '8', '-', 'd', 'i', 't', 'h', 'i', 'a', 'n', 'o', 'n', '-', '4', '-', 'e', 'n', '-', '4', '-', 'c', 'a', 'r', 'b', 'o', 'x', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,8-dithianon-4-en-4-carboxaldehyde']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_benzyl_disulfide']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['furfuryl_methyl_sulfide']),
TaggedDocument(words=['p', 'y', 'r', 'u', 'v', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['pyruvaldehyde']),
TaggedDocument(words=['c', 'o', 'u', 'm', 'a', 'r', 'i', 'n', '_', '(', 'p', 'r', 'o', 'h', 'i', 'b', 'i', 't', 'e', 'd', ')'], tags=['coumarin_(prohibited)']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['benzyl_acetate']),
TaggedDocument(words=['l', 'a', 'c', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['lactic_acid']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', '2', '-', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['octyl_2-furoate']),
TaggedDocument(words=['(', 'z', ')', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['(z)-3-hexenyl_valerate']),
TaggedDocument(words=['3', '-', 'b', 'u', 't', 'y', 'l', 'i', 'd', 'e', 'n', 'e', 'p', 'h', 't', 'h', 'a', 'l', 'i', 'd', 'e'], tags=['3-butylidenephthalide']),
TaggedDocument(words=['2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-oxobutyric_acid']),
TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,3-dimethylbenzofuran']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylbutyric_acid']),
TaggedDocument(words=['1', ',', '5', ',', '5', ',', '9', '-', 't', 'e', 't', 'r', 'a', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '3', '-', 'o', 'x', 'a', 't', 'r', 'i', 'c', 'y', 'c', 'l', 'o', '-', '(', '8', '.', '3', '.', '0', '.', '0', '(', '4', ',', '9', ')', ')', '_', '_', '_', '_', '_', '_', '_', '_', 't', 'r', 'i', 'd', 'e', 'c', 'a', 'n', 'e'], tags=['1,5,5,9-tetramethyl-13-oxatricyclo-(8.3.0.0(4,9))________tridecane']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-3-tetrahydrofuranthiol']),
TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['(e)-2-hexenyl_hexanoate']),
TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '(', '5', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['5-ethyl-3-hydroxy-4-methyl-2(5h)-furanone']),
TaggedDocument(words=['3', '-', 'o', 'c', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-octyl_acetate']),
TaggedDocument(words=['(', 'z', ')', '-', '8', '-', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['(z)-8-tetradecenal']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-octadienal']),
TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '7', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-7-decenoic_acid_d-lactone']),
TaggedDocument(words=['d', '-', 'c', 'a', 'm', 'p', 'h', 'o', 'r'], tags=['d-camphor']),
TaggedDocument(words=['3', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'i', 'd', 'e', 'n', 'e', 'p', 'h', 't', 'h', 'a', 'l', 'i', 'd', 'e'], tags=['3-propylidenephthalide']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['ethyl_laurate']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', ',', '_', '(', '5', '_', 'o', 'r', '_', '6', ')', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', ',', '_', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', 'i', 's', 'o', 'm', 'e', 'r', 's'], tags=['2-acetyl-3,_(5_or_6)-dimethylpyrazine,_mixture_of_isomers']),
TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6-methyl-5-hepten-2-one']),
TaggedDocument(words=['g', '-', 'v', 'a', 'l', 'e', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-valerolactone']),
TaggedDocument(words=['a', 'n', 'i', 's', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['anisyl_formate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl_4-(methylthio)-butyrate']),
TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['heptyl_alcohol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-methoxy-3-(1-methylpropyl)pyrazine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'n', 'i', 'c', 'o', 't', 'i', 'n', 'a', 't', 'e'], tags=['methyl_nicotinate']),
TaggedDocument(words=['s', 'u', 'l', 'f', 'u', 'r', '_', 'd', 'i', 'o', 'x', 'i', 'd', 'e'], tags=['sulfur_dioxide']),
TaggedDocument(words=['n', 'o', 'o', 't', 'k', 'a', 't', 'o', 'n', 'e'], tags=['nootkatone']),
TaggedDocument(words=['l', 'e', 'v', 'u', 'l', 'i', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['levulinic_acid']),
TaggedDocument(words=['2', '-', 't', 'r', 'i', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-tridecanone']),
TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4,5-dimethyl_thiazole']),
TaggedDocument(words=['l', '-', 'l', 'y', 's', 'i', 'n', 'e'], tags=['l-lysine']),
TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', ',', '_', '4', '-', 't', 'r', 'a', 'n', 's', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans,_4-trans-decadienal']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 't', 'e', 't', 'r', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxotetradecanoic_acid_glyceride']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['methyl_phenethyl_ether']),
TaggedDocument(words=['p', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-propylphenol']),
TaggedDocument(words=['a', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['a-ionone']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['allyl_mercaptan']),
TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-methoxycinnamaldehyde']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-propylpyrazine']),
TaggedDocument(words=['1', '-', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'e'], tags=['1-ethoxy-3-methyl-2-butene']),
TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['(e)-2-decenoic_acid']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['ethyl_alcohol']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['phenethyl_isovalerate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['4-methyl-2,3-pentanedione']),
TaggedDocument(words=['p', 'a', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['paraldehyde']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-propionylthiazole']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-pentylfuran']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-methyl-2,6-dimethoxyphenol']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_butyrate']),
TaggedDocument(words=['p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['pyrrole']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['cinnamyl_benzoate']),
TaggedDocument(words=['c', 'a', 'm', 'p', 'h', 'o', 'l', 'e', 'n', 'e', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['campholene_acetate']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-isobutyl-3-methoxypyrazine']),
TaggedDocument(words=['9', ',', '1', '2', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', '(', '4', '8', '%', ')', '_', 'p', 'l', 'u', 's', '_', '9', ',', '1', '2', ',', '1', '5', '-', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', '-', '_', 't', 'r', 'i', 'e', 'n', 'o', 'i', 'n', 'c', '_', 'a', 'c', 'i', 'd', '_', '(', '5', '2', '%', ')', '_', '(', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 's', 't', 'e', 'r', 's', ')'], tags=['9,12-octadecadienoic_acid_(48%)_plus_9,12,15-octadeca-_trienoinc_acid_(52%)_(methyl_esters)']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'b', '-', 'i', 'o', 'n', 'o', 'l'], tags=['dihydro-b-ionol']),
TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'o', 'l'], tags=['isopulegol']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['trans-2-hexenoic_acid']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['linalyl_acetate']),
TaggedDocument(words=['1', '-', 'h', 'e', 'x', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'l'], tags=['1-hexadecanol']),
TaggedDocument(words=['n', '-', 'o', 'c', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['n-octyl_formate']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_trans-2-hexenoate']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['hexyl_hexanoate']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'f', 'u', 'r', 'a', 'l'], tags=['5-methylfurfural']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['geranyl_propionate']),
TaggedDocument(words=['n', 'e', 'r', 'o', 'l', 'i', 'd', 'o', 'l'], tags=['nerolidol']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['p-menthan-2-one']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'a', 'l'], tags=['2-methylpentanal']),
TaggedDocument(words=['d', 'i', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diisopropyl_disulfide']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_3-phenylpropionate']),
TaggedDocument(words=['(', '+', '/', '?', ')', '_', 'h', 'e', 'p', 't', 'a', 'n', '-', '3', '-', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['(+/?)_heptan-3-yl_butyrate']),
TaggedDocument(words=['(', '+', '/', '?', ')', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['(+/?)_2-methyl-1-butanol']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-6-methylpyrazine']),
TaggedDocument(words=['b', 'e', 't', 'a', '-', 'c', 'y', 'c', 'l', 'o', 'd', 'e', 'x', 't', 'r', 'i', 'n'], tags=['beta-cyclodextrin']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['hexyl_phenylacetate']),
TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['heptyl_acetate']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['3-phenylpropyl_propionate']),
TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['3-mercapto-3-methylbutyl_formate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylhexanoic_acid']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_isobutyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_nonanoate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'e', 'c', 'a', 'r', 'b', 'o', 'x', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_cyclohexanecarboxylate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '1', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_1-propenyl_disulfide']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 't', 'h', 'i', 'o', 'c', 'y', 'a', 'n', 'a', 't', 'e'], tags=['phenethyl_isothiocyanate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-methylvaleric_acid']),
TaggedDocument(words=['4', '-', 'c', 'a', 'r', 'v', 'o', 'm', 'e', 'n', 't', 'h', 'e', 'n', 'o', 'l'], tags=['4-carvomenthenol']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['propiophenone']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['propyl_benzoate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['3-methyl-2-cyclohexen-1-one']),
TaggedDocument(words=['o', '-', 'v', 'i', 'n', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['o-vinylanisole']),
TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 'm', 'a', 'l', 'a', 't', 'e'], tags=['diethyl_malate']),
TaggedDocument(words=['c', 'i', 't', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['citric_acid']),
TaggedDocument(words=['d', 'e', 'c', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['decyl_acetate']),
TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['vanillyl_ethyl_ether']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_propionate']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['propyl_disulfide']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl_isobutyrate']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-isopropyl-4-methylthiazole']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethyl-3-methylpyrazine']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cis-3-hexen-1-yl_acetate']),
TaggedDocument(words=['t', 'h', 'a', 'u', 'm', 'a', 't', 'i', 'n', '_', 'b', '-', 'r', 'e', 'c', 'o', 'm', 'b', 'i', 'n', 'a', 'n', 't'], tags=['thaumatin_b-recombinant']),
TaggedDocument(words=['1', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '1', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'e', 'n', 'e'], tags=['1,4-dimethyl-4-acetyl-1-cyclohexene']),
TaggedDocument(words=['2', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['2-undecen-1-ol']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['cyclohexanone']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', '3', ',', '5', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', ',', '4', '-', 't', 'r', 'i', 't', 'h', 'i', 'o', 'l', 'a', 'n', 'e'], tags=['(+/?)-cis-_and_trans-3,5-diethyl-1,2,4-trithiolane']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_salicylate']),
TaggedDocument(words=['g', '-', 'b', 'u', 't', 'y', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-butyrolactone']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_2-hydroxy-4-methylpentanoate']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['5-methyl-2-hepten-4-one']),
TaggedDocument(words=['i', 's', 'o', 's', 'a', 'f', 'r', 'o', 'l', 'e', '_', '(', 'p', 'r', 'o', 'h', 'i', 'b', 'i', 't', 'e', 'd', ')'], tags=['isosafrole_(prohibited)']),
TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'g', 'u', 'a', 'i', 'a', 'c', 'o', 'l'], tags=['4-ethylguaiacol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['methyl-3-phenylpropionate']),
TaggedDocument(words=['a', '-', 'i', 'r', 'o', 'n', 'e'], tags=['a-irone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_undecanoate']),
TaggedDocument(words=['b', '-', 'i', 'o', 'n', 'o', 'l'], tags=['b-ionol']),
TaggedDocument(words=['d', '-', 'o', 'c', 't', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-octalactone']),
TaggedDocument(words=['4', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-mercapto-4-methyl-2-pentanone']),
TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,5-diethyltetrahydrofuran']),
TaggedDocument(words=['6', ',', '7', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '2', ',', '3', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', 'h', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['6,7-dihydro-2,3-dimethyl-5h-cyclopentapyrazine']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_2-methylbutyrate']),
TaggedDocument(words=['3', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '_', 'h', 'e', 'x', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-(methylthio)_hexyl_acetate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['phenoxyacetic_acid']),
TaggedDocument(words=['d', 'e', 'c', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['decyl_butyrate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'n', 'o', 'n', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-methylnonanoic_acid']),
TaggedDocument(words=['v', 'e', 'r', 'a', 't', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['veratraldehyde']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['furfuryl_mercaptan']),
TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'o', 'n', 'e'], tags=['menthone']),
TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'a', 'n', 'a', 'l'], tags=['2,6,6-trimethylcyclohexa-1,3-dienyl_methanal']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-hydroxybenzoic_acid']),
TaggedDocument(words=['1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['1-butanethiol']),
TaggedDocument(words=['6', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['6-undecanone']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['citronellyl_acetate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'c', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_octadecanoate']),
TaggedDocument(words=['1', '-', 'h', 'e', 'x', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['1-hexanethiol']),
TaggedDocument(words=['4', ',', '5', ',', '6', ',', '7', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', '-', '3', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['4,5,6,7-tetrahydro-3,6-dimethylbenzofuran']),
TaggedDocument(words=['4', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['4-hexen-1-ol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'o', 'c', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-2-octenal']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isopropyl_propionate']),
TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'i', 'n', 'e'], tags=['piperine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_benzoate']),
TaggedDocument(words=['t', 'r', 'i', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['trithioacetone']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'b', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dihydro-b-ionone']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'o', 'o', 'l', '_', 'o', 'x', 'i', 'd', 'e'], tags=['linalool_oxide']),
TaggedDocument(words=['o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['octanoic_acid']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['3-methylbutyl_2-methylbutanoate']),
TaggedDocument(words=['p', 'o', 't', 'a', 's', 's', 'i', 'u', 'm', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['potassium_sorbate']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-ethyl-4-methylthiazole']),
TaggedDocument(words=['2', ',', '2', "'", '-', '(', 'd', 'i', 't', 'h', 'i', 'o', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', ')', '-', 'd', 'i', 'f', 'u', 'r', 'a', 'n'], tags=["2,2'-(dithiodimethylene)-difuran"]),
TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diethyl_sulfide']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', '4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['methyl-4-(methylthio)butyrate']),
TaggedDocument(words=['s', 'k', 'a', 't', 'o', 'l', 'e'], tags=['skatole']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4-methylthiazole']),
TaggedDocument(words=['3', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'd', 'i', 'o', 'n', 'e'], tags=['3,5-dimethyl-1,2-cyclopentadione']),
TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['rhodinyl_butyrate']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_2-methylbutyrate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['phenethyl_butyrate']),
TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'a', 'l'], tags=['3-mercapto-2-methylpentanal']),
TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['acetaldehyde']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['butyl_butyrate']),
TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4,5-dimethyl-3-hydroxy-2,5-dihydrofuran-2-one']),
TaggedDocument(words=['p', '-', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 't', 'y', 'r', 'e', 'n', 'e'], tags=['p-a-dimethyl_styrene']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['furfuryl_methyl_ether']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['5-methyl-3-hexen-2-one']),
TaggedDocument(words=['t', 'h', 'e', 'o', 'b', 'r', 'o', 'm', 'i', 'n', 'e'], tags=['theobromine']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['butyl_laurate']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_isobutyrate']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['3-methylcyclohexanone']),
TaggedDocument(words=['i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['isovaleric_acid']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-ethylpyrazine']),
TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['sodium_acetate']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['trans-2-nonen-1-ol']),
TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['2-hexenal']),
TaggedDocument(words=['3', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '1', '-', 'v', 'i', 'n', 'y', 'l', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['3,4-dimethoxy-1-vinylbenzene']),
TaggedDocument(words=['n', '-', 'n', 'o', 'n', 'a', 'n', 'a', 'l'], tags=['n-nonanal']),
TaggedDocument(words=['2', '-', 'o', 'c', 't', 'e', 'n', 'a', 'l'], tags=['2-octenal']),
TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,5-dimethylpyrazine']),
TaggedDocument(words=['d', 'i', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['dipropyl_trisulfide']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-methyl-2-pentanone']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'f', 'a', 'r', 'n', 'e', 's', 'o', 'l'], tags=['(+/?)-dihydrofarnesol']),
TaggedDocument(words=['c', 'a', 'r', 'v', 'a', 'c', 'r', 'o', 'l'], tags=['carvacrol']),
TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diethyl_trisulfide']),
TaggedDocument(words=['2', ',', '6', ',', '1', '0', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', ',', '1', '0', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', '-', '1', '4', '-', 'o', 'n', 'e'], tags=['2,6,10-trimethyl-2,6,10-pentadecatrien-14-one']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['propyl_mercaptan']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', 'n', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['methyl-n-methylanthranilate']),
TaggedDocument(words=['c', 'a', 'f', 'f', 'e', 'i', 'n', 'e'], tags=['caffeine']),
TaggedDocument(words=['(', '+', '/', '?', ')', '-', '2', '-', '(', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '2', '-', 'y', 'l', ')', 'p', 'r', 'o', 'p', 'i', '-', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['(+/?)-2-(5-methyl-5-vinyltetrahydrofuran-2-yl)propi-onaldehyde']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isopropyl_acetate']),
TaggedDocument(words=['3', ',', '7', ',', '1', '1', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', ',', '1', '0', '-', 'd', 'o', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['3,7,11-trimethyl-2,6,10-dodecatrienal']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['benzyl_acetoacetate']),
TaggedDocument(words=['t', 'r', 'i', 'e', 't', 'h', 'y', 'l', '_', 'c', 'i', 't', 'r', 'a', 't', 'e'], tags=['triethyl_citrate']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['linalyl_formate']),
TaggedDocument(words=['a', 's', 'c', 'o', 'r', 'b', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['ascorbic_acid']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['hexyl_butyrate']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-isobutyl_thiazole']),
TaggedDocument(words=['d', '-', 'r', 'i', 'b', 'o', 's', 'e'], tags=['d-ribose']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', 'a', 'n', 'e', 't', 'h', 'o', 'l', 'e'], tags=['trans-anethole']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'n', 't', 'h', 'r', 'a', 'n', 'i', 'l', 'a', 't', 'e'], tags=['butyl_anthranilate']),
TaggedDocument(words=['b', 'u', 't', 't', 'e', 'r', '_', 's', 't', 'a', 'r', 't', 'e', 'r', '_', 'd', 'i', 's', 't', 'i', 'l', 'l', 'a', 't', 'e'], tags=['butter_starter_distillate']),
TaggedDocument(words=['l', '-', 'a', 's', 'p', 'a', 'r', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-aspartic_acid']),
TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'p', 'y', 'r', 'a', 'n'], tags=['2,6,6-trimethyl-6-vinyltetrahydropyran']),
TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['2-hepten-4-one']),
TaggedDocument(words=['g', '-', 'o', 'c', 't', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-octalactone']),
TaggedDocument(words=['t', 'e', 'r', 'p', 'i', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['terpinyl_butyrate']),
TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['undecyl_alcohol']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2,6-dimethylpyridine']),
TaggedDocument(words=['6', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 't', 'h', 'e', 'a', 's', 'p', 'i', 'r', 'a', 'n', 'e'], tags=['6-hydroxydihydrotheaspirane']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'o', 'o', 'l'], tags=['linalool']),
TaggedDocument(words=['i', 's', 'o', 'e', 'u', 'g', 'e', 'n', 'o', 'l'], tags=['isoeugenol']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['cinnamaldehyde']),
TaggedDocument(words=['g', 'u', 'a', 'i', 'e', 'n', 'e'], tags=['guaiene']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['3-methyl-1-butanethiol']),
TaggedDocument(words=['b', 'u', 't', 't', 'e', 'r', '_', 'a', 'c', 'i', 'd', 's'], tags=['butter_acids']),
TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isopulegyl_acetate']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-hydroxy-5-methyl-3(2h)-furanone']),
TaggedDocument(words=['2', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-octanone']),
TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'e', 'o', 'l'], tags=['isoborneol']),
TaggedDocument(words=['d', '-', 'h', 'e', 'x', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-hexalactone']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['benzaldehyde_dimethyl_acetal']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_sulfide']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['propyl_alcohol']),
TaggedDocument(words=['1', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['1-(methylthio)-2-butanone']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['trans-2-methyl-2-butenoic_acid']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['isopropyl_formate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['ethyl_phenylacetate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_ethyl_trisulfide']),
TaggedDocument(words=['t', 'h', 'a', 'u', 'm', 'a', 't', 'i', 'n'], tags=['thaumatin']),
TaggedDocument(words=['m', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['m-dimethoxybenzene']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['phenethyl_acetate']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['benzyl_salicylate']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-ethylbutyraldehyde']),
TaggedDocument(words=['r', 'e', 's', 'o', 'r', 'c', 'i', 'n', 'o', 'l'], tags=['resorcinol']),
TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['amyl_hexanoate']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['benzyl_formate']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['benzyl_alcohol']),
TaggedDocument(words=['v', 'e', 'r', 'b', 'e', 'n', 'o', 'l'], tags=['verbenol']),
TaggedDocument(words=['g', 'l', 'y', 'c', 'e', 'r', 'o', 'l', '_', 't', 'r', 'i', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['glycerol_tributyrate']),
TaggedDocument(words=['a', ',', 'a', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['a,a-dimethylphenethyl_alcohol']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['hexyl_formate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'v', 'i', 'n', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['4-methyl-5-vinylthiazole']),
TaggedDocument(words=['c', 'i', 's', '-', '_', 'a', 'n', 'd', '_', 't', 'r', 'a', 'n', 's', '-', 'm', 'e', 'n', 't', 'h', 'o', 'n', 'e', '-', '8', '-', 't', 'h', 'i', 'o', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cis-_and_trans-menthone-8-thioacetate']),
TaggedDocument(words=['l', 'a', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['lauryl_alcohol']),
TaggedDocument(words=['d', 'i', 'a', 'l', 'l', 'y', 'l', '_', 'p', 'o', 'l', 'y', 's', 'u', 'l', 'f', 'i', 'd', 'e', 's'], tags=['diallyl_polysulfides']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_heptanoate']),
TaggedDocument(words=['2', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-phenylpropionaldehyde']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-propylphenol']),
TaggedDocument(words=['f', 'a', 'r', 'n', 'e', 's', 'e', 'n', 'e'], tags=['farnesene']),
TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-hydroxy-4-methyl_benzaldehyde']),
TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '6', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['(z)-4-hydroxy-6-dodecenoic_acid_lactone']),
TaggedDocument(words=['m', 'e', 'n', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['menthyl_isovalerate']),
TaggedDocument(words=['o', 'l', 'e', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['oleic_acid']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '1', ',', '3', '-', 'd', 'i', 'e', 'n', 'e'], tags=['p-mentha-1,3-diene']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['linalyl_octanoate']),
TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['1-methyl-2-acetylpyrrole']),
TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['amyl_alcohol']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-methylthio-2-butanone']),
TaggedDocument(words=['2', '(', '1', '0', ')', '-', 'p', 'i', 'n', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['2(10)-pinen-3-ol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_propyl_trisulfide']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-2-pentenal']),
TaggedDocument(words=['4', '-', '(', 'p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-(p-methoxyphenyl)-2-butanone']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['benzyl_methyl_sulfide']),
TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['heptyl_isobutyrate']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['benzyl_tiglate']),
TaggedDocument(words=['5', ',', '6', ',', '7', ',', '8', '-', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'q', 'u', 'i', 'n', 'o', 'x', 'a', 'l', 'i', 'n', 'e'], tags=['5,6,7,8-tetrahydroquinoxaline']),
TaggedDocument(words=['t', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', '(', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', 'p', 'y', 'r', 'a', 'n'], tags=['tetrahydro-4-methyl-2-(2-methylpropen-1-yl)pyran']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['3-methyl-1-cyclopentadecanone']),
TaggedDocument(words=['c', 'a', 'r', 'v', 'e', 'o', 'l'], tags=['carveol']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2,6-dimethoxyphenol']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isopropyl_butyrate']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', 'e', 'n', 'e', '_', 'g', 'l', 'y', 'c', 'o', 'l'], tags=['propylene_glycol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-1-propanethiol']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['3-methyl-2-butanol']),
TaggedDocument(words=['5', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', '-', '2', '-', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['5-ethyl-2-hydroxy-3-methylcyclopent-2-en-1-one']),
TaggedDocument(words=['(', 'e', ')', '-', '3', '-', '(', 'z', ')', '-', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e)-3-(z)-6-nonadien-1-ol']),
TaggedDocument(words=['b', 'o', 'r', 'n', 'e', 'o', 'l'], tags=['borneol']),
TaggedDocument(words=['6', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', ',', '5', '-', 'h', 'e', 'p', 't', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['6-methyl-3,5-heptadien-2-one']),
TaggedDocument(words=['a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'n', 'h', 'y', 'd', 'r', 'i', 'd', 'e'], tags=['acetic_anhydride']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_phenyl_disulfide']),
TaggedDocument(words=['3', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-(methylthio)propyl_acetate']),
TaggedDocument(words=['3', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-heptanone']),
TaggedDocument(words=['n', '-', 'o', 'c', 't', 'a', 'n', 'a', 'l'], tags=['n-octanal']),
TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['o-methylanisole']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['butyl_isovalerate']),
TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['1-octen-3-one']),
TaggedDocument(words=['2', ',', '4', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', 'd', '-', '3', '-', 'o', 'x', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2,4,5-trimethyl-d-3-oxazoline']),
TaggedDocument(words=['2', '-', 't', 'r', 'i', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['2-tridecenal']),
TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,4-dimethylbenzaldehyde']),
TaggedDocument(words=['1', '-', 'a', 'm', 'i', 'n', 'o', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'l'], tags=['1-amino-2-propanol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 't', 'h', 'i', 'o', 'm', 'e', 't', 'h', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-methyl-5-thiomethylfuran']),
TaggedDocument(words=['t', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['tetrahydrofurfuryl_alcohol']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['cis-3-hexenyl_lactate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['methyl_furoate']),
TaggedDocument(words=['p', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'e', 'n', 'e'], tags=['p-dimethoxybenzene']),
TaggedDocument(words=['a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['acetic_acid']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['ethyl_tiglate']),
TaggedDocument(words=['m', 'y', 'r', 'c', 'e', 'n', 'e'], tags=['myrcene']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'd', 'e', 'c', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'g', 'l', 'y', 'c', 'e', 'r', 'i', 'd', 'e'], tags=['3-oxodecanoic_acid_glyceride']),
TaggedDocument(words=['(', 'e', ')', '-', '2', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(e)-2-octen-1-ol']),
TaggedDocument(words=['o', 'c', 't', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['octyl_2-methylbutyrate']),
TaggedDocument(words=['s', 'a', 'n', 't', 'a', 'l', 'o', 'l', ',', '_', 'a', '_', 'a', 'n', 'd'], tags=['santalol,_a_and']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'l', 'a', 'c', 't', 'a', 't', 'e'], tags=['butyl_lactate']),
TaggedDocument(words=['n', 'e', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['neryl_acetate']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl_butyrate']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', '-', '2', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['trans-2-octen-1-yl_acetate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-methylbutyl-2-methyl_butyrate']),
TaggedDocument(words=['d', ',', 'l', '-', 'v', 'a', 'l', 'i', 'n', 'e'], tags=['d,l-valine']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_propyl_disulfide']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['allyl_disulfide']),
TaggedDocument(words=['2', ',', '3', '-', 'd', 'i', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3-diethyl-5-methylpyrazine']),
TaggedDocument(words=['t', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['trimethylamine']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 't', 'r', 'a', 'n', 's', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_trans-2-decenoate']),
TaggedDocument(words=['a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['acetophenone']),
TaggedDocument(words=['f', 'o', 'r', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['formic_acid']),
TaggedDocument(words=['4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['4-hydroxy-2,5-dimethyl-3(2h)-furanone']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isoamyl_phenylacetate']),
TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', ',', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-2,4-decadienoic_acid_d-lactone']),
TaggedDocument(words=['e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['eugenyl_acetate']),
TaggedDocument(words=['d', 'i', 's', 'o', 'd', 'i', 'u', 'm', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['disodium_succinate']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', 't', 'r', 'a', 'n', 's', '-', '2', ',', '_', 'c', 'i', 's', '-', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl-trans-2,_cis-4-decadienoate']),
TaggedDocument(words=['1', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'a', 'c', 'e', 't', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['1-ethyl-2-acetylpyrrole']),
TaggedDocument(words=['b', 'i', 'p', 'h', 'e', 'n', 'y', 'l'], tags=['biphenyl']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methyltetrahydrothiophen-3-one']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '3', '-', 'n', 'o', 'n', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_3-nonenoate']),
TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'c', 'i', 't', 'r', 'a', 't', 'e'], tags=['sodium_citrate']),
TaggedDocument(words=['e', 's', 't', 'r', 'a', 'g', 'o', 'l', 'e'], tags=['estragole']),
TaggedDocument(words=['o', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['o-propylphenol']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '_', '(', '3', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', ')', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', '8', '5', '%', ')', '_', 'p', 'l', 'u', 's', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '(', '3', '_', 'o', 'r', '_', '5', '_', 'o', 'r', '_', '6', ')', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e', '_', '(', '1', '3', '%', ')'], tags=['2-ethyl_(3_or_5_or_6)-methoxypyrazine_(85%)_plus_2-methyl_(3_or_5_or_6)-methoxypyrazine_(13%)']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'a', 'l'], tags=['furfural']),
TaggedDocument(words=['a', 'l', 'p', 'h', 'a', '-', 't', 'e', 'r', 'p', 'i', 'n', 'e', 'o', 'l'], tags=['alpha-terpineol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['methyl_isovalerate']),
TaggedDocument(words=['a', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['a-methylbenzyl_acetate']),
TaggedDocument(words=['r', 'h', 'o', 'd', 'i', 'n', 'o', 'l'], tags=['rhodinol']),
TaggedDocument(words=['q', 'u', 'i', 'n', 'i', 'n', 'e'], tags=['quinine']),
TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'o', 'x', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-hydroxy-2-oxopropionic_acid']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['isoamyl_isovalerate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['phenethyl_propionate']),
TaggedDocument(words=['g', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['g-ionone']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', 'p', 'y', 'r', 'r', 'o', 'l', 'e'], tags=['2-propionylpyrrole']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['geranyl_isobutyrate']),
TaggedDocument(words=['b', 'i', 's', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'm', 'e', 't', 'h', 'a', 'n', 'e'], tags=['bis-(methylthio)methane']),
TaggedDocument(words=['d', '-', 'f', 'e', 'n', 'c', 'h', 'o', 'n', 'e'], tags=['d-fenchone']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_o-methoxybenzoate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['phenethyl_formate']),
TaggedDocument(words=['l', 'a', 'u', 'r', 'i', 'c', '_', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['lauric_aldehyde']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'i', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['propionic_acid']),
TaggedDocument(words=['m', '-', 'c', 'r', 'e', 's', 'o', 'l'], tags=['m-cresol']),
TaggedDocument(words=['c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cyclohexyl_butyrate']),
TaggedDocument(words=['2', ',', '3', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['2,3-pentanedione']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 's', 'o', 'r', 'b', 'a', 't', 'e'], tags=['methyl_sorbate']),
TaggedDocument(words=['4', ',', '5', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '3', '-', '(', '2', 'h', ')', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['4,5-dihydro-3-(2h)thiophenone']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methyl-2-oxobutanoic_acid']),
TaggedDocument(words=['2', "'", '-', 'a', 'm', 'i', 'n', 'o', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=["2'-aminoacetophenone"]),
TaggedDocument(words=['(', 'e', ')', '-', '3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '5', ',', '7', '-', 'o', 'c', 't', 'a', 't', 'r', 'i', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['(e)-3,7-dimethyl-1,5,7-octatrien-3-ol']),
TaggedDocument(words=['t', 'r', 'a', 'n', 's', ',', '_', 't', 'r', 'a', 'n', 's', '-', '2', ',', '4', '-', 'd', 'o', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['trans,_trans-2,4-dodecadienal']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_3-mercaptopropionate']),
TaggedDocument(words=['2', ',', '4', '-', 'p', 'e', 'n', 't', 'a', 'd', 'i', 'e', 'n', 'a', 'l'], tags=['2,4-pentadienal']),
TaggedDocument(words=['5', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'v', 'i', 'n', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['5-isopropenyl-2-methyl-2-vinyltetrahydrofuran']),
TaggedDocument(words=['b', 'i', 's', 'a', 'b', 'o', 'l', 'e', 'n', 'e'], tags=['bisabolene']),
TaggedDocument(words=['(', 'z', ')', '(', 'z', ')', '-', '3', ',', '6', '-', 'n', 'o', 'n', 'a', 'd', 'i', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['(z)(z)-3,6-nonadien-1-ol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['ethyl_propyl_trisulfide']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['phenylacetaldehyde']),
TaggedDocument(words=['p', '-', 't', 'o', 'l', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['p-tolyl-3-methyl_butyrate']),
TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd', '_', 'd', '-', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['5-hydroxy-2-decenoic_acid_d-lactone']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'j', 'a', 's', 'm', 'o', 'n', 'a', 't', 'e'], tags=['methyl_jasmonate']),
TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['vanillin_acetate']),
TaggedDocument(words=['u', 'n', 'd', 'e', 'c', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['undecen-1-ol']),
TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'y', 'l', 'f', 'u', 'r', 'a', 'n'], tags=['2-heptylfuran']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'a', 'n'], tags=['methyl_mercaptan']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', 'a', 'm', 'i', 'n', 'e'], tags=['phenethylamine']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_heptanoate']),
TaggedDocument(words=['c', 'i', 's', '-', '2', '-', 'n', 'o', 'n', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-2-nonen-1-ol']),
TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'x', 'y', 'c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'o', 'l'], tags=['hydroxycitronellol']),
TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'd', 'o', 'd', 'e', 'c', 'e', 'n', 'a', 'l'], tags=['(z)-4-dodecenal']),
TaggedDocument(words=['3', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'a', 'l', ',', '_', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['3-oxobutanal,_dimethyl_acetal']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_hexanoate']),
TaggedDocument(words=['1', '-', '(', 'p', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'n', 'e'], tags=['1-(p-methoxyphenyl)-2-propanone']),
TaggedDocument(words=['4', '-', '(', 'p', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', '2', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['4-(p-hydroxyphenyl)-2-butanone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'i', 's', '-', '4', '-', 'h', 'e', 'p', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_cis-4-heptenoate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'p', 't', 'a', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methylheptan-3-one']),
TaggedDocument(words=['2', ',', '3', ',', '5', ',', '6', '-', 't', 'e', 't', 'r', 'a', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3,5,6-tetramethylpyrazine']),
TaggedDocument(words=['2', '-', 's', 'e', 'c', '-', 'b', 'u', 't', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'n', 'o', 'n', 'e'], tags=['2-sec-butylcyclohexanone']),
TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['sodium_benzoate']),
TaggedDocument(words=['3', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['3-octanol']),
TaggedDocument(words=['3', '-', 'c', 'a', 'r', 'e', 'n', 'e'], tags=['3-carene']),
TaggedDocument(words=['d', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['d-undecalactone']),
TaggedDocument(words=['c', 'u', 'm', 'i', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['cuminaldehyde']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['linalyl_propionate']),
TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['3-hexen-1-ol']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['3-phenylpropyl_cinnamate']),
TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['n-butyl-2-methylbutyrate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methylbutyraldehyde']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['benzyl_isobutyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'm', 'y', 'r', 'i', 's', 't', 'a', 't', 'e'], tags=['methyl_myristate']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2,6-dimethylthiophenol']),
TaggedDocument(words=['h', 'e', 'p', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['heptanoic_acid']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['butyl_acetate']),
TaggedDocument(words=['d', '-', 'p', 'i', 'p', 'e', 'r', 'i', 't', 'o', 'n', 'e'], tags=['d-piperitone']),
TaggedDocument(words=['c', 'i', 's', '-', '6', '-', 'n', 'o', 'n', 'e', 'n', 'a', 'l'], tags=['cis-6-nonenal']),
TaggedDocument(words=['2', ',', '2', ',', '4', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '3', '-', 'o', 'x', 'a', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'n', 'e'], tags=['2,2,4-trimethyl-1,3-oxacyclopentane']),
TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2-hydroxybenzoic_acid']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['3-methylbutyraldehyde']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'e', 'x', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-hexenoate']),
TaggedDocument(words=['2', ',', '4', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['2,4-dihydroxybenzoic_acid']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['geranyl_formate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_ethyl_sulfide']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 's', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 't', 'e'], tags=['isoamyl_salicylate']),
TaggedDocument(words=['2', ',', '5', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['2,5-xylenol']),
TaggedDocument(words=['d', '-', 'n', 'e', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'l'], tags=['d-neomenthol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_furfuryl_disulfide']),
TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-ethyl-2,6-dimethoxyphenol']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2-pentylpyridine']),
TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'b', 'u', 't', 'a', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-hydroxy-4-phenylbutan-2-one']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['geranyl_acetate']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['phenethyl_benzoate']),
TaggedDocument(words=['1', '-', 'p', 'e', 'n', 't', 'e', 'n', '-', '3', '-', 'o', 'l'], tags=['1-penten-3-ol']),
TaggedDocument(words=['a', 'n', 'i', 's', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['anisyl_alcohol']),
TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 's', 'u', 'c', 'c', 'i', 'n', 'a', 't', 'e'], tags=['diethyl_succinate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['methyl_p-hydroxybenzoate']),
TaggedDocument(words=['n', 'o', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['nonyl_isovalerate']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['cinnamyl_cinnamate']),
TaggedDocument(words=['n', '-', 'o', 'c', 't', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['n-octyl_isovalerate']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['propyl_propionate']),
TaggedDocument(words=['a', '-', 'p', 'h', 'e', 'l', 'l', 'a', 'n', 'd', 'r', 'e', 'n', 'e'], tags=['a-phellandrene']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['isoamyl_alcohol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', '(', 'p', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'y', 'l', ')', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2-methyl-3-(p-isopropylphenyl)-propionaldehyde']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'o', 'x', 'y', '-', '3', '-', 'b', 'u', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-acetoxy-3-butanone']),
TaggedDocument(words=['a', 'c', 'o', 'n', 'i', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['aconitic_acid']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['hexyl_alcohol']),
TaggedDocument(words=['2', ',', '4', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2,4,5-trimethyl_thiazole']),
TaggedDocument(words=['d', 'i', 'm', 'e', 't', 'h', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['dimethyl_trisulfide']),
TaggedDocument(words=['1', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '1', ',', '2', '-', 'p', 'r', 'o', 'p', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['1-phenyl-1,2-propanedione']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['ethyl_formate']),
TaggedDocument(words=['s', 'o', 'd', 'i', 'u', 'm', '_', 'd', 'i', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['sodium_diacetate']),
TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'l', 'c', 'o', 'p', 'e', 'n', 't', '-', '2', '-', 'e', 'n', '-', '1', '-', 'o', 'n', 'e'], tags=['3-ethyl-2-hydroxy-4-methylcylcopent-2-en-1-one']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['citronellyl_butyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['methyl_laurate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobutyl_phenylacetate']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['isobutyl_alcohol']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['allyl_heptanoate']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-acetyl-2-thiazoline']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['butyl_alcohol']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isobutyl_hexanoate']),
TaggedDocument(words=['t', 'a', 'n', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['tannic_acid']),
TaggedDocument(words=['m', 'y', 'r', 'i', 's', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['myristic_acid']),
TaggedDocument(words=['2', ',', '2', ',', '3', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', '-', '3', '-', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['2,2,3-trimethylcyclopent-3-en-1-yl_acetaldehyde']),
TaggedDocument(words=['h', 'y', 'd', 'r', 'o', 'g', 'e', 'n', '_', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['hydrogen_sulfide']),
TaggedDocument(words=['v', 'a', 'n', 'i', 'l', 'l', 'i', 'n'], tags=['vanillin']),
TaggedDocument(words=['4', '-', '(', '2', '-', 'f', 'u', 'r', 'y', 'l', ')', '-', '3', '-', 'b', 'u', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-(2-furyl)-3-buten-2-one']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['phenylacetic_acid']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'r', 'o', 't', 'o', 'n', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['3-methylcrotonic_acid']),
TaggedDocument(words=['5', '-', '_', 'a', 'n', 'd', '_', '6', '-', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['5-_and_6-decenoic_acid']),
TaggedDocument(words=['2', '-', 'o', 'c', 't', 'e', 'n', '-', '4', '-', 'o', 'n', 'e'], tags=['2-octen-4-one']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n', '-', '3', '-', 'o', 'n', 'e'], tags=['2-methyltetrahydrofuran-3-one']),
TaggedDocument(words=['1', ',', '2', '-', 'e', 't', 'h', 'a', 'n', 'e', 'd', 'i', 't', 'h', 'i', 'o', 'l'], tags=['1,2-ethanedithiol']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', '-', '1', '-', 'h', 'e', 'x', 'a', 'n', 'o', 'l'], tags=['3-methylthio-1-hexanol']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['isoamyl_butyrate']),
TaggedDocument(words=['s', 'a', 'l', 'i', 'c', 'y', 'l', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['salicylaldehyde']),
TaggedDocument(words=['l', '-', 'g', 'l', 'u', 't', 'a', 'm', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-glutamic_acid']),
TaggedDocument(words=['5', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '4', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['5-hydroxy-4-octanone']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['hexyl_benzoate']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-pentanethiol']),
TaggedDocument(words=['3', '-', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['3-ethyl-2,6-dimethylpyrazine']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl-2-methylbutyrate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'b', 'u', 't', 'e', 'n', 'a', 'l'], tags=['2-methyl-3-butenal']),
TaggedDocument(words=['g', 'l', 'y', 'c', 'i', 'n', 'e'], tags=['glycine']),
TaggedDocument(words=['1', '0', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'm', 'e', 't', 'h', 'y', 'l', 'e', 'n', 'e', '-', '2', '-', 'p', 'i', 'n', 'e', 'n', 'e'], tags=['10-hydroxymethylene-2-pinene']),
TaggedDocument(words=['4', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', '-', '2', '-', 'o', 'x', 'o', 'b', 'u', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-(methylthio)-2-oxobutanoic_acid']),
TaggedDocument(words=['s', '-', 'm', 'e', 't', 'h', 'y', 'l', '_', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'a', 't', 'e'], tags=['s-methyl_4-methylpentanethioate']),
TaggedDocument(words=['f', 'u', 'm', 'a', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['fumaric_acid']),
TaggedDocument(words=['2', ',', '2', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'e', 'n', '-', '1', '-', 'y', 'l', ')', '-', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', 't', 'e', 't', 'r', 'a', 'h', 'y', 'd', 'r', 'o', 'f', 'u', 'r', 'a', 'n'], tags=['2,2-dimethyl-5-(1-methylpropen-1-yl)-__________________________________tetrahydrofuran']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', 'h', 'e', 'x', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-ethylhexanethiol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'a', 'c', 'r', 'y', 'l', 'a', 't', 'e'], tags=['ethyl_acrylate']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-vinylphenol']),
TaggedDocument(words=['2', '-', '(', '1', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', ')', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-(1-methylpropyl)thiazole']),
TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '4', '-', 'c', 'i', 's', '-', '7', '-', 'c', 'i', 's', '-', 't', 'r', 'i', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'a', 'l'], tags=['2-trans-4-cis-7-cis-tridecatrienal']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'p', '-', 'a', 'n', 'i', 's', 'a', 't', 'e'], tags=['ethyl_p-anisate']),
TaggedDocument(words=['2', '-', 'i', 's', 'o', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-isopropylphenol']),
TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'o', 'c', 't', 'a', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['4-ethyloctanoic_acid']),
TaggedDocument(words=['d', 'l', '-', 'i', 's', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'n', 'e'], tags=['dl-isomenthone']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['cinnamyl_butyrate']),
TaggedDocument(words=['1', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', ',', '3', '-', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', 'a', 'd', 'i', 'o', 'n', 'e'], tags=['1-methyl-2,3-cyclohexadione']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', '-', 'a', '-', 'i', 'o', 'n', 'o', 'n', 'e'], tags=['dihydro-a-ionone']),
TaggedDocument(words=['t', 'a', 'u', 'r', 'i', 'n', 'e'], tags=['taurine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_propyl_disulfide']),
TaggedDocument(words=['2', '-', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['2-ethyl-4-hydroxy-5-methyl-3(2h)-furanone']),
TaggedDocument(words=['o', '-', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['o-(methylthio)phenol']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'n', 'o', 'n', 'a', 'n', 'o', 'a', 't', 'e'], tags=['isoamyl_nonanoate']),
TaggedDocument(words=['1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['10-undecenoic_acid']),
TaggedDocument(words=['d', 'i', 'e', 't', 'h', 'y', 'l', '_', 'm', 'a', 'l', 'o', 'n', 'a', 't', 'e'], tags=['diethyl_malonate']),
TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['bornyl_isovalerate']),
TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['heptyl_formate']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['benzyl_butyrate']),
TaggedDocument(words=['5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'h', 'e', 'x', 'e', 'n', 'a', 'l'], tags=['5-methyl-2-phenyl-2-hexenal']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'e', 't', 'h', 'i', 'o', 'l'], tags=['2-methyl-1-butanethiol']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-hydroxyhexanoate']),
TaggedDocument(words=['m', 'a', 'l', 't', 'o', 'l'], tags=['maltol']),
TaggedDocument(words=['h', 'e', 'p', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['heptyl_butyrate']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'l'], tags=['2,6-dimethyl-4-heptanol']),
TaggedDocument(words=['2', '(', '4', ')', '-', 'i', 's', 'o', 'b', 'u', 't', 'y', 'l', '-', '4', '(', '2', ')', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'd', 'i', 'h', 'y', 'd', 'r', 'o', '-', '4', 'h', '-', '1', ',', '3', ',', '5', '-', 'd', 'i', 't', 'h', 'i', 'a', 'z', 'i', 'n', 'e'], tags=['2(4)-isobutyl-4(2),6-dimethyldihydro-4h-1,3,5-dithiazine']),
TaggedDocument(words=['4', '-', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['4-ethylbenzaldehyde']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'i', 's', 'o', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['propyl_isobutyrate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['methyl_phenylacetate']),
TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['bornyl_formate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'l', 'i', 'n', 'o', 'l', 'e', 'a', 't', 'e', '_', '(', '4', '8', '%', ')', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'l', 'i', 'n', 'o', 'l', 'e', 'n', 'a', 't', 'e', '_', '(', '5', '2', '%', ')', '_', 'm', 'i', 'x', '-', 't', 'u', 'r', 'e'], tags=['methyl_linoleate_(48%)_methyl_linolenate_(52%)_mix-ture']),
TaggedDocument(words=['g', '-', 'u', 'n', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-undecalactone']),
TaggedDocument(words=['5', 'h', '-', '5', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '6', ',', '7', '-', 'd', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', '(', 'b', ')', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['5h-5-methyl-6,7-dihydrocyclopenta(b)pyrazine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '-', 'c', 'i', 's', '-', '4', '-', 'o', 'c', 't', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl-cis-4-octenoate']),
TaggedDocument(words=['2', ',', '5', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '4', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '3', '(', '2', 'h', ')', '-', 'f', 'u', 'r', 'a', 'n', 'o', 'n', 'e'], tags=['2,5-dimethyl-4-methoxy-3(2h)-furanone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'c', 'r', 'o', 't', 'o', 'n', 'a', 't', 'e'], tags=['ethyl_crotonate']),
TaggedDocument(words=['3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-hexenyl_phenylacetate']),
TaggedDocument(words=['3', '-', 'n', 'o', 'n', 'a', 'n', 'o', 'n', 'e'], tags=['3-nonanone']),
TaggedDocument(words=['l', '-', 'm', 'a', 'l', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['l-malic_acid']),
TaggedDocument(words=['a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['acetone']),
TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l'], tags=['acetal']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '1', '0', '-', 'u', 'n', 'd', 'e', 'c', 'e', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_10-undecenoate']),
TaggedDocument(words=['p', '-', 'm', 'e', 'n', 't', 'h', 'a', '-', '8', '-', 't', 'h', 'i', 'o', 'l', '-', '3', '-', 'o', 'n', 'e'], tags=['p-mentha-8-thiol-3-one']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', '2', '-', 'f', 'u', 'r', 'o', 'a', 't', 'e'], tags=['allyl_2-furoate']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'y', 'l', '_', 't', 'i', 'g', 'l', 'a', 't', 'e'], tags=['geranyl_tiglate']),
TaggedDocument(words=['3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'o', 'c', 't', 'a', 'n', 'o', 'l'], tags=['3,7-dimethyl-1-octanol']),
TaggedDocument(words=['2', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', '-', '4', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['2-methoxy-4-methylphenol']),
TaggedDocument(words=['p', ',', 'a', ',', 'a', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'e', 'n', 'z', 'y', 'l', '_', 'a', 'l', 'c', 'o', 'h', 'o', 'l'], tags=['p,a,a-trimethylbenzyl_alcohol']),
TaggedDocument(words=['b', 'u', 't', 'y', 'l', '_', 'f', 'o', 'r', 'm', 'a', 't', 'e'], tags=['butyl_formate']),
TaggedDocument(words=['h', 'e', 'x', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['hexyl_propionate']),
TaggedDocument(words=['2', ',', '3', ',', '5', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,3,5-trimethylpyrazine']),
TaggedDocument(words=['d', 'e', 'h', 'y', 'd', 'r', 'o', 'm', 'e', 'n', 't', 'h', 'o', 'f', 'u', 'r', 'o', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['dehydromenthofurolactone']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'a', 'r', 'v', 'e', 'o', 'l'], tags=['dihydrocarveol']),
TaggedDocument(words=['2', '-', 't', 'r', 'a', 'n', 's', '-', '3', ',', '7', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'o', 'c', 't', 'a', '-', '2', ',', '6', '-', 'd', 'i', 'e', 'n', 'y', 'l', '-', '2', '-', 'e', 't', 'h', 'y', 'l', '_', 'b', 'u', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['2-trans-3,7-dimethylocta-2,6-dienyl-2-ethyl_butanoate']),
TaggedDocument(words=['i', 's', 'o', 'e', 'u', 'g', 'e', 'n', 'y', 'l', '_', 'm', 'e', 't', 'h', 'y', 'l', '_', 'e', 't', 'h', 'e', 'r'], tags=['isoeugenyl_methyl_ether']),
TaggedDocument(words=['1', ',', '1', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'e', 't', 'h', 'a', 'n', 'e'], tags=['1,1-dimethoxyethane']),
TaggedDocument(words=['2', ',', '6', ',', '6', '-', 't', 'r', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'c', 'y', 'c', 'l', 'o', 'h', 'e', 'x', '-', '2', '-', 'e', 'n', 'e', '-', '1', ',', '4', '-', 'd', 'i', 'o', 'n', 'e'], tags=['2,6,6-trimethylcyclohex-2-ene-1,4-dione']),
TaggedDocument(words=['c', 'a', 'r', 'v', 'o', 'n', 'e'], tags=['carvone']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'c', 'a', 'r', 'v', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['dihydrocarvyl_acetate']),
TaggedDocument(words=['d', 'l', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'a', 'l', 'a', 'n', 'i', 'n', 'e'], tags=['dl-phenylalanine']),
TaggedDocument(words=['2', ',', '3', ',', '5', '-', 't', 'r', 'i', 't', 'h', 'i', 'a', 'h', 'e', 'x', 'a', 'n', 'e'], tags=['2,3,5-trithiahexane']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['ethyl_3-methylpentanoate']),
TaggedDocument(words=['4', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'i', 'm', 'i', 'd', 'i', 'n', 'e'], tags=['4-acetyl-2-methylpyrimidine']),
TaggedDocument(words=['t', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['thiazole']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'b', 'u', 't', 'y', 'l', '-', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'r', 'o', 'p', 'a', 'n', 'o', 'a', 't', 'e'], tags=['3-methylbutyl-2-methylpropanoate']),
TaggedDocument(words=['b', 'o', 'r', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['bornyl_acetate']),
TaggedDocument(words=['3', '-', 'p', 'h', 'e', 'n', 'y', 'l', 'p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['3-phenylpropyl_acetate']),
TaggedDocument(words=['2', '-', 'h', 'e', 'p', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-heptanone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'o', 'l', 'e', 'a', 't', 'e'], tags=['ethyl_oleate']),
TaggedDocument(words=['i', 'n', 'd', 'o', 'l', 'e'], tags=['indole']),
TaggedDocument(words=['n', '-', 'b', 'u', 't', 'y', 'r', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['n-butyric_acid']),
TaggedDocument(words=['p', 'y', 'r', 'r', 'o', 'l', 'i', 'd', 'i', 'n', 'e'], tags=['pyrrolidine']),
TaggedDocument(words=['d', ',', 'l', '-', 'm', 'e', 't', 'h', 'i', 'o', 'n', 'i', 'n', 'e'], tags=['d,l-methionine']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-acetyl-3-methylpyrazine']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['2-pentanone']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '-', '3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['ethyl-3-hydroxybutyrate']),
TaggedDocument(words=['p', '-', 'v', 'i', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['p-vinylphenol']),
TaggedDocument(words=['c', 'i', 't', 'r', 'a', 'l', '_', '(', 'n', 'e', 'r', 'a', 'l', ')'], tags=['citral_(neral)']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'y', 'l', '-', '2', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'i', 'n', 'e'], tags=['2-propionyl-2-thiazoline']),
TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['isobornyl_propionate']),
TaggedDocument(words=['3', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', '-', '2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-hydroxy-2-pentanone']),
TaggedDocument(words=['2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2,6-dimethylpyrazine']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '(', 'e', ')', '-', '2', '-', '(', 'z', ')', '-', '4', '-', 'd', 'e', 'c', 'a', 'd', 'i', 'e', 'n', 'o', 'a', 't', 'e'], tags=['methyl_(e)-2-(z)-4-decadienoate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '2', '-', 'p', 'e', 'n', 't', 'e', 'n', 'a', 'l'], tags=['4-methyl-2-phenyl-2-pentenal']),
TaggedDocument(words=['i', 's', 'o', 'p', 'h', 'o', 'r', 'o', 'n', 'e'], tags=['isophorone']),
TaggedDocument(words=['i', 's', 'o', 'b', 'o', 'r', 'n', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['isobornyl_acetate']),
TaggedDocument(words=['1', '-', 'o', 'c', 't', 'e', 'n', '-', '3', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['1-octen-3-yl_acetate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '2', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'f', 'u', 'r', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_2-methyl-3-furyl_disulfide']),
TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['o-methoxycinnamaldehyde']),
TaggedDocument(words=['3', '-', 'm', 'e', 'r', 'c', 'a', 'p', 't', 'o', '-', '3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', '-', 'b', 'u', 't', 'a', 'n', 'o', 'l'], tags=['3-mercapto-3-methyl-1-butanol']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['benzyl_hexanoate']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['furfuryl_acetate']),
TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['piperidine']),
TaggedDocument(words=['g', 'l', 'y', 'c', 'e', 'r', 'o', 'l'], tags=['glycerol']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', '-', '3', '-', 'e', 't', 'h', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['2-acetyl-3-ethylpyrazine']),
TaggedDocument(words=['3', '-', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['3-decanone']),
TaggedDocument(words=['(', 'e', ',', 'e', ')', '-', '3', ',', '5', '-', 'o', 'c', 't', 'a', 'd', 'i', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['(e,e)-3,5-octadien-2-one']),
TaggedDocument(words=['a', 'c', 'e', 't', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e', '_', 'd', 'i', 'i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 'l'], tags=['acetaldehyde_diisoamyl_acetal']),
TaggedDocument(words=['a', '-', 'd', 'a', 'm', 'a', 's', 'c', 'o', 'n', 'e'], tags=['a-damascone']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['benzoic_acid']),
TaggedDocument(words=['1', ',', '3', ',', '5', '-', 'u', 'n', 'd', 'e', 'c', 'a', 't', 'r', 'i', 'e', 'n', 'e', '_', '(', 'a', '_', 'm', 'i', 'x', 't', 'u', 'r', 'e', '_', 'o', 'f', '_', '1', ',', '3', '(', 'e', ')', ',', '5', '(', 'z', ')', '-', '_', 'a', 'n', 'd', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '1', ',', '3', '(', 'e', ')', ',', '5', '(', 'e', ')', '-', 'i', 's', 'o', 'm', 'e', 'r', 's', ')'], tags=['1,3,5-undecatriene_(a_mixture_of_1,3(e),5(z)-_and_______________1,3(e),5(e)-isomers)']),
TaggedDocument(words=['3', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '2', '-', 'b', 'u', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['3-methyl-2-buten-1-ol']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['benzyl_isovalerate']),
TaggedDocument(words=['w', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['w-pentadecalactone']),
TaggedDocument(words=['f', 'a', 'r', 'n', 'e', 's', 'o', 'l'], tags=['farnesol']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'o', 'c', 't', 'e', 'n', '-', '1', '-', 'o', 'l'], tags=['cis-3-octen-1-ol']),
TaggedDocument(words=['g', 'e', 'r', 'a', 'n', 'i', 'o', 'l'], tags=['geraniol']),
TaggedDocument(words=['f', 'u', 'r', 'f', 'u', 'r', 'y', 'l', '_', 'p', 'r', 'o', 'p', 'i', 'o', 'n', 'a', 't', 'e'], tags=['furfuryl_propionate']),
TaggedDocument(words=['c', 'i', 'n', 'n', 'a', 'm', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['cinnamyl_acetate']),
TaggedDocument(words=['g', '-', 'd', 'e', 'c', 'a', 'l', 'a', 'c', 't', 'o', 'n', 'e'], tags=['g-decalactone']),
TaggedDocument(words=['3', ',', '4', '-', 'x', 'y', 'l', 'e', 'n', 'o', 'l'], tags=['3,4-xylenol']),
TaggedDocument(words=['p', '-', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['p-ethoxybenzaldehyde']),
TaggedDocument(words=['a', '-', 'h', 'e', 'x', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['a-hexyl_cinnamaldehyde']),
TaggedDocument(words=['p', 'h', 'e', 'n', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['phenethyl_hexanoate']),
TaggedDocument(words=['2', '-', 'a', 'c', 'e', 't', 'y', 'l', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e'], tags=['2-acetylthiazole']),
TaggedDocument(words=['i', 's', 'o', 'b', 'u', 't', 'y', 'l', '_', 'a', 'n', 'g', 'e', 'l', 'a', 't', 'e'], tags=['isobutyl_angelate']),
TaggedDocument(words=['4', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '5', '-', 't', 'h', 'i', 'a', 'z', 'o', 'l', 'e', 'e', 't', 'h', 'a', 'n', 'o', 'l'], tags=['4-methyl-5-thiazoleethanol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', '(', 'm', 'e', 't', 'h', 'y', 'l', 't', 'h', 'i', 'o', ')', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['methyl_(methylthio)acetate']),
TaggedDocument(words=['p', '-', 'm', 'e', 't', 'h', 'y', 'l', 'a', 'n', 'i', 's', 'o', 'l', 'e'], tags=['p-methylanisole']),
TaggedDocument(words=['p', 'i', 'p', 'e', 'r', 'o', 'n', 'a', 'l'], tags=['piperonal']),
TaggedDocument(words=['a', 'l', 'l', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['allyl_hexanoate']),
TaggedDocument(words=['e', 'r', 'y', 't', 'h', 'r', 'o', 'b', 'i', 'c', '_', 'a', 'c', 'i', 'd'], tags=['erythrobic_acid']),
TaggedDocument(words=['d', 'i', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'o', 'n', 'e'], tags=['dihydroxyacetone']),
TaggedDocument(words=['i', 's', 'o', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', 'p', 'y', 'r', 'a', 'z', 'i', 'n', 'e'], tags=['isopropenylpyrazine']),
TaggedDocument(words=['3', ',', '4', '-', 'd', 'i', 'm', 'e', 't', 'h', 'y', 'l', '-', '1', ',', '2', '-', 'c', 'y', 'c', 'l', 'o', 'p', 'e', 'n', 't', 'a', 'n', 'e', 'd', 'i', 'o', 'n', 'e'], tags=['3,4-dimethyl-1,2-cyclopentanedione']),
TaggedDocument(words=['4', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', '-', '2', ',', '6', '-', 'd', 'i', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['4-propenyl-2,6-dimethoxyphenol']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'h', 'e', 'x', 'a', 'n', 'o', 'a', 't', 'e'], tags=['methyl_hexanoate']),
TaggedDocument(words=['3', '-', 'o', 'c', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['3-octen-2-one']),
TaggedDocument(words=['2', '-', 'h', 'e', 'x', 'e', 'n', '-', '1', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['2-hexen-1-yl_acetate']),
TaggedDocument(words=['p', 'u', 'l', 'e', 'g', 'o', 'n', 'e'], tags=['pulegone']),
TaggedDocument(words=['a', 'm', 'y', 'l', '_', 'o', 'c', 't', 'a', 'n', 'o', 'a', 't', 'e'], tags=['amyl_octanoate']),
TaggedDocument(words=['d', 'i', 'a', 'l', 'l', 'y', 'l', '_', 't', 'r', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['diallyl_trisulfide']),
TaggedDocument(words=['l', 'i', 'n', 'a', 'l', 'y', 'l', '_', 'b', 'e', 'n', 'z', 'o', 'a', 't', 'e'], tags=['linalyl_benzoate']),
TaggedDocument(words=['3', '-', 'o', 'c', 't', 'a', 'n', 'o', 'n', 'e'], tags=['3-octanone']),
TaggedDocument(words=['b', 'e', 'n', 'z', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['benzophenone']),
TaggedDocument(words=['i', 's', 'o', 'p', 'u', 'l', 'e', 'g', 'o', 'n', 'e'], tags=['isopulegone']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'n', 'o', 'l'], tags=['2-pentanol']),
TaggedDocument(words=['4', '-', 'h', 'e', 'x', 'e', 'n', 'e', '-', '3', '-', 'o', 'n', 'e'], tags=['4-hexene-3-one']),
TaggedDocument(words=['o', '-', 'm', 'e', 't', 'h', 'o', 'x', 'y', 'b', 'e', 'n', 'z', 'a', 'l', 'd', 'e', 'h', 'y', 'd', 'e'], tags=['o-methoxybenzaldehyde']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'l', 'a', 'u', 'r', 'a', 't', 'e'], tags=['isoamyl_laurate']),
TaggedDocument(words=['m', 'e', 't', 'h', 'y', 'l', '_', 'd', 'i', 's', 'u', 'l', 'f', 'i', 'd', 'e'], tags=['methyl_disulfide']),
TaggedDocument(words=['s', 't', 'y', 'r', 'e', 'n', 'e'], tags=['styrene']),
TaggedDocument(words=['c', 'i', 's', '-', '3', '-', 'h', 'e', 'x', 'e', 'n', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['cis-3-hexenyl_isovalerate']),
TaggedDocument(words=['4', '-', 'p', 'h', 'e', 'n', 'y', 'l', '-', '3', '-', 'b', 'u', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['4-phenyl-3-buten-2-one']),
TaggedDocument(words=['e', 't', 'h', 'y', 'l', '_', 'i', 's', 'o', 'v', 'a', 'l', 'e', 'r', 'a', 't', 'e'], tags=['ethyl_isovalerate']),
TaggedDocument(words=['1', '-', 'p', '-', 'm', 'e', 'n', 't', 'h', 'e', 'n', '-', '9', '-', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['1-p-menthen-9-yl_acetate']),
TaggedDocument(words=['2', '-', 'h', 'y', 'd', 'r', 'o', 'x', 'y', 'a', 'c', 'e', 't', 'o', 'p', 'h', 'e', 'n', 'o', 'n', 'e'], tags=['2-hydroxyacetophenone']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'y', 'l', '_', 'b', 'u', 't', 'y', 'r', 'a', 't', 'e'], tags=['2-pentyl_butyrate']),
TaggedDocument(words=['i', 's', 'o', 'a', 'm', 'y', 'l', '_', 'c', 'i', 'n', 'n', 'a', 'm', 'a', 't', 'e'], tags=['isoamyl_cinnamate']),
TaggedDocument(words=['t', 'h', 'e', 'a', 's', 'p', 'i', 'r', 'a', 'n', 'e'], tags=['theaspirane']),
TaggedDocument(words=['d', 'i', 'a', 'c', 'e', 't', 'y', 'l'], tags=['diacetyl']),
TaggedDocument(words=['c', 'i', 't', 'r', 'o', 'n', 'e', 'l', 'l', 'o', 'l'], tags=['citronellol']),
TaggedDocument(words=['2', '-', 'p', 'e', 'n', 't', 'a', 'd', 'e', 'c', 'a', 'n', 'o', 'n', 'e'], tags=['2-pentadecanone']),
TaggedDocument(words=['(', 'e', ')', '-', '7', '-', 'm', 'e', 't', 'h', 'y', 'l', '-', '3', '-', 'o', 'c', 't', 'e', 'n', '-', '2', '-', 'o', 'n', 'e'], tags=['(e)-7-methyl-3-octen-2-one']),
TaggedDocument(words=['2', '-', 'n', 'o', 'n', 'e', 'n', 'a', 'l'], tags=['2-nonenal']),
TaggedDocument(words=['(', 'z', ')', '-', '4', '-', 'p', 'r', 'o', 'p', 'e', 'n', 'y', 'l', 'p', 'h', 'e', 'n', 'o', 'l'], tags=['(z)-4-propenylphenol']),
TaggedDocument(words=['p', 'r', 'o', 'p', 'y', 'l', '_', 'a', 'c', 'e', 't', 'a', 't', 'e'], tags=['propyl_acetate']),
TaggedDocument(words=['2', '-', 'p', 'r', 'o', 'p', 'y', 'l', 'p', 'y', 'r', 'i', 'd', 'i', 'n', 'e'], tags=['2-propylpyridine']),
...]
In [7]:
def make_plot_simple(name, points, labels, publish):
    """Draw a 2-D scatter plot of ``points`` with per-point hover text.

    name    -- base name of the plot; '.html' is appended to form the filename
    points  -- 2-D array; column 0 is used as x and column 1 as y
    labels  -- hover text, passed as the trace's ``text``
    publish -- truthy: plot with py.iplot; falsy: plot with offline.plot

    Nothing is returned; the plotter's own return value is discarded.
    """
    def bare_axis():
        # Build a fresh dict on every call so x and y never share one object.
        return dict(
            autorange=True,
            showgrid=False,
            zeroline=True,
            showline=True,
            autotick=True,
            ticks='',
            showticklabels=False,
        )

    # Marker outline (line = dict(width = 1)) is intentionally left disabled.
    marker_style = dict(
        color=sns.xkcd_rgb["black"],
        size=8,
        opacity=0.6,
    )
    scatter = go.Scattergl(
        x=points[:, 0],
        y=points[:, 1],
        mode='markers',
        marker=marker_style,
        text=labels,
        hoverinfo='text',
    )
    figure = go.Figure(
        data=[scatter],
        layout=go.Layout(xaxis=bare_axis(), yaxis=bare_axis()),
    )

    plotter = py.iplot if publish else offline.plot
    plotter(figure, filename=name + '.html')
In [8]:
"""
Train Doc2Vec Model
"""
time_start = time.time()
cores = multiprocessing.cpu_count()
#dm/m,d50,n5,w5,mc5,s0.001,t3
#model = gensim.models.doc2vec.Doc2Vec(size=50, min_count=5, iter=55)
# PV-DM w/ average
model = gensim.models.doc2vec.Doc2Vec(size=50, window=5, min_count=3, iter=100)
model.build_vocab(corpus, keep_raw_vocab=False)
print "Unique Character Count", len(model.wv.vocab)
print "Total Compoounds Count:", model.corpus_count
%time model.train(corpus, total_examples=model.corpus_count, epochs=model.iter)
print 'Doc2Vec training done! Time elapsed: {} seconds'.format(time.time()-time_start)
save_name = 'embeddings' + os.sep + 'embeddings_flavor_compounds_50dim.bin'
model.save_word2vec_format(save_name, doctag_vec=True, word_vec=False, prefix='*dt_', fvocab=None, binary=True)
Unique Character Count 46
Total Compoounds Count: 1107
CPU times: user 4.24 s, sys: 2.57 s, total: 6.81 s
Wall time: 5.05 s
Doc2Vec training done! Time elapsed: 5.10063791275 seconds
In [9]:
"""
TSNE of Doc2Vec
"""
time_start = time.time()
X = model.docvecs
tsne = TSNE(n_components=2)
X_tsne = tsne.fit_transform(X)
print 't-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start)
t-SNE done! Time elapsed: 5.66017103195 seconds
In [10]:
# One hover label per document vector: the doctag stored at each index,
# collected in index order.
labels = [model.docvecs.index_to_doctag(doc_id)
          for doc_id in range(len(model.docvecs))]

# Write the t-SNE scatter with offline plotting (publish=False).
make_plot_simple(
    name='compound2vec_char2',
    points=X_tsne,
    labels=labels,
    publish=False,
)
In [36]:
from gensim.models.keyedvectors import KeyedVectors

# Same path the training cell saves to. That file is written with
# save_word2vec_format(..., binary=True), so it is read back with
# load_word2vec_format rather than gensim.models.Word2Vec.load.
load_name = os.path.join('embeddings', 'embeddings_flavor_compounds_50dim.bin')
word_vectors = KeyedVectors.load_word2vec_format(load_name, binary=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-36-0aabed540d8b> in <module>()
4 from gensim.models.keyedvectors import KeyedVectors
5 word_vectors = KeyedVectors.load_word2vec_format(load_name, binary=True)
----> 6 print len(word_vectors)
TypeError: object of type 'KeyedVectors' has no len()
In [ ]:
Content source: dhyeon/ingredient2vec
Similar notebooks: