Distribution of MI scores of entries in several thesauri. Do thesauri just happen to contain better features?


In [71]:
%cd ~/NetBeansProjects/thesisgenerator/
from collections import Counter, defaultdict
from thesisgenerator.utils.conf_file_utils import parse_config_file
from thesisgenerator.utils.data_utils import get_tokenized_data, get_tokenizer_settings_from_conf
from thesisgenerator.plugins.bov import ThesaurusVectorizer
from discoutils.thesaurus_loader import Vectors
from sklearn.feature_selection import f_classif, chi2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
from thesisgenerator.utils.misc import calculate_log_odds
from scipy.sparse import csr_matrix
%matplotlib inline


/Volumes/LocalDataHD/m/mm/mmb28/NetBeansProjects/thesisgenerator

In [22]:
# Read the experiment configuration; the training corpus it names is stored gzipped.
conf, configspec_file = parse_config_file('conf/exp1/exp1_base.conf')
tokenizer_settings = get_tokenizer_settings_from_conf(conf)
tr_data = conf['training_data'] + '.gz'

# Only the first two values returned by get_tokenized_data are used in this notebook.
xtr, ytr, _, _ = get_tokenized_data(tr_data, tokenizer_settings)

# Vectorise the training documents. `vocab` maps each feature to its column in X;
# the keys are turned into plain strings so they can be looked up by name later on.
X, vocab = ThesaurusVectorizer(min_df=0).fit_transform(xtr)
vocab = dict((str(feature), column) for feature, column in vocab.items())
phrases = list(map(str, vocab))

In [3]:
# Score every column of X against the class labels with sklearn's f_classif
# (ANOVA F-test); it returns the F-statistics and the matching p-values.
fvalues, pvalues = f_classif(X, ytr)

In [4]:
from collections import OrderedDict

# One composed-vector file per composer (Baroni, Add, Left, Right).
paths = ['../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Baroni.events.filtered.strings',
        '../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Add.events.filtered.strings',
        '../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Left.events.filtered.strings',
        '../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Right.events.filtered.strings']

# path -> F-scores of the vocabulary features that are present in that file.
# An OrderedDict is used because the plotting cell pairs the entries of `scores`
# positionally with the labels 'BALR'; a plain dict does not guarantee insertion
# order on the Python 3.4 interpreter this notebook was run with.
scores = OrderedDict()
for path in paths:
    v = Vectors.from_tsv(path)
    # keep the score of every vocabulary feature this vector file contains
    scores[path] = [fvalues[idx] for feature, idx in vocab.items() if feature in v]
    # drop the vectors before the next file is loaded
    del v


---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-4-4de761d3eff4> in <module>()
      5         '../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Right.events.filtered.strings']
      6 for path in paths:
----> 7     v = Vectors.from_tsv(path)
      8     scores_this_composer = []
      9     for i, (df, idx) in enumerate(vocab.items()):

/Volumes/LocalDataHD/m/mm/mmb28/NetBeansProjects/DiscoUtils/discoutils/thesaurus_loader.py in from_tsv(cls, tsv_file, sim_threshold, lowercasing, ngram_separator, row_filter, column_filter, max_len, max_neighbours, merge_duplicates, immutable, **kwargs)
    353         # ready-made thesaurus from disk.
    354         allow_lexical_overlap = kwargs.pop('allow_lexical_overlap', True)
--> 355         if is_hdf(tsv_file):
    356             import pandas as pd
    357 

/Volumes/LocalDataHD/m/mm/mmb28/NetBeansProjects/DiscoUtils/discoutils/misc.py in is_hdf(path_to_file)
     97     Checks if a file is a HDF store
     98     """
---> 99     return _check_file_magic(path_to_file, b'Hierarchical Data Format')
    100 
    101 

/Volumes/LocalDataHD/m/mm/mmb28/NetBeansProjects/DiscoUtils/discoutils/misc.py in _check_file_magic(file, magic_substr)
     80     import magic
     81 
---> 82     return magic_substr in magic.from_file(os.path.realpath(file))
     83 
     84 

/home/m/mm/mmb28/anaconda3/lib/python3.4/site-packages/magic.py in from_file(filename, mime)
    118     """
    119     m = _get_magic_type(mime)
--> 120     return m.from_file(filename)
    121 
    122 def from_buffer(buffer, mime=False):

/home/m/mm/mmb28/anaconda3/lib/python3.4/site-packages/magic.py in from_file(self, filename)
     73         self._thread_check()
     74         if not os.path.exists(filename):
---> 75             raise IOError("File does not exist: " + filename)
     76 
     77         return magic_file(self.cookie, filename)

OSError: File does not exist: /Volumes/LocalDataHD/m/mm/mmb28/NetBeansProjects/FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Left.events.filtered.strings

In [ ]:
# One histogram of feature scores per composer, on a shared 2x2 grid.
f, axes = plt.subplots(2, 2, sharey=True, sharex=True)
colors = 'rgbc'
for i, (path, vals) in enumerate(scores.items()):
    # Take the panel title from the file name itself (..._Baroni.events... -> 'B')
    # rather than from the position of the entry in `scores`: the iteration order
    # of a plain dict is not guaranteed to follow the order the files were loaded in,
    # so a positional 'BALR' lookup could attach the wrong label to a panel.
    composer = path.rsplit('_', 1)[-1].split('.', 1)[0]
    axes.flat[i].hist(vals, bins=20, alpha=.3, color=colors[i]);
    axes.flat[i].set_title(composer[:1])
plt.savefig('midist.pdf', format='pdf')

In [ ]:
# Tiny hand-made data set: four samples (rows) with four binary features (columns).
# The first two samples are labelled 0 and the last two are labelled 1.
X1 = [[1, 1, 0, 0],
      [1, 1, 1, 0],
      [0, 1, 1, 1],
      [0, 0, 1, 1]]
y1 = np.array([0, 0, 1, 1])

# Show the F-statistics and p-values f_classif assigns to each toy feature.
f_classif(X1, y1)

In [ ]:
# Run the project's log-odds scorer on the toy data X1 / y1, passed as a sparse CSR
# matrix -- presumably to compare its output with the f_classif scores of the same data.
calculate_log_odds(csr_matrix(X1), y1)

What happens to good classifier features when they are redirected through a thesaurus?


In [5]:
# Question for this section: are very informative features (ones strongly
# associated with class C) replaced by other informative features or not?
print('shape is', X.shape)
# NOTE(review): the original remark here said that small values of `a` mark informative
# features. Per the scikit-learn documentation chi2 returns (chi2 statistics, p-values),
# so `a` grows with the strength of the feature/class association and it is the
# p-values in `b` that are small for informative features -- confirm which was meant.
a, b = chi2(X, ytr) # a = chi2 statistics, b = p-values
plt.hist(a, bins=20);


shape is (2000, 47370)

In [6]:
# Dimensions of the vectorised training matrix X.
X.shape


Out[6]:
(2000, 47370)

In [7]:
# Collapse the labels to a binary problem: 'Automotive' versus everything else.
# This overwrites ytr in place, so every later cell sees the two-class labels.
# NOTE(review): if ytr is a fixed-width NumPy string array the longer replacement
# label could be truncated on assignment -- confirm the dtype of ytr.
ytr[ytr != 'Automotive'] = 'NotAutomotive' # the log-odds score assumes two classes, so let's make it that way
# Score all features with f_classif (the original note called this chi2, but the call
# below is f_classif). Original remark: much faster than the log-odds computation, but
# it selects both positive and negative features. `premask` holds the F-statistics.
premask, _ = f_classif(X, ytr)
# Alternative runs, kept for reference, that restrict the log-odds computation to a subset of columns:
# %time lo_automotive = calculate_log_odds(X, ytr, column_indices=np.argsort(premask)[:1000])
# %time lo_automotive = calculate_log_odds(X, ytr, column_indices=set(np.random.randint(0,X.shape[1],20000)))
# Log-odds score for every feature, timed with the %time magic.
%time lo_automotive = calculate_log_odds(X, ytr)


CPU times: user 38.5 s, sys: 93.2 ms, total: 38.6 s
Wall time: 39 s

In [8]:
# Histogram of the per-feature log-odds scores; the cell's displayed value is the largest score.
plt.hist(lo_automotive, bins=20);
max(lo_automotive)


Out[8]:
18.09217669226155

In [9]:
# Replace F-scores above 100 with -1, in place -- presumably so that a few extreme
# values do not stretch the y axis. The change stays in `premask` for later cells.
premask[premask>100] = -1
# Log-odds score (x) against F-score (y) for every feature.
plt.scatter(lo_automotive, premask);
# looks like there are both good and bad features (according to the f_classif scores
# held in premask; the original note said chi2) in terms of LO score


Out[9]:
<matplotlib.collections.PathCollection at 0x10d541fd0>

In [10]:
# import pandas as pd
# density_df = pd.DataFrame({'X':lo_automotive[:100], 'Y':premask[:100]})
# sns.kdeplot(lo_automotive[:100], premask[:100])
# data = np.vstack([lo_automotive[:100], premask[:100]])
# sns.kdeplot(density_df.ix[:11], shade=True);
# data.shape
# density_df

In [55]:
# Keep an untouched copy of the scores before the low-frequency features are masked out.
orig_lo_automotive = lo_automotive.copy()

# De-select infrequent features: any column of X whose entries sum to less than 5
# gets a score of 0.
feature_totals = np.asarray(X.sum(axis=0)).ravel()
lo_automotive[feature_totals < 5] = 0

# Let's look at the informative features: column index -> feature name.
inv_vocab = dict((column, feature) for feature, column in vocab.items())

# Indices of all features whose log-odds score is above 10 or below -10, i.e.
# strongly informative either for or against 'Automotive'. This is a threshold,
# not a fixed number of features.
good_feats_idx = np.where(np.abs(lo_automotive) > 10)[0]
good_feats = {str(inv_vocab[idx]) for idx in good_feats_idx}
good_feats, len(good_feats)


Out[55]:
({'small/J_bird/N',
  'great/N_leash/N',
  'jerry/N',
  'catgenie/N',
  'rash/N',
  'exo/N',
  'silencer/N',
  'granule/N',
  'artificial/J',
  'melted/J_snow/N',
  'weakest/J_link/N',
  'neem/N',
  'ferret/N',
  'white/J_fur/N',
  'use/V_conditioner/N',
  'chemical/N_smell/N',
  'fun/N_toy/N',
  'foster/J',
  'multimeter/N',
  'herpes/N',
  'blazer/N',
  'cookout/N',
  'fogging/N',
  'favorite/J_toy/N',
  'lab/N',
  'cooking/N',
  'air/N_tool/N',
  'service/N_center/N',
  'tear/V_stain/N',
  'giraffe/N',
  'salt/N_water/N',
  'interwoven/J',
  'cloudy/J',
  'fit/V_dog/N',
  'melted/J',
  'strong/J_chewer/N',
  'warn/N',
  'kitten/N_collar/N',
  'iii/N',
  'chicken/N_flavor/N',
  'chewer/N',
  'society/N',
  'dog/N_smell/N',
  'guinea/N',
  'prep/N',
  'westie/N',
  'hound/N',
  'healthy/J_treat/N',
  'terra/N',
  'black/J_walmart/N',
  'great/J_dog/N',
  'entertaining/J',
  'panic/N_mouse/N',
  'client/N',
  'sidewalk/N',
  'raccoon/N',
  'bacterial/J',
  'fox/N',
  'canine/N',
  'amp/N',
  'old/J_cat/N',
  'extang/N',
  'old/J_son/N',
  'tablet/N',
  'chill/N',
  'service/N_patch/N',
  'valve/N_allow/V_fuel/N',
  'musty/J_smell/N',
  'seborrhea/N',
  'place/V_patch/N',
  'sturdiness/N',
  'cookie/N',
  'usaf/N',
  'best/J_friend/N',
  'rubber/N_chicken/N',
  'roadie/N',
  'transom/N',
  'hatchling/N',
  'gasoline/N',
  'male/N',
  'wellness/N',
  'goldfish/N',
  'large/J_lab/N',
  'smaller/J_dog/N',
  'love/V_bone/N',
  'window/N_vent/N',
  'phone/N_charger/N',
  'timothy/N_hay/N',
  'furminator/N',
  'scratcher/N',
  'turquoise/N',
  'kibble/N',
  'aquatic/J_turtle/N',
  'tummy/N',
  'long/J_hair/N',
  'digestion/N',
  'appetite/N',
  'pomeranian/N',
  'wet/J_food/N',
  'chondroitin/N',
  'adopt/V_cat/N',
  'notice/N_small/N',
  'small/J_ball/N',
  'chocolate/N',
  'long/J_cat/N',
  'dandruff/N',
  'gas/N_mileage/N',
  'love/V_brand/N',
  'pound/N_shepard/N',
  'german/N_shepard/N',
  'amphibian/N',
  'basset/N',
  'rough/J_idea/N',
  'littermaid/N',
  'large/N',
  'mint/N',
  'pet/N_bed/N',
  'poop/N',
  'sani-solution/N',
  'leak/N_air/N',
  'terrier/N_mix/N',
  'cd/N_holder/N',
  'pet/J_cover/N',
  'good/J_choice/N',
  'quality-beware/N',
  'remove/V_odor/N',
  'shih-tzu/N',
  'bar/N_force/N',
  'natural/J_food/N',
  'gt/N',
  'ziploc/N',
  'midsection/N',
  'ml/N',
  'love/V_food/N',
  'snake/N',
  'short-haired/J_dog/N',
  'merrick/N',
  'makita/N',
  'pup/N_corn/N',
  'need/V_tie/N',
  'lettuce/N',
  'clinic/N',
  'crack/V_window/N',
  'pollen/N',
  'shoulder/N_strap/N',
  'buy/V_cage/N',
  'old/J_puppy/N',
  'yeast/N_infection/N',
  'fresbee/N',
  'excellent/J_toy/N',
  'propane/N',
  'muffler/N',
  'century/N',
  'mustang/N',
  'elegance/N_leash/N',
  'ear/N_mite/N',
  'puppy/N_food/N',
  'old/J_self/N',
  'rottie/N',
  'vacation/N',
  'bio/N',
  'battery/N_charger/N',
  'nutritional/J',
  'paws/N',
  'automotive/J',
  'rake/N',
  'toothpaste/N',
  'buffalo/N',
  'window/N_channel/N',
  'shiny/J_coat/N',
  'bedroom/N',
  'porch/N',
  'hardwood/N_floor/N',
  'rope/N_leash/N',
  'good/J_toy/N',
  'lava/N',
  'yamaha/N',
  'english/J',
  'skin/N_condition/N',
  'play/V_tug/N',
  'domestic/J',
  'hydor/N',
  'windshield/N',
  'sodium/N',
  'hartz/N_product/N',
  'screen/N_cover/N',
  'holley/N_pump/N',
  'washable/J',
  'betta/N',
  'fuel/N',
  'mg/N',
  'boat/N_cushion/N',
  'plush/J',
  'love/V_dog/N',
  'open/V_bag/N',
  'human/J_food/N',
  'injured/J',
  'frontline/N',
  'chameleon/N',
  'initial/J_inspection/N',
  'corrosion/N',
  'superwinch/N',
  'rubber/N_core/N',
  'zilla/N',
  'cat/N_sitter/N',
  'omega/N',
  'spindle/N',
  'french/J_puppy/N',
  'nose/N',
  'high/J_food/N',
  'exceptional/J',
  'puppy/J',
  'grass/N',
  'breeder/N',
  'mercruiser/N',
  'paul/N',
  'skin/N_problem/N',
  'entire/J_purpose/N',
  'bug/N_shield/N',
  'moo/N',
  'martingale/N',
  'tile/N_floor/N',
  'oster/N',
  'e-collar/N',
  'german/J',
  'sierra/N',
  'lab/N_mix/N',
  'yummy/N',
  'suburban/J',
  'little/J_time/N',
  'cherokee/N',
  'study/N',
  'turn/V_nose/N',
  'enormous/J',
  'pine/N',
  'shih/N',
  'gallon/N_tank/N',
  'dip/N',
  'people/N_food/N',
  'buy/V_glue/N',
  'mite/J',
  'powerful/J_chewer/N',
  'muzzle/N',
  'jerky/N',
  'berry/N',
  'minty/J',
  'love/V_bed/N',
  'saddle/N',
  'eating/N',
  'diamond/N',
  'german/J_dog/N',
  'symptom/N',
  'zoo/N',
  'smelly/J',
  'bark/N',
  'oil/N_filter/N',
  'boat/N_motor/N',
  'grammy/N',
  'grille/N',
  'love/N_love/N',
  'springer/N',
  'diet/N',
  'car/N_shine/V_headlight/N',
  'transition/N',
  'amp/N_fuse/N',
  'fountain/N',
  'p-bath/N',
  'real/J_meat/N',
  'defense/N',
  'hippo/N',
  'rottweiler/N',
  'wingaling/N',
  'yeast/N',
  'stray/J_cat/N',
  'wild/N',
  'product/N_fit/N',
  'in-channel/J',
  'old/J_terrier/N',
  'nolan/N',
  'soft/N',
  'torin/N',
  'breakfast/N',
  'ridgeback/N',
  'disgusting/J',
  'compact/J_car/N',
  'discomfort/N',
  'window/N_visor/N',
  'science/N',
  'yellow/J_lab/N',
  'jet/N_ski/N',
  'freeze-dried/J',
  'hooked/J',
  'order/V_leash/N',
  'cockatiel/N',
  'rinse/N',
  'use/V_comb/N',
  'yorkie-poo/N',
  'roman/J_harness/N',
  'minis/N',
  'trac/N',
  'sulcata/N',
  'ribbed/J',
  'edelbrock/N',
  'flavored/J',
  'fish/N_tank/N',
  'sine/N',
  'persian/J',
  'chest/N_strap/N',
  'contain/V_snow/N',
  'zoey/N',
  'bald/J',
  'egg/N',
  'grooming/N',
  'consistency/N',
  'dust/N_cover/N',
  'local/J_shelter/N',
  'recommend/V_food/N',
  'dog/N_shampoo/N',
  'order/V_harness/N',
  'apso/N',
  'older/J_cat/N',
  'fuel/N_leak/N',
  'ventshade/N',
  'kong/N',
  'orbee/N',
  'indoor/J_cat/N',
  'pour/V_water/N',
  'weakest/J',
  'flexus/N',
  'protect/V_seat/N',
  'thank/V_goodness/N',
  'fish/N_food/N',
  'smile/N',
  'medium-sized/J',
  'use/V_wrench/N',
  'cat/N_genie/N',
  'mud/N_guard/N',
  'coral/N',
  'major/J_store/N',
  'skull/N_decal/N',
  'pellet/N',
  'fit/V_vehicle/N',
  'torque/N_wrench/N',
  'walmart/N_leash/N',
  'quality/N_ingredient/N',
  'hay/N',
  'genuine/J',
  'costume/N',
  'loose/J_stool/N',
  'crab/N',
  'pound/N_mix/N',
  'liver/N',
  'anvil/N',
  'solar/J',
  'vet/N_visit/N',
  'anti/J',
  'living/N_room/N',
  'poodle/N',
  'parrotlet/N',
  'new/J_toy/N',
  'sherpa/N',
  'nice/J_fit/N',
  'hull/N',
  'x-small/N',
  'wood/N_floor/N',
  'great/J_dane/N',
  'contain/V_dirt/N',
  'booty/N',
  'ranger/N',
  'use/V_collar/N',
  'pooch/N',
  'creeper/N',
  'bearded/J_dragon/N',
  'gross/J',
  'stainless/J_screw/N',
  'international/N',
  'carpad/N_liner/N',
  'yorkshire/N_terrier/N',
  'terrarium/N',
  'large/J_box/N',
  'clove/N',
  'lobster/N',
  'undercover/N',
  'breed/N',
  'regular/J_collar/N',
  'travel/N_trailer/N',
  'chin/N_force/N',
  'chewber/N',
  'emitter/N',
  'spout/N',
  'greyhound/N',
  'wobbly/J',
  'input/N',
  'flexi/N',
  'tricky/J',
  'reef/N',
  'cocker/N',
  'taurus/N',
  'buy/V_bed/N',
  'doberman/N',
  'kid-proof/V_backseat/N',
  'pet/N_store/N',
  'start/V_car/N',
  'rothco/N_garment/N',
  'lung/N',
  'technical/J',
  'chiropractor/N',
  'lined/N',
  'vue/N',
  'aquarium/N_decoration/N',
  'naturals/N',
  'buffer/N',
  'like/V_food/N',
  'use/V_harness/N',
  'crunch/N',
  'shine/V_headlight/N',
  'sausage/N',
  'chow/N',
  'power/N_inverter/N',
  'carrot/N',
  'adult/J_cat/N',
  'best/J_belt/N',
  'cairn/N_terrier/N',
  'northern/J_tool/N',
  'habitat/N',
  'super/J_shipping/N',
  'clock/N',
  'nutrition/N',
  'princess/N',
  'amazon/N_customer/N',
  'antibiotic/N',
  'water-loving/J_lab/N',
  'leather/N_seat/N',
  'tahoe/N',
  'substrate/N',
  'fda/N',
  'adorable/J',
  'dog/N_bowl/N',
  'mpg/N',
  'clicker/N',
  'cat/N_bed/N',
  'jw/N',
  'irritating/J',
  'merrick/N_brand/N',
  'scratch/V_post/N',
  'pot/N_pie/N',
  'adams/N',
  'oxbow/N',
  'great/J_comb/N',
  'brass/N',
  'cats/N',
  'icky/J_poo/N',
  'reflective/J_leash/N',
  'retriever/N',
  'coop/N',
  'no-where/J',
  'behavior/N',
  'calm/J',
  'cr-v/N',
  'tortoise/N_food/N',
  'mutt/N',
  'recipe/N',
  'purchase/V_muffler/N',
  'roman/J',
  'hungry/J',
  'paul/N_shampoo/N',
  'btw/N',
  'rainy/J_day/N',
  'hitch/N_cover/N',
  'notice/V_improvement/N',
  'salmon/N',
  'nugget/N',
  'thanksgiving/N_dinner/N',
  'squeak/N',
  'parrot/N',
  'german/N',
  'tender/N_neck/N',
  'reflectiveness/N',
  'unhappy/J_look/N',
  'breath/N_deflector/N',
  'unraveling/N',
  'plan/N_peel/V_material/N',
  'stripe/N',
  'old/J_retriever/N',
  'rambunctious/J_lab/N',
  'shih/N_tzu/N',
  'clear/J_water/N',
  'cat/N_love/V_toy/N',
  'adult/N',
  'powersport/N',
  'trailer/N_cover/N',
  'alarm/N',
  'ankle/N',
  'gate/N',
  'nap/N',
  'pet/N_shampoo/N',
  'small/J_cat/N',
  'retreiver/N',
  'small/J_pond/N',
  'lb/N_yorkie/N',
  'fourth/J',
  'thanksgiving/N',
  'whitening/N',
  'dog/N_neck/N',
  'order/V_bottle/N',
  'conure/N',
  'hyundai/N',
  'pink/J',
  'shaving/N',
  'fram/N_filter/N',
  'chewy/N',
  'pwc/N',
  'litter/N_mat/N',
  'drip/N',
  'pond/N',
  'digestion/N_problem/N',
  'tach/N',
  'short/J_dog/N',
  'shurhold/N',
  'match/V_dad/N',
  'heat/N_lamp/N',
  'cat/N_food/N',
  'gerbil/N',
  'soft/J_toy/N',
  'pull/V_tail/N',
  'generator/N',
  'enclosure/N',
  'little/J_food/N',
  'flea/N_free/N',
  'old/J_kitten/N',
  'clean/V_ear/N',
  'hail/N',
  'ribbon/N_type/N',
  'clump/N',
  'black/J_jacket/N',
  'poodle/N_mix/N',
  'mite/N',
  'isnt/N',
  'wildside/N',
  'doggie/J',
  'destroy/V_toy/N',
  'flea/N_control/N',
  'cricket/N',
  'comb/N',
  'backpack/N',
  'explorer/N',
  'undercoat/N',
  'lover/N',
  'nutri-vet/N',
  'hitachi/N',
  'hide/N',
  'order/V_piece/N',
  'actron/N',
  'shift/V_dog/N',
  'fuse/N',
  'heavy/J_gauge/N',
  'osi/N',
  'circumference/N',
  'ventshade/N_ventvisor/N',
  'vitamin/N',
  'undercover/N_mouse/N',
  'garbage/N_defeat/V_purpose/N',
  'decor/N',
  'good/J_treat/N',
  'stick/V_head/N',
  'visor/N',
  'malaseb/N',
  'dry/J_food/N',
  'grassland/N',
  'quality/N_food/N',
  'alcohol/N_pad/N',
  'macaw/N',
  'beard/N',
  'fit/V_lb/N',
  'pass/V_test/N',
  'petsmart/N',
  'love/V_taste/N',
  'dogs/N',
  'painting/N',
  'merrick/N_food/N',
  'acid/N',
  'halloween/N',
  'doggles/N',
  'crc/N',
  'tzu/N',
  'fit/V_car/N',
  'nutritious/J',
  'veggy/N',
  'cat/N_box/N',
  'poisoning/N',
  'power/N_supply/N',
  'medicine/N',
  'bearing/N_buddy/N',
  'mcgard/N',
  'favorite/J_treat/N',
  'unbraided/J',
  'use/V_model/N',
  'henrietta/N',
  'snack/N',
  'shepherd/N_mix/N',
  'correction/N',
  'buy/V_toy/N',
  'excited/J',
  'tire/N_spoon/N',
  'coupon/N',
  'dog/N_bone/N',
  'african/J',
  'german/J_mix/N',
  'itching/N',
  'squeeker/N',
  'bowel/N',
  'playpen/N',
  'black/J_lab/N',
  'storage/N_shed/N',
  'rawhide/N',
  'beagle/N',
  'organic/N',
  'pleasure/N',
  'zealand/N',
  'cowboy/N_cookout/N',
  'dog/N_love/V_food/N',
  'pet/J_store/N',
  'wand/N',
  'male/J_cat/N',
  'braided/J',
  'private/J',
  'water/N_spot/N',
  'bloodworm/N',
  'jack/N_russell/N',
  'conversion/N',
  'percentage/N',
  'bad/J_smell/N',
  'little/J_guy/N',
  'belly/N',
  'bout/N',
  'dependable/J',
  'allergy/N',
  'koralia/N',
  'step/N_bar/N',
  'beneficial/J',
  'rothco/N',
  'northern/N',
  'senior/J',
  'xx/N',
  'bacterium/N',
  'collie/N_mix/N',
  'frise/N',
  'bulb/N_toy/N',
  'awful/J_smell/N',
  'scraper/N',
  'atv/N',
  'fabulous/N',
  'roadside/N',
  'welding/N',
  'flitz/N',
  'sugar/N',
  'elevated/J',
  'tire/N_cover/N',
  'flexi/N_leash/N',
  'water/N_dragon/N',
  'schnauzer/N',
  'cody/N',
  'zoo/N_med/N',
  'ragdoll/N',
  'universal/N',
  'mineral/N',
  'feast/N',
  'transfer/N_switch/N',
  'walmart/N_sell/V_brand/N',
  'lund/N',
  'laminate/N',
  'avenger/N',
  'russell/N',
  'reduce/V_odor/N',
  'jean/N',
  'firstrax/N',
  'shepard/N',
  'tough/J_toy/N',
  'mouth/N_half/N',
  'maltese/J',
  'merrick/N_product/N',
  'beardie/N',
  'spark/V_plug/N',
  'camry/N',
  'latex/N',
  'rpm/N',
  'chrome/J',
  'cancer/N',
  'occidental/N',
  'calming/J_cap/N',
  'siberian/J_husky/N',
  'vent/V_visor/N',
  'feliway/N',
  'tonneau/N',
  'entire/J_time/N',
  'boston/N',
  'liner/N_w/V_carpad/N',
  'labradoodle/N_puppy/N',
  'sweater/N',
  'ventshade/N_visor/N',
  'tannin/N',
  'little/J_comb/N',
  'cat/N_urine/N',
  'joint/J_health/N',
  'msd/N',
  'probiotic/N',
  'haired/J_dog/N',
  'tartar/N',
  'toyota/N',
  'destructive/J',
  'mooring/N',
  'turtle/N_tank/N',
  'ford/N_explorer/N',
  'blitz/N',
  'quick/J_hitch/N',
  'use/V_brush/N',
  'eater/N',
  'petco/N',
  'parasite/N',
  'tropical/J',
  'medicated/J',
  'jericho/N',
  'boundary/N',
  'urine/N',
  'pet/N_food/N',
  'seat/N_cover/N',
  'tree/N_frog/N',
  'circle/N',
  'model/N_number/N',
  'pet/N_cover/N',
  'brushing/N',
  'genie/N',
  'theft/N',
  'insane/J',
  'purchase/V_toy/N',
  'fraying/J',
  'good/J_review/N',
  'protect/V_brain/N',
  'dane/N',
  'tundra/N',
  'love/V_cat/N',
  'rid/N',
  'best/J_shampoo/N',
  'help/V_dog/N',
  'hip/N_problem/N',
  'likes/N',
  'pitbull/N',
  'buy/V_harness/N',
  'dash/N',
  'fearful/J',
  'heavy-duty/J',
  'dog/N_treat/N',
  'turducken/N',
  'meaty/J',
  'little/J_harness/N',
  'stew/N',
  'chest/N',
  'stereo/N',
  'dentist/N',
  'snuggle/N',
  'guinea/N_pig/N',
  'buy/V_collar/N',
  'sirius/N',
  'golden/N_retriever/N',
  'appealing/J',
  'supplement/N',
  'rusting/J',
  'flea/N_collar/N',
  'buy/V_charger/N',
  'wholesome/J',
  'comfortable/J_alpacas/N',
  'cocker/N_spaniel/N',
  'bronco/N',
  'fit/V_hitch/N',
  'thief/N',
  'cup/N_holder/N',
  'potty/N',
  'pet/N_owner/N',
  'usb/J',
  'bird/N_bath/N',
  'persian/J_cat/N',
  'dermatrix/N',
  'cute/J_harness/N',
  'trio/N',
  'ran/N',
  'dry/J_skin/N',
  'winch/N',
  'traction/N',
  'pet/N_toy/N',
  'slider/N',
  'jump/V_fence/N',
  'tidy/N',
  'biscuit/N',
  'product/N_purchase/V_collar/N',
  'drink/V_water/N',
  'med/N',
  'akita/N',
  'chewy/J',
  'replace/V_leash/N',
  'picky/J_cat/N',
  'fur/N',
  'converter/N',
  'kongs/N',
  'skittish/J',
  'carefresh/N',
  'little/J_boy/N',
  'dematting/N',
  'webbing/N',
  'dewalt/N',
  'pigskin/N',
  'dog/N_harness/N',
  'bird/N_cage/N',
  'hot/J_spot/N',
  'barrier/N',
  'chihuahua/N',
  'rebuild/V_kit/N',
  'catnip/N',
  'spaniel/N',
  'ezy/N',
  'fyi/N',
  'medication/N',
  'cut/V_nail/N',
  'holistic/J',
  'hartz/N',
  'labradoodle/N',
  'pc/N',
  'soy/N',
  'ford/N_ranger/N',
  'biospot/N',
  'ram/N',
  'grooming/N_tool/N',
  'nice/J_bed/N',
  'pillow/N',
  'aging/N',
  'stove/N',
  'feral/J',
  'hp/N',
  'new/J_truck/N',
  'pet/N_stroller/N',
  'accord/N',
  'cozy/N_collar/N',
  'non-slip/J',
  'perch/N',
  'better/J_tie/N',
  'new/J_variety/N',
  'excellent/J_condition/N',
  'infection/N',
  'jrt/N',
  'install/N',
  'instalation/N',
  'hyper/J',
  'older/J_model/N',
  'doggy/J',
  'zip/N',
  'grain-free/J',
  'fabric/N_seat/N',
  'cairn/N',
  'tail/N_light/N',
  'use/V_food/N',
  'air/N_pump/N',
  'hatchrite/N',
  'allow/V_fuel/N',
  'poop/N_tent/N',
  'watt/N_generator/N',
  'av/N',
  'collie/N',
  'crystal/N',
  'try/V_lot/N',
  'garage/N_mat/N',
  'outfit/N',
  'taiwan/N',
  'shoulder/N',
  'hitch/N_pin/N',
  'hind/J',
  'pal/N',
  'carpad/N',
  'tall/J_year/N',
  'quality/N_purchase/V_leash/N',
  'preventative/J',
  'helemt/N',
  'urine/N_odor/N',
  'attack/N',
  'soothing/J',
  'persian/N',
  'birdbath/N',
  'documentation/N',
  'flowmaster/N',
  'footage/N',
  'shepherd/N_dog/N',
  'water/N_dish/N',
  'pug/N',
  'itchy/J',
  'heeler/N',
  'fit/V_tire/N',
  'activity/N',
  'water/N_bowl/N',
  'chew/V_hole/N',
  'yorkshire/N',
  'k&n/N',
  'window/N_perch/N',
  'leopard/N_gecko/N',
  'havanese/N',
  'chewing/J',
  'inflammation/N',
  'bamboo/N',
  'canned/J',
  'canoe/N',
  'car/N_cover/N',
  'immune/J',
  'daybreak/N',
  'syrup/N',
  'jolly/J_pet/N',
  'chew/V_toy/N',
  'small/J_unit/N',
  'mounting/N',
  'grease/N_gun/N',
  'topper/N',
  'leakage/N',
  'piggy/N',
  'gram/N',
  'eheim/N',
  'similar/J_brand/N',
  'respiratory/J',
  'coil/N',
  'spindrift/N',
  'rear/J_leg/N',
  'cow/N',
  'enzyme/N',
  'pork/N',
  'rabbit/N_food/N',
  'incontinence/N',
  'bigger/J_dog/N',
  'silverado/N',
  'digital/N',
  'veterinarian/N',
  'small/J_leash/N',
  'best/J_toy/N',
  'flour/N',
  'tunnel/N',
  'dogit/N',
  'little/J_toy/N',
  'bite/N',
  ':-rrb-/N',
  'album/N',
  'wool/N',
  'cupholder/N',
  'douxo/N',
  'best/J_food/N',
  'xantrex/N',
  'love/V_post/N',
  'orange/J',
  'bearing/N',
  'chicken/N_wing/N',
  'digestible/J',
  'scoopable/J',
  'oatmeal/N',
  'cat/N_owner/N',
  'stock/N_muffler/N',
  'carpeting/N',
  'chin/N_bar/N',
  'fall/V_kind/N',
  'molly/N',
  'florescent/J',
  'pay/N',
  'usaf/N_service/N',
  'pleasant/J_scent/N',
  'cat/N_door/N',
  'dog/N_owner/N',
  'healthy/J',
  'love/V_chew/N',
  'coon/N',
  'lather/N',
  'buy/V_food/N',
  'new/J_home/N',
  'hype/N',
  'palatable/J',
  'pea/N',
  'pound/N_dog/N',
  'bella/N',
  'yum/N',
  'floss/N',
  'unbelievable/J',
  'medium/N_size/N',
  'wording/N',
  'fido/N',
  'unscented/J',
  'calming/J',
  'freeze/N',
  'grain/N',
  'weimaraner/N',
  'zeus/N',
  'slipper/N',
  'smallest/J',
  'geneflora/N',
  'dog/N_park/N',
  'purchase/V_mat/N',
  'tortoise/N',
  'precious/J',
  'mo/N',
  'lupine/N',
  ...},
 1893)

In [12]:
# Load the composed-vector files that are compared below. Each call passes
# allow_lexical_overlap=False to Vectors.from_tsv.
# The commented-out lines are alternative files that were not loaded for this run.
v1 = Vectors.from_tsv('../FeatureExtractionToolkit/word2vec_vectors/composed/AN_NN_word2vec-gigaw_100percent-rep0_Add.events.filtered.strings', allow_lexical_overlap=False)
# v3 = Vectors.from_tsv('../FeatureExtractionToolkit/word2vec_vectors/composed/AN_NN_word2vec-wiki_15percent-rep0_Add.events.filtered.strings', allow_lexical_overlap=False)
v3 = Vectors.from_tsv('../FeatureExtractionToolkit/word2vec_vectors/composed/AN_NN_word2vec-wiki_50percent-rep0_Add.events.filtered.strings', allow_lexical_overlap=False)
v4 = Vectors.from_tsv('../FeatureExtractionToolkit/glove/AN_NN_glove-wiki_Add.events.filtered.strings', allow_lexical_overlap=False)
# v5 = Vectors.from_tsv('../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Baroni.events.filtered.strings', allow_lexical_overlap=False)
# v6 = Vectors.from_tsv('../FeatureExtractionToolkit/exp10-13-composed-ngrams/AN_NN_gigaw-100_Add.events.filtered.strings', allow_lexical_overlap=False)

In [60]:
def evaluate(v):
    """Print how informative the nearest neighbours of the informative features are.

    Relies on the notebook-level names ``phrases``, ``good_feats``, ``vocab``,
    ``lo_automotive`` and ``orig_lo_automotive``.

    Two things are printed:

    * test 1 -- the mean number of a feature's neighbours that are themselves
      in ``good_feats``, averaged over the features that have any neighbours;
    * test 2 -- for features with a positive, then a negative, score in
      ``lo_automotive``: the share of their top-3 neighbours' scores (taken from
      ``orig_lo_automotive``) that falls into each of the bins
      [-20, -10), [-10, 0), [0, 10), [10, 20].

    :param v: object providing ``init_sims(vocab=..., n_neighbors=...)`` and
        ``get_nearest_neighbours(feature)``; in this notebook a ``Vectors`` instance.
        The latter is expected to return a sequence whose items start with the
        neighbour's name, or something falsy when there are no neighbours.
    :return: None, the results are only printed
    """
    v.init_sims(vocab=phrases, n_neighbors=5) # 1000

    neighbour_cols = ['r1', 'r2', 'r3']
    bin_edges = [-20, -10, 0, 10, 20]

    lengths = []  # test 1: number of good features among each feature's neighbours
    data = []     # test 2: [score of the feature, scores of its top neighbours]
    for feat in good_feats:
        # query the neighbours once and use them for both tests
        neigh = v.get_nearest_neighbours(feat)
        if not neigh:
            continue
        names = [x[0] for x in neigh]

        # test 1- set overlap of good neighbours
        lengths.append(len(set(names).intersection(good_feats)))

        # test 2- how many good feats get messed up
        # unique top-3 neighbours, kept in rank order so that r1..r3 mean what they say
        top = []
        for name in names[:len(neighbour_cols)]:
            if name not in top:
                top.append(name)
        row = [lo_automotive[vocab[feat]]] + [orig_lo_automotive[vocab[n]] for n in top]
        # pad short neighbour lists so that every row has the same width
        row += [np.nan] * (1 + len(neighbour_cols) - len(row))
        data.append(row)

    print('mean overlap', np.mean(lengths))

    df = pd.DataFrame(data, columns=['orig'] + neighbour_cols)
    # The neighbour columns are selected by label: selecting string-labelled
    # columns with a list of integers raises a KeyError on current pandas.
    # Values outside the bin range and NaN padding are not counted by np.histogram
    # but still count towards len(flat), so the printed shares need not sum to 1.
    flat = df.loc[df.orig > 0, neighbour_cols].values.ravel()
    print('dist for positive feats:', np.histogram(flat, bins=bin_edges)[0]/len(flat))

    flat = df.loc[df.orig < 0, neighbour_cols].values.ravel()
    print('dist for negative feats:', np.histogram(flat, bins=bin_edges)[0]/len(flat))

In [61]:
# v1: loaded from the AN_NN_word2vec-gigaw_100percent-rep0_Add file
evaluate(v1)


mean overlap 0.59509202454
dist for positive feats: [ 0.06543967  0.09611452  0.67075665  0.0593047 ]
dist for negative feats: [ 0.12883436  0.1595092   0.62099523  0.00886162]

In [62]:
# v3: loaded from the AN_NN_word2vec-wiki_50percent-rep0_Add file
evaluate(v3)


mean overlap 0.495685522531
dist for positive feats: [ 0.03614458  0.10843373  0.68273092  0.05890228]
dist for negative feats: [ 0.13140218  0.14399664  0.6099916   0.00587741]

In [63]:
# v4: loaded from the AN_NN_glove-wiki_Add file
evaluate(v4)


mean overlap 0.554054054054
dist for positive feats: [ 0.04761905  0.09659864  0.69659864  0.06394558]
dist for negative feats: [ 0.13990729  0.1011378   0.67214496  0.00758534]

In [20]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
try:
    # current scikit-learn releases provide cross_val_score here
    from sklearn.model_selection import cross_val_score
except ImportError:
    # older releases only have the cross_validation module, which was later removed
    from sklearn.cross_validation import cross_val_score
# Mean accuracy of a Multinomial Naive Bayes classifier over 10 cross-validation folds.
cross_val_score(MultinomialNB(alpha=0.001), X, ytr, cv=10).mean()
# Alternative kept for reference: fit on all the data and score on that same data.
# clf = MultinomialNB(alpha=0.001).fit(X,ytr)
# accuracy_score(clf.predict(X), ytr)


Out[20]:
0.95700000000000007

In [23]:
# Initialise neighbour search on each thesaurus with the corpus phrases as
# vocabulary and 5 neighbours per entry.
for thesaurus in (v1, v3, v4):
    thesaurus.init_sims(vocab=phrases, n_neighbors=5)

In [30]:
# For every good feature that has neighbours in all three thesauri, print the
# feature followed by one neighbour list per thesaurus (order: v1, v3, v4).
thesauri = (v1, v3, v4)
for feat in good_feats:
    neighbour_lists = [t.get_nearest_neighbours(feat) for t in thesauri]
    # skip features missing from any thesaurus
    # (optional extra filter, currently off: and feat.count('_'))
    if not all(neighbour_lists):
        continue
    print(feat)
    for entries in neighbour_lists:
        print(entries)
        print('-')
    print('###########')


digital/N
[('computer/N', 2.0121083136266775), ('operator/N', 2.1441956254448318), ('software/N', 2.1498316598963738), ('communication/N', 2.175665703446318), ('desktop/N', 2.1916399484784326)]
-
[('audio/N', 1.8117980594866103), ('versatile/N', 1.8593645465837654), ('react/N', 1.9780421379769435), ('c&c/N', 1.9920199131690399)]
-
[('dab/N', 5.2622223722927171), ('switchover/N', 5.3010152205239898)]
-
###########
jerry/N
[('joe/N', 2.6350659958772154), ('ted/N', 2.6943719802961534), ('ray/N', 2.7340543008980465), ('tom/N', 2.7776620724333769), ('bob/N', 2.7804617127032079)]
-
[('josh/N', 1.0154339717653609), ('marty/N', 1.1122483242580357), ('tom/N', 1.1209686392218312), ('terry/N', 1.1650721409067808), ('charlie/N', 1.1818791011556804)]
-
[('tom/N', 4.3164749378959684), ('jeff/N', 4.3366096813929431), ('larry/N', 4.4023622929868012), ('bennett/N', 4.6758739884989415), ('terry/N', 4.6861391485638801)]
-
###########
album/N
[('movie/N', 1.9347721188999425), ('soundtrack/N', 1.9934358336325324)]
-
[('song/N', 1.8970508519726463), ('single/J', 1.9628007912236205), ('cd/N', 2.04977288673363), ('band/N', 2.0619547610776956)]
-
[('song/N', 3.6708262153835345), ('release/N', 4.2994997929769685), ('single/J', 4.5124081598534511)]
-
###########
wool/N
[('latex/N', 1.7275101890694031), ('cotton/N', 1.7457585332633487), ('coffee/N', 1.8070239759779385), ('lower-priced/J', 1.8928652323972188), ('pea/N', 1.9080213856173902)]
-
[('linen/N', 1.2986373248309468), ('cloth/N', 1.510415234561765), ('cotton/N', 1.5499986114920965), ('silk/N', 1.6952710498955834), ('leather/N', 1.6958474612056267)]
-
[('cotton/N', 5.0591400638188491), ('linen/N', 5.2901918889615915), ('cloth/N', 5.3881003273362502)]
-
###########
silencer/N
[('loaded/J', 1.8800154957247872), ('pistol/N', 1.9396574818612331), ('magnum/N', 2.0964141057376251), ('knife/N', 2.1257943848172873)]
-
[('dozer/N', 1.5620782272734024), ('sear/N', 1.601099127283176), ('propellor/N', 1.6295853387132189), ('telescoping/N', 1.6752594325602495)]
-
[('muffler/N', 3.2808530854866094), ('jetpack/N', 3.4740588673622095), ('hairdryer/N', 3.5102369931900612), ('atomizer/N', 3.5179655372810292), ('deadbolt/N', 3.5194256522381604)]
-
###########
stripe/N
[('bandana/N', 2.2961227795208043), ('shades/N', 2.3080979282006546), ('glitter/N', 2.3265910824183837)]
-
[('yellow/J', 1.7772710886680407), ('gray/J', 1.8222968244535682), ('blotch/N', 1.8275599224334076), ('white/J', 1.8300948461518669)]
-
[('blue/J', 4.5229221031638396), ('yellow/J', 4.5858438376738251), ('purple/J', 4.8117310212678142), ('orange/J', 4.8529735892460817)]
-
###########
bearing/N
[('adhesive/J', 2.072918760092775)]
-
[('grip/N', 1.6493282378652097), ('billet/J', 1.6945510069278362), ('stiffener/N', 1.7199108764056787), ('frame/N', 1.7485868240839084)]
-
[('bushing/N', 4.6122655742406033)]
-
###########
registration/N
[('manual/J', 2.0009249208237607), ('stuffing/N', 2.03320808371127), ('seating/N', 2.0547497665628955), ('identification/N', 2.098735770097611)]
-
[('mandatory/J', 1.6848894862334369), ('cipa/N', 1.8621461471896998), ('licence/N', 1.8812973141116609)]
-
[('admission/N', 4.8655888890408736), ('prior/J', 4.968043011168179), ('licence/N', 4.9976761565813765), ('trademark/N', 5.0177848564290803)]
-
###########
earlier/J
[('late/J', 1.8071175824367229), ('yesterday/N', 1.8930029353393825), ('thursday/N', 1.9249158661182677), ('tuesday/N', 1.9553813846064358), ('wednesday/N', 1.9653602651055768)]
-
[('later/J', 1.0992751116684869), ('previous/J', 1.3884618290112329), ('actual/J', 1.5342274368311402), ('original/J', 1.5463645173735876), ('thier/J', 1.6536177348170249)]
-
[('later/J', 2.2721836674758493), ('previous/J', 3.0635562480723282), ('subsequent/J', 3.2607168047664006), ('recent/J', 3.6751830602950921), ('similar/J', 3.7772474099646951)]
-
###########
alarm/N
[('frustration/N', 2.2661427402358085)]
-
[('siren/N', 1.7867803827653554), ('noise/N', 1.9556677750630169), ('shutoff/N', 1.9836483490452039), ('beeper/N', 2.0064280951387592)]
-
[('siren/N', 4.4929613377600965), ('anti-theft/J', 4.7038114800549771), ('distress/N', 4.7371757199204678), ('smoke/N', 4.9248629576784158)]
-
###########
blazer/N
[('lemon/J', 1.3981650904176059), ('motif/N', 1.418740594271132), ('sleek/J', 1.4228858199634569), ('sled/N', 1.4584256864644052), ('oversized/J', 1.4623092279114871)]
-
[('khaki/J', 2.0295081314873147), ('shirt/N', 2.1420510431480988), ('sweater/N', 2.2032505123682879), ('blue/J', 2.2151402641635229)]
-
[('plaid/J', 4.901978295850971), ('chevelle/N', 4.9194429932597048), ('jetpack/N', 5.008513227144558), ('sweatshirt/N', 5.0102751930291225), ('denim/J', 5.018192980990368)]
-
###########
linkage/N
[('compatibility/N', 2.0471591584911142), ('interaction/N', 2.1348659387153304)]
-
[('mechanism/N', 1.7109247770889677), ('coupling/N', 1.7606192108153398), ('functional/J', 1.8117200022903714), ('interaction/N', 1.8214558983978766), ('pre-treatment/N', 1.842174118127355)]
-
[('coupling/N', 4.6061049960860645), ('spring-loaded/J', 4.7941720838942565), ('driveline/N', 4.9160589900744673), ('tweezers/N', 4.919899350806884)]
-
###########
furnace/N
[('propane/N', 2.6197470460296857)]
-
[('scrubber/N', 2.0663836935798061), ('stove/N', 2.0939505315349218)]
-
[('stove/N', 4.9458769799273217), ('blast/N', 4.9572417716272428), ('graphite/N', 5.2788860228859837), ('fiery/J', 5.3229073973172794), ('kettle/N', 5.3295063518073613)]
-
###########
navy/N_patch/N
[('patrol/N', 2.8979660760317296), ('ship/N', 2.909776167495802)]
-
[('black/J_base/N', 2.8205286412195165), ('insignia/N', 2.8465860093685968), ('pink/J_base/N', 2.8673057247540608), ('color/V_base/N', 2.9074955955538764)]
-
[('color/V_base/N', 6.9202056244073411)]
-
###########
cart/N
[('sled/N', 1.7300293073265458), ('off-road/J', 1.8015709669156463), ('mini/N', 1.80677624784647), ('bike/N', 1.8332464460895377), ('sledge/N', 1.8443520459485023)]
-
[('car/N', 1.5823277701599048), ('buggy/N', 1.6202823366946426), ('bike/N', 1.6208178393700294), ('truck/N', 1.6537735454789606), ('snowmobile/N', 1.6567965676513914)]
-
[('wagon/N', 4.4821140224434268), ('bike/N', 4.5492572907980264), ('buggy/N', 4.6797759560863588), ('mule/N', 4.693589534014877), ('cuttle/N', 4.7837807094184415)]
-
###########
hyundai/N
[('nissan/N', 2.3546573820726766), ('toyota/N', 2.3559914155493069), ('motor/N', 2.4365131716951165)]
-
[('toyota/N', 1.7577219854105877), ('nissan/N', 2.0755826222936089), ('chrysler/N', 2.0898424959517139), ('mitsubishi/N', 2.1544290763822489), ('honda/N', 2.1593048063267046)]
-
[('elantra/N', 5.0153120324440428), ('nissan/N', 5.2750100261870605), ('yamaha/N', 5.3425800210043652)]
-
###########
pwc/N
[('cob/N', 2.2079802915900704), ('msg/N', 2.2286257361890218), ('ca/N', 2.2566830492513885), ('oversight/N', 2.2755457041274787), ('dab/N', 2.2827307235172385)]
-
[('pumps/N', 1.7278267260434772), ('omc/N', 1.7278315312877892), ('fastenal/N', 1.7284857438628034), ('fom/N', 1.7584917773541235), ('ait/N', 1.7808436070662643)]
-
[('stap/N', 3.2168425788501054), ('lactulose/N', 3.2334961219375229), ('fom/N', 3.2530950593093646), ('byer/N', 3.3436895189644922), ('wrt/N', 3.4043071208112226)]
-
###########
drip/N
[('bowel/N', 1.8087543556548531), ('vegetarian/J', 1.8561585830142586), ('aspirin/N', 1.8563855537274963), ('stool/N', 1.8767222041075959), ('dosage/N', 1.8880883703113807)]
-
[('medicated/J', 1.5649055442062547), ('spray/N', 1.6172476981070019), ('ice-cold/J', 1.6192540336174217), ('aerated/J', 1.6712808305785225), ('rinse/N', 1.6760294523892991)]
-
[('doubler/N', 4.9638030405629516), ('runt/N', 5.0120566115056011), ('siphon/N', 5.0231634975249424), ('multimeter/N', 5.0283614057411388), ('ice-cold/J', 5.0888140202408652)]
-
###########
match/V_dad/N
[('mom/N', 2.5465376633309997), ('fancy/J', 2.6558764423118593), ('game/N', 2.738807213400515)]
-
[('mom/N', 2.4001331411486859), ('grandpa/N', 2.6752738202273156), ('timmy/N', 2.7230341037260577)]
-
[('mom/N', 5.6445060725394738), ('thing/N', 6.0932220013510081), ('squirm/V_time/N', 6.1385743692344414), ('sure/J', 6.1519389662223576)]
-
###########
amp/N
[('ebay/N', 1.316322681937893), ('exchangeable/J', 1.3526476084618126), ('yahoo/N', 1.3665474081234108), ('bg/N', 1.3803551213442717), ('smart/N', 1.4164054646225015)]
-
[('pushbutton/N', 1.6626199181052865), ('versatile/N', 1.7014683523582868), ('casio/N', 1.7108004770175413), ('slt/N', 1.7149110513817813)]
-
[('install/N', 4.7118250583422556), ('globetrotter/N', 4.8330066329921371), ('slt/N', 4.8502190203995932), ('nub/N', 4.8658835927167434)]
-
###########
hail/N
[('confetti/N', 2.8903662462172459), ('canopy/N', 2.9246146968854245), ('pile/N', 2.9543372334600506), ('swarm/N', 2.9560980291936794)]
-
[('blow/N', 2.1592543423737269), ('burning/N', 2.1730985683408393), ('bummer/N', 2.197305877496921), ('bling/N', 2.2677572199153948)]
-
[('whack/N', 4.9759260452101772), ('wake/N', 4.9972543040043496), ('mighty/J', 5.2067732016965182), ('like/J', 5.2477822764041964), ('panting/N', 5.2694827323874955)]
-
###########
black/J_jacket/N
[('white/J_coat/N', 2.2658482460853304), ('wear/V_coat/N', 2.2781687399123198)]
-
[('blue/J_bag/N', 1.9711301707921416), ('white/J_bag/N', 2.0096429054311118)]
-
[('white/J_bag/N', 5.7745755069560749)]
-
###########
turbo/N
[('corolla/N', 1.9387155713494728), ('lexus/N', 1.9863952375066887), ('chevrolet/N', 2.0334017307378121), ('taurus/N', 2.0862008337588827), ('off-road/J', 2.1206474619545737)]
-
[('engine/N', 1.4815803322825392), ('turbo-charged/J', 1.5286499434108689), ('gls/N', 1.7125705292345379), ('honda/N', 1.7482274499845092), ('supercharger/N', 1.7599412059782931)]
-
[('alternator/N', 5.0203131293307788), ('turbo-charged/J', 5.0494189204479767), ('lag/N', 5.209164684243146)]
-
###########
throttle/N
[('pedal/N', 1.43924454800447), ('vibration/N', 1.4560415748309488), ('sled/N', 1.4728253299107581), ('aerodynamics/N', 1.5019019425895301), ('seatbelt/N', 1.5138208611355071)]
-
[('valve/N', 1.6825664724624527), ('limiter/N', 1.8220627548173254), ('brake/N', 1.8339789173944046), ('damper/N', 1.8967279725322082), ('airflow/N', 1.8984301397616867)]
-
[('pedal/N', 5.4690733116599377), ('diaphragm/N', 5.485499750372977), ('pinhole/N', 5.5196425278246952)]
-
###########
explorer/N
[('polar/N', 2.0229193721050449), ('voyager/N', 2.0506997975361192), ('artist/N', 2.0607537703341778), ('orbiter/N', 2.0774994379826803)]
-
[('mariner/N', 2.0156267310177247)]
-
[('mariner/N', 5.048893069697554), ('trapper/N', 5.1796563476783861), ('mountaineer/N', 5.4318756644760384)]
-
###########
hitachi/N
[('toshiba/N', 1.1792909391541528), ('sony/N', 2.1357186810686049), ('nintendo/N', 2.1712693584025988)]
-
[('garmin/N', 1.9640080840484415), ('toshiba/N', 1.9760260960201568)]
-
[('slt/N', 4.4085579443099077), ('teva/N', 4.4597502903010167), ('maxell/N', 4.465495436929479), ('toybox/N', 4.56742004419322)]
-
###########
packer/N
[('smart/N', 2.5090980655156239), ('songwriter/N', 2.5257930501945611), ('zee/N', 2.5420050453657344)]
-
[('fisher/N', 1.3995246415532538), ('smith/N', 1.425539768565071), ('mcdonald/N', 1.4403958220280577), ('brown/N', 1.4414388733234311), ('jones/N', 1.4841055052744216)]
-
[('hickey/N', 3.6806357110719334), ('lambie/N', 3.9202067755353163), ('air-dried/J', 4.0968720517497248), ('pitty/N', 4.1434489230482852), ('snook/N', 4.2364890099694579)]
-
###########
gasoline/N
[('fuel/N', 1.7780609216341816), ('unleaded/J', 1.7966878576735541), ('pump/N', 1.9322374514177201), ('diesel/N', 1.952354580832723)]
-
[('fuel/N', 1.6747259406129151), ('propane/N', 2.0019795546119759), ('gas-powered/J', 2.0669299218953716)]
-
[('fuel/N', 4.1064341297120537), ('fuel/N_happy/N', 4.4735927769689772), ('diesel/N', 4.9322984975771691)]
-
###########
fuse/N
[('aerosol/N', 1.7041868951137973), ('dart/N', 1.7255962302330166), ('seatbelt/N', 1.762124691825218), ('screwdriver/N', 1.7674657528594344)]
-
[('unscrewed/J', 1.6691762083774921), ('noisemaker/N', 1.7098509661290002), ('floatation/N', 1.7390974771618426), ('divot/N', 1.7457004173257857), ('choke/N', 1.7550537665435586)]
-
[('silencer/N', 4.4185097052508562), ('crimped/J', 4.6069697816432447), ('padlock/N', 4.6077193914372652), ('eyelet/N', 4.6472072837274006), ('carabiner/N', 4.6683628640193557)]
-
###########
garbage/N_defeat/V_purpose/N
[('favorite/J_bedding/N', 2.958467123912127), ('use/V_ingredient/N', 2.9735304936254332), ('use/V_bedding/N', 3.0070491097591039)]
-
[('attack/V_equipment/N', 3.0628463390163541), ('punish/V_product/N', 3.1256366696007407), ('use/V_supplies/N', 3.130232360899472)]
-
[('eliminate/V_way/N', 7.1319570365973162), ('lose/V_bag/N', 7.2049439827301232), ('make/V_waste/N', 7.442499985981728)]
-
###########
slick/J_underside/N
[('upright/N', 2.4018294839717442)]
-
[('belly/N', 2.6851241813974989)]
-
[('yellowish/J', 5.2854002197230994), ('brownish/J', 5.6665380593168164)]
-
###########
idle/J
[('stuck/J', 1.6755004780270302), ('unusable/J', 1.8592720159624905), ('untouched/J', 1.8687872709588444), ('sealed/J', 1.897010982281617), ('sickly/J', 1.9075249858708181)]
-
[('floatation/N', 1.5455420320092244), ('perk/J', 1.5973471647111064), ('check-out/N', 1.6186864240087158), ('adjusting/N', 1.6371961980616005), ('trouble-free/J', 1.6385564580312264)]
-
[('dwell/N', 4.5666858870548559), ('dark/N', 4.5773402450714764), ('skittish/J', 4.5870923913570776), ('rasp/N', 4.5946340348710022), ('mop/N', 4.5989525949034311)]
-
###########
painting/N
[('artwork/N', 1.5815249385356787), ('cuisine/N', 1.7606970250479501), ('collection/N', 1.7740409327242181), ('soundtrack/N', 1.8105413201142819), ('artist/N', 1.8193003242960337)]
-
[('drawing/N', 1.2647624523556893), ('paperweight/N', 1.7728642423885927), ('work/N', 1.8152853940547098), ('bust/N', 1.8578614264795363), ('craftmanship/N', 1.906027836136966)]
-
[('drawing/N', 3.4289116906996888), ('work/N', 4.4801707315006416), ('painter/N', 4.6050326933397567), ('photography/N', 4.6159477818437251), ('illustration/N', 4.6971621323093578)]
-
###########
coast/N
[('island/N', 2.3884431400116419), ('tip/N', 2.6263012957114538), ('port/N', 2.6342529293568862), ('shetland/N', 2.6625193753084102)]
-
[('north/N', 2.045294891458199), ('west/N', 2.1158107190170306), ('shore/N', 2.1504662865180082), ('south/N', 2.2154202075174982), ('western/J', 2.2729324133001101)]
-
[('shore/N', 4.3413956281978026), ('north/N', 4.4763629845585582), ('south/N', 4.5815984232177085), ('northern/J', 4.6143141284051099), ('caribbean/N', 4.6241648390657533)]
-
###########
crc/N
[('fr/N', 1.8665125056380278), ('dl/N', 2.0336807645550352), ('ia/N', 2.0383684136174591), ('gab/N', 2.040656574802993)]
-
[('mpt/N', 1.756499670682762), ('tsp/N', 1.8167397142707105), ('cipa/N', 1.9059682180299711), ('consult/N', 1.944547248126955), ('available/N', 1.9690508365038886)]
-
[('vac/N', 4.488044807157566), ('fom/N', 4.5257358469623483), ('multi-color/J', 4.5708876133375895), ('fce/N', 4.6036513453805332), ('omc/N', 4.6789261256199595)]
-
###########
interface/N
[('micro/J', 1.4037209576788712), ('seamless/J', 1.4204706241522498), ('gadget/N', 1.4954491288586427), ('compatibility/N', 1.5127582871294787), ('branded/J', 1.5283210655622763)]
-
[('functionality/N', 1.6400432439238137), ('desktop/N', 1.7676285901636608), ('application/N', 1.7807834334585626), ('module/N', 1.8063488238460461)]
-
[('functionality/N', 4.1052330144586193), ('application/N', 4.6001286692165468), ('hardware/N', 4.6302079420236621), ('configuration/N', 4.635486416930914), ('controller/N', 4.6588516734518395)]
-
###########
accelerator/N
[('membrane/N', 1.455065597774472), ('padlock/N', 1.480625228162388), ('gel/N', 1.4962111683431834), ('orbiter/N', 1.5063129599615759), ('miniature/N', 1.5161686313463381)]
-
[('sensor/N', 2.0007406150687839), ('solid-state/J', 2.0221177661773586), ('micron/N', 2.0927564528068974), ('detector/N', 2.1145089938405652)]
-
[('flywheel/N', 5.1759115486957459), ('microchip/N', 5.3048357684458054), ('solid-state/J', 5.3747473100052803), ('high-energy/J', 5.3883495020297278)]
-
###########
bar/N_force/N
[('guard/N', 2.3800827646210863)]
-
[('army/N', 2.2942389264179561)]
-
[('army/N', 6.4122563183162766)]
-
###########
cruiser/N
[('battleship/N', 2.3905395373404481), ('craft/N', 2.4156132439136346)]
-
[('battleship/N', 1.3390897552931635)]
-
[('battleship/N', 4.5117127030679818), ('ship/N', 5.5162876749265894)]
-
###########
fulton/N
[('jeff/N', 3.1075333628404218)]
-
[('greenwood/N', 1.8218338570369121), ('adams/N', 1.8418395933890015), ('pierce/N', 1.8966243528440503)]
-
[('porter/N', 3.941808396449908), ('briggs/N', 4.2015364243738258), ('stratton/N', 4.2478713132889281), ('hickey/N', 4.2949026493799609), ('pierce/N', 4.3382334898238231)]
-
###########
propane/N
[('heater/N', 1.4811872409379259), ('cylinder/N', 1.5364040487576356), ('ammonia/N', 1.6970927268594678), ('fitting/N', 1.7126246786863701), ('coil/N', 1.7179495238022879)]
-
[('gas/N', 1.9514247221219032), ('liquid/J', 1.9736689165637753), ('gasoline/N', 2.0019795546119759), ('scrubber/N', 2.006921748867923)]
-
[('humidifier/N', 4.5111139578053221), ('safflower/N', 4.7551334646870851), ('slick/N', 4.7572240830618862), ('glycerin/N', 4.8287460953031056), ('freon/N', 4.8416431572226584)]
-
###########
club/N
[('team/N', 1.7016638737225944), ('player/N', 1.7420069046207995), ('english/N', 1.8051290841923311), ('football/N', 1.8457155957565639)]
-
[('team/N', 1.5597797171534036), ('competition/N', 1.8162650534270763), ('outfit/N', 1.8724092437610524)]
-
[('football/N', 4.7010249045432646), ('team/N', 4.7320775162856323), ('league/N', 4.952694057316684)]
-
###########
century/N
[('pivot/J', 2.0449715571273095), ('tasting/N', 2.0554360371206748), ('warp/N', 2.0597366543870952), ('xbox/N', 2.0682464492217605), ('melody/N', 2.0769730791290715)]
-
[('late/J', 2.0203669667098212), ('early/J', 2.1040852705191861), ('ad/N', 2.4200465628339947), ('bc/N', 2.4297427119773509)]
-
[('early/J', 4.4963473757063079), ('later/J', 4.6674024582194535), ('late/J', 5.0150281458346768), ('decade/N', 5.1016312597594711)]
-
###########
conversion/N
[('fingertip/N', 2.1568214259161542), ('pivot/J', 2.2085186189636796), ('horrid/J', 2.2141438291556068), ('cushion/N', 2.2460741970829377), ('convertible/N', 2.2632961668138707)]
-
[('pre-treatment/N', 1.6731775692648561), ('check-out/N', 1.7014452176394537), ('trouble-free/J', 1.7356347863340935), ('shutoff/N', 1.7619650550956825), ('separation/N', 1.7780420815283506)]
-
[('recovery/N', 4.5146383561747223), ('conventional/J', 4.6806910183215464), ('introduction/N', 4.7779712866560846), ('separation/N', 4.7838011388070889), ('return/N', 4.7984464467941068)]
-
###########
rifle/N
[('pistol/N', 1.812400709351818), ('automatic/J', 2.4125040184512447), ('pistol/N_model/N', 2.5258930112311249), ('gun/N', 2.5493476164578048)]
-
[('pistol/N', 1.796606397799184), ('gun/N', 1.8184587412646578), ('cannon/N', 2.0404883926718047), ('battery/N', 2.1233106207118762)]
-
[('pistol/N', 5.0050878009099886), ('gun/N', 5.3858837524034984), ('firing/N', 5.7633061110156216)]
-
###########
dependable/J
[('diligent/J', 1.0783745465706172), ('whisper/N', 1.2074890695037623), ('pitiful/J', 1.2262374301764867), ('thoughtful/J', 1.2441273314323573), ('tact/N', 1.2755345944019174)]
-
[('versatile/J', 1.734979685468887), ('intimidating/J', 1.777528407620665), ('savvy/J', 1.7879682830484083), ('resilient/J', 1.7941562072073118), ('nimble/J', 1.8244320142188988)]
-
[('rock-solid/J', 3.9452204195353899), ('top-notch/J', 3.9569611535108606), ('resilient/J', 3.9965781782173355), ('well-produced/J', 4.0006776543881903), ('steadier/J', 4.015219104357195)]
-
###########
automotive/J
[('high-end/J', 1.7686634534842269), ('high-tech/J', 1.7915555176959221), ('industry/N', 1.8126450801300438)]
-
[('manufacturer/N', 1.6404333875808892), ('manufacturing/N', 1.6888641708562506), ('automobile/N', 1.752784667820682), ('aftermarket/J', 1.8807560850075136)]
-
[('supplier/N', 4.2100088671012328), ('manufacturer/N', 4.2487978736137819), ('aftermarket/N', 4.4817374894890927), ('electronics/N', 4.5548799425672026), ('automobile/N', 4.5557563565622115)]
-
###########
northern/N
[('pivot/J', 3.0457201563102285), ('western/J', 3.0562032678224824), ('upc/N', 3.0890939832273943)]
-
[('southern/N', 1.8306366182150986), ('western/N', 1.9721627672264612)]
-
[('southern/N', 3.0851606331444068), ('western/N', 3.8352132737678355), ('central/N', 4.6063745200263515), ('western/J', 4.729347839329118)]
-
###########
lava/N
[('vapor/N', 2.2398211562004517), ('puddle/N', 2.2593979323889446), ('dust/N', 2.3243137343908811), ('mist/N', 2.3307135350624475), ('haze/N', 2.3492546285068059)]
-
[('sand/N', 1.9249627347983524), ('weathered/J', 1.9853092509927823), ('icy/N', 1.9948244201761858), ('tuff/N', 2.0285906782582943), ('mud/N', 2.0780429807655403)]
-
[('cave/N', 5.5341930867556481), ('tuff/N', 5.5725335232147328), ('reservoir/N', 5.6002388512168837), ('mud/N', 5.6131163802518822), ('sand/N', 5.6608975618886612)]
-
###########
yamaha/N
[('honda/N', 1.6795648564527477), ('toyota/N', 2.3006527445179685), ('maxi/N', 2.4169656468981131), ('ford/N', 2.488091880645189)]
-
[('honda/N', 1.681597602968699), ('toyota/N', 1.916954648509716), ('bmw/N', 1.9394032057062887), ('suzuki/N', 1.9563706199587914), ('offenhauser/N', 2.006837080800326)]
-
[('c&c/N', 4.3504253893509075), ('honda/N', 4.4505518284637464), ('custom-made/J', 4.8283823633139287), ('gasoline-powered/J', 4.8426275604302882), ('atv/N', 4.8676045816218485)]
-
###########
navy/N
[('ship/N', 2.0676965245795085), ('patrol/N', 2.2309694413361352), ('craft/N', 2.2574468549334203), ('helicopter/N', 2.3570052076957846), ('battleship/N', 2.3584057558027767)]
-
[('fleet/N', 2.2681175661568385), ('army/N', 2.4196296465764942), ('force/N', 2.5389543763062292), ('ship/N', 2.594561939040132)]
-
[('ship/N', 4.7352898026606569), ('fleet/N', 4.8106352109123787), ('marine/N', 5.2400023365495754), ('commander/N', 5.2605070792674544), ('officer/N', 5.2642905382531842)]
-
###########
windshield/N
[('headlight/N', 1.304162750082418), ('crumpled/J', 1.4236241018528102), ('undergrowth/N', 1.4932696916716137), ('mini/N', 1.5262311979599148), ('radiator/N', 1.5329604688670551)]
-
[('windscreen/N', 1.2364953337747771), ('headlight/N', 1.7495848450822575), ('wheel/N', 1.9206509760267878), ('diffuser/N', 1.9478359492677264)]
-
[('windscreen/N', 2.9828338324246291), ('wiper/N', 3.6782449392817758), ('washer/N', 4.5885848837310395), ('diffuser/N', 4.6367666769229396), ('decal/N', 4.7624138966515712)]
-
###########
classy/J
[('gritty/J', 1.3472827605620272), ('stylish/J', 1.4083372188205989), ('virtuoso/J', 1.496616262072731), ('delightful/J', 1.5668665544025155), ('brilliant/J', 1.5685637571525983)]
-
[('old-fashioned/J', 1.4833463838274388), ('tough/J', 1.5331984730236723), ('peppy/J', 1.5603912039656527), ('tacky/J', 1.5862268406403097), ('crafty/J', 1.6108382601513094)]
-
[('appetizing/J', 3.1572030036643195), ('well-produced/J', 3.1811075933075572), ('laidback/J', 3.1899007935826149), ('pricey/J', 3.1937868700173158), ('so-so/J', 3.2315887200100204)]
-
###########
atv/N
[('zee/N', 1.5891986825073563), ('tv/N', 1.7949191029595521), ('advertisment/N', 1.9373744844417347), ('website/N', 1.9997634055097251), ('info/N', 2.0060502306982939)]
-
[('offroad/N', 1.8403733643898468), ('tv/N', 1.946230973640527), ('entertainment/N', 1.9613509548667301), ('commercial/N', 2.0068583275066509)]
-
[('offroad/N', 3.7624128212563281), ('snowmobile/N', 4.5825636281990683)]
-
###########
compass/N
[('cellphone/N', 1.2415656566307838), ('mold/N', 1.2501452376149498), ('catalog/N', 1.2591043198216871), ('micro/J', 1.2722984098605068), ('insulin/N', 1.2894339067493541)]
-
[('floatation/N', 1.719757354930225), ('pfd/N', 1.7399870182333044), ('pushbutton/N', 1.7473009081290183), ('multi-color/J', 1.7579719035734813), ('chock/N', 1.7736576821939161)]
-
[('pivot/N', 4.5444244774659861), ('brilliance/N', 4.7700029107060296), ('lock/J', 4.8528211248332669)]
-
###########
roadside/N
[('trailer/N', 2.0686729347391926), ('windshield/N', 2.2083193640205643), ('barn/N', 2.2235608412908521), ('two-story/J', 2.2249778153262936), ('apartment/N', 2.2586173121008213)]
-
[('tent/N', 1.7963335340730842), ('windowsill/N', 1.8916178411430142), ('look-out/N', 1.907293046710222), ('shelter/N', 1.9099527329049808)]
-
[('no-kill/J', 4.8927108132881711), ('rooftop/N', 4.9052100765996762), ('cookout/N', 4.9378935019104047), ('shelter/N', 4.9468701303022913), ('bedwetting/N', 4.9875870838180854)]
-
###########
welding/N
[('duct/N', 2.175964025272946), ('aerosol/N', 2.1914653003003743), ('enzyme/N', 2.2192122857332688), ('vapor/N', 2.2240831697837504), ('faulty/J', 2.2310337870476493)]
-
[('soldering/J', 1.8221320997532797), ('sprayer/N', 1.840470980813633), ('antistatic/J', 1.8689819437826247), ('polishing/N', 1.9297638281487342), ('machinery/N', 1.9318179371291784)]
-
[('welder/N', 4.3587220967781368), ('soldering/J', 4.8113742702143849), ('ultrasonic/J', 5.0224676873975005), ('centrifuge/N', 5.1975620759965908)]
-
###########
universal/N
[('operator/N', 2.7890600133376733), ('digital/N', 2.8078333637171746), ('yahoo/N', 2.8086418764382346)]
-
[('entertainment/N', 2.0229141174630882), ('c&c/N', 2.0579632888083639), ('react/N', 2.0948770769220015)]
-
[('conjunction/N', 5.0337485729832592), ('abit/N', 5.265890944238496), ('original/N', 5.3608027299733756)]
-
###########
fuel/N
[('pump/N', 1.5451387192215145), ('gasoline/N', 1.7780609216341816), ('supply/N', 1.8611990345729981), ('unleaded/J', 1.8617082446344035)]
-
[('gasoline/N', 1.6747259406129151), ('gas/N', 1.7976076865411252)]
-
[('gasoline/N', 4.1064341297120537), ('injector/N', 5.036525922838579), ('gas/N', 5.153807161507113)]
-
###########
guitar/N
[('oversized/J', 1.4611971705570677), ('tray/N', 1.4772820148781705), ('bald/J', 1.4973483956514986), ('pumpkin/N', 1.4988470441166093), ('lemon/J', 1.5053703277657555)]
-
[('bass/N', 1.2664099387072916), ('keyboard/N', 1.3874769654905841), ('fiddle/N', 1.6371621657223934), ('sax/N', 1.7376141977871853)]
-
[('bass/N', 3.2584255839039806), ('keyboard/N', 4.2723366506691631), ('acoustic/J', 4.6321684075015019), ('riff/N', 4.6738052882999952)]
-
###########
lund/N
[('zen/N', 2.0652149413706997), ('binder/N', 2.2029076674500234), ('miki/N', 2.2240022076833208), ('hagen/N', 2.2460180429003778), ('sander/N', 2.2588731121941095)]
-
[('hagen/N', 1.9604638830739409), ('hasle/N', 1.96417378401216), ('knudsen/N', 2.0328917415396601)]
-
[('larson/N', 4.5280555913789309), ('pate/N', 4.6558274367238957), ('tiel/N', 4.7761318779764652), ('newby/N', 4.8714637190256269), ('ditto/N', 4.8770297104330833)]
-
###########
dealership/N
[('assembler/N', 1.953947751207346), ('mid-size/J', 2.0451121611931495), ('autopart/N', 2.0713091897652789)]
-
[('minivan/N', 1.9617579394869444), ('buick/N', 1.9846435943141254), ('car/N', 2.0400784933359315)]
-
[('automobile/N', 4.3193138221003347), ('minivan/N', 4.4374321130431955), ('motorhome/N', 4.4622442076580704), ('dealer/N', 4.4922659467463646)]
-
###########
corrosion/N
[('membrane/N', 1.0744118466825872), ('alga/N', 1.1999938146720555), ('pod/N', 1.2375036742581951), ('toxicity/N', 1.2429550295692937), ('mating/N', 1.2628909481698254)]
-
[('aeration/N', 2.05789843741697), ('breakage/N', 2.093688851274929), ('abrasion/N', 2.1211567146907013)]
-
[('breakage/N', 4.7267274668341104), ('abrasion/N', 5.0363220651241125), ('resistant/J', 5.1069258038110368)]
-
###########
avenger/N
[('viper/N', 1.6112459216954444), ('heavy-duty/J', 1.6324540794825249), ('vector/N', 1.6344996371175839), ('fragrance/N', 1.693843484832237), ('cellphone/N', 1.7195467853499051)]
-
[('cobra/N', 1.6843402661662028), ('invincible/J', 1.9388576788893321), ('behemoth/N', 1.9471046538045693), ('rogue/N', 1.9520427693627094), ('ace/N', 1.9905960182650451)]
-
[('racoon/N', 4.6153111909804094), ('favorit/N', 4.7884643827254267)]
-
###########
bonnet/N
[('sage/N', 1.6202027463449251), ('mane/N', 1.6350803493521038), ('chestnut/N', 1.6566065704871125), ('noe/N', 1.6791878684433026), ('porch/N', 1.6920417531941154)]
-
[('bowtie/N', 1.7092638635149002), ('ruffle/N', 1.7248020643138362), ('splotch/N', 1.7636544373505545), ('sewn/N', 1.7841516734237732)]
-
[('scoop/N', 4.4725844030948148), ('twirl/N', 4.5508692342091086), ('gunwale/N', 4.6279047442435548), ('spindrift/N', 4.6403816651443668), ('hairdryer/N', 4.645153689016758)]
-
###########
brain/N
[('neurological/J', 1.873404229958455), ('arthritis/N', 1.9197590867873986), ('illness/N', 1.9560603786751125), ('anemia/N', 1.9575498048307405), ('digestive/J', 1.9906814557848871)]
-
[('gut/N', 1.8162236831044971), ('sneezing/N', 1.8189221347134741), ('arthritic/J', 1.8303101486381834), ('clouding/N', 1.8322720573979285), ('actinic/J', 1.8403098786506671)]
-
[('tissue/N', 4.5961396118263211), ('abnormal/J', 5.063484251523648), ('heart/N', 5.1635672878442289), ('nervous/J', 5.2110133069969242), ('lung/N', 5.243609332675824)]
-
###########
occidental/N
[('arco/N', 1.7305733373220882), ('mobil/N', 1.8477711416371678), ('poly/N', 1.987166723771957), ('wal-mart/N', 2.0886903343603302), ('bp/N', 2.1264545203640446)]
-
[('laguna/N', 2.6904329346802665), ('sierra/N', 2.8349892922491313), ('lagos/N', 2.8438540850611154)]
-
[('sierra/N', 5.3524422861538641), ('caster/N', 5.3777335188394195), ('convertible/N', 5.5133438809794022), ('marineland/N', 5.5337304761907244)]
-
###########
tracker/N
[('sponge/N', 2.0116621385265541), ('backpack/N', 2.0419497176320833), ('puppy/N', 2.044458805413873), ('sledge/N', 2.0586806904300374), ('dog/N', 2.0684562268229874)]
-
[('controller/N', 1.666882581847331), ('install/N', 1.6796438048495304), ('pushbutton/N', 1.688169689485598), ('jetpack/N', 1.6901159477640899), ('tracking/N', 1.6916453256043278)]
-
[('keyed/J', 4.2396205652977956), ('seahorse/N', 4.3606557429734121), ('borzoi/N', 4.3612309462535919), ('pfd/N', 4.4158405702054049), ('rottweiler/N', 4.4391663597071922)]
-
###########
sierra/N
[('nigeria/N', 3.4410607239322517), ('caesar/N', 3.6060561247300895), ('como/N', 3.6342204825839222)]
-
[('occidental/N', 2.8349892922491313), ('durango/N', 2.8507433085829605), ('laguna/N', 2.980697086289156)]
-
[('peru/N', 5.2586408692484401), ('occidental/N', 5.3524422861538641), ('sahara/N', 5.4510020298224058)]
-
###########
suburban/J
[('hotel/N', 1.9271146814572053), ('plush/J', 1.9654213144034516), ('luxurious/J', 2.0427846594098269), ('downtown/N', 2.0453834318637227)]
-
[('rural/J', 1.8276876204389507), ('neighborhood/N', 1.9140242385382833), ('area/N', 2.1291930229516991)]
-
[('neighborhood/N', 4.3749715532810045), ('rural/J', 4.4996688005098768), ('neighborhood/N_stroller/N', 4.8349617340659483), ('downtown/N', 5.1166762315226677)]
-
###########
cherokee/N
[('sleek/J', 1.5732731141156318), ('chevrolet/N', 1.5904571903670659), ('off-road/J', 1.6445164940846557), ('axle/N', 1.6681091477859613), ('dodge/N', 1.7560129569351028)]
-
[('reservation/N', 2.0305319569938161), ('papillion/N', 2.0448847704482889), ('californian/N', 2.1955809531465635)]
-
[('reservation/N', 4.931733090296655), ('burmese/N', 5.1221195583227068), ('wagoneer/N', 5.1524068300621408), ('highland/N', 5.1998026826993167)]
-
###########
bass/N
[('beetle/N', 1.7817882869807866), ('smart/N', 1.82582200424188), ('bee/N', 1.8667069449924139), ('glitter/N', 1.8941600121672875), ('dry/N', 1.9140353842239295)]
-
[('guitar/N', 1.2664099387072916), ('keyboard/N', 1.4198694981682867), ('sax/N', 1.7096066016196652), ('fiddle/N', 1.8343554566911449)]
-
[('guitar/N', 3.2584255839039806), ('keyboard/N', 4.3173439857765548), ('horn/N', 4.542735079092), ('guitarist/N', 4.7913559710027807), ('trio/N', 4.9154980213203219)]
-
###########
toyota/N
[('honda/N', 1.5929094373640333), ('nissan/N', 1.6351126950165933), ('mazda/N', 1.8842712243255468)]
-
[('honda/N', 1.2166894905086867), ('nissan/N', 1.3994738600433005), ('bmw/N', 1.4799876739483331), ('buick/N', 1.4966446708083072), ('minivan/N', 1.5640286567174395)]
-
[('camry/N', 3.8866351767095404), ('celica/N', 4.1205907354537166), ('nissan/N', 4.2719296767853043)]
-
###########
mooring/N
[('undercarriage/N', 1.2959384199511765), ('undergrowth/N', 1.3578408632500518), ('membrane/N', 1.4025610452250801), ('cascade/N', 1.4040244529253725), ('pod/N', 1.4191770649406064)]
-
[('boat/N', 1.9556509327445935), ('landing/N', 2.0057594283605518), ('sail/N', 2.0526617164664342), ('gunwale/N', 2.0736878276027797)]
-
[('driveway/N', 4.699532305690961), ('gunwale/N', 4.7745183605407782), ('luau/N', 4.8517347174135557), ('deadbolt/N', 4.9197895149535613), ('choker/N', 4.9562625889074923)]
-
###########
blitz/N
[('onslaught/N', 2.2336721485353137), ('campaign/N', 2.4595189429108966), ('relentless/J', 2.4791858075570077), ('drive/N', 2.5098573707147263)]
-
[('superbowl/N', 1.7321541574336079), ('infamous/J', 1.7896858161943971), ('westies/N', 1.8126765572240984), ('wack/N', 1.8348534546789994), ('terrors/N', 1.8476273997037871)]
-
[('globetrotter/N', 4.8778978884922344), ('onslaught/N', 4.9902330402887003), ('argo/N', 5.0576417768606348), ('jinx/N', 5.0800818057451593)]
-
###########
cam/N
[('thi/N', 3.1734277554959158)]
-
[('pumps/N', 1.7684474845523945), ('choke/N', 1.8181004238524769), ('boats/N', 1.8255362467070555), ('dyno/N', 1.8552791039959553)]
-
[('pcv/N', 5.1456546998775581), ('propellor/N', 5.1469204837519102), ('atomizer/N', 5.1570785116264224), ('twin/J', 5.1808240422814968), ('deadbolt/N', 5.2091885784256275)]
-
###########
saddle/N
[('groove/N', 1.5818465315870436), ('dark/N', 1.5843209099664881), ('outdoors/N', 1.594837635285848), ('swimsuit/N', 1.6034198400744368), ('mating/N', 1.6039952760996776)]
-
[('mountain/N', 2.0252988838283321), ('hill/N', 2.0852124398998138), ('pant/J', 2.092046866243451), ('craggy/N', 2.0955781031916612)]
-
[('suction/J', 4.8827399122211084), ('harness/N', 4.9728025889644982), ('gunwale/N', 4.9891632114155176), ('boot/N', 5.0112068550013982), ('bump/N', 5.0879436953567998)]
-
###########
co-worker/N
[('canine/J', 1.7605262649982711), ('bleach/N', 1.7749158435606522), ('anecdote/N', 1.7873261134440848), ('sibling/N', 1.8144917996481418), ('pitbull/N', 1.831756961855697)]
-
[('coworker/N', 1.1990219638304762), ('friend/N', 1.3354326243723378), ('girlfriend/N', 1.4594419487553416), ("o'reily/N", 1.6055784459364781), ('partner/N', 1.6191750282895812)]
-
[('coworker/N', 1.8514165406531293), ('fiance/N', 3.102318482935464), ('in-law/N', 3.4641164400924809), ('mother-in-law/N', 3.5571451968789534), ('boyfriend/N', 3.5676320105948429)]
-
###########
theft/N
[('shredding/N', 2.0725960929316138), ('misrepresentation/N', 2.131834101611219), ('fraud/N', 2.1818521586760817)]
-
[('fraud/N', 2.230803102418939), ('killing/N', 2.5862398221218745)]
-
[('fraud/N', 5.1908494170498729), ('suspect/J', 5.4228742826316765)]
-
###########
nolan/N
[('joey/N', 2.4580548448177395), ('matt/N', 2.5505116113814381), ('russell/N', 2.5688585552414303), ('bennett/N', 2.6273242726881545), ('smith/N', 2.6315709420330395)]
-
[('parker/N', 1.0975743073712638), ('smith/N', 1.1194350012055727), ('foster/N', 1.158995543362265), ('wright/N', 1.2026392592168791), ('coleman/N', 1.2051969609120576)]
-
[('larson/N', 3.760209839358835), ('hickey/N', 3.7926012520982204), ('briggs/N', 3.8432484861372154), ('alway/N', 3.967079661408754), ('parker/N', 3.9796014216589577)]
-
###########
organizer/N
[('ebay/N', 2.0207422881251338), ('ferret/N', 2.0416997485240889)]
-
[('leader/N', 1.5871856618342435), ('demonstration/N', 1.8968464255112498), ('zoa/N', 1.9162635392504905)]
-
[('meeting/N', 4.673384191458263)]
-
###########
saturn/N
[('console/N', 1.576568600544987), ('playstation/N', 1.6315744980676339), ('mid-size/J', 1.6683554047996425), ('high-end/J', 1.7243732166180599), ('taurus/N', 1.7265030340268726)]
-
[('halo/N', 2.2850995985336171), ('neo/N', 2.3535176630054679), ('ultra/N', 2.4379169371147609), ('zeebo/N', 2.4388270547364637)]
-
[('moon/N', 5.0942484333952542), ('rocket/N', 5.1004121729191656)]
-
###########
acceleration/N
[('gradual/J', 1.6809017517143268), ('deceleration/N', 1.7875308307449687), ('adjustment/N', 2.047339315136012), ('shrinkage/N', 2.0496976121411814), ('revival/N', 2.0690310852880431)]
-
[('vibration/N', 1.7593820917871612), ('overhead/N', 1.759788519163018), ('limiter/N', 1.7695875128592666), ('timing/N', 1.79301413636929), ('readout/N', 1.8147337225083164)]
-
[('deceleration/N', 3.7558309896590134), ('gravity/N', 5.0990571057279794), ('rotation/N', 5.3147706533987904), ('reconfiguration/N', 5.3246502564298046), ('velocity/N', 5.3466523947727325)]
-
###########
gp/N
[('racing/N', 2.5172582008817947), ('racer/N', 2.5383054978470279), ('circuit/N', 2.5694732010259971)]
-
[('pt/N', 2.810258355827528)]
-
[('tracking/N', 5.4560865768847533), ('gps/N', 5.5826766367240017), ('pt/N', 5.6075533866188021)]
-
###########
dash/N
[('sled/N', 1.718983587392205), ('horrid/J', 1.735880217075997), ('wobbly/J', 1.7689872284075983), ('perfection/N', 1.7792909316230361), ('crawl/N', 1.796268863434282)]
-
[('chock/N', 1.5670581815665781), ('semi-truck/N', 1.5846324303361004), ('blinker/N', 1.6508724711970926), ('six-pack/N', 1.6587662831097576), ('superbowl/N', 1.668345879994104)]
-
[('em/N', 5.0108882454817332), ('beeper/N', 5.0467401256596327), ('pug/N', 5.0984515259694296), ('atomizer/N', 5.105855713707057)]
-
###########
heavy-duty/J
[('cellphone/N', 1.0568173229483899), ('suv/N', 1.0631177950860968), ('seatbelt/N', 1.10355285715576), ('pod/N', 1.1110145058401755), ('bulk/J', 1.1210513277495582)]
-
[('light-weight/J', 1.621844421502374), ('gas-powered/J', 1.7745299998998258), ('fitment/N', 1.7863170615667607), ('muffler/N', 1.8410272768441962), ('pneumatic/J', 1.8539816115041612)]
-
[('light-weight/J', 3.9606831757460728), ('american-made/J', 4.206133619444584), ('sunshade/N', 4.3304584910675459), ('genset/N', 4.3870635340514497), ('gasoline-powered/J', 4.3934738617532485)]
-
###########
radiator/N
[('heavy-duty/J', 1.262646786404547), ('headlight/N', 1.3013825704292936), ('wash/N', 1.3345757101077356), ('dessert/N', 1.3616214425676396), ('fridge/N', 1.3619830048531998)]
-
[('diffuser/N', 1.6284433584053553), ('blower/N', 1.7027990603396592), ('baffle/N', 1.7165545922633612), ('headlight/N', 1.7349334763057707), ('headlamp/N', 1.8147554357925759)]
-
[('grille/N', 5.039363324496458), ('atomizer/N', 5.0618763615089417), ('telescoping/N', 5.105338776373709)]
-
###########
stereo/N
[('laptop/N', 1.3759777602460179), ('high-end/J', 1.5317748291755049), ('compression/N', 1.5351035155237247), ('gadget/N', 1.5682373463255961), ('shampoo/N', 1.5887243562200715)]
-
[('audio/N', 1.6784400200087717), ('audio/J', 1.9210472615017611), ('amp/N', 1.9859151024484372), ('tape/N', 2.0398084343292648), ('pcm/N', 2.1093776752778712)]
-
[('audio/N', 4.6587316789930062), ('pcm/N', 4.7001764389716261), ('cassette/N', 4.9400825636740118), ('hdtv/N', 5.1653779231887773)]
-
###########
rusting/J
[('masonry/N', 1.3417965046252329), ('freezer/N', 1.3748304108633522), ('kiln/N', 1.3851851392839922), ('melon/N', 1.3963271212329078), ('fitting/N', 1.4428203959038752)]
-
[('splashing/N', 1.6290940550240276), ('grommet/N', 1.6945359261872011), ('spillage/N', 1.7274672948903298), ('mold/N', 1.7470471604240405), ('plexiglass/N', 1.7683883402718705)]
-
[('fraying/J', 3.327916623658862), ('washout/N', 3.4605366287630885), ('wth/N', 3.5206226876900617), ('dampness/N', 3.5298818204206217), ('scrubbing/N', 3.5338012439221873)]
-
###########
electronics/N
[('auto/N', 2.0202737188230628)]
-
[('technology/N', 1.6603514445047367), ('accessories/N', 1.8704970208100498), ('manufacturing/N', 1.8783078875360493), ('equipment/N', 1.9073404858718821)]
-
[('automotive/J', 4.5548799425672026), ('technician/N', 4.7689311091383999), ('manufacturing/N', 4.8170961460018624), ('equipment/N', 4.8971212338781243)]
-
###########
thief/N
[('screwdriver/N', 1.6879510879233697), ('headlight/N', 1.7134091318387474), ('fridge/N', 1.7201023517069958), ('goldfish/N', 1.7250015339830056)]
-
[('witch/N', 1.3377625734244971), ('ghost/N', 1.4020047410729335), ('thumper/N', 1.5924811440986424), ('beast/N', 1.6251420055476555), ('hunter/N', 1.6413146975889272)]
-
[('rogue/N', 4.8409761858744984), ('killer/N', 4.9033962723291094), ('ghost/N', 5.0145059836500359)]
-
###########
telephone/N
[('phone/N', 1.8834229013171777), ('operator/N', 2.5559469588824202), ('hold/V_phone/N', 2.5574580631507788), ('e-mail/J', 2.560386803309143)]
-
[('phone/N', 1.7491341894558381), ('operator/N', 1.7587603152206339), ('cellphone/N', 1.7683868364939983), ('hookup/N', 1.9471076332627226)]
-
[('phone/N', 4.1909307574906318), ('operator/N', 5.0350033983883273), ('switching/N', 5.1710930809633462), ('cable/N', 5.244650560739295), ('cordless/J', 5.2458277014988779)]
-
###########
tacoma/N
[('toda/N', 1.2794339245481754), ('gph/N', 1.3373284063534956), ('bu/N', 1.348800675175551), ('jb/N', 1.3555873190175265), ('noe/N', 1.3617117905877136)]
-
[('washington/N', 1.9285440570413281), ('houston/N', 2.0253575787069336), ('baltimore/N', 2.0818784520091405), ('chicago/N', 2.1103378019641608)]
-
[('pittsburgh/N', 5.2043290157436237), ('wa/N', 5.3240328157813783), ('brooklyn/N', 5.3331670250624068)]
-
###########
johnson/N
[('greenwood/N', 1.3790699844025271), ('ritchie/N', 1.5367165352517069), ('sheldon/N', 1.6414476285379169), ('dexter/N', 1.6579213841396323), ('jackson/N', 1.6611542723486885)]
-
[('smith/N', 0.87698569864600495), ('miller/N', 0.8809579979109835), ('coleman/N', 0.95085524444165281), ('jackson/N', 0.95198054843978408), ('walker/N', 0.97982378174043561)]
-
[('smith/N', 2.7008810460334973), ('miller/N', 2.9100346756157385), ('brown/N', 3.0723276725134645), ('bennett/N', 3.0898867517134025), ('walker/N', 3.1163772226283837)]
-
###########
oven/N
[('refrigerator/N', 1.9672540472684907), ('toothpaste/N', 2.0583072155081266), ('laundry/N', 2.0583181962840644), ('jar/N', 2.060856555766069), ('fitting/N', 2.0802269389502062)]
-
[('stove/N', 1.6058099757395556), ('wood-fired/J', 1.6534331045759991), ('freezer/N', 1.7672777127670656), ('refrigerator/N', 1.8541226484127546), ('tub/N', 1.8670378196335791)]
-
[('wood-fired/J', 4.2117277092494945), ('stove/N', 4.5132432904214239), ('heater/N', 4.9082842362506875), ('coke/N', 5.0264984854410324), ('microwave/N', 5.0988726299615488)]
-
###########
sears/N
[('kmart/N', 1.7764528714002705), ('boots/N', 1.9766217664402312), ('wal-mart/N', 2.0021152511793288), ('hp/N', 2.0609135826540044), ('amp/N', 2.0810957131481014)]
-
[('penney/N', 1.5941652103432005), ('walmart/N', 1.770478306289093), ('meijer/N', 1.8725148145209836), ('wal-mart/N', 1.8798239384488449), ('lowes/N', 1.8941185432091447)]
-
[('kmart/N', 4.0850084824004957), ('walmart/N', 4.2877698620686253), ('penney/N', 4.5204826092424018), ('wal-mart/N', 4.6218666691064323), ('mcdonald/N', 4.6765021302265009)]
-
###########
ski/N
[('skier/N', 1.9283304184903443), ('skiing/N', 2.2364942840994391), ('cross-country/N', 2.3075541767290009)]
-
[('skiing/N', 2.0404186251995902)]
-
[('skiing/N', 4.6596158123245521), ('racer/N', 5.0922322226748458), ('mountaineer/N', 5.0983578396854412)]
-
###########
expedition/N
[('orbiter/N', 1.7028793236390689), ('pod/N', 1.7092430113531663), ('mariner/N', 1.7662663510998555), ('alligator/N', 1.7913546229435584), ('pitbull/N', 1.7942230730052826)]
-
[('mission/N', 2.0942570571490244), ('colony/N', 2.1471253178827006), ('visit/N', 2.1853704290245775), ('retreat/N', 2.1887962402797796)]
-
[('antarctic/J', 5.8907331087312889), ('explorer/N', 5.9900876444401892), ('everest/N', 6.0409896217151386), ('trip/N', 6.0618650101602398)]
-
###########
trio/N
[('youngster/N', 1.5530976210792762), ('pitbull/N', 1.7323751869156117), ('ike/N', 1.751679870322965), ('sage/N', 1.755104390139006), ('long-haired/J', 1.7591354588049861)]
-
[('duo/N', 1.2069740413752748), ('band/N', 1.5392942253191082), ('budgie/N', 1.7640903455741279), ('westies/N', 1.765751059321971), ('zing/N', 1.7839140624202914)]
-
[('duo/N', 3.3775771706972142), ('duet/N', 4.2045302505798441), ('solo/N', 4.4218802667769053), ('jazz/N', 4.5024881249597986), ('blues/N', 4.5178795161878762)]
-
###########
pt/N
[('t/N', 4.1956139038344569), ('t/N_blow/N', 4.6894207333196976), ('t/N_fit/N', 4.7005634047683236)]
-
[('assist/N', 2.4925972473885305), ('d/N', 2.5576345078732059), ('g/N', 2.6467788695585233), ('points/N', 2.7896180001173239)]
-
[('equipo/N', 4.8647883416111739), ('points/N', 5.0535615274923673), ('pl/N', 5.1194703197565277), ('powerhead/N', 5.1310878025384641), ('prob/N', 5.1344464889170869)]
-
###########
nissan/N
[('mazda/N', 1.5398779801394191), ('honda/N', 1.6133353790981619), ('toyota/N', 1.6351126950165933), ('bmw/N', 1.9782320414731493)]
-
[('toyota/N', 1.3994738600433005), ('mazda/N', 1.5519219654609551), ('honda/N', 1.5718188551301702), ('subaru/N', 1.5746438292085596), ('minivan/N', 1.5757667919353169)]
-
[('toyota/N', 4.2719296767853043)]
-
###########
ran/N
[('henrietta/N', 2.4892155374284273), ('katie/N', 2.6498015709314862), ('zen/N', 2.6963130567376385)]
-
[('kongs/N', 2.2221166129969454), ('sun/N', 2.2486333346911915), ('ship/N', 2.278564204210765), ('fleet/N', 2.3215741721201213), ('japanese/J', 2.3333346599225946)]
-
[('parrotlet/N', 4.9550245888054896), ('okey/N', 5.0864911906171626), ('seeder/N', 5.0958108302603815), ('b&s/N', 5.0971555763780652), ('conair/N', 5.1027227430114204)]
-
###########
genuine/J
[('undeniable/J', 1.8239651701094723), ('inescapable/J', 1.8249533954940098), ('befitting/J', 1.8326598981790712), ('simple/J', 1.8445936500401905), ('win-win/J', 1.861914908164922)]
-
[('so-so/J', 1.436679905083514), ('strong/J', 1.4443088162061126), ('godsend/N', 1.449280873221561), ('true/J', 1.454699619621604), ('leery/J', 1.4877884707081017)]
-
[('evident/J', 3.8776415975361878), ('imitation/N', 3.8818627397665675), ('sort/N', 3.8862764662395026), ('kind/N', 3.9420686867120929), ('affection/N', 4.0049329137306406)]
-
###########
cd/N
[('pirate/J', 1.929717423930267), ('notebook/N', 1.9552035692388159), ('dvd/N', 2.06396132051785)]
-
[('cassette/N', 1.8322196452870105), ('dvd/N', 1.836356322005287)]
-
[('dvd/N', 4.0107016423229478), ('release/N', 4.4099276493537856), ('disc/N', 4.4609229007997886)]
-
###########
anvil/N
[('crap/N', 1.3484543280032761), ('inconvenient/J', 1.4485197715507003), ('revolting/J', 1.4966504931583613), ('shabby/J', 1.5144143430901151), ('crumb/N', 1.5206432107987429)]
-
[('hammer/N', 1.3533974853706934), ('blade/N', 1.5421070235171075), ('crusher/N', 1.6198930528270965), ('punch/N', 1.628786743679824), ('dropper/N', 1.6448443019801053)]
-
[('snap-on/N', 4.1851321625575935), ('bristle/N', 4.1901521448729051), ('clicking/N', 4.2312561111582445), ('choker/N', 4.2748078040911954), ('gunk/N', 4.2773916205965978)]
-
###########
solar/J
[('sensor/N', 2.137997224545078), ('ignition/N', 2.2722142591520433), ('underwater/J', 2.2765899792508359), ('compression/N', 2.2781013159279282), ('vector/N', 2.3277283859794582)]
-
[('thermal/J', 2.1732540896960719), ('detector/N', 2.3173266189537913), ('flux/N', 2.3935763198593487)]
-
[('thermal/J', 5.1191214159485421), ('eclipse/N', 5.2842364390186969), ('heating/N', 5.3583939406823191)]
-
###########
binder/N
[('bleach/N', 1.7649501622108072), ('klee/N', 1.8409567182022391), ('sasha/N', 1.8847847044711543), ('holly/N', 1.8906312533498995), ('mandy/N', 1.9153262813747385)]
-
[('paperweight/N', 1.8020673141204686), ('mold/N', 1.8633176215334573), ('plexiglass/N', 1.8898845032618865), ('antistatic/J', 1.907689677590344), ('keyring/N', 1.9109237813418944)]
-
[('antacid/N', 4.0340221708638397), ('clumpy/J', 4.1661151971034114), ('glycerine/N', 4.3181940844900657), ('freon/N', 4.3312003256374565), ('antiseptic/N', 4.3385353099614159)]
-
###########
colorado/N
[('minnesota/N', 2.4118847595862447), ('houston/N', 2.491066259629565), ('baltimore/N', 2.6216706635187794)]
-
[('arizona/N', 1.6796492137525934), ('minnesota/N', 1.7776493368864863), ('idaho/N', 1.7895402069358406), ('arkansas/N', 1.8049583962494167), ('montana/N', 1.8820114877896974)]
-
[('arizona/N', 3.2633366496023664), ('oregon/N', 3.603168566485476), ('arkansas/N', 3.8211869104664324), ('montana/N', 3.8891922006250863), ('texas/N', 4.1277112137375838)]
-
###########
compression/N
[('vector/N', 1.265973427736238), ('gadget/N', 1.362815124157662), ('micro/J', 1.3638703339410436), ('additive/N', 1.4150413248526756), ('bulky/J', 1.4150796202378901)]
-
[('preload/N', 1.8587078270096835), ('readout/N', 1.9251697938457741), ('acceleration/N', 1.9260812307434869), ('wattage/N', 1.9715736320612594), ('sensor/N', 1.9954466202785914)]
-
[('on-the-fly/J', 5.4291793581236556), ('converter/N', 5.4448599575671359)]
-
###########
electric/N
[('hitachi/N', 2.5474062574259064), ('toshiba/N', 2.760912505314284), ('mitsubishi/N', 2.8787691820872401)]
-
[('cables/N', 2.0115335969032939), ('pumps/N', 2.1205166230259569), ('company/N', 2.1759261905929081)]
-
[('works/N', 5.2493214242876185), ('traction/N', 5.3895010428950654), ('limited/N', 5.4953016693278087)]
-
###########
converter/N
[('catalytic/J', 2.1033746826741035)]
-
[('transistor/N', 1.8712116514783814), ('capacitor/N', 1.9905734060894564)]
-
[('transformer/N', 4.9293120824972121), ('torque/N', 5.0074908919429904), ('switching/N', 5.0361909669866574), ('alternator/N', 5.0936856624525815), ('catalytic/J', 5.1401588637313544)]
-
###########
hull/N
[('lincoln/N', 2.3430864414257959), ('plymouth/N', 3.1463268792243357)]
-
[('plymouth/N', 2.0214617150936882), ('cannon/N', 2.0238188718108976), ('pumps/N', 2.0545521388098145), ('lock/N', 2.0572647342205177), ('barrow/N', 2.0683116472525924)]
-
[('bow/N', 5.3168678828703282), ('boat/N', 5.4547998464055487), ('mallet/N', 5.5402885767652936)]
-
###########
pickup/N
[('minivan/N', 1.9617598853312195)]
-
[('coupler/N', 1.833323711035838), ('pushbutton/N', 1.929590128016307), ('muffler/N', 1.9325665110991586)]
-
[('full-size/J', 4.8140178256422139), ('gibson/N', 4.9533778358073999), ('coil/N', 5.0138139814937288), ('noiseless/J', 5.0545117669348638)]
-
###########
ranger/N
[('flint/N', 2.38540484065939), ('sleek/J', 2.4136750122817054), ('ready/N', 2.4138141885846252), ('malibu/N', 2.4158810195585771), ('chestnut/N', 2.416714444158977)]
-
[('trooper/N', 1.8978609698653093), ('cobra/N', 1.9225388735237317), ('hunter/N', 1.9291724701532418), ('falcon/N', 1.987104018559221)]
-
[('ghost/N', 5.2575116683304666), ('beige/N', 5.2961642909334872), ('trooper/N', 5.3002127970659814), ('rogue/N', 5.30596122108172)]
-
###########
exhaust/N
[('propane/N', 1.9571040324828994), ('coil/N', 2.0085803182881747)]
-
[('compressor/N', 1.8326639467289034)]
-
[('inlet/J', 4.8193028247565026), ('recirculation/N', 4.9093312560568787)]
-
###########
pc/N
[('high-end/J', 1.5426065848023887), ('desktop/N', 1.6284358819753115), ('software/N', 1.6906875773405872), ('workstation/N', 1.7382417757919801)]
-
[('console/N', 1.7460273092206184), ('netbook/N', 1.8711497442612759), ('zeebo/N', 1.9957873999213387), ('hardware/N', 2.0076641603672871)]
-
[('tablet/J', 4.649359748567), ('desktop/N', 4.8183930932178001), ('console/N', 4.8890840337071317), ('compatible/J', 5.1059428262528552), ('asus/N', 5.1513983502396705)]
-
###########
international/N
[('rotary/J', 2.7764012728433292), ('center/N', 2.7895130169074425)]
-
[('national/N', 2.0968054839415), ('american/N', 2.1761913725783786), ('spca/N', 2.1972228030942791), ('biannual/J', 2.2073000478104978)]
-
[('american/N', 5.3017719987102412), ('association/N', 5.3543381478826113), ('conjunction/N', 5.4010017485371176)]
-
###########
chin/N_force/N
[('officer/N', 2.5549025151624152), ('guard/N', 2.6282163592147216)]
-
[('army/N', 2.4706604717121383), ('tan/N', 2.5612443064435459)]
-
[('army/N', 6.4524565050191551)]
-
###########
stove/N
[('heater/N', 1.6598341215956494), ('propane/N', 1.7287333507192775), ('aerosol/N', 1.8302661037669781), ('bedding/N', 1.8347734085892984), ('freezer/N', 1.8506305651772117)]
-
[('burner/N', 1.5581788682785596), ('refrigerator/N', 1.5909581007658711), ('wood-fired/J', 1.5995060639473293), ('oven/N', 1.6058099757395556), ('freezer/N', 1.6445501314470856)]
-
[('oven/N', 4.5132432904214239), ('pellet/N', 4.5429741048592822), ('refrigerator/N', 4.6751262331478269), ('heater/N', 4.6927410011125685), ('burner/N', 4.7740927143769429)]
-
###########
input/N
[('lower-priced/J', 1.7678520722199385), ('low-end/J', 1.8145455670271273), ('cellphone/N', 1.8534290025959097), ('insulin/N', 1.8569499809451573)]
-
[('output/N', 1.6135437715997356), ('datum/N', 1.7117251024788722), ('packet/N', 1.713113760692951)]
-
[('output/N', 3.87303431861028), ('feedback/N', 4.0734552172910909), ('switching/N', 4.7104544598666909)]
-
###########
accord/N
[('deal/N', 1.6584265165041843), ('compromise/N', 2.2642786321902046), ('arrangement/N', 2.3595006014372282)]
-
[('particular/N', 2.2368448862145356), ('no-kill/J', 2.3144374759084165), ('peace/N', 2.3220984744167388)]
-
[('accordance/N', 5.1196269728065156), ('compromise/N', 5.353688755622052), ('cr-v/N', 5.3853265952275358)]
-
###########
dual/J
[('individual/J', 2.2860188187291315), ('looser/J', 2.3592920175169403), ('quirk/N', 2.3751881512430582), ('floatation/N', 2.4176190999717861), ('ebay/N', 2.4218731205227177)]
-
[('standard/J', 1.6939504267508665), ('optional/J', 1.922997381447606), ('setup/N', 1.9538754788842008), ('double/J', 1.9690136774923919)]
-
[('configuration/N', 4.8350125730317401), ('standard/J', 4.9229083146829984), ('double/J', 4.9487899762387366), ('separate/J', 5.1302971544600604)]
-
###########
taurus/N
[('cellphone/N', 1.244335394942188), ('hybrid/N', 1.2559919198330753), ('mid-size/J', 1.2955522759252003), ('suv/N', 1.3060656969081181), ('heavy-duty/J', 1.3148948789266397)]
-
[('jaguar/N', 2.0464651021896669), ('sxt/N', 2.0977733391162805), ('falcon/N', 2.110455121280236), ('aerostar/N', 2.150614063939094)]
-
[('aerostar/N', 4.8840413932000013), ('slt/N', 4.9049217148385766), ('pods/N', 4.9770771400558971), ('wix/N', 4.988222247618384), ('hartz/N', 4.9984344030796271)]
-
###########
milwaukee/N
[('cleveland/N', 2.303304132347904), ('indiana/N', 2.8000133132272058)]
-
[('chicago/N', 1.8834565058953401), ('cleveland/N', 1.9245522746333228), ('detroit/N', 1.9772154540866822), ('houston/N', 2.0166675501854576)]
-
[('detroit/N', 3.9554789996106665), ('cincinnati/N', 4.1856023203843673), ('chicago/N', 4.2785906968167682), ('brewer/N', 4.3549487993247418)]
-
###########
technical/J
[('extensive/J', 2.01984332977919), ('readjustment/N', 2.0551694201159925), ('practical/J', 2.1153690615684773), ('overload/N', 2.1234843297291484), ('insulin/N', 2.1314343398795783)]
-
[('practical/J', 1.5349220984839642), ('specialist/N', 1.6161833386043012), ('management/N', 1.7631472750719579), ('creative/J', 1.8035597824929688), ('basic/J', 1.8177909191952117)]
-
[('skill/N', 4.4554394228018639), ('difficulty/N', 4.6170504593642914), ('practical/J', 4.6549625573539251), ('scientific/J', 4.6893259663019791), ('specialist/N', 4.7363700100415516)]
-
###########
satellite/N
[('cable/N', 1.8127334412047782), ('tv/N', 1.9594184329229665), ('channel/N', 1.9610760170010793), ('hookup/N', 2.0833283393323354), ('internet/N', 2.1818439462210457)]
-
[('cable/N', 1.845285475441854), ('broadcast/N', 1.8554925904893509), ('channel/N', 1.8606681411166233), ('transponder/N', 1.9688317560356439), ('radio/N', 1.9832788779391708)]
-
[('tracking/N', 4.9206579748203376), ('radar/N', 5.1050748944217261), ('sirius/N', 5.1831771353275196), ('transmitter/N', 5.2366651130602193), ('gps/N', 5.3944799006876467)]
-
###########
mechanic/N
[('pilot/N', 1.8253746626363203), ('seatbelt/N', 1.8305492421206486), ('screwdriver/N', 1.8466738925985948)]
-
[('model/N', 1.9069335997060088), ('concept/N', 1.9458544457294522)]
-
[('mechanical/J', 5.0879089218055968), ('concept/N', 5.3179509760194312)]
-
###########
cheap/J_brain/N
[('digestive/J_enzyme/N', 2.6082409322935831), ('skin/N_allergy/N', 2.6206588701254017), ('shampoo/N_therapy/N', 2.6566968317881194)]
-
[('breathing/N', 2.2225316240573769), ('painless/J', 2.3150106477798582), ('numbing/J', 2.3170663347598546), ('overload/N', 2.319226053358169)]
-
[('hard/J', 6.3361877422157393), ('cause/V_toy/N', 6.3711519124377336)]
-
###########
taiwan/N
[('china/N', 1.587822897250581), ('cosco/N', 1.9679395920492782), ('fedex/N', 2.2186592527739255), ('ebay/N', 2.3198672786511647)]
-
[('china/N', 1.3755369728802118), ('japan/N', 1.6926957117452053), ('chinese/J', 2.3132585152683891), ('china/N_issue/N', 2.3245071852710883)]
-
[('china/N', 3.9406083886865741), ('kong/N', 4.2078679935870147), ('japan/N', 4.4827514283669583), ('chinese/N', 5.0615725149249808), ('hawaii/N', 5.2313794011579775)]
-
###########
clock/N
[('orbiter/N', 1.9092249609627858), ('sled/N', 1.9159954932304419), ('porch/N', 1.937036179978247), ('accelerator/N', 1.9630265633114554)]
-
[('timer/N', 1.7114937595224557), ('frame/N', 1.7914929218185063)]
-
[('timer/N', 4.8712891740049677)]
-
###########
gutter/N
[('scum/N', 1.8837105157453296), ('whisper/N', 2.0475461070312506), ('baptism/N', 2.0761651362049429), ('laughing/N', 2.0915681597406044)]
-
[('poop/N', 1.7715160268384695), ('windowsill/N', 1.7883997112251753), ('lather/N', 1.8099635253168169), ('crumpled/J', 1.8196211783157381), ('trash/N', 1.8440697405465847)]
-
[('downspout/N', 3.8277816998467924), ('gunk/N', 4.1121201427927661), ('atomizer/N', 4.1747741196233594), ('panting/N', 4.1984644708964733)]
-
###########
uncle/N
[('cousin/N', 1.4744973105475152), ('grandfather/N', 1.5305242993525301), ('father/N', 1.575279227975829), ('aunt/N', 1.6453574973404492), ('brother/N', 1.6863375243810128)]
-
[('grandfather/N', 1.1125539275284944), ('father/N', 1.1154626967551595), ('father-in-law/N', 1.1417661516210817), ('nephew/N', 1.1697524217340378), ('brother/N', 1.2089525095690539)]
-
[('nephew/N', 2.4504370390163057), ('grandfather/N', 2.7365420272539214), ('cousin/N', 3.0487368444773968), ('brother/N', 3.1953992852352577), ('grandson/N', 3.3479195547724858)]
-
###########
vector/N
[('cellphone/N', 1.2627352013525723), ('compression/N', 1.265973427736238), ('catalog/N', 1.2748567448893449), ('insulin/N', 1.2929817651438145), ('fragrance/N', 1.3181207068685894)]
-
[('discrete/J', 1.5105380619366151), ('linear/J', 1.770923441216768), ('bundle/N', 2.028467330808871), ('function/N', 2.050200151518931)]
-
[('bundle/N', 5.1401320448372729), ('discrete/J', 5.5888068609267583), ('linear/J', 5.721651147337278)]
-
###########
documentation/N
[('email/N', 1.6677625393740096), ('shredding/N', 1.7138216097986321), ('efficacy/N', 1.7526703782176842), ('catalog/N', 1.7697896830666617)]
-
[('document/N', 1.7331694796540389), ('information/N', 1.7456059568790412), ('application/N', 1.9240984181329785), ('add-in/N', 1.9874622372510149)]
-
[('description/N', 4.4673844247740755), ('document/N', 4.7064877226551864), ('inclusion/N', 4.7298541946805299), ('webpage/N', 4.7367360489401342), ('on-line/J', 4.7911175926494431)]
-
###########
transmission/N
[('switching/N', 2.1175745714859229), ('spread/N', 2.1771859807764407), ('compression/N', 2.2464386487542773)]
-
[('switching/N', 1.7760613505463771), ('shutoff/N', 1.7841173461264146), ('readout/N', 1.7984237096355238), ('switch/N', 1.8047594354075651), ('pushbutton/N', 1.8265445459318741)]
-
[('automatic/J', 5.1535584769748191), ('pulse/N', 5.2108656173843126)]
-
###########
downs/N
[('teamwork/N', 2.4867523634176036), ('thoughtful/J', 2.5465285868262741), ('methodical/J', 2.5479608676162822)]
-
[('hill/N', 1.9894592331113776), ('thorley/N', 2.0357268674115274), ('sunland/N', 2.0499622323534941)]
-
[('devon/N', 4.8578175227882952), ('darling/N', 5.0930248548805457), ('sussex/N', 5.4919310712683762)]
-
###########
helmet/N
[('sweater/N', 1.9974598603016471), ('jacket/N', 2.0501151078391389), ('sunglass/N', 2.0799949296142746), ('bandana/N', 2.1107767648313516)]
-
[('jacket/N', 1.4229922089514682), ('glove/N', 1.5770037437999418), ('mask/N', 1.5891019006764173), ('vest/N', 1.5930352193458988)]
-
[('visor/N', 4.2236495710804407), ('worn/J', 4.5588772437700058), ('mask/N', 4.5636143633219062), ('decal/N', 4.6129255630240769)]
-
###########
readjustment/N
[('overload/N', 1.3988100811422015), ('looser/J', 1.4217153930016482), ('quirk/N', 1.4308044782220308), ('micro/J', 1.4452221848986482), ('lowered/J', 1.4634660924691616)]
-
[('continual/J', 1.7271582105996426), ('adjustment/N', 1.7489690070150616), ('over-zealous/J', 1.7558736838282334), ('preexisting/J', 1.7658188928966623), ('measure/N', 1.7703220182007557)]
-
[('nicety/N', 3.7558225313005673), ('stress-free/J', 3.7849947612811818), ('quietness/N', 3.8525302477996202), ('healthiness/N', 3.8842215358579892), ('heft/N', 3.8860204128376634)]
-
###########
canoe/N
[('sandy/J', 1.7484060332318847), ('enclosure/N', 1.7848483245637297), ('rusting/J', 1.810253762088059), ('waterfall/N', 1.8343317568601105), ('tortoise/N', 1.8369028125676503)]
-
[('boat/N', 2.1157283243701022), ('sailboat/N', 2.1265704948783335), ('skiff/N', 2.1672500440217335)]
-
[('kayak/N', 4.2394272113403471), ('sailing/N', 5.3420380533367595)]
-
###########
daybreak/N
[('nightfall/N', 1.9937478932032529), ('dusk/N', 2.1400816793178885), ('dark/N', 2.1475135906503979), ('shuffling/N', 2.1504906911143467), ('sunrise/N', 2.1715327359796435)]
-
[('midnight/N', 1.6731099469599209), ('sunrise/N', 1.9212799298142651), ('dawn/N', 1.995272851428697), ('dusk/N', 2.0074484011337477), ('noon/N', 2.0122250737328429)]
-
[('nightfall/N', 3.7841794317804225), ('backside/N', 4.1518623573571176), ('tach/N', 4.1773090021242147), ('ouch/N', 4.1942851145462212), ('shite/N', 4.2606453994941891)]
-
###########
ignition/N
[('heavy-duty/J', 1.6223775803661933), ('seatbelt/N', 1.6304883261336121), ('aerodynamics/N', 1.6464859312484867), ('undergrowth/N', 1.6479320551776513), ('wiring/N', 1.6520710338925033)]
-
[('compressor/N', 1.7635099375113934), ('tailpipe/N', 1.8332897916013542), ('hydraulic/J', 1.8363539846597343), ('injector/N', 1.8614095525010306)]
-
[('timing/N', 4.8654442791397789)]
-
###########
pontiac/N
[('flint/N', 1.8267180717654312), ('malibu/N', 1.8852319198115113), ('aquarium/N', 1.9832204593464975), ('dry/N', 2.0093101916337233), ('cats/N', 2.0188764877902075)]
-
[('buick/N', 1.5930288687654615), ('dodge/N', 1.6564196980618924), ('cadillac/N', 1.7437237768088309), ('chevrolet/N', 1.7540861354719386), ('honda/N', 1.9459730381327667)]
-
[('firebird/N', 4.8980557834929765), ('cadillac/N', 5.1661019412835829), ('jeep/N', 5.1723713357547147)]
-
###########
coil/N
[('shampoo/N', 1.3117844621482253), ('coating/N', 1.3379188080757403), ('fitting/N', 1.35157385283681), ('starch/N', 1.3548084612900941), ('latex/N', 1.3592139301935691)]
-
[('solenoid/J', 1.75595778483704), ('baffle/N', 1.7747943478306547), ('clamp/N', 1.7756482955084023), ('tube/N', 1.7785265231006142)]
-
[('pickup/N', 5.0138139814937288)]
-
###########

In [166]:
# Collect, per class label (values of ytr), the scores of original features
# and of their thesaurus replacements:
#   doc_scores / doc_repl_scores -- one summed score per document
#   scores / repl_scores         -- every individual score, flattened
# Scores are looked up in orig_lo_automotive by vocabulary index
# (presumably per-feature log-odds for the automotive class -- confirm).
doc_scores = defaultdict(list)
doc_repl_scores = defaultdict(list)
scores = defaultdict(list)
repl_scores = defaultdict(list)
for doc, label in zip(xtr, ytr):
    # Scores of the document's own in-vocabulary features.
    lo_orig = [orig_lo_automotive[vocab[feat]] for feat in doc if feat in vocab]

    # Scores of the neighbours v3 proposes for each in-vocabulary feature.
    # The `for feat` clause must come first: comprehension clauses nest left
    # to right, and the leftmost iterable is evaluated in the enclosing scope.
    # With the clauses the other way round, `feat` in the neighbour lookup was
    # whatever value an earlier cell had left behind, not this document's.
    # `or ()` skips features with no neighbours, mirroring the `if neigh:`
    # guard used in the v1 cell further down. Neighbours are indexed into
    # vocab directly, as in that cell (assumes every neighbour is in vocab --
    # TODO confirm).
    lo_repl = [orig_lo_automotive[vocab[repl[0]]]
               for feat in doc if feat in vocab
               for repl in (v3.get_nearest_neighbours(feat) or ())]

    # Document-level totals.
    lo_sum = sum(lo_orig)
    lo_repl_sum = sum(lo_repl)
    doc_scores[label].append(lo_sum)
    doc_repl_scores[label].append(lo_repl_sum)

    # Feature-level scores, pooled over all documents with this label.
    scores[label].extend(lo_orig)
    repl_scores[label].extend(lo_repl)

In [171]:
# Document-level densities, one curve per class label.
# Left panel: summed scores of each document's original features.
# Right panel: summed scores of the replacements for those features.
f, axes = plt.subplots(1, 2)
for label in doc_scores:
    for panel_ax, panel_scores in zip(axes, (doc_scores, doc_repl_scores)):
        sns.kdeplot(np.array(panel_scores[label]),
                    shade=True, alpha=.5, label=label, ax=panel_ax)
plt.legend();

# Clip the x-ranges so the bulk of each distribution is visible.
axes[0].set_xlim(-500, 500)
axes[1].set_xlim(-100, 400)

# most replacements at the document level are towards non-automotive


Out[171]:
(-100, 400)

In [130]:
# Feature-level densities, one curve per class label.
# Left panel: scores of individual original features (scores).
# Right panel: scores of individual replacements (repl_scores).
f, axes = plt.subplots(1, 2)
for label in scores:
    for panel_ax, panel_scores in zip(axes, (scores, repl_scores)):
        sns.kdeplot(np.array(panel_scores[label]),
                    shade=True, alpha=.5, label=label, ax=panel_ax)
plt.legend();
# Cap the density axis of the left panel.
axes[0].set_ylim(0, .3)


Out[130]:
(0, 0.3)

In [138]:
# Histogram of the exponentiated replacement scores stored under the
# 'Pet_Supplies' label. This needs repl_scores to be the per-label mapping;
# a cell further down rebinds the same name to a flat list, so this cell
# fails if run after that one.
# NOTE(review): if the stored scores are log-odds, np.exp turns them into
# odds -- confirm against how orig_lo_automotive is computed.
plt.hist(np.exp(repl_scores['Pet_Supplies']));



In [135]:
# Distribution of document lengths: len() of every entry of xtr, 100 bins.
plt.hist(list(map(len, xtr)), bins=100);



In [164]:
# Build aligned (original score, replacement score) pairs using thesaurus v1.
# For every in-vocabulary feature of the first 601 documents (i = 0..600),
# each neighbour contributes one pair: the feature's own score goes into
# orig_scores, the neighbour's score into repl_scores.
# Note: this rebinds repl_scores to a flat list.
orig_scores, repl_scores = [], []
for i, (doc, label) in enumerate(zip(xtr, ytr)):
    if i > 600:
        break
    for feat in doc:
        if feat not in vocab:
            continue
        neigh = v1.get_nearest_neighbours(feat)
        if not neigh:
            # no neighbours for this feature -> nothing to pair up
            continue
        # Same for every neighbour of this feature, so look it up once.
        feat_score = orig_lo_automotive[vocab[feat]]
        for repl in neigh:
            orig_scores.append(feat_score)
            repl_scores.append(orig_lo_automotive[vocab[repl[0]]])

In [165]:
# Density plot over the paired scores built in the previous cell:
# orig_scores as the first data argument, repl_scores as the second, unshaded.
# NOTE(review): passing two positional arrays is presumably meant to draw a
# bivariate (joint) density; how the second positional argument is
# interpreted depends on the installed seaborn version -- confirm.
sns.kdeplot(np.array(orig_scores), np.array(repl_scores), shade=False)
# Optional reference lines at zero on both axes (currently disabled).
# plt.axvline(0);
# plt.axhline(0);



In [ ]: