Summary

This notebook replicates the experiments in the ICWSM'17 paper entitled "Identifying Leading Indicators of Product Recalls from Online Reviews using Positive Unlabeled Learning and Domain Adaptation," by Shreesh Kumara Bhat and Aron Culotta. A full version of the paper is here: https://arxiv.org/abs/1703.00518

This notebook first downloads all the required data files (~194 MB) from Dropbox into the local folder `data`.


In [1]:
from collections import Counter, defaultdict
from datetime import datetime
import gzip
from IPython.display import display
from itertools import groupby, cycle
import json
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
from sklearn.feature_selection import chi2, f_classif
from scipy.sparse import hstack as sp_hstack
from scipy.sparse import vstack as sp_vstack
from scipy.sparse import csr_matrix, lil_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, auc, classification_report, confusion_matrix, f1_score, precision_recall_curve, precision_score, recall_score, roc_auc_score, roc_curve
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import scale
from tabulate import tabulate
import urllib.request

%matplotlib inline

PATH = 'data'
# Locations of the downloaded input files, all resolved inside PATH.
complaints_file = os.path.join(PATH, 'complaints.csv')
reviews_file = os.path.join(PATH, 'reviews.json.gz')
test_file = os.path.join(PATH, 'test.csv')
recalls_file = os.path.join(PATH, 'recalls.csv')
recalled_asins_file = os.path.join(PATH, 'recalled_asins.txt')

# Formatting for matplotlib: same tick-label font size on both axes.
plt.rcParams.update({"xtick.labelsize": "16", "ytick.labelsize": "16"})


/usr/lib64/python3.4/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

Download data from Dropbox.


In [2]:
def download_data(path, files=None):
    """
    Download any required files if not already present.

    Parameters
    ----------
    path : str
        Directory to store the files in; created if it does not exist.
    files : list of (url, name) pairs, optional
        What to fetch and the file name to store it under inside `path`.
        Defaults to the five Dropbox files used by this notebook.

    A file that already exists in `path` is never fetched again. Each
    download is written to '<name>.part' first and only renamed to its
    final name once complete, so an interrupted transfer cannot leave a
    truncated file behind that a later run would mistake for a finished one.
    """
    if files is None:
        files = [('https://www.dropbox.com/s/k18rpimaif014b0/complaints.csv?dl=1', 'complaints.csv'),
                 ('https://www.dropbox.com/s/jwr77xpsa7d1w4b/recalls.csv?dl=1', 'recalls.csv'),
                 ('https://www.dropbox.com/s/ww0bmhf4iw84a33/reviews.json.gz?dl=1', 'reviews.json.gz'),
                 ('https://www.dropbox.com/s/mtppphs0bml727a/test.csv?dl=1', 'test.csv'),
                 ('https://www.dropbox.com/s/53g3hqfodeb52xi/recalled_asins.txt?dl=1', 'recalled_asins.txt')]

    os.makedirs(path, exist_ok=True)

    for url, name in files:
        target = os.path.join(path, name)
        if os.path.exists(target):
            continue
        print('fetching %s' % name)
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(url, partial)
            # The rename only happens after the whole file has arrived.
            os.replace(partial, target)
        finally:
            # Clean up whatever a failed or interrupted transfer left behind.
            if os.path.exists(partial):
                os.remove(partial)

download_data(PATH)


fetching complaints.csv
fetching recalls.csv
fetching reviews.json.gz
fetching test.csv
fetching recalled_asins.txt

Read and explore review data.


In [3]:
def parse_all_reviews(filename):
    """
    Parse all reviews into a sparse document x term csr_matrix.

    `filename` is a gzip-compressed file with one JSON object per line; each
    object must have the keys 'reviewText', 'asin', 'reviewTime' and 'overall'.

    Returns a tuple (X, vec, reviews_df):
      X          -- binary unigram/bigram term matrix, one row per review
      vec        -- the fitted CountVectorizer
      reviews_df -- DataFrame with columns ASIN, review_time, review_score
                    and reviewText, in the same row order as X
    """
    records = []

    def iter_texts():
        # Stream the file so the raw JSON never has to sit in memory at once.
        # The metadata of each review is recorded as a side effect so that
        # `records` ends up aligned row-for-row with the matrix built from the
        # yielded texts. The with-block closes the gzip handle when done.
        with gzip.open(filename, 'rt') as infile:
            for i, line in enumerate(infile, start=1):
                js = json.loads(line)
                records.append((js['asin'], js['reviewTime'], js['overall'], js['reviewText']))
                yield js['reviewText']
                if i % 100000 == 0:
                    print('read %d reviews' % i)

    vec = CountVectorizer(min_df=50, ngram_range=(1,2), max_df=.95, binary=True)
    X = vec.fit_transform(iter_texts())
    return X, vec, pd.DataFrame(records, columns=['ASIN', 'review_time', 'review_score', 'reviewText'])

X_reviews, vec, reviews_df = parse_all_reviews(reviews_file)
print('X_reviews has shape %s' % str(X_reviews.shape))
reviews_df.head()


read 100000 reviews
read 200000 reviews
read 300000 reviews
read 400000 reviews
read 500000 reviews
read 600000 reviews
read 700000 reviews
read 800000 reviews
read 900000 reviews
X_reviews has shape (915446, 136160)
Out[3]:
ASIN review_time review_score reviewText
0 0188399313 05 27, 2013 5.0 They work very well. Easy to clean, we wash th...
1 0188399399 04 9, 2013 5.0 it came early and was not disappointed. i love...
2 0188399518 02 14, 2014 4.0 I ended up with a variety of different brands ...
3 0188399518 07 8, 2013 3.0 These flannel wipes are OK, but in my opinion ...
4 0316967297 09 6, 2013 4.0 Cute quilt, the colors are perfect and my litt...

In [4]:
# reformat date string.
def format_dates(reviews_df):
    """
    Rewrite reviews_df['review_time'] in place from 'MM D, YYYY' to 'YYYY-MM-DD'.

    Month and day are zero-padded to two digits. Values that do not split
    into exactly three whitespace-separated parts (in particular dates this
    function has already converted) are left unchanged, so re-running the
    cell is harmless. Prints the first ten converted values as a sanity check.
    """
    def to_iso(raw):
        parts = [piece.replace(',', '') for piece in raw.split()]
        if len(parts) != 3:
            # Already reformatted (or not in the expected layout): keep as is.
            return raw
        month, day, year = parts
        return '%s-%s-%s' % (year, month.zfill(2), day.zfill(2))

    new_dates = [to_iso(raw) for raw in reviews_df['review_time']]
    print(new_dates[:10])
    reviews_df['review_time'] = new_dates
    
format_dates(reviews_df)


['2013-05-27', '2013-04-09', '2014-02-14', '2013-07-08', '2013-09-06', '2013-03-22', '2012-03-07', '2013-04-23', '2012-11-26', '2013-08-02']

In [5]:
# Number of reviews by score.
# (Series.value_counts replaces the deprecated top-level pd.value_counts.)
reviews_df['review_score'].value_counts()


Out[5]:
5.0    534132
4.0    165123
3.0     82931
1.0     76938
2.0     56322
Name: review_score, dtype: int64

In [6]:
# Exploring length distribution of reviews.
lengths = [len(x.split()) for x in reviews_df['reviewText']]
lengths = [l for l in lengths if l != 0]
print(np.median(lengths))
pd.DataFrame(lengths).describe()


55.0
Out[6]:
0
count 915104.000000
mean 81.951987
std 87.139825
min 1.000000
25% 31.000000
50% 55.000000
75% 100.000000
max 4546.000000

Read and explore complaints data.


In [7]:
def parse_complaints(complaints_file, vec):
    """
    Parse all the CPSC complaints, using the same vectorizer fit on the Amazon review data.

    Reads `complaints_file` as CSV and vectorizes its 'Incident Description'
    column with the already-fitted `vec`.

    Side effect: stores the vocabulary as an array on `vec.features`, indexed
    by feature column; other cells in this notebook read that attribute.

    Returns (X, complaints_df).
    """
    complaints_df = pd.read_csv(complaints_file)
    X = vec.transform(complaints_df['Incident Description'])
    # get_feature_names() was removed from recent scikit-learn releases in
    # favour of get_feature_names_out(); support both.
    if hasattr(vec, 'get_feature_names_out'):
        names = vec.get_feature_names_out()
    else:
        names = vec.get_feature_names()
    vec.features = np.array(names, dtype=str)
    return X, complaints_df

X_complaints, complaints_df = parse_complaints(complaints_file, vec)
print('complaints feature matrix has shape %s' % str(X_complaints.shape))
complaints_df.head()


complaints feature matrix has shape (2010, 136160)
Out[7]:
Report No. Report Date Sent to Manufacturer / Importer / Private Labeler Publication Date Category of Submitter Product Description Product Category Product Sub Category Product Type Product Code ... Submitter Has Product Product Was Damaged Before Incident Damage Description Damage Repaired Product Was Modified Before Incident Have You Contacted The Manufacturer If Not Do You Plan To Answer Explanation Company Comments Associated Report Numbers
0 20160509-F1AD6-2147419650 5/9/2016 5/17/2016 6/8/2016 Consumer Munchkin pacifier clip Baby Nursery Equipment & Supplies Pacifiers or Teething Rings 1525 ... NaN Yes NaN NaN NaN Yes Yes NaN NaN NaN
1 20160506-66663-2147419715 5/6/2016 5/16/2016 5/31/2016 Consumer Baby Einstein Bouncer, Multicolor, has a piano... Baby Nursery Equipment & Supplies Baby Bouncer Seats (Excl. Jumpers) 1558 ... NaN Yes NaN NaN NaN Yes Yes I thought I was contacting them. the link on t... NaN NaN
2 20160429-84BCB-2147419859 4/29/2016 5/9/2016 5/31/2016 Consumer Luxury teether toys for happy baby teething.\r... Baby Nursery Equipment & Supplies Pacifiers or Teething Rings 1525 ... NaN Yes NaN NaN NaN Yes Yes I still have the product and plan on reaching ... NaN NaN
3 20160505-69C2D-2147419760 5/5/2016 5/13/2016 5/27/2016 Consumer Graco Lauren Classic Crib, model #2354497, pro... Baby Nursery Equipment & Supplies Cribs 1543 ... NaN Yes NaN NaN NaN Yes NaN I still have this product. Graco said they are... NaN NaN
4 20160504-C585C-2147419771 5/4/2016 5/12/2016 5/26/2016 Consumer Baby swing savanah model CMH84 Baby Nursery Equipment & Supplies Portable Baby Swings (For Home Use) 1553 ... NaN NaN NaN NaN NaN Yes Yes Tryin to figure out who to contact about incid... NaN NaN

5 rows × 40 columns


In [8]:
# Frequency of each victim severity.
complaints_df['(Primary) Victim Severity'].value_counts()


Out[8]:
Incident, No Injury                                       909
Injury, First Aid Received by Non-Medical Professional    299
Injury, No First Aid or Medical Attention Received        298
Injury, Seen by Medical Professional                      194
Unspecified                                                87
Injury, Emergency Department Treatment Received            82
Injury, Level of care not known                            66
Injury, Hospital Admission                                 30
No Incident, No Injury                                     26
Death                                                      19
Name: (Primary) Victim Severity, dtype: int64

In [9]:
# Frequency of product types in complaint data.
complaints_df['Product Type'].value_counts()


Out[9]:
Cribs                                                           407
Bassinets or Cradles                                            258
Diapers                                                         209
Pacifiers or Teething Rings                                     186
Baby Exercisers                                                 132
High Chairs                                                     116
Night-lights                                                    101
Baby Gates or Barriers                                           74
Playpens                                                         57
Infant & Toddler Play Ctrs, Excl Jumpers,bouncers&exercisers     55
Baby Bouncer Seats (Excl. Jumpers)                               55
Attached Highchair                                               49
Portable Baby Swings (For Home Use)                              46
Baby Mattresses or Pads                                          43
Baby Walkers or Jumpers                                          32
Cribs, Not Specified                                             28
Baby Carriers or Slings (Backpacks)                              25
Baby Baths or Bathinettes                                        21
Baby Bottles or Nipples                                          20
Potty Chairs or Training Seats                                   20
Portable Cribs                                                   19
Crib Mobiles or Crib Gyms                                        18
Baby Bathtub Seats or Rings (Not Toys)                           10
Bottle Warmers                                                    8
Baby Carriers (Bicycle-mounted)                                   7
Baby Changing Tables                                              6
Baby Carriers, Not Specified                                      3
Baby Harnesses                                                    2
Desks, Chests, Bureaus or Buffets                                 1
Sterilizers (Home Use)                                            1
Jewelry                                                           1
Name: Product Type, dtype: int64

In [10]:
# complaints by year
sorted(Counter([d[-4:] for d in complaints_df['Report Date']]).items())


Out[10]:
[('2011', 502),
 ('2012', 447),
 ('2013', 432),
 ('2014', 332),
 ('2015', 240),
 ('2016', 57)]

In [11]:
# Distribution of number of words per complaint.
lengths = [len(x.split()) for x in complaints_df['Incident Description']]
print(np.median(lengths))
display(pd.DataFrame(lengths).describe())


98.0
0
count 2010.000000
mean 124.513930
std 114.461285
min 4.000000
25% 56.000000
50% 98.000000
75% 159.000000
max 1683.000000

Read labeled data.


In [12]:
def parse_test_data(filename, vec):
    """
    Parse labeled Amazon reviews using the same
    vectorizer fit to the unlabeled Amazon reviews.

    Reads `filename` as CSV, drops rows containing any missing value,
    renames the 'Review Text' column to 'text' and vectorizes it with `vec`.

    Returns (X, df), with the rows of X in the same order as df.
    """
    # Read the file that was actually passed in, not the module-level test_file.
    df = pd.read_csv(filename)
    df = df.dropna().rename(columns={'Review Text': 'text'})
    X = vec.transform(df['text'])
    return X, df

X_test, test_df = parse_test_data(test_file, vec)
print('X_test has shape %s' % str(X_test.shape))
test_df.head()


X_test has shape (448, 136160)
Out[12]:
label ASIN text
0 1 B002NU50LO We purchased this dresser 2 years ago and were...
1 1 B004C43JJ4 I just got my order today and put my six month...
2 1 B00020L78M I personally didn't buy this gate, specificall...
3 1 B00HVSVPQ2 I bought it for my son who is only six months ...
4 1 B0091DHACS These are currently being recalled, and the mu...

In [13]:
# Label distribution
display(test_df['label'].value_counts())


0    351
1     97
Name: label, dtype: int64

Read recalled amazon products.


In [14]:
# This file contains manually labeled instances
# of recall / product pairs. The label indicates whether the match is valid.
recalls_df = pd.read_csv(recalls_file, sep='\t')
recalls_df.head()


Out[14]:
label RecallNumber RecallName AmazonTitle AmazonAsin NumReviews Score RecallTitle RecallDescription RecallDate
0 1 8263 Munchkin Deluxe Bottle and Food Warmers with P... Munchkin Deluxe Bottle And Food Warmer With P... B00007C65S 32 9 Baby Bottle and Food Warmers Recalled by Munch... Baby Bottle and Food Warmers Recalled by Munch... 2008-04-08T00:00:00
1 1 11056 The First Years American Red Cross Cabinet Swi... The First Years American Red Cross Cabinet And... B001ODU26E 4 8 The First Years® Recalls American Red Cross® C... This recall involves The First Years American ... 2010-12-02T00:00:00
2 0 14072 Zoom Car Seat Adapter, Adapter clips can loose... Phil&Teds Car Seat Adapter For Peg Perego ... B003BNTNLA 5 7 Joovy Recalls Zoom Car Seat Stroller Adapter d... This recall involves all Joovy’s Zoom gray met... 2013-12-30T00:00:00
3 0 13061 Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd... Dream On Me 2 in 1 Baby Tunes Musical Activity... B005GU18FU 11 7 Dream On Me Recalls Bath Seats Due to Drowning... The recall includes all Dream On Me bath seats... 2012-12-06T00:00:00
4 0 13061 Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd... Dream On Me 2 In 1 Baby Bather and Changing St... B003ZUXWNE 7 7 Dream On Me Recalls Bath Seats Due to Drowning... The recall includes all Dream On Me bath seats... 2012-12-06T00:00:00

In [15]:
# Number of unique recalls.
len(set(recalls_df[recalls_df.label==1]['RecallNumber']))


Out[15]:
47

In [16]:
# Number of unique recalled ASINs
# (read inside a with-block so the file handle is closed afterwards).
with open(recalled_asins_file) as asin_file:
    recalled_asins = set(line.strip() for line in asin_file)
print('%d recalled ASINs' % len(recalled_asins))


137 recalled ASINs

Classification Experiments

Here we perform classification experiments to evaluate the effectiveness of the proposed domain adaptation method for positive unlabeled learning.


In [17]:
class Data:
    """
    Container for all the data.

    Bundles the three feature matrices (complaints, labeled test reviews,
    unlabeled reviews) with their DataFrames, the recall tables and the
    shared vectorizer, so they can be passed around as a single object.
    """
    def __init__(self, X_complaints, complaints_df,
                 X_test, test_df,
                 X_reviews, reviews_df,
                 recalls_df, recalled_asins, vec):
        # Feature matrices.
        self.X_complaints, self.X_test, self.X_reviews = X_complaints, X_test, X_reviews
        # The DataFrame belonging to each matrix.
        self.complaints_df, self.test_df, self.reviews_df = complaints_df, test_df, reviews_df
        # Recall information and the vectorizer.
        self.recalls_df, self.recalled_asins = recalls_df, recalled_asins
        self.vec = vec

data = Data(X_complaints, complaints_df,
            X_test, test_df,
            X_reviews, reviews_df,
            recalls_df, recalled_asins, vec)

In [18]:
class Evaluator(object):
    """
    Evaluation metrics.

    Holds a Data container and scores models in two ways: on the labeled
    test reviews, and on how well per-product scores rank recalled products.
    """
    def __init__(self, data):
        self.data = data

    def evaluate(self, model):
        """
        Evaluate on test data.

        Fits `model` on self.data, then returns a dict with:
          f1, recall, precision, roc_auc -- on the labeled test reviews
          pr_auc, pr_at_k                -- from evaluate_recalls
        """
        model.fit(self.data)
        preds = model.predict(self.data)
        probas = model.predict_proba(self.data)
        truths = np.array(self.data.test_df['label'])
        f1 = f1_score(truths, preds)
        recall = recall_score(truths, preds)
        precision = precision_score(truths, preds)
        roc_auc = roc_auc_score(truths, probas, average=None)
        # Only the summary statistics of the product-level curve are reported.
        _, _, pr_auc, pr_at_k = self.evaluate_recalls(model)
        return {'f1': f1, 'roc_auc': roc_auc, 'pr_auc': pr_auc, 'recall': recall, 'precision': precision, 'pr_at_k': pr_at_k}

    def evaluate_recalls(self, model):
        """
        Evaluate against recalled products.

        Every ASIN in the review data gets the score returned by
        model.score_asin_recalls; an ASIN counts as positive when it is in
        self.data.recalled_asins.

        Returns (interpolated precision, recall, area under that
        precision-recall curve, precision at k) where k is the number of
        positive ASINs. Precision at k is 0.0 when there are no positives.
        """
        asin2recall_score = model.score_asin_recalls(self.data)
        # Sorted for reproducibility: a set of strings iterates in a different
        # order from one interpreter run to the next, and with tied scores the
        # top-k selection below depends on that order.
        asins = sorted(set(self.data.reviews_df.ASIN))
        probas = np.array([asin2recall_score[x] for x in asins])
        # Use this evaluator's own data rather than a module-level global.
        recalled = self.data.recalled_asins
        truths = np.array([1 if x in recalled else 0 for x in asins])
        prec, recall, thresholds = precision_recall_curve(truths, probas)
        prec = self._interpolate(prec)
        pr_auc = auc(recall, prec)
        # evaluate precision at number of true positives.
        rank = int(truths.sum())
        pred = np.argsort(probas)[::-1][:rank]
        correct = len(set(pred) & set(np.where(truths==1)[0]))
        pr_at_k = correct / rank if rank else 0.0
        return prec, recall, pr_auc, pr_at_k

    def plot_prec_recalls(self, results):
        """
        Plot one precision-recall curve per entry of `results`.

        Each entry must provide 'prcurve' (indexed as [0]=precision,
        [1]=recall) and 'model' (used as the legend label). Note that the
        dicts returned by evaluate() do not contain a 'prcurve' key.
        """
        plt.figure()
        for r in results:
            plt.plot(r['prcurve'][1], r['prcurve'][0], '.-', label=r['model'])
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.legend(loc="best")
        plt.xlim(-.01, .2)
        plt.show()

    def _interpolate(self, prec):
        """
        Replace every entry of `prec` by the running maximum of the entries
        up to and including it. Modifies `prec` in place and returns it.
        """
        p_temp = prec[0]
        n = len(prec)
        for i in range(n):
            if prec[i] < p_temp:
                prec[i] = p_temp
            else:
                p_temp = prec[i]
        return prec

    def confusion(self, truths, preds, labels):
        """
        Return the confusion matrix of `truths` vs `preds` as a text table,
        with `labels` as both the header row and the first column.
        """
        m = confusion_matrix(truths, preds)
        m = np.vstack((labels, m))
        m = np.hstack((np.matrix([''] + list(labels)).T, m))
        return tabulate(m.tolist(), headers='firstrow')

    def top_terms(self, model, n=10):
        """
        Print top terms per class.

        Uses the first row of model.get_coef(): negated for the first entry
        of model.clf.classes_ and as-is for the second. Term names are read
        from self.data.vec.features.
        """
        coef = model.get_coef()
        print('\n\nTOP FEATURES:')
        coefs = [-coef[0], coef[0]]
        for li, label in enumerate(model.clf.classes_):
            print('\nCLASS %s' % label)
            coef = coefs[li]
            top_coef_ind = np.argsort(coef)[::-1][:n]
            top_coef_terms = self.data.vec.features[top_coef_ind]
            top_coef = coef[top_coef_ind]
            print('\n'.join(['%s\t%.3f' % (term, weight)
                             for term, weight in zip(top_coef_terms, top_coef)]))

    def top_error_terms(self, model):
        """
        Print top terms appearing in incorrectly labeled documents.

        For each class label, the test documents wrongly predicted as that
        label are marked as errors; features are scored with f_classif
        against that error indicator, and the five highest-scoring features
        are printed together with up to three example error documents.
        """
        truths = np.array(self.data.test_df['label'])
        preds = model.predict(self.data)
        X = self.data.X_test
        texts = np.array(self.data.test_df['text'])
        print('\n\nERROR ANALYSIS:\n')
        for label in model.clf.classes_:
            print('\nincorrectly labeled %s' % label)
            iserror = np.zeros(len(truths))
            ind = [i for i, (t, p) in enumerate(zip(truths, preds)) if t != p and p == label]
            iserror[ind] = 1
            corrs, _ = f_classif(X, iserror)
            corrs = np.nan_to_num(corrs)
            # Flip the sign of features that are relatively more frequent in
            # the correctly handled documents.
            pos_mask, pos_counts, neg_counts = self.get_pos_mask(X, iserror)
            corrs *= pos_mask
            # ignore features from only 1 incorrect instance.
            # (sign(count - 1) is 0 for one error document, -1 for none.)
            corrs *= np.sign(X.sign()[np.where(iserror == 1)].sum(axis=0).A1 - 1)
            for fidx in np.argsort(corrs)[::-1][:5]:
                print('\n\t%s (%d incorrect, %d correct) corr=%.4f' %
                      (self.data.vec.features[fidx], pos_counts[fidx], neg_counts[fidx], corrs[fidx]))
                matches = []
                for midx in range(X.shape[0]):
                    if X[midx, fidx] > 0 and iserror[midx] == 1:
                        matches.append(midx)
                for m in matches[:3]:
                    print('\t\t' + texts[m])

    def get_pos_mask(self, X, y, reg=1):
        """
        Get mask for indices that are more associated with class 1 than class 0.

        Returns (mask, pos_counts, neg_counts). The counts are the number of
        rows with y == 1 / y == 0 in which each feature is non-zero; mask is
        +1 where the add-one-smoothed share of a feature is larger among the
        y == 1 rows and -1 otherwise. `reg` is accepted but not used.
        """
        presence = X.sign()
        pos_counts = presence[np.where(y == 1)[0]].sum(axis=0).A1
        neg_counts = presence[np.where(y == 0)[0]].sum(axis=0).A1
        posp = (1. + pos_counts) / pos_counts.sum()
        negp = (1. + neg_counts) / neg_counts.sum()
        diffs = np.where(posp - negp > 0, 1, -1)
        return diffs, pos_counts, neg_counts
    
    
def average_results(results):
    """
    Collapse a list of per-run metric dicts into one summary dict.

    For every key of the first dict the summary holds the mean over all runs
    under the same key, and the standard deviation divided by the square
    root of the number of runs under '<key>_se'.
    """
    summary = {}
    for metric in results[0]:
        samples = [run[metric] for run in results]
        n_runs = len(samples)
        summary[metric] = np.mean(samples)
        summary[metric + '_se'] = np.std(samples) / math.sqrt(n_runs)
    return summary
        
def evaluate_models(models, data, seeds=(42, 11111, 12345678, 919191, 5555)):
    """
    Evaluate all models using multiple seeds and average the results.

    Each model is evaluated once per seed (its `seed` attribute is set before
    every run) and the per-seed metrics are averaged with average_results.

    Returns a DataFrame with one row per model, sorted by mean f1 in
    descending order, with the model in the first column.
    """
    evaluator = Evaluator(data)
    all_results = []
    for m in models:
        results = []
        for seed in seeds:
            m.seed = seed
            name = str(m)
            print('Evaluating %s' % name)
            results.append(evaluator.evaluate(m))
        r = average_results(results)
        r['model'] = m
        all_results.append(r)
    df = pd.DataFrame(all_results).sort_values('f1', ascending=False)
    # Put the model column first without mutating the frame in place.
    ordered = ['model'] + [c for c in df.columns if c != 'model']
    return df[ordered]

In [19]:
# Here we implement the different classification models.


# Some helper functions.
def _count(probs):
    return len(np.where(np.array(probs) >= .5)[0])

def _count_mean(probs):
    return len(np.where(np.array(probs) >= .5)[0]) / len(probs)

def _mean(probs):
    return np.mean(probs)

def _max(probs):
    return max(probs)

class Model(object):
    """
    Abstract base class.

    Owns a classifier in self.clf (built by make_clf) and provides the
    prediction helpers shared by all models; subclasses implement fit().
    """
    def __init__(self, C=1):
        self.C = C
        self.make_clf()

    def fit(self, data):
        """Train self.clf on `data`. No-op here; overridden by subclasses."""
        pass

    def predict(self, data):
        """Predict a class label for each labeled test example."""
        return self.clf.predict(data.X_test)

    def predict_proba(self, data):
        """
        Predict the probability of recall on each test example.
        """
        return self.clf.predict_proba(data.X_test)[:,1]

    def predict_reviews(self, data):
        """Predict a class label for each unlabeled review."""
        return self.clf.predict(data.X_reviews)

    def predict_proba_reviews(self, data):
        """Second column of predict_proba for each unlabeled review."""
        return self.clf.predict_proba(data.X_reviews)[:,1]

    def score_asin_recalls(self, data, aggregator_fn=_count):
        """
        Compute a score indicating the likelihood that each product
        should be recalled, based on the classification of each review.

        Returns a dict mapping each ASIN to aggregator_fn applied to the
        list of probabilities of all of that product's reviews.
        """
        probas = self.predict_proba_reviews(data)

        # Collect by key rather than with itertools.groupby: groupby only
        # merges *consecutive* rows, so a product whose reviews are not
        # contiguous would be scored from its last run of rows alone.
        by_asin = defaultdict(list)
        for asin, proba in zip(data.reviews_df['ASIN'], probas):
            by_asin[asin].append(proba)
        return {asin: aggregator_fn(group) for asin, group in by_asin.items()}

    def make_clf(self):
        """Create the class-balanced logistic regression used as self.clf."""
        self.clf = LogisticRegression(class_weight='balanced', C=self.C)

    def get_coef(self):
        """Return the coefficient matrix of the fitted classifier."""
        return self.clf.coef_

    
class RandomNegativeSamples(Model):
    """
    Treat all complaints as positives and a uniform random sample of
    n_neg unlabeled reviews as negatives.

    n_neg=-1 means "as many negatives as there are complaints".
    """
    def __init__(self, n_neg=-1, seed=42, C=1):
        super().__init__(C=C)
        self.n_neg = n_neg
        self.seed = seed

    def fit(self, data):
        n_pos = data.X_complaints.shape[0]
        n_sampled = n_pos if self.n_neg == -1 else self.n_neg
        # Seed right before sampling so the draw depends only on self.seed.
        random.seed(self.seed)
        self.neg_sample_idx = random.sample(range(data.X_reviews.shape[0]), n_sampled)
        # Complaints first (label 1), then the sampled reviews (label 0).
        X = sp_vstack((data.X_complaints, data.X_reviews[self.neg_sample_idx]))
        y = np.array([1] * n_pos + [0] * n_sampled)
        self.clf.fit(X, y)

    def __str__(self):
        return "RandomNegSamples(C=%g,n=%d)" % (self.C, self.n_neg)
    

class RandomNegativeSamplesThreshold(Model):
    """
    Sample n_neg random examples from the unlabeled data
    with review >= threshold and pretend they are negative.

    n_neg=-1 means "as many negatives as there are complaints".
    """

    def __init__(self, threshold=4.5, n_neg=-1, seed=42, C=1):
        super().__init__(C=C)
        self.seed = seed
        self.threshold = threshold
        self.n_neg = n_neg

    def fit(self, data):
        if self.n_neg == -1:
            neg = data.X_complaints.shape[0]
        else:
            neg = self.n_neg
        random.seed(self.seed)
        # Row positions of the reviews whose score reaches the threshold.
        pos_reviews = list(np.where(data.reviews_df['review_score'] >= self.threshold)[0])
        samplei = random.sample(pos_reviews, neg)
        self.neg_sample_idx = samplei
        # Complaints first (label 1), then the sampled reviews (label 0).
        X = sp_vstack((data.X_complaints, data.X_reviews[samplei]))
        y = np.concatenate(([1] * data.X_complaints.shape[0],
                            [0] * neg))
        self.clf.fit(X, y)

    def __str__(self):
        # %g (not %d) so a fractional C such as 0.5 is not printed as 0.
        return "RandomNegSamplesThresh(C=%g, nneg=%d, t=%.1f)" % (self.C, self.n_neg, self.threshold)

    
class RandomNegativeSamplesThresholdInformedPrior(Model):
    """
    Sample n_neg random examples from the unlabeled data
    and pretend they are negative.
    Also implement the informed prior approach, described
    in the paper.

    As implemented here: a first classifier is fit, every unlabeled review is
    classified with it, and a per-feature weight is derived from how often
    the feature occurs in reviews predicted positive vs. negative. All
    feature matrices are multiplied by these weights before the classifier
    is refit and before any prediction.

    Note that the default C here is 1000, unlike the other models.
    """

    # Terms whose weight is printed after fitting, as a sanity check.
    _DEBUG_TERMS = ('hazard', 'crib', 'pampers', 'very dangerous')

    def __init__(self, threshold=4.5, n_neg=-1, seed=42, C=1000):
        super().__init__(C=C)
        self.seed = seed
        self.threshold = threshold
        self.n_neg = n_neg

    def _display_top_transforms(self, data, indices, transform, n=20):
        """Display the n features among `indices` with the largest weight."""
        tops = []
        for i in sorted(indices, key=lambda x: -transform[x])[:n]:
            tops.append({'term': data.vec.features[i], 'coef': self.clf.coef_[0][i], 'transform': transform[i]})
        if tops:
            display(pd.DataFrame(tops).sort_values('transform', ascending=False))

    def fit(self, data):
        if self.n_neg == -1:
            neg = data.X_complaints.shape[0]
        else:
            neg = self.n_neg
        random.seed(self.seed)
        pos_reviews = list(np.where(data.reviews_df['review_score'] >= self.threshold)[0])
        samplei = random.sample(pos_reviews, neg)
        self.neg_sample_idx = samplei
        X = sp_vstack((data.X_complaints, data.X_reviews[samplei]))
        y = np.concatenate(([1] * data.X_complaints.shape[0],
                            [0] * neg))
        self.clf.fit(X, y)

        # Now, predict on all unlabeled reviews, compute priors and refit.
        predictions = self.clf.predict(data.X_reviews)
        ## get positive/negative coefficients.
        pos_coef_i = np.where(self.clf.coef_[0] > 0)
        neg_coef_i = np.where(self.clf.coef_[0] < 0)

        # Per-feature totals over the reviews predicted positive / negative.
        ppos_counts = data.X_reviews[np.where(predictions==1)].sum(axis=0).A1
        pneg_counts = data.X_reviews[np.where(predictions==0)].sum(axis=0).A1
        # Add-one smoothed share of each feature's occurrences falling in the
        # class that matches the sign of its coefficient.
        totals = 2 + ppos_counts + pneg_counts
        pos_probs = ((1 + ppos_counts) / totals)[pos_coef_i]
        print('pos probs1', pos_probs[:20])
        neg_probs = ((1 + pneg_counts) / totals)[neg_coef_i]
        # Normalize so that pos and neg coef are in similar range.
        pos_probs = pos_probs / pos_probs.sum()
        neg_probs = neg_probs / neg_probs.sum()
        print('pos probs2', pos_probs[:20])

        # Features whose coefficient is exactly zero keep a weight of 0.
        transform = np.zeros(X.shape[1])
        transform[pos_coef_i] = pos_probs
        transform[neg_coef_i] = neg_probs
        # Rescale so that the mean weight over all features is 1.
        transform *= len(transform) / transform.sum()
        print('mean=', np.mean(transform))
        print('mean2=', np.mean(transform[pos_coef_i]))
        print('pos probs3', transform[pos_coef_i][:20])

        ranked = sorted(transform)
        print('transform:', ranked[:10], ranked[::-1][:10])
        self._display_top_transforms(data, pos_coef_i[0], transform)
        # Some debug statements. Terms are looked up defensively: a term that
        # is not in the fitted vocabulary must not abort the fit.
        for term in self._DEBUG_TERMS:
            idx = data.vec.vocabulary_.get(term)
            if idx is None:
                print('%s transform= (not in vocabulary)' % term)
            else:
                print('%s transform=' % term, transform[idx])

        self._display_top_transforms(data, neg_coef_i[0], transform)
        self.transform = csr_matrix(transform)
        self.clf.fit(X.multiply(self.transform), y)

    def predict(self, data):
        return self.clf.predict(data.X_test.multiply(self.transform))

    def predict_proba(self, data):
        """
        Predict the probability of recall on each test example.
        """
        return self.clf.predict_proba(data.X_test.multiply(self.transform))[:,1]

    def predict_reviews(self, data):
        return self.clf.predict(data.X_reviews.multiply(self.transform))

    def predict_proba_reviews(self, data):
        return self.clf.predict_proba(data.X_reviews.multiply(self.transform))[:,1]

    def __str__(self):
        return "RandNegSampThreshInfoPrior(C=%g, nneg=%d, t=%.1f)" % (self.C, self.n_neg, self.threshold)

In [20]:
# Collect all models for comparison:
#   1. the plain random-negatives baseline,
#   2. score-thresholded negatives at three thresholds,
#   3. the informed-prior variant at the same thresholds,
#   4. the informed-prior variant at threshold 5.0 with varying sample sizes.
models = (
    [RandomNegativeSamples(n_neg=20000, C=1)]
    + [RandomNegativeSamplesThreshold(threshold=t, n_neg=20000, C=1)
       for t in (3.0, 4.0, 5.0)]
    + [RandomNegativeSamplesThresholdInformedPrior(threshold=t, n_neg=20000, C=1)
       for t in (3.0, 4.0, 5.0)]
    + [RandomNegativeSamplesThresholdInformedPrior(threshold=5.0, n_neg=n, C=1)
       for n in (1000, 5000, 10000, 30000, 40000, 50000)]
)

results = evaluate_models(models, data, seeds=[123456, 42, 987987])


Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.08571429  0.01005025  0.01612903  0.04819277  0.07142857  0.07079646
  0.03816794  0.03225806  0.03703704  0.02797203  0.02994012  0.03977273
  0.05392157  0.02020202  0.01403509  0.01851852  0.01369863  0.02597403
  0.02409639  0.01010101]
pos probs2 [  1.01988815e-04   1.19584875e-05   1.91914437e-05   5.73431089e-05
   8.49906792e-05   8.42385493e-05   4.54148667e-05   3.83828874e-05
   4.40692411e-05   3.32830632e-05   3.56248356e-05   4.73243555e-05
   6.41596304e-05   2.40377679e-05   1.66999229e-05   2.20346205e-05
   1.62995823e-05   3.09057015e-05   2.86715544e-05   1.20188839e-05]
mean= 1.0
mean2= 2.13423618295
pos probs3 [ 6.94339853  0.81413383  1.30655349  3.90391885  5.78616544  5.73496044
  3.09184413  2.61310697  3.00023393  2.26591094  2.42533881  3.22184212
  4.36798764  1.63649124  1.13693075  1.50011697  1.10967556  2.10406016
  1.95195943  0.81824562]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [61.523784421246077, 55.231579196345898, 47.130947580881831, 45.903579154296366, 44.89506678449964, 41.799259135794578, 40.503158077320329, 40.193973664516356, 30.973003235597897, 28.229473811465688]
coef term transform
0 0.000309 safety commission 61.523784
1 0.000702 consumer product 55.231579
2 0.094143 have choked 47.130948
3 0.006806 product safety 45.903579
4 0.362624 cpsc 44.895067
5 0.193792 be recalled 41.799259
6 0.105946 dangerous product 40.503158
7 0.005696 commission 40.193974
8 0.035305 extremely dangerous 30.973003
9 0.005788 seriously injured 28.229474
10 0.182547 leaned forward 27.576618
11 0.995862 recalled 27.002105
12 0.016994 to recall 27.002105
13 0.046329 he leaned 26.241483
14 0.087320 her throat 25.888617
15 0.079764 choked on 25.684930
16 0.009330 plastic broke 25.016656
17 0.014789 happened if 25.001949
18 0.183882 been recalled 24.558476
19 0.001871 face first 24.109023
hazard transform= 12.510975495
crib transform= 1.90864838583
pampers transform= 1.62012632309
very dangerous transform= 19.7466920266
coef term transform
0 -0.054946 super easy 0.756345
1 -0.075466 are great 0.756289
2 -0.046282 cup holders 0.756272
3 -0.065321 so cute 0.756248
4 -0.034513 love love 0.756227
5 -0.060443 are soft 0.756226
6 -0.034254 work great 0.756214
7 -0.022103 really love 0.756213
8 -0.025111 it super 0.756210
9 -0.143647 love that 0.756204
10 -0.017637 will love 0.756188
11 -0.028556 but overall 0.756187
12 -0.028631 great quality 0.756183
13 -0.009706 how well 0.756173
14 -0.045497 still loves 0.756171
15 -0.011078 colors are 0.756168
16 -0.030595 and cute 0.756160
17 -0.117890 works great 0.756160
18 -0.026571 vibrant 0.756158
19 -0.024115 are perfect 0.756152
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.01507538  0.01612903  0.01748252  0.05421687  0.05357143  0.0619469
  0.03053435  0.04444444  0.02797203  0.0239521   0.03977273  0.05882353
  0.03398058  0.01299892  0.03571429  0.01403509  0.01851852  0.03225806
  0.01369863  0.02739726]
pos probs2 [  1.78314965e-05   1.90777839e-05   2.06787168e-05   6.41289363e-05
   6.33654966e-05   7.32721966e-05   3.61167207e-05   5.25698934e-05
   3.30859469e-05   2.83310803e-05   4.70440808e-05   6.95778001e-05
   4.01930011e-05   1.53754204e-05   4.22436644e-05   1.66010190e-05
   2.19041223e-05   3.81555678e-05   1.62030493e-05   3.24060987e-05]
mean= 1.0
mean2= 2.16257425114
pos probs3 [ 1.21396828  1.29881553  1.40780704  4.36589798  4.31392301  4.98837115
  2.45882634  3.57895835  2.25249127  1.92877995  3.20276102  4.73685663
  2.73633951  1.04675862  2.87594867  1.13019737  1.49123264  2.59763106
  1.1031036   2.2062072 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [63.198061921474853, 56.734623770414927, 46.973828283031715, 46.851818339439419, 45.599378918467963, 41.873812640873986, 41.800047850549483, 41.562096914018845, 31.72258533399544, 29.605353959893939]
coef term transform
0 0.000322 safety commission 63.198062
1 0.000671 consumer product 56.734624
2 0.003853 product safety 46.973828
3 0.091019 have choked 46.851818
4 0.358254 cpsc 45.599379
5 0.197924 be recalled 41.873813
6 0.005214 commission 41.800048
7 0.095590 dangerous product 41.562097
8 0.024793 seriously injured 31.722585
9 0.038375 extremely dangerous 29.605354
10 0.010754 plastic broke 28.421140
11 0.161353 leaned forward 28.269964
12 0.068356 her throat 28.225806
13 0.151812 arm stuck 27.767780
14 0.020216 to recall 26.842188
15 0.978791 recalled 26.657069
16 0.047072 he leaned 24.951893
17 0.104567 choked on 24.059766
18 0.007896 happened if 23.859722
19 0.190692 been recalled 23.319910
hazard transform= 11.9596858041
crib transform= 2.04866652901
pampers transform= 1.77158438096
very dangerous transform= 20.7450701875
coef term transform
0 -0.040329 so cute 0.756491
1 -0.026346 super easy 0.756487
2 -0.102125 are great 0.756431
3 -0.053281 love love 0.756370
4 -0.022434 are soft 0.756368
5 -0.021039 really love 0.756355
6 -0.014006 it super 0.756353
7 -0.072784 great to 0.756342
8 -0.006480 will love 0.756331
9 -0.010272 colors are 0.756311
10 -0.158837 love that 0.756305
11 -0.037007 and cute 0.756303
12 -0.166537 works great 0.756303
13 -0.066330 vibrant 0.756301
14 -0.006770 just love 0.756291
15 -0.096246 neutral 0.756279
16 -0.102921 great price 0.756276
17 -0.013747 are nice 0.756271
18 -0.015055 great gift 0.756270
19 -0.037904 these work 0.756270
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.01612903  0.05120482  0.05357143  0.05309735  0.03816794  0.03225806
  0.02962963  0.02797203  0.0239521   0.03409091  0.04901961  0.02912621
  0.01256031  0.01851852  0.03225806  0.04781705  0.02409639  0.01428571
  0.00826446  0.01398601]
pos probs2 [  1.90839316e-05   6.05857345e-05   6.33859155e-05   6.28249782e-05
   4.51604487e-05   3.81678631e-05   3.50578891e-05   3.30966086e-05
   2.83402097e-05   4.03364917e-05   5.80001841e-05   3.44622453e-05
   1.48614053e-05   2.19111807e-05   3.81678631e-05   5.65773106e-05
   2.85109339e-05   1.69029108e-05   9.77854344e-06   1.65483043e-05]
mean= 1.0
mean2= 2.19634158144
pos probs3 [ 1.29923406  4.12467681  4.31531313  4.27712452  3.07452335  2.59846812
  2.38674109  2.25321711  1.92940148  2.74610835  3.94865254  2.34618966
  1.01176447  1.49171318  2.59846812  3.85178331  1.94102438  1.15075017
  0.66572324  1.12660856]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [62.198774891855123, 54.92216709899278, 46.866915924473837, 45.614072915950644, 44.303881459854189, 41.813517547121222, 41.242886013536989, 40.27625587259471, 30.512315054995995, 29.136014886557877]
coef term transform
0 0.001510 safety commission 62.198775
1 0.000437 consumer product 54.922167
2 0.117785 have choked 46.866916
3 0.290730 cpsc 45.614073
4 0.004052 product safety 44.303881
5 0.007271 commission 41.813518
6 0.204041 be recalled 41.242886
7 0.079250 dangerous product 40.276256
8 0.024925 seriously injured 30.512315
9 0.152220 leaned forward 29.136015
10 0.036079 extremely dangerous 28.430298
11 0.180001 been recalled 28.065807
12 0.968364 recalled 27.869317
13 0.007379 serious injury 27.199549
14 0.031881 to recall 26.850837
15 0.086748 her throat 25.743586
16 0.188896 arm stuck 24.999055
17 0.033067 he leaned 24.959933
18 0.024676 face first 24.932920
19 0.098794 choked on 24.558693
hazard transform= 12.5303907159
crib transform= 2.03962344233
pampers transform= 1.93326028188
very dangerous transform= 19.8592064967
coef term transform
0 -0.050110 so cute 0.752067
1 -0.068935 super easy 0.752063
2 -0.085507 are perfect 0.752019
3 -0.101070 are great 0.752008
4 -0.131239 love that 0.752004
5 -0.040556 also love 0.752000
6 -0.076168 great price 0.751958
7 -0.024094 love love 0.751947
8 -0.021744 are soft 0.751945
9 -0.030350 really love 0.751932
10 -0.010568 will love 0.751908
11 -0.028522 but overall 0.751906
12 -0.080791 love these 0.751904
13 -0.084411 love them 0.751893
14 -0.024971 and cute 0.751880
15 -0.134213 works great 0.751880
16 -0.014960 just love 0.751868
17 -0.052358 are easy 0.751868
18 -0.008053 great gift 0.751848
19 -0.042026 these work 0.751848
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.01507538  0.01612903  0.05421687  0.07142857  0.0619469   0.04580153
  0.03225806  0.03703704  0.02797203  0.02994012  0.04545455  0.05392157
  0.04854369  0.01447426  0.02020202  0.01851852  0.03225806  0.01369863
  0.01818182  0.06444906]
pos probs2 [  1.55543786e-05   1.66415126e-05   5.59395423e-05   7.36981271e-05
   6.39151899e-05   4.72568143e-05   3.32830251e-05   3.82138437e-05
   2.88608050e-05   3.08914305e-05   4.68988082e-05   5.56348607e-05
   5.00861058e-05   1.49341633e-05   2.08439147e-05   1.91069218e-05
   3.32830251e-05   1.41338874e-05   1.87595233e-05   6.64968548e-05]
mean= 1.0
mean2= 2.22789449571
pos probs3 [ 1.05894209  1.13295418  3.80836404  5.01736849  4.35134613  3.21724392
  2.26590835  2.60159848  1.96484361  2.10308859  3.19287086  3.78762131
  3.40986208  1.01671784  1.41905372  1.30079924  2.26590835  0.96223505
  1.27714834  4.52710588]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [56.016696340039786, 48.691280602931407, 41.468852846107097, 40.868746997870289, 40.389816368989003, 36.526442629346576, 36.462097750962521, 36.254533627142997, 26.607257160071804, 26.154367676496115]
coef term transform
0 0.001109 safety commission 56.016696
1 0.000945 consumer product 48.691281
2 0.328170 cpsc 41.468853
3 0.074076 have choked 40.868747
4 0.006838 product safety 40.389816
5 0.213595 be recalled 36.526443
6 0.003579 commission 36.462098
7 0.084593 dangerous product 36.254534
8 0.023888 seriously injured 26.607257
9 0.173409 leaned forward 26.154368
10 0.005332 plastic broke 25.824691
11 0.039767 extremely dangerous 24.791703
12 0.945911 recalled 24.221779
13 0.042526 he leaned 23.744166
14 0.017658 to recall 23.414386
15 0.170728 been recalled 23.202491
16 0.082703 her throat 23.173001
17 0.081996 choked on 23.128845
18 0.168018 arm stuck 23.010690
19 0.027412 first into 22.964110
hazard transform= 11.9153210287
crib transform= 1.99012129877
pampers transform= 2.52875372049
very dangerous transform= 19.6525181417
coef term transform
0 -0.038867 are perfect 0.757949
1 -0.080609 so cute 0.757897
2 -0.037098 super easy 0.757889
3 -0.047753 love love 0.757876
4 -0.116802 are great 0.757817
5 -0.166313 love that 0.757811
6 -0.014396 just love 0.757797
7 -0.013212 great gift 0.757776
8 -0.055244 these work 0.757776
9 -0.099074 love these 0.757766
10 -0.022087 in great 0.757758
11 -0.033653 works very 0.757748
12 -0.039976 great to 0.757724
13 -0.010060 pricey but 0.757724
14 -0.026124 great quality 0.757698
15 -0.011436 great little 0.757689
16 -0.171237 works great 0.757684
17 -0.030785 great price 0.757677
18 -0.016034 buying more 0.757658
19 -0.022507 are soft 0.757651
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.0201005   0.01612903  0.08333333  0.01666667  0.05421687  0.07079646
  0.04580153  0.03703704  0.03496503  0.02994012  0.04545455  0.05392157
  0.03883495  0.01265823  0.03703704  0.03225806  0.05821206  0.01204819
  0.01449275  0.01818182]
pos probs2 [  2.04264965e-05   1.63906162e-05   8.46848501e-05   1.69369700e-05
   5.50961675e-05   7.19446514e-05   4.65443451e-05   3.76377112e-05
   3.55321049e-05   3.04256947e-05   4.61917364e-05   5.47960795e-05
   3.94647845e-05   1.28635215e-05   3.76377112e-05   3.27812323e-05
   5.91561531e-05   1.22435928e-05   1.47278000e-05   1.84766946e-05]
mean= 1.0
mean2= 2.22164208328
pos probs3 [ 1.39063588  1.11587315  5.7653446   1.15306892  3.75094709  4.89799187
  3.16873901  2.56237538  2.4190257   2.07138129  3.14473342  3.73051709
  2.68676253  0.87574855  2.56237538  2.2317463   4.0273509   0.8335438
  1.00266863  1.25789337]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.026267825965014, 40.010102258364078, 36.252486821266054, 35.707940725206647, 28.302600745263231, 26.30945984770948, 25.760050323702934, 24.708619698245677, 24.417930054736903, 24.417930054736903]
coef term transform
0 0.087824 have choked 44.026268
1 0.350106 cpsc 40.010102
2 0.182726 be recalled 36.252487
3 0.085475 dangerous product 35.707941
4 0.027400 seriously injured 28.302601
5 0.042119 he leaned 26.309460
6 0.176600 leaned forward 25.760050
7 0.025582 face first 24.708620
8 0.036379 extremely dangerous 24.417930
9 0.008237 plastic broke 24.417930
10 0.012195 to recall 24.342566
11 0.907292 recalled 24.254208
12 0.086792 choked on 24.045706
13 0.143552 arm stuck 23.856598
14 0.179617 been recalled 23.165729
15 0.056270 her throat 22.823632
16 0.032190 first into 22.617890
17 0.009580 happened if 22.207253
18 0.214492 is dangerous 21.720601
19 0.005191 serious injury 21.563886
hazard transform= 12.3506493129
crib transform= 1.96011715675
pampers transform= 1.93715578434
very dangerous transform= 19.9311635904
coef term transform
0 -0.068405 are perfect 0.758899
1 -0.051351 also love 0.758880
2 -0.229245 love that 0.758843
3 -0.052369 super easy 0.758839
4 -0.011543 love love 0.758826
5 -0.057430 are soft 0.758824
6 -0.093075 are great 0.758807
7 -0.088662 great to 0.758799
8 -0.021812 will love 0.758787
9 -0.029720 and cute 0.758759
10 -0.016050 vibrant 0.758757
11 -0.014625 great gift 0.758726
12 -0.018091 these work 0.758726
13 -0.045323 works very 0.758698
14 -0.051409 cup holders 0.758694
15 -0.013936 pricey but 0.758674
16 -0.031808 can beat 0.758650
17 -0.119674 love these 0.758650
18 -0.011255 great little 0.758639
19 -0.102801 great price 0.758626
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.01507538  0.01612903  0.02097902  0.01666667  0.04819277  0.03571429
  0.07079646  0.03816794  0.03225806  0.03703704  0.02797203  0.0239521
  0.03409091  0.05392157  0.01052632  0.01851852  0.01612903  0.05197505
  0.02409639  0.03030303]
pos probs2 [  1.59710557e-05   1.70873122e-05   2.22254551e-05   1.76568893e-05
   5.10560655e-05   3.78361914e-05   7.50027157e-05   4.04356244e-05
   3.41746245e-05   3.92375318e-05   2.96339401e-05   2.53751703e-05
   3.61163645e-05   5.71252302e-05   1.11517196e-05   1.96187659e-05
   1.70873122e-05   5.50630644e-05   2.55280328e-05   3.21034351e-05]
mean= 1.0
mean2= 2.13149655604
pos probs3 [ 1.08730947  1.16330422  1.51310898  1.20208103  3.47589694  2.57588791
  5.10618489  2.75285731  2.32660844  2.67129117  2.01747864  1.72754159
  2.4588021   3.88908567  0.75920907  1.33564558  1.16330422  3.74869343
  1.73794847  2.18560186]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.58627802266782, 39.972814811250743, 38.081926875831577, 37.225734971487363, 28.412824230131456, 28.38957314647341, 27.357706088959464, 26.516493201197342, 25.455833473149447, 24.70483761972703]
coef term transform
0 0.097777 have choked 44.586278
1 0.348655 cpsc 39.972815
2 0.187012 be recalled 38.081927
3 0.093058 dangerous product 37.225735
4 0.027220 seriously injured 28.412824
5 0.170727 leaned forward 28.389573
6 0.150988 arm stuck 27.357706
7 0.006212 plastic broke 26.516493
8 0.031533 extremely dangerous 25.455833
9 0.944766 recalled 24.704838
10 0.099355 choked on 24.188216
11 0.074376 her throat 23.793769
12 0.074566 bruise on 23.647496
13 0.032826 he leaned 23.364392
14 0.196156 been recalled 23.171336
15 0.079805 an unsafe 22.868859
16 0.016743 to recall 22.705975
17 0.005195 happened if 22.260760
18 0.197837 is dangerous 21.805191
19 0.084945 hazard to 21.510924
hazard transform= 12.2345135446
crib transform= 1.85577122699
pampers transform= 1.87524639919
very dangerous transform= 20.3787697334
coef term transform
0 -0.019607 also love 0.765346
1 -0.089914 are great 0.765313
2 -0.040619 love love 0.765291
3 -0.019955 will love 0.765252
4 -0.043356 how well 0.765237
5 -0.030687 still loves 0.765234
6 -0.176153 love that 0.765226
7 -0.036675 and cute 0.765223
8 -0.032474 vibrant 0.765221
9 -0.022013 just love 0.765212
10 -0.062500 super easy 0.765199
11 -0.011823 are nice 0.765191
12 -0.005157 great gift 0.765191
13 -0.070969 these work 0.765191
14 -0.158091 works great 0.765182
15 -0.094045 love these 0.765180
16 -0.055547 in great 0.765172
17 -0.021649 works very 0.765162
18 -0.065848 great to 0.765138
19 -0.027989 pricey but 0.765138
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.0201005   0.02097902  0.05722892  0.07079646  0.03816794  0.03225806
  0.03703704  0.04195804  0.0239521   0.03977273  0.04901961  0.05339806
  0.01265823  0.01654771  0.03703704  0.03225806  0.04109589  0.06029106
  0.02409639  0.01694915]
pos probs2 [  1.86545594e-05   1.94698810e-05   5.31121153e-05   6.57036694e-05
   3.54223027e-05   2.99375590e-05   3.43727530e-05   3.89397621e-05
   2.22290857e-05   3.69116495e-05   4.54933495e-05   4.95568331e-05
   1.17476497e-05   1.53573387e-05   3.43727530e-05   2.99375590e-05
   3.81396300e-05   5.59539825e-05   2.23629959e-05   1.57299039e-05]
mean= 1.0
mean2= 2.19549163146
pos probs3 [ 1.2700024   1.3255095   3.61587281  4.47310581  2.41155037  2.03814902
  2.34009702  2.651019    1.51335616  2.5129451   3.09718723  3.3738292
  0.79977999  1.04552762  2.34009702  2.03814902  2.59654601  3.80934713
  1.52247276  1.07089186]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [51.185919650409389, 45.950996049799343, 39.05834664232944, 38.06181901715307, 37.909571741084456, 34.37134504524991, 33.761705112925085, 33.629458802574916, 27.762060113420432, 26.94552893361395]
coef term transform
0 0.000489 safety commission 51.185920
1 0.001652 consumer product 45.950996
2 0.104694 have choked 39.058347
3 0.315255 cpsc 38.061819
4 0.005814 product safety 37.909572
5 0.181024 be recalled 34.371345
6 0.005105 commission 33.761705
7 0.094375 dangerous product 33.629459
8 0.027799 seriously injured 27.762060
9 0.040540 extremely dangerous 26.945529
10 0.163109 leaned forward 26.214066
11 0.071244 her throat 23.449220
12 0.933195 recalled 23.239584
13 0.010669 plastic broke 23.228904
14 0.177406 been recalled 23.157431
15 0.039774 he leaned 23.137297
16 0.163501 arm stuck 22.876466
17 0.010156 to recall 22.230922
18 0.084141 choked on 21.574553
19 0.016158 face first 21.060873
hazard transform= 12.2855093605
crib transform= 2.09451875605
pampers transform= 2.27457430447
very dangerous transform= 18.7272584317
coef term transform
0 -0.040616 super easy 0.780839
1 -0.031266 love love 0.780825
2 -0.035879 are soft 0.780824
3 -0.125359 are great 0.780806
4 -0.035414 are perfect 0.780748
5 -0.011094 great gift 0.780723
6 -0.020518 these work 0.780723
7 -0.124954 love that 0.780717
8 -0.034575 works very 0.780694
9 -0.150607 works great 0.780671
11 -0.022074 pricey but 0.780669
10 -0.078288 great to 0.780669
12 -0.084417 so cute 0.780640
13 -0.006706 great little 0.780633
14 -0.079870 great price 0.780620
15 -0.026168 can carry 0.780606
16 -0.024982 glad bought 0.780594
17 -0.029822 they wash 0.780592
18 -0.019830 nice quality 0.780591
19 -0.056346 love these 0.780576
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.02512563  0.01612903  0.05084746  0.05722892  0.05357143  0.07079646
  0.05343511  0.03225806  0.05185185  0.04195804  0.03592814  0.05113636
  0.07352941  0.01265823  0.04761905  0.01403509  0.05555556  0.01265823
  0.03225806  0.02409639]
pos probs2 [  2.23760318e-05   1.43639688e-05   4.52830202e-05   5.09661302e-05
   4.77088963e-05   6.30489249e-05   4.75874996e-05   2.87279375e-05
   4.61774996e-05   3.73664083e-05   3.19963855e-05   4.55403101e-05
   6.54827988e-05   1.12729881e-05   4.24079078e-05   1.24991728e-05
   4.94758924e-05   1.12729881e-05   2.87279375e-05   2.14594232e-05]
mean= 1.0
mean2= 2.2228752408
pos probs3 [ 1.52336024  0.97789899  3.08286801  3.46977414  3.24802166  4.29237081
  3.23975697  1.95579799  3.14376417  2.54390508  2.17831393  3.10038431
  4.45806894  0.76746503  2.88713036  0.85094369  3.36831876  0.76746503
  1.95579799  1.46095753]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [48.350297094808333, 42.027431764047861, 36.523938329411536, 35.27548371015164, 34.8620991354233, 32.497539367977204, 31.471924874992165, 31.29276780739259, 25.721706871985575, 24.965186081633057]
coef term transform
0 0.000605 safety commission 48.350297
1 0.000552 consumer product 42.027432
2 0.321763 cpsc 36.523938
3 0.085365 have choked 35.275484
4 0.005135 product safety 34.862099
5 0.183927 be recalled 32.497539
6 0.005528 commission 31.471925
7 0.098098 dangerous product 31.292768
8 0.025386 seriously injured 25.721707
10 0.005084 plastic broke 24.965186
9 0.045568 extremely dangerous 24.965186
11 0.154176 leaned forward 23.219900
12 0.151239 arm stuck 22.997487
13 0.845105 recalled 22.230904
14 0.183242 been recalled 22.221759
15 0.037975 he leaned 22.202439
16 0.004158 happened if 21.706943
17 0.021354 face first 20.931695
18 0.057167 bruise on 20.872533
19 0.087009 her throat 20.626612
hazard transform= 12.0585811502
crib transform= 2.008429441
pampers transform= 2.18267055457
very dangerous transform= 18.474435288
coef term transform
0 -0.056875 are perfect 0.778423
1 -0.067761 super easy 0.778361
2 -0.026278 love love 0.778348
3 -0.020138 are soft 0.778346
4 -0.127362 loves these 0.778322
5 -0.103464 are great 0.778287
6 -0.019045 just love 0.778267
7 -0.006922 great gift 0.778246
8 -0.083266 these work 0.778245
9 -0.032004 in great 0.778226
10 -0.015455 pricey but 0.778192
11 -0.007597 definitely buy 0.778167
12 -0.116372 love these 0.778167
13 -0.032289 great little 0.778156
14 -0.040527 great price 0.778143
15 -0.029445 buying more 0.778124
16 -0.046686 they wash 0.778115
17 -0.037464 nice quality 0.778114
18 -0.031284 bibs are 0.778095
19 -0.008702 really love 0.778089
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.0201005   0.01612903  0.02097902  0.06024096  0.05357143  0.07079646
  0.04580153  0.04301075  0.05185185  0.04195804  0.03592814  0.05113636
  0.06372549  0.05339806  0.01754456  0.04166667  0.01052632  0.01851852
  0.01554404  0.03225806]
pos probs2 [  1.81435671e-05   1.45587494e-05   1.89365552e-05   5.43760521e-05
   4.83558464e-05   6.39038913e-05   4.13424030e-05   3.88233318e-05
   4.68036834e-05   3.78731104e-05   3.24302682e-05   4.61578533e-05
   5.75213336e-05   4.81993549e-05   1.58364642e-05   3.76101027e-05
   9.50149963e-06   1.67156012e-05   1.40307119e-05   2.91174989e-05]
mean= 1.0
mean2= 2.17981557377
pos probs3 [ 1.23521405  0.99115966  1.28920068  3.70192163  3.29206602  4.35057692
  2.8145908   2.64309243  3.18639476  2.57840136  2.20785266  3.14242666
  3.91605239  3.28141208  1.07814648  2.56049579  0.6468621   1.13799813
  0.95521087  1.98231932]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [49.783816942811164, 39.1057539337707, 38.499984939960143, 36.359040264613, 33.675640673252829, 33.305991082088241, 32.70826884322296, 27.111131928874645, 25.139413243138311, 24.400018735987182]
coef term transform
0 0.000681 safety commission 49.783817
1 0.085111 have choked 39.105754
2 0.305678 cpsc 38.499985
3 0.005806 product safety 36.359040
4 0.204428 be recalled 33.675641
5 0.007200 commission 33.305991
6 0.078297 dangerous product 32.708269
7 0.031247 extremely dangerous 27.111132
8 0.021551 seriously injured 25.139413
9 0.007024 plastic broke 24.400019
10 0.167001 leaned forward 24.188513
11 0.909811 recalled 22.814900
12 0.022167 to recall 22.759963
13 0.043061 he leaned 22.503512
14 0.182853 been recalled 22.245031
15 0.006363 happened if 22.001297
16 0.163226 arm stuck 21.190310
17 0.059252 bruise on 21.155572
18 0.056202 choked on 20.983575
19 0.072693 her throat 20.906316
hazard transform= 12.1993399573
crib transform= 1.98680853113
pampers transform= 1.59774937501
very dangerous transform= 18.0440479172
coef term transform
0 -0.045964 are perfect 0.784348
1 -0.025519 love love 0.784272
2 -0.047174 are soft 0.784270
3 -0.007944 really love 0.784256
4 -0.103692 loves these 0.784246
5 -0.029826 will love 0.784231
6 -0.028986 how well 0.784216
7 -0.078673 still loves 0.784213
8 -0.136279 are great 0.784211
9 -0.017515 just love 0.784190
10 -0.042817 these work 0.784169
11 -0.110932 love these 0.784158
12 -0.041703 also love 0.784154
13 -0.113129 loves them 0.784118
14 -0.060624 great to 0.784114
15 -0.025287 pricey but 0.784114
16 -0.012199 great little 0.784079
17 -0.024881 super easy 0.784069
18 -0.005174 buying more 0.784046
19 -0.018044 they wash 0.784037
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.22857143  0.08542714  0.06451613  0.07342657  0.10542169  0.23214286
  0.13274336  0.09923664  0.07526882  0.11111111  0.0979021   0.07185629
  0.07386364  0.09803922  0.0685434   0.08928571  0.12121212  0.06315789
  0.09259259  0.08064516]
pos probs2 [  7.85316911e-05   2.93507263e-05   2.21662031e-05   2.52276193e-05
   3.62203771e-05   7.97587488e-05   4.56074533e-05   3.40953430e-05
   2.58605703e-05   3.81751276e-05   3.36368257e-05   2.46881065e-05
   2.53777837e-05   3.36839361e-05   2.35498784e-05   3.06764418e-05
   4.16455938e-05   2.16995462e-05   3.18126063e-05   2.77077539e-05]
mean= 1.0
mean2= 2.16140707346
pos probs3 [ 5.34643753  1.99819744  1.50907511  1.71749632  2.46588327  5.42997562
  3.10495542  2.32121095  1.76058763  2.59896269  2.2899951   1.68076629
  1.72771951  2.29320237  1.60327572  2.08845216  2.83523202  1.47730511
  2.16580224  1.88634389]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [20.133736268248057, 17.861961748140924, 17.808801147700027, 16.063468181417612, 15.983620532562613, 15.845288647544368, 14.782899770604251, 14.105820391797948, 13.821756114632858, 13.415233875967186]
coef term transform
0 0.000887 safety commission 20.133736
1 0.030602 have choked 17.861962
2 0.004502 consumer product 17.808801
3 0.142718 cpsc 16.063468
4 0.006279 product safety 15.983621
5 0.039004 dangerous product 15.845289
6 0.117738 be recalled 14.782900
7 0.002023 commission 14.105820
8 0.021048 seriously injured 13.821756
9 0.033536 extremely dangerous 13.415234
10 0.058837 leg stuck 13.024347
11 0.078349 arm stuck 12.905194
12 0.033588 choked on 12.408462
13 0.000557 crib were 12.357332
14 0.078417 leaned forward 11.944169
15 0.005050 plastic broke 11.695332
16 0.066169 been recalled 11.430732
17 0.002227 happened if 11.262172
18 0.000084 got hurt 11.245512
19 0.016341 had happened 11.177840
hazard transform= 7.29442194494
crib transform= 2.9725776627
pampers transform= 2.05837844907
very dangerous transform= 9.71911254598
coef term transform
0 -0.022151 just great 2.961582
1 -0.006894 great love 2.960185
2 -0.004003 best stroller 2.959723
3 -0.025013 cute too 2.958532
4 -0.065863 great quality 2.958349
5 -0.000256 great stroller 2.958033
6 -0.002675 perfect love 2.957760
7 -0.000945 much love 2.957712
8 -0.004824 just love 2.957543
9 -0.000142 far love 2.957269
10 -0.103704 love these 2.957081
11 -0.011254 great gift 2.957060
12 -0.000105 re great 2.957034
13 -0.002634 great fit 2.956966
14 -0.010690 quality easy 2.956946
15 -0.000903 too highly 2.956846
16 -0.000043 time great 2.956785
17 -0.000672 well love 2.956605
18 -0.007872 colors they 2.956529
19 -0.001934 great don 2.956485
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.09843571  0.22857143  0.08040201  0.06451613  0.08741259  0.12951807
  0.21428571  0.15044248  0.13740458  0.10752688  0.11111111  0.1048951
  0.08982036  0.11363636  0.15196078  0.12135922  0.0952381   0.12121212
  0.0877193   0.09259259]
pos probs2 [  2.95944627e-05   6.87194567e-05   2.41726732e-05   1.93966208e-05
   2.62803866e-05   3.89393006e-05   6.44244906e-05   4.52301734e-05
   4.13103604e-05   3.23277014e-05   3.34052914e-05   3.15364639e-05
   2.70042775e-05   3.41645026e-05   4.56866486e-05   3.64863620e-05
   2.86331069e-05   3.64421361e-05   2.63725985e-05   2.78377429e-05]
mean= 1.0
mean2= 2.14844736178
pos probs3 [ 2.01479102  4.67842061  1.64567559  1.32052195  1.78916872  2.65098758
  4.38601932  3.0792702   2.81240934  2.20086991  2.27423224  2.14700247
  1.83845121  2.32591934  3.11034704  2.48399153  1.94934192  2.48098063
  1.79544651  1.89519353]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [17.618102932457433, 16.048843429950779, 15.258030855141609, 14.549606266510292, 14.195610925416958, 14.157095701975903, 13.672684234582995, 12.704331830605714, 12.343352088175441, 12.341054368129242]
coef term transform
0 0.000872 safety commission 17.618103
1 0.002292 consumer product 16.048843
2 0.017139 have choked 15.258031
3 0.118388 cpsc 14.549606
4 0.040189 dangerous product 14.195611
5 0.006078 product safety 14.157096
6 0.114750 be recalled 13.672684
7 0.077787 arm stuck 12.704332
8 0.004272 commission 12.343352
9 0.024803 extremely dangerous 12.341054
10 0.010991 seriously injured 12.094781
11 0.061572 leaned forward 11.540519
12 0.078326 leg stuck 11.164413
13 0.009245 hinge broke 11.021279
14 0.018246 had happened 10.686879
15 0.003831 plastic broke 10.535046
16 0.073682 been recalled 10.465585
17 0.030703 choked on 10.358851
18 0.009745 was caused 10.234045
19 0.062147 was strapped 10.234045
hazard transform= 6.70898511178
crib transform= 2.55826471547
pampers transform= 2.00587283681
very dangerous transform= 9.52531620171
coef term transform
0 -0.001160 great easy 3.084866
1 -0.000029 re great 3.083841
2 -0.000001 too love 3.082497
3 -0.029517 kids love 3.082373
4 -0.032480 them easy 3.081706
5 -0.000026 girls love 3.081534
6 -0.004637 cute too 3.081457
7 -0.051376 great quality 3.081267
8 -0.000485 perfect love 3.080653
9 -0.021444 comfortable easy 3.080553
10 -0.007746 clean love 3.080362
11 -0.008106 use easy 3.080225
12 -0.001460 far love 3.080141
13 -0.012911 children love 3.080065
14 -0.040314 great gift 3.079924
15 -0.000045 too highly 3.079701
16 -0.000066 cuddly and 3.079255
17 -0.009100 much stuff 3.079111
18 -0.009641 great too 3.078952
19 -0.000570 great all 3.078886
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.09538344  0.31428571  0.12060302  0.08064516  0.08741259  0.12048193
  0.19642857  0.17699115  0.13740458  0.15053763  0.11851852  0.11888112
  0.08982036  0.09090909  0.14215686  0.13106796  0.07165357  0.11309524
  0.13131313  0.07017544]
pos probs2 [  2.95086230e-05   9.72300696e-05   3.73107622e-05   2.49490648e-05
   2.70426926e-05   3.72733016e-05   6.07687935e-05   5.47554696e-05
   4.25086356e-05   4.65715876e-05   3.66658848e-05   3.67780619e-05
   2.77875811e-05   2.81244003e-05   4.39788416e-05   4.05482859e-05
   2.21673520e-05   3.49880932e-05   4.06241338e-05   2.17100634e-05]
mean= 1.0
mean2= 2.14939698175
pos probs3 [ 2.00894705  6.61942314  2.54011669  1.69853233  1.84106651  2.53756637
  4.13713946  3.72775237  2.89398791  3.17059368  2.49621344  2.50385046
  1.89177852  1.91470917  2.99407954  2.7605273   1.50915332  2.38198939
  2.76569103  1.47802112]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [18.662355219089577, 16.753705253500872, 16.083557043360837, 14.743260623080767, 14.717884959357738, 14.607378036692928, 14.237777401718001, 13.344499800607455, 12.699027007275451, 12.445609616886362]
coef term transform
0 0.001153 safety commission 18.662355
1 0.001477 consumer product 16.753705
2 0.030728 have choked 16.083557
3 0.005414 product safety 14.743261
4 0.148264 cpsc 14.717885
5 0.044863 dangerous product 14.607378
6 0.139361 be recalled 14.237777
7 0.003083 commission 13.344500
8 0.028408 extremely dangerous 12.699027
9 0.010930 seriously injured 12.445610
10 0.063387 leaned forward 11.427147
11 0.039924 choked on 10.916177
12 0.003181 plastic broke 10.840633
13 0.081311 been recalled 10.769156
14 0.084687 arm stuck 10.530900
15 0.013225 hinge broke 10.530900
16 0.061636 leg stuck 10.530900
17 0.025749 wedged between 10.530900
18 0.487445 recalled 10.434065
19 0.030352 we woke 10.211782
hazard transform= 6.8177829548
crib transform= 2.22603162053
pampers transform= 1.68494407121
very dangerous transform= 9.33487020061
coef term transform
0 -0.003210 great easy 3.129270
1 -0.040028 cute too 3.129022
2 -0.013174 re great 3.128231
3 -0.001136 great love 3.127559
4 -0.007227 just love 3.127255
5 -0.010193 cute love 3.126922
6 -0.013057 great quality 3.126721
7 -0.030617 works perfect 3.126248
8 -0.003089 girls love 3.125890
9 -0.000159 person than 3.125719
10 -0.009842 definitely good 3.125609
11 -0.007634 made great 3.125255
12 -0.011515 great bag 3.125055
13 -0.001026 seat easy 3.124878
14 -0.010255 are perfect 3.124857
15 -0.014045 clean love 3.124701
16 -0.007827 use easy 3.124563
17 -0.003820 much cuter 3.124053
18 -0.001372 time great 3.123966
19 -0.046951 great little 3.123788
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.03517588  0.01612903  0.0753012   0.09734513  0.0610687   0.05376344
  0.05185185  0.04895105  0.04790419  0.05113636  0.07843137  0.06796117
  0.02787192  0.07407407  0.01265823  0.03225806  0.08731809  0.02409639
  0.04116223  0.02857143]
pos probs2 [  2.30738645e-05   1.05799517e-05   4.93943527e-05   6.38542217e-05
   4.00584430e-05   3.52665056e-05   3.40125854e-05   3.21097834e-05
   3.14230900e-05   3.35432559e-05   5.14476082e-05   4.45796022e-05
   1.82827842e-05   4.85894077e-05   8.30325322e-06   2.11599034e-05
   5.72769110e-05   1.58061929e-05   2.70006515e-05   1.87416287e-05]
mean= 1.0
mean2= 2.25176953099
pos probs3 [ 1.57086869  0.72028311  3.36276753  4.34719541  2.7271788   2.4009437
  2.31557681  2.18603406  2.13928397  2.28362486  3.50255316  3.03497932
  1.24469195  3.30796688  0.56528548  1.44056622  3.8994121   1.07608561
  1.83820435  1.27593008]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [37.874127098997562, 33.493164636053812, 30.130397102313466, 30.042353734157359, 29.027409351246639, 26.650475086752497, 26.437271286058476, 25.567301248896037, 21.672047705681877, 21.652146835428727]
coef term transform
0 0.000561 safety commission 37.874127
1 0.001285 consumer product 33.493165
2 0.206390 cpsc 30.130397
3 0.063057 have choked 30.042354
4 0.004870 product safety 29.027409
5 0.066088 dangerous product 26.650475
6 0.138981 be recalled 26.437271
7 0.002385 commission 25.567301
8 0.042933 extremely dangerous 21.672048
9 0.025698 seriously injured 21.652147
10 0.105207 leaned forward 20.428455
11 0.084513 leg stuck 19.283943
12 0.097001 arm stuck 19.248945
13 0.007322 plastic broke 19.045133
14 0.117245 been recalled 18.994615
15 0.026804 he leaned 18.240409
16 0.656412 recalled 18.119674
17 0.012408 face first 18.075676
18 0.062615 choked on 17.699640
19 0.007693 serious injury 17.399047
hazard transform= 10.4531753333
crib transform= 2.67463323649
pampers transform= 1.96493232532
very dangerous transform= 15.4631415679
coef term transform
0 -0.026905 great gift 1.283171
1 -0.013443 love love 1.282964
2 -0.034365 they wash 1.282956
3 -0.021354 granddaughter loves 1.282801
4 -0.020510 great love 1.282759
5 -0.162338 are great 1.282697
6 -0.013495 just love 1.282696
7 -0.111860 great to 1.282660
8 -0.001235 smooth ride 1.282603
9 -0.030555 are soft 1.282580
10 -0.004383 excellent quality 1.282566
11 -0.004369 for on 1.282535
12 -0.001818 great easy 1.282503
13 -0.006315 works perfect 1.282490
14 -0.032234 are perfect 1.282457
15 -0.010474 just great 1.282406
16 -0.003411 cute too 1.282401
17 -0.000276 one loves 1.282399
18 -0.003563 its perfect 1.282394
19 -0.016166 great condition 1.282390
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.04020101  0.03225806  0.03846154  0.0753012   0.08928571  0.11504425
  0.06870229  0.06451613  0.05925926  0.06293706  0.05389222  0.06818182
  0.10294118  0.08252427  0.02531646  0.05952381  0.03859649  0.07407407
  0.02040816  0.01265823]
pos probs2 [  2.45677682e-05   1.97136527e-05   2.35047398e-05   4.60183158e-05
   5.45645745e-05   7.03062128e-05   4.19855657e-05   3.94273054e-05
   3.62147102e-05   3.84623014e-05   3.29347851e-05   4.16674932e-05
   6.29097447e-05   5.04324999e-05   1.54714743e-05   3.63763830e-05
   2.35872125e-05   4.52683877e-05   1.24719027e-05   7.73573714e-06]
mean= 1.0
mean2= 2.32879523842
pos probs3 [ 1.67257366  1.34210548  1.60020268  3.13292694  3.71475623  4.78644696
  2.85837731  2.68421095  2.46549747  2.61851348  2.24220017  2.83672294
  4.28289542  3.43344459  1.05329797  2.47650415  1.60581743  3.08187183
  0.84908714  0.52664898]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [36.338779920887077, 32.14952663619983, 28.430267672404167, 27.569757072857406, 27.232540209486917, 25.296004016812294, 24.828951311068945, 24.772603371225379, 22.026319287085009, 20.802634882246952]
coef term transform
0 0.000747 safety commission 36.338780
1 0.001620 consumer product 32.149527
2 0.004401 product safety 28.430268
3 0.207560 cpsc 27.569757
4 0.047664 have choked 27.232540
5 0.118144 be recalled 25.296004
6 0.062101 dangerous product 24.828951
7 0.004731 commission 24.772603
8 0.038586 extremely dangerous 22.026319
9 0.024125 seriously injured 20.802635
10 0.106876 arm stuck 20.085303
11 0.108927 leaned forward 19.474807
12 0.003463 plastic broke 18.355266
13 0.118795 been recalled 18.261137
14 0.011615 face first 17.830830
15 0.616804 recalled 17.359440
16 0.011173 was caused 17.335529
17 0.004682 happened if 16.950295
18 0.023260 he leaned 16.407712
19 0.162380 is dangerous 16.085758
hazard transform= 9.90821794762
crib transform= 2.3645280759
pampers transform= 1.9970529487
very dangerous transform= 15.0977571722
coef term transform
0 -0.022564 they wash 1.279903
1 -0.027158 granddaughter loves 1.279748
2 -0.041605 loves them 1.279721
3 -0.031197 great love 1.279705
4 -0.028614 are perfect 1.279655
5 -0.087715 love these 1.279651
6 -0.006822 great gift 1.279574
7 -0.005415 love love 1.279535
8 -0.001285 great easy 1.279450
9 -0.007134 works perfect 1.279437
10 -0.003447 just great 1.279354
11 -0.009131 cute too 1.279348
12 -0.002644 its perfect 1.279342
13 -0.007778 great condition 1.279337
14 -0.027284 order more 1.279325
15 -0.021562 kids love 1.279314
16 -0.063685 perfect gift 1.279311
17 -0.006889 great have 1.279289
18 -0.002234 these blankets 1.279272
19 -0.021895 good size 1.279245
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.03517588  0.03225806  0.02797203  0.07831325  0.05357143  0.09734513
  0.06870229  0.05376344  0.05185185  0.05594406  0.04790419  0.06818182
  0.08333333  0.06796117  0.03361378  0.05952381  0.03157895  0.09259259
  0.03225806  0.08523909]
pos probs2 [  2.11722115e-05   1.94159912e-05   1.68362441e-05   4.71364123e-05
   3.22444139e-05   5.85916194e-05   4.13516148e-05   3.23599853e-05
   3.12094080e-05   3.36724882e-05   2.88333282e-05   4.10383450e-05
   5.01579772e-05   4.09055348e-05   2.02319908e-05   3.58271266e-05
   1.90072335e-05   5.57310858e-05   1.94159912e-05   5.13050411e-05]
mean= 1.0
mean2= 2.23895813464
pos probs3 [ 1.44140416  1.32184068  1.1462115   3.20904695  2.1951997   3.98891745
  2.81521793  2.2030678   2.1247365   2.29242299  1.96297298  2.79389053
  3.41475509  2.78484881  1.37739393  2.43911078  1.29401245  3.79417232
  1.32184068  3.4928472 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [34.752697333401606, 30.732795776179778, 27.647173710298272, 26.821349041029624, 26.635089672689141, 25.114972892361969, 24.258420132664572, 23.147347454832609, 19.885926678704561, 19.616678155008369]
coef term transform
0 0.001107 safety commission 34.752697
1 0.000892 consumer product 30.732796
2 0.215214 cpsc 27.647174
3 0.052237 have choked 26.821349
4 0.008167 product safety 26.635090
5 0.060487 dangerous product 25.114973
6 0.156912 be recalled 24.258420
7 0.003301 commission 23.147347
8 0.004294 plastic broke 19.885927
9 0.109024 leaned forward 19.616678
10 0.015218 seriously injured 19.246801
11 0.034908 extremely dangerous 18.680719
12 0.122094 arm stuck 18.369027
13 0.025996 he leaned 17.891393
14 0.009828 face first 17.073775
15 0.127482 been recalled 17.058324
16 0.652671 recalled 17.050225
17 0.029688 choked on 16.990489
18 0.160019 lodged in 16.872907
19 0.002963 happened if 16.694358
hazard transform= 10.0317916089
crib transform= 2.2883498959
pampers transform= 1.80299068554
very dangerous transform= 14.075223181
coef term transform
0 -0.015536 love love 1.309494
1 -0.041191 are perfect 1.309106
2 -0.015463 great love 1.308901
3 -0.011353 great gift 1.308766
4 -0.006002 looks just 1.308739
5 -0.093288 and cuddly 1.308703
6 -0.000980 great easy 1.308639
7 -0.009425 works perfect 1.308626
8 -0.004267 just great 1.308541
9 -0.013785 cute too 1.308535
10 -0.001943 one loves 1.308534
11 -0.015339 its perfect 1.308528
12 -0.014099 great condition 1.308524
13 -0.093832 love these 1.308501
14 -0.173151 are great 1.308491
15 -0.015282 baby book 1.308487
16 -0.023053 great little 1.308465
17 -0.035855 great price 1.308421
18 -0.001665 great car 1.308406
19 -0.001002 great bag 1.308377
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.0201005   0.06325301  0.07964602  0.05343511  0.03225806  0.03703704
  0.04195804  0.02994012  0.04545455  0.06372549  0.0631068   0.05555556
  0.03225806  0.04109589  0.02409639  0.02663438  0.02142857  0.01398601
  0.03369066  0.02510345]
pos probs2 [  1.59012732e-05   5.00387204e-05   6.30070360e-05   4.22718961e-05
   2.55189787e-05   2.92995682e-05   3.31925178e-05   2.36852797e-05
   3.59585610e-05   5.04124923e-05   4.99230507e-05   4.39493523e-05
   2.55189787e-05   3.25104798e-05   1.90623697e-05   2.10701495e-05
   1.69518930e-05   1.10641726e-05   2.66522871e-05   1.98590453e-05]
mean= 1.0
mean2= 2.21225710015
pos probs3 [ 1.08255868  3.40663608  4.28951901  2.87787069  1.73733207  1.9947146
  2.25974661  1.61249384  2.44805883  3.43208248  3.39876129  2.9920719
  1.73733207  2.21331346  1.29776613  1.43445578  1.15408488  0.75324887
  1.81448771  1.3520038 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.312963637397282, 39.780955992663465, 35.039685426716737, 34.272823624448527, 32.763187345752577, 30.160084789514706, 29.600955627658927, 28.665979206623994, 24.552590033480982, 22.917997560421504]
coef term transform
0 0.000674 safety commission 44.312964
1 0.001685 consumer product 39.780956
2 0.251464 cpsc 35.039685
3 0.082300 have choked 34.272824
4 0.006198 product safety 32.763187
5 0.163349 be recalled 30.160085
6 0.003099 commission 29.600956
7 0.074420 dangerous product 28.665979
8 0.045054 extremely dangerous 24.552590
9 0.128361 leaned forward 22.917998
10 0.027261 seriously injured 22.848549
11 0.117937 arm stuck 22.285777
12 0.008700 plastic broke 22.176533
13 0.065441 her throat 20.543504
14 0.032669 he leaned 20.480943
15 0.788093 recalled 20.428629
16 0.160297 been recalled 19.983249
17 0.004762 happened if 19.947146
18 0.015022 face first 19.875906
19 0.114735 leg stuck 19.584471
hazard transform= 11.4895561103
crib transform= 2.46398289036
pampers transform= 2.15429177068
very dangerous transform= 16.8583774298
coef term transform
0 -0.017904 great gift 0.959842
1 -0.018977 these work 0.959842
2 -0.038453 works very 0.959806
3 -0.039860 loves them 0.959780
4 -0.006288 great little 0.959732
5 -0.032879 super easy 0.959720
6 -0.121242 are great 0.959691
7 -0.027762 are perfect 0.959684
8 -0.038266 are soft 0.959683
9 -0.051360 they wash 0.959681
10 -0.016716 nice quality 0.959680
11 -0.028836 bibs are 0.959656
12 -0.063358 great quality 0.959574
13 -0.034300 granddaughter loves 0.959565
14 -0.012644 great love 0.959534
15 -0.070079 love these 0.959493
16 -0.009933 just love 0.959487
17 -0.133109 so cute 0.959486
18 -0.146752 works great 0.959464
19 -0.084632 great to 0.959460
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.03517588  0.01612903  0.02797203  0.06927711  0.07142857  0.08849558
  0.05343511  0.03225806  0.05185185  0.04895105  0.04191617  0.0625
  0.07352941  0.0631068   0.05555556  0.01265823  0.03225806  0.02409639
  0.02142857  0.02097902]
pos probs2 [  2.67578424e-05   1.22691489e-05   2.12779646e-05   5.26982120e-05
   5.43348025e-05   6.73174544e-05   4.06474095e-05   2.45382979e-05
   3.94430418e-05   3.72364381e-05   3.18850937e-05   4.75429522e-05
   5.59328849e-05   4.80045342e-05   4.22604019e-05   9.62895234e-06
   2.45382979e-05   1.83298129e-05   1.63004407e-05   1.59584735e-05]
mean= 1.0
mean2= 2.45448318131
pos probs3 [ 1.82167391  0.83528366  1.44860383  3.58769428  3.69911335  4.58297229
  2.76727564  1.67056732  2.68528229  2.5350567   2.17073718  3.23672418
  3.8079108   3.26814869  2.87708816  0.65553908  1.67056732  1.24789366
  1.10973401  1.08645287]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [40.643422654365153, 36.486708973805086, 33.06918201590107, 32.9557371376304, 31.504115382758581, 29.829650072003744, 28.399644445792092, 27.277431284036499, 23.609046983050348, 23.539812241164569]
coef term transform
0 0.000558 safety commission 40.643423
1 0.000756 consumer product 36.486709
2 0.258297 cpsc 33.069182
3 0.063114 have choked 32.955737
4 0.005346 product safety 31.504115
5 0.142525 be recalled 29.829650
6 0.080092 dangerous product 28.399644
7 0.004693 commission 27.277431
8 0.040811 extremely dangerous 23.609047
9 0.022710 seriously injured 23.539812
10 0.135499 leaned forward 22.588203
11 0.110721 arm stuck 22.322236
12 0.005010 plastic broke 22.085883
13 0.013932 face first 20.961642
14 0.005318 happened if 20.459294
15 0.032507 he leaned 20.423274
16 0.731980 recalled 20.417405
17 0.143253 been recalled 20.386969
18 0.080545 choked on 19.262456
19 0.015616 was caused 18.988782
hazard transform= 11.2590050105
crib transform= 2.16592586758
pampers transform= 2.07150347722
very dangerous transform= 17.3581662565
coef term transform
0 -0.046468 are perfect 0.932160
1 -0.058869 great to 0.931883
2 -0.027812 buying more 0.931802
3 -0.013700 love love 0.931797
4 -0.020760 they wash 0.931792
5 -0.002297 nice quality 0.931790
6 -0.110198 love these 0.931772
7 -0.037772 buy more 0.931690
8 -0.082884 granddaughter loves 0.931679
9 -0.015124 great love 0.931648
10 -0.081226 are great 0.931603
11 -0.023289 just love 0.931603
12 -0.085981 super easy 0.931572
13 -0.005377 fits great 0.931564
14 -0.010040 great gift 0.931552
15 -0.051509 these work 0.931552
16 -0.002919 smooth ride 0.931535
17 -0.043371 and works 0.931509
18 -0.018459 excellent quality 0.931508
19 -0.004873 really love 0.931469
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.02512563  0.01612903  0.02447552  0.06626506  0.05357143  0.07964602
  0.0610687   0.05376344  0.05185185  0.04895105  0.03592814  0.0625
  0.06862745  0.05825243  0.02256868  0.04761905  0.01754386  0.03703704
  0.02072539  0.03225806]
pos probs2 [  1.91036813e-05   1.22633309e-05   1.86093902e-05   5.03830823e-05
   4.07317775e-05   6.05569790e-05   4.64321535e-05   4.08777696e-05
   3.94243378e-05   3.72187804e-05   2.73171203e-05   4.75204071e-05
   5.21792706e-05   4.42908649e-05   1.71595680e-05   3.62060245e-05
   1.33390617e-05   2.81602413e-05   1.57580625e-05   2.45266618e-05]
mean= 1.0
mean2= 2.0866153799
pos probs3 [ 1.30057862  0.83488757  1.26692729  3.43008024  2.77301942  4.12271913
  3.16110101  2.78295855  2.68400892  2.53385457  1.85974955  3.23518932
  3.55236474  3.01532208  1.16822339  2.46490615  0.90812332  1.91714923
  1.07280889  1.66977513]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [43.245062278185721, 33.881255404727327, 33.053500503607808, 31.92053460584264, 30.055952375161336, 29.608452639797818, 28.84504674511879, 22.075409465089901, 22.026820889598142, 21.31418844905232]
coef term transform
0 0.001121 safety commission 43.245062
1 0.067835 have choked 33.881255
2 0.255756 cpsc 33.053501
3 0.007122 product safety 31.920535
4 0.072981 dangerous product 30.055952
5 0.176967 be recalled 29.608453
6 0.004950 commission 28.845047
7 0.007241 plastic broke 22.075409
8 0.130176 leaned forward 22.026821
9 0.032472 extremely dangerous 21.314188
10 0.020071 seriously injured 21.175785
11 0.031054 he leaned 20.413589
12 0.779196 recalled 19.931741
13 0.144836 been recalled 19.908857
14 0.042404 choked on 19.884578
15 0.004983 happened if 19.810542
16 0.058871 her throat 19.744661
17 0.012332 face first 19.719249
18 0.137869 arm stuck 19.634252
19 0.044502 fell forward 19.517208
hazard transform= 10.9277505858
crib transform= 2.05515736584
pampers transform= 1.96699510544
very dangerous transform= 16.489607605
coef term transform
0 -0.021753 love love 0.999110
1 -0.056056 are soft 0.999108
2 -0.025866 will love 0.999059
3 -0.048431 are perfect 0.999011
4 -0.028363 pricey but 0.998910
5 -0.112322 love these 0.998878
6 -0.040542 super easy 0.998852
7 -0.007736 buying more 0.998823
8 -0.150090 are great 0.998821
9 -0.018612 they wash 0.998812
10 -0.018059 nice quality 0.998810
11 -0.001782 bibs are 0.998785
12 -0.015641 really love 0.998779
13 -0.062853 great to 0.998745
14 -0.006987 great seat 0.998719
15 -0.035608 granddaughter loves 0.998691
16 -0.109246 loves them 0.998670
17 -0.005596 great love 0.998658
18 -0.004411 can wear 0.998612
19 -0.007664 just love 0.998609
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.0201005   0.02097902  0.05421687  0.0619469   0.03816794  0.03225806
  0.03703704  0.02797203  0.0239521   0.03977273  0.04901961  0.04854369
  0.01265823  0.01447426  0.03703704  0.03225806  0.02739726  0.05197505
  0.02409639  0.01428571]
pos probs2 [  1.96528307e-05   2.05117831e-05   5.30093672e-05   6.05672416e-05
   3.73178751e-05   3.15396235e-05   3.62121603e-05   2.73490442e-05
   2.34186426e-05   3.88869222e-05   4.79278593e-05   4.74625402e-05
   1.23763080e-05   1.41518953e-05   3.62121603e-05   3.15396235e-05
   2.67870775e-05   5.08174807e-05   2.35597188e-05   1.39675476e-05]
mean= 1.0
mean2= 2.08208453116
pos probs3 [ 1.33796472  1.3964422   3.60887772  4.12341781  2.54060094  2.14721757
  2.46532388  1.86192293  1.59434119  2.64742166  3.26292866  3.23124974
  0.84257905  0.96346104  2.46532388  2.14721757  1.82366424  3.45965409
  1.60394565  0.95091064]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [52.239900848093548, 46.140777526935999, 41.148496679759312, 39.2966685159289, 38.274153161687885, 35.411912142639927, 34.355481098850284, 34.044052596522917, 27.230622802781902, 25.492497943029861]
coef term transform
0 0.000446 safety commission 52.239901
1 0.001021 consumer product 46.140778
2 0.125960 have choked 41.148497
3 0.346409 cpsc 39.296669
4 0.005699 product safety 38.274153
5 0.203077 be recalled 35.411912
6 0.102296 dangerous product 34.355481
7 0.006882 commission 34.044053
8 0.029717 seriously injured 27.230623
9 0.174497 leaned forward 25.492498
10 0.010443 to recall 24.653239
11 0.037560 extremely dangerous 24.471965
12 0.007890 plastic broke 24.471965
13 0.179959 arm stuck 24.100666
14 0.072727 her throat 24.017846
15 1.026880 recalled 23.106035
16 0.072744 choked on 22.729084
17 0.044563 he leaned 22.500421
18 0.077257 bruise on 21.824179
19 0.196307 been recalled 21.685926
hazard transform= 12.104740227
crib transform= 1.86342563503
pampers transform= 1.86378484961
very dangerous transform= 19.3606459447
coef term transform
0 -0.029401 are perfect 0.732417
1 -0.037871 super easy 0.732359
2 -0.050797 love love 0.732346
3 -0.028392 are soft 0.732345
4 -0.111585 are great 0.732328
5 -0.014392 will love 0.732308
6 -0.022346 how well 0.732294
7 -0.122148 love that 0.732284
8 -0.032615 and cute 0.732281
9 -0.008972 great gift 0.732250
10 -0.035083 these work 0.732250
11 -0.005254 can buy 0.732241
12 -0.045156 also love 0.732236
13 -0.029895 works very 0.732222
14 -0.143818 works great 0.732201
15 -0.072392 great to 0.732199
16 -0.020611 pricey but 0.732199
17 -0.006094 great little 0.732166
18 -0.021004 still love 0.732164
19 -0.020179 can carry 0.732140
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.0201005   0.01612903  0.03389831  0.05120482  0.05357143  0.05309735
  0.05343511  0.03225806  0.03703704  0.04195804  0.0239521   0.03977273
  0.04901961  0.01543124  0.04761905  0.03703704  0.03225806  0.01333333
  0.05613306  0.02597403]
pos probs2 [  1.95166475e-05   1.56605357e-05   3.29136682e-05   4.97174838e-05
   5.20153507e-05   5.15550378e-05   5.18829961e-05   3.13210714e-05
   3.59612301e-05   4.07392956e-05   2.32563644e-05   3.86174573e-05
   4.75957457e-05   1.49830094e-05   4.62358673e-05   3.59612301e-05
   3.13210714e-05   1.29460428e-05   5.45025712e-05   2.52195640e-05]
mean= 1.0
mean2= 2.2015263226
pos probs3 [ 1.32869336  1.06616927  2.24076253  3.3847663   3.54120507  3.50986698
  3.53219437  2.13233854  2.44824054  2.77353125  1.58329329  2.62907649
  3.24031837  1.02004328  3.14773784  2.44824054  2.13233854  0.8813666
  3.71053505  1.71694791]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [51.877907235875675, 45.069882752575275, 39.024364343193689, 38.459633282197565, 37.458080332140341, 34.637707224779184, 33.808146146206596, 33.051247351888534, 27.041929651545168, 25.274483269091231]
coef term transform
0 0.000876 safety commission 51.877907
1 0.000188 consumer product 45.069883
2 0.357030 cpsc 39.024364
3 0.099461 have choked 38.459633
4 0.004071 product safety 37.458080
5 0.199229 be recalled 34.637707
6 0.006804 commission 33.808146
7 0.102644 dangerous product 33.051247
8 0.025662 seriously injured 27.041930
10 0.004352 plastic broke 25.274483
9 0.045481 extremely dangerous 25.274483
11 0.047588 he leaned 24.206547
12 0.173367 leaned forward 23.909413
13 0.939408 recalled 23.021903
14 0.081013 her throat 22.488478
15 0.067302 bruise on 21.672949
16 0.167528 arm stuck 21.654266
17 0.206521 been recalled 21.535654
18 0.009116 serious injury 21.461849
19 0.110703 choked on 21.362392
hazard transform= 12.4615443719
crib transform= 1.75655419819
pampers transform= 2.11527983052
very dangerous transform= 20.1420343973
coef term transform
0 -0.061144 are perfect 0.720625
1 -0.055170 super easy 0.720568
2 -0.030545 love love 0.720555
3 -0.018313 are soft 0.720554
4 -0.116446 loves these 0.720532
5 -0.017579 will love 0.720518
6 -0.028787 how well 0.720504
7 -0.071094 still loves 0.720501
8 -0.115854 are great 0.720499
9 -0.058507 and cute 0.720491
10 -0.021815 just love 0.720480
11 -0.063367 so cute 0.720480
12 -0.079080 great price 0.720466
13 -0.011157 great gift 0.720460
14 -0.069786 these work 0.720460
15 -0.145178 works great 0.720452
16 -0.104681 love these 0.720451
17 -0.024524 and holds 0.720448
18 -0.028490 in great 0.720443
19 -0.098826 just loves 0.720426
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05722892  0.05357143  0.0619469   0.03816794
  0.03225806  0.05185185  0.03496503  0.03592814  0.05113636  0.05882353
  0.05339806  0.01535149  0.04166667  0.01052632  0.01851852  0.01554404
  0.03225806  0.04989605]
pos probs2 [  1.44561077e-05   1.54664808e-05   5.48780554e-05   5.13708113e-05
   5.94022361e-05   3.66000691e-05   3.09329616e-05   4.97218717e-05
   3.35287346e-05   3.44522806e-05   4.90357744e-05   5.64071653e-05
   5.12045627e-05   1.47208779e-05   3.99550754e-05   1.00939138e-05
   1.77578113e-05   1.49055204e-05   3.09329616e-05   4.78464105e-05]
mean= 1.0
mean2= 2.11790325089
pos probs3 [ 0.98417181  1.05295801  3.73609801  3.49732483  4.04410423  2.49173271
  2.10591603  3.38506502  2.28263625  2.34551127  3.33835552  3.84019982
  3.48600663  1.00219737  2.72014154  0.68719365  1.20895179  1.01476783
  2.10591603  3.25738363]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [39.170038126099882, 38.540800565841252, 34.730767138475237, 33.694656452559045, 25.921348759919042, 25.717701799964569, 24.961298805847964, 24.307647773288934, 24.179035880308568, 23.906596039403688]
coef term transform
0 0.096737 have choked 39.170038
1 0.343633 cpsc 38.540801
2 0.215181 be recalled 34.730767
3 0.091366 dangerous product 33.694656
4 0.033225 extremely dangerous 25.921349
5 0.020952 seriously injured 25.717702
6 0.006061 plastic broke 24.961299
7 0.192478 leaned forward 24.307648
8 0.023194 to recall 24.179036
9 0.051944 he leaned 23.906596
10 0.076137 her throat 23.555865
11 0.953646 recalled 22.961746
12 0.192052 been recalled 22.154999
13 0.193364 arm stuck 21.385940
14 0.027783 face first 20.983949
15 0.092343 an unsafe 20.699614
16 0.013654 serious injury 20.348072
17 0.069234 bruise on 20.334173
18 0.210948 is dangerous 20.306173
19 0.035762 first into 20.087199
hazard transform= 12.5489196219
crib transform= 1.85274745883
pampers transform= 1.17510114378
very dangerous transform= 18.988245629
coef term transform
0 -0.057630 are perfect 0.729353
1 -0.023159 love love 0.729283
2 -0.040618 are soft 0.729281
3 -0.009550 really love 0.729268
4 -0.118116 loves these 0.729259
5 -0.025952 will love 0.729245
6 -0.028169 how well 0.729231
7 -0.121223 are great 0.729226
8 -0.016767 just love 0.729207
9 -0.022283 super easy 0.729195
10 -0.112130 love these 0.729177
11 -0.046526 we keep 0.729177
12 -0.046095 also love 0.729173
13 -0.101532 loves them 0.729140
14 -0.045198 pricey but 0.729136
15 -0.012348 great little 0.729103
16 -0.151013 love that 0.729102
17 -0.155300 cuddly 0.729098
18 -0.027865 messes 0.729098
19 -0.011973 look great 0.729095
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05120482  0.0619469   0.03816794  0.03225806
  0.03703704  0.02797203  0.0239521   0.03977273  0.05392157  0.04854369
  0.01265823  0.01339766  0.03703704  0.03225806  0.02739726  0.04573805
  0.02409639  0.00714286]
pos probs2 [  1.56228141e-05   1.67147313e-05   5.30642372e-05   6.41964015e-05
   3.95539442e-05   3.34294625e-05   3.83819755e-05   2.89877857e-05
   2.48218764e-05   4.12170078e-05   5.58796408e-05   5.03064727e-05
   1.31178904e-05   1.38841773e-05   3.83819755e-05   3.34294625e-05
   2.83921463e-05   4.73989469e-05   2.49714057e-05   7.40223813e-06]
mean= 1.0
mean2= 2.11159703483
pos probs3 [ 1.06360119  1.1379389   3.61261327  4.37049101  2.69283252  2.27587781
  2.61304489  1.97348845  1.68987334  2.80605389  3.80428594  3.42486466
  0.89306598  0.94523479  2.61304489  2.27587781  1.93293732  3.2269203
  1.7000533   0.50394437]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [43.614094731982782, 39.951252615937449, 37.53377681934753, 35.276106033221367, 28.862268572635664, 25.938313259721593, 25.544766437849951, 25.518885215521838, 24.90078072933273, 24.823926467822439]
coef term transform
0 0.130880 have choked 43.614095
1 0.385196 cpsc 39.951253
2 0.204602 be recalled 37.533777
3 0.110728 dangerous product 35.276106
4 0.031190 seriously injured 28.862269
5 0.009011 plastic broke 25.938313
6 0.197681 arm stuck 25.544766
7 0.196782 leaned forward 25.518885
8 0.036551 extremely dangerous 24.900781
9 0.011641 to recall 24.823926
10 0.081265 her throat 24.729641
11 1.078535 recalled 23.922877
12 0.049263 he leaned 23.848635
13 0.076250 choked on 23.230606
14 0.007374 happened if 22.646389
15 0.096855 an unsafe 22.370214
16 0.211191 been recalled 22.346855
17 0.080847 bruise on 21.975279
18 0.407371 lodged in 21.788183
19 0.252387 major safety 21.708373
hazard transform= 12.699398172
crib transform= 1.79321055532
pampers transform= 1.97546193786
very dangerous transform= 20.7161619918
coef term transform
0 -0.039699 are perfect 0.702097
1 -0.042502 also love 0.702079
2 -0.049475 super easy 0.702041
3 -0.057006 love love 0.702029
4 -0.053468 are soft 0.702028
5 -0.124748 great to 0.702004
6 -0.013462 will love 0.701993
7 -0.030713 how well 0.701979
8 -0.168623 are great 0.701975
9 -0.142282 love that 0.701969
10 -0.032280 and cute 0.701967
11 -0.007791 great gift 0.701937
12 -0.037611 these work 0.701937
13 -0.002277 can buy 0.701928
14 -0.180540 works great 0.701928
15 -0.070262 love these 0.701927
16 -0.033197 works very 0.701911
17 -0.021374 pricey but 0.701888
18 -0.051475 and highly 0.701881
19 -0.274672 perfect for 0.701857
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01507538  0.03174603  0.01612903  0.05357143  0.05309735  0.04580153
  0.03703704  0.04195804  0.02994012  0.04545455  0.04901961  0.0139559
  0.01851852  0.01554404  0.03225806  0.04989605  0.02409639  0.01428571
  0.01398601  0.01886792]
pos probs2 [  1.74028726e-05   3.66473190e-05   1.86192024e-05   6.18423508e-05
   6.12950733e-05   5.28728495e-05   4.27552055e-05   4.84359670e-05
   3.45625912e-05   5.24722976e-05   5.65877719e-05   1.61105583e-05
   2.13776027e-05   1.79438945e-05   3.72384048e-05   5.75995284e-05
   2.78166397e-05   1.64912935e-05   1.61453223e-05   2.17809537e-05]
mean= 1.0
mean2= 2.59896926894
pos probs3 [ 1.18478756  2.49494948  1.2675953   4.21022724  4.17296859  3.59958359
  2.91077439  3.29752064  2.35302121  3.57231402  3.85249551  1.09680681
  1.45538719  1.22162034  2.5351906   3.92137589  1.89375683  1.12272726
  1.09917355  1.48284733]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [60.684119204636488, 52.691631820568304, 47.154545086881463, 45.450163939162863, 44.534848137610268, 40.552908774718063, 39.295454239067894, 38.995488939533018, 30.960054855023184, 30.049465006346029]
coef term transform
0 0.001098 safety commission 60.684119
1 0.000060 consumer product 52.691632
2 0.109886 have choked 47.154545
3 0.390884 cpsc 45.450164
4 0.004259 product safety 44.534848
5 0.214720 be recalled 40.552909
6 0.118041 dangerous product 39.295454
7 0.006764 commission 38.995489
8 0.026136 seriously injured 30.960055
9 0.004638 plastic broke 30.049465
10 0.191898 leaned forward 29.262572
11 0.057721 he leaned 28.779769
12 0.046126 extremely dangerous 27.737968
13 0.943841 recalled 26.648641
14 0.124325 choked on 26.356707
15 0.080228 her throat 25.926898
16 0.070775 bruise on 25.767511
17 0.176953 arm stuck 25.745298
18 0.011205 serious injury 25.516529
19 0.404974 lodged in 25.426470
hazard transform= 13.8261783434
crib transform= 1.92179271719
pampers transform= 2.35772725434
very dangerous transform= 23.2942581916
coef term transform
0 -0.070929 great price 0.662283
1 -0.057035 are perfect 0.662245
2 -0.109664 are great 0.662200
3 -0.164367 love that 0.662196
4 -0.067864 super easy 0.662193
5 -0.036232 love love 0.662181
6 -0.027924 are soft 0.662180
7 -0.121256 loves these 0.662160
8 -0.016857 will love 0.662147
9 -0.064931 still loves 0.662132
10 -0.052259 and cute 0.662122
11 -0.063323 vibrant 0.662121
12 -0.028600 just love 0.662112
13 -0.092877 so cute 0.662112
14 -0.019988 great gift 0.662094
15 -0.079132 these work 0.662094
16 -0.151691 works great 0.662086
17 -0.023933 and holds 0.662082
18 -0.018155 also love 0.662081
19 -0.027893 in great 0.662078
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01005025  0.01612903  0.05722892  0.05309735  0.03816794  0.04301075
  0.04444444  0.03496503  0.03592814  0.05113636  0.04901961  0.04854369
  0.01265823  0.01335779  0.04166667  0.01851852  0.03225806  0.04365904
  0.02409639  0.02      ]
pos probs2 [  1.00929745e-05   1.61975962e-05   5.74721937e-05   5.33230600e-05
   3.83301895e-05   4.31935899e-05   4.46333762e-05   3.51136701e-05
   3.60808730e-05   5.13537425e-05   4.92279884e-05   4.87500468e-05
   1.27120375e-05   1.34145729e-05   4.18437902e-05   1.85972401e-05
   3.23951924e-05   4.38446367e-05   2.41988184e-05   2.00850193e-05]
mean= 1.0
mean2= 2.09599458145
pos probs3 [ 0.6871297   1.10273235  3.91270695  3.63023393  2.6095193   2.9406196
  3.03864025  2.39053866  2.45638583  3.49616279  3.35144145  3.31890319
  0.86543551  0.91326412  2.84872523  1.2661001   2.2054647   2.98494287
  1.64745556  1.36738811]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [39.778563275651827, 37.891477819050884, 36.099046172654042, 35.287435163884687, 27.146675764863495, 26.933402217889263, 26.141243329127814, 25.456693585648196, 24.073734376769927, 24.055901980935285]
coef term transform
0 0.112878 have choked 39.778563
1 0.358471 cpsc 37.891478
2 0.210131 be recalled 36.099046
3 0.105552 dangerous product 35.287435
4 0.005419 plastic broke 27.146676
5 0.022009 seriously injured 26.933402
6 0.034469 extremely dangerous 26.141243
7 0.202244 leaned forward 25.456694
8 0.052217 he leaned 24.073734
9 0.025004 to recall 24.055902
10 1.001171 recalled 23.654243
11 0.082581 her throat 23.259695
12 0.000754 happened if 22.789802
13 0.207549 been recalled 22.583559
14 0.070701 bruise on 22.416199
15 0.207929 arm stuck 22.396874
16 0.085070 choked on 21.261218
17 0.027813 face first 21.161959
18 0.264456 major safety 21.036740
19 0.080452 hazard to 20.990607
hazard transform= 12.7116450468
crib transform= 1.75090773308
pampers transform= 1.3673881126
very dangerous transform= 20.0752271379
coef term transform
0 -0.061982 are perfect 0.703816
1 -0.025023 love love 0.703748
2 -0.049120 are soft 0.703747
3 -0.010696 really love 0.703735
4 -0.103067 loves these 0.703725
5 -0.023692 will love 0.703712
6 -0.022112 how well 0.703698
7 -0.062363 still loves 0.703696
8 -0.203154 love that 0.703688
9 -0.019219 just love 0.703675
10 -0.026617 super easy 0.703663
11 -0.011857 great gift 0.703656
12 -0.038358 these work 0.703656
13 -0.130893 love these 0.703646
14 -0.044515 we keep 0.703646
15 -0.037627 also love 0.703642
16 -0.112927 are great 0.703619
17 -0.090956 loves them 0.703610
18 -0.049348 pricey but 0.703607
19 -0.082588 so cute 0.703581
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.04819277  0.0619469   0.03816794  0.04444444
  0.02797203  0.02994012  0.04545455  0.04901961  0.03883495  0.01265823
  0.01264006  0.01851852  0.03225806  0.04158004  0.02409639  0.00714286
  0.04        0.01398601]
pos probs2 [  1.86897102e-05   1.99959803e-05   5.97470254e-05   7.67987207e-05
   4.73187320e-05   5.51000346e-05   3.46783434e-05   3.71182868e-05
   5.63523081e-05   6.07720969e-05   4.81456613e-05   1.56930478e-05
   1.56705210e-05   2.29583477e-05   3.99919606e-05   5.15488889e-05
   2.98735127e-05   8.85536270e-06   4.95900311e-05   1.73391717e-05]
mean= 1.0
mean2= 2.58908537745
pos probs3 [ 1.27239547  1.36132634  4.06757749  5.22845691  3.22145927  3.75121035
  2.36090162  2.52701296  3.83646513  4.13736436  3.27775662  1.0683827
  1.06684907  1.56300431  2.72265268  3.50944836  2.03378875  0.60287309
  3.37608932  1.18045081]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [52.175925821721378, 44.743352405586727, 44.226770064176769, 42.201116473451123, 34.528186205550917, 31.260086276630457, 31.030232701067, 29.584287837058515, 28.732675045753954, 28.547814084981642]
coef term transform
0 0.137381 have choked 52.175926
1 0.405288 cpsc 44.743352
2 0.202521 be recalled 44.226770
3 0.119609 dangerous product 42.201116
4 0.031107 seriously injured 34.528186
5 0.016123 to recall 31.260086
6 0.010686 plastic broke 31.030233
7 0.083943 her throat 29.584288
8 0.157448 leaned forward 28.732675
9 0.042361 extremely dangerous 28.547814
10 0.050338 he leaned 28.530332
11 0.205917 arm stuck 27.649007
12 1.114556 recalled 27.357965
13 0.085108 choked on 27.276331
14 0.000089 face first 27.129289
15 0.084713 bruise on 26.289220
16 0.009051 happened if 26.050072
17 0.101294 an unsafe 25.732388
18 0.273798 is dangerous 25.271599
19 0.009994 serious injury 25.211057
hazard transform= 14.3171195147
crib transform= 2.02322978178
pampers transform= 2.36326252251
very dangerous transform= 24.5491259264
coef term transform
0 -0.037166 are perfect 0.650168
1 -0.038593 also love 0.650151
2 -0.045371 super easy 0.650116
3 -0.050607 love love 0.650105
4 -0.043662 are soft 0.650104
5 -0.013759 really love 0.650092
6 -0.011023 will love 0.650072
7 -0.042295 to everyone 0.650060
8 -0.030848 how well 0.650059
9 -0.158737 are great 0.650055
10 -0.009754 great value 0.650050
11 -0.031869 and cute 0.650047
12 -0.182132 works great 0.650047
13 -0.130126 so cute 0.650037
14 -0.085463 great price 0.650024
15 -0.013177 great gift 0.650020
16 -0.038757 these work 0.650020
17 -0.056841 they fit 0.650015
18 -0.147866 love that 0.650015
19 -0.000194 can buy 0.650012
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.03174603  0.01612903  0.05309735  0.03816794  0.04444444
  0.04195804  0.02994012  0.04545455  0.05392157  0.01267993  0.01851852
  0.01612903  0.02597403  0.02409639  0.01428571  0.01398601  0.01462069
  0.01886792  0.00381679]
pos probs2 [  1.84173065e-05   3.87835344e-05   1.97045376e-05   6.48680354e-05
   4.66290585e-05   5.42969481e-05   5.12593566e-05   3.65772854e-05
   5.55309697e-05   6.58749738e-05   1.54908355e-05   2.26237284e-05
   1.97045376e-05   3.17319827e-05   2.94381044e-05   1.74525905e-05
   1.70864522e-05   1.78618236e-05   2.30505912e-05   4.66290585e-06]
mean= 1.0
mean2= 2.59026747327
pos probs3 [ 1.25385023  2.64038302  1.34148492  4.41621585  3.1745063   3.69653623
  3.489737    2.49018159  3.78054841  4.48476822  1.05461608  1.54022343
  1.34148492  2.16031338  2.00414615  1.18817236  1.16324567  1.21603295
  1.56928425  0.31745063]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [63.168657052976869, 54.817952010746204, 49.903239071851715, 47.09743446540223, 46.437736358528689, 42.251409080834456, 41.586032559876436, 40.633680669192238, 31.853131322458545, 31.801083722258447]
coef term transform
0 0.000927 safety commission 63.168657
1 0.000002 consumer product 54.817952
2 0.114063 have choked 49.903239
3 0.404963 cpsc 47.097434
4 0.005074 product safety 46.437736
5 0.226370 be recalled 42.251409
6 0.121862 dangerous product 41.586033
7 0.006777 commission 40.633681
8 0.205142 leaned forward 31.853131
9 0.049417 extremely dangerous 31.801084
10 0.029234 seriously injured 31.504570
11 0.184021 arm stuck 30.114024
12 0.062944 he leaned 29.285938
13 0.137513 choked on 27.893071
14 0.001871 to recall 27.724022
15 0.082729 bruise on 27.269530
16 0.962315 recalled 27.246021
17 0.086213 her throat 26.580763
18 0.006958 happened if 25.670390
19 0.282246 is dangerous 25.145043
hazard transform= 14.231664476
crib transform= 1.89355308707
pampers transform= 1.99612956287
very dangerous transform= 24.4217144119
coef term transform
0 -0.078052 great price 0.650222
1 -0.049508 are perfect 0.650185
2 -0.102442 are great 0.650175
3 -0.081805 so cute 0.650140
4 -0.039294 love love 0.650122
5 -0.027001 are soft 0.650121
6 -0.170702 love that 0.650102
7 -0.110864 loves these 0.650101
8 -0.014534 will love 0.650089
9 -0.031840 how well 0.650076
10 -0.054782 still loves 0.650073
11 -0.047988 and cute 0.650064
12 -0.155314 works great 0.650064
13 -0.082190 vibrant 0.650063
14 -0.059160 super easy 0.650044
15 -0.020693 great gift 0.650037
16 -0.080134 these work 0.650037
17 -0.022921 up great 0.650029
18 -0.092235 love these 0.650028
19 -0.051657 and holds 0.650025
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05421687  0.05357143  0.05309735  0.03816794
  0.03225806  0.03703704  0.03496503  0.03592814  0.05113636  0.03921569
  0.03398058  0.01236094  0.04166667  0.01851852  0.03225806  0.04158004
  0.02409639  0.00968523]
pos probs2 [  1.81060036e-05   1.93714770e-05   6.51161697e-05   6.43409772e-05
   6.37715880e-05   4.58408998e-05   3.87429540e-05   4.44826509e-05
   4.19941110e-05   4.31508350e-05   6.14163873e-05   4.70992774e-05
   4.08117525e-05   1.48458785e-05   5.00429822e-05   2.22413254e-05
   3.87429540e-05   4.99389428e-05   2.89405199e-05   1.16322671e-05]
mean= 1.0
mean2= 2.47942311895
pos probs3 [ 1.23265673  1.31881015  4.43310883  4.38033373  4.34156971  3.12084846
  2.63762031  3.02837887  2.85895908  2.93770885  4.18122765  3.20651881
  2.77846411  1.01070741  3.40692623  1.51418944  2.63762031  3.39984322
  1.97027059  0.79192474]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [49.059737728525562, 46.3013589004157, 42.518439364722155, 40.883114773771311, 32.466002908583093, 30.972056646796442, 30.061113804243607, 29.575019198047329, 28.195251568118142, 27.639288861141161]
coef term transform
0 0.119380 have choked 49.059738
1 0.389740 cpsc 46.301359
2 0.225671 be recalled 42.518439
3 0.113521 dangerous product 40.883115
4 0.004987 plastic broke 32.466003
5 0.022133 seriously injured 30.972057
6 0.039725 extremely dangerous 30.061114
7 0.218680 leaned forward 29.575019
8 0.215887 arm stuck 28.195252
9 0.057253 he leaned 27.639289
10 1.074319 recalled 27.443378
11 0.025741 to recall 27.255410
12 0.089658 her throat 26.974426
13 0.090259 choked on 25.427303
14 0.010797 face first 25.308595
15 0.233771 been recalled 25.158840
16 0.083710 hazard to 25.103667
17 0.099912 an unsafe 24.928729
18 0.014824 serious injury 24.423679
19 0.225101 is dangerous 23.769253
hazard transform= 14.5967861637
crib transform= 1.83593827547
pampers transform= 1.63532459095
very dangerous transform= 22.8764243332
coef term transform
0 -0.063993 are perfect 0.657001
1 -0.034083 also love 0.656984
2 -0.091621 so cute 0.656955
3 -0.029207 love love 0.656937
4 -0.039489 are soft 0.656936
5 -0.012903 really love 0.656924
6 -0.219684 love that 0.656917
7 -0.092163 loves these 0.656916
8 -0.021039 will love 0.656903
9 -0.116712 love these 0.656900
10 -0.017855 how well 0.656890
11 -0.064606 still loves 0.656888
12 -0.117028 are great 0.656886
13 -0.025611 and cute 0.656879
14 -0.014724 just love 0.656869
15 -0.034018 super easy 0.656858
16 -0.014893 great gift 0.656851
17 -0.032790 these work 0.656851
18 -0.047630 we keep 0.656842
19 -0.058110 and holds 0.656839

In [21]:
# Pull the model objects out of the 'model' column into a plain Python list;
# get_models() (defined further down) searches this list by each model's str().
models = results['model'].tolist()
# Bare trailing expression: the notebook renders the full results frame as this cell's output.
results


Out[21]:
model f1 f1_se pr_at_k pr_at_k_se pr_auc pr_auc_se precision precision_se recall recall_se roc_auc roc_auc_se
11 RandNegSampThreshInfoPrior(C=1, nneg=40000, t=... 0.844340 0.002757 0.026764 0.001987 0.017120 0.000216 0.880867 0.006455 0.810997 0.007424 0.968573 0.001955
6 RandNegSampThreshInfoPrior(C=1, nneg=20000, t=... 0.842716 0.004200 0.026764 0.001987 0.015887 0.000225 0.857953 0.008991 0.828179 0.002806 0.969611 0.000989
9 RandNegSampThreshInfoPrior(C=1, nneg=10000, t=... 0.839954 0.001670 0.031630 0.001987 0.016307 0.000187 0.841555 0.005809 0.838488 0.002806 0.971667 0.000783
10 RandNegSampThreshInfoPrior(C=1, nneg=30000, t=... 0.834279 0.008755 0.026764 0.001987 0.017245 0.000564 0.867041 0.012572 0.804124 0.008417 0.969552 0.002114
8 RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0) 0.831069 0.003827 0.036496 0.003441 0.015420 0.000152 0.817261 0.002994 0.845361 0.004860 0.972205 0.000882
5 RandNegSampThreshInfoPrior(C=1, nneg=20000, t=... 0.827318 0.004651 0.026764 0.001987 0.017016 0.000338 0.867892 0.003475 0.790378 0.005612 0.964432 0.001948
4 RandNegSampThreshInfoPrior(C=1, nneg=20000, t=... 0.821095 0.004076 0.021898 0.000000 0.018210 0.000259 0.875548 0.001621 0.773196 0.008417 0.963325 0.000837
12 RandNegSampThreshInfoPrior(C=1, nneg=50000, t=... 0.816033 0.012426 0.024331 0.001987 0.017151 0.000358 0.877241 0.009532 0.762887 0.014580 0.964432 0.002644
7 RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0) 0.773960 0.001943 0.031630 0.001987 0.011101 0.000428 0.669547 0.006728 0.917526 0.008417 0.966018 0.001153
1 RandomNegSamplesThresh(C=1, nneg=20000, t=3.0) 0.764142 0.005421 0.026764 0.001987 0.016838 0.000525 0.783588 0.008551 0.745704 0.002806 0.956619 0.000623
3 RandomNegSamplesThresh(C=1, nneg=20000, t=5.0) 0.752931 0.004321 0.034063 0.001987 0.015665 0.000485 0.727640 0.005702 0.780069 0.002806 0.961367 0.000826
2 RandomNegSamplesThresh(C=1, nneg=20000, t=4.0) 0.747940 0.003574 0.041363 0.005256 0.016923 0.000265 0.736940 0.007574 0.759450 0.002806 0.958851 0.000123
0 RandomNegSamples(C=1,n=20000) 0.699726 0.002380 0.024331 0.001987 0.016046 0.000213 0.789573 0.009472 0.628866 0.009720 0.939985 0.000545

In [22]:
def print_main_results_table(results, n_neg=20000):
    """
    Print Table 3.

    Builds one row per model in ``results`` whose ``n_neg`` attribute equals
    ``n_neg``, ordered by descending ROC AUC, then shows the table with
    ``display`` and prints its LaTeX form.

    Parameters
    ----------
    results : pandas.DataFrame
        Needs the columns read below: 'model', 'roc_auc', 'roc_auc_se',
        'f1', 'f1_se', 'precision', 'precision_se', 'recall', 'recall_se'.
        Each 'model' entry must expose an ``n_neg`` attribute.
    n_neg : int, optional
        Only models trained with this many negative samples are listed.
        Defaults to 20000, the value that used to be hard-coded here.
    """
    # Raw strings: a backslash followed by 'p' is not a valid escape sequence,
    # so the non-raw form triggers an invalid-escape warning on newer Pythons.
    # The resulting text is byte-for-byte what the old literals produced
    # (including the stray space inside the ROC AUC column's "$...$").
    roc_fmt = r'%.1f $\pm $ %.2f'
    metric_fmt = r'%.1f $\pm$ %.2f'

    res = []
    for _, r in results.sort_values('roc_auc', ascending=False).iterrows():
        model = r['model']
        if model.n_neg != n_neg:
            continue
        rr = []
        # Exact type match on purpose: every other model class is reported as 'baseline'.
        rr.append('informed prior' if type(model) is RandomNegativeSamplesThresholdInformedPrior else 'baseline')
        try:
            rr.append('%.1f' % model.threshold)
        except (AttributeError, TypeError):
            # Model has no (numeric) review threshold; anything else should surface as an error.
            rr.append('none')
        # Metrics are stored as fractions; report them as percentages with standard errors.
        rr.append(roc_fmt % ((r['roc_auc'] * 100), (r['roc_auc_se'] * 100)))
        rr.append(metric_fmt % ((r['f1'] * 100), (r['f1_se'] * 100)))
        rr.append(metric_fmt % ((r['precision'] * 100), (r['precision_se'] * 100)))
        rr.append(metric_fmt % ((r['recall'] * 100), (r['recall_se'] * 100)))
        res.append(rr)
    df = pd.DataFrame(res, columns=['Model', 'Review Threshold', 'ROC AUC', 'F1', 'Precision', 'Recall'])
    display(df)
    # escape=False keeps the "$\pm$" markup intact in the LaTeX output.
    print(df.to_latex(index=False, escape=False))

print_main_results_table(results)


Model Review Threshold ROC AUC F1 Precision Recall
0 informed prior 5.0 97.0 $\pm $ 0.10 84.3 $\pm$ 0.42 85.8 $\pm$ 0.90 82.8 $\pm$ 0.28
1 informed prior 4.0 96.4 $\pm $ 0.19 82.7 $\pm$ 0.47 86.8 $\pm$ 0.35 79.0 $\pm$ 0.56
2 informed prior 3.0 96.3 $\pm $ 0.08 82.1 $\pm$ 0.41 87.6 $\pm$ 0.16 77.3 $\pm$ 0.84
3 baseline 5.0 96.1 $\pm $ 0.08 75.3 $\pm$ 0.43 72.8 $\pm$ 0.57 78.0 $\pm$ 0.28
4 baseline 4.0 95.9 $\pm $ 0.01 74.8 $\pm$ 0.36 73.7 $\pm$ 0.76 75.9 $\pm$ 0.28
5 baseline 3.0 95.7 $\pm $ 0.06 76.4 $\pm$ 0.54 78.4 $\pm$ 0.86 74.6 $\pm$ 0.28
6 baseline none 94.0 $\pm $ 0.05 70.0 $\pm$ 0.24 79.0 $\pm$ 0.95 62.9 $\pm$ 0.97
\begin{tabular}{llllll}
\toprule
          Model & Review Threshold &           ROC AUC &               F1 &        Precision &           Recall \\
\midrule
 informed prior &              5.0 &  97.0 $\pm $ 0.10 &  84.3 $\pm$ 0.42 &  85.8 $\pm$ 0.90 &  82.8 $\pm$ 0.28 \\
 informed prior &              4.0 &  96.4 $\pm $ 0.19 &  82.7 $\pm$ 0.47 &  86.8 $\pm$ 0.35 &  79.0 $\pm$ 0.56 \\
 informed prior &              3.0 &  96.3 $\pm $ 0.08 &  82.1 $\pm$ 0.41 &  87.6 $\pm$ 0.16 &  77.3 $\pm$ 0.84 \\
       baseline &              5.0 &  96.1 $\pm $ 0.08 &  75.3 $\pm$ 0.43 &  72.8 $\pm$ 0.57 &  78.0 $\pm$ 0.28 \\
       baseline &              4.0 &  95.9 $\pm $ 0.01 &  74.8 $\pm$ 0.36 &  73.7 $\pm$ 0.76 &  75.9 $\pm$ 0.28 \\
       baseline &              3.0 &  95.7 $\pm $ 0.06 &  76.4 $\pm$ 0.54 &  78.4 $\pm$ 0.86 &  74.6 $\pm$ 0.28 \\
       baseline &             none &  94.0 $\pm $ 0.05 &  70.0 $\pm$ 0.24 &  79.0 $\pm$ 0.95 &  62.9 $\pm$ 0.97 \\
\bottomrule
\end{tabular}


In [23]:
def plot_f1_v_nneg(results, threshold=5.0, out_path='paper/figs/nneg.pdf'):
    """ Plot Figure 3: F1 (with standard-error bars) against the number of
    negative training examples.

    Only rows whose model is exactly a RandomNegativeSamplesThresholdInformedPrior
    with ``model.threshold == threshold`` are plotted.

    Parameters
    ----------
    results : pandas.DataFrame
        Needs the columns 'model', 'f1' and 'f1_se'.
    threshold : float, optional
        Review threshold selecting which models to plot. Defaults to 5.0,
        the value that used to be hard-coded.
    out_path : str, optional
        Where the figure is saved. Defaults to the previous hard-coded path.
    """
    # Collect (n_neg, f1, f1_se) triples so the three series cannot drift out of step.
    points = []
    for _, r in results.iterrows():
        model = r['model']
        if type(model) is RandomNegativeSamplesThresholdInformedPrior and model.threshold == threshold:
            points.append((model.n_neg, r['f1'], r['f1_se']))
    # Order by number of negatives so the connecting line runs left to right.
    points.sort(key=lambda p: p[0])
    nnegs = [p[0] for p in points]
    f1s = [p[1] for p in points]
    ses = [p[2] for p in points]

    plt.figure(figsize=(8,6))
    plt.plot(nnegs, f1s, 'bo-')
    plt.errorbar(nnegs, f1s, yerr=ses)
    plt.xlabel('Number of negative training examples', size=16)
    plt.ylabel('F1', size=16)
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target exists
    # so a fresh checkout without paper/figs does not fail here.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(out_path)
    plt.show()

plot_f1_v_nneg(results)



In [24]:
def plot_rocs(models, data, names, out_path='paper/figs/roc.pdf'):
    """
    Plot Figure 2.

    Draws one ROC curve per model on a shared axis, saves the figure and
    shows it.

    Parameters
    ----------
    models : list
        Objects exposing ``predict_proba(data)``; its result is passed to
        ``roc_curve`` as the scores.
    data : object
        Must expose ``test_df['label']`` (the ground-truth labels) and be
        accepted by each model's ``predict_proba``.
    names : list of str
        Legend labels, paired positionally with ``models``.
    out_path : str, optional
        Where the figure is saved. Defaults to the previous hard-coded path.
    """
    truths = np.array(data.test_df['label'])
    plt.figure(figsize=(8,6))
    formats = ['bo-', 'g^--', 'rs:']
    # Cycle the line styles: zipping against the bare 3-element list silently
    # dropped every model after the third.
    for model, name, fmt in zip(models, names, cycle(formats)):
        probas = model.predict_proba(data)
        fpr, tpr, _ = roc_curve(truths, probas)
        plt.plot(fpr, tpr, fmt, ms=4, label='%s' % (name))
    plt.legend(loc='lower right', prop={'size':16})
    plt.xlabel('False Positive Rate', size=16)
    plt.ylabel('True Positive Rate', size=16)
    # Fixed axis window: only FPR in [0, 0.5] and TPR in [0.39, 1.005] is shown.
    plt.xlim((0, .5))
    plt.ylim((0.39, 1.005))
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target exists.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(out_path)
    plt.show()
    
def get_models(models, names):
    """
    Select models by their string representation.

    Returns every entry of ``models`` whose ``str()`` equals one of
    ``names``. Results are grouped in the order of ``names`` and, within one
    name, in the order of ``models``. A name with no match contributes
    nothing, so the result may be shorter than ``names``.
    """
    return [
        candidate
        for wanted in names
        for candidate in models
        if str(candidate) == wanted
    ]

# Look up the two models to compare by their string form; the result follows
# the order of the names given here.
submodels = get_models(
    models,
    [
        'RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)',
        'RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)',
    ],
)

# Legend labels are paired positionally with `submodels`.
plot_rocs(
    submodels,
    data,
    [r'informed prior, $\tau=5$', r'baseline, $\tau=5$'],
)



In [25]:
# Select two individual models by their string form for use in later cells.
# get_models returns a list of matches; [0] keeps the first one and raises
# IndexError if no model in `models` has that exact str().
best_model = get_models(models, ['RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)'])[0]
baseline_model = get_models(models, ['RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)'])[0]

In [26]:
def error_analysis_recalled_products(model, data, n, n_reviews=1):
    """
    Print the highest-scoring review(s) of the products this model ranks
    as most likely to be recalled.

    Products are ranked in descending order of
    ``model.score_asin_recalls(data, _max)``. For each of the first ``n``
    products a header line is printed with the ASIN, its label (1 if the ASIN
    is in ``data.recalled_asins``, otherwise 0) and its score, followed by the
    ``n_reviews`` reviews of that product with the highest value in
    ``model.predict_proba_reviews(data)``.

    Parameters
    ----------
    model : object
        Must provide ``predict_proba_reviews(data)`` and
        ``score_asin_recalls(data, aggregator)``.
    data : object
        Must provide ``reviews_df`` (with ``ASIN`` and ``reviewText`` columns)
        and ``recalled_asins``.
    n : int
        Number of products to print. Nothing is printed when ``n <= 0``.
    n_reviews : int, optional
        Number of reviews printed per product (default 1).
    """
    probas = model.predict_proba_reviews(data)
    # NOTE(review): `_max` is not defined in this cell; presumably it is the
    # per-product aggregation function defined earlier in the notebook -- confirm.
    scores = model.score_asin_recalls(data, _max)
    reviews = data.reviews_df
    ranked = sorted(scores.items(), key=lambda item: -item[1])
    for rank, (asin, score) in enumerate(ranked):
        if rank >= n:
            break
        label = 1 if asin in data.recalled_asins else 0
        print('\n\n------------\n', asin, label, score)
        # Use row *positions* rather than index labels: both `probas[...]` and
        # `.iloc[...]` are positional, so labels are only correct when
        # reviews_df happens to carry a default 0..N-1 index. flatnonzero also
        # returns an integer array when the product has no reviews, so the
        # lookup below yields nothing instead of raising.
        # assumes probas[k] belongs to the k-th row of reviews_df -- TODO confirm
        positions = np.flatnonzero((reviews.ASIN == asin).values)
        top = probas[positions].argsort()[::-1][:max(n_reviews, 0)]
        for i in top:
            pos = positions[i]
            print(probas[pos], reviews.iloc[pos]['reviewText'])

# For the 20 highest-scoring products under best_model, print the product's
# ASIN, label and score together with its highest-probability review.
error_analysis_recalled_products(best_model, data, 20)



------------
 B00115PFOO 0 1.0
1.0 How can I begin the review of this bed?  I have to say, this bed is sweet. The wooden pieces were veneered and cheap, but acceptable for a toddler, temporary bed- they look cute, at least. However, my friend and I spent an evening constructing this bed for her adorable toddler.  We nearly died in the process. First, the instructions appeared to have been translated from the original, incomprehensible language into Farsi, then French, then Yiddish, from there into Mandarin Chinese before finally being written in English.  All of the parts were listed in letter format ("Attach Side A to bar F using wooden dowels and bolt.  It is better this way"), yet none of the parts themselves had a letter, despite being plastered with warning labels.  Instead, tiny drawings on the instructions labeled the parts.  It was up to you to sort and translate.  This seemed like not a huge deal at first, but like the horror movie in which the innocent girl finds the latch to the bathroom not working and decides to take a shower anyway, this lack of clear designation of parts, and their top and bottom sides would result in tragedy.The instruction booklet listed "tools you will need for this job: 1 flat head screwdriver", so with our screwdriver and our newly sorted parts, we set to work.  Each of us began by slipping the rails into the head and foot board pieces, per step one.  My friend's pieces fell out as soon as she put them in, so that she was playing a sort of whack a mole game to try to get them all in at once.  "Hey," I said, "you really have to push them down first."  I demonstrated by shoving my rails in with force.  This worked great, and even though it was a bit of a chore to wrestle the top bar on without losing the wobbly rails, we appeared to be trucking along.  I lifted my completed headboard to set it aside: the entire thing fell to the floor with a clatter. My friend laughed, and I joined in.  How young we were, how innocent!  
I said: "we need a mallet, something to pound the rails in better." She reread the instructions "It doesn't say we need a mallet.  It says, one flat head screwdriver." She looked up at me: "I don't have a mallet."We decided the work might proceed better if we handled the headboard together.  By straddling and shoving on the thing in ways that felt like a dirty game of Twister,, we finally managed to get the bars shoved in and the headboard assembled.  The footboard was accomplished with similar grunting effort.  Sweating slightly, my friend said, "you know what tool I do have?  Wine." She fetched a pair of glasses and a nice bottle of pinot.  This turned out to have been the saving grace of the evening.  So to recap thus far: Tools needed for the job: 1 flat head screwdriver. 1 mallet. at least 1 extra person. 2 or more glasses of wine.Now it was time to attach the head and footboards to those cute little sleigh sides ("Bar G" and "Bar M",  I think), using wooden dowels and screws with an allen wrench head (thoughtfully provided).  Why oh why must cheap manufacturers use allen wrenches, those tiny little instruments of torture?!? By the time my friend and I had (SPOILER ALERT) finally finished the bed, our fingers were sore and arthritic.  Days later, they continued to throb where we'd grasped that evil allen wrench.Anywoo, it was at this point that things really began to go wrong: on each of the sides, the screw sections were not quite lined up correctly, so that we (working together, and pausing for many wine breaks) had to carefully shimmy first one side, then the other, up and down and at angles, in order to get the screws in.  Early on we abandoned the wooden dowels, as those holes didn't line up either.  "Do you have a drill?" I asked. "Not. In. The. Instructions," grunted my friend, as she struggled to allen wrench.  
Fetching a hammer and a potholder from the kitchen proved to be the ticket, as we could pound together the bars whilst simultaneously pulling the screw with the bar part of the flat head screwdriver (incidentally this was the only use we made of the tool so touted in the instructions) and allen wrenching. It was as easy as it sounds.  When the second bar finally started to come together, my friend's fingertips were painfully pinched by the pieces.  "No problem," she mumbled as she sucked on the injured fingers, "at least it's together now."After another half glass of wine (thank you, wine gods!), we moved on to the bottom bar.  As a foreshadowing note, at no point in the instructions was a top or bottom referred to at this stage- squinting at the line drawings was the only way to figure out inside and outside.  It had a metal plate which was attached with "self drilling" screws.  I put that in quotes because I think this appellation was completely facetious: I picture the instruction writer, laughing maniacally as he translates from the Farsi the words "little bastard tiny screws that you will never line up properly and that have PHILLIPS heads" into "self drilling screws".  My friend searched her house for the suddenly required Phillips head while I rubbed at the knot in my back and checked the clock: more than an hour had passed since we'd begun.  "I think we're nearly out of the tunnel," I called, and my friend and I cheered.  We screwed the plate and feet (full disclosure: we had abandoned the instructions and therefore had a frustrating moment when we realized that the feet had to go under the plate- at this point the "self drilling screws", constructed of the softest, cheapest alloy possible, had nearly been stripped, but we managed.  The error was out fault, but the screws deserve blame for being tiny, cheaper than cheap and Phillips head.  
So there.Once the bottom bars were assembled, we had a fun flashback to our first bar assembly as we placed the bed support slats.  The wine had done its work, though, so we laughed, thinking we were nearly done.  (Tools required update: 1 Phillips head screwdriver, maybe your own well made screws, and perhaps another bottle of wine.).Allen wrenching was required to fully assemble the bed: we each sat at an end, balancing the bed with our legs as we reached through the slats to hold the bolt with one hand and allen wrenching.  There was no more laughter.  It felt like a Soviet work camp.  My back was killing me.  My fingers were on fire. I looked up and mopped my brow.  "Ok, just the side rail now, and we're done".  My friend grabbed the rail and then started to laugh, a scary, high pitched, wheezy sort of laugh.  Something was clearly wrong.  "Um." she said. "What?" I whispered, afraid of the response.  She gulped. "The side bar is upside down.  We have to disassemble back to step 6." I snatched the directions from the floor, and poured over them for help.  Not one indication of the top or bottom was listed, yet at this stage we saw that the predrilled holes had to be reversed. I looked at my friend.  She looked back at me.  She said, "we could just skip the side rails. Toddlers don't fall out of their beds do they?"  After we finished laughing, we compromised by putting up the one side that had accidentally been done properly and pushing the other side against the wall.  Maybe someday we will disassemble the bed to step six and remake it so that both side rails can be installed.  I think we'll at least wait until the nightmares stop, though.Ok, you might think to yourself, what dummies, to not look over the bar and see that the holes would have to be in one certain way.  Let me just say that between us, we have nearly 20 years of higher education.  We have assembled countless IKEA Glurgs and Bjorks. I have reupholstered sofas and built fences.  
This toddler bed bested us. We looked over all of the directions before we started; we lined up the pieces and the "required tools". Proper instructions with all of the tools, a handful of stickers to label the parts and their sizes, and a better quality of hardware might have raised production costs by 30 cents or so: really, Dream On? You couldn't spare 30 cents to make your product actually easy to assemble?  The bottom line is this: although this bed looks cute, save yourself the aggravation and spend the same dough on a KRITTER bed or something from IKEA.  At least you'll know you can put it together without medical intervention.


------------
 B000I2Q0FE 0 1.0
1.0 A Personal Warning to all who have this baby toy by Lamaze. (Stretch The Giraffe)PLEASE READ!!I LOVED this giraffe..this thing is the cutest toy ever and I had placed this on my wish list for my baby shower last May. Well..I ended up receiving the giraffe from a good friend of mine for my baby shower like I had wanted. My son had only played with it maybe twice since he's been born, so it's still pretty brand new.My son is 10 months old now, and we just moved to a new house last week, so I re introduced him to stretch yesterday, (still in super new condition) and he had been sitting in his highchair playing with him, chewing and biting on him as most babies will do with their toys. It's to be expected.I was upstairs unpacking, my son was in his highchair next to my husband while he washed dishes when my son started choking!!He was choking on the white stuffing inside the toys foot! He had managed to chew the thing until it became unstitched at the seams and that's when the stuffing began to fill his mouth. He must have swallowed a majority of what he had in his mouth. Needless to say, it was a frightening moment for all of us!!UGH! I am just so disappointed and now concerned for anyone else's children who may have this toy!Upon close inspection of this toy after the fact, I noticed that the stitching is very poor. Not nearly strong enough to withstand the mouth of a teething baby! I noted other parts of the giraffes seams and stitching where it has signs of also beginning to become unstitched!The very worst part is the fact that had my son gone un noticed and not attended to right away, he could have very well choked completely, or the round small plastic squeaker (also inside the same foot he opened) could have also come out and easily become lodged inside his throat! 
In fact there are multiple "perfectly sized pieces" including a small round ball rattle, a more flat circular shaped rattle and the disc shaped squeaker (the one my son almost managed to get out) inside each foot. Again..they are very poorly secured inside the giraffe and are a major choking hazard! I hate to think of how many babies go to bed with this toy and are possibly left alone with it when they wake up in the morning or after a nap. Parent unsuspecting, because these toys made by learning curve are supposed to be designed with safety in mind. Safety being what Learning Curves number one priority is.  I would have never have thought to check those details since I was convinced this toy or any toy designed by a big name baby toy company was undoubtedly safe to let my baby play with.This toy needs to be recalled ASAP! I have contacted Learning Curve /Lamaze RC2 department regarding this serious issue, and Stretch the Giraffe is on his way back to them for inspection. I have no doubt they will issue a recall. If I were you...and just to warn parents ahead of time..please refrain from buying stretch for now! Or if you have him already, take him away from your baby...especially babies with teeth!In an effort to make me, the consumer happy, they offered me a new toy and that's on it's way now, but in reality, is that really enough? Or does that make up for the fact that we watched our little son choke and go through that trauma? Imagining the scenario if we had not been right there to help him...would the new toy suffice? It's horrifying imagining the things that COULD have happened and Thank God that didn't.In addition I am going to be weary of the new toy and I am not even sure he will play with it until I inspect every inch of it, and now I need to sit with him every second while he plays with in fear that some little thing could go wrong in a toy that has been deemed safe from a reputable company I have grown to love and trust. 
I just believe this is an honest mistake but a serious issue that was grossly overlooked. I know that Learning Curve will do the right thing in the end.I will never send my son to bed nor leave him to play with ANY toy, stuffed animal, or object ever again for fear that he will wake up, be alone and begin innocently teething while I am still asleep, and something similar to this may happen. It's a good thing that wasn't the case this time.Please parents...check the babies toys! You just never know!!Forward this to all your friends who you think may have this toy, may buy it, are expecting a baby, have infants and small children.-Tiffany


------------
 B004DC9T94 0 1.0
1.0 I have had my BOB Ironman for about 4 months now, and I couldn't be happier. I just wish I had known how great the Ironman was before we bought our carseat/stroller travel system, and never used the stroller. I run with my baby several miles at a time, and I have never had trouble with the wheels getting hung up or any other issues while going over (reasonable) obstacles on my runs. (I have run on grass, gravel trails, and paved walks.) The only quirk is the fixed front wheel. It does take a little getting used to, but I tend to lift the wheel just enough to take the weight off the front, and slide the stroller into my turns. It takes a little getting used to, but once you do, it seems like second nature. (My wife still has trouble with it, but I think it is mainly because she hasn't really pushed the stroller that much - I like to push it.)Also, the other thing that is a little difficult for us is the size of the stroller when it is folded up. It fits in my trunk, but not much else fits in there with it and I drive a 2003 Altima which holds a lot of stuff in the trunk. (I cannot fit a suitcase or any other "large" items with it.) Thus, we bought a Thule Rooftop Travel Bag (http://www.amazon.com/Thule-867-Tahoe-Rooftop-Cargo/dp/B00152VYRU/ref=sr_1_11?s=sporting-goods&ie;=UTF8&qid;=1364985747&sr;=1-11&keywords;=thule)that we put on our roof rack when we need to take the stroller and other large items with us. (Removing the wheels does help and they come off easily, but it doesn't solve the problem.) (I have not purchased the BOB Travel Bag [http://www.amazon.com/Bob-Single-Stroller-Travel-Black/dp/B000GKW8EA/ref=sr_1_1?s=baby-products&ie;=UTF8&qid;=1364987235&sr;=1-1&keywords;=bob+travel+bag] yet, but if I have to fly with my stroller, I will - the wheels will make moving it around in a (relatively) compact case much easier. 
(I personally don't want people rifling through my baby's stroller at the airport.))I would recommend some of the accessories available for this stroller:1. Warm Fuzzy [http://www.amazon.com/BOB-WF1001-Warm-Fuzzy/dp/B003KTLYIU/ref=sr_1_1?ie=UTF8&qid;=1364986828&sr;=8-1&keywords;=bob+warm+fuzzy] - this one depends on when you buy your BOB (i.e. age of baby) (it will keep your baby warmer during the cold weather, and it will fill in some of the space inside the stroller if you want to use it for a 2-8 month old so they fit a little more snug in the big interior of this stroller. ...I recently read that you can turn the Warm Fuzzy backwards (with the fuzzy side down) and use it during warm weather too, and it won't be so warm yet will still hold your baby snug on those runs. I have done this and it works. My daughter stays much cooler now.)2. Weather Shield [http://www.amazon.com/Weather-Shield-Revolution-Stroller-Strides/dp/B003KTLYO4/ref=sr_1_7?s=baby-products&ie;=UTF8&qid;=1364986937&sr;=1-7&keywords;=bob+weather+shield] and/or Sun Shield [http://www.amazon.com/BOB-Shield-Utility-Stroller-Ironman/dp/B005GYZLSG/ref=sr_1_17?s=baby-products&ie;=UTF8&qid;=1364986937&sr;=1-17&keywords;=bob+weather+shield]- You should definitely get at least one of these. Just evaluate which one you will need more. (I bought both.) The Weather shield is water proof on the front and it will keep some warmth in during the cold weather it is still breathable through intelligently placed air vents at the front and back; Sun Shield will breath better and block the sunlight so your baby doesn't get sunburned on your run or walk. I also discovered that by using the Weather Shield, all of the toys I put in the stroller with my daughter stay inside the stroller, so I don't have to constantly watch her to make sure her favorite teether doesn't end up on the sidewalk. Also, another note: the Sun Shield and Weather Shield for the Revolution fits the Ironman. 
You can check the BOB Gear website if you want to verify. bobgear.com3. Snack Tray [http://www.amazon.com/BOB-Single-Snack-Tray-Black/dp/B004LT5330/ref=pd_bxgy_ba_text_z] - This accessory can wait a while. Your baby won't use it right away, and anything you try to put on there will get knocked onto the ground pretty quickly. (Mine is 8 months old and still doesn't use it.)4. Handlebar Console [http://www.amazon.com/Handlebar-Console-Single-Strollers-Black/dp/B003KTLYGM/ref=pd_sbs_ba_2] - A great addition to my BOB: it has 2 drink holders & a zippered pocket (that we use for sanitizing wipes and car keys). (Also, if something happens to yours, like mine (one of the velcro straps ripped off the other day) contact BOB and explain the situation; they have exceptional customer service and will do whatever they can to make it right)I was reviewing the Ironman on Consumer Reports earlier, and I saw that the pre-2011 monels were recalled due to a strangulation hazard from a draw string that wasn't near the child area. I thought you might like to be aware that the new models have no draw strings (anywhere). I attached a copy of the recall notice below:FOR IMMEDIATE RELEASEFebruary 23, 2011Release #11-143 Firm's Recall Hotline: (855) 242-2245CPSC Recall Hotline: (800) 638-2772CPSC Media Contact: (301) 504-7908HC Media Contact: (613) 957-2983Note: This product has been recalled subsequently for another hazardJogging Strollers Recalled by B.O.B. Trailers Due to Strangulation HazardWASHINGTON, D.C. - The U.S. Consumer Product Safety Commission and Health Canada, in cooperation with the firm named below, today announced a voluntary recall of the following consumer product. Consumers should stop using recalled products immediately unless otherwise instructed. It is illegal to resell or attempt to resell a recalled consumer product.Name of Product: B.O.B. single and double strollersUnits: About 337,000 in the United States and 20,000 in CanadaImporter: B.O.B. 
Trailers Inc., of Boise, IdahoHazard: A drawstring on the stroller can get wrapped around a child's neck, posing a strangulation hazard.Incidents/Injuries: The firm has received one report of an 11-month-old girl who got entangled at the neck by the stroller's drawstring. The child was freed by her mother.Description: This recall involves the following 11 models of B.O.B. single and double strollers. The name "B.O.B" appears on the cargo basket under the stroller and on the front of the stroller. All of the recalled strollers have a yellow/orange drawstring at the rear of the canopy which is used to gather loose fabric when the canopy is pulled back. Strollers have the serial number either stamped in the frame or on a white label located on the stroller's rear right leg.Model Serial # rangesSport Utility Stroller 12362 - 35107AA00001 - AA025490AA900000 - AA999999Sport Utility Stroller D'Lux 12362 - 35107AB000001 - AB007940AB900000 - AB999999Ironman 800000 - 803700AC000001- AC027923AC900000 - AC999999Sport Utility Duallie 002001 - 008068AD000001 - AD011252AD900000 - AD999999Ironman Duallie AE000001 - AE008909AE900000 - AE999999Revolution AF000001 - AF189112AF900000 - AF999999Revolution 12" AK000001 - AK024149AK900000 - AK999999Stroller Strides AG000001 - AG011163AG900000 - AG999999Revolution Duallie AH000001 - AH072921AH900000 - AH999999Revolution Duallie 12" AL000001 - AL012657AL900000 - AL999999Stroller Strides Duallie AM000001 - AM003229AM900000 - AM999999Sold ... between April 2002 and February 2011 for between $300 and $600.Here is the Consumer Reports review:CR's TakeWhen we performed the impact test from the American Society for Testing and Materials safety standard for Strollers, the BOB Ironman Sport Utility D'Lux (and BOB Sport Utility) stroller failed. The standard specifies that the impact test be conducted at the stroller's claimed maximum weight capacity--which is 70 pounds for both of these models. 
(According to growth charts from the Centers for Disease Control and Prevention, a 70-lb. child may be anywhere from a large 7-year-old to a very small 12-year-old.) While this caused this stroller to receive a Fair score for Safety, it should perform well when using it with children in the more typical 40-pound range. Learn more in Detailed Test Results below.Detailed test resultsWhen we performed the impact test from the American Society for Testing and Materials safety standard for Strollers, the BOB Ironman Sport Utility D'Lux and BOB Sport Utility Stroller failed. The standard specifies that the impact test be conducted at the stroller's claimed maximum weight capacity--which is 70 pounds for both of these models. (According to growth charts from the Centers for Disease Control and Prevention, a 70-lb. child may be anywhere from a large 7-year-old to a very small 12-year-old--all of whom may be too old, or just unwilling, to ride in a stroller.)The standard specifies that, when a stroller is put through the impact test, "the fold locking/latching mechanism shall not disengage or break, or both."  But when loaded with a 70-lb. "passenger" in our test labs, the BOB strollers, at times, collapsed completely on impact. At other times, the shock absorber adjustment mechanism, which is also part of the fold mechanism, would partially disengage, slipping from the second setting (the one for a child passenger weighing between 41 and 70 lbs.) to the first setting (for a child passenger weighing up to 40 lbs.). When we tested using a far more typical 40-lb. "passenger," both BOB models passed the impact test.Because the BOB models failed our impact test at their claimed weight capacities, we lowered their safety and overall scores. However, we feel that the 70-lb. weight limits for the 2 BOB models are simply excessive, and that it's far more likely that the weight of a child riding in any stroller would be closer to 40 lbs. 
or less.The Ironman was Excellent in running performance, and both models were Excellent in maneuverability; they would have been higher in our Ratings without the Safety concern at what we believe to be an unrealistically high weight limit.We spoke with the manufacturer about our impact test findings. They told us that the models in question passed internal and third-party testing for impact at 70 pounds.  We asked them specifically why there is such a high weight limit for the strollers. They told us that their products are designed for longer duration runs and walks where older children may not be able to keep up and would be in a stroller. In answer to our question, they said there were no reports of injuries or fatalities with their products associated with these issues.The BOB Ironman Sport Utility D'Lux is part of the stroller test program at Consumer Reports. In our lab tests, jogging stroller models like the Ironman Sport Utility D'Lux are rated on multiple criteria, such as those listed below.Performance:Performance is panelists' judgments of ease of pushing and maneuvering on varied terrain while running.Off-road maneuverability:Off-road maneuverability is trained panelists' judgments of ease of pushing, maneuvering and turning while walking on a course that included flat pavement, up- and downhill sections of pavement, and a rough dirt/grass/mulch section with tree roots, fallen branches, and the like.Ease of use:Ease of use includes such items as opening and folding the stroller, using the harness, adjusting the backrest, lifting and carrying the folded stroller.Overall, I have been very satisfied with our purchase. I would buy it again, and the price on Amazon of $309 is the lowest I have seen online. (I paid around $350 on Albeebaby.com, which was the lowest at the time - Amazon has come down recently on this item!)


------------
 B001N44UX0 0 1.0
1.0 I have a 12 mos old daughter and purchased this gate about 6 weeks ago.  Today she reached through the narrow areas on the outside of the swinging door area of the gate.  She reached past her elbow, and then bent her arm, thus making the width of her arm increase.  She tried to pull her arm out but could not.  She then pulled harder and wedged the flesh of her arm in the narrow space between two bars.  She started screaming, which is when I saw what she did (I was folding clothes).  I immediately ran over and tried to pull her arm out without realizing exactly how it was stuck, causing her even more pain and to cry even harder.  It took me a few mins with her in pain and screaming to figure out it was stuck because it was bent and wedged in there.  I straightened her arm and was able to extract it.  Just as crib rails now have a spacing requirement to prevent this very issue, it shouldn't take a rocket scientist for a manufacturer of a SAFETY product to consider that a toddler might be interested in what's on the other side of the gate and try to reach through.  As a first-time parent, I embarrassingly admit I did not notice that narrow spacing and imagine what would/could happen, but a company with the majority of its business based in the SAFETY of babies and children has no excuse.  As we all find in the parenting journey, I have learned this lesson the hard way and will be sure not to allow such a dangerous product into my home again.  The reason I purchased this gate was because it was one of the few that expanded as widely as I needed.  However, that is nowhere near as important as protecting my baby from needless injury like this.  I shudder to think how this might have gone if I had been in the shower or restroom and unable to immediately respond, all the while thinking everything was ok because I have a safety gate installed to prevent her from getting to dangerous areas.  
I have already reported this to the US Consumer Product Safety Commission and will follow this review with a report to Summer Infant itself.  DO NOT RISK YOUR CHILD'S SAFETY WITH THIS PRODUCT!!!!!I have included (in the "add your own images" are of the product at the top of this page) a picture of my daughter's arm immediately following the incident.  I am sure it will become a nasty bruise in a day or two.  We'll see if this site allows this image to be included.UPDATE:  Summer Infant has reached out and sent a free shipping label to return the product.  They will be sending it to quality control so they can investigate the safety concerns.  They will follow up with me.  I am pleased with their prompt response and attempt to investigate.  They have a mesh gate that is similar in width which I will try in exchange.


------------
 B004CJ9D6W 0 1.0
1.0 I have NEVER written a review before for anything! DO NOT BUY THIS PRODUCT!!!This is a very expensive monitor and the features are awesome!! It is really a fantastic monitor, truly. The clarity is great, the VOX feature is awesome, the intercom is fabulous for older kids (although there is a delay on it so after you speak you cannot hear the response as it won't pick up sounds for several seconds after you let go of the "talk" button) And I REALLY hate that you cannot mute the volume. The lullaby feature is nice, but I do NOT want to sit there and listen to the lullaby myself! But, you must. No mute...BUTafter only 10 months of use our camera just stopped working!! I unplugged the camera to move it to a safer location as our son had become mobile and I was concerned he could reach the cord and pull it down onto himself. When I plugged the camera back in... nothing. The green power light would not come on!!I (naturally) thought it was the outlet. Checked the fuse box, etc. But it was not the outlet. No other outlet in the house worked (despite other appliances working just fine) so clearly it was not a problem on our end.We then attempted three different sets of new batteries. That also did not work. So it is not a battery problem.When plugged in the camera briefly indicated that it was receiving power, so it was getting power, just not turning on...What a pain in the butt!! When I bought this system it had fabulous reviews. What a shock it was to come on here and see how the reviews have dramatically tanked since we decided on this system!I called but could not get through. I emailed and emailed and emailed again, but two weeks have gone by and there has been no response. I have sent a message through their "contact us" form on their website, but have received no response... by the time these guys get back to us our warranty will have expired! I think we'll be out the $230 we spent on this machine! 
And all this time we've been without a monitor for our little one-who happens to have a bedroom on a different level of our home. It has been a total nightmare!!!As of today I still have not heard back from Levana, and despite their bogus claim that you can call or reach them through live chat I have been unable to reach them despite multiple attempts.Just now I tried to plug my camera in and it lit up and started working!!! I have NO idea why.... and I'm sure it won't last. I can only hope that I'll get at least another 6 months-1 year out of the camera and I will NEVER buy another Levana product EVER again!!I will also be filing a complaint with the BBB and I recommend all the negative reviewers out there that have received horrible customer service do the same!!! Not all families have hundreds of dollars to throw down the drain on a fancy monitor just to replace it again before one year has passed!! So much for their wonderful "lifetime" support!!!!SHAME on you Levana!!!! Stop pedaling your horrible "made in china" GARBAGE!!!!IT IS A DANGEROUS MONITOR!!!When you're expecting something to let you know your child is safe and it fails on you it can really lead to suffering for your child as well as injury or even death!!! What of the battery life? It is NOT 4.5 hours as they claim. I brought my monitor upstairs with me to take a short nap and I did not expect to need the charger as the monitor was completely charged. I fell asleep and since my then four-month-old was still not a very good night sleeper I was exhausted and was sleeping very deeply. The monitor's battery failed and the machine shut off at some point because I woke up about 45 minutes later to the very faint sound of my infant son screaming his head off. 
Our bedrooms are on different levels of the house and I was COUNTING on this monitor to let me know if my son woke up.My poor infant son had rolled in such a way as to get his fat little thigh wedged between the bars in his crib with the leg hanging out. He had twisted and he couldn't get himself back through. This caused him to have his face smashed pretty hard into the mattress surface. Also, by the time I got to him his foot was absolutely purple. I had a very difficult time removing the swollen leg from the crib bars and thank GOD my son did not exhaust himself and suffocate in the crib mattress while I was sleeping and relying on my monitor to warn me if he needed me for anything.My poor son had a purple bruise on his nose because of being smashed into the mattress and his leg had such an awful and painful looking purple indented ring around it for days. Thank God his foot and lower leg did not suffer any permanent vascular damage from lack of oxygenated blood. Who knows how long my little sweetie was stuck in there before I woke up? That is an awful thing to happen to a child and his parents, but with all the concerns about crib bumpers I hadn't installed them!I installed mesh bumpers after that, but I have never again fallen asleep without the monitor attached to the plugged-in charging base!!!Save yourself the worry, hassle and headache!! Go with a tried-and-true brand that truly boasts of fabulous customer service!!!


------------
 B000HCX5EY 0 1.0
1.0 My 10-month-old son was enamored of his little friend's activity cube. I went on the hunt for one to give him for Christmas (his friend's model is no longer produced), and found this well-reviewed cube, which we happily snapped up from Target for about $[...].The baby loved it, but within one day of playing with it, the bottom brace for one of the little doors that opens and shuts, swung down. Turns out, the hot-glue-and-small-metal-pin construction they use was at fault. It appears that one of the metal pins was missing, causing the brace to swing down, which made the door fall off--including the two, inch-long, sharp metal pins that act like hinges. These two pins were loose on the floor; thank goodness I was right there and saw it happen, or they would have gone right down my son's throat (he was reaching for them as soon as they fell out). That was the end of that. We returned it to Target immediately.When I was researching activity cubes, I wasn't concerned with the toy tipping over--I figure my son is going to fall down quite a lot, and get bumped by things over the years--but these tiny metal pins are truly dangerous and could lead to serious time in the emergency room and hospital.Due to the poor construction of this toy, I can not recommend enough that you look elsewhere for a better-constructed option. I will be contacting the manufacturer about this frightening flaw to their product.


------------
 B00020L78M 0 1.0
1.0 I personally didn't buy this gate, specifically because of the alarming number of reviews I found online about plastic shards breaking off of it. The following are only two of MANY reviews I found:"Buyer Beware. We had two different gates sent to us. The initial one was missing a part. The second one had little pieces that were falling off of it the size of a piece of rice and sharper then you can imagine. We found this out because our nine month old was gagging on a piece - then investigated and found two other pieces on the floor. We took the part we needed out of the new gate and put it in the first one we received. We looked, and though that one looked okay. Last night I found two pieces that had broken off on the floor, thankfully before my son did. This happened to us twice - not a coincidence. I have talked to guest relations and they are putting out a product recall. This could have been a FATAL situation with our son!""DO NOT BUY THIS! I CAN'T BELIEVE I ORDERED THIS! Even after seeing one review that said they found dangerous shards of the product on the floor, their child had choked on these pieces. I just recieved the product and looked it over, it looked fine, set it up and put my 7 month old in it. He cried a bit so I climbed in...that is when I found some small rice sized VERY SHARP! pieces of the gate that had fallen off from it . I can't figure out where they came from. I assumed the other review must be a fluke or that the problem was fixed as they said the company was suposed to put a recall. I called the company to complain and they said they had never heard of the problem, and only offered to send a replacement gate. If a small child swallowed one of these incredibly sharp pieces it has the potential to be fatal!! DO NOT BUY!!!!!"In short, the product should probably be recalled. Do not put your children at risk!!


------------
 B0007CQ6OK 0 1.0
1.0 Update - July 23rdToo much to tell about how awful this company is, so I will just paste my email to them:"Hello,I am writing to voice my family's anger over your unsafe, cheap co-sleeper.  If you recall, I had a problem with my newly purchased co-sleeper back in May, which I immediately called about and was told to send the frame back.  At that time, I asked to speak to a supervisor about the situation, and was told that I would be contacted shortly.  However, Mayra was the only one who I was able to speak with, after numerous attempts to be put in contact with the supervisor.  After a huge delay due to mistakes on your end, I finally got the co-sleeper sent back to the company, after speaking with Veronica on June 13th.At this time (June 13th), I asked to speak with the manager of the company, and Veronica told me that Sharon was not in at the time, but would be in later that day.  I obviously never heard from Sharon, or anyone else from this company, for that matter, from that point on.  I was inquiring to speak with the manager after voicing my concern over just repairing the frame, instead of receiving a new product.We received the frame shipped back to us, without so much as an email or follow up from Arm's Reach.  The entire process to get our frame back took over a month from the time I first contacted customer service.  (I had initially asked for a replacement frame, since our daughter had been used to sleeping in it by this time, and I did not want to completely disrupt her sleep pattern while waiting for the repair, which it did).  The frame came back to us in a box with one sheet of paper, which did nothing to describe what was wrong with the frame, or what was done to "repair it".I use the term "repair" in disgust, since it clearly was never properly repaired.  We assembled the co-sleeper, again, according to exact instructions in the manual.  
Two nights later, I noticed that now if I barely touched the rail of the co-sleeper nearest our bed, that it would collapse and had to be pulled back up into the lock position.  Obviously, we felt very uncomfortable using the co-sleeper, but after having dealt with your awful lack of customer service the first time around, I did not waste my time trying to reach Mayra, Veronica, or Sharon again.To avoid the problem of the collapsing side rail, we have been careful to not touch or even brush against it, for fear that it would collapse with our daughter in it.  Without touching it, it does stay up.  Tonight, (July 23rd) however, I laid my daughter down from the other side of the co-sleeper all together, and the other side rail immediately collapsed when I laid her in the co-sleeper.  She was in it!!!  I had not put any pressure on the side rail, it just collapsed.  After taking her out, waking her up in the process, and moving her elsewhere to be put to sleep for the night, I attempted to re-lock the side rail.  It took several tries, and does not seem secure at all.I can not believe that your remedy to fix the situation of a faulty co-sleeper was to send our same frame back to us "repaired", which still breaks and is worse off than when I first contacted you.  I am so turned off by your company, not only because of the safety hazards associated with putting a baby in this piece of crap product that you make, but the complete lack of care from the supervisor or manager. You would think that someone would be concerned that their products for babies are faulty and unsafe.  These are babies we are talking about here!  Not pets, not adults...innocent babies who need the safest place possible to sleep.  Your company disgusts me, and I am honestly surprised that you are still around.  Not for long, I am sure."Previous review:We had been using the euro mino co-sleeper for almost 3 months with our Ikea platform bed, and were mostly happy with it.  
My husband had some frustration putting it together, but once he did it seemed sturdy and safe, and with an extra pad in it our baby was starting to sleep in it for most of the night.  Then, we needed to stay overnight at in-laws for a funeral, and needed to pack up the co-sleeper for the first time for the trip.  We were happy that the co-sleeper was advertised as travel friendly, since we knew we would eventually need an overnight bed, and this folds up into a small-ish bag with handle.  HOWEVER, we found out that it did not go back together - 3 of the 4 sides would not lock into place once we were at my in-laws.  There were 3 adults working on this for a couple hours, it just wouldn't lock together.  We had all of the instructions, warnings, manuals, etc with us...and it just wouldn't work. Extremely frustrating when it is late at night, and the customer service is only open M-F 9am-4pm.  We went online trying to find solutions from others that had the same problem; we tried pulling fast, made sure the bottom legs were unlocked, etc, etc...nothing worked.  Our baby had to sleep in a car seat.  It was such an awful experience, and of course I called Arm's Reach the next business day that they were open. I was told I had to send the co-sleeper back for repairs (no offer to help trouble shoot over the phone, or any other assistance).  They estimated it would take about 2 weeks for shipping and repairs, which is so frustrating since we finally had our baby getting close to sleeping through the night in this.  Now we will be disrupting the entire sleep situation. I asked if they would send a replacement in the meantime, and they said "they were unable to".  Currently waiting for UPS to pick up the broken product, which I guess will get fixed and we get the same one back??  Doesn't seem like a great idea.  
Overall, I guess this product is fine...IF you plan to only set it up once, and never take it down to move it or travel with it, or set it up for other babies in the future?


------------
 B0002ZOI9W 0 1.0
1.0 This chair can tip over if your child reaches forward, overbalancing the chair.  My son tipped the chair and fell face first into the hardwood floor.  Luckily, he was not seriously injured.  The instructions for the chair say NOTHING about being usafe for infants who can sit up, but that is what a representative at Fisher Price had to say about it!  "NOT SAFE FOR INFANTS WHO CAN SIT UP."  Shouldn't that be on the instructions?  Infant to Toddler Rocker is a misleading name, if that's the case.  The response by Fisher Price was to offer a refund.  This product should be RECALLED!  This could have led to a serious injury.  I see numerous reviews about this product being unsafe.  Where is the U.S. Consumer Product Safety Commission?  I called them numerous times but could not get my call returned.  BEWARE!


------------
 B00115Q5A2 0 1.0
1.0 We bought this jumper two weeks ago.  My six month old daughter LOVES it.  However, yesterday my husband called me and said, "she got that purple thing off and had it in her mouth."  Of course I contacted baby einstine as the toy hook is a very small piece and is a choking hazard.  They put me in touch with Kids II Corporation.  I told the woman what happened, she said it was a mis-mold on the piece and she knows that because they have been getting a LOT of calls about this piece coming off.  She asked if I wanted a replacement part.  Of course I said NO!!!  She asked why I was calling if I didn't want a replacement part.  I explained to her that it was a choking hazard and they needed to do something about it as a child could die from the defective piece.  She then stopped me and said the toy hook was NOT defective, it was just mis-molded.  (Ummm...isn't that defective?) And she also informed me it wasn't a choking hazard, there was the possibility that a child could get it lodged in his/her throat, but it isn't a choking hazard.  (Ummm...isn't choking on a toy usually done when it gets lodged in their throat?)  She then informed me that she was not going to write a report up since I didn't want a replacement piece (can you say covering something up!).  I requested her name and extention.  She said there was no way to contact specifically her.  I requested a copy of our phone conversation as she said it was being recorded.  She said there was no way to get a copy of it.  I demanded to speak to a manager who took my information down and said she would forward it to the engineering department.This peice is a choking hazard and is dangerous!!!  Do not purchase it!!!


------------
 B001KVIBXQ 0 1.0
1.0 We were given this product when my daughter was in utero. It shipped to our house and remained in its original box until we opened it.  When we opened it, our daughter was the recommended age of 10 months.  We put her on it and, with a parent guiding her gently, with one hand on her back and the other holding her hand and the handlebar, we proceeded to let her rock gently.Immediately, the front chest area of the ladybug &#34;fell&#34; forward as if it had come off a stand or something inside the body of the ladybug.  The rocker went forward way too much and our daughter was jutted forward.  Had it not been for the parent right next to her, I believe she would have flipped headfirst over the rocker.  We attempted to use it again and the same thing happened. We put the rocker aside.Having repeatedly attempted to help her enjoy the rocker safely in the past year, it now sits in her room, with the tag on, and she uses the musical buttons and that's it.What a waste.  Today we once again attempted to let her use the rocker in a final attempt to enjoy it, and despite a parent sitting right next to her, my daughter rocked gently twice and flipped headfirst over the handle.  She hit her chin on the handle, splitting her chin open and the rocker fell over on her.With our experience, and having searched many websites selling this product and hundreds of reviews, it is now unequivocally my opinion that this product is unsafe. I've filed a report with the Consumer Product Safety Commission.  I have contacted the company requesting a statement with regards to their plan to recall or issue a product alert.


------------
 B001R95J2W 0 1.0
1.0 Attached is a letter I sent to Prince Lionheart.  It pretty much sums up my review, so I figure I would just copy and paste.  I will post updates if the company responds.Dear Sir or Madam:I bought two of these wipes warmers when I had my twins in November 2011.  I loved both of them over other wipes warmers I had owned in previous years (Dex baby and Babies R Us Brands).  The moistened replacement inserts were a plus for me- because of them, the problem of needing to add water was not an issue with your warmers as opposed to others I had used.  Because I had twins, they both received high usage and were both maintained/refilled and moistened on a weekly basis. However, a few months ago, I noticed that one of my AC inserts was severely corroded after taking on water.  Water??  The cord, nor the warmer had never been anywhere near water?!  I stopped plugging it in, aware that it could pose a fire risk.  And in a nursery right next to my baby's crib, no less!  I assumed it was just a fluke and went about using my other one, which was not anywhere near the nursery on the other end of my home.Last week, I noticed the SAME thing had happened to my second wipes warmer AC adapter insert.  A simple inspection showed me that when the top is lifted, condensation that has collected drips through onto the AC adapter insert - a design flaw and undoubtedly what is causing this problem with so many consumers.  I am fortunate that nobody was hurt and I am just out $60 for the wipes warmers.  However, in searching for a replacement, I started reading reviews of your premium wipes warmer on Amazon.com.  Alas, I am not the only one that is experiencing this issue.  I counted at least half a dozen people with the same issue - and that is just from the small population that actually takes the time to post a review.  Then, I saw someone had also reported this wipes warmer on saferproducts.gov for, you guessed it, the same thing.  
I noticed that your corporate response was one averting blame and indicating it was not your product that is the problem.  Very disappointing, to say the least.  I take very good care of my items, live in a home with a whole house generator and a whole house surge protection system -it is very unlikely that there are wiring or power issues here that would have caused this.  Furthermore, it was not exposed to water other than the water that leaks through the cover when in use.  All signs point to the fact that this is not a consumer problem, but a manufacturing problem.PLEASE for the safety of every family using your products, bring this issue to light with your design team so that this issue can be fixed, and stop averting blame to the consumer.  My wipes warmers were refilled and instructions were carefully followed.  I agree that a lot of the times, product safety issues are the result of poor, neglectful parenting or the lack of common sense, but this is simply not the case here and to continue to ignore it and act like it is not your fault is unethical and simply put, unequivocally wrong.Update May 2013:  Price Lionheart did respond. Here is the response-"Dear Ms. XXXXXXXX,Your message has been forwarded to me, since it referred to a safety concern about one of our products. I am the head of operations for Prince Lionheart, where my responsibilities include oversight of quality control and regulatory compliance. Thank you for taking the time to explain your experience. I was greatly relieved that no injury to your family or damage to your home occurred, but sorry that you had to go through this troubling incident at all. I would very much like to learn as much detail as possible about the circumstances, since first hand information from real-world environments is extremely valuable in our efforts to improve our products. 
I hope you'll respond to this message with a phone number and convenient time when I may reach you for a short conversation.In the meantime, you might find some measure of reassurance in knowing that the issue of producing the safest possible wipe warmer, even though both water and electricity are inherent to its function, is always first and foremost in the efforts of our design team. Much care has been exercised in developing the channeling that returns condensation to the wipe tub, where it belongs, so your observation on this subject is of great interest to me. Of the many, many wipe warmers Prince Lionheart has sold over almost fifteen years, we are not aware of any instance of a fire being started or an individual being injured by one of them, but we're always seeking ways to further improve our warmer.Again, thank you for your time in bringing this to our attention. We're sorry for the distress it caused you, and I hope to speak with you soon.Best Regards,Richard Siegel"I did reply to this message providing him with a phone number with which to contact me, but I never heard from him after that.  It has been more than a month.  Not really impressed by Prince Lionheart to say the least.


------------
 B0000DJ3FM 0 1.0
1.0 This toy seemed like a great deal for the money--the least expensive rocking horse out there.  There's a reason it's so inexpensive--it's cheap!!!  There are stickers which must be manually applied.  They're not that easy to line up and they are easily removed by a child, posing a choking hazard.  In addition, on more than one occasion, my son turned the horse upside down and attempted to mount it from the wrong angle.  The rungs on the bottom are spaced just closely enough together for a child to get a leg caught between.  They hold his leg very securely and he almost severely injured himself twice since he was unable to get his leg out on his own and twisted it badly.  It also left a large, red mark on his leg after we got him out.  He literally was unable to get out himself.  A toy made for such a small child should not have stickers that can be removed but, rather, molded, colored plastic.  I called Fisher Price about this toy and have asked them to seriously consider putting a recall on it.  It is very dangerous and we have since thrown ours away!!!


------------
 B001KVEEG4 0 1.0
1.0 We were given the LuLy LadyBug Rocker (same company, different design) when my daughter was in utero. It shipped to our house and remained in its original box until we opened it. When we opened it, our daughter was the recommended age of 10 months. We put her on it and, with a parent guiding her gently, with one hand on her back and the other holding her hand and the handlebar, we proceeded to let her rock gently.Immediately, the front chest area of the ladybug "fell" forward as if it had come off a stand or something inside the body of the ladybug. The rocker went forward way too much and our daughter was jutted forward. Had it not been for the parent right next to her, I believe she would have flipped headfirst over the rocker. We attempted to use it again and the same thing happened. We put the rocker aside.Having repeatedly attempted to help her enjoy the rocker safely in the past year, it now sits in her room, with the tag on, and she uses the musical buttons and that's it.What a waste. Today we once again attempted to let her use the rocker in a final attempt to enjoy it, and despite a parent sitting right next to her, my daughter rocked gently twice and flipped headfirst over the handle. She hit her chin on the handle, splitting her chin open and the rocker fell over on her.With our experience, and having searched many websites selling this product and hundreds of reviews, it is now unequivocally my opinion that this product is unsafe. I've filed a report with the Consumer Product Safety Commission. I have contacted the company requesting a statement with regards to their plan to recall or issue a product alert.You should file a report with the CPSC about this. I have. My daughter was injured rocking normally with parental supervision. The more of us who say something, the better.Someone else filed a review as well.  [...]


------------
 B00FOL7CME 0 1.0
1.0 We have the white version of this cradle glider. It's a bit shaky, but looks nice.Unfortunately this product is of very low quality and A SAFETY HAZARD to your baby! The base board (where the mattress rests on) is too small. It can fall out of the frame!! Baby and mattress thus fall out of the bed!!!!Our daughter fell out of this bed twice, rather, she fell through the mattress support (and she was among the smaller babies at that time).The cradle has been recalled on November 13, 2013. Hazard: "The mattress support board can fall out or slide out of the bottom of the cradle glider posing a risk that babies can fall out and suffer injuries." ([...])  Consumers should immediately stop using the recalled cradles.We have contacted Dream on me within a week about this recall, and many times since then. Three months later we still have not received the promised repair kit. We cannot use the unsafe cradle, it poses a risk for our baby. The recall center is not helpful - they are still waiting, quote, for the shipment from China to arrive.Our baby will soon have outgrown the cradle. Dream on me has made us for a pile of dangerous junk made in China.Buyer beware!Do not buy from Dream on Me. Their quality control, sense of responsibility, and customer service deserve lemons, but not a single star.


------------
 B002TUTPQO 0 1.0
1.0 Top corners crack easily and cause small sharp pieces of plastic to fall in the pen. It also leaves sharp broken corners. VERY DANGEROUS!!!!! I purchased two of these play yards....one for upstairs and one for down. We do not travel with them. The only time they are moved is when we are cleaning them or the room. A while back I found a hard sharp piece of plastic in the pen. At the time I did not know where it came from but was relieved that my daughter did not swallow it. This afternoon I found another piece of hard sharp platic in the pen. I took my daughter out or the pen to see if there were more. When I lifted her I found that her ear was cut and bloody. I emptied the pen and searched the entire thing to see where the plastic could have come from. I found that the hard sharp plastic came from the top corners of the crib. Investigating both Pack and Play Bugs quilts I noticed one had 2 broken corner and the other had 3 broken corners. I have only found 2 pieces so I am hoping my daughter did not eat any pieces. THIS IS VERY UPSETTING TO ME AND I AM SURE THAT I AM NOT THE ONLY ONE REPORTING THIS COMCERN TO GRACO. I RESEACHED THE CONCERN TO SEE IF THERE MIGHT HAVE BEEN A RECALL I HAD MISSED. ALTHOUGH I DID NOT FIND A RECALL ON THE PRODUCT I DID FIND NUMEROUS OTHER CONSUMERS STATING THE SAME CONCERN. THIS PRODUCT IS DANGEROUS IF NOT DEADLY.


------------
 B00FOL7CF6 0 1.0
1.0 This cradle looks nice. Once you've assembled it you will notice it is a bit shaky. But that is not the problem.The problem is that this product is dangerous. The base board (where the mattress rests on) is too small. It can fall out of the frame!! Baby and mattress thus fall out of the bed!!!!Our daughter fell out of this bed twice, rather, she fell through the mattress support (and she was among the smaller babies at that time).The cradle has been recalled on November 13, 2013. Hazard: "The mattress support board can fall out or slide out of the bottom of the cradle glider posing a risk that babies can fall out and suffer injuries." ([...])  Consumers should immediately stop using the recalled cradles.We have contacted Dream on me within a week about this recall, and many times since then. Three months later we still have not received the promised repair kit. We cannot use the unsafe cradle, it poses a risk for our baby. The recall center is not helpful - they are waiting, quote, for a shipment from China.Our baby will soon have outgrown the cradle. What we have gotten from Dream on me is a pile of dangerous junk made in China.Buyer beware!Do not buy from Dream on Me. Their quality control, sense of responsibility, and customer service deserve lemons, but not a single star.


------------
 B00186YSMQ 0 1.0
1.0 WARNING to all owners newer model Peg Perego SIP 30/30 Infant Seats. It has two major defects.1> Defective part causes cuts on baby's heels and legs.2> Defective latch can cause seat to fall out of Pliko P3 stroller and have baby land on ground.Peg Perego's SIP 30/30 Infant seat is defective and they have not made registered customers aware of it.  They are not taking responsibility for the safety of the children using this seat.  They have been aware of the first issue since the beginning of summer 2008.The first issue with the seat causes cuts. We discovered lacerations on our son's ankles for over a month but could not figure out what was causing them. On a road trip we discovered a large bleeding gash on the back of his ankle about  the size of a DIME. We traced the source to the SIP 30/30 car seat's plastic strap cover near the feet. There was blood on the cover and the edges were sharp enough to cut my finger.  When I called Peg Perego, it took over an hour to get through to their customer service. Once I did, I discovered over 50 children have already been injured and they anticipate FOUR THOUSAND seats could have this issue. DO NOT follow their instructions to remove the plastic cover until receiving the replacement. The metal latch further cut up the back of our son's legs while waiting for the part. It took almost 4 DAYS to get the part from them instead of shipping it priority overnight.It is Peg Perego's RESPONSIBILITY to inform registered seat users of this dangerous issue, yet they have done NOTHING to warn people. Not even an e-mail. They plan to ship the parts when they have replacement. It has been almost 5 months since they first learned about the issue. How many more children will be injured before they make the public aware of this defect. Since discovering this issue, I have told every parent I see with this car seat about this issue. 
Their response is normally "I was wondering where those cuts where coming from."The second issue with the seat is latching into Peg Perego's Pliko P3 stroller.  The infant seat has two possible latch points. We attached the seat to the stroller using the outside latch. We heard both sides of the seat "click" as explained. While pushing the cart the infant seat fell off the stroller and landed on the sidewalk with our son head down and screaming. We were lucky the stroller wasn't stopped by a street where it could have fallen in front of a car. The fact it can even latch to something that will not hold the seat, much less EASILY latch,  is a SERIOUS design defect that is extremely dangerous to children. It could be fatal.Peg Perego needs to take responsibility for their poorly designed products and let people know about these issues. Hopefully as a buyer you will review this and evaluate if their products are safe for your children before buying and they are injured.If I could give this product less than 1 star, I would.


------------
 B000I2RK80 0 1.0
1.0 We have 9 month old twin girls who liked this mirror. Seemed like a safe toy but this morning I heard one of them coughing and when I investigated I found the stitching had come undone along the side, exposing wads of the extremely fine fibrous padding material used to line the side. My daughter had a large piece of this in her mouth, which had blocked her airway and she had started choking. Only by good fortune had I heard her coughing and was able to get the material out of her mouth, which revealed a matted string had extended into her throat, causing the choking response.In my view this could have been a fatal incident. I believe this happened because of poor manufacturing standards. The padding material should have been encased in a protective sleeve, and the stitching on the hem of the mirror should have been of a much more robust nature. It's important to note that the toy had not been abused, and I would describe the usage as pretty low. Certainly far below the point the product would start to disintegrate.This is clearly an extremely dangerous product, which should be recalled immediately. The build quality is utterly sub standard and it's only a matter of time before someone else's child comes to harm as a result of the shoddy design and construction.


------------
 B003SX0B6E 0 1.0
1.0 Health officials are warning parents not to use a special device designed to help keep babies in certain positions as they sleep. The device, called a sleep positioner, has been linked to at least 13 deaths in the last 15 years, officials with two federal agencies said on Wednesday."We urge parents and caregivers to take our warning seriously and stop using these sleep positioners," Inez Tenenbaum, the chairman of the Consumer Product Safety Commission, said in a statement.The sleep positioner devices come primarily in two forms. One is a flat mat with soft bolsters on each side. The other, known as a wedge-style positioner, looks very similar but has an incline, keeping a child in a very slight upright position.Makers of the devices claim that by keeping infants in a specific position as they sleep, they can prevent several conditions, including acid reflux and flat head syndrome, a deformation caused by pressure on one part of the skull. Many are also marketed to parents as a way to help reduce a child's risk of sudden infant death syndrome, or SIDS, which kills thousands of babies every year, most between the ages of 2 months and 4 months.But the devices have never been shown in studies to prevent SIDS, and they may actually raise the likelihood of sudden infant death, officials say. One of the leading risk factors for sudden infant death is placing a baby on his or her stomach at bedtime, and health officials have routinely warned parents to lay babies on their backs. They even initiated a "Back to Sleep" campaign in the 1990s, which led to a sharp reduction in sudden infant deaths.With the positioner devices, if an infant rolls onto the stomach, the child's mouth and nose can press up against a bolster or some other part of the device, leading to suffocation. 
Even if placed on the back, a child can move up or down in the positioner, "entrapping its face against a bolster or becoming trapped between the positioner and the crib side," Gail Gantt, a nurse consultant with the Food and Drug Administration, said in an e-mail. Or the child might scoot down the wedge in a way that causes the child's mouth and nose to press into the device."The baby's movement may also cause the positioner to flip on top of the baby, trapping the baby underneath the positioner or between the positioner and the side of the crib," she said.Of the 13 babies known to have suffocated in a sleep positioner since 1997, most died after they rolled from their sides onto their stomachs. The Consumer Product Safety Commission has also received dozens of reports of babies who were placed on their sides or backs, "only to be found later in hazardous positions within or next to the product," the F.D.A. said in a statement.Many baby books for new parents specifically urge against using sleep positioners, and the American Academy of Pediatrics does not support their use for SIDS prevention. Though the F.D.A. has never approved the positioners for the prevention of SIDS, it has in the past approved a number of the devices for the prevention of gastroesophageal reflux disease and flat head syndrome. But the agency said that in light of the new safety data, it believed any benefits from using the devices were outweighed by the risk of suffocation.As of Wednesday, the agency is explicitly advising parents to stop using sleep positioners, and it has asked manufacturers of the devices to submit clinical data showing that the benefits of their products outweigh the risk of serious harm. In addition to avoiding the devices, experts say, parents should keep things like pillows, comforters, quilts and bumpers away from their infants and their cribs. Soft bedding can increase the likelihood of a baby suffocating."The safest crib is a bare crib," Dr. 
Susan Cummins, a pediatric expect with the F.D.A., said in a statement. "Always put your baby on his or her back to sleep. An easy way to remember this is to follow the ABC's of safe sleep - Alone on the Back in a bare Crib."

In [27]:
# Plot Figures 4 and 5.

def get_reviews_before_date(data, asin, date):
    """Return every review of product `asin`, ordered by `review_time`.

    Note: despite the function name, `date` is accepted but not used, so
    reviews written after `date` are returned as well.
    """
    belongs_to_asin = data.reviews_df.ASIN == asin
    return data.reviews_df[belongs_to_asin].sort_values('review_time')
    
def time_diff(time1, time2):
    """
    Return the number of whole days from time1 to time2 (time2 - time1).

    Both arguments are strings that start with a 'YYYY-MM-DD' date. Only the
    first 10 characters of each are parsed, so a trailing time component is
    ignored on either argument, e.g.

    time2 2012-01-02T00:00:00
    time1 2011-11-03

    Raises ValueError if either date part does not match '%Y-%m-%d'.
    """
    date_format = '%Y-%m-%d'
    start = datetime.strptime(time1[:10], date_format)
    end = datetime.strptime(time2[:10], date_format)
    return (end - start).days
    
def get_colors():
    """Return an endless iterator over 10 colors taken from the 'Dark2' colormap.

    The colormap is evaluated at 10 evenly spaced points in [0, 1]; the
    resulting list is repeated forever via itertools.cycle.
    """
    palette = plt.get_cmap('Dark2')
    return cycle([palette(position) for position in np.linspace(0, 1, 10)])
    
def predict_by_time(model, data, min_reviews=10, threshold=0.5, debug_asin='XXXX'):
    """Plot when reviews flagged as hazardous appear relative to each product's recall date.

    For every ASIN in `data.recalled_asins` with at least `min_reviews` reviews,
    the reviews whose predicted probability is >= `threshold` are located and
    their offsets (in days) from the recall date are computed. Negative offsets
    are reviews written before the recall. Three figures are drawn, saved and shown:

    * cumulative count of flagged reviews per product vs. days from recall
      ('paper/figs/leadtime.pdf'),
    * histogram of all offsets ('paper/figs/dayshist.pdf'),
    * histogram of the number of flagged reviews per product
      ('paper/figs/counthist.pdf').

    One line per product with flagged reviews is printed
    (ASIN, number flagged, first offset, last offset), followed by summary counts.

    Parameters
    ----------
    model : object providing `predict_proba_reviews(data)`.
    data : object providing `recalls_df`, `reviews_df` and `recalled_asins`.
    min_reviews : int, minimum number of reviews a product needs to be considered.
    threshold : float, probability at or above which a review counts as positive.
    debug_asin : ASIN whose recall record and flagged reviews are printed in
        full for manual analysis; the default placeholder matches no product.
    """
    recalls_df = data.recalls_df[data.recalls_df.label==1]
    probas = model.predict_proba_reviews(data)
    found = 0      # products with enough reviews
    correct = 0    # products whose first flagged review precedes the recall
    plt.figure(figsize=(8,6))
    colors = get_colors()
    all_diffs = []
    n_pos = []
    total_reviews = 0
    total_pos = 0
    total_time_diffs = 0
    for asin in data.recalled_asins:
        recall = recalls_df[recalls_df.AmazonAsin==asin].iloc[0]
        reviews = get_reviews_before_date(data, asin, recall['RecallDate'])
        if len(reviews) < min_reviews:
            continue
        found += 1
        total_reviews += len(reviews)
        # Index labels of reviews_df are used as positions into `probas`
        # (assumes the frame has a default 0..n-1 index -- TODO confirm).
        idx = np.array(reviews.index.tolist())
        vals = probas[idx]
        pos_idx = np.where(vals >= threshold)[0]
        total_pos += len(pos_idx)
        if asin == debug_asin:   # For manual analysis of recall reviews
            print(recall['RecallName'])
            print(recall['RecallDescription'])
            print(recall['RecallDate'])
            print(recall['RecallTitle'])
            print('\n'.join('%s %s' % (x,y) for x,y in
                            zip(reviews.iloc[pos_idx]['review_time'],
                                reviews.iloc[pos_idx]['reviewText'])))
        if len(pos_idx) == 0:
            continue
        n_pos.append(len(pos_idx))
        color = next(colors)
        times = reviews.iloc[pos_idx]['review_time']
        # time_diff gives (recall - review); negate so "before recall" is negative.
        time_diffs = [-time_diff(t, recall['RecallDate']) for t in times]
        all_diffs.extend(time_diffs)
        if time_diffs[0] < 0: # earliest flagged review precedes the recall
            correct += 1
        counts = np.arange(len(time_diffs)) + 1
        plt.plot(time_diffs, counts, '.-', color=color)
        plt.plot(time_diffs[0], 1, 'x', ms=6, color=color)
        total_time_diffs += len(time_diffs)
        print(asin, len(pos_idx), time_diffs[0], time_diffs[-1])
    plt.xticks(rotation=90)
    plt.axvline(x=0, color='k')
    plt.ylabel('Total number of hazardous reviews found', size=16)
    plt.xlabel(r'before recall $\leftarrow$     Days from recall     $\rightarrow$ after recall', size=16)
    plt.tight_layout()
    plt.savefig('paper/figs/leadtime.pdf')
    plt.show()
    # Guard the ratio: no product may have reached `min_reviews`.
    early_frac = correct / found if found else float('nan')
    print('found early warning for %d/%d (%.2f) product recalls' % (correct, found, early_frac))
    print('earliest day: mean=%.2f, median=%.2f' % (np.mean(all_diffs), np.median(all_diffs)))
    print('%d / %d reviews classified as positive' % (total_pos, total_reviews))
    print('%d total time diffs' % total_time_diffs)

    plt.figure(figsize=(8,6))
    plt.hist(all_diffs, bins=50)
    plt.xticks(rotation=90)
    plt.ylabel('Count', size=16)
    plt.xlabel(r'before recall $\leftarrow$     Days from recall     $\rightarrow$ after recall', size=16)
    plt.tight_layout()
    plt.savefig('paper/figs/dayshist.pdf')
    plt.show()

    plt.figure(figsize=(8,6))
    plt.hist(n_pos, bins=20)
    plt.ylabel('Count', size=16)
    plt.xlabel('Number of hazardous reviews found', size=16)
    plt.tight_layout()
    plt.savefig('paper/figs/counthist.pdf')
    plt.show()

# Run the lead-time analysis with best_model; the per-product lines and
# summary statistics below are its printed output.
predict_by_time(best_model, data)


B00499DRY4 1 -111 -111
B001VNCVSO 1 -993 -993
B001UHNKMM 1 -627 -627
B0028K2RNI 1 32 32
B0000AQZXM 1 -1730 -1730
B00155UGTO 2 -1227 -791
B001H0GGJG 1 -37 -37
B000325T8S 2 47 200
B00DHINGB2 4 -797 -618
B000XHUHMG 1 -496 -496
B000096RDO 2 -3203 -3121
B003VIIUDW 1 339 339
B0009UBSFM 4 -2232 -974
B000J2DQPA 1 -760 -760
B0026L7D0G 2 -330 323
B00166LX9Y 2 343 636
B001N44UVW 5 36 712
B0002E7DHW 6 -977 98
B00004D3EU 25 -1365 477
B001D62PYE 3 -121 1595
B000324Y7U 29 -2508 -344
B005OOKOK8 3 -26 714
B000056C86 5 2565 3680
B00030HRQM 2 -2268 -1910
B004B762AK 11 346 1201
B000K0QZY0 4 -136 85
B00C870102 1 81 81
B0007GDOF0 2 -1130 -1053
B00007C65S 1 -117 -117
B00318CLA0 2 -1028 -828
B0035ER8MG 1 -59 -59
B0002JZOLO 1 -511 -511
B002M6PPTQ 1 35 35
B000ZMT6VM 3 132 839
B0028K2RMO 1 -381 -381
B001870Z9K 1 -591 -591
B001N44UVC 3 -494 714
B008K0TLJ8 1 -615 -615
B007S76KSE 2 -1126 -758
B002R26LZM 7 -873 588
B0087UUKHI 1 -730 -730
B00020V5A2 4 -311 330
B0035ER8KS 8 -63 1056
B001NAATW0 8 -422 505
B00GSNFDQ4 2 -660 -561
B00005610Y 1 -117 -117
B00139Q0X8 1 -836 -836
B000SES0IW 2 1097 1567
B0000D9SR8 24 -229 3236
B0035ER8IU 3 614 883
B0050386O4 2 -1272 -687
B000BLNZUK 1 13 13
found early warning for 39/86 (0.45) product recalls
earliest day: mean=-328.97, median=-136.50
204 / 7318 reviews classified as positive
204 total time diffs

In [28]:
# Plot review distribution for recalled vs non-recalled (Figure 1).
def plot_review_dist(data):
    """Plot the rating distribution of reviews for recalled vs. non-recalled products.

    Prints the mean `review_score` of each group, then draws side-by-side bars
    with the fraction of reviews at each rating 1-5, saves the figure to
    'paper/figs/ratings.pdf' and shows it.

    Parameters
    ----------
    data : object providing `reviews_df` (with `ASIN` and `review_score`
        columns) and `recalled_asins` (a collection of ASINs).
    """
    # Vectorised membership test instead of a Python loop over iterrows():
    # one pass over the ASIN column with hashed lookups.
    is_recalled = data.reviews_df.ASIN.isin(set(data.recalled_asins))
    recalled = data.reviews_df.review_score[is_recalled].tolist()
    nonrecalled = data.reviews_df.review_score[~is_recalled].tolist()
    print('recalled mean=%g' % np.mean(recalled))
    print('not recalled mean=%g' % np.mean(nonrecalled))

    plt.figure(figsize=(8,6))
    recalled_ct = Counter(recalled)
    nonrecalled_ct = Counter(nonrecalled)
    ratings = np.arange(5) + 1
    bar_width = .3
    # max(..., 1) keeps an empty group from raising ZeroDivisionError;
    # its bars are then simply all zero.
    n_recalled = max(len(recalled), 1)
    n_nonrecalled = max(len(nonrecalled), 1)
    plt.bar(ratings, [recalled_ct[r] / n_recalled for r in ratings],
            bar_width, alpha=.5, color='grey', label='recalled')
    plt.bar(ratings + bar_width, [nonrecalled_ct[r] / n_nonrecalled for r in ratings],
            bar_width, alpha=.5, color='w', label='non-recalled')
    plt.xticks(ratings + bar_width, ratings)
    plt.legend(loc='best', prop={'size':16})
    plt.ylabel('Percent of ratings', size=16)
    plt.xlabel('Rating', size=16)
    plt.tight_layout()
    plt.savefig('paper/figs/ratings.pdf')
    plt.show()
    
# Draw the rating-distribution figure; the two group means are printed below.
plot_review_dist(data)


recalled mean=3.77332
not recalled mean=4.12057

In [29]:
# Plot distribution of recall reviews for recalled vs non-recalled
def plot_recall_dist(data, model):
    """Plot the predicted-class distribution of reviews for recalled vs. non-recalled products.

    Prints the per-group class counts and the fraction of reviews predicted as
    class 1, then shows side-by-side bars of the fraction of reviews in each
    predicted class (0 and 1).

    Parameters
    ----------
    data : object providing `reviews_df` (with an `ASIN` column) and
        `recalled_asins` (a collection of ASINs).
    model : object providing `predict_reviews(data)`.
    """
    preds = np.asarray(model.predict_reviews(data))
    # Vectorised split instead of a Python loop over iterrows(). As before,
    # each review's index label is used as its position in `preds`.
    is_recalled = data.reviews_df.ASIN.isin(set(data.recalled_asins)).values
    labels = data.reviews_df.index.values
    recalled = preds[labels[is_recalled]].tolist()
    nonrecalled = preds[labels[~is_recalled]].tolist()
    plt.figure(figsize=(8,6))
    recalled_ct = Counter(recalled)
    nonrecalled_ct = Counter(nonrecalled)
    print(recalled_ct)
    print(nonrecalled_ct)
    # max(..., 1) keeps an empty group from raising ZeroDivisionError.
    n_recalled = max(len(recalled), 1)
    n_nonrecalled = max(len(nonrecalled), 1)
    print('recalled pct pos=%g' % (recalled_ct[1] / n_recalled))
    print('nonrecalled pct pos=%g' % (nonrecalled_ct[1] / n_nonrecalled))
    ratings = np.arange(2)
    bar_width = .3
    plt.bar(ratings, [recalled_ct[r] / n_recalled for r in ratings],
            bar_width, alpha=.5, color='r', label='recalled', hatch="//")
    plt.bar(ratings + bar_width, [nonrecalled_ct[r] / n_nonrecalled for r in ratings],
            bar_width, alpha=.5, color='g', label='non-recalled')
    plt.xticks(ratings + bar_width, ratings)
    plt.legend(loc='best')
    plt.ylabel('Percent of reviews', size=16)
    plt.xlabel('Predicted class', size=16)
    plt.show()
    
# Same plot for two models so their predicted-class distributions can be
# compared: first best_model, then baseline_model.
plot_recall_dist(data, best_model)
plot_recall_dist(data, baseline_model)


Counter({0: 7356, 1: 214})
Counter({0: 897233, 1: 10643})
recalled pct pos=0.0282695
nonrecalled pct pos=0.011723
Counter({0: 7343, 1: 227})
Counter({0: 897126, 1: 10750})
recalled pct pos=0.0299868
nonrecalled pct pos=0.0118408

In [30]:
# Plot Figure 6: histogram of the per-product values returned by
# best_model.score_asin_recalls (labelled "Number of hazardous reviews").
# asin2recall_score is kept at module level because the next cell reuses it.
asin2recall_score = best_model.score_asin_recalls(data)
plt.figure()
# bottom=1 raises every bar's baseline to 1 -- presumably so that empty bins
# stay drawable once the y-axis is switched to log scale below.
plt.hist(sorted(asin2recall_score.values()), bins=50, bottom=1)
plt.yscale('log')
plt.xlabel('Number of hazardous reviews', size=16)
plt.ylabel('Number of products', size=16)
plt.tight_layout()
plt.savefig('paper/figs/pred_ratings.pdf')
plt.show()



In [31]:
# The ten most frequent per-product scores, as (score, number of products) pairs.
Counter(asin2recall_score.values()).most_common(10)


Out[31]:
[(0, 59362),
 (1, 3285),
 (2, 778),
 (3, 374),
 (4, 190),
 (5, 113),
 (6, 63),
 (7, 57),
 (8, 46),
 (9, 29)]

In [32]:
# Print top coef for inclusion in Table 4.
def print_coef_table(models, data):
    """Print a comparison of the top-weighted terms of two models (for Table 4).

    The two models are looked up by name with `get_models`: the first is the
    informed-prior model, the second the baseline. The informed-prior
    coefficients are multiplied element-wise by the first row of that model's
    `transform` matrix before ranking.

    Printed, in order:
    * terms in one top-20 list but not the other,
    * both top-20 lists,
    * the 20 features with the largest and the 20 with the smallest difference
      between standardized baseline and standardized informed-prior
      coefficients (columns: term, difference, raw baseline, raw informed,
      standardized baseline, standardized informed).
    """
    picked = get_models(models, ['RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)',
                                 'RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)'])
    informed, baseline = picked[0], picked[1]
    coef = informed.clf.coef_[0] * informed.transform[0,:].toarray()[0]
    coef2 = baseline.clf.coef_[0]
    features = data.vec.features

    top_informed = np.argsort(coef)[::-1][:20]
    top_baseline = np.argsort(coef2)[::-1][:20]
    terms1 = features[top_informed]
    terms2 = features[top_baseline]
    print('in informed prior, but not baseline:', set(terms1) - set(terms2))
    print('in baseline prior, but not informed prior:', set(terms2) - set(terms1))

    print('informed prior:')
    print(', '.join(terms1))
    print('baseline:')
    print(', '.join(terms2))

    # Standardize both weight vectors so their difference is comparable.
    scaled = scale(coef)
    scaled2 = scale(coef2)
    diff = scaled2 - scaled
    by_diff = np.argsort(diff)

    def show_rows(indices):
        # One line per feature: term, diff, raw and standardized weights.
        for i in indices:
            print(features[i], diff[i], coef2[i], coef[i], scaled2[i], scaled[i])

    print('\n\n')
    show_rows(by_diff[::-1][:20])   # baseline weights the term more
    print('\n\n')
    show_rows(by_diff[:20])         # informed prior weights the term more
    
# Print the coefficient comparison; `models` is the collection searched by name inside the function.
print_coef_table(models, data)


in informed prior, but not baseline: {'burnt', 'leaned forward', 'was chewing', 'was playing', 'got stuck', 'was hanging', 'very dangerous', 'swallow it', 'snapped', 'smacked', 'cpsc', 'recalled', 'the consumer', 'emergency room', 'is unsafe', 'injured', 'exploded'}
in baseline prior, but not informed prior: {'fell', 'rash', 'light', 'caused', 'noticed', 'pampers', 'choking', 'rock', 'night light', 'unsafe', 'stuck', 'broke', 'crib', 'dangerous', 'model', 'happened', 'gate'}
informed prior:
very dangerous, cpsc, mold, smacked, swallow it, emergency room, recalled, recall, was playing, hazard, is unsafe, snapped, leaned forward, the consumer, got stuck, was hanging, burnt, injured, exploded, was chewing
baseline:
mold, pampers, fell, crib, rock, dangerous, night light, hazard, broke, happened, gate, rash, light, recall, model, stuck, unsafe, caused, noticed, choking



dangerous the 39.3137994843 0.0341056623672 -5.04980244726 0.764773373732 -38.5490261105
have happened 26.1050187427 0.017878583183 -3.3636329034 0.400269600893 -25.7047491419
rock 23.2837413689 1.31904122114 0.843676905767 29.6278764444 6.34413507548
light 22.3088004069 1.08658271532 0.286178931723 24.4062341859 2.09743377894
crib 21.6961876689 1.3402889726 1.11474411547 30.1051579985 8.40897032962
pampers 21.1650146449 1.53684122427 1.76407925519 34.5202493703 13.3552347254
night light 19.0342014994 1.26248634874 1.2347754675 28.3575021441 9.32330064468
gate 17.1057717835 1.11372619459 1.04926369058 25.0159496183 7.91017783482
on 16.5640166926 0.851071217472 0.34585300944 19.1160135283 2.55199683569
stuck 16.224281969 0.998913178154 0.826417003602 22.4369409633 6.21265899428
mold 15.5544504725 2.89140534932 6.49503392393 64.9473970121 49.3929465396
off 15.5451915924 0.90731668299 0.645461874563 20.379437719 4.83424612665
diapers 15.4748927068 0.721196931315 0.105850614336 16.1986883115 0.723795604629
found 15.4017585383 0.847495897227 0.487888707454 19.0357022348 3.63394369647
between 15.0176297747 0.807275107418 0.419711088427 18.1322352913 3.11460551661
sun 14.8757129794 0.730055430613 0.210632082959 16.3976739933 1.52196101396
got 14.265302977 0.697343854846 0.194303761742 15.6628841616 1.39758118459
choking 14.0112063406 0.962189358891 1.00865172002 21.6120253686 7.60081902807
under 14.0095531604 0.836493442833 0.638209857917 18.7885575644 4.77900440394
child 13.8738258104 0.794252605603 0.531465801478 17.8397149314 3.96588912104



very dangerous -50.0340517483 0.552102788021 8.2070925209 12.4003797868 62.4344315351
cpsc -48.7921598516 0.30567822447 7.31738954943 6.8650223688 55.6571822204
great -39.3394704541 -2.1094978178 -1.04554075463 -47.386316981 -8.04684652694
loves -34.251792113 -1.76358215491 -0.69338567826 -39.6161223098 -5.36433019677
emergency room -33.7507882665 0.188965262324 4.99862059648 4.24333583122 37.9941240977
swallow it -33.568449876 0.245794523956 5.14226478572 5.51987365033 39.0883235264
love -33.0342804413 -1.73322215626 -0.763690838852 -38.934155215 -5.89987477369
was hanging -31.863245472 0.0566961147483 4.36078557903 1.27221559606 33.1354610681
leaned forward -31.4333242363 0.167000897629 4.62961911304 3.74995721954 35.1832814559
smacked -31.1715413075 0.358541485004 5.16007800494 8.05247315341 39.224014461
is unsafe -27.583599729 0.400924098247 4.81404074783 9.00450045634 36.5881001854
got stuck -25.9016777515 0.368058416899 4.49632549255 8.26624899948 34.167926751
heard him -25.1234678861 0.111424919922 3.63738906569 2.50157152281 27.6250394089
was playing -24.5489038383 0.563889510163 4.89621346787 12.6651412169 37.2140450551
was chewing -24.4306360841 0.293505860023 4.08336565476 6.59159837703 31.0222344611
perfect -24.3314569296 -1.23965399691 -0.450718747474 -27.8472889837 -3.51583205404
rolled into -24.2998941386 0.096047683391 3.48392682071 2.15615749803 26.4560516366
filed -23.7265413777 0.110238454961 3.45050479322 2.47492033378 26.2014617115
off leaving -23.6803121532 0.146048064603 3.55003322416 3.27930032541 26.9596124787
these -23.6640573443 -1.31799777647 -0.769357912292 -29.6071006481 -5.94304330388

In [33]:
def print_chi2_predicted(model, data, n_feats=100):
    """Display the features most associated (by chi2) with the model's own predictions.

    chi2 is computed between `data.X_reviews` and the predicted labels. Two
    tables of `n_feats` rows each are displayed: features with a positive
    model coefficient ('RECALL TERMS') and features with a negative one
    ('NON-RECALL TERMS'), each ranked by chi2. Every row also reports the
    coefficient, the feature's summed count over predicted-positive and
    predicted-negative reviews, and those counts divided by the number of
    reviews in the respective class.
    """
    preds = model.predict_reviews(data) # model.clf.predict(data.X_reviews)
    neg_rows = np.where(preds==0)
    pos_rows = np.where(preds==1)
    nneg = len(neg_rows[0])
    npos = len(pos_rows[0])
    print(Counter(preds))
    chi, _ = chi2(data.X_reviews, preds)
    chi = np.nan_to_num(chi)
    # Per-feature totals over predicted-positive / predicted-negative reviews.
    ppos_counts = data.X_reviews[pos_rows].sum(axis=0).A1
    pneg_counts = data.X_reviews[neg_rows].sum(axis=0).A1
    weights = model.clf.coef_[0]

    def ranked_rows(scores):
        # Table rows for the n_feats highest-scoring features.
        rows = []
        for i in np.argsort(scores)[::-1][:n_feats]:
            rows.append({'feature': data.vec.features[i],
                         'chi2': '%.1f' % scores[i],
                         'coef': '%.2f' % weights[i],
                         'pos_count': ppos_counts[i],
                         'pos_frac': '%.3f' % (ppos_counts[i]/npos),
                         'neg_count': pneg_counts[i],
                         'neg_frac': '%.3f' % (pneg_counts[i]/nneg)})
        return rows

    # Zero out chi2 for features whose coefficient is not positive.
    chi_pos = chi * np.where(weights > 0, 1, 0)
    print('RECALL TERMS')
    display(pd.DataFrame(ranked_rows(chi_pos)))

    print('\n\nNON-RECALL TERMS')
    # Zero out chi2 for features whose coefficient is not negative.
    chi_neg = chi * np.where(weights < 0, 1, 0)
    display(pd.DataFrame(ranked_rows(chi_neg)))


# First list best_model's top 50 terms via Evaluator.top_terms, then show the
# 50 features per sign that chi2 ranks highest against its predictions.
Evaluator(data).top_terms(best_model, n=50)
print('\n\n')
print_chi2_predicted(best_model, data, n_feats=50)



TOP FEATURES:

CLASS 0
great	1.336
this	1.299
these	0.986
for	0.980
love	0.975
loves	0.885
so	0.833
it	0.788
good	0.723
but	0.713
easy	0.697
is	0.615
well	0.612
you	0.609
are	0.604
easy to	0.602
they	0.598
and	0.593
very	0.579
perfect	0.576
to	0.533
don	0.514
with	0.509
recommend	0.493
cute	0.458
works	0.455
one	0.454
like	0.453
nice	0.443
really	0.435
as	0.434
our	0.430
soft	0.421
best	0.417
just	0.411
too	0.404
loved	0.404
them	0.401
much	0.401
all	0.388
other	0.376
use	0.371
car	0.369
can	0.368
my	0.365
what	0.359
the	0.353
buy	0.344
ve	0.341
would	0.341

CLASS 1
pampers	1.104
mold	1.006
rash	0.832
burn	0.715
snapped	0.698
allowing the	0.671
gate with	0.664
be tightened	0.660
night light	0.639
smacked	0.626
died	0.617
delta	0.599
model	0.595
that side	0.580
collapsed	0.564
crib	0.561
burnt	0.558
blisters	0.558
broke	0.553
swallow it	0.544
tightened	0.537
exploded	0.537
flipped	0.525
nap nanny	0.525
approved	0.524
attached	0.514
chemical	0.508
fire	0.507
stairway	0.505
disintegrated	0.482
causing	0.476
cause the	0.474
began to	0.469
gate	0.467
appeared	0.462
com	0.459
very dangerous	0.455
side	0.446
bar with	0.446
happened	0.441
finger	0.437
caused	0.429
noticed	0.427
red	0.424
fall	0.421
bleeding	0.412
burned	0.409
unsafe	0.404
seam	0.401
hazard	0.398



Counter({0: 904589, 1: 10857})
RECALL TERMS
chi2 coef feature neg_count neg_frac pos_count pos_frac
0 33099.1 0.40 hazard 1643 0.002 1055 0.097
1 31683.4 0.22 recalled 290 0.000 578 0.053
2 28377.6 0.24 dangerous 1996 0.002 1041 0.096
3 24130.7 0.55 broke 9583 0.011 1942 0.179
4 23099.4 0.45 very dangerous 43 0.000 316 0.029
5 19591.1 0.44 happened 4142 0.005 1167 0.107
6 16880.7 0.35 recall 571 0.001 465 0.043
7 16348.7 0.40 unsafe 1087 0.001 586 0.054
8 16187.7 0.05 choking hazard 599 0.001 461 0.042
9 15014.5 0.39 fell 7916 0.009 1390 0.128
10 14770.4 0.70 snapped 1669 0.002 663 0.061
11 14047.8 0.02 be recalled 43 0.000 205 0.019
12 11513.6 0.21 fell out 334 0.000 300 0.028
13 11380.8 1.01 mold 2168 0.002 646 0.060
14 10612.7 0.11 choking 1785 0.002 569 0.052
15 9677.5 0.16 is dangerous 141 0.000 201 0.019
16 9289.0 0.11 been recalled 62 0.000 157 0.014
17 9086.6 0.20 contacted 3446 0.004 715 0.066
18 8598.9 0.04 safety hazard 182 0.000 201 0.019
19 8561.2 0.32 is unsafe 63 0.000 148 0.014
20 8557.6 0.43 caused 1943 0.002 527 0.049
21 8317.1 0.43 noticed 8713 0.010 1094 0.101
22 8073.2 0.14 the company 6537 0.007 929 0.086
23 8060.4 0.34 the plastic 10894 0.012 1213 0.112
24 7839.5 0.48 causing 2084 0.002 520 0.048
25 7342.6 0.34 injury 501 0.001 266 0.025
26 7327.5 0.18 company 13046 0.014 1279 0.118
27 7196.8 0.28 injured 263 0.000 204 0.019
28 7180.8 0.22 this happened 377 0.000 234 0.022
29 7128.7 0.30 not safe 749 0.001 310 0.029
30 7039.3 0.28 called 7209 0.008 915 0.084
31 6498.3 0.01 broke off 771 0.001 298 0.027
32 6271.1 0.22 plastic 37407 0.041 2175 0.200
33 6011.9 0.20 the recall 70 0.000 117 0.011
34 5984.3 0.24 metal 6435 0.007 798 0.074
35 5976.5 0.29 off 93671 0.104 3768 0.347
36 5974.7 0.20 broken 6337 0.007 791 0.073
37 5791.4 0.00 snapped off 153 0.000 146 0.013
38 5684.1 0.18 stuck 10061 0.011 989 0.091
39 5626.9 0.22 safety 17178 0.019 1324 0.122
40 5599.7 0.15 contacted the 1218 0.001 338 0.031
41 5214.4 0.28 caused the 266 0.000 168 0.015
42 5182.7 0.08 broke the 772 0.001 263 0.024
43 5156.2 0.27 of the 178220 0.197 5511 0.508
44 5105.6 0.38 got stuck 326 0.000 180 0.017
45 5021.3 0.19 cpsc 11 0.000 70 0.006
46 4967.0 0.23 mold on 35 0.000 85 0.008
47 4815.1 0.03 product safety 35 0.000 83 0.008
48 4498.0 0.22 the metal 2344 0.003 414 0.038
49 4406.1 0.11 was recalled 63 0.000 91 0.008

NON-RECALL TERMS
chi2 coef feature neg_count neg_frac pos_count pos_frac
0 5443.6 -0.01 safety commission 6 0.000 71 0.007
1 4721.2 -0.00 consumer product 16 0.000 70 0.006
2 4136.3 -0.01 fell off 1457 0.002 314 0.029
3 3725.3 -0.08 customer 10366 0.011 832 0.077
4 3495.3 -0.03 customer service 8309 0.009 715 0.066
5 3234.7 -0.08 could 77816 0.086 2702 0.249
6 3116.9 -0.14 after 104963 0.116 3272 0.301
7 3072.2 -0.05 service 10748 0.012 781 0.072
8 3064.7 -0.04 had 177275 0.196 4715 0.434
9 2808.5 -0.00 recall on 66 0.000 68 0.006
10 2777.4 -0.08 on the 150075 0.166 4068 0.375
11 2727.4 -0.06 noticed that 2666 0.003 346 0.032
12 2499.1 -0.02 response 1712 0.002 264 0.024
13 2489.2 -0.21 replacement 11452 0.013 741 0.068
14 2451.1 -0.09 within 11956 0.013 755 0.070
15 2451.0 -0.04 called the 1530 0.002 247 0.023
16 2443.5 -0.01 from 148688 0.164 3900 0.359
17 2381.4 -0.02 fell apart 803 0.001 177 0.016
18 2339.0 -0.01 it fell 809 0.001 176 0.016
19 2251.9 -0.04 not 302759 0.335 6525 0.601
20 2214.4 -0.00 not buy 4670 0.005 424 0.039
21 2202.0 -0.08 first 98669 0.109 2822 0.260
22 2173.2 -0.07 then 66312 0.073 2132 0.196
23 2113.3 -0.04 went 24399 0.027 1097 0.101
24 2107.0 -0.03 it snapped 161 0.000 80 0.007
25 2025.5 -0.13 off the 18568 0.021 911 0.084
26 1968.7 -0.02 do not 26426 0.029 1124 0.104
27 1959.6 -0.15 sent 9515 0.011 602 0.055
28 1912.0 -0.05 company and 1673 0.002 229 0.021
29 1879.0 -0.04 immediately 9663 0.011 597 0.055
30 1863.5 -0.06 that the 56082 0.062 1811 0.167
31 1842.3 -0.11 did 69771 0.077 2098 0.193
32 1832.5 -0.02 of 439072 0.485 8407 0.774
33 1786.3 -0.10 were 89275 0.099 2474 0.228
34 1782.1 -0.01 broke in 368 0.000 105 0.010
35 1729.2 -0.02 told me 3427 0.004 320 0.029
36 1716.1 -0.00 hazard and 103 0.000 59 0.005
37 1691.5 -0.00 repair 829 0.001 151 0.014
38 1675.5 -0.02 completely 17743 0.020 824 0.076
39 1673.6 -0.06 problem 41948 0.046 1437 0.132
40 1618.6 -0.01 was not 15572 0.017 750 0.069
41 1603.5 -0.20 have happened 193 0.000 74 0.007
42 1593.8 -0.01 started 29525 0.033 1120 0.103
43 1579.1 -0.02 out of 62426 0.069 1853 0.171
44 1579.0 -0.01 send me 1097 0.001 168 0.015
45 1563.0 -0.05 product 131460 0.145 3167 0.292
46 1549.4 -0.21 again 44116 0.049 1450 0.134
47 1546.6 -0.13 before 52429 0.058 1631 0.150
48 1513.3 -0.02 the hinge 469 0.001 108 0.010
49 1494.2 -0.01 would not 17442 0.019 781 0.072

In [34]:
def get_class_discrepancy(model, data, n):
    """
    For each of the top n features in the positive class, get the class distribution
    in the training data, and the predicted class distribution in the testing data.

    Features are visited in order of decreasing coefficient; only features that
    occur more than twice in `data.X_complaints` are kept, until `n` have been
    collected.

    Returns a DataFrame sorted by 'pr_pos_test' (descending) with columns:
      term          feature name
      coef          model coefficient
      pr_pos_train  complaint count / (complaint count + count in sampled negatives)
      pr_pos_test   count in predicted-positive reviews / count in all reviews
      n_pos_train   count in the complaints matrix
      n_pos_test    count in predicted-positive reviews
      diff          pr_pos_train - pr_pos_test
    An empty DataFrame with these columns is returned when no feature
    qualifies or n <= 0.

    Side effect: sets pandas' 'display.max_rows' option to 1000.
    """
    coef = model.get_coef()[0]
    top_coef_ind = np.argsort(coef)[::-1]
    preds = model.predict_reviews(data)  # model.clf.predict(data.X_reviews)
    # Per-feature totals over predicted-positive / predicted-negative reviews.
    ppos_counts = data.X_reviews[np.where(preds==1)].sum(axis=0).A1
    pneg_counts = data.X_reviews[np.where(preds==0)].sum(axis=0).A1

    train_pos = data.X_complaints.sum(axis=0).A1
    train_neg = data.X_reviews[model.neg_sample_idx].sum(axis=0).A1
    results = []
    for i in top_coef_ind:
        # Check the limit before appending so that n <= 0 yields no rows.
        if len(results) >= n:
            break
        if train_pos[i] > 2:
            train_pr = train_pos[i] / (train_pos[i] + train_neg[i])
            test_pr = ppos_counts[i] / (ppos_counts[i] + pneg_counts[i])
            results.append(
                {
                    'term': data.vec.features[i],
                    'coef': coef[i],
                    'pr_pos_train': train_pr,
                    'pr_pos_test': test_pr,
                    'n_pos_train': train_pos[i],
                    'n_pos_test': ppos_counts[i],
                    'diff': train_pr - test_pr
                }
            )
    pd.set_option('display.max_rows', 1000)
    if not results:
        # sort_values('pr_pos_test') would raise KeyError on a column-less frame.
        return pd.DataFrame(columns=['term', 'coef', 'pr_pos_train', 'pr_pos_test',
                                     'n_pos_train', 'n_pos_test', 'diff'])
    return pd.DataFrame(results).sort_values('pr_pos_test', ascending=False)

# Train/test class-distribution table for the top 100 qualifying features of baseline_model.
get_class_discrepancy(baseline_model, data, 100)


Out[34]:
coef diff n_pos_test n_pos_train pr_pos_test pr_pos_train term
20 0.909811 0.571478 322 131 0.370968 0.942446 recalled
81 0.552103 0.676271 105 31 0.292479 0.968750 very dangerous
90 0.524554 0.753390 88 161 0.240437 0.993827 the consumer
13 1.007839 0.705103 235 178 0.226834 0.931937 recall
78 0.563890 0.756768 66 86 0.209524 0.966292 was playing
7 1.198926 0.696901 535 205 0.198295 0.895197 hazard
5 1.278500 0.569231 560 104 0.184392 0.753623 dangerous
79 0.558286 0.754362 124 120 0.161669 0.916031 injury
16 0.978184 0.669801 255 111 0.152421 0.822222 unsafe
19 0.962189 0.626890 349 131 0.148258 0.775148 choking
67 0.586398 0.738243 54 79 0.139535 0.877778 injuries
9 1.146932 0.638772 640 224 0.120550 0.759322 happened
17 0.975297 0.717757 281 153 0.113765 0.831522 caused
2 1.530806 0.455141 1058 219 0.113690 0.568831 fell
63 0.593393 0.608079 261 54 0.111921 0.720000 snapped
33 0.795858 0.697881 290 140 0.111367 0.809249 causing
8 1.177096 0.525326 1222 149 0.106030 0.631356 broke
0 2.891405 0.738541 295 210 0.104833 0.843373 mold
87 0.538756 0.467357 777 112 0.104072 0.571429 the company
85 0.539605 0.596581 160 48 0.099071 0.695652 began to
54 0.657202 0.796929 167 197 0.098525 0.895455 consumer
45 0.697980 0.836743 40 86 0.098039 0.934783 rock and
99 0.513876 0.410158 245 31 0.098039 0.508197 cracked
75 0.574807 0.693934 231 148 0.097509 0.791444 stuck in
91 0.522639 0.498877 705 147 0.086780 0.585657 called
22 0.897836 0.657861 111 67 0.086583 0.744444 fire
95 0.518574 0.600593 49 26 0.083618 0.684211 burned
96 0.515046 0.710583 56 34 0.080114 0.790698 melted
48 0.691732 0.470610 467 101 0.078303 0.548913 please
43 0.713461 0.430101 934 140 0.077145 0.507246 the plastic
18 0.967269 0.603867 754 290 0.076884 0.680751 noticed
50 0.674635 0.366967 1080 165 0.075393 0.442359 company
15 0.998913 0.635676 833 315 0.075385 0.711061 stuck
88 0.537157 0.568593 697 278 0.071960 0.640553 between the
37 0.736469 0.429907 15 3 0.070093 0.500000 be tightened
40 0.718345 0.430356 92 14 0.069644 0.500000 tightened
52 0.668528 0.589265 358 172 0.062250 0.651515 fisher price
51 0.668529 0.586946 359 172 0.062111 0.649057 fisher
83 0.544205 0.450337 425 157 0.061063 0.511401 was in
84 0.543964 0.461660 456 132 0.060079 0.521739 today
76 0.573443 0.669773 28 27 0.059957 0.729730 delta
11 1.111200 0.703367 112 154 0.059009 0.762376 rash
31 0.799682 0.613624 330 241 0.053966 0.667590 leg
36 0.779019 0.347028 974 243 0.052643 0.399671 safety
49 0.688551 0.342233 152 30 0.052504 0.394737 hardware
66 0.587301 0.693510 50 73 0.051387 0.744898 diaper rash
58 0.603494 0.421202 382 87 0.049069 0.470270 loose
68 0.585479 0.308041 692 112 0.047514 0.355556 apart
23 0.893270 0.436599 320 111 0.046010 0.482609 cause
32 0.797759 0.266252 1787 265 0.045147 0.311398 plastic
4 1.319041 0.633438 186 194 0.044884 0.678322 rock
47 0.694841 0.509107 82 21 0.043524 0.552632 died
77 0.564885 0.370774 447 140 0.043428 0.414201 my child
86 0.539603 0.176183 30 5 0.041209 0.217391 approved
30 0.807275 0.403558 935 336 0.040887 0.444444 between
59 0.603360 0.290675 305 58 0.040754 0.331429 needs to
92 0.521339 0.444299 326 149 0.039467 0.483766 red
35 0.785002 0.259019 696 134 0.039422 0.298441 fall
14 1.000701 0.465593 369 160 0.039139 0.504732 model
98 0.513887 0.328334 264 77 0.038333 0.366667 arm
42 0.713793 0.292478 538 127 0.038251 0.330729 attached
69 0.581927 0.841933 26 66 0.038067 0.880000 http
65 0.588765 0.243432 365 65 0.037954 0.281385 bar
72 0.577582 0.860868 26 70 0.036568 0.897436 www
10 1.113726 0.122459 674 72 0.036482 0.158940 gate
71 0.578609 0.399531 79 37 0.035763 0.435294 rocker
21 0.907317 0.165736 3483 408 0.035745 0.201481 off
25 0.867540 0.227407 1726 298 0.035611 0.263019 side
24 0.886961 0.770345 35 91 0.034965 0.805310 burn
29 0.836493 0.209678 912 149 0.034184 0.243863 under
28 0.841278 0.686280 118 113 0.033466 0.719745 com
3 1.340289 0.307039 1341 489 0.032309 0.339348 crib
6 1.262486 0.507288 102 56 0.031174 0.538462 night light
62 0.598776 0.238809 181 39 0.030157 0.268966 ring
80 0.552995 0.509049 138 131 0.030046 0.539095 sleeper
61 0.599194 0.380822 128 73 0.029291 0.410112 infants
73 0.577034 0.196843 5350 987 0.029119 0.225962 of the
94 0.520236 0.169915 1092 190 0.029038 0.198953 came
57 0.620084 0.156313 482 73 0.028497 0.184810 out and
34 0.794253 0.222003 1817 445 0.028278 0.250281 child
60 0.599935 0.444821 63 48 0.025767 0.470588 jumper
41 0.714468 0.123251 574 68 0.025545 0.148796 straps
27 0.847496 0.212581 1112 281 0.024550 0.237131 found
1 1.536841 0.887296 12 144 0.024096 0.911392 pampers
89 0.529613 0.155361 530 64 0.023410 0.178771 bad
74 0.575606 0.158166 690 124 0.022592 0.180758 that is
70 0.580550 0.161738 7149 1449 0.022029 0.183767 was
97 0.514736 0.303588 37 13 0.021412 0.325000 this can
44 0.701729 0.233060 577 205 0.021283 0.254342 infant
46 0.697344 0.128619 1851 318 0.020817 0.149436 got
26 0.851071 0.135159 7304 1337 0.020090 0.155248 on
82 0.544560 0.154377 788 204 0.019092 0.173469 night
56 0.623273 0.165787 2205 598 0.018270 0.184057 son
93 0.520460 0.120245 7467 1490 0.016591 0.136835 in
55 0.635318 0.301336 143 107 0.016171 0.317507 pacifier
12 1.086583 0.093082 597 90 0.016141 0.109223 light
64 0.591707 0.198794 89 30 0.015492 0.214286 removable
53 0.658033 0.203999 88 30 0.014979 0.218978 the sun
38 0.730055 0.166493 119 37 0.013119 0.179612 sun
39 0.721197 0.226562 177 173 0.006907 0.233468 diapers

In [35]:
# Same table for best_model, for comparison with the baseline table in the previous cell.
get_class_discrepancy(best_model, data, 100)


Out[35]:
coef diff n_pos_test n_pos_train pr_pos_test pr_pos_train term
35 0.454837 0.088527 316 31 0.880223 0.968750 very dangerous
72 0.354745 0.483095 465 178 0.448842 0.931937 recall
48 0.398166 0.504166 1055 205 0.391030 0.895197 hazard
53 0.390238 0.623188 78 10 0.376812 1.000000 dangerous to
59 0.379280 0.601716 180 45 0.355731 0.957447 got stuck
46 0.403533 0.471953 586 111 0.350269 0.822222 unsafe
86 0.338274 0.569225 266 120 0.346806 0.916031 injury
61 0.376972 0.626610 107 86 0.339683 0.966292 was playing
54 0.389196 0.549368 65 20 0.320197 0.869565 was chewing
79 0.348577 0.369792 57 8 0.296875 0.666667 hazardous
76 0.352241 0.603293 109 17 0.291444 0.894737 resulting in
19 0.543917 0.710526 22 5 0.289474 1.000000 swallow it
4 0.697709 0.435695 663 54 0.284305 0.720000 snapped
9 0.625954 0.537500 22 13 0.275000 0.812500 smacked
21 0.536759 0.637864 27 9 0.262136 0.900000 exploded
78 0.351418 0.596032 64 17 0.253968 0.850000 came loose
97 0.328327 0.646696 115 18 0.253304 0.900000 resulting
51 0.394158 0.770000 23 6 0.230000 1.000000 shorted
1 1.005512 0.613807 646 210 0.229566 0.843373 mold
82 0.346109 0.619035 67 22 0.227119 0.846154 his arm
38 0.441069 0.539507 1167 224 0.219815 0.759322 happened
40 0.428708 0.618161 527 153 0.213360 0.831522 caused
62 0.374636 0.650621 95 12 0.206522 0.857143 came apart
29 0.476455 0.609556 520 140 0.199693 0.809249 causing
16 0.558329 0.514939 61 10 0.199346 0.714286 burnt
7 0.660024 0.308411 41 3 0.191589 0.500000 be tightened
31 0.468615 0.506178 306 48 0.189474 0.695652 began to
14 0.563678 0.610976 155 32 0.189024 0.800000 collapsed
13 0.580240 0.642081 56 9 0.176101 0.818182 that side
70 0.358205 0.398531 37 4 0.172897 0.571429 on product
90 0.335014 0.669287 127 37 0.171622 0.840909 this morning
18 0.553217 0.462853 1942 149 0.168503 0.631356 broke
5 0.670670 0.698382 52 13 0.168285 0.866667 allowing the
91 0.334277 0.832258 26 3 0.167742 1.000000 very unstable
95 0.330502 0.565533 154 19 0.165236 0.730769 later the
50 0.394640 0.712627 208 35 0.162373 0.875000 this issue
93 0.333439 0.839506 13 6 0.160494 1.000000 crib side
27 0.506716 0.586098 203 67 0.158346 0.744444 fire
94 0.331704 0.636526 367 148 0.154918 0.791444 stuck in
92 0.334080 0.778492 27 27 0.152542 0.931034 inc
22 0.525390 0.477974 127 29 0.152461 0.630435 flipped
52 0.393265 0.419465 1390 219 0.149366 0.568831 fell
44 0.412118 0.716531 56 44 0.146214 0.862745 bleeding
57 0.385689 0.781375 24 38 0.145455 0.926829 slat
84 0.341586 0.362939 363 31 0.145258 0.508197 cracked
23 0.525131 0.634921 8 7 0.142857 0.777778 nap nanny
20 0.537220 0.359955 185 14 0.140045 0.500000 tightened
64 0.366520 0.802844 40 40 0.127389 0.930233 bruises
65 0.366181 0.667665 86 34 0.123033 0.790698 melted
60 0.377685 0.879518 10 5 0.120482 1.000000 maker of
37 0.445698 0.596939 23 5 0.117347 0.714286 bar with
45 0.408971 0.568170 68 26 0.116041 0.684211 burned
41 0.426853 0.569198 1094 290 0.111553 0.680751 noticed
33 0.461944 0.655535 120 29 0.107623 0.763158 appeared
66 0.362892 0.438857 130 19 0.104000 0.542857 tipped
73 0.354199 0.562842 19 6 0.103825 0.666667 the maker
6 0.663692 0.083950 70 3 0.103550 0.187500 gate with
83 0.345788 0.520750 379 121 0.102961 0.623711 caught
63 0.369193 0.834292 41 86 0.100490 0.934783 rock and
87 0.337476 0.407056 1213 140 0.100190 0.507246 the plastic
30 0.473845 0.570776 77 18 0.095890 0.666667 cause the
81 0.348085 0.447910 333 61 0.091913 0.539823 sharp
47 0.401157 0.479706 164 20 0.091723 0.571429 seam
55 0.388989 0.677722 111 70 0.091509 0.769231 developed
80 0.348265 0.328269 48 5 0.088398 0.416667 fall on
11 0.598997 0.644077 40 27 0.085653 0.729730 delta
68 0.360688 0.436627 646 132 0.085112 0.521739 today
67 0.362887 0.513149 457 130 0.083182 0.596330 went to
39 0.437191 0.427684 273 59 0.080937 0.508621 finger
17 0.557652 0.927273 8 42 0.072727 1.000000 blisters
69 0.358842 0.809722 48 66 0.070278 0.880000 http
2 0.832210 0.692302 133 154 0.070074 0.762376 rash
75 0.352518 0.300805 136 20 0.069565 0.370370 to slide
49 0.394990 0.828519 49 70 0.068917 0.897436 www
24 0.523781 0.150084 49 5 0.067308 0.217391 approved
98 0.328126 0.377274 765 163 0.059724 0.436997 left
88 0.336868 0.216301 629 70 0.058208 0.274510 off and
96 0.329423 0.414685 378 113 0.058118 0.472803 placed
71 0.357262 0.141935 36 3 0.058065 0.200000 slide it
3 0.714554 0.748367 57 91 0.056943 0.805310 burn
12 0.595075 0.449259 523 160 0.055473 0.504732 model
26 0.507756 0.741802 94 70 0.053653 0.795455 chemical
10 0.616952 0.500084 99 21 0.052548 0.552632 died
58 0.381722 0.363390 523 140 0.050811 0.414201 my child
43 0.420521 0.248880 875 134 0.049561 0.298441 fall
34 0.459133 0.670681 173 113 0.049064 0.719745 com
99 0.327786 0.697621 46 73 0.047276 0.744898 diaper rash
42 0.423970 0.436793 388 149 0.046973 0.483766 red
25 0.513590 0.284017 657 127 0.046712 0.330729 attached
36 0.446402 0.222332 1972 298 0.040687 0.263019 side
28 0.504810 0.324824 17 4 0.038813 0.363636 stairway
32 0.467387 0.120131 717 72 0.038809 0.158940 gate
74 0.352581 0.143280 483 44 0.038538 0.181818 the straps
56 0.387544 0.232644 218 39 0.036321 0.268966 ring
85 0.340120 0.560777 82 68 0.035714 0.596491 was using
77 0.351991 0.116442 727 68 0.032354 0.148796 straps
15 0.561073 0.311014 1176 489 0.028333 0.339348 crib
0 1.104103 0.897336 7 144 0.014056 0.911392 pampers
8 0.638696 0.525014 44 56 0.013447 0.538462 night light
89 0.336818 0.569497 42 81 0.013237 0.582734 the diapers

In [36]:
def plot_reviews_by_year(model, data):
    """
    Print, for each review year, the predicted-class counts and the fraction
    of that year's reviews predicted as class 1.

    The year is the first four characters of each `review_time` value.
    Nothing is plotted or returned; one line per year is printed in
    ascending year order.
    """
    preds = model.predict_reviews(data)
    year2counts = defaultdict(Counter)
    for pred, stamp in zip(preds, data.reviews_df.review_time):
        year2counts[stamp[:4]][pred] += 1
    lines = []
    for entry in sorted(year2counts.items()):
        tally = entry[1]
        pos_fraction = tally[1] / (tally[0] + tally[1])
        lines.append('%s %f' % (str(entry), pos_fraction))
    print('\n'.join(lines))
    
# Per-year predicted-class counts for best_model (printed below).
plot_reviews_by_year(best_model, data)


('2000', Counter({0: 56, 1: 1})) 0.017544
('2001', Counter({0: 845, 1: 15})) 0.017442
('2002', Counter({0: 2651, 1: 41})) 0.015230
('2003', Counter({0: 4153, 1: 74})) 0.017507
('2004', Counter({0: 7909, 1: 196})) 0.024183
('2005', Counter({0: 15724, 1: 408})) 0.025291
('2006', Counter({0: 12053, 1: 278})) 0.022545
('2007', Counter({0: 16551, 1: 257})) 0.015290
('2008', Counter({0: 23735, 1: 323})) 0.013426
('2009', Counter({0: 32511, 1: 473})) 0.014340
('2010', Counter({0: 54200, 1: 874})) 0.015870
('2011', Counter({0: 98804, 1: 1377})) 0.013745
('2012', Counter({0: 135303, 1: 1807})) 0.013179
('2013', Counter({0: 299410, 1: 2840})) 0.009396
('2014', Counter({0: 200684, 1: 1893})) 0.009345

In [37]:
# Plot Figure 7.
# The Amazon series is hard-coded: 1377, 1807 and 2840 are the class-1 counts
# for 2011, 2012 and 2013 printed by plot_reviews_by_year(best_model, data)
# in the previous cell, so they must be updated by hand if the model changes.
# NOTE(review): the CPSC counts (502, 447, 432) are not computed anywhere in
# the visible notebook -- confirm their source.
plt.figure()
plt.plot([1377, 1807, 2840], 'go-', label='detected complaints, Amazon')
plt.plot([502, 447, 432], 'bo-', label='submitted complaints, CPSC')
# x positions 0..2 are relabelled with the corresponding years.
plt.xticks([0, 1, 2], [2011, 2012, 2013])
plt.xlim(-.1, 2.1)
plt.legend(loc='best')
plt.xlabel('year', size=16)
plt.ylabel('count', size=16)
plt.tight_layout()
plt.savefig('paper/figs/years.pdf')
plt.show()