Summary

This notebook replicates the experiments in the ICWSM'17 paper entitled "Identifying Leading Indicators of Product Recalls from Online Reviews using Positive Unlabeled Learning and Domain Adaptation," by Shreesh Kumara Bhat and Aron Culotta. A full version of the paper is here: https://arxiv.org/abs/1703.00518

This notebook first downloads all the required data files from Dropbox into the local folder `data` (about 194 MB).



In [1]:

    
from collections import Counter, defaultdict
from datetime import datetime
import gzip
from IPython.display import display
from itertools import groupby, cycle
import json
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
from sklearn.feature_selection import chi2, f_classif
from scipy.sparse import hstack as sp_hstack
from scipy.sparse import vstack as sp_vstack
from scipy.sparse import csr_matrix, lil_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, auc, classification_report, confusion_matrix, f1_score, precision_recall_curve, precision_score, recall_score, roc_auc_score, roc_curve
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import scale
from tabulate import tabulate
import urllib.request

%matplotlib inline

# Local folder that holds the data files, and the path of each file inside it.
PATH = 'data'
complaints_file = os.path.join(PATH, 'complaints.csv')
reviews_file = os.path.join(PATH, 'reviews.json.gz')
test_file = os.path.join(PATH, 'test.csv')
recalls_file = os.path.join(PATH, 'recalls.csv')
recalled_asins_file = os.path.join(PATH, 'recalled_asins.txt')

# Formatting for matplotlib: set the tick label size on both axes to 16.
plt.rcParams["xtick.labelsize"] = "16"
plt.rcParams["ytick.labelsize"] = "16"









    



/usr/lib64/python3.4/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

Download data from Dropbox.



In [2]:

    
def download_data(path, files=None):
    """
    Download any required files if not already present.

    Parameters
    ----------
    path : str
        Folder to download into; it is created if it does not exist.
    files : list of (url, name) tuples, optional
        Files to fetch and the name each is stored under inside ``path``.
        Defaults to the five Dropbox files used by this notebook.

    A file that already exists in ``path`` is not fetched again. Each
    download is first written to ``<name>.part`` and only renamed to its
    final name once complete, so an interrupted download is never mistaken
    for a finished one on the next run.
    """
    if files is None:
        files = [('https://www.dropbox.com/s/k18rpimaif014b0/complaints.csv?dl=1', 'complaints.csv'),
                 ('https://www.dropbox.com/s/jwr77xpsa7d1w4b/recalls.csv?dl=1', 'recalls.csv'),
                 ('https://www.dropbox.com/s/ww0bmhf4iw84a33/reviews.json.gz?dl=1', 'reviews.json.gz'),
                 ('https://www.dropbox.com/s/mtppphs0bml727a/test.csv?dl=1', 'test.csv'),
                 ('https://www.dropbox.com/s/53g3hqfodeb52xi/recalled_asins.txt?dl=1', 'recalled_asins.txt')]

    os.makedirs(path, exist_ok=True)

    for url, name in files:
        target = os.path.join(path, name)
        if os.path.exists(target):
            continue
        print('fetching %s' % name)
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, target)
        finally:
            # Only present here if the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

# Fetch into PATH any data file that is not already there.
download_data(PATH)









    



fetching complaints.csv
fetching recalls.csv
fetching reviews.json.gz
fetching test.csv
fetching recalled_asins.txt

Read and explore review data.



In [3]:

    
def parse_all_reviews(filename):
    """
    Parse all reviews into a sparse document x term csr_matrix.

    ``filename`` is a gzip-compressed file with one JSON object per line;
    each object must have the keys 'reviewText', 'asin', 'reviewTime' and
    'overall'.

    Returns a tuple ``(X, vec, df)``:
      X   -- the matrix produced by fitting the CountVectorizer on the review text,
      vec -- the fitted CountVectorizer,
      df  -- a DataFrame with one row per review, in file order, with columns
             'ASIN', 'review_time', 'review_score' and 'reviewText'.
    """

    def iter_reviews(filename):
        # The context manager closes the gzip handle once the generator is
        # exhausted. JSON text is read as UTF-8 rather than with the
        # platform's default encoding.
        with gzip.open(filename, 'rt', encoding='utf-8') as review_file:
            for i, line in enumerate(review_file, start=1):
                js = json.loads(line)
                yield js['reviewText'], (js['asin'], js['reviewTime'], js['overall'], js['reviewText'])
                if i % 100000 == 0:
                    print('read %d reviews' % i)

    records = []

    def iter_texts():
        # Stream the text to the vectorizer while collecting the metadata rows,
        # so the file is read only once.
        for text, record in iter_reviews(filename):
            records.append(record)
            yield text

    vec = CountVectorizer(min_df=50, ngram_range=(1,2), max_df=.95, binary=True)
    X = vec.fit_transform(iter_texts())
    return X, vec, pd.DataFrame(records, columns=['ASIN', 'review_time', 'review_score', 'reviewText'])

# Parse the review file once: feature matrix, fitted vectorizer, and per-review metadata.
X_reviews, vec, reviews_df = parse_all_reviews(reviews_file)
print('X_reviews has shape %s' % str(X_reviews.shape))
reviews_df.head()









    



read 100000 reviews
read 200000 reviews
read 300000 reviews
read 400000 reviews
read 500000 reviews
read 600000 reviews
read 700000 reviews
read 800000 reviews
read 900000 reviews
X_reviews has shape (915446, 136160)






    Out[3]:






  
    
      
      ASIN
      review_time
      review_score
      reviewText
    
  
  
    
      0
      0188399313
      05 27, 2013
      5.0
      They work very well. Easy to clean, we wash th...
    
    
      1
      0188399399
      04 9, 2013
      5.0
      it came early and was not disappointed. i love...
    
    
      2
      0188399518
      02 14, 2014
      4.0
      I ended up with a variety of different brands ...
    
    
      3
      0188399518
      07 8, 2013
      3.0
      These flannel wipes are OK, but in my opinion ...
    
    
      4
      0316967297
      09 6, 2013
      4.0
      Cute quilt, the colors are perfect and my litt...



In [4]:

    
# reformat date string.
def _reformat_review_date(raw):
    """
    Convert one 'MM D, YYYY' date string (e.g. '04 9, 2013') to 'YYYY-MM-DD'.
    A value that is already in 'YYYY-MM-DD' form is returned unchanged.
    """
    iso_parts = raw.split('-')
    if [len(p) for p in iso_parts] == [4, 2, 2] and all(p.isdigit() for p in iso_parts):
        return raw
    parts = [token.replace(',', '') for token in raw.split()]
    return '%s-%s-%s' % (parts[2], parts[0].zfill(2), parts[1].zfill(2))


def format_dates(reviews_df):
    """
    Rewrite ``reviews_df['review_time']`` in place from 'MM D, YYYY' to
    'YYYY-MM-DD', and print the first 10 converted values.

    Values already in 'YYYY-MM-DD' form are left as they are, so running
    this a second time on the same frame (e.g. re-running the notebook cell)
    is harmless instead of raising IndexError.
    """
    new_dates = [_reformat_review_date(raw) for raw in reviews_df['review_time']]
    print(new_dates[:10])
    reviews_df['review_time'] = new_dates
    
# Rewrites the review_time column of reviews_df in place.
format_dates(reviews_df)









    



['2013-05-27', '2013-04-09', '2014-02-14', '2013-07-08', '2013-09-06', '2013-03-22', '2012-03-07', '2013-04-23', '2012-11-26', '2013-08-02']



In [5]:

    
# Number of reviews by score.
# Uses the Series method; the top-level pd.value_counts function is deprecated in recent pandas.
reviews_df['review_score'].value_counts()









    Out[5]:





5.0    534132
4.0    165123
3.0     82931
1.0     76938
2.0     56322
Name: review_score, dtype: int64



In [6]:

    
# Distribution of review lengths in whitespace-separated words; reviews with no words are left out.
lengths = [n_words
           for n_words in (len(text.split()) for text in reviews_df['reviewText'])
           if n_words != 0]
print(np.median(lengths))
pd.DataFrame(lengths).describe()









    



55.0






    Out[6]:






  
    
      
      0
    
  
  
    
      count
      915104.000000
    
    
      mean
      81.951987
    
    
      std
      87.139825
    
    
      min
      1.000000
    
    
      25%
      31.000000
    
    
      50%
      55.000000
    
    
      75%
      100.000000
    
    
      max
      4546.000000

Read and explore complaints data.



In [7]:

    
def parse_complaints(complaints_file, vec):
    """
    Parse all the CPSC complaints, using the same vectorizer fit on the Amazon review data.

    Reads ``complaints_file`` as CSV and transforms its 'Incident Description'
    column with ``vec``. As a side effect, the vectorizer's feature names are
    stored as a numpy array on ``vec.features``.

    Returns ``(X, complaints_df)``: the transformed matrix and the raw DataFrame.
    """
    complaints_df = pd.read_csv(complaints_file)
    X = vec.transform(complaints_df['Incident Description'])
    # Newer scikit-learn releases replaced get_feature_names() with
    # get_feature_names_out(); fall back to the old name on older releases.
    if hasattr(vec, 'get_feature_names_out'):
        feature_names = vec.get_feature_names_out()
    else:
        feature_names = vec.get_feature_names()
    vec.features = np.array(feature_names)
    return X, complaints_df

# Vectorize the complaints with the vectorizer fit on the reviews (this also sets vec.features).
X_complaints, complaints_df = parse_complaints(complaints_file, vec)
print('complaints feature matrix has shape %s' % str(X_complaints.shape))
complaints_df.head()









    



complaints feature matrix has shape (2010, 136160)






    Out[7]:






  
    
      
      Report No.
      Report Date
      Sent to Manufacturer / Importer / Private Labeler
      Publication Date
      Category of Submitter
      Product Description
      Product Category
      Product Sub Category
      Product Type
      Product Code
      ...
      Submitter Has Product
      Product Was Damaged Before Incident
      Damage Description
      Damage Repaired
      Product Was Modified Before Incident
      Have You Contacted The Manufacturer
      If Not Do You Plan To
      Answer Explanation
      Company Comments
      Associated Report Numbers
    
  
  
    
      0
      20160509-F1AD6-2147419650
      5/9/2016
      5/17/2016
      6/8/2016
      Consumer
      Munchkin pacifier clip
      Baby
      Nursery Equipment & Supplies
      Pacifiers or Teething Rings
      1525
      ...
      NaN
      Yes
      NaN
      NaN
      NaN
      Yes
      Yes
      NaN
      NaN
      NaN
    
    
      1
      20160506-66663-2147419715
      5/6/2016
      5/16/2016
      5/31/2016
      Consumer
      Baby Einstein Bouncer, Multicolor, has a piano...
      Baby
      Nursery Equipment & Supplies
      Baby Bouncer Seats (Excl. Jumpers)
      1558
      ...
      NaN
      Yes
      NaN
      NaN
      NaN
      Yes
      Yes
      I thought I was contacting them. the link on t...
      NaN
      NaN
    
    
      2
      20160429-84BCB-2147419859
      4/29/2016
      5/9/2016
      5/31/2016
      Consumer
      Luxury teether toys for happy baby teething.\r...
      Baby
      Nursery Equipment & Supplies
      Pacifiers or Teething Rings
      1525
      ...
      NaN
      Yes
      NaN
      NaN
      NaN
      Yes
      Yes
      I still have the product and plan on reaching ...
      NaN
      NaN
    
    
      3
      20160505-69C2D-2147419760
      5/5/2016
      5/13/2016
      5/27/2016
      Consumer
      Graco Lauren Classic Crib, model #2354497, pro...
      Baby
      Nursery Equipment & Supplies
      Cribs
      1543
      ...
      NaN
      Yes
      NaN
      NaN
      NaN
      Yes
      NaN
      I still have this product. Graco said they are...
      NaN
      NaN
    
    
      4
      20160504-C585C-2147419771
      5/4/2016
      5/12/2016
      5/26/2016
      Consumer
      Baby swing savanah model CMH84
      Baby
      Nursery Equipment & Supplies
      Portable Baby Swings (For Home Use)
      1553
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      Yes
      Yes
      Tryin to figure out who to contact about incid...
      NaN
      NaN
    
  

5 rows × 40 columns



In [8]:

    
# Number of complaints for each '(Primary) Victim Severity' value.
complaints_df['(Primary) Victim Severity'].value_counts()









    Out[8]:





Incident, No Injury                                       909
Injury, First Aid Received by Non-Medical Professional    299
Injury, No First Aid or Medical Attention Received        298
Injury, Seen by Medical Professional                      194
Unspecified                                                87
Injury, Emergency Department Treatment Received            82
Injury, Level of care not known                            66
Injury, Hospital Admission                                 30
No Incident, No Injury                                     26
Death                                                      19
Name: (Primary) Victim Severity, dtype: int64



In [9]:

    
# Number of complaints for each 'Product Type' value.
complaints_df['Product Type'].value_counts()









    Out[9]:





Cribs                                                           407
Bassinets or Cradles                                            258
Diapers                                                         209
Pacifiers or Teething Rings                                     186
Baby Exercisers                                                 132
High Chairs                                                     116
Night-lights                                                    101
Baby Gates or Barriers                                           74
Playpens                                                         57
Infant & Toddler Play Ctrs, Excl Jumpers,bouncers&exercisers     55
Baby Bouncer Seats (Excl. Jumpers)                               55
Attached Highchair                                               49
Portable Baby Swings (For Home Use)                              46
Baby Mattresses or Pads                                          43
Baby Walkers or Jumpers                                          32
Cribs, Not Specified                                             28
Baby Carriers or Slings (Backpacks)                              25
Baby Baths or Bathinettes                                        21
Baby Bottles or Nipples                                          20
Potty Chairs or Training Seats                                   20
Portable Cribs                                                   19
Crib Mobiles or Crib Gyms                                        18
Baby Bathtub Seats or Rings (Not Toys)                           10
Bottle Warmers                                                    8
Baby Carriers (Bicycle-mounted)                                   7
Baby Changing Tables                                              6
Baby Carriers, Not Specified                                      3
Baby Harnesses                                                    2
Desks, Chests, Bureaus or Buffets                                 1
Sterilizers (Home Use)                                            1
Jewelry                                                           1
Name: Product Type, dtype: int64



In [10]:

    
# Complaints per year; the year is the last four characters of 'Report Date'.
sorted(Counter(report_date[-4:] for report_date in complaints_df['Report Date']).items())









    Out[10]:





[('2011', 502),
 ('2012', 447),
 ('2013', 432),
 ('2014', 332),
 ('2015', 240),
 ('2016', 57)]



In [11]:

    
# Distribution of the number of whitespace-separated words per complaint.
lengths = [len(description.split())
           for description in complaints_df['Incident Description']]
print(np.median(lengths))
display(pd.DataFrame(lengths).describe())









    



98.0






    






  
    
      
      0
    
  
  
    
      count
      2010.000000
    
    
      mean
      124.513930
    
    
      std
      114.461285
    
    
      min
      4.000000
    
    
      25%
      56.000000
    
    
      50%
      98.000000
    
    
      75%
      159.000000
    
    
      max
      1683.000000

Read labeled data.



In [12]:

    
def parse_test_data(filename, vec):
    """
    Parse labeled Amazon reviews using the same
    vectorizer fit to the unlabeled Amazon reviews.

    Reads ``filename`` as CSV, drops every row that has a missing value,
    renames the 'Review Text' column to 'text', and transforms that column
    with ``vec``.

    Returns ``(X, df)``: the transformed matrix and the cleaned DataFrame.
    The DataFrame keeps its original row labels (the index is not reset).
    """
    # Read the file that was passed in, not the module-level test_file.
    df = pd.read_csv(filename).dropna().rename(columns={'Review Text': 'text'})
    X = vec.transform(t for t in df['text'])
    return X, df

# Vectorize the labeled reviews with the vectorizer fit on the unlabeled reviews.
X_test, test_df = parse_test_data(test_file, vec)
print('X_test has shape %s' % str(X_test.shape))
test_df.head()









    



X_test has shape (448, 136160)






    Out[12]:






  
    
      
      label
      ASIN
      text
    
  
  
    
      0
      1
      B002NU50LO
      We purchased this dresser 2 years ago and were...
    
    
      1
      1
      B004C43JJ4
      I just got my order today and put my six month...
    
    
      2
      1
      B00020L78M
      I personally didn't buy this gate, specificall...
    
    
      3
      1
      B00HVSVPQ2
      I bought it for my son who is only six months ...
    
    
      4
      1
      B0091DHACS
      These are currently being recalled, and the mu...



In [13]:

    
# Label distribution: number of labeled test reviews per 'label' value.
display(test_df['label'].value_counts())









    





0    351
1     97
Name: label, dtype: int64

Read recalled Amazon products.



In [14]:

    
# This file contains manually labeled instances
# of recall / product pairs. The label indicates whether the match is valid.
# The file is read as tab-separated despite its .csv extension.
recalls_df = pd.read_csv(recalls_file, sep='\t')
recalls_df.head()









    Out[14]:






  
    
      
      label
      RecallNumber
      RecallName
      AmazonTitle
      AmazonAsin
      NumReviews
      Score
      RecallTitle
      RecallDescription
      RecallDate
    
  
  
    
      0
      1
      8263
      Munchkin Deluxe Bottle and Food Warmers with P...
      Munchkin Deluxe Bottle  And Food Warmer With P...
      B00007C65S
      32
      9
      Baby Bottle and Food Warmers Recalled by Munch...
      Baby Bottle and Food Warmers Recalled by Munch...
      2008-04-08T00:00:00
    
    
      1
      1
      11056
      The First Years American Red Cross Cabinet Swi...
      The First Years American Red Cross Cabinet And...
      B001ODU26E
      4
      8
      The First Years® Recalls American Red Cross® C...
      This recall involves The First Years American ...
      2010-12-02T00:00:00
    
    
      2
      0
      14072
      Zoom Car Seat Adapter, Adapter clips can loose...
      Phil&amp;Teds Car Seat Adapter For Peg Perego ...
      B003BNTNLA
      5
      7
      Joovy Recalls Zoom Car Seat Stroller Adapter d...
      This recall involves all Joovy’s Zoom gray met...
      2013-12-30T00:00:00
    
    
      3
      0
      13061
      Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd...
      Dream On Me 2 in 1 Baby Tunes Musical Activity...
      B005GU18FU
      11
      7
      Dream On Me Recalls Bath Seats Due to Drowning...
      The recall includes all Dream On Me bath seats...
      2012-12-06T00:00:00
    
    
      4
      0
      13061
      Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd...
      Dream On Me 2 In 1 Baby Bather and Changing St...
      B003ZUXWNE
      7
      7
      Dream On Me Recalls Bath Seats Due to Drowning...
      The recall includes all Dream On Me bath seats...
      2012-12-06T00:00:00



In [15]:

    
# Number of unique recalls (distinct RecallNumber values) among the rows with label == 1.
len(set(recalls_df[recalls_df.label==1]['RecallNumber']))









    Out[15]:





47



In [16]:

    
# Number of unique recalled ASINs (one per line of the file, surrounding whitespace stripped).
# The file is opened in a with-block so that the handle is closed after reading.
with open(recalled_asins_file) as asin_file:
    recalled_asins = set(line.strip() for line in asin_file)
print('%d recalled ASINs' % len(recalled_asins))









    



137 recalled ASINs

Classification Experiments

Here we perform classification experiments to evaluate the effectiveness of the proposed domain adaptation method for positive unlabeled learning.



In [17]:

    
class Data:
    """
    Container for all the data.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """
    def __init__(self, X_complaints, complaints_df,
                 X_test, test_df,
                 X_reviews, reviews_df,
                 recalls_df, recalled_asins, vec):
        # Each feature matrix is kept next to the DataFrame it was passed with.
        self.X_complaints, self.complaints_df = X_complaints, complaints_df
        self.X_test, self.test_df = X_test, test_df
        self.X_reviews, self.reviews_df = X_reviews, reviews_df
        # Recall information and the shared vectorizer.
        self.recalls_df, self.recalled_asins = recalls_df, recalled_asins
        self.vec = vec

# Bundle everything loaded above into a single object that is passed to models and the evaluator.
data = Data(X_complaints, complaints_df,
            X_test, test_df,
            X_reviews, reviews_df,
            recalls_df, recalled_asins, vec)



In [18]:

    
class Evaluator(object):
    """
    Evaluation metrics.

    Holds a data container and scores models against it. The methods read
    the container's ``test_df``, ``X_test``, ``reviews_df``,
    ``recalled_asins`` and ``vec`` attributes.
    """
    def __init__(self, data):
        self.data = data

    def evaluate(self, model):
        """
        Fit ``model`` on the data, then evaluate on test data.

        Returns a dict with the keys 'f1', 'roc_auc', 'pr_auc', 'recall',
        'precision' and 'pr_at_k'. The f1, recall, precision and roc_auc
        values are computed on the labeled test reviews; pr_auc and pr_at_k
        come from ``evaluate_recalls``.
        """
        model.fit(self.data)
        preds = model.predict(self.data)
        probas = model.predict_proba(self.data)
        truths = np.array(self.data.test_df['label'])
        f1 = f1_score(truths, preds)
        recall = recall_score(truths, preds)
        precision = precision_score(truths, preds)
        roc_auc = roc_auc_score(truths, probas, average=None)
        _, _, pr_auc, pr_at_k = self.evaluate_recalls(model)
        return {'f1': f1, 'roc_auc': roc_auc, 'pr_auc': pr_auc, 'recall': recall, 'precision': precision, 'pr_at_k': pr_at_k}

    def evaluate_recalls(self, model):
        """
        Evaluate against recalled products.

        Every distinct ASIN in ``reviews_df`` is scored with
        ``model.score_asin_recalls`` and compared against membership in
        ``recalled_asins``.

        Returns ``(prec, recall, pr_auc, pr_at_k)``: the interpolated
        precision values, the recall values, the area under that
        precision/recall curve, and the precision among the top-k scored
        ASINs, where k is the number of recalled ASINs present in the reviews.
        """
        asin2recall_score = model.score_asin_recalls(self.data)
        asins = list(set(self.data.reviews_df.ASIN))
        probas = np.array([asin2recall_score[x] for x in asins])
        # Read the recalled ASINs from this evaluator's own data rather than
        # from a module-level variable that happens to be called `data`.
        truths = np.array([1 if x in self.data.recalled_asins else 0 for x in asins])
        prec, recall, thresholds = precision_recall_curve(truths, probas)
        prec = self._interpolate(prec)
        pr_auc = auc(recall, prec)
        # evaluate precision at number of true positives.
        rank = int(truths.sum())
        pred = np.argsort(probas)[::-1][:rank]
        correct = len(set(pred) & set(np.where(truths==1)[0]))
        # With no recalled ASIN among the reviews the ratio is undefined.
        pr_at_k = correct / rank if rank else float('nan')
        return prec, recall, pr_auc, pr_at_k

    def plot_prec_recalls(self, results):
        """
        Plot one precision/recall curve per entry of ``results``.

        Each entry is indexed as ``r['prcurve'][0]`` (precision),
        ``r['prcurve'][1]`` (recall) and ``r['model']`` (legend label).
        The x-axis is limited to recall values from -.01 to .2.
        """
        plt.figure()
        for r in results:
            plt.plot(r['prcurve'][1], r['prcurve'][0], '.-', label=r['model'])
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.legend(loc="best")
        plt.xlim(-.01, .2)
        plt.show()

    def _interpolate(self, prec):
        """
        Replace every value of ``prec`` with the running maximum of the
        values up to and including it. ``prec`` is modified in place and
        also returned.
        """
        p_temp = prec[0]
        n = len(prec)
        for i in range(n):
            if prec[i] < p_temp:
                prec[i] = p_temp
            else:
                p_temp = prec[i]
        return prec


    def confusion(self, truths, preds, labels):
        """
        Return the confusion matrix of ``truths`` vs ``preds`` as a text
        table, with ``labels`` used for both the header row and first column.
        """
        m = confusion_matrix(truths, preds)
        m = np.vstack((labels, m))
        m = np.hstack((np.matrix([''] + list(labels)).T, m))
        return tabulate(m.tolist(), headers='firstrow')

    def top_terms(self, model, n=10):
        """
        Print top terms per class.

        Uses the first row of ``model.get_coef()``: the negated coefficients
        rank terms for ``model.clf.classes_[0]`` and the coefficients
        themselves rank terms for ``model.clf.classes_[1]``. The ``n``
        highest-weighted terms are printed for each class.
        """
        weights = model.get_coef()
        print('\n\nTOP FEATURES:')
        coefs = [-weights[0], weights[0]]
        for li, label in enumerate(model.clf.classes_):
            print('\nCLASS %s' % label)
            coef = coefs[li]
            top_coef_ind = np.argsort(coef)[::-1][:n]
            top_coef_terms = self.data.vec.features[top_coef_ind]
            top_coef = coef[top_coef_ind]
            print('\n'.join(['%s\t%.3f' % (term, weight)
                             for term, weight in zip(top_coef_terms, top_coef)]))


    def top_error_terms(self, model):
        """
        Print top terms appearing in incorrectly labeled documents.

        For each class, the test documents wrongly predicted as that class
        are flagged as errors. Features are then ranked by their
        ``f_classif`` score against that error flag, and for each of the
        top 5 features up to 3 of the erroneous documents containing it are
        printed.
        """
        truths = np.array(self.data.test_df['label'])
        preds = model.predict(self.data)
        X = self.data.X_test
        texts = np.array(self.data.test_df['text'])
        print('\n\nERROR ANALYSIS:\n')
        for label in model.clf.classes_:
            print('\nincorrectly labeled %s' % label)
            iserror = np.zeros(len(truths))
            ind = [i for i, (t, p) in enumerate(zip(truths, preds)) if t != p and p == label]
            iserror[ind] = 1
            corrs, _ = f_classif(X, iserror)
            corrs = np.nan_to_num(corrs)
            # Flip the sign of features that are more associated with the
            # non-error documents, so they sort to the bottom.
            pos_mask, pos_counts, neg_counts = self.get_pos_mask(X, iserror)
            corrs *= pos_mask
            # ignore features from only 1 incorrect instance.
            corrs *= np.sign(X.sign()[np.where(iserror == 1)].sum(axis=0).A1 - 1)
            for fidx in np.argsort(corrs)[::-1][:5]:
                print('\n\t%s (%d incorrect, %d correct) corr=%.4f' %
                      (self.data.vec.features[fidx], pos_counts[fidx], neg_counts[fidx], corrs[fidx]))
                matches = [midx for midx in range(X.shape[0])
                           if X[midx, fidx] > 0 and iserror[midx] == 1]
                for m in matches[:3]:
                    print('\t\t' + texts[m])

    def get_pos_mask(self, X, y, reg=1):
        """
        Get mask for indices that are more associated with class 1 than class 0.

        Returns ``(mask, pos_counts, neg_counts)``. ``pos_counts`` and
        ``neg_counts`` are, per feature, the number of rows with ``y == 1``
        / ``y == 0`` in which the feature is non-zero. ``mask`` is +1 where
        the add-one share of the feature among class-1 rows exceeds its
        share among class-0 rows, and -1 otherwise.

        ``reg`` is accepted but not used by the current implementation.
        """
        pos_counts = X.sign()[np.where(y == 1)].sum(axis=0).A1
        neg_counts = X.sign()[np.where(y == 0)].sum(axis=0).A1
        posp = (1. + pos_counts) / pos_counts.sum()
        negp = (1. + neg_counts) / neg_counts.sum()
        diffs = posp - negp
        diffs = np.array([1 if v > 0 else -1 for v in diffs])
        return np.array(diffs), pos_counts, neg_counts
    
    
def average_results(results):
    """
    Average a list of per-run metric dicts.

    The metric names are taken from the first dict. For every metric ``k``
    the result holds the mean under ``k`` and, under ``k + '_se'``, the
    standard deviation divided by the square root of the number of runs.
    """
    summary = {}
    for metric in results[0]:
        column = [run[metric] for run in results]
        summary.update({
            metric: np.mean(column),
            metric + '_se': np.std(column) / math.sqrt(len(column)),
        })
    return summary
        
def evaluate_models(models, data, seeds=[42, 11111, 12345678, 919191, 5555]):
    """
    Evaluate all models using multiple seeds and average the results.

    Each model is evaluated once per seed, with the seed assigned to
    ``model.seed`` before each run, and the per-seed metrics are averaged.
    Returns a DataFrame with one row per model, sorted by descending 'f1',
    with the model object in the first column.
    """
    evaluator = Evaluator(data)
    all_results = []
    for m in models:
        per_seed = []
        for seed in seeds:
            m.seed = seed
            print('Evaluating %s' % str(m))
            per_seed.append(evaluator.evaluate(m))
        averaged = average_results(per_seed)
        averaged['model'] = m
        all_results.append(averaged)
    df = pd.DataFrame(all_results).sort_values('f1', ascending=False)
    # Move the 'model' column to the front.
    df.insert(0, 'model', df.pop('model'))
    return df



In [19]:

    
# Here we implement the different classification models.


# Some helper functions: each one reduces the review probabilities of a product to one score.
def _count(probs):
    """Number of probabilities that are >= .5."""
    return len(np.where(np.array(probs) >= .5)[0])

def _count_mean(probs):
    """Fraction of probabilities that are >= .5."""
    return len(np.where(np.array(probs) >= .5)[0]) / len(probs)

def _mean(probs):
    """Mean of the probabilities."""
    return np.mean(probs)

def _max(probs):
    """Largest of the probabilities."""
    return max(probs)

class Model(object):
    """
    Abstract base class.

    Holds a classifier in ``self.clf`` (built by ``make_clf``) and the
    prediction helpers shared by all models. ``fit`` does nothing here;
    subclasses override it.
    """
    def __init__(self, C=1):
        self.C = C
        self.make_clf()

    def fit(self, data):
        """Train the classifier on ``data``; a no-op in the base class."""
        pass

    def predict(self, data):
        """Predict a label for each row of ``data.X_test``."""
        return self.clf.predict(data.X_test)

    def predict_proba(self, data):
        """
        Predict the probability of recall on each test example.
        This is the second column of the classifier's ``predict_proba`` output.
        """
        return self.clf.predict_proba(data.X_test)[:,1]

    def predict_reviews(self, data):
        """Predict a label for each row of ``data.X_reviews``."""
        return self.clf.predict(data.X_reviews)

    def predict_proba_reviews(self, data):
        """Second column of the classifier's ``predict_proba`` output for ``data.X_reviews``."""
        return self.clf.predict_proba(data.X_reviews)[:,1]

    def score_asin_recalls(self, data, aggregator_fn=_count):
        """
        Compute a score indicating the likelihood that each product
        should be recalled, based on the classification of each review.

        The review probabilities are grouped by ``data.reviews_df['ASIN']``
        and each group is reduced with ``aggregator_fn`` (by default the
        number of reviews with probability >= .5).

        Returns a dict mapping ASIN to its aggregated score. All reviews of
        an ASIN are aggregated together even when they are not adjacent in
        ``reviews_df``.
        """
        probas = self.predict_proba_reviews(data)

        # itertools.groupby only merges consecutive equal keys, so an ASIN
        # whose reviews are not contiguous would keep only its last run.
        # Accumulating per key has no such ordering requirement.
        grouped = defaultdict(list)
        for asin, proba in zip(data.reviews_df['ASIN'], probas):
            grouped[asin].append(proba)
        return {asin: aggregator_fn(scores) for asin, scores in grouped.items()}

    def make_clf(self):
        """Create the classifier: class-balanced logistic regression with inverse regularization strength C."""
        self.clf = LogisticRegression(class_weight='balanced', C=self.C)

    def get_coef(self):
        """Return the classifier's ``coef_`` array."""
        return self.clf.coef_

    
class RandomNegativeSamples(Model):
    """
    Treat a random draw from the unlabeled reviews as negative examples.

    All complaints are used as positives. ``n_neg`` reviews are sampled as
    negatives; with ``n_neg == -1`` as many reviews are sampled as there
    are complaints.
    """
    def __init__(self, n_neg=-1, seed=42, C=1):
        super().__init__(C=C)
        self.seed = seed
        self.n_neg = n_neg

    def fit(self, data):
        """
        Seed the ``random`` module with ``self.seed``, sample the negatives,
        and fit the classifier on complaints (label 1) stacked on top of the
        sampled reviews (label 0). The sampled row indices are kept in
        ``self.neg_sample_idx``.
        """
        random.seed(self.seed)
        n_complaints = data.X_complaints.shape[0]
        neg = n_complaints if self.n_neg == -1 else self.n_neg
        self.neg_sample_idx = random.sample(range(data.X_reviews.shape[0]), neg)
        X = sp_vstack((data.X_complaints, data.X_reviews[self.neg_sample_idx]))
        y = np.concatenate(([1] * n_complaints, [0] * neg))
        self.clf.fit(X, y)

    def __str__(self):
        fields = (self.C, self.n_neg)
        return "RandomNegSamples(C=%g,n=%d)" % fields
    

class RandomNegativeSamplesThreshold(Model):
    """
    Sample n_neg random examples from the unlabeled data
    with review >= threshold and pretend they are negative.

    All complaints are used as positives. With ``n_neg == -1`` as many
    reviews are sampled as there are complaints.
    """

    def __init__(self, threshold=4.5, n_neg=-1, seed=42, C=1):
        super().__init__(C=C)
        self.seed = seed
        self.threshold = threshold
        self.n_neg = n_neg

    def fit(self, data):
        """
        Seed the ``random`` module with ``self.seed``, sample negatives from
        the reviews whose 'review_score' is >= ``self.threshold``, and fit
        the classifier on complaints (label 1) stacked on top of the sampled
        reviews (label 0). The sampled row indices are kept in
        ``self.neg_sample_idx``.
        """
        n_complaints = data.X_complaints.shape[0]
        neg = n_complaints if self.n_neg == -1 else self.n_neg
        random.seed(self.seed)
        # Row positions of the reviews that meet the score threshold.
        pos_reviews = list(np.where(data.reviews_df['review_score'] >= self.threshold)[0])
        samplei = random.sample(pos_reviews, neg)
        self.neg_sample_idx = samplei
        X = sp_vstack((data.X_complaints, data.X_reviews[samplei]))
        y = np.concatenate(([1] * n_complaints,
                            [0] * neg))
        self.clf.fit(X, y)

    def __str__(self):
        # %g rather than %d, so that a fractional C such as 0.5 is not shown as 0.
        return "RandomNegSamplesThresh(C=%g, nneg=%d, t=%.1f)" % (self.C, self.n_neg, self.threshold)

    
class RandomNegativeSamplesThresholdInformedPrior(Model):
    """
    Sample n_neg random examples from the unlabeled data
    with review score >= threshold and pretend they are negative.
    Also implement the informed prior approach, described
    in the paper: after a first fit, every feature is rescaled by a
    per-feature weight estimated from the classifier's own predictions
    on the unlabeled reviews, and the classifier is refit on the
    rescaled features.

    n_neg=-1 means "sample as many reviews as there are complaints".
    """

    def __init__(self, threshold=4.5, n_neg=-1, seed=42, C=1000):
        super().__init__(C=C)
        self.seed = seed
        self.threshold = threshold
        self.n_neg = n_neg

    def _display_top_terms(self, data, coef_idx, transform, n=20):
        """
        Display a table of the n features in coef_idx with the largest
        transform value, with their current coefficient in self.clf.
        Nothing is displayed when coef_idx is empty.
        """
        tops = []
        for i in sorted(coef_idx, key=lambda x: -transform[x])[:n]:
            tops.append({'term': data.vec.features[i], 'coef': self.clf.coef_[0][i], 'transform': transform[i]})
        # An empty frame has no 'transform' column to sort on.
        if tops:
            display(pd.DataFrame(tops).sort_values('transform', ascending=False))
        
    def fit(self, data):
        """
        Two-stage fit.

        1. Fit self.clf on the complaints (label 1) plus a random sample of
           reviews with 'review_score' >= self.threshold (label 0).
        2. Predict on all of data.X_reviews, derive a per-feature weight
           vector from those predictions, store it (as a 1 x n_features
           sparse matrix) in self.transform, and refit self.clf on the
           training matrix multiplied column-wise by that vector.

        Prints and displays diagnostic output along the way.
        """
        if self.n_neg == -1:
            neg = data.X_complaints.shape[0]
        else:
            neg = self.n_neg
        random.seed(self.seed)
        # Row positions of reviews that meet the score threshold.
        pos_reviews = list(np.where(data.reviews_df['review_score'] >= self.threshold)[0])
        samplei = random.sample(pos_reviews, neg)
        self.neg_sample_idx = samplei
        X = sp_vstack((data.X_complaints, data.X_reviews[samplei]))
        y = np.concatenate(([1] * data.X_complaints.shape[0],
                            [0] * neg))
        self.clf.fit(X, y)
        
        # Now, predict on the unlabeled reviews, compute priors and refit.
        predictions = self.clf.predict(data.X_reviews)
        ## get positive/negative coefficients.
        pos_coef_i = np.where(self.clf.coef_[0] > 0)
        neg_coef_i = np.where(self.clf.coef_[0] < 0)

        # Per-feature totals over the reviews predicted positive / negative.
        ppos_counts = data.X_reviews[np.where(predictions==1)].sum(axis=0).A1
        pneg_counts = data.X_reviews[np.where(predictions==0)].sum(axis=0).A1
        # Add-one smoothed share of each feature's total that falls in the
        # class its coefficient points to.
        pos_probs = ((1 + ppos_counts) / (2 + ppos_counts + pneg_counts))[pos_coef_i]
        print('pos probs1', pos_probs[:20])
        neg_probs = ((1 + pneg_counts) / (2 + ppos_counts + pneg_counts))[neg_coef_i]
        # Normalize so that pos and neg coef are in similar range.
        pos_probs = pos_probs / pos_probs.sum()
        neg_probs = neg_probs / neg_probs.sum()
        print('pos probs2', pos_probs[:20])

        # Features whose coefficient is exactly 0 keep a weight of 0 and are
        # therefore zeroed out in the refit.
        transform = np.zeros(X.shape[1])
        transform[pos_coef_i] = pos_probs
        transform[neg_coef_i] = neg_probs
        # Rescale so that the mean weight over all features is 1.
        transform *= len(transform) / transform.sum()
        print('mean=', np.mean(transform))
        print('mean2=', np.mean(transform[pos_coef_i]))
        print('pos probs3', transform[pos_coef_i][:20])

        print('transform:', sorted(transform)[:10], sorted(transform)[::-1][:10])
        self._display_top_terms(data, pos_coef_i[0], transform)
        # Some debug statements. Terms missing from the vocabulary are
        # skipped instead of aborting the fit with a KeyError.
        for term in ('hazard', 'crib', 'pampers', 'very dangerous'):
            if term in data.vec.vocabulary_:
                print('%s transform=' % term, transform[data.vec.vocabulary_[term]])

        self._display_top_terms(data, neg_coef_i[0], transform)
        self.transform = csr_matrix(transform)
        self.clf.fit(X.multiply(self.transform), y)
        
    def predict(self, data):
        """
        Predict a label for each row of data.X_test, after applying the
        feature weights learned in fit.
        """
        return self.clf.predict(data.X_test.multiply(self.transform))

    def predict_proba(self, data):
        """
        Predict the probability of recall on each test example.
        """
        return self.clf.predict_proba(data.X_test.multiply(self.transform))[:,1]

    def predict_reviews(self, data):
        """
        Predict a label for each row of data.X_reviews, after applying the
        feature weights learned in fit.
        """
        return self.clf.predict(data.X_reviews.multiply(self.transform))

    def predict_proba_reviews(self, data):
        """
        Return the second column of predict_proba (the class labelled 1 in
        fit) for each row of data.X_reviews, after applying the feature
        weights learned in fit.
        """
        return self.clf.predict_proba(data.X_reviews.multiply(self.transform))[:,1]
        
    def __str__(self):
        return "RandNegSampThreshInfoPrior(C=%g, nneg=%d, t=%.1f)" % (self.C, self.n_neg, self.threshold)



In [20]:

    
# Collect all models for comparison, in this order:
#   1. the plain random-negative baseline,
#   2. the thresholded sampler at each review-score threshold,
#   3. the informed-prior variant at each review-score threshold,
#   4. the informed-prior variant at threshold 5.0 with a varying number
#      of sampled negatives.
models = (
    [RandomNegativeSamples(n_neg=20000, C=1)]
    + [RandomNegativeSamplesThreshold(threshold=t, n_neg=20000, C=1)
       for t in (3.0, 4.0, 5.0)]
    + [RandomNegativeSamplesThresholdInformedPrior(threshold=t, n_neg=20000, C=1)
       for t in (3.0, 4.0, 5.0)]
    + [RandomNegativeSamplesThresholdInformedPrior(threshold=5.0, n_neg=n, C=1)
       for n in (1000, 5000, 10000, 30000, 40000, 50000)]
)

# Each model is evaluated once per seed.
results = evaluate_models(models, data, seeds=[123456, 42, 987987])









    



Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamples(C=1,n=20000)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.08571429  0.01005025  0.01612903  0.04819277  0.07142857  0.07079646
  0.03816794  0.03225806  0.03703704  0.02797203  0.02994012  0.03977273
  0.05392157  0.02020202  0.01403509  0.01851852  0.01369863  0.02597403
  0.02409639  0.01010101]
pos probs2 [  1.01988815e-04   1.19584875e-05   1.91914437e-05   5.73431089e-05
   8.49906792e-05   8.42385493e-05   4.54148667e-05   3.83828874e-05
   4.40692411e-05   3.32830632e-05   3.56248356e-05   4.73243555e-05
   6.41596304e-05   2.40377679e-05   1.66999229e-05   2.20346205e-05
   1.62995823e-05   3.09057015e-05   2.86715544e-05   1.20188839e-05]
mean= 1.0
mean2= 2.13423618295
pos probs3 [ 6.94339853  0.81413383  1.30655349  3.90391885  5.78616544  5.73496044
  3.09184413  2.61310697  3.00023393  2.26591094  2.42533881  3.22184212
  4.36798764  1.63649124  1.13693075  1.50011697  1.10967556  2.10406016
  1.95195943  0.81824562]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [61.523784421246077, 55.231579196345898, 47.130947580881831, 45.903579154296366, 44.89506678449964, 41.799259135794578, 40.503158077320329, 40.193973664516356, 30.973003235597897, 28.229473811465688]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000309
      safety commission
      61.523784
    
    
      1
      0.000702
      consumer product
      55.231579
    
    
      2
      0.094143
      have choked
      47.130948
    
    
      3
      0.006806
      product safety
      45.903579
    
    
      4
      0.362624
      cpsc
      44.895067
    
    
      5
      0.193792
      be recalled
      41.799259
    
    
      6
      0.105946
      dangerous product
      40.503158
    
    
      7
      0.005696
      commission
      40.193974
    
    
      8
      0.035305
      extremely dangerous
      30.973003
    
    
      9
      0.005788
      seriously injured
      28.229474
    
    
      10
      0.182547
      leaned forward
      27.576618
    
    
      11
      0.995862
      recalled
      27.002105
    
    
      12
      0.016994
      to recall
      27.002105
    
    
      13
      0.046329
      he leaned
      26.241483
    
    
      14
      0.087320
      her throat
      25.888617
    
    
      15
      0.079764
      choked on
      25.684930
    
    
      16
      0.009330
      plastic broke
      25.016656
    
    
      17
      0.014789
      happened if
      25.001949
    
    
      18
      0.183882
      been recalled
      24.558476
    
    
      19
      0.001871
      face first
      24.109023
    
  








    



hazard transform= 12.510975495
crib transform= 1.90864838583
pampers transform= 1.62012632309
very dangerous transform= 19.7466920266






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.054946
      super easy
      0.756345
    
    
      1
      -0.075466
      are great
      0.756289
    
    
      2
      -0.046282
      cup holders
      0.756272
    
    
      3
      -0.065321
      so cute
      0.756248
    
    
      4
      -0.034513
      love love
      0.756227
    
    
      5
      -0.060443
      are soft
      0.756226
    
    
      6
      -0.034254
      work great
      0.756214
    
    
      7
      -0.022103
      really love
      0.756213
    
    
      8
      -0.025111
      it super
      0.756210
    
    
      9
      -0.143647
      love that
      0.756204
    
    
      10
      -0.017637
      will love
      0.756188
    
    
      11
      -0.028556
      but overall
      0.756187
    
    
      12
      -0.028631
      great quality
      0.756183
    
    
      13
      -0.009706
      how well
      0.756173
    
    
      14
      -0.045497
      still loves
      0.756171
    
    
      15
      -0.011078
      colors are
      0.756168
    
    
      16
      -0.030595
      and cute
      0.756160
    
    
      17
      -0.117890
      works great
      0.756160
    
    
      18
      -0.026571
      vibrant
      0.756158
    
    
      19
      -0.024115
      are perfect
      0.756152
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.01507538  0.01612903  0.01748252  0.05421687  0.05357143  0.0619469
  0.03053435  0.04444444  0.02797203  0.0239521   0.03977273  0.05882353
  0.03398058  0.01299892  0.03571429  0.01403509  0.01851852  0.03225806
  0.01369863  0.02739726]
pos probs2 [  1.78314965e-05   1.90777839e-05   2.06787168e-05   6.41289363e-05
   6.33654966e-05   7.32721966e-05   3.61167207e-05   5.25698934e-05
   3.30859469e-05   2.83310803e-05   4.70440808e-05   6.95778001e-05
   4.01930011e-05   1.53754204e-05   4.22436644e-05   1.66010190e-05
   2.19041223e-05   3.81555678e-05   1.62030493e-05   3.24060987e-05]
mean= 1.0
mean2= 2.16257425114
pos probs3 [ 1.21396828  1.29881553  1.40780704  4.36589798  4.31392301  4.98837115
  2.45882634  3.57895835  2.25249127  1.92877995  3.20276102  4.73685663
  2.73633951  1.04675862  2.87594867  1.13019737  1.49123264  2.59763106
  1.1031036   2.2062072 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [63.198061921474853, 56.734623770414927, 46.973828283031715, 46.851818339439419, 45.599378918467963, 41.873812640873986, 41.800047850549483, 41.562096914018845, 31.72258533399544, 29.605353959893939]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000322
      safety commission
      63.198062
    
    
      1
      0.000671
      consumer product
      56.734624
    
    
      2
      0.003853
      product safety
      46.973828
    
    
      3
      0.091019
      have choked
      46.851818
    
    
      4
      0.358254
      cpsc
      45.599379
    
    
      5
      0.197924
      be recalled
      41.873813
    
    
      6
      0.005214
      commission
      41.800048
    
    
      7
      0.095590
      dangerous product
      41.562097
    
    
      8
      0.024793
      seriously injured
      31.722585
    
    
      9
      0.038375
      extremely dangerous
      29.605354
    
    
      10
      0.010754
      plastic broke
      28.421140
    
    
      11
      0.161353
      leaned forward
      28.269964
    
    
      12
      0.068356
      her throat
      28.225806
    
    
      13
      0.151812
      arm stuck
      27.767780
    
    
      14
      0.020216
      to recall
      26.842188
    
    
      15
      0.978791
      recalled
      26.657069
    
    
      16
      0.047072
      he leaned
      24.951893
    
    
      17
      0.104567
      choked on
      24.059766
    
    
      18
      0.007896
      happened if
      23.859722
    
    
      19
      0.190692
      been recalled
      23.319910
    
  








    



hazard transform= 11.9596858041
crib transform= 2.04866652901
pampers transform= 1.77158438096
very dangerous transform= 20.7450701875






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.040329
      so cute
      0.756491
    
    
      1
      -0.026346
      super easy
      0.756487
    
    
      2
      -0.102125
      are great
      0.756431
    
    
      3
      -0.053281
      love love
      0.756370
    
    
      4
      -0.022434
      are soft
      0.756368
    
    
      5
      -0.021039
      really love
      0.756355
    
    
      6
      -0.014006
      it super
      0.756353
    
    
      7
      -0.072784
      great to
      0.756342
    
    
      8
      -0.006480
      will love
      0.756331
    
    
      9
      -0.010272
      colors are
      0.756311
    
    
      10
      -0.158837
      love that
      0.756305
    
    
      11
      -0.037007
      and cute
      0.756303
    
    
      12
      -0.166537
      works great
      0.756303
    
    
      13
      -0.066330
      vibrant
      0.756301
    
    
      14
      -0.006770
      just love
      0.756291
    
    
      15
      -0.096246
      neutral
      0.756279
    
    
      16
      -0.102921
      great price
      0.756276
    
    
      17
      -0.013747
      are nice
      0.756271
    
    
      18
      -0.015055
      great gift
      0.756270
    
    
      19
      -0.037904
      these work
      0.756270
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=3.0)
pos probs1 [ 0.01612903  0.05120482  0.05357143  0.05309735  0.03816794  0.03225806
  0.02962963  0.02797203  0.0239521   0.03409091  0.04901961  0.02912621
  0.01256031  0.01851852  0.03225806  0.04781705  0.02409639  0.01428571
  0.00826446  0.01398601]
pos probs2 [  1.90839316e-05   6.05857345e-05   6.33859155e-05   6.28249782e-05
   4.51604487e-05   3.81678631e-05   3.50578891e-05   3.30966086e-05
   2.83402097e-05   4.03364917e-05   5.80001841e-05   3.44622453e-05
   1.48614053e-05   2.19111807e-05   3.81678631e-05   5.65773106e-05
   2.85109339e-05   1.69029108e-05   9.77854344e-06   1.65483043e-05]
mean= 1.0
mean2= 2.19634158144
pos probs3 [ 1.29923406  4.12467681  4.31531313  4.27712452  3.07452335  2.59846812
  2.38674109  2.25321711  1.92940148  2.74610835  3.94865254  2.34618966
  1.01176447  1.49171318  2.59846812  3.85178331  1.94102438  1.15075017
  0.66572324  1.12660856]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [62.198774891855123, 54.92216709899278, 46.866915924473837, 45.614072915950644, 44.303881459854189, 41.813517547121222, 41.242886013536989, 40.27625587259471, 30.512315054995995, 29.136014886557877]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001510
      safety commission
      62.198775
    
    
      1
      0.000437
      consumer product
      54.922167
    
    
      2
      0.117785
      have choked
      46.866916
    
    
      3
      0.290730
      cpsc
      45.614073
    
    
      4
      0.004052
      product safety
      44.303881
    
    
      5
      0.007271
      commission
      41.813518
    
    
      6
      0.204041
      be recalled
      41.242886
    
    
      7
      0.079250
      dangerous product
      40.276256
    
    
      8
      0.024925
      seriously injured
      30.512315
    
    
      9
      0.152220
      leaned forward
      29.136015
    
    
      10
      0.036079
      extremely dangerous
      28.430298
    
    
      11
      0.180001
      been recalled
      28.065807
    
    
      12
      0.968364
      recalled
      27.869317
    
    
      13
      0.007379
      serious injury
      27.199549
    
    
      14
      0.031881
      to recall
      26.850837
    
    
      15
      0.086748
      her throat
      25.743586
    
    
      16
      0.188896
      arm stuck
      24.999055
    
    
      17
      0.033067
      he leaned
      24.959933
    
    
      18
      0.024676
      face first
      24.932920
    
    
      19
      0.098794
      choked on
      24.558693
    
  








    



hazard transform= 12.5303907159
crib transform= 2.03962344233
pampers transform= 1.93326028188
very dangerous transform= 19.8592064967






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.050110
      so cute
      0.752067
    
    
      1
      -0.068935
      super easy
      0.752063
    
    
      2
      -0.085507
      are perfect
      0.752019
    
    
      3
      -0.101070
      are great
      0.752008
    
    
      4
      -0.131239
      love that
      0.752004
    
    
      5
      -0.040556
      also love
      0.752000
    
    
      6
      -0.076168
      great price
      0.751958
    
    
      7
      -0.024094
      love love
      0.751947
    
    
      8
      -0.021744
      are soft
      0.751945
    
    
      9
      -0.030350
      really love
      0.751932
    
    
      10
      -0.010568
      will love
      0.751908
    
    
      11
      -0.028522
      but overall
      0.751906
    
    
      12
      -0.080791
      love these
      0.751904
    
    
      13
      -0.084411
      love them
      0.751893
    
    
      14
      -0.024971
      and cute
      0.751880
    
    
      15
      -0.134213
      works great
      0.751880
    
    
      16
      -0.014960
      just love
      0.751868
    
    
      17
      -0.052358
      are easy
      0.751868
    
    
      18
      -0.008053
      great gift
      0.751848
    
    
      19
      -0.042026
      these work
      0.751848
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.01507538  0.01612903  0.05421687  0.07142857  0.0619469   0.04580153
  0.03225806  0.03703704  0.02797203  0.02994012  0.04545455  0.05392157
  0.04854369  0.01447426  0.02020202  0.01851852  0.03225806  0.01369863
  0.01818182  0.06444906]
pos probs2 [  1.55543786e-05   1.66415126e-05   5.59395423e-05   7.36981271e-05
   6.39151899e-05   4.72568143e-05   3.32830251e-05   3.82138437e-05
   2.88608050e-05   3.08914305e-05   4.68988082e-05   5.56348607e-05
   5.00861058e-05   1.49341633e-05   2.08439147e-05   1.91069218e-05
   3.32830251e-05   1.41338874e-05   1.87595233e-05   6.64968548e-05]
mean= 1.0
mean2= 2.22789449571
pos probs3 [ 1.05894209  1.13295418  3.80836404  5.01736849  4.35134613  3.21724392
  2.26590835  2.60159848  1.96484361  2.10308859  3.19287086  3.78762131
  3.40986208  1.01671784  1.41905372  1.30079924  2.26590835  0.96223505
  1.27714834  4.52710588]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [56.016696340039786, 48.691280602931407, 41.468852846107097, 40.868746997870289, 40.389816368989003, 36.526442629346576, 36.462097750962521, 36.254533627142997, 26.607257160071804, 26.154367676496115]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001109
      safety commission
      56.016696
    
    
      1
      0.000945
      consumer product
      48.691281
    
    
      2
      0.328170
      cpsc
      41.468853
    
    
      3
      0.074076
      have choked
      40.868747
    
    
      4
      0.006838
      product safety
      40.389816
    
    
      5
      0.213595
      be recalled
      36.526443
    
    
      6
      0.003579
      commission
      36.462098
    
    
      7
      0.084593
      dangerous product
      36.254534
    
    
      8
      0.023888
      seriously injured
      26.607257
    
    
      9
      0.173409
      leaned forward
      26.154368
    
    
      10
      0.005332
      plastic broke
      25.824691
    
    
      11
      0.039767
      extremely dangerous
      24.791703
    
    
      12
      0.945911
      recalled
      24.221779
    
    
      13
      0.042526
      he leaned
      23.744166
    
    
      14
      0.017658
      to recall
      23.414386
    
    
      15
      0.170728
      been recalled
      23.202491
    
    
      16
      0.082703
      her throat
      23.173001
    
    
      17
      0.081996
      choked on
      23.128845
    
    
      18
      0.168018
      arm stuck
      23.010690
    
    
      19
      0.027412
      first into
      22.964110
    
  








    



hazard transform= 11.9153210287
crib transform= 1.99012129877
pampers transform= 2.52875372049
very dangerous transform= 19.6525181417






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.038867
      are perfect
      0.757949
    
    
      1
      -0.080609
      so cute
      0.757897
    
    
      2
      -0.037098
      super easy
      0.757889
    
    
      3
      -0.047753
      love love
      0.757876
    
    
      4
      -0.116802
      are great
      0.757817
    
    
      5
      -0.166313
      love that
      0.757811
    
    
      6
      -0.014396
      just love
      0.757797
    
    
      7
      -0.013212
      great gift
      0.757776
    
    
      8
      -0.055244
      these work
      0.757776
    
    
      9
      -0.099074
      love these
      0.757766
    
    
      10
      -0.022087
      in great
      0.757758
    
    
      11
      -0.033653
      works very
      0.757748
    
    
      12
      -0.039976
      great to
      0.757724
    
    
      13
      -0.010060
      pricey but
      0.757724
    
    
      14
      -0.026124
      great quality
      0.757698
    
    
      15
      -0.011436
      great little
      0.757689
    
    
      16
      -0.171237
      works great
      0.757684
    
    
      17
      -0.030785
      great price
      0.757677
    
    
      18
      -0.016034
      buying more
      0.757658
    
    
      19
      -0.022507
      are soft
      0.757651
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.0201005   0.01612903  0.08333333  0.01666667  0.05421687  0.07079646
  0.04580153  0.03703704  0.03496503  0.02994012  0.04545455  0.05392157
  0.03883495  0.01265823  0.03703704  0.03225806  0.05821206  0.01204819
  0.01449275  0.01818182]
pos probs2 [  2.04264965e-05   1.63906162e-05   8.46848501e-05   1.69369700e-05
   5.50961675e-05   7.19446514e-05   4.65443451e-05   3.76377112e-05
   3.55321049e-05   3.04256947e-05   4.61917364e-05   5.47960795e-05
   3.94647845e-05   1.28635215e-05   3.76377112e-05   3.27812323e-05
   5.91561531e-05   1.22435928e-05   1.47278000e-05   1.84766946e-05]
mean= 1.0
mean2= 2.22164208328
pos probs3 [ 1.39063588  1.11587315  5.7653446   1.15306892  3.75094709  4.89799187
  3.16873901  2.56237538  2.4190257   2.07138129  3.14473342  3.73051709
  2.68676253  0.87574855  2.56237538  2.2317463   4.0273509   0.8335438
  1.00266863  1.25789337]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.026267825965014, 40.010102258364078, 36.252486821266054, 35.707940725206647, 28.302600745263231, 26.30945984770948, 25.760050323702934, 24.708619698245677, 24.417930054736903, 24.417930054736903]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.087824
      have choked
      44.026268
    
    
      1
      0.350106
      cpsc
      40.010102
    
    
      2
      0.182726
      be recalled
      36.252487
    
    
      3
      0.085475
      dangerous product
      35.707941
    
    
      4
      0.027400
      seriously injured
      28.302601
    
    
      5
      0.042119
      he leaned
      26.309460
    
    
      6
      0.176600
      leaned forward
      25.760050
    
    
      7
      0.025582
      face first
      24.708620
    
    
      8
      0.036379
      extremely dangerous
      24.417930
    
    
      9
      0.008237
      plastic broke
      24.417930
    
    
      10
      0.012195
      to recall
      24.342566
    
    
      11
      0.907292
      recalled
      24.254208
    
    
      12
      0.086792
      choked on
      24.045706
    
    
      13
      0.143552
      arm stuck
      23.856598
    
    
      14
      0.179617
      been recalled
      23.165729
    
    
      15
      0.056270
      her throat
      22.823632
    
    
      16
      0.032190
      first into
      22.617890
    
    
      17
      0.009580
      happened if
      22.207253
    
    
      18
      0.214492
      is dangerous
      21.720601
    
    
      19
      0.005191
      serious injury
      21.563886
    
  








    



hazard transform= 12.3506493129
crib transform= 1.96011715675
pampers transform= 1.93715578434
very dangerous transform= 19.9311635904






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.068405
      are perfect
      0.758899
    
    
      1
      -0.051351
      also love
      0.758880
    
    
      2
      -0.229245
      love that
      0.758843
    
    
      3
      -0.052369
      super easy
      0.758839
    
    
      4
      -0.011543
      love love
      0.758826
    
    
      5
      -0.057430
      are soft
      0.758824
    
    
      6
      -0.093075
      are great
      0.758807
    
    
      7
      -0.088662
      great to
      0.758799
    
    
      8
      -0.021812
      will love
      0.758787
    
    
      9
      -0.029720
      and cute
      0.758759
    
    
      10
      -0.016050
      vibrant
      0.758757
    
    
      11
      -0.014625
      great gift
      0.758726
    
    
      12
      -0.018091
      these work
      0.758726
    
    
      13
      -0.045323
      works very
      0.758698
    
    
      14
      -0.051409
      cup holders
      0.758694
    
    
      15
      -0.013936
      pricey but
      0.758674
    
    
      16
      -0.031808
      can beat
      0.758650
    
    
      17
      -0.119674
      love these
      0.758650
    
    
      18
      -0.011255
      great little
      0.758639
    
    
      19
      -0.102801
      great price
      0.758626
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=4.0)
pos probs1 [ 0.01507538  0.01612903  0.02097902  0.01666667  0.04819277  0.03571429
  0.07079646  0.03816794  0.03225806  0.03703704  0.02797203  0.0239521
  0.03409091  0.05392157  0.01052632  0.01851852  0.01612903  0.05197505
  0.02409639  0.03030303]
pos probs2 [  1.59710557e-05   1.70873122e-05   2.22254551e-05   1.76568893e-05
   5.10560655e-05   3.78361914e-05   7.50027157e-05   4.04356244e-05
   3.41746245e-05   3.92375318e-05   2.96339401e-05   2.53751703e-05
   3.61163645e-05   5.71252302e-05   1.11517196e-05   1.96187659e-05
   1.70873122e-05   5.50630644e-05   2.55280328e-05   3.21034351e-05]
mean= 1.0
mean2= 2.13149655604
pos probs3 [ 1.08730947  1.16330422  1.51310898  1.20208103  3.47589694  2.57588791
  5.10618489  2.75285731  2.32660844  2.67129117  2.01747864  1.72754159
  2.4588021   3.88908567  0.75920907  1.33564558  1.16330422  3.74869343
  1.73794847  2.18560186]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.58627802266782, 39.972814811250743, 38.081926875831577, 37.225734971487363, 28.412824230131456, 28.38957314647341, 27.357706088959464, 26.516493201197342, 25.455833473149447, 24.70483761972703]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.097777
      have choked
      44.586278
    
    
      1
      0.348655
      cpsc
      39.972815
    
    
      2
      0.187012
      be recalled
      38.081927
    
    
      3
      0.093058
      dangerous product
      37.225735
    
    
      4
      0.027220
      seriously injured
      28.412824
    
    
      5
      0.170727
      leaned forward
      28.389573
    
    
      6
      0.150988
      arm stuck
      27.357706
    
    
      7
      0.006212
      plastic broke
      26.516493
    
    
      8
      0.031533
      extremely dangerous
      25.455833
    
    
      9
      0.944766
      recalled
      24.704838
    
    
      10
      0.099355
      choked on
      24.188216
    
    
      11
      0.074376
      her throat
      23.793769
    
    
      12
      0.074566
      bruise on
      23.647496
    
    
      13
      0.032826
      he leaned
      23.364392
    
    
      14
      0.196156
      been recalled
      23.171336
    
    
      15
      0.079805
      an unsafe
      22.868859
    
    
      16
      0.016743
      to recall
      22.705975
    
    
      17
      0.005195
      happened if
      22.260760
    
    
      18
      0.197837
      is dangerous
      21.805191
    
    
      19
      0.084945
      hazard to
      21.510924
    
  








    



hazard transform= 12.2345135446
crib transform= 1.85577122699
pampers transform= 1.87524639919
very dangerous transform= 20.3787697334






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.019607
      also love
      0.765346
    
    
      1
      -0.089914
      are great
      0.765313
    
    
      2
      -0.040619
      love love
      0.765291
    
    
      3
      -0.019955
      will love
      0.765252
    
    
      4
      -0.043356
      how well
      0.765237
    
    
      5
      -0.030687
      still loves
      0.765234
    
    
      6
      -0.176153
      love that
      0.765226
    
    
      7
      -0.036675
      and cute
      0.765223
    
    
      8
      -0.032474
      vibrant
      0.765221
    
    
      9
      -0.022013
      just love
      0.765212
    
    
      10
      -0.062500
      super easy
      0.765199
    
    
      11
      -0.011823
      are nice
      0.765191
    
    
      12
      -0.005157
      great gift
      0.765191
    
    
      13
      -0.070969
      these work
      0.765191
    
    
      14
      -0.158091
      works great
      0.765182
    
    
      15
      -0.094045
      love these
      0.765180
    
    
      16
      -0.055547
      in great
      0.765172
    
    
      17
      -0.021649
      works very
      0.765162
    
    
      18
      -0.065848
      great to
      0.765138
    
    
      19
      -0.027989
      pricey but
      0.765138
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.0201005   0.02097902  0.05722892  0.07079646  0.03816794  0.03225806
  0.03703704  0.04195804  0.0239521   0.03977273  0.04901961  0.05339806
  0.01265823  0.01654771  0.03703704  0.03225806  0.04109589  0.06029106
  0.02409639  0.01694915]
pos probs2 [  1.86545594e-05   1.94698810e-05   5.31121153e-05   6.57036694e-05
   3.54223027e-05   2.99375590e-05   3.43727530e-05   3.89397621e-05
   2.22290857e-05   3.69116495e-05   4.54933495e-05   4.95568331e-05
   1.17476497e-05   1.53573387e-05   3.43727530e-05   2.99375590e-05
   3.81396300e-05   5.59539825e-05   2.23629959e-05   1.57299039e-05]
mean= 1.0
mean2= 2.19549163146
pos probs3 [ 1.2700024   1.3255095   3.61587281  4.47310581  2.41155037  2.03814902
  2.34009702  2.651019    1.51335616  2.5129451   3.09718723  3.3738292
  0.79977999  1.04552762  2.34009702  2.03814902  2.59654601  3.80934713
  1.52247276  1.07089186]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [51.185919650409389, 45.950996049799343, 39.05834664232944, 38.06181901715307, 37.909571741084456, 34.37134504524991, 33.761705112925085, 33.629458802574916, 27.762060113420432, 26.94552893361395]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000489
      safety commission
      51.185920
    
    
      1
      0.001652
      consumer product
      45.950996
    
    
      2
      0.104694
      have choked
      39.058347
    
    
      3
      0.315255
      cpsc
      38.061819
    
    
      4
      0.005814
      product safety
      37.909572
    
    
      5
      0.181024
      be recalled
      34.371345
    
    
      6
      0.005105
      commission
      33.761705
    
    
      7
      0.094375
      dangerous product
      33.629459
    
    
      8
      0.027799
      seriously injured
      27.762060
    
    
      9
      0.040540
      extremely dangerous
      26.945529
    
    
      10
      0.163109
      leaned forward
      26.214066
    
    
      11
      0.071244
      her throat
      23.449220
    
    
      12
      0.933195
      recalled
      23.239584
    
    
      13
      0.010669
      plastic broke
      23.228904
    
    
      14
      0.177406
      been recalled
      23.157431
    
    
      15
      0.039774
      he leaned
      23.137297
    
    
      16
      0.163501
      arm stuck
      22.876466
    
    
      17
      0.010156
      to recall
      22.230922
    
    
      18
      0.084141
      choked on
      21.574553
    
    
      19
      0.016158
      face first
      21.060873
    
  








    



hazard transform= 12.2855093605
crib transform= 2.09451875605
pampers transform= 2.27457430447
very dangerous transform= 18.7272584317






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.040616
      super easy
      0.780839
    
    
      1
      -0.031266
      love love
      0.780825
    
    
      2
      -0.035879
      are soft
      0.780824
    
    
      3
      -0.125359
      are great
      0.780806
    
    
      4
      -0.035414
      are perfect
      0.780748
    
    
      5
      -0.011094
      great gift
      0.780723
    
    
      6
      -0.020518
      these work
      0.780723
    
    
      7
      -0.124954
      love that
      0.780717
    
    
      8
      -0.034575
      works very
      0.780694
    
    
      9
      -0.150607
      works great
      0.780671
    
    
      11
      -0.022074
      pricey but
      0.780669
    
    
      10
      -0.078288
      great to
      0.780669
    
    
      12
      -0.084417
      so cute
      0.780640
    
    
      13
      -0.006706
      great little
      0.780633
    
    
      14
      -0.079870
      great price
      0.780620
    
    
      15
      -0.026168
      can carry
      0.780606
    
    
      16
      -0.024982
      glad bought
      0.780594
    
    
      17
      -0.029822
      they wash
      0.780592
    
    
      18
      -0.019830
      nice quality
      0.780591
    
    
      19
      -0.056346
      love these
      0.780576
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.02512563  0.01612903  0.05084746  0.05722892  0.05357143  0.07079646
  0.05343511  0.03225806  0.05185185  0.04195804  0.03592814  0.05113636
  0.07352941  0.01265823  0.04761905  0.01403509  0.05555556  0.01265823
  0.03225806  0.02409639]
pos probs2 [  2.23760318e-05   1.43639688e-05   4.52830202e-05   5.09661302e-05
   4.77088963e-05   6.30489249e-05   4.75874996e-05   2.87279375e-05
   4.61774996e-05   3.73664083e-05   3.19963855e-05   4.55403101e-05
   6.54827988e-05   1.12729881e-05   4.24079078e-05   1.24991728e-05
   4.94758924e-05   1.12729881e-05   2.87279375e-05   2.14594232e-05]
mean= 1.0
mean2= 2.2228752408
pos probs3 [ 1.52336024  0.97789899  3.08286801  3.46977414  3.24802166  4.29237081
  3.23975697  1.95579799  3.14376417  2.54390508  2.17831393  3.10038431
  4.45806894  0.76746503  2.88713036  0.85094369  3.36831876  0.76746503
  1.95579799  1.46095753]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [48.350297094808333, 42.027431764047861, 36.523938329411536, 35.27548371015164, 34.8620991354233, 32.497539367977204, 31.471924874992165, 31.29276780739259, 25.721706871985575, 24.965186081633057]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000605
      safety commission
      48.350297
    
    
      1
      0.000552
      consumer product
      42.027432
    
    
      2
      0.321763
      cpsc
      36.523938
    
    
      3
      0.085365
      have choked
      35.275484
    
    
      4
      0.005135
      product safety
      34.862099
    
    
      5
      0.183927
      be recalled
      32.497539
    
    
      6
      0.005528
      commission
      31.471925
    
    
      7
      0.098098
      dangerous product
      31.292768
    
    
      8
      0.025386
      seriously injured
      25.721707
    
    
      10
      0.005084
      plastic broke
      24.965186
    
    
      9
      0.045568
      extremely dangerous
      24.965186
    
    
      11
      0.154176
      leaned forward
      23.219900
    
    
      12
      0.151239
      arm stuck
      22.997487
    
    
      13
      0.845105
      recalled
      22.230904
    
    
      14
      0.183242
      been recalled
      22.221759
    
    
      15
      0.037975
      he leaned
      22.202439
    
    
      16
      0.004158
      happened if
      21.706943
    
    
      17
      0.021354
      face first
      20.931695
    
    
      18
      0.057167
      bruise on
      20.872533
    
    
      19
      0.087009
      her throat
      20.626612
    
  








    



hazard transform= 12.0585811502
crib transform= 2.008429441
pampers transform= 2.18267055457
very dangerous transform= 18.474435288






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.056875
      are perfect
      0.778423
    
    
      1
      -0.067761
      super easy
      0.778361
    
    
      2
      -0.026278
      love love
      0.778348
    
    
      3
      -0.020138
      are soft
      0.778346
    
    
      4
      -0.127362
      loves these
      0.778322
    
    
      5
      -0.103464
      are great
      0.778287
    
    
      6
      -0.019045
      just love
      0.778267
    
    
      7
      -0.006922
      great gift
      0.778246
    
    
      8
      -0.083266
      these work
      0.778245
    
    
      9
      -0.032004
      in great
      0.778226
    
    
      10
      -0.015455
      pricey but
      0.778192
    
    
      11
      -0.007597
      definitely buy
      0.778167
    
    
      12
      -0.116372
      love these
      0.778167
    
    
      13
      -0.032289
      great little
      0.778156
    
    
      14
      -0.040527
      great price
      0.778143
    
    
      15
      -0.029445
      buying more
      0.778124
    
    
      16
      -0.046686
      they wash
      0.778115
    
    
      17
      -0.037464
      nice quality
      0.778114
    
    
      18
      -0.031284
      bibs are
      0.778095
    
    
      19
      -0.008702
      really love
      0.778089
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)
pos probs1 [ 0.0201005   0.01612903  0.02097902  0.06024096  0.05357143  0.07079646
  0.04580153  0.04301075  0.05185185  0.04195804  0.03592814  0.05113636
  0.06372549  0.05339806  0.01754456  0.04166667  0.01052632  0.01851852
  0.01554404  0.03225806]
pos probs2 [  1.81435671e-05   1.45587494e-05   1.89365552e-05   5.43760521e-05
   4.83558464e-05   6.39038913e-05   4.13424030e-05   3.88233318e-05
   4.68036834e-05   3.78731104e-05   3.24302682e-05   4.61578533e-05
   5.75213336e-05   4.81993549e-05   1.58364642e-05   3.76101027e-05
   9.50149963e-06   1.67156012e-05   1.40307119e-05   2.91174989e-05]
mean= 1.0
mean2= 2.17981557377
pos probs3 [ 1.23521405  0.99115966  1.28920068  3.70192163  3.29206602  4.35057692
  2.8145908   2.64309243  3.18639476  2.57840136  2.20785266  3.14242666
  3.91605239  3.28141208  1.07814648  2.56049579  0.6468621   1.13799813
  0.95521087  1.98231932]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [49.783816942811164, 39.1057539337707, 38.499984939960143, 36.359040264613, 33.675640673252829, 33.305991082088241, 32.70826884322296, 27.111131928874645, 25.139413243138311, 24.400018735987182]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000681
      safety commission
      49.783817
    
    
      1
      0.085111
      have choked
      39.105754
    
    
      2
      0.305678
      cpsc
      38.499985
    
    
      3
      0.005806
      product safety
      36.359040
    
    
      4
      0.204428
      be recalled
      33.675641
    
    
      5
      0.007200
      commission
      33.305991
    
    
      6
      0.078297
      dangerous product
      32.708269
    
    
      7
      0.031247
      extremely dangerous
      27.111132
    
    
      8
      0.021551
      seriously injured
      25.139413
    
    
      9
      0.007024
      plastic broke
      24.400019
    
    
      10
      0.167001
      leaned forward
      24.188513
    
    
      11
      0.909811
      recalled
      22.814900
    
    
      12
      0.022167
      to recall
      22.759963
    
    
      13
      0.043061
      he leaned
      22.503512
    
    
      14
      0.182853
      been recalled
      22.245031
    
    
      15
      0.006363
      happened if
      22.001297
    
    
      16
      0.163226
      arm stuck
      21.190310
    
    
      17
      0.059252
      bruise on
      21.155572
    
    
      18
      0.056202
      choked on
      20.983575
    
    
      19
      0.072693
      her throat
      20.906316
    
  








    



hazard transform= 12.1993399573
crib transform= 1.98680853113
pampers transform= 1.59774937501
very dangerous transform= 18.0440479172






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.045964
      are perfect
      0.784348
    
    
      1
      -0.025519
      love love
      0.784272
    
    
      2
      -0.047174
      are soft
      0.784270
    
    
      3
      -0.007944
      really love
      0.784256
    
    
      4
      -0.103692
      loves these
      0.784246
    
    
      5
      -0.029826
      will love
      0.784231
    
    
      6
      -0.028986
      how well
      0.784216
    
    
      7
      -0.078673
      still loves
      0.784213
    
    
      8
      -0.136279
      are great
      0.784211
    
    
      9
      -0.017515
      just love
      0.784190
    
    
      10
      -0.042817
      these work
      0.784169
    
    
      11
      -0.110932
      love these
      0.784158
    
    
      12
      -0.041703
      also love
      0.784154
    
    
      13
      -0.113129
      loves them
      0.784118
    
    
      14
      -0.060624
      great to
      0.784114
    
    
      15
      -0.025287
      pricey but
      0.784114
    
    
      16
      -0.012199
      great little
      0.784079
    
    
      17
      -0.024881
      super easy
      0.784069
    
    
      18
      -0.005174
      buying more
      0.784046
    
    
      19
      -0.018044
      they wash
      0.784037
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.22857143  0.08542714  0.06451613  0.07342657  0.10542169  0.23214286
  0.13274336  0.09923664  0.07526882  0.11111111  0.0979021   0.07185629
  0.07386364  0.09803922  0.0685434   0.08928571  0.12121212  0.06315789
  0.09259259  0.08064516]
pos probs2 [  7.85316911e-05   2.93507263e-05   2.21662031e-05   2.52276193e-05
   3.62203771e-05   7.97587488e-05   4.56074533e-05   3.40953430e-05
   2.58605703e-05   3.81751276e-05   3.36368257e-05   2.46881065e-05
   2.53777837e-05   3.36839361e-05   2.35498784e-05   3.06764418e-05
   4.16455938e-05   2.16995462e-05   3.18126063e-05   2.77077539e-05]
mean= 1.0
mean2= 2.16140707346
pos probs3 [ 5.34643753  1.99819744  1.50907511  1.71749632  2.46588327  5.42997562
  3.10495542  2.32121095  1.76058763  2.59896269  2.2899951   1.68076629
  1.72771951  2.29320237  1.60327572  2.08845216  2.83523202  1.47730511
  2.16580224  1.88634389]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [20.133736268248057, 17.861961748140924, 17.808801147700027, 16.063468181417612, 15.983620532562613, 15.845288647544368, 14.782899770604251, 14.105820391797948, 13.821756114632858, 13.415233875967186]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000887
      safety commission
      20.133736
    
    
      1
      0.030602
      have choked
      17.861962
    
    
      2
      0.004502
      consumer product
      17.808801
    
    
      3
      0.142718
      cpsc
      16.063468
    
    
      4
      0.006279
      product safety
      15.983621
    
    
      5
      0.039004
      dangerous product
      15.845289
    
    
      6
      0.117738
      be recalled
      14.782900
    
    
      7
      0.002023
      commission
      14.105820
    
    
      8
      0.021048
      seriously injured
      13.821756
    
    
      9
      0.033536
      extremely dangerous
      13.415234
    
    
      10
      0.058837
      leg stuck
      13.024347
    
    
      11
      0.078349
      arm stuck
      12.905194
    
    
      12
      0.033588
      choked on
      12.408462
    
    
      13
      0.000557
      crib were
      12.357332
    
    
      14
      0.078417
      leaned forward
      11.944169
    
    
      15
      0.005050
      plastic broke
      11.695332
    
    
      16
      0.066169
      been recalled
      11.430732
    
    
      17
      0.002227
      happened if
      11.262172
    
    
      18
      0.000084
      got hurt
      11.245512
    
    
      19
      0.016341
      had happened
      11.177840
    
  








    



hazard transform= 7.29442194494
crib transform= 2.9725776627
pampers transform= 2.05837844907
very dangerous transform= 9.71911254598






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.022151
      just great
      2.961582
    
    
      1
      -0.006894
      great love
      2.960185
    
    
      2
      -0.004003
      best stroller
      2.959723
    
    
      3
      -0.025013
      cute too
      2.958532
    
    
      4
      -0.065863
      great quality
      2.958349
    
    
      5
      -0.000256
      great stroller
      2.958033
    
    
      6
      -0.002675
      perfect love
      2.957760
    
    
      7
      -0.000945
      much love
      2.957712
    
    
      8
      -0.004824
      just love
      2.957543
    
    
      9
      -0.000142
      far love
      2.957269
    
    
      10
      -0.103704
      love these
      2.957081
    
    
      11
      -0.011254
      great gift
      2.957060
    
    
      12
      -0.000105
      re great
      2.957034
    
    
      13
      -0.002634
      great fit
      2.956966
    
    
      14
      -0.010690
      quality easy
      2.956946
    
    
      15
      -0.000903
      too highly
      2.956846
    
    
      16
      -0.000043
      time great
      2.956785
    
    
      17
      -0.000672
      well love
      2.956605
    
    
      18
      -0.007872
      colors they
      2.956529
    
    
      19
      -0.001934
      great don
      2.956485
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.09843571  0.22857143  0.08040201  0.06451613  0.08741259  0.12951807
  0.21428571  0.15044248  0.13740458  0.10752688  0.11111111  0.1048951
  0.08982036  0.11363636  0.15196078  0.12135922  0.0952381   0.12121212
  0.0877193   0.09259259]
pos probs2 [  2.95944627e-05   6.87194567e-05   2.41726732e-05   1.93966208e-05
   2.62803866e-05   3.89393006e-05   6.44244906e-05   4.52301734e-05
   4.13103604e-05   3.23277014e-05   3.34052914e-05   3.15364639e-05
   2.70042775e-05   3.41645026e-05   4.56866486e-05   3.64863620e-05
   2.86331069e-05   3.64421361e-05   2.63725985e-05   2.78377429e-05]
mean= 1.0
mean2= 2.14844736178
pos probs3 [ 2.01479102  4.67842061  1.64567559  1.32052195  1.78916872  2.65098758
  4.38601932  3.0792702   2.81240934  2.20086991  2.27423224  2.14700247
  1.83845121  2.32591934  3.11034704  2.48399153  1.94934192  2.48098063
  1.79544651  1.89519353]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [17.618102932457433, 16.048843429950779, 15.258030855141609, 14.549606266510292, 14.195610925416958, 14.157095701975903, 13.672684234582995, 12.704331830605714, 12.343352088175441, 12.341054368129242]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000872
      safety commission
      17.618103
    
    
      1
      0.002292
      consumer product
      16.048843
    
    
      2
      0.017139
      have choked
      15.258031
    
    
      3
      0.118388
      cpsc
      14.549606
    
    
      4
      0.040189
      dangerous product
      14.195611
    
    
      5
      0.006078
      product safety
      14.157096
    
    
      6
      0.114750
      be recalled
      13.672684
    
    
      7
      0.077787
      arm stuck
      12.704332
    
    
      8
      0.004272
      commission
      12.343352
    
    
      9
      0.024803
      extremely dangerous
      12.341054
    
    
      10
      0.010991
      seriously injured
      12.094781
    
    
      11
      0.061572
      leaned forward
      11.540519
    
    
      12
      0.078326
      leg stuck
      11.164413
    
    
      13
      0.009245
      hinge broke
      11.021279
    
    
      14
      0.018246
      had happened
      10.686879
    
    
      15
      0.003831
      plastic broke
      10.535046
    
    
      16
      0.073682
      been recalled
      10.465585
    
    
      17
      0.030703
      choked on
      10.358851
    
    
      18
      0.009745
      was caused
      10.234045
    
    
      19
      0.062147
      was strapped
      10.234045
    
  








    



hazard transform= 6.70898511178
crib transform= 2.55826471547
pampers transform= 2.00587283681
very dangerous transform= 9.52531620171






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.001160
      great easy
      3.084866
    
    
      1
      -0.000029
      re great
      3.083841
    
    
      2
      -0.000001
      too love
      3.082497
    
    
      3
      -0.029517
      kids love
      3.082373
    
    
      4
      -0.032480
      them easy
      3.081706
    
    
      5
      -0.000026
      girls love
      3.081534
    
    
      6
      -0.004637
      cute too
      3.081457
    
    
      7
      -0.051376
      great quality
      3.081267
    
    
      8
      -0.000485
      perfect love
      3.080653
    
    
      9
      -0.021444
      comfortable easy
      3.080553
    
    
      10
      -0.007746
      clean love
      3.080362
    
    
      11
      -0.008106
      use easy
      3.080225
    
    
      12
      -0.001460
      far love
      3.080141
    
    
      13
      -0.012911
      children love
      3.080065
    
    
      14
      -0.040314
      great gift
      3.079924
    
    
      15
      -0.000045
      too highly
      3.079701
    
    
      16
      -0.000066
      cuddly and
      3.079255
    
    
      17
      -0.009100
      much stuff
      3.079111
    
    
      18
      -0.009641
      great too
      3.078952
    
    
      19
      -0.000570
      great all
      3.078886
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
pos probs1 [ 0.09538344  0.31428571  0.12060302  0.08064516  0.08741259  0.12048193
  0.19642857  0.17699115  0.13740458  0.15053763  0.11851852  0.11888112
  0.08982036  0.09090909  0.14215686  0.13106796  0.07165357  0.11309524
  0.13131313  0.07017544]
pos probs2 [  2.95086230e-05   9.72300696e-05   3.73107622e-05   2.49490648e-05
   2.70426926e-05   3.72733016e-05   6.07687935e-05   5.47554696e-05
   4.25086356e-05   4.65715876e-05   3.66658848e-05   3.67780619e-05
   2.77875811e-05   2.81244003e-05   4.39788416e-05   4.05482859e-05
   2.21673520e-05   3.49880932e-05   4.06241338e-05   2.17100634e-05]
mean= 1.0
mean2= 2.14939698175
pos probs3 [ 2.00894705  6.61942314  2.54011669  1.69853233  1.84106651  2.53756637
  4.13713946  3.72775237  2.89398791  3.17059368  2.49621344  2.50385046
  1.89177852  1.91470917  2.99407954  2.7605273   1.50915332  2.38198939
  2.76569103  1.47802112]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [18.662355219089577, 16.753705253500872, 16.083557043360837, 14.743260623080767, 14.717884959357738, 14.607378036692928, 14.237777401718001, 13.344499800607455, 12.699027007275451, 12.445609616886362]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001153
      safety commission
      18.662355
    
    
      1
      0.001477
      consumer product
      16.753705
    
    
      2
      0.030728
      have choked
      16.083557
    
    
      3
      0.005414
      product safety
      14.743261
    
    
      4
      0.148264
      cpsc
      14.717885
    
    
      5
      0.044863
      dangerous product
      14.607378
    
    
      6
      0.139361
      be recalled
      14.237777
    
    
      7
      0.003083
      commission
      13.344500
    
    
      8
      0.028408
      extremely dangerous
      12.699027
    
    
      9
      0.010930
      seriously injured
      12.445610
    
    
      10
      0.063387
      leaned forward
      11.427147
    
    
      11
      0.039924
      choked on
      10.916177
    
    
      12
      0.003181
      plastic broke
      10.840633
    
    
      13
      0.081311
      been recalled
      10.769156
    
    
      14
      0.084687
      arm stuck
      10.530900
    
    
      15
      0.013225
      hinge broke
      10.530900
    
    
      16
      0.061636
      leg stuck
      10.530900
    
    
      17
      0.025749
      wedged between
      10.530900
    
    
      18
      0.487445
      recalled
      10.434065
    
    
      19
      0.030352
      we woke
      10.211782
    
  








    



hazard transform= 6.8177829548
crib transform= 2.22603162053
pampers transform= 1.68494407121
very dangerous transform= 9.33487020061






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.003210
      great easy
      3.129270
    
    
      1
      -0.040028
      cute too
      3.129022
    
    
      2
      -0.013174
      re great
      3.128231
    
    
      3
      -0.001136
      great love
      3.127559
    
    
      4
      -0.007227
      just love
      3.127255
    
    
      5
      -0.010193
      cute love
      3.126922
    
    
      6
      -0.013057
      great quality
      3.126721
    
    
      7
      -0.030617
      works perfect
      3.126248
    
    
      8
      -0.003089
      girls love
      3.125890
    
    
      9
      -0.000159
      person than
      3.125719
    
    
      10
      -0.009842
      definitely good
      3.125609
    
    
      11
      -0.007634
      made great
      3.125255
    
    
      12
      -0.011515
      great bag
      3.125055
    
    
      13
      -0.001026
      seat easy
      3.124878
    
    
      14
      -0.010255
      are perfect
      3.124857
    
    
      15
      -0.014045
      clean love
      3.124701
    
    
      16
      -0.007827
      use easy
      3.124563
    
    
      17
      -0.003820
      much cuter
      3.124053
    
    
      18
      -0.001372
      time great
      3.123966
    
    
      19
      -0.046951
      great little
      3.123788
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.03517588  0.01612903  0.0753012   0.09734513  0.0610687   0.05376344
  0.05185185  0.04895105  0.04790419  0.05113636  0.07843137  0.06796117
  0.02787192  0.07407407  0.01265823  0.03225806  0.08731809  0.02409639
  0.04116223  0.02857143]
pos probs2 [  2.30738645e-05   1.05799517e-05   4.93943527e-05   6.38542217e-05
   4.00584430e-05   3.52665056e-05   3.40125854e-05   3.21097834e-05
   3.14230900e-05   3.35432559e-05   5.14476082e-05   4.45796022e-05
   1.82827842e-05   4.85894077e-05   8.30325322e-06   2.11599034e-05
   5.72769110e-05   1.58061929e-05   2.70006515e-05   1.87416287e-05]
mean= 1.0
mean2= 2.25176953099
pos probs3 [ 1.57086869  0.72028311  3.36276753  4.34719541  2.7271788   2.4009437
  2.31557681  2.18603406  2.13928397  2.28362486  3.50255316  3.03497932
  1.24469195  3.30796688  0.56528548  1.44056622  3.8994121   1.07608561
  1.83820435  1.27593008]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [37.874127098997562, 33.493164636053812, 30.130397102313466, 30.042353734157359, 29.027409351246639, 26.650475086752497, 26.437271286058476, 25.567301248896037, 21.672047705681877, 21.652146835428727]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000561
      safety commission
      37.874127
    
    
      1
      0.001285
      consumer product
      33.493165
    
    
      2
      0.206390
      cpsc
      30.130397
    
    
      3
      0.063057
      have choked
      30.042354
    
    
      4
      0.004870
      product safety
      29.027409
    
    
      5
      0.066088
      dangerous product
      26.650475
    
    
      6
      0.138981
      be recalled
      26.437271
    
    
      7
      0.002385
      commission
      25.567301
    
    
      8
      0.042933
      extremely dangerous
      21.672048
    
    
      9
      0.025698
      seriously injured
      21.652147
    
    
      10
      0.105207
      leaned forward
      20.428455
    
    
      11
      0.084513
      leg stuck
      19.283943
    
    
      12
      0.097001
      arm stuck
      19.248945
    
    
      13
      0.007322
      plastic broke
      19.045133
    
    
      14
      0.117245
      been recalled
      18.994615
    
    
      15
      0.026804
      he leaned
      18.240409
    
    
      16
      0.656412
      recalled
      18.119674
    
    
      17
      0.012408
      face first
      18.075676
    
    
      18
      0.062615
      choked on
      17.699640
    
    
      19
      0.007693
      serious injury
      17.399047
    
  








    



hazard transform= 10.4531753333
crib transform= 2.67463323649
pampers transform= 1.96493232532
very dangerous transform= 15.4631415679






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.026905
      great gift
      1.283171
    
    
      1
      -0.013443
      love love
      1.282964
    
    
      2
      -0.034365
      they wash
      1.282956
    
    
      3
      -0.021354
      granddaughter loves
      1.282801
    
    
      4
      -0.020510
      great love
      1.282759
    
    
      5
      -0.162338
      are great
      1.282697
    
    
      6
      -0.013495
      just love
      1.282696
    
    
      7
      -0.111860
      great to
      1.282660
    
    
      8
      -0.001235
      smooth ride
      1.282603
    
    
      9
      -0.030555
      are soft
      1.282580
    
    
      10
      -0.004383
      excellent quality
      1.282566
    
    
      11
      -0.004369
      for on
      1.282535
    
    
      12
      -0.001818
      great easy
      1.282503
    
    
      13
      -0.006315
      works perfect
      1.282490
    
    
      14
      -0.032234
      are perfect
      1.282457
    
    
      15
      -0.010474
      just great
      1.282406
    
    
      16
      -0.003411
      cute too
      1.282401
    
    
      17
      -0.000276
      one loves
      1.282399
    
    
      18
      -0.003563
      its perfect
      1.282394
    
    
      19
      -0.016166
      great condition
      1.282390
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.04020101  0.03225806  0.03846154  0.0753012   0.08928571  0.11504425
  0.06870229  0.06451613  0.05925926  0.06293706  0.05389222  0.06818182
  0.10294118  0.08252427  0.02531646  0.05952381  0.03859649  0.07407407
  0.02040816  0.01265823]
pos probs2 [  2.45677682e-05   1.97136527e-05   2.35047398e-05   4.60183158e-05
   5.45645745e-05   7.03062128e-05   4.19855657e-05   3.94273054e-05
   3.62147102e-05   3.84623014e-05   3.29347851e-05   4.16674932e-05
   6.29097447e-05   5.04324999e-05   1.54714743e-05   3.63763830e-05
   2.35872125e-05   4.52683877e-05   1.24719027e-05   7.73573714e-06]
mean= 1.0
mean2= 2.32879523842
pos probs3 [ 1.67257366  1.34210548  1.60020268  3.13292694  3.71475623  4.78644696
  2.85837731  2.68421095  2.46549747  2.61851348  2.24220017  2.83672294
  4.28289542  3.43344459  1.05329797  2.47650415  1.60581743  3.08187183
  0.84908714  0.52664898]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [36.338779920887077, 32.14952663619983, 28.430267672404167, 27.569757072857406, 27.232540209486917, 25.296004016812294, 24.828951311068945, 24.772603371225379, 22.026319287085009, 20.802634882246952]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000747
      safety commission
      36.338780
    
    
      1
      0.001620
      consumer product
      32.149527
    
    
      2
      0.004401
      product safety
      28.430268
    
    
      3
      0.207560
      cpsc
      27.569757
    
    
      4
      0.047664
      have choked
      27.232540
    
    
      5
      0.118144
      be recalled
      25.296004
    
    
      6
      0.062101
      dangerous product
      24.828951
    
    
      7
      0.004731
      commission
      24.772603
    
    
      8
      0.038586
      extremely dangerous
      22.026319
    
    
      9
      0.024125
      seriously injured
      20.802635
    
    
      10
      0.106876
      arm stuck
      20.085303
    
    
      11
      0.108927
      leaned forward
      19.474807
    
    
      12
      0.003463
      plastic broke
      18.355266
    
    
      13
      0.118795
      been recalled
      18.261137
    
    
      14
      0.011615
      face first
      17.830830
    
    
      15
      0.616804
      recalled
      17.359440
    
    
      16
      0.011173
      was caused
      17.335529
    
    
      17
      0.004682
      happened if
      16.950295
    
    
      18
      0.023260
      he leaned
      16.407712
    
    
      19
      0.162380
      is dangerous
      16.085758
    
  








    



hazard transform= 9.90821794762
crib transform= 2.3645280759
pampers transform= 1.9970529487
very dangerous transform= 15.0977571722






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.022564
      they wash
      1.279903
    
    
      1
      -0.027158
      granddaughter loves
      1.279748
    
    
      2
      -0.041605
      loves them
      1.279721
    
    
      3
      -0.031197
      great love
      1.279705
    
    
      4
      -0.028614
      are perfect
      1.279655
    
    
      5
      -0.087715
      love these
      1.279651
    
    
      6
      -0.006822
      great gift
      1.279574
    
    
      7
      -0.005415
      love love
      1.279535
    
    
      8
      -0.001285
      great easy
      1.279450
    
    
      9
      -0.007134
      works perfect
      1.279437
    
    
      10
      -0.003447
      just great
      1.279354
    
    
      11
      -0.009131
      cute too
      1.279348
    
    
      12
      -0.002644
      its perfect
      1.279342
    
    
      13
      -0.007778
      great condition
      1.279337
    
    
      14
      -0.027284
      order more
      1.279325
    
    
      15
      -0.021562
      kids love
      1.279314
    
    
      16
      -0.063685
      perfect gift
      1.279311
    
    
      17
      -0.006889
      great have
      1.279289
    
    
      18
      -0.002234
      these blankets
      1.279272
    
    
      19
      -0.021895
      good size
      1.279245
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
pos probs1 [ 0.03517588  0.03225806  0.02797203  0.07831325  0.05357143  0.09734513
  0.06870229  0.05376344  0.05185185  0.05594406  0.04790419  0.06818182
  0.08333333  0.06796117  0.03361378  0.05952381  0.03157895  0.09259259
  0.03225806  0.08523909]
pos probs2 [  2.11722115e-05   1.94159912e-05   1.68362441e-05   4.71364123e-05
   3.22444139e-05   5.85916194e-05   4.13516148e-05   3.23599853e-05
   3.12094080e-05   3.36724882e-05   2.88333282e-05   4.10383450e-05
   5.01579772e-05   4.09055348e-05   2.02319908e-05   3.58271266e-05
   1.90072335e-05   5.57310858e-05   1.94159912e-05   5.13050411e-05]
mean= 1.0
mean2= 2.23895813464
pos probs3 [ 1.44140416  1.32184068  1.1462115   3.20904695  2.1951997   3.98891745
  2.81521793  2.2030678   2.1247365   2.29242299  1.96297298  2.79389053
  3.41475509  2.78484881  1.37739393  2.43911078  1.29401245  3.79417232
  1.32184068  3.4928472 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [34.752697333401606, 30.732795776179778, 27.647173710298272, 26.821349041029624, 26.635089672689141, 25.114972892361969, 24.258420132664572, 23.147347454832609, 19.885926678704561, 19.616678155008369]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001107
      safety commission
      34.752697
    
    
      1
      0.000892
      consumer product
      30.732796
    
    
      2
      0.215214
      cpsc
      27.647174
    
    
      3
      0.052237
      have choked
      26.821349
    
    
      4
      0.008167
      product safety
      26.635090
    
    
      5
      0.060487
      dangerous product
      25.114973
    
    
      6
      0.156912
      be recalled
      24.258420
    
    
      7
      0.003301
      commission
      23.147347
    
    
      8
      0.004294
      plastic broke
      19.885927
    
    
      9
      0.109024
      leaned forward
      19.616678
    
    
      10
      0.015218
      seriously injured
      19.246801
    
    
      11
      0.034908
      extremely dangerous
      18.680719
    
    
      12
      0.122094
      arm stuck
      18.369027
    
    
      13
      0.025996
      he leaned
      17.891393
    
    
      14
      0.009828
      face first
      17.073775
    
    
      15
      0.127482
      been recalled
      17.058324
    
    
      16
      0.652671
      recalled
      17.050225
    
    
      17
      0.029688
      choked on
      16.990489
    
    
      18
      0.160019
      lodged in
      16.872907
    
    
      19
      0.002963
      happened if
      16.694358
    
  








    



hazard transform= 10.0317916089
crib transform= 2.2883498959
pampers transform= 1.80299068554
very dangerous transform= 14.075223181






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.015536
      love love
      1.309494
    
    
      1
      -0.041191
      are perfect
      1.309106
    
    
      2
      -0.015463
      great love
      1.308901
    
    
      3
      -0.011353
      great gift
      1.308766
    
    
      4
      -0.006002
      looks just
      1.308739
    
    
      5
      -0.093288
      and cuddly
      1.308703
    
    
      6
      -0.000980
      great easy
      1.308639
    
    
      7
      -0.009425
      works perfect
      1.308626
    
    
      8
      -0.004267
      just great
      1.308541
    
    
      9
      -0.013785
      cute too
      1.308535
    
    
      10
      -0.001943
      one loves
      1.308534
    
    
      11
      -0.015339
      its perfect
      1.308528
    
    
      12
      -0.014099
      great condition
      1.308524
    
    
      13
      -0.093832
      love these
      1.308501
    
    
      14
      -0.173151
      are great
      1.308491
    
    
      15
      -0.015282
      baby book
      1.308487
    
    
      16
      -0.023053
      great little
      1.308465
    
    
      17
      -0.035855
      great price
      1.308421
    
    
      18
      -0.001665
      great car
      1.308406
    
    
      19
      -0.001002
      great bag
      1.308377
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.0201005   0.06325301  0.07964602  0.05343511  0.03225806  0.03703704
  0.04195804  0.02994012  0.04545455  0.06372549  0.0631068   0.05555556
  0.03225806  0.04109589  0.02409639  0.02663438  0.02142857  0.01398601
  0.03369066  0.02510345]
pos probs2 [  1.59012732e-05   5.00387204e-05   6.30070360e-05   4.22718961e-05
   2.55189787e-05   2.92995682e-05   3.31925178e-05   2.36852797e-05
   3.59585610e-05   5.04124923e-05   4.99230507e-05   4.39493523e-05
   2.55189787e-05   3.25104798e-05   1.90623697e-05   2.10701495e-05
   1.69518930e-05   1.10641726e-05   2.66522871e-05   1.98590453e-05]
mean= 1.0
mean2= 2.21225710015
pos probs3 [ 1.08255868  3.40663608  4.28951901  2.87787069  1.73733207  1.9947146
  2.25974661  1.61249384  2.44805883  3.43208248  3.39876129  2.9920719
  1.73733207  2.21331346  1.29776613  1.43445578  1.15408488  0.75324887
  1.81448771  1.3520038 ]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [44.312963637397282, 39.780955992663465, 35.039685426716737, 34.272823624448527, 32.763187345752577, 30.160084789514706, 29.600955627658927, 28.665979206623994, 24.552590033480982, 22.917997560421504]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000674
      safety commission
      44.312964
    
    
      1
      0.001685
      consumer product
      39.780956
    
    
      2
      0.251464
      cpsc
      35.039685
    
    
      3
      0.082300
      have choked
      34.272824
    
    
      4
      0.006198
      product safety
      32.763187
    
    
      5
      0.163349
      be recalled
      30.160085
    
    
      6
      0.003099
      commission
      29.600956
    
    
      7
      0.074420
      dangerous product
      28.665979
    
    
      8
      0.045054
      extremely dangerous
      24.552590
    
    
      9
      0.128361
      leaned forward
      22.917998
    
    
      10
      0.027261
      seriously injured
      22.848549
    
    
      11
      0.117937
      arm stuck
      22.285777
    
    
      12
      0.008700
      plastic broke
      22.176533
    
    
      13
      0.065441
      her throat
      20.543504
    
    
      14
      0.032669
      he leaned
      20.480943
    
    
      15
      0.788093
      recalled
      20.428629
    
    
      16
      0.160297
      been recalled
      19.983249
    
    
      17
      0.004762
      happened if
      19.947146
    
    
      18
      0.015022
      face first
      19.875906
    
    
      19
      0.114735
      leg stuck
      19.584471
    
  








    



hazard transform= 11.4895561103
crib transform= 2.46398289036
pampers transform= 2.15429177068
very dangerous transform= 16.8583774298






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.017904
      great gift
      0.959842
    
    
      1
      -0.018977
      these work
      0.959842
    
    
      2
      -0.038453
      works very
      0.959806
    
    
      3
      -0.039860
      loves them
      0.959780
    
    
      4
      -0.006288
      great little
      0.959732
    
    
      5
      -0.032879
      super easy
      0.959720
    
    
      6
      -0.121242
      are great
      0.959691
    
    
      7
      -0.027762
      are perfect
      0.959684
    
    
      8
      -0.038266
      are soft
      0.959683
    
    
      9
      -0.051360
      they wash
      0.959681
    
    
      10
      -0.016716
      nice quality
      0.959680
    
    
      11
      -0.028836
      bibs are
      0.959656
    
    
      12
      -0.063358
      great quality
      0.959574
    
    
      13
      -0.034300
      granddaughter loves
      0.959565
    
    
      14
      -0.012644
      great love
      0.959534
    
    
      15
      -0.070079
      love these
      0.959493
    
    
      16
      -0.009933
      just love
      0.959487
    
    
      17
      -0.133109
      so cute
      0.959486
    
    
      18
      -0.146752
      works great
      0.959464
    
    
      19
      -0.084632
      great to
      0.959460
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.03517588  0.01612903  0.02797203  0.06927711  0.07142857  0.08849558
  0.05343511  0.03225806  0.05185185  0.04895105  0.04191617  0.0625
  0.07352941  0.0631068   0.05555556  0.01265823  0.03225806  0.02409639
  0.02142857  0.02097902]
pos probs2 [  2.67578424e-05   1.22691489e-05   2.12779646e-05   5.26982120e-05
   5.43348025e-05   6.73174544e-05   4.06474095e-05   2.45382979e-05
   3.94430418e-05   3.72364381e-05   3.18850937e-05   4.75429522e-05
   5.59328849e-05   4.80045342e-05   4.22604019e-05   9.62895234e-06
   2.45382979e-05   1.83298129e-05   1.63004407e-05   1.59584735e-05]
mean= 1.0
mean2= 2.45448318131
pos probs3 [ 1.82167391  0.83528366  1.44860383  3.58769428  3.69911335  4.58297229
  2.76727564  1.67056732  2.68528229  2.5350567   2.17073718  3.23672418
  3.8079108   3.26814869  2.87708816  0.65553908  1.67056732  1.24789366
  1.10973401  1.08645287]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [40.643422654365153, 36.486708973805086, 33.06918201590107, 32.9557371376304, 31.504115382758581, 29.829650072003744, 28.399644445792092, 27.277431284036499, 23.609046983050348, 23.539812241164569]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000558
      safety commission
      40.643423
    
    
      1
      0.000756
      consumer product
      36.486709
    
    
      2
      0.258297
      cpsc
      33.069182
    
    
      3
      0.063114
      have choked
      32.955737
    
    
      4
      0.005346
      product safety
      31.504115
    
    
      5
      0.142525
      be recalled
      29.829650
    
    
      6
      0.080092
      dangerous product
      28.399644
    
    
      7
      0.004693
      commission
      27.277431
    
    
      8
      0.040811
      extremely dangerous
      23.609047
    
    
      9
      0.022710
      seriously injured
      23.539812
    
    
      10
      0.135499
      leaned forward
      22.588203
    
    
      11
      0.110721
      arm stuck
      22.322236
    
    
      12
      0.005010
      plastic broke
      22.085883
    
    
      13
      0.013932
      face first
      20.961642
    
    
      14
      0.005318
      happened if
      20.459294
    
    
      15
      0.032507
      he leaned
      20.423274
    
    
      16
      0.731980
      recalled
      20.417405
    
    
      17
      0.143253
      been recalled
      20.386969
    
    
      18
      0.080545
      choked on
      19.262456
    
    
      19
      0.015616
      was caused
      18.988782
    
  








    



hazard transform= 11.2590050105
crib transform= 2.16592586758
pampers transform= 2.07150347722
very dangerous transform= 17.3581662565






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.046468
      are perfect
      0.932160
    
    
      1
      -0.058869
      great to
      0.931883
    
    
      2
      -0.027812
      buying more
      0.931802
    
    
      3
      -0.013700
      love love
      0.931797
    
    
      4
      -0.020760
      they wash
      0.931792
    
    
      5
      -0.002297
      nice quality
      0.931790
    
    
      6
      -0.110198
      love these
      0.931772
    
    
      7
      -0.037772
      buy more
      0.931690
    
    
      8
      -0.082884
      granddaughter loves
      0.931679
    
    
      9
      -0.015124
      great love
      0.931648
    
    
      10
      -0.081226
      are great
      0.931603
    
    
      11
      -0.023289
      just love
      0.931603
    
    
      12
      -0.085981
      super easy
      0.931572
    
    
      13
      -0.005377
      fits great
      0.931564
    
    
      14
      -0.010040
      great gift
      0.931552
    
    
      15
      -0.051509
      these work
      0.931552
    
    
      16
      -0.002919
      smooth ride
      0.931535
    
    
      17
      -0.043371
      and works
      0.931509
    
    
      18
      -0.018459
      excellent quality
      0.931508
    
    
      19
      -0.004873
      really love
      0.931469
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=10000, t=5.0)
pos probs1 [ 0.02512563  0.01612903  0.02447552  0.06626506  0.05357143  0.07964602
  0.0610687   0.05376344  0.05185185  0.04895105  0.03592814  0.0625
  0.06862745  0.05825243  0.02256868  0.04761905  0.01754386  0.03703704
  0.02072539  0.03225806]
pos probs2 [  1.91036813e-05   1.22633309e-05   1.86093902e-05   5.03830823e-05
   4.07317775e-05   6.05569790e-05   4.64321535e-05   4.08777696e-05
   3.94243378e-05   3.72187804e-05   2.73171203e-05   4.75204071e-05
   5.21792706e-05   4.42908649e-05   1.71595680e-05   3.62060245e-05
   1.33390617e-05   2.81602413e-05   1.57580625e-05   2.45266618e-05]
mean= 1.0
mean2= 2.0866153799
pos probs3 [ 1.30057862  0.83488757  1.26692729  3.43008024  2.77301942  4.12271913
  3.16110101  2.78295855  2.68400892  2.53385457  1.85974955  3.23518932
  3.55236474  3.01532208  1.16822339  2.46490615  0.90812332  1.91714923
  1.07280889  1.66977513]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [43.245062278185721, 33.881255404727327, 33.053500503607808, 31.92053460584264, 30.055952375161336, 29.608452639797818, 28.84504674511879, 22.075409465089901, 22.026820889598142, 21.31418844905232]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001121
      safety commission
      43.245062
    
    
      1
      0.067835
      have choked
      33.881255
    
    
      2
      0.255756
      cpsc
      33.053501
    
    
      3
      0.007122
      product safety
      31.920535
    
    
      4
      0.072981
      dangerous product
      30.055952
    
    
      5
      0.176967
      be recalled
      29.608453
    
    
      6
      0.004950
      commission
      28.845047
    
    
      7
      0.007241
      plastic broke
      22.075409
    
    
      8
      0.130176
      leaned forward
      22.026821
    
    
      9
      0.032472
      extremely dangerous
      21.314188
    
    
      10
      0.020071
      seriously injured
      21.175785
    
    
      11
      0.031054
      he leaned
      20.413589
    
    
      12
      0.779196
      recalled
      19.931741
    
    
      13
      0.144836
      been recalled
      19.908857
    
    
      14
      0.042404
      choked on
      19.884578
    
    
      15
      0.004983
      happened if
      19.810542
    
    
      16
      0.058871
      her throat
      19.744661
    
    
      17
      0.012332
      face first
      19.719249
    
    
      18
      0.137869
      arm stuck
      19.634252
    
    
      19
      0.044502
      fell forward
      19.517208
    
  








    



hazard transform= 10.9277505858
crib transform= 2.05515736584
pampers transform= 1.96699510544
very dangerous transform= 16.489607605






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.021753
      love love
      0.999110
    
    
      1
      -0.056056
      are soft
      0.999108
    
    
      2
      -0.025866
      will love
      0.999059
    
    
      3
      -0.048431
      are perfect
      0.999011
    
    
      4
      -0.028363
      pricey but
      0.998910
    
    
      5
      -0.112322
      love these
      0.998878
    
    
      6
      -0.040542
      super easy
      0.998852
    
    
      7
      -0.007736
      buying more
      0.998823
    
    
      8
      -0.150090
      are great
      0.998821
    
    
      9
      -0.018612
      they wash
      0.998812
    
    
      10
      -0.018059
      nice quality
      0.998810
    
    
      11
      -0.001782
      bibs are
      0.998785
    
    
      12
      -0.015641
      really love
      0.998779
    
    
      13
      -0.062853
      great to
      0.998745
    
    
      14
      -0.006987
      great seat
      0.998719
    
    
      15
      -0.035608
      granddaughter loves
      0.998691
    
    
      16
      -0.109246
      loves them
      0.998670
    
    
      17
      -0.005596
      great love
      0.998658
    
    
      18
      -0.004411
      can wear
      0.998612
    
    
      19
      -0.007664
      just love
      0.998609
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.0201005   0.02097902  0.05421687  0.0619469   0.03816794  0.03225806
  0.03703704  0.02797203  0.0239521   0.03977273  0.04901961  0.04854369
  0.01265823  0.01447426  0.03703704  0.03225806  0.02739726  0.05197505
  0.02409639  0.01428571]
pos probs2 [  1.96528307e-05   2.05117831e-05   5.30093672e-05   6.05672416e-05
   3.73178751e-05   3.15396235e-05   3.62121603e-05   2.73490442e-05
   2.34186426e-05   3.88869222e-05   4.79278593e-05   4.74625402e-05
   1.23763080e-05   1.41518953e-05   3.62121603e-05   3.15396235e-05
   2.67870775e-05   5.08174807e-05   2.35597188e-05   1.39675476e-05]
mean= 1.0
mean2= 2.08208453116
pos probs3 [ 1.33796472  1.3964422   3.60887772  4.12341781  2.54060094  2.14721757
  2.46532388  1.86192293  1.59434119  2.64742166  3.26292866  3.23124974
  0.84257905  0.96346104  2.46532388  2.14721757  1.82366424  3.45965409
  1.60394565  0.95091064]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [52.239900848093548, 46.140777526935999, 41.148496679759312, 39.2966685159289, 38.274153161687885, 35.411912142639927, 34.355481098850284, 34.044052596522917, 27.230622802781902, 25.492497943029861]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000446
      safety commission
      52.239901
    
    
      1
      0.001021
      consumer product
      46.140778
    
    
      2
      0.125960
      have choked
      41.148497
    
    
      3
      0.346409
      cpsc
      39.296669
    
    
      4
      0.005699
      product safety
      38.274153
    
    
      5
      0.203077
      be recalled
      35.411912
    
    
      6
      0.102296
      dangerous product
      34.355481
    
    
      7
      0.006882
      commission
      34.044053
    
    
      8
      0.029717
      seriously injured
      27.230623
    
    
      9
      0.174497
      leaned forward
      25.492498
    
    
      10
      0.010443
      to recall
      24.653239
    
    
      11
      0.037560
      extremely dangerous
      24.471965
    
    
      12
      0.007890
      plastic broke
      24.471965
    
    
      13
      0.179959
      arm stuck
      24.100666
    
    
      14
      0.072727
      her throat
      24.017846
    
    
      15
      1.026880
      recalled
      23.106035
    
    
      16
      0.072744
      choked on
      22.729084
    
    
      17
      0.044563
      he leaned
      22.500421
    
    
      18
      0.077257
      bruise on
      21.824179
    
    
      19
      0.196307
      been recalled
      21.685926
    
  








    



hazard transform= 12.104740227
crib transform= 1.86342563503
pampers transform= 1.86378484961
very dangerous transform= 19.3606459447






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.029401
      are perfect
      0.732417
    
    
      1
      -0.037871
      super easy
      0.732359
    
    
      2
      -0.050797
      love love
      0.732346
    
    
      3
      -0.028392
      are soft
      0.732345
    
    
      4
      -0.111585
      are great
      0.732328
    
    
      5
      -0.014392
      will love
      0.732308
    
    
      6
      -0.022346
      how well
      0.732294
    
    
      7
      -0.122148
      love that
      0.732284
    
    
      8
      -0.032615
      and cute
      0.732281
    
    
      9
      -0.008972
      great gift
      0.732250
    
    
      10
      -0.035083
      these work
      0.732250
    
    
      11
      -0.005254
      can buy
      0.732241
    
    
      12
      -0.045156
      also love
      0.732236
    
    
      13
      -0.029895
      works very
      0.732222
    
    
      14
      -0.143818
      works great
      0.732201
    
    
      15
      -0.072392
      great to
      0.732199
    
    
      16
      -0.020611
      pricey but
      0.732199
    
    
      17
      -0.006094
      great little
      0.732166
    
    
      18
      -0.021004
      still love
      0.732164
    
    
      19
      -0.020179
      can carry
      0.732140
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.0201005   0.01612903  0.03389831  0.05120482  0.05357143  0.05309735
  0.05343511  0.03225806  0.03703704  0.04195804  0.0239521   0.03977273
  0.04901961  0.01543124  0.04761905  0.03703704  0.03225806  0.01333333
  0.05613306  0.02597403]
pos probs2 [  1.95166475e-05   1.56605357e-05   3.29136682e-05   4.97174838e-05
   5.20153507e-05   5.15550378e-05   5.18829961e-05   3.13210714e-05
   3.59612301e-05   4.07392956e-05   2.32563644e-05   3.86174573e-05
   4.75957457e-05   1.49830094e-05   4.62358673e-05   3.59612301e-05
   3.13210714e-05   1.29460428e-05   5.45025712e-05   2.52195640e-05]
mean= 1.0
mean2= 2.2015263226
pos probs3 [ 1.32869336  1.06616927  2.24076253  3.3847663   3.54120507  3.50986698
  3.53219437  2.13233854  2.44824054  2.77353125  1.58329329  2.62907649
  3.24031837  1.02004328  3.14773784  2.44824054  2.13233854  0.8813666
  3.71053505  1.71694791]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [51.877907235875675, 45.069882752575275, 39.024364343193689, 38.459633282197565, 37.458080332140341, 34.637707224779184, 33.808146146206596, 33.051247351888534, 27.041929651545168, 25.274483269091231]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000876
      safety commission
      51.877907
    
    
      1
      0.000188
      consumer product
      45.069883
    
    
      2
      0.357030
      cpsc
      39.024364
    
    
      3
      0.099461
      have choked
      38.459633
    
    
      4
      0.004071
      product safety
      37.458080
    
    
      5
      0.199229
      be recalled
      34.637707
    
    
      6
      0.006804
      commission
      33.808146
    
    
      7
      0.102644
      dangerous product
      33.051247
    
    
      8
      0.025662
      seriously injured
      27.041930
    
    
      10
      0.004352
      plastic broke
      25.274483
    
    
      9
      0.045481
      extremely dangerous
      25.274483
    
    
      11
      0.047588
      he leaned
      24.206547
    
    
      12
      0.173367
      leaned forward
      23.909413
    
    
      13
      0.939408
      recalled
      23.021903
    
    
      14
      0.081013
      her throat
      22.488478
    
    
      15
      0.067302
      bruise on
      21.672949
    
    
      16
      0.167528
      arm stuck
      21.654266
    
    
      17
      0.206521
      been recalled
      21.535654
    
    
      18
      0.009116
      serious injury
      21.461849
    
    
      19
      0.110703
      choked on
      21.362392
    
  








    



hazard transform= 12.4615443719
crib transform= 1.75655419819
pampers transform= 2.11527983052
very dangerous transform= 20.1420343973






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.061144
      are perfect
      0.720625
    
    
      1
      -0.055170
      super easy
      0.720568
    
    
      2
      -0.030545
      love love
      0.720555
    
    
      3
      -0.018313
      are soft
      0.720554
    
    
      4
      -0.116446
      loves these
      0.720532
    
    
      5
      -0.017579
      will love
      0.720518
    
    
      6
      -0.028787
      how well
      0.720504
    
    
      7
      -0.071094
      still loves
      0.720501
    
    
      8
      -0.115854
      are great
      0.720499
    
    
      9
      -0.058507
      and cute
      0.720491
    
    
      10
      -0.021815
      just love
      0.720480
    
    
      11
      -0.063367
      so cute
      0.720480
    
    
      12
      -0.079080
      great price
      0.720466
    
    
      13
      -0.011157
      great gift
      0.720460
    
    
      14
      -0.069786
      these work
      0.720460
    
    
      15
      -0.145178
      works great
      0.720452
    
    
      16
      -0.104681
      love these
      0.720451
    
    
      17
      -0.024524
      and holds
      0.720448
    
    
      18
      -0.028490
      in great
      0.720443
    
    
      19
      -0.098826
      just loves
      0.720426
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=30000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05722892  0.05357143  0.0619469   0.03816794
  0.03225806  0.05185185  0.03496503  0.03592814  0.05113636  0.05882353
  0.05339806  0.01535149  0.04166667  0.01052632  0.01851852  0.01554404
  0.03225806  0.04989605]
pos probs2 [  1.44561077e-05   1.54664808e-05   5.48780554e-05   5.13708113e-05
   5.94022361e-05   3.66000691e-05   3.09329616e-05   4.97218717e-05
   3.35287346e-05   3.44522806e-05   4.90357744e-05   5.64071653e-05
   5.12045627e-05   1.47208779e-05   3.99550754e-05   1.00939138e-05
   1.77578113e-05   1.49055204e-05   3.09329616e-05   4.78464105e-05]
mean= 1.0
mean2= 2.11790325089
pos probs3 [ 0.98417181  1.05295801  3.73609801  3.49732483  4.04410423  2.49173271
  2.10591603  3.38506502  2.28263625  2.34551127  3.33835552  3.84019982
  3.48600663  1.00219737  2.72014154  0.68719365  1.20895179  1.01476783
  2.10591603  3.25738363]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [39.170038126099882, 38.540800565841252, 34.730767138475237, 33.694656452559045, 25.921348759919042, 25.717701799964569, 24.961298805847964, 24.307647773288934, 24.179035880308568, 23.906596039403688]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.096737
      have choked
      39.170038
    
    
      1
      0.343633
      cpsc
      38.540801
    
    
      2
      0.215181
      be recalled
      34.730767
    
    
      3
      0.091366
      dangerous product
      33.694656
    
    
      4
      0.033225
      extremely dangerous
      25.921349
    
    
      5
      0.020952
      seriously injured
      25.717702
    
    
      6
      0.006061
      plastic broke
      24.961299
    
    
      7
      0.192478
      leaned forward
      24.307648
    
    
      8
      0.023194
      to recall
      24.179036
    
    
      9
      0.051944
      he leaned
      23.906596
    
    
      10
      0.076137
      her throat
      23.555865
    
    
      11
      0.953646
      recalled
      22.961746
    
    
      12
      0.192052
      been recalled
      22.154999
    
    
      13
      0.193364
      arm stuck
      21.385940
    
    
      14
      0.027783
      face first
      20.983949
    
    
      15
      0.092343
      an unsafe
      20.699614
    
    
      16
      0.013654
      serious injury
      20.348072
    
    
      17
      0.069234
      bruise on
      20.334173
    
    
      18
      0.210948
      is dangerous
      20.306173
    
    
      19
      0.035762
      first into
      20.087199
    
  








    



hazard transform= 12.5489196219
crib transform= 1.85274745883
pampers transform= 1.17510114378
very dangerous transform= 18.988245629






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.057630
      are perfect
      0.729353
    
    
      1
      -0.023159
      love love
      0.729283
    
    
      2
      -0.040618
      are soft
      0.729281
    
    
      3
      -0.009550
      really love
      0.729268
    
    
      4
      -0.118116
      loves these
      0.729259
    
    
      5
      -0.025952
      will love
      0.729245
    
    
      6
      -0.028169
      how well
      0.729231
    
    
      7
      -0.121223
      are great
      0.729226
    
    
      8
      -0.016767
      just love
      0.729207
    
    
      9
      -0.022283
      super easy
      0.729195
    
    
      10
      -0.112130
      love these
      0.729177
    
    
      11
      -0.046526
      we keep
      0.729177
    
    
      12
      -0.046095
      also love
      0.729173
    
    
      13
      -0.101532
      loves them
      0.729140
    
    
      14
      -0.045198
      pricey but
      0.729136
    
    
      15
      -0.012348
      great little
      0.729103
    
    
      16
      -0.151013
      love that
      0.729102
    
    
      17
      -0.155300
      cuddly
      0.729098
    
    
      18
      -0.027865
      messes
      0.729098
    
    
      19
      -0.011973
      look great
      0.729095
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05120482  0.0619469   0.03816794  0.03225806
  0.03703704  0.02797203  0.0239521   0.03977273  0.05392157  0.04854369
  0.01265823  0.01339766  0.03703704  0.03225806  0.02739726  0.04573805
  0.02409639  0.00714286]
pos probs2 [  1.56228141e-05   1.67147313e-05   5.30642372e-05   6.41964015e-05
   3.95539442e-05   3.34294625e-05   3.83819755e-05   2.89877857e-05
   2.48218764e-05   4.12170078e-05   5.58796408e-05   5.03064727e-05
   1.31178904e-05   1.38841773e-05   3.83819755e-05   3.34294625e-05
   2.83921463e-05   4.73989469e-05   2.49714057e-05   7.40223813e-06]
mean= 1.0
mean2= 2.11159703483
pos probs3 [ 1.06360119  1.1379389   3.61261327  4.37049101  2.69283252  2.27587781
  2.61304489  1.97348845  1.68987334  2.80605389  3.80428594  3.42486466
  0.89306598  0.94523479  2.61304489  2.27587781  1.93293732  3.2269203
  1.7000533   0.50394437]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [43.614094731982782, 39.951252615937449, 37.53377681934753, 35.276106033221367, 28.862268572635664, 25.938313259721593, 25.544766437849951, 25.518885215521838, 24.90078072933273, 24.823926467822439]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.130880
      have choked
      43.614095
    
    
      1
      0.385196
      cpsc
      39.951253
    
    
      2
      0.204602
      be recalled
      37.533777
    
    
      3
      0.110728
      dangerous product
      35.276106
    
    
      4
      0.031190
      seriously injured
      28.862269
    
    
      5
      0.009011
      plastic broke
      25.938313
    
    
      6
      0.197681
      arm stuck
      25.544766
    
    
      7
      0.196782
      leaned forward
      25.518885
    
    
      8
      0.036551
      extremely dangerous
      24.900781
    
    
      9
      0.011641
      to recall
      24.823926
    
    
      10
      0.081265
      her throat
      24.729641
    
    
      11
      1.078535
      recalled
      23.922877
    
    
      12
      0.049263
      he leaned
      23.848635
    
    
      13
      0.076250
      choked on
      23.230606
    
    
      14
      0.007374
      happened if
      22.646389
    
    
      15
      0.096855
      an unsafe
      22.370214
    
    
      16
      0.211191
      been recalled
      22.346855
    
    
      17
      0.080847
      bruise on
      21.975279
    
    
      18
      0.407371
      lodged in
      21.788183
    
    
      19
      0.252387
      major safety
      21.708373
    
  








    



hazard transform= 12.699398172
crib transform= 1.79321055532
pampers transform= 1.97546193786
very dangerous transform= 20.7161619918






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.039699
      are perfect
      0.702097
    
    
      1
      -0.042502
      also love
      0.702079
    
    
      2
      -0.049475
      super easy
      0.702041
    
    
      3
      -0.057006
      love love
      0.702029
    
    
      4
      -0.053468
      are soft
      0.702028
    
    
      5
      -0.124748
      great to
      0.702004
    
    
      6
      -0.013462
      will love
      0.701993
    
    
      7
      -0.030713
      how well
      0.701979
    
    
      8
      -0.168623
      are great
      0.701975
    
    
      9
      -0.142282
      love that
      0.701969
    
    
      10
      -0.032280
      and cute
      0.701967
    
    
      11
      -0.007791
      great gift
      0.701937
    
    
      12
      -0.037611
      these work
      0.701937
    
    
      13
      -0.002277
      can buy
      0.701928
    
    
      14
      -0.180540
      works great
      0.701928
    
    
      15
      -0.070262
      love these
      0.701927
    
    
      16
      -0.033197
      works very
      0.701911
    
    
      17
      -0.021374
      pricey but
      0.701888
    
    
      18
      -0.051475
      and highly
      0.701881
    
    
      19
      -0.274672
      perfect for
      0.701857
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01507538  0.03174603  0.01612903  0.05357143  0.05309735  0.04580153
  0.03703704  0.04195804  0.02994012  0.04545455  0.04901961  0.0139559
  0.01851852  0.01554404  0.03225806  0.04989605  0.02409639  0.01428571
  0.01398601  0.01886792]
pos probs2 [  1.74028726e-05   3.66473190e-05   1.86192024e-05   6.18423508e-05
   6.12950733e-05   5.28728495e-05   4.27552055e-05   4.84359670e-05
   3.45625912e-05   5.24722976e-05   5.65877719e-05   1.61105583e-05
   2.13776027e-05   1.79438945e-05   3.72384048e-05   5.75995284e-05
   2.78166397e-05   1.64912935e-05   1.61453223e-05   2.17809537e-05]
mean= 1.0
mean2= 2.59896926894
pos probs3 [ 1.18478756  2.49494948  1.2675953   4.21022724  4.17296859  3.59958359
  2.91077439  3.29752064  2.35302121  3.57231402  3.85249551  1.09680681
  1.45538719  1.22162034  2.5351906   3.92137589  1.89375683  1.12272726
  1.09917355  1.48284733]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [60.684119204636488, 52.691631820568304, 47.154545086881463, 45.450163939162863, 44.534848137610268, 40.552908774718063, 39.295454239067894, 38.995488939533018, 30.960054855023184, 30.049465006346029]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.001098
      safety commission
      60.684119
    
    
      1
      0.000060
      consumer product
      52.691632
    
    
      2
      0.109886
      have choked
      47.154545
    
    
      3
      0.390884
      cpsc
      45.450164
    
    
      4
      0.004259
      product safety
      44.534848
    
    
      5
      0.214720
      be recalled
      40.552909
    
    
      6
      0.118041
      dangerous product
      39.295454
    
    
      7
      0.006764
      commission
      38.995489
    
    
      8
      0.026136
      seriously injured
      30.960055
    
    
      9
      0.004638
      plastic broke
      30.049465
    
    
      10
      0.191898
      leaned forward
      29.262572
    
    
      11
      0.057721
      he leaned
      28.779769
    
    
      12
      0.046126
      extremely dangerous
      27.737968
    
    
      13
      0.943841
      recalled
      26.648641
    
    
      14
      0.124325
      choked on
      26.356707
    
    
      15
      0.080228
      her throat
      25.926898
    
    
      16
      0.070775
      bruise on
      25.767511
    
    
      17
      0.176953
      arm stuck
      25.745298
    
    
      18
      0.011205
      serious injury
      25.516529
    
    
      19
      0.404974
      lodged in
      25.426470
    
  








    



hazard transform= 13.8261783434
crib transform= 1.92179271719
pampers transform= 2.35772725434
very dangerous transform= 23.2942581916






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.070929
      great price
      0.662283
    
    
      1
      -0.057035
      are perfect
      0.662245
    
    
      2
      -0.109664
      are great
      0.662200
    
    
      3
      -0.164367
      love that
      0.662196
    
    
      4
      -0.067864
      super easy
      0.662193
    
    
      5
      -0.036232
      love love
      0.662181
    
    
      6
      -0.027924
      are soft
      0.662180
    
    
      7
      -0.121256
      loves these
      0.662160
    
    
      8
      -0.016857
      will love
      0.662147
    
    
      9
      -0.064931
      still loves
      0.662132
    
    
      10
      -0.052259
      and cute
      0.662122
    
    
      11
      -0.063323
      vibrant
      0.662121
    
    
      12
      -0.028600
      just love
      0.662112
    
    
      13
      -0.092877
      so cute
      0.662112
    
    
      14
      -0.019988
      great gift
      0.662094
    
    
      15
      -0.079132
      these work
      0.662094
    
    
      16
      -0.151691
      works great
      0.662086
    
    
      17
      -0.023933
      and holds
      0.662082
    
    
      18
      -0.018155
      also love
      0.662081
    
    
      19
      -0.027893
      in great
      0.662078
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=40000, t=5.0)
pos probs1 [ 0.01005025  0.01612903  0.05722892  0.05309735  0.03816794  0.04301075
  0.04444444  0.03496503  0.03592814  0.05113636  0.04901961  0.04854369
  0.01265823  0.01335779  0.04166667  0.01851852  0.03225806  0.04365904
  0.02409639  0.02      ]
pos probs2 [  1.00929745e-05   1.61975962e-05   5.74721937e-05   5.33230600e-05
   3.83301895e-05   4.31935899e-05   4.46333762e-05   3.51136701e-05
   3.60808730e-05   5.13537425e-05   4.92279884e-05   4.87500468e-05
   1.27120375e-05   1.34145729e-05   4.18437902e-05   1.85972401e-05
   3.23951924e-05   4.38446367e-05   2.41988184e-05   2.00850193e-05]
mean= 1.0
mean2= 2.09599458145
pos probs3 [ 0.6871297   1.10273235  3.91270695  3.63023393  2.6095193   2.9406196
  3.03864025  2.39053866  2.45638583  3.49616279  3.35144145  3.31890319
  0.86543551  0.91326412  2.84872523  1.2661001   2.2054647   2.98494287
  1.64745556  1.36738811]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [39.778563275651827, 37.891477819050884, 36.099046172654042, 35.287435163884687, 27.146675764863495, 26.933402217889263, 26.141243329127814, 25.456693585648196, 24.073734376769927, 24.055901980935285]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.112878
      have choked
      39.778563
    
    
      1
      0.358471
      cpsc
      37.891478
    
    
      2
      0.210131
      be recalled
      36.099046
    
    
      3
      0.105552
      dangerous product
      35.287435
    
    
      4
      0.005419
      plastic broke
      27.146676
    
    
      5
      0.022009
      seriously injured
      26.933402
    
    
      6
      0.034469
      extremely dangerous
      26.141243
    
    
      7
      0.202244
      leaned forward
      25.456694
    
    
      8
      0.052217
      he leaned
      24.073734
    
    
      9
      0.025004
      to recall
      24.055902
    
    
      10
      1.001171
      recalled
      23.654243
    
    
      11
      0.082581
      her throat
      23.259695
    
    
      12
      0.000754
      happened if
      22.789802
    
    
      13
      0.207549
      been recalled
      22.583559
    
    
      14
      0.070701
      bruise on
      22.416199
    
    
      15
      0.207929
      arm stuck
      22.396874
    
    
      16
      0.085070
      choked on
      21.261218
    
    
      17
      0.027813
      face first
      21.161959
    
    
      18
      0.264456
      major safety
      21.036740
    
    
      19
      0.080452
      hazard to
      20.990607
    
  








    



hazard transform= 12.7116450468
crib transform= 1.75090773308
pampers transform= 1.3673881126
very dangerous transform= 20.0752271379






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.061982
      are perfect
      0.703816
    
    
      1
      -0.025023
      love love
      0.703748
    
    
      2
      -0.049120
      are soft
      0.703747
    
    
      3
      -0.010696
      really love
      0.703735
    
    
      4
      -0.103067
      loves these
      0.703725
    
    
      5
      -0.023692
      will love
      0.703712
    
    
      6
      -0.022112
      how well
      0.703698
    
    
      7
      -0.062363
      still loves
      0.703696
    
    
      8
      -0.203154
      love that
      0.703688
    
    
      9
      -0.019219
      just love
      0.703675
    
    
      10
      -0.026617
      super easy
      0.703663
    
    
      11
      -0.011857
      great gift
      0.703656
    
    
      12
      -0.038358
      these work
      0.703656
    
    
      13
      -0.130893
      love these
      0.703646
    
    
      14
      -0.044515
      we keep
      0.703646
    
    
      15
      -0.037627
      also love
      0.703642
    
    
      16
      -0.112927
      are great
      0.703619
    
    
      17
      -0.090956
      loves them
      0.703610
    
    
      18
      -0.049348
      pricey but
      0.703607
    
    
      19
      -0.082588
      so cute
      0.703581
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.04819277  0.0619469   0.03816794  0.04444444
  0.02797203  0.02994012  0.04545455  0.04901961  0.03883495  0.01265823
  0.01264006  0.01851852  0.03225806  0.04158004  0.02409639  0.00714286
  0.04        0.01398601]
pos probs2 [  1.86897102e-05   1.99959803e-05   5.97470254e-05   7.67987207e-05
   4.73187320e-05   5.51000346e-05   3.46783434e-05   3.71182868e-05
   5.63523081e-05   6.07720969e-05   4.81456613e-05   1.56930478e-05
   1.56705210e-05   2.29583477e-05   3.99919606e-05   5.15488889e-05
   2.98735127e-05   8.85536270e-06   4.95900311e-05   1.73391717e-05]
mean= 1.0
mean2= 2.58908537745
pos probs3 [ 1.27239547  1.36132634  4.06757749  5.22845691  3.22145927  3.75121035
  2.36090162  2.52701296  3.83646513  4.13736436  3.27775662  1.0683827
  1.06684907  1.56300431  2.72265268  3.50944836  2.03378875  0.60287309
  3.37608932  1.18045081]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [52.175925821721378, 44.743352405586727, 44.226770064176769, 42.201116473451123, 34.528186205550917, 31.260086276630457, 31.030232701067, 29.584287837058515, 28.732675045753954, 28.547814084981642]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.137381
      have choked
      52.175926
    
    
      1
      0.405288
      cpsc
      44.743352
    
    
      2
      0.202521
      be recalled
      44.226770
    
    
      3
      0.119609
      dangerous product
      42.201116
    
    
      4
      0.031107
      seriously injured
      34.528186
    
    
      5
      0.016123
      to recall
      31.260086
    
    
      6
      0.010686
      plastic broke
      31.030233
    
    
      7
      0.083943
      her throat
      29.584288
    
    
      8
      0.157448
      leaned forward
      28.732675
    
    
      9
      0.042361
      extremely dangerous
      28.547814
    
    
      10
      0.050338
      he leaned
      28.530332
    
    
      11
      0.205917
      arm stuck
      27.649007
    
    
      12
      1.114556
      recalled
      27.357965
    
    
      13
      0.085108
      choked on
      27.276331
    
    
      14
      0.000089
      face first
      27.129289
    
    
      15
      0.084713
      bruise on
      26.289220
    
    
      16
      0.009051
      happened if
      26.050072
    
    
      17
      0.101294
      an unsafe
      25.732388
    
    
      18
      0.273798
      is dangerous
      25.271599
    
    
      19
      0.009994
      serious injury
      25.211057
    
  








    



hazard transform= 14.3171195147
crib transform= 2.02322978178
pampers transform= 2.36326252251
very dangerous transform= 24.5491259264






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.037166
      are perfect
      0.650168
    
    
      1
      -0.038593
      also love
      0.650151
    
    
      2
      -0.045371
      super easy
      0.650116
    
    
      3
      -0.050607
      love love
      0.650105
    
    
      4
      -0.043662
      are soft
      0.650104
    
    
      5
      -0.013759
      really love
      0.650092
    
    
      6
      -0.011023
      will love
      0.650072
    
    
      7
      -0.042295
      to everyone
      0.650060
    
    
      8
      -0.030848
      how well
      0.650059
    
    
      9
      -0.158737
      are great
      0.650055
    
    
      10
      -0.009754
      great value
      0.650050
    
    
      11
      -0.031869
      and cute
      0.650047
    
    
      12
      -0.182132
      works great
      0.650047
    
    
      13
      -0.130126
      so cute
      0.650037
    
    
      14
      -0.085463
      great price
      0.650024
    
    
      15
      -0.013177
      great gift
      0.650020
    
    
      16
      -0.038757
      these work
      0.650020
    
    
      17
      -0.056841
      they fit
      0.650015
    
    
      18
      -0.147866
      love that
      0.650015
    
    
      19
      -0.000194
      can buy
      0.650012
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.03174603  0.01612903  0.05309735  0.03816794  0.04444444
  0.04195804  0.02994012  0.04545455  0.05392157  0.01267993  0.01851852
  0.01612903  0.02597403  0.02409639  0.01428571  0.01398601  0.01462069
  0.01886792  0.00381679]
pos probs2 [  1.84173065e-05   3.87835344e-05   1.97045376e-05   6.48680354e-05
   4.66290585e-05   5.42969481e-05   5.12593566e-05   3.65772854e-05
   5.55309697e-05   6.58749738e-05   1.54908355e-05   2.26237284e-05
   1.97045376e-05   3.17319827e-05   2.94381044e-05   1.74525905e-05
   1.70864522e-05   1.78618236e-05   2.30505912e-05   4.66290585e-06]
mean= 1.0
mean2= 2.59026747327
pos probs3 [ 1.25385023  2.64038302  1.34148492  4.41621585  3.1745063   3.69653623
  3.489737    2.49018159  3.78054841  4.48476822  1.05461608  1.54022343
  1.34148492  2.16031338  2.00414615  1.18817236  1.16324567  1.21603295
  1.56928425  0.31745063]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [63.168657052976869, 54.817952010746204, 49.903239071851715, 47.09743446540223, 46.437736358528689, 42.251409080834456, 41.586032559876436, 40.633680669192238, 31.853131322458545, 31.801083722258447]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.000927
      safety commission
      63.168657
    
    
      1
      0.000002
      consumer product
      54.817952
    
    
      2
      0.114063
      have choked
      49.903239
    
    
      3
      0.404963
      cpsc
      47.097434
    
    
      4
      0.005074
      product safety
      46.437736
    
    
      5
      0.226370
      be recalled
      42.251409
    
    
      6
      0.121862
      dangerous product
      41.586033
    
    
      7
      0.006777
      commission
      40.633681
    
    
      8
      0.205142
      leaned forward
      31.853131
    
    
      9
      0.049417
      extremely dangerous
      31.801084
    
    
      10
      0.029234
      seriously injured
      31.504570
    
    
      11
      0.184021
      arm stuck
      30.114024
    
    
      12
      0.062944
      he leaned
      29.285938
    
    
      13
      0.137513
      choked on
      27.893071
    
    
      14
      0.001871
      to recall
      27.724022
    
    
      15
      0.082729
      bruise on
      27.269530
    
    
      16
      0.962315
      recalled
      27.246021
    
    
      17
      0.086213
      her throat
      26.580763
    
    
      18
      0.006958
      happened if
      25.670390
    
    
      19
      0.282246
      is dangerous
      25.145043
    
  








    



hazard transform= 14.231664476
crib transform= 1.89355308707
pampers transform= 1.99612956287
very dangerous transform= 24.4217144119






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.078052
      great price
      0.650222
    
    
      1
      -0.049508
      are perfect
      0.650185
    
    
      2
      -0.102442
      are great
      0.650175
    
    
      3
      -0.081805
      so cute
      0.650140
    
    
      4
      -0.039294
      love love
      0.650122
    
    
      5
      -0.027001
      are soft
      0.650121
    
    
      6
      -0.170702
      love that
      0.650102
    
    
      7
      -0.110864
      loves these
      0.650101
    
    
      8
      -0.014534
      will love
      0.650089
    
    
      9
      -0.031840
      how well
      0.650076
    
    
      10
      -0.054782
      still loves
      0.650073
    
    
      11
      -0.047988
      and cute
      0.650064
    
    
      12
      -0.155314
      works great
      0.650064
    
    
      13
      -0.082190
      vibrant
      0.650063
    
    
      14
      -0.059160
      super easy
      0.650044
    
    
      15
      -0.020693
      great gift
      0.650037
    
    
      16
      -0.080134
      these work
      0.650037
    
    
      17
      -0.022921
      up great
      0.650029
    
    
      18
      -0.092235
      love these
      0.650028
    
    
      19
      -0.051657
      and holds
      0.650025
    
  








    



Evaluating RandNegSampThreshInfoPrior(C=1, nneg=50000, t=5.0)
pos probs1 [ 0.01507538  0.01612903  0.05421687  0.05357143  0.05309735  0.03816794
  0.03225806  0.03703704  0.03496503  0.03592814  0.05113636  0.03921569
  0.03398058  0.01236094  0.04166667  0.01851852  0.03225806  0.04158004
  0.02409639  0.00968523]
pos probs2 [  1.81060036e-05   1.93714770e-05   6.51161697e-05   6.43409772e-05
   6.37715880e-05   4.58408998e-05   3.87429540e-05   4.44826509e-05
   4.19941110e-05   4.31508350e-05   6.14163873e-05   4.70992774e-05
   4.08117525e-05   1.48458785e-05   5.00429822e-05   2.22413254e-05
   3.87429540e-05   4.99389428e-05   2.89405199e-05   1.16322671e-05]
mean= 1.0
mean2= 2.47942311895
pos probs3 [ 1.23265673  1.31881015  4.43310883  4.38033373  4.34156971  3.12084846
  2.63762031  3.02837887  2.85895908  2.93770885  4.18122765  3.20651881
  2.77846411  1.01070741  3.40692623  1.51418944  2.63762031  3.39984322
  1.97027059  0.79192474]
transform: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [49.059737728525562, 46.3013589004157, 42.518439364722155, 40.883114773771311, 32.466002908583093, 30.972056646796442, 30.061113804243607, 29.575019198047329, 28.195251568118142, 27.639288861141161]






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      0.119380
      have choked
      49.059738
    
    
      1
      0.389740
      cpsc
      46.301359
    
    
      2
      0.225671
      be recalled
      42.518439
    
    
      3
      0.113521
      dangerous product
      40.883115
    
    
      4
      0.004987
      plastic broke
      32.466003
    
    
      5
      0.022133
      seriously injured
      30.972057
    
    
      6
      0.039725
      extremely dangerous
      30.061114
    
    
      7
      0.218680
      leaned forward
      29.575019
    
    
      8
      0.215887
      arm stuck
      28.195252
    
    
      9
      0.057253
      he leaned
      27.639289
    
    
      10
      1.074319
      recalled
      27.443378
    
    
      11
      0.025741
      to recall
      27.255410
    
    
      12
      0.089658
      her throat
      26.974426
    
    
      13
      0.090259
      choked on
      25.427303
    
    
      14
      0.010797
      face first
      25.308595
    
    
      15
      0.233771
      been recalled
      25.158840
    
    
      16
      0.083710
      hazard to
      25.103667
    
    
      17
      0.099912
      an unsafe
      24.928729
    
    
      18
      0.014824
      serious injury
      24.423679
    
    
      19
      0.225101
      is dangerous
      23.769253
    
  








    



hazard transform= 14.5967861637
crib transform= 1.83593827547
pampers transform= 1.63532459095
very dangerous transform= 22.8764243332






    






  
    
      
      coef
      term
      transform
    
  
  
    
      0
      -0.063993
      are perfect
      0.657001
    
    
      1
      -0.034083
      also love
      0.656984
    
    
      2
      -0.091621
      so cute
      0.656955
    
    
      3
      -0.029207
      love love
      0.656937
    
    
      4
      -0.039489
      are soft
      0.656936
    
    
      5
      -0.012903
      really love
      0.656924
    
    
      6
      -0.219684
      love that
      0.656917
    
    
      7
      -0.092163
      loves these
      0.656916
    
    
      8
      -0.021039
      will love
      0.656903
    
    
      9
      -0.116712
      love these
      0.656900
    
    
      10
      -0.017855
      how well
      0.656890
    
    
      11
      -0.064606
      still loves
      0.656888
    
    
      12
      -0.117028
      are great
      0.656886
    
    
      13
      -0.025611
      and cute
      0.656879
    
    
      14
      -0.014724
      just love
      0.656869
    
    
      15
      -0.034018
      super easy
      0.656858
    
    
      16
      -0.014893
      great gift
      0.656851
    
    
      17
      -0.032790
      these work
      0.656851
    
    
      18
      -0.047630
      we keep
      0.656842
    
    
      19
      -0.058110
      and holds
      0.656839



In [21]:

    
# Pull the model objects out of the results frame into a plain list; later
# cells select individual models from this list by their string form.
models = results['model'].tolist()
# Bare expression as the last line: the notebook renders the results frame.
results









    Out[21]:






  
    
      
      model
      f1
      f1_se
      pr_at_k
      pr_at_k_se
      pr_auc
      pr_auc_se
      precision
      precision_se
      recall
      recall_se
      roc_auc
      roc_auc_se
    
  
  
    
      11
      RandNegSampThreshInfoPrior(C=1, nneg=40000, t=...
      0.844340
      0.002757
      0.026764
      0.001987
      0.017120
      0.000216
      0.880867
      0.006455
      0.810997
      0.007424
      0.968573
      0.001955
    
    
      6
      RandNegSampThreshInfoPrior(C=1, nneg=20000, t=...
      0.842716
      0.004200
      0.026764
      0.001987
      0.015887
      0.000225
      0.857953
      0.008991
      0.828179
      0.002806
      0.969611
      0.000989
    
    
      9
      RandNegSampThreshInfoPrior(C=1, nneg=10000, t=...
      0.839954
      0.001670
      0.031630
      0.001987
      0.016307
      0.000187
      0.841555
      0.005809
      0.838488
      0.002806
      0.971667
      0.000783
    
    
      10
      RandNegSampThreshInfoPrior(C=1, nneg=30000, t=...
      0.834279
      0.008755
      0.026764
      0.001987
      0.017245
      0.000564
      0.867041
      0.012572
      0.804124
      0.008417
      0.969552
      0.002114
    
    
      8
      RandNegSampThreshInfoPrior(C=1, nneg=5000, t=5.0)
      0.831069
      0.003827
      0.036496
      0.003441
      0.015420
      0.000152
      0.817261
      0.002994
      0.845361
      0.004860
      0.972205
      0.000882
    
    
      5
      RandNegSampThreshInfoPrior(C=1, nneg=20000, t=...
      0.827318
      0.004651
      0.026764
      0.001987
      0.017016
      0.000338
      0.867892
      0.003475
      0.790378
      0.005612
      0.964432
      0.001948
    
    
      4
      RandNegSampThreshInfoPrior(C=1, nneg=20000, t=...
      0.821095
      0.004076
      0.021898
      0.000000
      0.018210
      0.000259
      0.875548
      0.001621
      0.773196
      0.008417
      0.963325
      0.000837
    
    
      12
      RandNegSampThreshInfoPrior(C=1, nneg=50000, t=...
      0.816033
      0.012426
      0.024331
      0.001987
      0.017151
      0.000358
      0.877241
      0.009532
      0.762887
      0.014580
      0.964432
      0.002644
    
    
      7
      RandNegSampThreshInfoPrior(C=1, nneg=1000, t=5.0)
      0.773960
      0.001943
      0.031630
      0.001987
      0.011101
      0.000428
      0.669547
      0.006728
      0.917526
      0.008417
      0.966018
      0.001153
    
    
      1
      RandomNegSamplesThresh(C=1, nneg=20000, t=3.0)
      0.764142
      0.005421
      0.026764
      0.001987
      0.016838
      0.000525
      0.783588
      0.008551
      0.745704
      0.002806
      0.956619
      0.000623
    
    
      3
      RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)
      0.752931
      0.004321
      0.034063
      0.001987
      0.015665
      0.000485
      0.727640
      0.005702
      0.780069
      0.002806
      0.961367
      0.000826
    
    
      2
      RandomNegSamplesThresh(C=1, nneg=20000, t=4.0)
      0.747940
      0.003574
      0.041363
      0.005256
      0.016923
      0.000265
      0.736940
      0.007574
      0.759450
      0.002806
      0.958851
      0.000123
    
    
      0
      RandomNegSamples(C=1,n=20000)
      0.699726
      0.002380
      0.024331
      0.001987
      0.016046
      0.000213
      0.789573
      0.009472
      0.628866
      0.009720
      0.939985
      0.000545



In [22]:

    
def print_main_results_table(results, n_neg=20000):
    """
    Print Table 3.

    Builds one row per model in ``results`` whose ``n_neg`` attribute equals
    ``n_neg``, ordered by descending ROC AUC. The table is shown with
    ``display`` and its LaTeX source is printed.

    Parameters
    ----------
    results : pandas.DataFrame
        Needs the columns ``model``, ``roc_auc``, ``f1``, ``precision`` and
        ``recall`` together with a ``<metric>_se`` column for each metric.
        Metric values and standard errors are multiplied by 100 for display.
    n_neg : int, optional
        Only models with this ``n_neg`` value are listed (default 20000).
    """
    # Raw strings: a backslash followed by "p" is not a valid escape sequence
    # in a normal string literal and triggers a warning on newer Pythons.
    # The resulting text is byte-for-byte what the table printed before.
    roc_fmt = r'%.1f $\pm $ %.2f'
    metric_fmt = r'%.1f $\pm$ %.2f'

    rows = []
    for _, r in results.sort_values('roc_auc', ascending=False).iterrows():
        model = r['model']
        if model.n_neg != n_neg:
            continue
        informed = type(model) is RandomNegativeSamplesThresholdInformedPrior
        # Models without a review threshold (attribute missing or None) are
        # reported as 'none'; any other formatting error is no longer hidden.
        threshold = getattr(model, 'threshold', None)
        row = ['informed prior' if informed else 'baseline',
               'none' if threshold is None else '%.1f' % threshold,
               roc_fmt % (r['roc_auc'] * 100, r['roc_auc_se'] * 100)]
        for metric in ('f1', 'precision', 'recall'):
            row.append(metric_fmt % (r[metric] * 100, r[metric + '_se'] * 100))
        rows.append(row)

    df = pd.DataFrame(rows, columns=['Model', 'Review Threshold', 'ROC AUC', 'F1', 'Precision', 'Recall'])
    display(df)
    print(df.to_latex(index=False, escape=False))
    
# Build and show Table 3 (only models with n_neg == 20000 are included).
print_main_results_table(results)









    






  
    
      
      Model
      Review Threshold
      ROC AUC
      F1
      Precision
      Recall
    
  
  
    
      0
      informed prior
      5.0
      97.0 $\pm $ 0.10
      84.3 $\pm$ 0.42
      85.8 $\pm$ 0.90
      82.8 $\pm$ 0.28
    
    
      1
      informed prior
      4.0
      96.4 $\pm $ 0.19
      82.7 $\pm$ 0.47
      86.8 $\pm$ 0.35
      79.0 $\pm$ 0.56
    
    
      2
      informed prior
      3.0
      96.3 $\pm $ 0.08
      82.1 $\pm$ 0.41
      87.6 $\pm$ 0.16
      77.3 $\pm$ 0.84
    
    
      3
      baseline
      5.0
      96.1 $\pm $ 0.08
      75.3 $\pm$ 0.43
      72.8 $\pm$ 0.57
      78.0 $\pm$ 0.28
    
    
      4
      baseline
      4.0
      95.9 $\pm $ 0.01
      74.8 $\pm$ 0.36
      73.7 $\pm$ 0.76
      75.9 $\pm$ 0.28
    
    
      5
      baseline
      3.0
      95.7 $\pm $ 0.06
      76.4 $\pm$ 0.54
      78.4 $\pm$ 0.86
      74.6 $\pm$ 0.28
    
    
      6
      baseline
      none
      94.0 $\pm $ 0.05
      70.0 $\pm$ 0.24
      79.0 $\pm$ 0.95
      62.9 $\pm$ 0.97
    
  








    



\begin{tabular}{llllll}
\toprule
          Model & Review Threshold &           ROC AUC &               F1 &        Precision &           Recall \\
\midrule
 informed prior &              5.0 &  97.0 $\pm $ 0.10 &  84.3 $\pm$ 0.42 &  85.8 $\pm$ 0.90 &  82.8 $\pm$ 0.28 \\
 informed prior &              4.0 &  96.4 $\pm $ 0.19 &  82.7 $\pm$ 0.47 &  86.8 $\pm$ 0.35 &  79.0 $\pm$ 0.56 \\
 informed prior &              3.0 &  96.3 $\pm $ 0.08 &  82.1 $\pm$ 0.41 &  87.6 $\pm$ 0.16 &  77.3 $\pm$ 0.84 \\
       baseline &              5.0 &  96.1 $\pm $ 0.08 &  75.3 $\pm$ 0.43 &  72.8 $\pm$ 0.57 &  78.0 $\pm$ 0.28 \\
       baseline &              4.0 &  95.9 $\pm $ 0.01 &  74.8 $\pm$ 0.36 &  73.7 $\pm$ 0.76 &  75.9 $\pm$ 0.28 \\
       baseline &              3.0 &  95.7 $\pm $ 0.06 &  76.4 $\pm$ 0.54 &  78.4 $\pm$ 0.86 &  74.6 $\pm$ 0.28 \\
       baseline &             none &  94.0 $\pm $ 0.05 &  70.0 $\pm$ 0.24 &  79.0 $\pm$ 0.95 &  62.9 $\pm$ 0.97 \\
\bottomrule
\end{tabular}



In [23]:

    
def plot_f1_v_nneg(results, threshold=5.0, out_path='paper/figs/nneg.pdf'):
    """ Plot Figure 3: F1 against the number of negative training examples.

    Only rows whose model is exactly a
    RandomNegativeSamplesThresholdInformedPrior with the given review
    threshold are plotted. Points are ordered by the model's ``n_neg`` and
    drawn with ``f1_se`` as the error bar.

    Parameters
    ----------
    results : pandas.DataFrame
        Needs the columns ``model``, ``f1`` and ``f1_se``.
    threshold : float, optional
        Review threshold a model must have to be included (default 5.0).
    out_path : str, optional
        Where the figure is saved (default 'paper/figs/nneg.pdf'). The parent
        directory is created if it does not exist.
    """
    points = sorted(
        ((r['model'].n_neg, r['f1'], r['f1_se'])
         for _, r in results.iterrows()
         if type(r['model']) is RandomNegativeSamplesThresholdInformedPrior
         and r['model'].threshold == threshold),
        key=lambda p: p[0])
    nnegs = [p[0] for p in points]
    f1s = [p[1] for p in points]
    ses = [p[2] for p in points]

    plt.figure(figsize=(8,6))
    plt.plot(nnegs, f1s, 'bo-')
    plt.errorbar(nnegs, f1s, yerr=ses)
    plt.xlabel('Number of negative training examples', size=16)
    plt.ylabel('F1', size=16)
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target exists
    # so a fresh checkout can run the notebook end to end.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(out_path)
    plt.show()
    
# Draw Figure 3: F1 of the informed-prior models with threshold 5.0 as n_neg varies.
plot_f1_v_nneg(results)



In [24]:

    
def plot_rocs(models, data, names):
    """
    Plot Figure 2: ROC curves of several models on the test set.

    Parameters
    ----------
    models : iterable
        Objects exposing ``predict_proba(data)``; the returned scores are
        passed to ``roc_curve``.
    data : object
        Must provide ``test_df['label']`` (the ground-truth labels). It is
        also handed unchanged to each model's ``predict_proba``.
    names : iterable of str
        Legend label for each model, in the same order as ``models``.

    The figure is saved to 'paper/figs/roc.pdf' and then shown.
    """
    truths = np.array(data.test_df['label'])
    plt.figure(figsize=(8,6))
    formats = ['bo-', 'g^--', 'rs:']
    # Cycle through the line styles: zipping against the bare three-element
    # list would silently drop every model after the third.
    for model, name, fmt in zip(models, names, cycle(formats)):
        probas = model.predict_proba(data)
        # The thresholds returned by roc_curve are not needed for the plot.
        fpr, tpr, _ = roc_curve(truths, probas)
        plt.plot(fpr, tpr, fmt, ms=4, label='%s' % (name))
    plt.legend(loc='lower right', prop={'size':16})
    plt.xlabel('False Positive Rate', size=16)
    plt.ylabel('True Positive Rate', size=16)
    # Zoom in on the upper-left region of the ROC plot.
    plt.xlim((0, .5))
    plt.ylim((0.39, 1.005))
    plt.tight_layout()
    plt.savefig('paper/figs/roc.pdf')
    plt.show()
    
def get_models(models, names):
    """
    Return the entries of ``models`` whose ``str()`` equals one of ``names``.

    The result is ordered by ``names`` first and, within a name, by position
    in ``models``. A name with no match contributes nothing; a name that
    matches several models contributes all of them.
    """
    return [candidate
            for wanted in names
            for candidate in models
            if str(candidate) == wanted]

# Look up the two models to compare by their exact string representation;
# get_models returns them in the order the names are listed here.
submodels = get_models(models, ['RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)',
                                'RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)'])
                                
# Draw Figure 2; the legend labels are paired with `submodels` by position.
plot_rocs(submodels, data, [r'informed prior, $\tau=5$', r'baseline, $\tau=5$'])



In [25]:

    
# Select single models by exact string representation. `[0]` takes the first
# match and raises IndexError if `models` contains no model with that name.
best_model = get_models(models, ['RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)'])[0]
baseline_model = get_models(models, ['RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)'])[0]



In [26]:

    
def error_analysis_recalled_products(model, data, n, n_reviews=1):
    """
    Look at the worst reviews for the top products predicted
    to be recalled by this model.

    Products are ranked by the per-product score returned by
    ``model.score_asin_recalls(data, _max)``, highest first. For each of the
    top ``n`` products this prints a header line (ASIN, whether the ASIN is
    in ``data.recalled_asins`` as 1/0, and the score) followed by the
    product's highest-probability review(s) according to
    ``model.predict_proba_reviews(data)``.

    Parameters
    ----------
    model : object
        Must provide ``predict_proba_reviews(data)`` and
        ``score_asin_recalls(data, aggregator)``.
    data : object
        Must expose ``reviews_df`` (with ``ASIN`` and ``reviewText`` columns)
        and ``recalled_asins``.
    n : int
        Number of products to print. Values <= 0 print nothing.
    n_reviews : int, optional
        Number of reviews to print per product (default 1).
    """
    # assumes probas is a 1-D array with one entry per row of
    # data.reviews_df, in row order -- TODO confirm against the model class.
    probas = model.predict_proba_reviews(data)
    scores = model.score_asin_recalls(data, _max)
    ranked = sorted(scores.items(), key=lambda x: -x[1])
    for asin, score in ranked[:max(n, 0)]:
        label = 1 if asin in data.recalled_asins else 0
        print('\n\n------------\n', asin, label, score)
        # Row *positions* of this product's reviews. Positions (not index
        # labels) are what both probas[...] and .iloc[...] expect, and an
        # empty match yields an empty integer array rather than a float one.
        idx = np.flatnonzero((data.reviews_df.ASIN == asin).values)
        # Highest-probability reviews first.
        topi = probas[idx].argsort()[::-1][:max(n_reviews, 0)]
        for i in topi:
            ii = idx[i]
            print(probas[ii], data.reviews_df.iloc[ii]['reviewText'])

# Print the 20 highest-scoring products under best_model, each with its
# highest-probability review.
error_analysis_recalled_products(best_model, data, 20)









    



------------
 B00115PFOO 0 1.0
1.0 How can I begin the review of this bed?  I have to say, this bed is sweet. The wooden pieces were veneered and cheap, but acceptable for a toddler, temporary bed- they look cute, at least. However, my friend and I spent an evening constructing this bed for her adorable toddler.  We nearly died in the process. First, the instructions appeared to have been translated from the original, incomprehensible language into Farsi, then French, then Yiddish, from there into Mandarin Chinese before finally being written in English.  All of the parts were listed in letter format ("Attach Side A to bar F using wooden dowels and bolt.  It is better this way"), yet none of the parts themselves had a letter, despite being plastered with warning labels.  Instead, tiny drawings on the instructions labeled the parts.  It was up to you to sort and translate.  This seemed like not a huge deal at first, but like the horror movie in which the innocent girl finds the latch to the bathroom not working and decides to take a shower anyway, this lack of clear designation of parts, and their top and bottom sides would result in tragedy.The instruction booklet listed "tools you will need for this job: 1 flat head screwdriver", so with our screwdriver and our newly sorted parts, we set to work.  Each of us began by slipping the rails into the head and foot board pieces, per step one.  My friend's pieces fell out as soon as she put them in, so that she was playing a sort of whack a mole game to try to get them all in at once.  "Hey," I said, "you really have to push them down first."  I demonstrated by shoving my rails in with force.  This worked great, and even though it was a bit of a chore to wrestle the top bar on without losing the wobbly rails, we appeared to be trucking along.  I lifted my completed headboard to set it aside: the entire thing fell to the floor with a clatter. My friend laughed, and I joined in.  How young we were, how innocent!  
I said: "we need a mallet, something to pound the rails in better." She reread the instructions "It doesn't say we need a mallet.  It says, one flat head screwdriver." She looked up at me: "I don't have a mallet."We decided the work might proceed better if we handled the headboard together.  By straddling and shoving on the thing in ways that felt like a dirty game of Twister,, we finally managed to get the bars shoved in and the headboard assembled.  The footboard was accomplished with similar grunting effort.  Sweating slightly, my friend said, "you know what tool I do have?  Wine." She fetched a pair of glasses and a nice bottle of pinot.  This turned out to have been the saving grace of the evening.  So to recap thus far: Tools needed for the job: 1 flat head screwdriver. 1 mallet. at least 1 extra person. 2 or more glasses of wine.Now it was time to attach the head and footboards to those cute little sleigh sides ("Bar G" and "Bar M",  I think), using wooden dowels and screws with an allen wrench head (thoughtfully provided).  Why oh why must cheap manufacturers use allen wrenches, those tiny little instruments of torture?!? By the time my friend and I had (SPOILER ALERT) finally finished the bed, our fingers were sore and arthritic.  Days later, they continued to throb where we'd grasped that evil allen wrench.Anywoo, it was at this point that things really began to go wrong: on each of the sides, the screw sections were not quite lined up correctly, so that we (working together, and pausing for many wine breaks) had to carefully shimmy first one side, then the other, up and down and at angles, in order to get the screws in.  Early on we abandoned the wooden dowels, as those holes didn't line up either.  "Do you have a drill?" I asked. "Not. In. The. Instructions," grunted my friend, as she struggled to allen wrench.  
Fetching a hammer and a potholder from the kitchen proved to be the ticket, as we could pound together the bars whilst simultaneously pulling the screw with the bar part of the flat head screwdriver (incidentally this was the only use we made of the tool so touted in the instructions) and allen wrenching. It was as easy as it sounds.  When the second bar finally started to come together, my friend's fingertips were painfully pinched by the pieces.  "No problem," she mumbled as she sucked on the injured fingers, "at least it's together now."After another half glass of wine (thank you, wine gods!), we moved on to the bottom bar.  As a foreshadowing note, at no point in the instructions was a top or bottom referred to at this stage- squinting at the line drawings was the only way to figure out inside and outside.  It had a metal plate which was attached with "self drilling" screws.  I put that in quotes because I think this appellation was completely facetious: I picture the instruction writer, laughing maniacally as he translates from the Farsi the words "little bastard tiny screws that you will never line up properly and that have PHILLIPS heads" into "self drilling screws".  My friend searched her house for the suddenly required Phillips head while I rubbed at the knot in my back and checked the clock: more than an hour had passed since we'd begun.  "I think we're nearly out of the tunnel," I called, and my friend and I cheered.  We screwed the plate and feet (full disclosure: we had abandoned the instructions and therefore had a frustrating moment when we realized that the feet had to go under the plate- at this point the "self drilling screws", constructed of the softest, cheapest alloy possible, had nearly been stripped, but we managed.  The error was out fault, but the screws deserve blame for being tiny, cheaper than cheap and Phillips head.  
So there.Once the bottom bars were assembled, we had a fun flashback to our first bar assembly as we placed the bed support slats.  The wine had done its work, though, so we laughed, thinking we were nearly done.  (Tools required update: 1 Phillips head screwdriver, maybe your own well made screws, and perhaps another bottle of wine.).Allen wrenching was required to fully assemble the bed: we each sat at an end, balancing the bed with our legs as we reached through the slats to hold the bolt with one hand and allen wrenching.  There was no more laughter.  It felt like a Soviet work camp.  My back was killing me.  My fingers were on fire. I looked up and mopped my brow.  "Ok, just the side rail now, and we're done".  My friend grabbed the rail and then started to laugh, a scary, high pitched, wheezy sort of laugh.  Something was clearly wrong.  "Um." she said. "What?" I whispered, afraid of the response.  She gulped. "The side bar is upside down.  We have to disassemble back to step 6." I snatched the directions from the floor, and poured over them for help.  Not one indication of the top or bottom was listed, yet at this stage we saw that the predrilled holes had to be reversed. I looked at my friend.  She looked back at me.  She said, "we could just skip the side rails. Toddlers don't fall out of their beds do they?"  After we finished laughing, we compromised by putting up the one side that had accidentally been done properly and pushing the other side against the wall.  Maybe someday we will disassemble the bed to step six and remake it so that both side rails can be installed.  I think we'll at least wait until the nightmares stop, though.Ok, you might think to yourself, what dummies, to not look over the bar and see that the holes would have to be in one certain way.  Let me just say that between us, we have nearly 20 years of higher education.  We have assembled countless IKEA Glurgs and Bjorks. I have reupholstered sofas and built fences.  
This toddler bed bested us. We looked over all of the directions before we started; we lined up the pieces and the "required tools". Proper instructions with all of the tools, a handful of stickers to label the parts and their sizes, and a better quality of hardware might have raised production costs by 30 cents or so: really, Dream On? You couldn't spare 30 cents to make your product actually easy to assemble?  The bottom line is this: although this bed looks cute, save yourself the aggravation and spend the same dough on a KRITTER bed or something from IKEA.  At least you'll know you can put it together without medical intervention.


------------
 B000I2Q0FE 0 1.0
1.0 A Personal Warning to all who have this baby toy by Lamaze. (Stretch The Giraffe)PLEASE READ!!I LOVED this giraffe..this thing is the cutest toy ever and I had placed this on my wish list for my baby shower last May. Well..I ended up receiving the giraffe from a good friend of mine for my baby shower like I had wanted. My son had only played with it maybe twice since he's been born, so it's still pretty brand new.My son is 10 months old now, and we just moved to a new house last week, so I re introduced him to stretch yesterday, (still in super new condition) and he had been sitting in his highchair playing with him, chewing and biting on him as most babies will do with their toys. It's to be expected.I was upstairs unpacking, my son was in his highchair next to my husband while he washed dishes when my son started choking!!He was choking on the white stuffing inside the toys foot! He had managed to chew the thing until it became unstitched at the seams and that's when the stuffing began to fill his mouth. He must have swallowed a majority of what he had in his mouth. Needless to say, it was a frightening moment for all of us!!UGH! I am just so disappointed and now concerned for anyone else's children who may have this toy!Upon close inspection of this toy after the fact, I noticed that the stitching is very poor. Not nearly strong enough to withstand the mouth of a teething baby! I noted other parts of the giraffes seams and stitching where it has signs of also beginning to become unstitched!The very worst part is the fact that had my son gone un noticed and not attended to right away, he could have very well choked completely, or the round small plastic squeaker (also inside the same foot he opened) could have also come out and easily become lodged inside his throat! 
In fact there are multiple "perfectly sized pieces" including a small round ball rattle, a more flat circular shaped rattle and the disc shaped squeaker (the one my son almost managed to get out) inside each foot. Again..they are very poorly secured inside the giraffe and are a major choking hazard! I hate to think of how many babies go to bed with this toy and are possibly left alone with it when they wake up in the morning or after a nap. Parent unsuspecting, because these toys made by learning curve are supposed to be designed with safety in mind. Safety being what Learning Curves number one priority is.  I would have never have thought to check those details since I was convinced this toy or any toy designed by a big name baby toy company was undoubtedly safe to let my baby play with.This toy needs to be recalled ASAP! I have contacted Learning Curve /Lamaze RC2 department regarding this serious issue, and Stretch the Giraffe is on his way back to them for inspection. I have no doubt they will issue a recall. If I were you...and just to warn parents ahead of time..please refrain from buying stretch for now! Or if you have him already, take him away from your baby...especially babies with teeth!In an effort to make me, the consumer happy, they offered me a new toy and that's on it's way now, but in reality, is that really enough? Or does that make up for the fact that we watched our little son choke and go through that trauma? Imagining the scenario if we had not been right there to help him...would the new toy suffice? It's horrifying imagining the things that COULD have happened and Thank God that didn't.In addition I am going to be weary of the new toy and I am not even sure he will play with it until I inspect every inch of it, and now I need to sit with him every second while he plays with in fear that some little thing could go wrong in a toy that has been deemed safe from a reputable company I have grown to love and trust. 
I just believe this is an honest mistake but a serious issue that was grossly overlooked. I know that Learning Curve will do the right thing in the end.I will never send my son to bed nor leave him to play with ANY toy, stuffed animal, or object ever again for fear that he will wake up, be alone and begin innocently teething while I am still asleep, and something similar to this may happen. It's a good thing that wasn't the case this time.Please parents...check the babies toys! You just never know!!Forward this to all your friends who you think may have this toy, may buy it, are expecting a baby, have infants and small children.-Tiffany


------------
 B004DC9T94 0 1.0
1.0 I have had my BOB Ironman for about 4 months now, and I couldn't be happier. I just wish I had known how great the Ironman was before we bought our carseat/stroller travel system, and never used the stroller. I run with my baby several miles at a time, and I have never had trouble with the wheels getting hung up or any other issues while going over (reasonable) obstacles on my runs. (I have run on grass, gravel trails, and paved walks.) The only quirk is the fixed front wheel. It does take a little getting used to, but I tend to lift the wheel just enough to take the weight off the front, and slide the stroller into my turns. It takes a little getting used to, but once you do, it seems like second nature. (My wife still has trouble with it, but I think it is mainly because she hasn't really pushed the stroller that much - I like to push it.)Also, the other thing that is a little difficult for us is the size of the stroller when it is folded up. It fits in my trunk, but not much else fits in there with it and I drive a 2003 Altima which holds a lot of stuff in the trunk. (I cannot fit a suitcase or any other "large" items with it.) Thus, we bought a Thule Rooftop Travel Bag (http://www.amazon.com/Thule-867-Tahoe-Rooftop-Cargo/dp/B00152VYRU/ref=sr_1_11?s=sporting-goods&ie;=UTF8&qid;=1364985747&sr;=1-11&keywords;=thule)that we put on our roof rack when we need to take the stroller and other large items with us. (Removing the wheels does help and they come off easily, but it doesn't solve the problem.) (I have not purchased the BOB Travel Bag [http://www.amazon.com/Bob-Single-Stroller-Travel-Black/dp/B000GKW8EA/ref=sr_1_1?s=baby-products&ie;=UTF8&qid;=1364987235&sr;=1-1&keywords;=bob+travel+bag] yet, but if I have to fly with my stroller, I will - the wheels will make moving it around in a (relatively) compact case much easier. 
(I personally don't want people rifling through my baby's stroller at the airport.))I would recommend some of the accessories available for this stroller:1. Warm Fuzzy [http://www.amazon.com/BOB-WF1001-Warm-Fuzzy/dp/B003KTLYIU/ref=sr_1_1?ie=UTF8&qid;=1364986828&sr;=8-1&keywords;=bob+warm+fuzzy] - this one depends on when you buy your BOB (i.e. age of baby) (it will keep your baby warmer during the cold weather, and it will fill in some of the space inside the stroller if you want to use it for a 2-8 month old so they fit a little more snug in the big interior of this stroller. ...I recently read that you can turn the Warm Fuzzy backwards (with the fuzzy side down) and use it during warm weather too, and it won't be so warm yet will still hold your baby snug on those runs. I have done this and it works. My daughter stays much cooler now.)2. Weather Shield [http://www.amazon.com/Weather-Shield-Revolution-Stroller-Strides/dp/B003KTLYO4/ref=sr_1_7?s=baby-products&ie;=UTF8&qid;=1364986937&sr;=1-7&keywords;=bob+weather+shield] and/or Sun Shield [http://www.amazon.com/BOB-Shield-Utility-Stroller-Ironman/dp/B005GYZLSG/ref=sr_1_17?s=baby-products&ie;=UTF8&qid;=1364986937&sr;=1-17&keywords;=bob+weather+shield]- You should definitely get at least one of these. Just evaluate which one you will need more. (I bought both.) The Weather shield is water proof on the front and it will keep some warmth in during the cold weather it is still breathable through intelligently placed air vents at the front and back; Sun Shield will breath better and block the sunlight so your baby doesn't get sunburned on your run or walk. I also discovered that by using the Weather Shield, all of the toys I put in the stroller with my daughter stay inside the stroller, so I don't have to constantly watch her to make sure her favorite teether doesn't end up on the sidewalk. Also, another note: the Sun Shield and Weather Shield for the Revolution fits the Ironman. 
You can check the BOB Gear website if you want to verify. bobgear.com3. Snack Tray [http://www.amazon.com/BOB-Single-Snack-Tray-Black/dp/B004LT5330/ref=pd_bxgy_ba_text_z] - This accessory can wait a while. Your baby won't use it right away, and anything you try to put on there will get knocked onto the ground pretty quickly. (Mine is 8 months old and still doesn't use it.)4. Handlebar Console [http://www.amazon.com/Handlebar-Console-Single-Strollers-Black/dp/B003KTLYGM/ref=pd_sbs_ba_2] - A great addition to my BOB: it has 2 drink holders & a zippered pocket (that we use for sanitizing wipes and car keys). (Also, if something happens to yours, like mine (one of the velcro straps ripped off the other day) contact BOB and explain the situation; they have exceptional customer service and will do whatever they can to make it right)I was reviewing the Ironman on Consumer Reports earlier, and I saw that the pre-2011 monels were recalled due to a strangulation hazard from a draw string that wasn't near the child area. I thought you might like to be aware that the new models have no draw strings (anywhere). I attached a copy of the recall notice below:FOR IMMEDIATE RELEASEFebruary 23, 2011Release #11-143 Firm's Recall Hotline: (855) 242-2245CPSC Recall Hotline: (800) 638-2772CPSC Media Contact: (301) 504-7908HC Media Contact: (613) 957-2983Note: This product has been recalled subsequently for another hazardJogging Strollers Recalled by B.O.B. Trailers Due to Strangulation HazardWASHINGTON, D.C. - The U.S. Consumer Product Safety Commission and Health Canada, in cooperation with the firm named below, today announced a voluntary recall of the following consumer product. Consumers should stop using recalled products immediately unless otherwise instructed. It is illegal to resell or attempt to resell a recalled consumer product.Name of Product: B.O.B. single and double strollersUnits: About 337,000 in the United States and 20,000 in CanadaImporter: B.O.B. 
Trailers Inc., of Boise, IdahoHazard: A drawstring on the stroller can get wrapped around a child's neck, posing a strangulation hazard.Incidents/Injuries: The firm has received one report of an 11-month-old girl who got entangled at the neck by the stroller's drawstring. The child was freed by her mother.Description: This recall involves the following 11 models of B.O.B. single and double strollers. The name "B.O.B" appears on the cargo basket under the stroller and on the front of the stroller. All of the recalled strollers have a yellow/orange drawstring at the rear of the canopy which is used to gather loose fabric when the canopy is pulled back. Strollers have the serial number either stamped in the frame or on a white label located on the stroller's rear right leg.Model Serial # rangesSport Utility Stroller 12362 - 35107AA00001 - AA025490AA900000 - AA999999Sport Utility Stroller D'Lux 12362 - 35107AB000001 - AB007940AB900000 - AB999999Ironman 800000 - 803700AC000001- AC027923AC900000 - AC999999Sport Utility Duallie 002001 - 008068AD000001 - AD011252AD900000 - AD999999Ironman Duallie AE000001 - AE008909AE900000 - AE999999Revolution AF000001 - AF189112AF900000 - AF999999Revolution 12" AK000001 - AK024149AK900000 - AK999999Stroller Strides AG000001 - AG011163AG900000 - AG999999Revolution Duallie AH000001 - AH072921AH900000 - AH999999Revolution Duallie 12" AL000001 - AL012657AL900000 - AL999999Stroller Strides Duallie AM000001 - AM003229AM900000 - AM999999Sold ... between April 2002 and February 2011 for between $300 and $600.Here is the Consumer Reports review:CR's TakeWhen we performed the impact test from the American Society for Testing and Materials safety standard for Strollers, the BOB Ironman Sport Utility D'Lux (and BOB Sport Utility) stroller failed. The standard specifies that the impact test be conducted at the stroller's claimed maximum weight capacity--which is 70 pounds for both of these models. 
(According to growth charts from the Centers for Disease Control and Prevention, a 70-lb. child may be anywhere from a large 7-year-old to a very small 12-year-old.) While this caused this stroller to receive a Fair score for Safety, it should perform well when using it with children in the more typical 40-pound range. Learn more in Detailed Test Results below.Detailed test resultsWhen we performed the impact test from the American Society for Testing and Materials safety standard for Strollers, the BOB Ironman Sport Utility D'Lux and BOB Sport Utility Stroller failed. The standard specifies that the impact test be conducted at the stroller's claimed maximum weight capacity--which is 70 pounds for both of these models. (According to growth charts from the Centers for Disease Control and Prevention, a 70-lb. child may be anywhere from a large 7-year-old to a very small 12-year-old--all of whom may be too old, or just unwilling, to ride in a stroller.)The standard specifies that, when a stroller is put through the impact test, "the fold locking/latching mechanism shall not disengage or break, or both."  But when loaded with a 70-lb. "passenger" in our test labs, the BOB strollers, at times, collapsed completely on impact. At other times, the shock absorber adjustment mechanism, which is also part of the fold mechanism, would partially disengage, slipping from the second setting (the one for a child passenger weighing between 41 and 70 lbs.) to the first setting (for a child passenger weighing up to 40 lbs.). When we tested using a far more typical 40-lb. "passenger," both BOB models passed the impact test.Because the BOB models failed our impact test at their claimed weight capacities, we lowered their safety and overall scores. However, we feel that the 70-lb. weight limits for the 2 BOB models are simply excessive, and that it's far more likely that the weight of a child riding in any stroller would be closer to 40 lbs. 
or less.The Ironman was Excellent in running performance, and both models were Excellent in maneuverability; they would have been higher in our Ratings without the Safety concern at what we believe to be an unrealistically high weight limit.We spoke with the manufacturer about our impact test findings. They told us that the models in question passed internal and third-party testing for impact at 70 pounds.  We asked them specifically why there is such a high weight limit for the strollers. They told us that their products are designed for longer duration runs and walks where older children may not be able to keep up and would be in a stroller. In answer to our question, they said there were no reports of injuries or fatalities with their products associated with these issues.The BOB Ironman Sport Utility D'Lux is part of the stroller test program at Consumer Reports. In our lab tests, jogging stroller models like the Ironman Sport Utility D'Lux are rated on multiple criteria, such as those listed below.Performance:Performance is panelists' judgments of ease of pushing and maneuvering on varied terrain while running.Off-road maneuverability:Off-road maneuverability is trained panelists' judgments of ease of pushing, maneuvering and turning while walking on a course that included flat pavement, up- and downhill sections of pavement, and a rough dirt/grass/mulch section with tree roots, fallen branches, and the like.Ease of use:Ease of use includes such items as opening and folding the stroller, using the harness, adjusting the backrest, lifting and carrying the folded stroller.Overall, I have been very satisfied with our purchase. I would buy it again, and the price on Amazon of $309 is the lowest I have seen online. (I paid around $350 on Albeebaby.com, which was the lowest at the time - Amazon has come down recently on this item!)


------------
 B001N44UX0 0 1.0
1.0 I have a 12 mos old daughter and purchased this gate about 6 weeks ago.  Today she reached through the narrow areas on the outside of the swinging door area of the gate.  She reached past her elbow, and then bent her arm, thus making the width of her arm increase.  She tried to pull her arm out but could not.  She then pulled harder and wedged the flesh of her arm in the narrow space between two bars.  She started screaming, which is when I saw what she did (I was folding clothes).  I immediately ran over and tried to pull her arm out without realizing exactly how it was stuck, causing her even more pain and to cry even harder.  It took me a few mins with her in pain and screaming to figure out it was stuck because it was bent and wedged in there.  I straightened her arm and was able to extract it.  Just as crib rails now have a spacing requirement to prevent this very issue, it shouldn't take a rocket scientist for a manufacturer of a SAFETY product to consider that a toddler might be interested in what's on the other side of the gate and try to reach through.  As a first-time parent, I embarrassingly admit I did not notice that narrow spacing and imagine what would/could happen, but a company with the majority of its business based in the SAFETY of babies and children has no excuse.  As we all find in the parenting journey, I have learned this lesson the hard way and will be sure not to allow such a dangerous product into my home again.  The reason I purchased this gate was because it was one of the few that expanded as widely as I needed.  However, that is nowhere near as important as protecting my baby from needless injury like this.  I shudder to think how this might have gone if I had been in the shower or restroom and unable to immediately respond, all the while thinking everything was ok because I have a safety gate installed to prevent her from getting to dangerous areas.  
I have already reported this to the US Consumer Product Safety Commission and will follow this review with a report to Summer Infant itself.  DO NOT RISK YOUR CHILD'S SAFETY WITH THIS PRODUCT!!!!!I have included (in the "add your own images" are of the product at the top of this page) a picture of my daughter's arm immediately following the incident.  I am sure it will become a nasty bruise in a day or two.  We'll see if this site allows this image to be included.UPDATE:  Summer Infant has reached out and sent a free shipping label to return the product.  They will be sending it to quality control so they can investigate the safety concerns.  They will follow up with me.  I am pleased with their prompt response and attempt to investigate.  They have a mesh gate that is similar in width which I will try in exchange.


------------
 B004CJ9D6W 0 1.0
1.0 I have NEVER written a review before for anything! DO NOT BUY THIS PRODUCT!!!This is a very expensive monitor and the features are awesome!! It is really a fantastic monitor, truly. The clarity is great, the VOX feature is awesome, the intercom is fabulous for older kids (although there is a delay on it so after you speak you cannot hear the response as it won't pick up sounds for several seconds after you let go of the "talk" button) And I REALLY hate that you cannot mute the volume. The lullaby feature is nice, but I do NOT want to sit there and listen to the lullaby myself! But, you must. No mute...BUTafter only 10 months of use our camera just stopped working!! I unplugged the camera to move it to a safer location as our son had become mobile and I was concerned he could reach the cord and pull it down onto himself. When I plugged the camera back in... nothing. The green power light would not come on!!I (naturally) thought it was the outlet. Checked the fuse box, etc. But it was not the outlet. No other outlet in the house worked (despite other appliances working just fine) so clearly it was not a problem on our end.We then attempted three different sets of new batteries. That also did not work. So it is not a battery problem.When plugged in the camera briefly indicated that it was receiving power, so it was getting power, just not turning on...What a pain in the butt!! When I bought this system it had fabulous reviews. What a shock it was to come on here and see how the reviews have dramatically tanked since we decided on this system!I called but could not get through. I emailed and emailed and emailed again, but two weeks have gone by and there has been no response. I have sent a message through their "contact us" form on their website, but have received no response... by the time these guys get back to us our warranty will have expired! I think we'll be out the $230 we spent on this machine! 
And all this time we've been without a monitor for our little one-who happens to have a bedroom on a different level of our home. It has been a total nightmare!!!As of today I still have not heard back from Levana, and despite their bogus claim that you can call or reach them through live chat I have been unable to reach them despite multiple attempts.Just now I tried to plug my camera in and it lit up and started working!!! I have NO idea why.... and I'm sure it won't last. I can only hope that I'll get at least another 6 months-1 year out of the camera and I will NEVER buy another Levana product EVER again!!I will also be filing a complaint with the BBB and I recommend all the negative reviewers out there that have received horrible customer service do the same!!! Not all families have hundreds of dollars to throw down the drain on a fancy monitor just to replace it again before one year has passed!! So much for their wonderful "lifetime" support!!!!SHAME on you Levana!!!! Stop pedaling your horrible "made in china" GARBAGE!!!!IT IS A DANGEROUS MONITOR!!!When you're expecting something to let you know your child is safe and it fails on you it can really lead to suffering for your child as well as injury or even death!!! What of the battery life? It is NOT 4.5 hours as they claim. I brought my monitor upstairs with me to take a short nap and I did not expect to need the charger as the monitor was completely charged. I fell asleep and since my then four-month-old was still not a very good night sleeper I was exhausted and was sleeping very deeply. The monitor's battery failed and the machine shut off at some point because I woke up about 45 minutes later to the very faint sound of my infant son screaming his head off. 
Our bedrooms are on different levels of the house and I was COUNTING on this monitor to let me know if my son woke up.My poor infant son had rolled in such a way as to get his fat little thigh wedged between the bars in his crib with the leg hanging out. He had twisted and he couldn't get himself back through. This caused him to have his face smashed pretty hard into the mattress surface. Also, by the time I got to him his foot was absolutely purple. I had a very difficult time removing the swollen leg from the crib bars and thank GOD my son did not exhaust himself and suffocate in the crib mattress while I was sleeping and relying on my monitor to warn me if he needed me for anything.My poor son had a purple bruise on his nose because of being smashed into the mattress and his leg had such an awful and painful looking purple indented ring around it for days. Thank God his foot and lower leg did not suffer any permanent vascular damage from lack of oxygenated blood. Who knows how long my little sweetie was stuck in there before I woke up? That is an awful thing to happen to a child and his parents, but with all the concerns about crib bumpers I hadn't installed them!I installed mesh bumpers after that, but I have never again fallen asleep without the monitor attached to the plugged-in charging base!!!Save yourself the worry, hassle and headache!! Go with a tried-and-true brand that truly boasts of fabulous customer service!!!


------------
 B000HCX5EY 0 1.0
1.0 My 10-month-old son was enamored of his little friend's activity cube. I went on the hunt for one to give him for Christmas (his friend's model is no longer produced), and found this well-reviewed cube, which we happily snapped up from Target for about $[...].The baby loved it, but within one day of playing with it, the bottom brace for one of the little doors that opens and shuts, swung down. Turns out, the hot-glue-and-small-metal-pin construction they use was at fault. It appears that one of the metal pins was missing, causing the brace to swing down, which made the door fall off--including the two, inch-long, sharp metal pins that act like hinges. These two pins were loose on the floor; thank goodness I was right there and saw it happen, or they would have gone right down my son's throat (he was reaching for them as soon as they fell out). That was the end of that. We returned it to Target immediately.When I was researching activity cubes, I wasn't concerned with the toy tipping over--I figure my son is going to fall down quite a lot, and get bumped by things over the years--but these tiny metal pins are truly dangerous and could lead to serious time in the emergency room and hospital.Due to the poor construction of this toy, I can not recommend enough that you look elsewhere for a better-constructed option. I will be contacting the manufacturer about this frightening flaw to their product.


------------
 B00020L78M 0 1.0
1.0 I personally didn't buy this gate, specifically because of the alarming number of reviews I found online about plastic shards breaking off of it. The following are only two of MANY reviews I found:"Buyer Beware. We had two different gates sent to us. The initial one was missing a part. The second one had little pieces that were falling off of it the size of a piece of rice and sharper then you can imagine. We found this out because our nine month old was gagging on a piece - then investigated and found two other pieces on the floor. We took the part we needed out of the new gate and put it in the first one we received. We looked, and though that one looked okay. Last night I found two pieces that had broken off on the floor, thankfully before my son did. This happened to us twice - not a coincidence. I have talked to guest relations and they are putting out a product recall. This could have been a FATAL situation with our son!""DO NOT BUY THIS! I CAN'T BELIEVE I ORDERED THIS! Even after seeing one review that said they found dangerous shards of the product on the floor, their child had choked on these pieces. I just recieved the product and looked it over, it looked fine, set it up and put my 7 month old in it. He cried a bit so I climbed in...that is when I found some small rice sized VERY SHARP! pieces of the gate that had fallen off from it . I can't figure out where they came from. I assumed the other review must be a fluke or that the problem was fixed as they said the company was suposed to put a recall. I called the company to complain and they said they had never heard of the problem, and only offered to send a replacement gate. If a small child swallowed one of these incredibly sharp pieces it has the potential to be fatal!! DO NOT BUY!!!!!"In short, the product should probably be recalled. Do not put your children at risk!!


------------
 B0007CQ6OK 0 1.0
1.0 Update - July 23rdToo much to tell about how awful this company is, so I will just paste my email to them:"Hello,I am writing to voice my family's anger over your unsafe, cheap co-sleeper.  If you recall, I had a problem with my newly purchased co-sleeper back in May, which I immediately called about and was told to send the frame back.  At that time, I asked to speak to a supervisor about the situation, and was told that I would be contacted shortly.  However, Mayra was the only one who I was able to speak with, after numerous attempts to be put in contact with the supervisor.  After a huge delay due to mistakes on your end, I finally got the co-sleeper sent back to the company, after speaking with Veronica on June 13th.At this time (June 13th), I asked to speak with the manager of the company, and Veronica told me that Sharon was not in at the time, but would be in later that day.  I obviously never heard from Sharon, or anyone else from this company, for that matter, from that point on.  I was inquiring to speak with the manager after voicing my concern over just repairing the frame, instead of receiving a new product.We received the frame shipped back to us, without so much as an email or follow up from Arm's Reach.  The entire process to get our frame back took over a month from the time I first contacted customer service.  (I had initially asked for a replacement frame, since our daughter had been used to sleeping in it by this time, and I did not want to completely disrupt her sleep pattern while waiting for the repair, which it did).  The frame came back to us in a box with one sheet of paper, which did nothing to describe what was wrong with the frame, or what was done to "repair it".I use the term "repair" in disgust, since it clearly was never properly repaired.  We assembled the co-sleeper, again, according to exact instructions in the manual.  
Two nights later, I noticed that now if I barely touched the rail of the co-sleeper nearest our bed, that it would collapse and had to be pulled back up into the lock position.  Obviously, we felt very uncomfortable using the co-sleeper, but after having dealt with your awful lack of customer service the first time around, I did not waste my time trying to reach Mayra, Veronica, or Sharon again.To avoid the problem of the collapsing side rail, we have been careful to not touch or even brush against it, for fear that it would collapse with our daughter in it.  Without touching it, it does stay up.  Tonight, (July 23rd) however, I laid my daughter down from the other side of the co-sleeper all together, and the other side rail immediately collapsed when I laid her in the co-sleeper.  She was in it!!!  I had not put any pressure on the side rail, it just collapsed.  After taking her out, waking her up in the process, and moving her elsewhere to be put to sleep for the night, I attempted to re-lock the side rail.  It took several tries, and does not seem secure at all.I can not believe that your remedy to fix the situation of a faulty co-sleeper was to send our same frame back to us "repaired", which still breaks and is worse off than when I first contacted you.  I am so turned off by your company, not only because of the safety hazards associated with putting a baby in this piece of crap product that you make, but the complete lack of care from the supervisor or manager. You would think that someone would be concerned that their products for babies are faulty and unsafe.  These are babies we are talking about here!  Not pets, not adults...innocent babies who need the safest place possible to sleep.  Your company disgusts me, and I am honestly surprised that you are still around.  Not for long, I am sure."Previous review:We had been using the euro mino co-sleeper for almost 3 months with our Ikea platform bed, and were mostly happy with it.  
My husband had some frustration putting it together, but once he did it seemed sturdy and safe, and with an extra pad in it our baby was starting to sleep in it for most of the night.  Then, we needed to stay overnight at in-laws for a funeral, and needed to pack up the co-sleeper for the first time for the trip.  We were happy that the co-sleeper was advertised as travel friendly, since we knew we would eventually need an overnight bed, and this folds up into a small-ish bag with handle.  HOWEVER, we found out that it did not go back together - 3 of the 4 sides would not lock into place once we were at my in-laws.  There were 3 adults working on this for a couple hours, it just wouldn't lock together.  We had all of the instructions, warnings, manuals, etc with us...and it just wouldn't work. Extremely frustrating when it is late at night, and the customer service is only open M-F 9am-4pm.  We went online trying to find solutions from others that had the same problem; we tried pulling fast, made sure the bottom legs were unlocked, etc, etc...nothing worked.  Our baby had to sleep in a car seat.  It was such an awful experience, and of course I called Arm's Reach the next business day that they were open. I was told I had to send the co-sleeper back for repairs (no offer to help trouble shoot over the phone, or any other assistance).  They estimated it would take about 2 weeks for shipping and repairs, which is so frustrating since we finally had our baby getting close to sleeping through the night in this.  Now we will be disrupting the entire sleep situation. I asked if they would send a replacement in the meantime, and they said "they were unable to".  Currently waiting for UPS to pick up the broken product, which I guess will get fixed and we get the same one back??  Doesn't seem like a great idea.  
Overall, I guess this product is fine...IF you plan to only set it up once, and never take it down to move it or travel with it, or set it up for other babies in the future?


------------
 B0002ZOI9W 0 1.0
1.0 This chair can tip over if your child reaches forward, overbalancing the chair.  My son tipped the chair and fell face first into the hardwood floor.  Luckily, he was not seriously injured.  The instructions for the chair say NOTHING about being usafe for infants who can sit up, but that is what a representative at Fisher Price had to say about it!  "NOT SAFE FOR INFANTS WHO CAN SIT UP."  Shouldn't that be on the instructions?  Infant to Toddler Rocker is a misleading name, if that's the case.  The response by Fisher Price was to offer a refund.  This product should be RECALLED!  This could have led to a serious injury.  I see numerous reviews about this product being unsafe.  Where is the U.S. Consumer Product Safety Commission?  I called them numerous times but could not get my call returned.  BEWARE!


------------
 B00115Q5A2 0 1.0
1.0 We bought this jumper two weeks ago.  My six month old daughter LOVES it.  However, yesterday my husband called me and said, "she got that purple thing off and had it in her mouth."  Of course I contacted baby einstine as the toy hook is a very small piece and is a choking hazard.  They put me in touch with Kids II Corporation.  I told the woman what happened, she said it was a mis-mold on the piece and she knows that because they have been getting a LOT of calls about this piece coming off.  She asked if I wanted a replacement part.  Of course I said NO!!!  She asked why I was calling if I didn't want a replacement part.  I explained to her that it was a choking hazard and they needed to do something about it as a child could die from the defective piece.  She then stopped me and said the toy hook was NOT defective, it was just mis-molded.  (Ummm...isn't that defective?) And she also informed me it wasn't a choking hazard, there was the possibility that a child could get it lodged in his/her throat, but it isn't a choking hazard.  (Ummm...isn't choking on a toy usually done when it gets lodged in their throat?)  She then informed me that she was not going to write a report up since I didn't want a replacement piece (can you say covering something up!).  I requested her name and extention.  She said there was no way to contact specifically her.  I requested a copy of our phone conversation as she said it was being recorded.  She said there was no way to get a copy of it.  I demanded to speak to a manager who took my information down and said she would forward it to the engineering department.This peice is a choking hazard and is dangerous!!!  Do not purchase it!!!


------------
 B001KVIBXQ 0 1.0
1.0 We were given this product when my daughter was in utero. It shipped to our house and remained in its original box until we opened it.  When we opened it, our daughter was the recommended age of 10 months.  We put her on it and, with a parent guiding her gently, with one hand on her back and the other holding her hand and the handlebar, we proceeded to let her rock gently.Immediately, the front chest area of the ladybug &#34;fell&#34; forward as if it had come off a stand or something inside the body of the ladybug.  The rocker went forward way too much and our daughter was jutted forward.  Had it not been for the parent right next to her, I believe she would have flipped headfirst over the rocker.  We attempted to use it again and the same thing happened. We put the rocker aside.Having repeatedly attempted to help her enjoy the rocker safely in the past year, it now sits in her room, with the tag on, and she uses the musical buttons and that's it.What a waste.  Today we once again attempted to let her use the rocker in a final attempt to enjoy it, and despite a parent sitting right next to her, my daughter rocked gently twice and flipped headfirst over the handle.  She hit her chin on the handle, splitting her chin open and the rocker fell over on her.With our experience, and having searched many websites selling this product and hundreds of reviews, it is now unequivocally my opinion that this product is unsafe. I've filed a report with the Consumer Product Safety Commission.  I have contacted the company requesting a statement with regards to their plan to recall or issue a product alert.


------------
 B001R95J2W 0 1.0
1.0 Attached is a letter I sent to Prince Lionheart.  It pretty much sums up my review, so I figure I would just copy and paste.  I will post updates if the company responds.Dear Sir or Madam:I bought two of these wipes warmers when I had my twins in November 2011.  I loved both of them over other wipes warmers I had owned in previous years (Dex baby and Babies R Us Brands).  The moistened replacement inserts were a plus for me- because of them, the problem of needing to add water was not an issue with your warmers as opposed to others I had used.  Because I had twins, they both received high usage and were both maintained/refilled and moistened on a weekly basis. However, a few months ago, I noticed that one of my AC inserts was severely corroded after taking on water.  Water??  The cord, nor the warmer had never been anywhere near water?!  I stopped plugging it in, aware that it could pose a fire risk.  And in a nursery right next to my baby's crib, no less!  I assumed it was just a fluke and went about using my other one, which was not anywhere near the nursery on the other end of my home.Last week, I noticed the SAME thing had happened to my second wipes warmer AC adapter insert.  A simple inspection showed me that when the top is lifted, condensation that has collected drips through onto the AC adapter insert - a design flaw and undoubtedly what is causing this problem with so many consumers.  I am fortunate that nobody was hurt and I am just out $60 for the wipes warmers.  However, in searching for a replacement, I started reading reviews of your premium wipes warmer on Amazon.com.  Alas, I am not the only one that is experiencing this issue.  I counted at least half a dozen people with the same issue - and that is just from the small population that actually takes the time to post a review.  Then, I saw someone had also reported this wipes warmer on saferproducts.gov for, you guessed it, the same thing.  
I noticed that your corporate response was one averting blame and indicating it was not your product that is the problem.  Very disappointing, to say the least.  I take very good care of my items, live in a home with a whole house generator and a whole house surge protection system -it is very unlikely that there are wiring or power issues here that would have caused this.  Furthermore, it was not exposed to water other than the water that leaks through the cover when in use.  All signs point to the fact that this is not a consumer problem, but a manufacturing problem.PLEASE for the safety of every family using your products, bring this issue to light with your design team so that this issue can be fixed, and stop averting blame to the consumer.  My wipes warmers were refilled and instructions were carefully followed.  I agree that a lot of the times, product safety issues are the result of poor, neglectful parenting or the lack of common sense, but this is simply not the case here and to continue to ignore it and act like it is not your fault is unethical and simply put, unequivocally wrong.Update May 2013:  Price Lionheart did respond. Here is the response-"Dear Ms. XXXXXXXX,Your message has been forwarded to me, since it referred to a safety concern about one of our products. I am the head of operations for Prince Lionheart, where my responsibilities include oversight of quality control and regulatory compliance. Thank you for taking the time to explain your experience. I was greatly relieved that no injury to your family or damage to your home occurred, but sorry that you had to go through this troubling incident at all. I would very much like to learn as much detail as possible about the circumstances, since first hand information from real-world environments is extremely valuable in our efforts to improve our products. 
I hope you'll respond to this message with a phone number and convenient time when I may reach you for a short conversation.In the meantime, you might find some measure of reassurance in knowing that the issue of producing the safest possible wipe warmer, even though both water and electricity are inherent to its function, is always first and foremost in the efforts of our design team. Much care has been exercised in developing the channeling that returns condensation to the wipe tub, where it belongs, so your observation on this subject is of great interest to me. Of the many, many wipe warmers Prince Lionheart has sold over almost fifteen years, we are not aware of any instance of a fire being started or an individual being injured by one of them, but we're always seeking ways to further improve our warmer.Again, thank you for your time in bringing this to our attention. We're sorry for the distress it caused you, and I hope to speak with you soon.Best Regards,Richard Siegel"I did reply to this message providing him with a phone number with which to contact me, but I never heard from him after that.  It has been more than a month.  Not really impressed by Prince Lionheart to say the least.


------------
 B0000DJ3FM 0 1.0
1.0 This toy seemed like a great deal for the money--the least expensive rocking horse out there.  There's a reason it's so inexpensive--it's cheap!!!  There are stickers which must be manually applied.  They're not that easy to line up and they are easily removed by a child, posing a choking hazard.  In addition, on more than one occasion, my son turned the horse upside down and attempted to mount it from the wrong angle.  The rungs on the bottom are spaced just closely enough together for a child to get a leg caught between.  They hold his leg very securely and he almost severely injured himself twice since he was unable to get his leg out on his own and twisted it badly.  It also left a large, red mark on his leg after we got him out.  He literally was unable to get out himself.  A toy made for such a small child should not have stickers that can be removed but, rather, molded, colored plastic.  I called Fisher Price about this toy and have asked them to seriously consider putting a recall on it.  It is very dangerous and we have since thrown ours away!!!


------------
 B001KVEEG4 0 1.0
1.0 We were given the LuLy LadyBug Rocker (same company, different design) when my daughter was in utero. It shipped to our house and remained in its original box until we opened it. When we opened it, our daughter was the recommended age of 10 months. We put her on it and, with a parent guiding her gently, with one hand on her back and the other holding her hand and the handlebar, we proceeded to let her rock gently.Immediately, the front chest area of the ladybug "fell" forward as if it had come off a stand or something inside the body of the ladybug. The rocker went forward way too much and our daughter was jutted forward. Had it not been for the parent right next to her, I believe she would have flipped headfirst over the rocker. We attempted to use it again and the same thing happened. We put the rocker aside.Having repeatedly attempted to help her enjoy the rocker safely in the past year, it now sits in her room, with the tag on, and she uses the musical buttons and that's it.What a waste. Today we once again attempted to let her use the rocker in a final attempt to enjoy it, and despite a parent sitting right next to her, my daughter rocked gently twice and flipped headfirst over the handle. She hit her chin on the handle, splitting her chin open and the rocker fell over on her.With our experience, and having searched many websites selling this product and hundreds of reviews, it is now unequivocally my opinion that this product is unsafe. I've filed a report with the Consumer Product Safety Commission. I have contacted the company requesting a statement with regards to their plan to recall or issue a product alert.You should file a report with the CPSC about this. I have. My daughter was injured rocking normally with parental supervision. The more of us who say something, the better.Someone else filed a review as well.  [...]


------------
 B00FOL7CME 0 1.0
1.0 We have the white version of this cradle glider. It's a bit shaky, but looks nice.Unfortunately this product is of very low quality and A SAFETY HAZARD to your baby! The base board (where the mattress rests on) is too small. It can fall out of the frame!! Baby and mattress thus fall out of the bed!!!!Our daughter fell out of this bed twice, rather, she fell through the mattress support (and she was among the smaller babies at that time).The cradle has been recalled on November 13, 2013. Hazard: "The mattress support board can fall out or slide out of the bottom of the cradle glider posing a risk that babies can fall out and suffer injuries." ([...])  Consumers should immediately stop using the recalled cradles.We have contacted Dream on me within a week about this recall, and many times since then. Three months later we still have not received the promised repair kit. We cannot use the unsafe cradle, it poses a risk for our baby. The recall center is not helpful - they are still waiting, quote, for the shipment from China to arrive.Our baby will soon have outgrown the cradle. Dream on me has made us for a pile of dangerous junk made in China.Buyer beware!Do not buy from Dream on Me. Their quality control, sense of responsibility, and customer service deserve lemons, but not a single star.


------------
 B002TUTPQO 0 1.0
1.0 Top corners crack easily and cause small sharp pieces of plastic to fall in the pen. It also leaves sharp broken corners. VERY DANGEROUS!!!!! I purchased two of these play yards....one for upstairs and one for down. We do not travel with them. The only time they are moved is when we are cleaning them or the room. A while back I found a hard sharp piece of plastic in the pen. At the time I did not know where it came from but was relieved that my daughter did not swallow it. This afternoon I found another piece of hard sharp platic in the pen. I took my daughter out or the pen to see if there were more. When I lifted her I found that her ear was cut and bloody. I emptied the pen and searched the entire thing to see where the plastic could have come from. I found that the hard sharp plastic came from the top corners of the crib. Investigating both Pack and Play Bugs quilts I noticed one had 2 broken corner and the other had 3 broken corners. I have only found 2 pieces so I am hoping my daughter did not eat any pieces. THIS IS VERY UPSETTING TO ME AND I AM SURE THAT I AM NOT THE ONLY ONE REPORTING THIS COMCERN TO GRACO. I RESEACHED THE CONCERN TO SEE IF THERE MIGHT HAVE BEEN A RECALL I HAD MISSED. ALTHOUGH I DID NOT FIND A RECALL ON THE PRODUCT I DID FIND NUMEROUS OTHER CONSUMERS STATING THE SAME CONCERN. THIS PRODUCT IS DANGEROUS IF NOT DEADLY.


------------
 B00FOL7CF6 0 1.0
1.0 This cradle looks nice. Once you've assembled it you will notice it is a bit shaky. But that is not the problem.The problem is that this product is dangerous. The base board (where the mattress rests on) is too small. It can fall out of the frame!! Baby and mattress thus fall out of the bed!!!!Our daughter fell out of this bed twice, rather, she fell through the mattress support (and she was among the smaller babies at that time).The cradle has been recalled on November 13, 2013. Hazard: "The mattress support board can fall out or slide out of the bottom of the cradle glider posing a risk that babies can fall out and suffer injuries." ([...])  Consumers should immediately stop using the recalled cradles.We have contacted Dream on me within a week about this recall, and many times since then. Three months later we still have not received the promised repair kit. We cannot use the unsafe cradle, it poses a risk for our baby. The recall center is not helpful - they are waiting, quote, for a shipment from China.Our baby will soon have outgrown the cradle. What we have gotten from Dream on me is a pile of dangerous junk made in China.Buyer beware!Do not buy from Dream on Me. Their quality control, sense of responsibility, and customer service deserve lemons, but not a single star.


------------
 B00186YSMQ 0 1.0
1.0 WARNING to all owners newer model Peg Perego SIP 30/30 Infant Seats. It has two major defects.1> Defective part causes cuts on baby's heels and legs.2> Defective latch can cause seat to fall out of Pliko P3 stroller and have baby land on ground.Peg Perego's SIP 30/30 Infant seat is defective and they have not made registered customers aware of it.  They are not taking responsibility for the safety of the children using this seat.  They have been aware of the first issue since the beginning of summer 2008.The first issue with the seat causes cuts. We discovered lacerations on our son's ankles for over a month but could not figure out what was causing them. On a road trip we discovered a large bleeding gash on the back of his ankle about  the size of a DIME. We traced the source to the SIP 30/30 car seat's plastic strap cover near the feet. There was blood on the cover and the edges were sharp enough to cut my finger.  When I called Peg Perego, it took over an hour to get through to their customer service. Once I did, I discovered over 50 children have already been injured and they anticipate FOUR THOUSAND seats could have this issue. DO NOT follow their instructions to remove the plastic cover until receiving the replacement. The metal latch further cut up the back of our son's legs while waiting for the part. It took almost 4 DAYS to get the part from them instead of shipping it priority overnight.It is Peg Perego's RESPONSIBILITY to inform registered seat users of this dangerous issue, yet they have done NOTHING to warn people. Not even an e-mail. They plan to ship the parts when they have replacement. It has been almost 5 months since they first learned about the issue. How many more children will be injured before they make the public aware of this defect. Since discovering this issue, I have told every parent I see with this car seat about this issue. 
Their response is normally "I was wondering where those cuts where coming from."The second issue with the seat is latching into Peg Perego's Pliko P3 stroller.  The infant seat has two possible latch points. We attached the seat to the stroller using the outside latch. We heard both sides of the seat "click" as explained. While pushing the cart the infant seat fell off the stroller and landed on the sidewalk with our son head down and screaming. We were lucky the stroller wasn't stopped by a street where it could have fallen in front of a car. The fact it can even latch to something that will not hold the seat, much less EASILY latch,  is a SERIOUS design defect that is extremely dangerous to children. It could be fatal.Peg Perego needs to take responsibility for their poorly designed products and let people know about these issues. Hopefully as a buyer you will review this and evaluate if their products are safe for your children before buying and they are injured.If I could give this product less than 1 star, I would.


------------
 B000I2RK80 0 1.0
1.0 We have 9 month old twin girls who liked this mirror. Seemed like a safe toy but this morning I heard one of them coughing and when I investigated I found the stitching had come undone along the side, exposing wads of the extremely fine fibrous padding material used to line the side. My daughter had a large piece of this in her mouth, which had blocked her airway and she had started choking. Only by good fortune had I heard her coughing and was able to get the material out of her mouth, which revealed a matted string had extended into her throat, causing the choking response.In my view this could have been a fatal incident. I believe this happened because of poor manufacturing standards. The padding material should have been encased in a protective sleeve, and the stitching on the hem of the mirror should have been of a much more robust nature. It's important to note that the toy had not been abused, and I would describe the usage as pretty low. Certainly far below the point the product would start to disintegrate.This is clearly an extremely dangerous product, which should be recalled immediately. The build quality is utterly sub standard and it's only a matter of time before someone else's child comes to harm as a result of the shoddy design and construction.


------------
 B003SX0B6E 0 1.0
1.0 Health officials are warning parents not to use a special device designed to help keep babies in certain positions as they sleep. The device, called a sleep positioner, has been linked to at least 13 deaths in the last 15 years, officials with two federal agencies said on Wednesday."We urge parents and caregivers to take our warning seriously and stop using these sleep positioners," Inez Tenenbaum, the chairman of the Consumer Product Safety Commission, said in a statement.The sleep positioner devices come primarily in two forms. One is a flat mat with soft bolsters on each side. The other, known as a wedge-style positioner, looks very similar but has an incline, keeping a child in a very slight upright position.Makers of the devices claim that by keeping infants in a specific position as they sleep, they can prevent several conditions, including acid reflux and flat head syndrome, a deformation caused by pressure on one part of the skull. Many are also marketed to parents as a way to help reduce a child's risk of sudden infant death syndrome, or SIDS, which kills thousands of babies every year, most between the ages of 2 months and 4 months.But the devices have never been shown in studies to prevent SIDS, and they may actually raise the likelihood of sudden infant death, officials say. One of the leading risk factors for sudden infant death is placing a baby on his or her stomach at bedtime, and health officials have routinely warned parents to lay babies on their backs. They even initiated a "Back to Sleep" campaign in the 1990s, which led to a sharp reduction in sudden infant deaths.With the positioner devices, if an infant rolls onto the stomach, the child's mouth and nose can press up against a bolster or some other part of the device, leading to suffocation. 
Even if placed on the back, a child can move up or down in the positioner, "entrapping its face against a bolster or becoming trapped between the positioner and the crib side," Gail Gantt, a nurse consultant with the Food and Drug Administration, said in an e-mail. Or the child might scoot down the wedge in a way that causes the child's mouth and nose to press into the device."The baby's movement may also cause the positioner to flip on top of the baby, trapping the baby underneath the positioner or between the positioner and the side of the crib," she said.Of the 13 babies known to have suffocated in a sleep positioner since 1997, most died after they rolled from their sides onto their stomachs. The Consumer Product Safety Commission has also received dozens of reports of babies who were placed on their sides or backs, "only to be found later in hazardous positions within or next to the product," the F.D.A. said in a statement.Many baby books for new parents specifically urge against using sleep positioners, and the American Academy of Pediatrics does not support their use for SIDS prevention. Though the F.D.A. has never approved the positioners for the prevention of SIDS, it has in the past approved a number of the devices for the prevention of gastroesophageal reflux disease and flat head syndrome. But the agency said that in light of the new safety data, it believed any benefits from using the devices were outweighed by the risk of suffocation.As of Wednesday, the agency is explicitly advising parents to stop using sleep positioners, and it has asked manufacturers of the devices to submit clinical data showing that the benefits of their products outweigh the risk of serious harm. In addition to avoiding the devices, experts say, parents should keep things like pillows, comforters, quilts and bumpers away from their infants and their cribs. Soft bedding can increase the likelihood of a baby suffocating."The safest crib is a bare crib," Dr. 
Susan Cummins, a pediatric expect with the F.D.A., said in a statement. "Always put your baby on his or her back to sleep. An easy way to remember this is to follow the ABC's of safe sleep - Alone on the Back in a bare Crib."



In [27]:

    
# Plot Figures 4 and 5.

def get_reviews_before_date(data, asin, date):
    """
    Return all reviews of product `asin`, ordered by their 'review_time' column.

    NOTE: despite the function name, `date` is not used for filtering -- reviews
    written after `date` are part of the result as well.
    """
    frame = data.reviews_df
    belongs_to_asin = frame.ASIN == asin
    return frame.loc[belongs_to_asin].sort_values('review_time')
    
def time_diff(time1, time2):
    """
    Number of days from time1 to time2, i.e. time2 - time1.

    Both arguments are strings that start with a 'YYYY-MM-DD' date; anything
    after the first 10 characters (such as a 'T00:00:00' suffix) is ignored:
    time2 2012-01-02T00:00:00
    time1 2011-11-03

    The result is negative when time2 is earlier than time1.
    """
    date_format = '%Y-%m-%d'
    # Truncate BOTH inputs to the date part. Previously only time2 was truncated,
    # so a time1 carrying a time component raised ValueError.
    start = datetime.strptime(time1[:10], date_format)
    end = datetime.strptime(time2[:10], date_format)
    return (end - start).days
    
def get_colors():
    """Return an endless iterator over 10 colors sampled evenly from the 'Dark2' colormap."""
    palette = plt.get_cmap('Dark2')
    sample_points = np.linspace(0, 1, 10)
    return cycle(list(map(palette, sample_points)))
    
def predict_by_time(model, data, min_reviews=10, threshold=0.5, fig_dir='paper/figs'):
    """
    Plot when reviews flagged by `model` were written relative to each product's recall date.

    For every ASIN in data.recalled_asins with at least `min_reviews` reviews, the
    reviews whose predicted probability is >= `threshold` are collected and their
    offset from the recall date is computed in days (negative = written before
    the recall, positive = written after it).

    Three figures are drawn, shown and saved under `fig_dir`:
      leadtime.pdf  -- per product, running count of flagged reviews over time
      dayshist.pdf  -- histogram of the day offsets of all flagged reviews
      counthist.pdf -- histogram of the number of flagged reviews per product
    One line per product with flagged reviews and a few summary lines are printed.

    Parameters
    ----------
    model : object providing predict_proba_reviews(data); the returned scores are
        indexed with the row labels of data.reviews_df.
    data : object providing recalls_df (columns label, AmazonAsin, RecallDate, ...),
        reviews_df and recalled_asins.
    min_reviews : int, minimum number of reviews a product needs to be considered.
    threshold : float, minimum score for a review to count as hazardous.
    fig_dir : str, directory the PDF figures are written to (created if missing).
    """
    if fig_dir:
        # savefig() does not create missing directories.
        os.makedirs(fig_dir, exist_ok=True)
    recalls_df = data.recalls_df[data.recalls_df.label==1]
    probas = model.predict_proba_reviews(data)
    found = 0
    correct = 0
    plt.figure(figsize=(8,6))
    colors = get_colors()
    all_diffs = []
    n_pos = []
    total_reviews = 0
    total_pos = 0
    total_time_diffs = 0
    for asin in data.recalled_asins:
        recall = recalls_df[recalls_df.AmazonAsin==asin].iloc[0]
        reviews = get_reviews_before_date(data, asin, recall['RecallDate'])
        if len(reviews) < min_reviews:  # only consider products with enough reviews.
            continue
        found += 1
        total_reviews += len(reviews)
        # Row labels of reviews_df are used as positions into the score vector.
        idx = np.array(reviews.index.tolist())
        vals = probas[idx]
        pos_idx = np.where(vals >= threshold)[0]
        total_pos += len(pos_idx)
        if asin == 'XXXX':   # For manual analysis of recall reviews
            print(recall['RecallName'])
            print(recall['RecallDescription'])
            print(recall['RecallDate'])
            print(recall['RecallTitle'])
            print('\n'.join('%s %s' % (x,y) for x,y in
                            zip(reviews.iloc[pos_idx]['review_time'],
                                reviews.iloc[pos_idx]['reviewText'])))
        if len(pos_idx) == 0:
            continue
        n_pos.append(len(pos_idx))
        color = next(colors)
        times = reviews.iloc[pos_idx]['review_time']
        # review date minus recall date: negative means the review predates the recall.
        # Reviews are sorted by time, so time_diffs[0] is the earliest flagged review.
        time_diffs = [-time_diff(t, recall['RecallDate']) for t in times]
        all_diffs.extend(time_diffs)
        if time_diffs[0] < 0: # found before recall
            correct += 1
        counts = np.arange(len(time_diffs)) + 1
        plt.plot(time_diffs, counts, '.-', color=color)
        plt.plot(time_diffs[0], 1, 'x', ms=6, color=color)
        total_time_diffs += len(time_diffs)
        print(asin, len(pos_idx), time_diffs[0], time_diffs[-1])
    plt.xticks(rotation=90)
    plt.axvline(x=0, color='k')
    plt.ylabel('Total number of hazardous reviews found', size=16)
    plt.xlabel(r'before recall $\leftarrow$     Days from recall     $\rightarrow$ after recall', size=16)
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, 'leadtime.pdf'))
    plt.show()
    # Guard against the case where no product has enough reviews.
    early_frac = correct / found if found else float('nan')
    print('found early warning for %d/%d (%.2f) product recalls' % (correct, found, early_frac))
    # NOTE: despite the label, these statistics cover every flagged review,
    # not only the earliest one per product.
    print('earliest day: mean=%.2f, median=%.2f' % (np.mean(all_diffs), np.median(all_diffs)))
    print('%d / %d reviews classified as positive' % (total_pos, total_reviews))
    print('%d total time diffs' % total_time_diffs)

    plt.figure(figsize=(8,6))
    plt.hist(all_diffs, bins=50)
    plt.xticks(rotation=90)
    plt.ylabel('Count', size=16)
    plt.xlabel(r'before recall $\leftarrow$     Days from recall     $\rightarrow$ after recall', size=16)
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, 'dayshist.pdf'))
    plt.show()

    plt.figure(figsize=(8,6))
    plt.hist(n_pos, bins=20)
    plt.ylabel('Count', size=16)
    plt.xlabel('Number of hazardous reviews found', size=16)
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, 'counthist.pdf'))
    plt.show()

# Run the recall lead-time analysis with the model bound to `best_model`;
# this prints one line per product and renders/saves the figures.
predict_by_time(best_model, data)









    



B00499DRY4 1 -111 -111
B001VNCVSO 1 -993 -993
B001UHNKMM 1 -627 -627
B0028K2RNI 1 32 32
B0000AQZXM 1 -1730 -1730
B00155UGTO 2 -1227 -791
B001H0GGJG 1 -37 -37
B000325T8S 2 47 200
B00DHINGB2 4 -797 -618
B000XHUHMG 1 -496 -496
B000096RDO 2 -3203 -3121
B003VIIUDW 1 339 339
B0009UBSFM 4 -2232 -974
B000J2DQPA 1 -760 -760
B0026L7D0G 2 -330 323
B00166LX9Y 2 343 636
B001N44UVW 5 36 712
B0002E7DHW 6 -977 98
B00004D3EU 25 -1365 477
B001D62PYE 3 -121 1595
B000324Y7U 29 -2508 -344
B005OOKOK8 3 -26 714
B000056C86 5 2565 3680
B00030HRQM 2 -2268 -1910
B004B762AK 11 346 1201
B000K0QZY0 4 -136 85
B00C870102 1 81 81
B0007GDOF0 2 -1130 -1053
B00007C65S 1 -117 -117
B00318CLA0 2 -1028 -828
B0035ER8MG 1 -59 -59
B0002JZOLO 1 -511 -511
B002M6PPTQ 1 35 35
B000ZMT6VM 3 132 839
B0028K2RMO 1 -381 -381
B001870Z9K 1 -591 -591
B001N44UVC 3 -494 714
B008K0TLJ8 1 -615 -615
B007S76KSE 2 -1126 -758
B002R26LZM 7 -873 588
B0087UUKHI 1 -730 -730
B00020V5A2 4 -311 330
B0035ER8KS 8 -63 1056
B001NAATW0 8 -422 505
B00GSNFDQ4 2 -660 -561
B00005610Y 1 -117 -117
B00139Q0X8 1 -836 -836
B000SES0IW 2 1097 1567
B0000D9SR8 24 -229 3236
B0035ER8IU 3 614 883
B0050386O4 2 -1272 -687
B000BLNZUK 1 13 13






    












    



found early warning for 39/86 (0.45) product recalls
earliest day: mean=-328.97, median=-136.50
204 / 7318 reviews classified as positive
204 total time diffs



In [28]:

    
# Plot review distribution for recalled vs non-recalled (Figure 1).
def plot_review_dist(data):
    """
    Compare the rating distribution of reviews for recalled vs. non-recalled products.

    Every row of data.reviews_df is assigned to the "recalled" group when its ASIN
    is in data.recalled_asins and to the "non-recalled" group otherwise. The mean
    review_score of each group is printed, and a grouped bar chart with the
    fraction of reviews per rating (1-5) is shown and saved to
    'paper/figs/ratings.pdf'.
    """
    reviews_df = data.reviews_df
    # A single vectorised membership test replaces a Python-level iterrows() loop
    # over every review; list() keeps the semantics of `in` for sets, lists and
    # dict keys.
    is_recalled = reviews_df.ASIN.isin(list(data.recalled_asins))
    recalled = reviews_df.review_score[is_recalled].tolist()
    nonrecalled = reviews_df.review_score[~is_recalled].tolist()
    print('recalled mean=%g' % np.mean(recalled))
    print('not recalled mean=%g' % np.mean(nonrecalled))

    plt.figure(figsize=(8,6))
    recalled_ct = Counter(recalled)
    nonrecalled_ct = Counter(nonrecalled)
    # Avoid ZeroDivisionError when one of the groups is empty (all bars are then 0).
    n_recalled = max(len(recalled), 1)
    n_nonrecalled = max(len(nonrecalled), 1)
    ratings = np.arange(5) + 1
    bar_width = .3
    plt.bar(ratings, [recalled_ct[r] / n_recalled for r in ratings],
            bar_width, alpha=.5, color='grey', label='recalled')
    plt.bar(ratings + bar_width, [nonrecalled_ct[r] / n_nonrecalled for r in ratings],
            bar_width, alpha=.5, color='w', label='non-recalled')
    plt.xticks(ratings + bar_width, ratings)
    plt.legend(loc='best', prop={'size':16})
    plt.ylabel('Percent of ratings', size=16)
    plt.xlabel('Rating', size=16)
    plt.tight_layout()
    # savefig() does not create missing directories.
    os.makedirs('paper/figs', exist_ok=True)
    plt.savefig('paper/figs/ratings.pdf')
    plt.show()
    
# Print the mean rating of both product groups and draw the rating distribution chart.
plot_review_dist(data)









    



recalled mean=3.77332
not recalled mean=4.12057



In [29]:

    
# Plot distribution of recall reviews for recalled vs non-recalled
def plot_recall_dist(data, model):
    """
    Compare the predicted-class distribution of reviews for recalled vs. non-recalled products.

    `model.predict_reviews(data)` supplies one predicted class (0 or 1) per review,
    looked up by the row label of data.reviews_df. Reviews are split by whether
    their ASIN is in data.recalled_asins; the class counts and the fraction of
    class-1 predictions of each group are printed, and a grouped bar chart of the
    class fractions is shown.
    """
    preds = np.asarray(model.predict_reviews(data))
    reviews_df = data.reviews_df
    # A single vectorised membership test replaces a Python-level iterrows() loop
    # over every review; list() keeps the semantics of `in` for sets, lists and
    # dict keys.
    is_recalled = reviews_df.ASIN.isin(list(data.recalled_asins)).values
    row_labels = reviews_df.index.values
    recalled = preds[row_labels[is_recalled]]
    nonrecalled = preds[row_labels[~is_recalled]]
    plt.figure(figsize=(8,6))
    recalled_ct = Counter(recalled)
    nonrecalled_ct = Counter(nonrecalled)
    print(recalled_ct)
    print(nonrecalled_ct)
    # Avoid ZeroDivisionError when one of the groups is empty (fractions are then 0).
    n_recalled = max(len(recalled), 1)
    n_nonrecalled = max(len(nonrecalled), 1)
    print('recalled pct pos=%g' % (recalled_ct[1] / n_recalled))
    print('nonrecalled pct pos=%g' % (nonrecalled_ct[1] / n_nonrecalled))
    ratings = np.arange(2)
    bar_width = .3
    plt.bar(ratings, [recalled_ct[r] / n_recalled for r in ratings],
            bar_width, alpha=.5, color='r', label='recalled', hatch="//")
    plt.bar(ratings + bar_width, [nonrecalled_ct[r] / n_nonrecalled for r in ratings],
            bar_width, alpha=.5, color='g', label='non-recalled')
    plt.xticks(ratings + bar_width, ratings)
    plt.legend(loc='best')
    plt.ylabel('Percent of reviews', size=16)
    plt.xlabel('Predicted class', size=16)
    plt.show()
    
# Draw the predicted-class distribution twice: first for `best_model`,
# then for `baseline_model`, so the two outputs can be compared.
plot_recall_dist(data, best_model)
plot_recall_dist(data, baseline_model)









    



Counter({0: 7356, 1: 214})
Counter({0: 897233, 1: 10643})
recalled pct pos=0.0282695
nonrecalled pct pos=0.011723






    












    



Counter({0: 7343, 1: 227})
Counter({0: 897126, 1: 10750})
recalled pct pos=0.0299868
nonrecalled pct pos=0.0118408



In [30]:

    
# Plot Figure 6: histogram of the per-product values returned by
# best_model.score_asin_recalls (labelled as the number of hazardous reviews).
asin2recall_score = best_model.score_asin_recalls(data)
fig, ax = plt.subplots()
# bottom=1: bars are drawn starting at y=1, since the y axis is log-scaled below.
ax.hist(sorted(asin2recall_score.values()), bins=50, bottom=1)
ax.set_yscale('log')
ax.set_xlabel('Number of hazardous reviews', size=16)
ax.set_ylabel('Number of products', size=16)
fig.tight_layout()
fig.savefig('paper/figs/pred_ratings.pdf')
plt.show()



In [31]:

    
# The ten most frequent per-product scores, as (score, number of products) pairs.
Counter(asin2recall_score.values()).most_common(10)









    Out[31]:





[(0, 59362),
 (1, 3285),
 (2, 778),
 (3, 374),
 (4, 190),
 (5, 113),
 (6, 63),
 (7, 57),
 (8, 46),
 (9, 29)]



In [32]:

    
# Print top coef for inclusion in Table 4.
def print_coef_table(models, data):
    """
    Print a comparison of the strongest terms of the informed-prior model and the baseline model.

    The two models are looked up by name through get_models(). The informed-prior
    coefficients are multiplied element-wise by the first row of that model's
    `transform` before being compared. Output, in order:
      * terms that are in one top-20 list but not in the other,
      * both top-20 term lists,
      * the 20 terms whose standardized coefficient is most in favor of the
        baseline, then the 20 most in favor of the informed prior
        (columns: term, diff, baseline coef, informed coef, and both scaled values).
    """
    wanted_names = ['RandNegSampThreshInfoPrior(C=1, nneg=20000, t=5.0)',
                    'RandomNegSamplesThresh(C=1, nneg=20000, t=5.0)']
    submodels = get_models(models, wanted_names)
    informed_model = submodels[0]
    baseline_model_ = submodels[1]
    coef = informed_model.clf.coef_[0] * informed_model.transform[0,:].toarray()[0]
    coef2 = baseline_model_.clf.coef_[0]
    features = data.vec.features

    def strongest_terms(weights):
        # Terms of the 20 largest weights, largest first.
        descending = np.argsort(weights)[::-1]
        return features[descending[:20]]

    terms1 = strongest_terms(coef)
    terms2 = strongest_terms(coef2)
    print('in informed prior, but not baseline:', set(terms1) - set(terms2))
    print('in baseline prior, but not informed prior:', set(terms2) - set(terms1))

    print('informed prior:')
    print(', '.join(terms1))
    print('baseline:')
    print(', '.join(terms2))

    # Standardize both coefficient vectors so that they are comparable.
    scaled = scale(coef)
    scaled2 = scale(coef2)
    diff = scaled2 - scaled

    def print_rows(indices):
        for j in indices:
            print(features[j], diff[j], coef2[j], coef[j], scaled2[j], scaled[j])

    ascending = np.argsort(diff)
    print('\n\n')
    print_rows(ascending[::-1][:20])
    print('\n\n')
    print_rows(ascending[:20])
    
# Print the coefficient comparison between the informed-prior and baseline models.
print_coef_table(models, data)









    



in informed prior, but not baseline: {'burnt', 'leaned forward', 'was chewing', 'was playing', 'got stuck', 'was hanging', 'very dangerous', 'swallow it', 'snapped', 'smacked', 'cpsc', 'recalled', 'the consumer', 'emergency room', 'is unsafe', 'injured', 'exploded'}
in baseline prior, but not informed prior: {'fell', 'rash', 'light', 'caused', 'noticed', 'pampers', 'choking', 'rock', 'night light', 'unsafe', 'stuck', 'broke', 'crib', 'dangerous', 'model', 'happened', 'gate'}
informed prior:
very dangerous, cpsc, mold, smacked, swallow it, emergency room, recalled, recall, was playing, hazard, is unsafe, snapped, leaned forward, the consumer, got stuck, was hanging, burnt, injured, exploded, was chewing
baseline:
mold, pampers, fell, crib, rock, dangerous, night light, hazard, broke, happened, gate, rash, light, recall, model, stuck, unsafe, caused, noticed, choking



dangerous the 39.3137994843 0.0341056623672 -5.04980244726 0.764773373732 -38.5490261105
have happened 26.1050187427 0.017878583183 -3.3636329034 0.400269600893 -25.7047491419
rock 23.2837413689 1.31904122114 0.843676905767 29.6278764444 6.34413507548
light 22.3088004069 1.08658271532 0.286178931723 24.4062341859 2.09743377894
crib 21.6961876689 1.3402889726 1.11474411547 30.1051579985 8.40897032962
pampers 21.1650146449 1.53684122427 1.76407925519 34.5202493703 13.3552347254
night light 19.0342014994 1.26248634874 1.2347754675 28.3575021441 9.32330064468
gate 17.1057717835 1.11372619459 1.04926369058 25.0159496183 7.91017783482
on 16.5640166926 0.851071217472 0.34585300944 19.1160135283 2.55199683569
stuck 16.224281969 0.998913178154 0.826417003602 22.4369409633 6.21265899428
mold 15.5544504725 2.89140534932 6.49503392393 64.9473970121 49.3929465396
off 15.5451915924 0.90731668299 0.645461874563 20.379437719 4.83424612665
diapers 15.4748927068 0.721196931315 0.105850614336 16.1986883115 0.723795604629
found 15.4017585383 0.847495897227 0.487888707454 19.0357022348 3.63394369647
between 15.0176297747 0.807275107418 0.419711088427 18.1322352913 3.11460551661
sun 14.8757129794 0.730055430613 0.210632082959 16.3976739933 1.52196101396
got 14.265302977 0.697343854846 0.194303761742 15.6628841616 1.39758118459
choking 14.0112063406 0.962189358891 1.00865172002 21.6120253686 7.60081902807
under 14.0095531604 0.836493442833 0.638209857917 18.7885575644 4.77900440394
child 13.8738258104 0.794252605603 0.531465801478 17.8397149314 3.96588912104



very dangerous -50.0340517483 0.552102788021 8.2070925209 12.4003797868 62.4344315351
cpsc -48.7921598516 0.30567822447 7.31738954943 6.8650223688 55.6571822204
great -39.3394704541 -2.1094978178 -1.04554075463 -47.386316981 -8.04684652694
loves -34.251792113 -1.76358215491 -0.69338567826 -39.6161223098 -5.36433019677
emergency room -33.7507882665 0.188965262324 4.99862059648 4.24333583122 37.9941240977
swallow it -33.568449876 0.245794523956 5.14226478572 5.51987365033 39.0883235264
love -33.0342804413 -1.73322215626 -0.763690838852 -38.934155215 -5.89987477369
was hanging -31.863245472 0.0566961147483 4.36078557903 1.27221559606 33.1354610681
leaned forward -31.4333242363 0.167000897629 4.62961911304 3.74995721954 35.1832814559
smacked -31.1715413075 0.358541485004 5.16007800494 8.05247315341 39.224014461
is unsafe -27.583599729 0.400924098247 4.81404074783 9.00450045634 36.5881001854
got stuck -25.9016777515 0.368058416899 4.49632549255 8.26624899948 34.167926751
heard him -25.1234678861 0.111424919922 3.63738906569 2.50157152281 27.6250394089
was playing -24.5489038383 0.563889510163 4.89621346787 12.6651412169 37.2140450551
was chewing -24.4306360841 0.293505860023 4.08336565476 6.59159837703 31.0222344611
perfect -24.3314569296 -1.23965399691 -0.450718747474 -27.8472889837 -3.51583205404
rolled into -24.2998941386 0.096047683391 3.48392682071 2.15615749803 26.4560516366
filed -23.7265413777 0.110238454961 3.45050479322 2.47492033378 26.2014617115
off leaving -23.6803121532 0.146048064603 3.55003322416 3.27930032541 26.9596124787
these -23.6640573443 -1.31799777647 -0.769357912292 -29.6071006481 -5.94304330388



In [33]:

    
def print_chi2_predicted(model, data, n_feats=100):
    """
    Display the features most associated (chi-squared) with the model's own predictions.

    The reviews in data.X_reviews are labelled with model.predict_reviews(data) and
    a chi2 score is computed for every feature against those predicted labels.
    Two tables of `n_feats` rows each are displayed: the highest-scoring features
    among those with a positive model coefficient ('RECALL TERMS') and among those
    with a negative coefficient ('NON-RECALL TERMS'). Each row lists the chi2
    score, the coefficient, and the feature's summed count / per-review fraction
    within the predicted-positive and predicted-negative reviews.
    """
    preds = model.predict_reviews(data) # model.clf.predict(data.X_reviews)
    nneg = np.count_nonzero(preds == 0)
    npos = np.count_nonzero(preds == 1)
    print(Counter(preds))
    chi, _ = chi2(data.X_reviews, preds)
    chi = np.nan_to_num(chi)
    # Per-feature totals within each predicted class.
    ppos_counts = data.X_reviews[np.where(preds==1)].sum(axis=0).A1
    pneg_counts = data.X_reviews[np.where(preds==0)].sum(axis=0).A1
    coefs = np.asarray(model.clf.coef_[0])

    def show_table(scores):
        # Build and display one table for the n_feats highest scores.
        rows = [{'feature': data.vec.features[j],
                 'chi2': '%.1f' % scores[j],
                 'coef': '%.2f' % model.clf.coef_[0][j],
                 'pos_count': ppos_counts[j],
                 'pos_frac': '%.3f' % (ppos_counts[j]/npos),
                 'neg_count': pneg_counts[j],
                 'neg_frac': '%.3f' % (pneg_counts[j]/nneg)}
                for j in np.argsort(scores)[::-1][:n_feats]]
        display(pd.DataFrame(rows))

    # Zero out the score of every feature whose coefficient has the other sign.
    print('RECALL TERMS')
    show_table(chi * (coefs > 0).astype(int))

    print('\n\nNON-RECALL TERMS')
    show_table(chi * (coefs < 0).astype(int))


# First list the top terms of `best_model` via the Evaluator, then (after a
# blank separator) display the chi2 tables computed from its predictions.
Evaluator(data).top_terms(best_model, n=50)
print('\n\n')
print_chi2_predicted(best_model, data, n_feats=50)









    



TOP FEATURES:

CLASS 0
great	1.336
this	1.299
these	0.986
for	0.980
love	0.975
loves	0.885
so	0.833
it	0.788
good	0.723
but	0.713
easy	0.697
is	0.615
well	0.612
you	0.609
are	0.604
easy to	0.602
they	0.598
and	0.593
very	0.579
perfect	0.576
to	0.533
don	0.514
with	0.509
recommend	0.493
cute	0.458
works	0.455
one	0.454
like	0.453
nice	0.443
really	0.435
as	0.434
our	0.430
soft	0.421
best	0.417
just	0.411
too	0.404
loved	0.404
them	0.401
much	0.401
all	0.388
other	0.376
use	0.371
car	0.369
can	0.368
my	0.365
what	0.359
the	0.353
buy	0.344
ve	0.341
would	0.341

CLASS 1
pampers	1.104
mold	1.006
rash	0.832
burn	0.715
snapped	0.698
allowing the	0.671
gate with	0.664
be tightened	0.660
night light	0.639
smacked	0.626
died	0.617
delta	0.599
model	0.595
that side	0.580
collapsed	0.564
crib	0.561
burnt	0.558
blisters	0.558
broke	0.553
swallow it	0.544
tightened	0.537
exploded	0.537
flipped	0.525
nap nanny	0.525
approved	0.524
attached	0.514
chemical	0.508
fire	0.507
stairway	0.505
disintegrated	0.482
causing	0.476
cause the	0.474
began to	0.469
gate	0.467
appeared	0.462
com	0.459
very dangerous	0.455
side	0.446
bar with	0.446
happened	0.441
finger	0.437
caused	0.429
noticed	0.427
red	0.424
fall	0.421
bleeding	0.412
burned	0.409
unsafe	0.404
seam	0.401
hazard	0.398



Counter({0: 904589, 1: 10857})
RECALL TERMS






    






  
    
      
      chi2
      coef
      feature
      neg_count
      neg_frac
      pos_count
      pos_frac
    
  
  
    
      0
      33099.1
      0.40
      hazard
      1643
      0.002
      1055
      0.097
    
    
      1
      31683.4
      0.22
      recalled
      290
      0.000
      578
      0.053
    
    
      2
      28377.6
      0.24
      dangerous
      1996
      0.002
      1041
      0.096
    
    
      3
      24130.7
      0.55
      broke
      9583
      0.011
      1942
      0.179
    
    
      4
      23099.4
      0.45
      very dangerous
      43
      0.000
      316
      0.029
    
    
      5
      19591.1
      0.44
      happened
      4142
      0.005
      1167
      0.107
    
    
      6
      16880.7
      0.35
      recall
      571
      0.001
      465
      0.043
    
    
      7
      16348.7
      0.40
      unsafe
      1087
      0.001
      586
      0.054
    
    
      8
      16187.7
      0.05
      choking hazard
      599
      0.001
      461
      0.042
    
    
      9
      15014.5
      0.39
      fell
      7916
      0.009
      1390
      0.128
    
    
      10
      14770.4
      0.70
      snapped
      1669
      0.002
      663
      0.061
    
    
      11
      14047.8
      0.02
      be recalled
      43
      0.000
      205
      0.019
    
    
      12
      11513.6
      0.21
      fell out
      334
      0.000
      300
      0.028
    
    
      13
      11380.8
      1.01
      mold
      2168
      0.002
      646
      0.060
    
    
      14
      10612.7
      0.11
      choking
      1785
      0.002
      569
      0.052
    
    
      15
      9677.5
      0.16
      is dangerous
      141
      0.000
      201
      0.019
    
    
      16
      9289.0
      0.11
      been recalled
      62
      0.000
      157
      0.014
    
    
      17
      9086.6
      0.20
      contacted
      3446
      0.004
      715
      0.066
    
    
      18
      8598.9
      0.04
      safety hazard
      182
      0.000
      201
      0.019
    
    
      19
      8561.2
      0.32
      is unsafe
      63
      0.000
      148
      0.014
    
    
      20
      8557.6
      0.43
      caused
      1943
      0.002
      527
      0.049
    
    
      21
      8317.1
      0.43
      noticed
      8713
      0.010
      1094
      0.101
    
    
      22
      8073.2
      0.14
      the company
      6537
      0.007
      929
      0.086
    
    
      23
      8060.4
      0.34
      the plastic
      10894
      0.012
      1213
      0.112
    
    
      24
      7839.5
      0.48
      causing
      2084
      0.002
      520
      0.048
    
    
      25
      7342.6
      0.34
      injury
      501
      0.001
      266
      0.025
    
    
      26
      7327.5
      0.18
      company
      13046
      0.014
      1279
      0.118
    
    
      27
      7196.8
      0.28
      injured
      263
      0.000
      204
      0.019
    
    
      28
      7180.8
      0.22
      this happened
      377
      0.000
      234
      0.022
    
    
      29
      7128.7
      0.30
      not safe
      749
      0.001
      310
      0.029
    
    
      30
      7039.3
      0.28
      called
      7209
      0.008
      915
      0.084
    
    
      31
      6498.3
      0.01
      broke off
      771
      0.001
      298
      0.027
    
    
      32
      6271.1
      0.22
      plastic
      37407
      0.041
      2175
      0.200
    
    
      33
      6011.9
      0.20
      the recall
      70
      0.000
      117
      0.011
    
    
      34
      5984.3
      0.24
      metal
      6435
      0.007
      798
      0.074
    
    
      35
      5976.5
      0.29
      off
      93671
      0.104
      3768
      0.347
    
    
      36
      5974.7
      0.20
      broken
      6337
      0.007
      791
      0.073
    
    
      37
      5791.4
      0.00
      snapped off
      153
      0.000
      146
      0.013
    
    
      38
      5684.1
      0.18
      stuck
      10061
      0.011
      989
      0.091
    
    
      39
      5626.9
      0.22
      safety
      17178
      0.019
      1324
      0.122
    
    
      40
      5599.7
      0.15
      contacted the
      1218
      0.001
      338
      0.031
    
    
      41
      5214.4
      0.28
      caused the
      266
      0.000
      168
      0.015
    
    
      42
      5182.7
      0.08
      broke the
      772
      0.001
      263
      0.024
    
    
      43
      5156.2
      0.27
      of the
      178220
      0.197
      5511
      0.508
    
    
      44
      5105.6
      0.38
      got stuck
      326
      0.000
      180
      0.017
    
    
      45
      5021.3
      0.19
      cpsc
      11
      0.000
      70
      0.006
    
    
      46
      4967.0
      0.23
      mold on
      35
      0.000
      85
      0.008
    
    
      47
      4815.1
      0.03
      product safety
      35
      0.000
      83
      0.008
    
    
      48
      4498.0
      0.22
      the metal
      2344
      0.003
      414
      0.038
    
    
      49
      4406.1
      0.11
      was recalled
      63
      0.000
      91
      0.008
    
  








    



NON-RECALL TERMS






    






  
    
      
      chi2
      coef
      feature
      neg_count
      neg_frac
      pos_count
      pos_frac
    
  
  
    
      0
      5443.6
      -0.01
      safety commission
      6
      0.000
      71
      0.007
    
    
      1
      4721.2
      -0.00
      consumer product
      16
      0.000
      70
      0.006
    
    
      2
      4136.3
      -0.01
      fell off
      1457
      0.002
      314
      0.029
    
    
      3
      3725.3
      -0.08
      customer
      10366
      0.011
      832
      0.077
    
    
      4
      3495.3
      -0.03
      customer service
      8309
      0.009
      715
      0.066
    
    
      5
      3234.7
      -0.08
      could
      77816
      0.086
      2702
      0.249
    
    
      6
      3116.9
      -0.14
      after
      104963
      0.116
      3272
      0.301
    
    
      7
      3072.2
      -0.05
      service
      10748
      0.012
      781
      0.072
    
    
      8
      3064.7
      -0.04
      had
      177275
      0.196
      4715
      0.434
    
    
      9
      2808.5
      -0.00
      recall on
      66
      0.000
      68
      0.006
    
    
      10
      2777.4
      -0.08
      on the
      150075
      0.166
      4068
      0.375
    
    
      11
      2727.4
      -0.06
      noticed that
      2666
      0.003
      346
      0.032
    
    
      12
      2499.1
      -0.02
      response
      1712
      0.002
      264
      0.024
    
    
      13
      2489.2
      -0.21
      replacement
      11452
      0.013
      741
      0.068
    
    
      14
      2451.1
      -0.09
      within
      11956
      0.013
      755
      0.070
    
    
      15
      2451.0
      -0.04
      called the
      1530
      0.002
      247
      0.023
    
    
      16
      2443.5
      -0.01
      from
      148688
      0.164
      3900
      0.359
    
    
      17
      2381.4
      -0.02
      fell apart
      803
      0.001
      177
      0.016
    
    
      18
      2339.0
      -0.01
      it fell
      809
      0.001
      176
      0.016
    
    
      19
      2251.9
      -0.04
      not
      302759
      0.335
      6525
      0.601
    
    
      20
      2214.4
      -0.00
      not buy
      4670
      0.005
      424
      0.039
    
    
      21
      2202.0
      -0.08
      first
      98669
      0.109
      2822
      0.260
    
    
      22
      2173.2
      -0.07
      then
      66312
      0.073
      2132
      0.196
    
    
      23
      2113.3
      -0.04
      went
      24399
      0.027
      1097
      0.101
    
    
      24
      2107.0
      -0.03
      it snapped
      161
      0.000
      80
      0.007
    
    
      25
      2025.5
      -0.13
      off the
      18568
      0.021
      911
      0.084
    
    
      26
      1968.7
      -0.02
      do not
      26426
      0.029
      1124
      0.104
    
    
      27
      1959.6
      -0.15
      sent
      9515
      0.011
      602
      0.055
    
    
      28
      1912.0
      -0.05
      company and
      1673
      0.002
      229
      0.021
    
    
      29
      1879.0
      -0.04
      immediately
      9663
      0.011
      597
      0.055
    
    
      30
      1863.5
      -0.06
      that the
      56082
      0.062
      1811
      0.167
    
    
      31
      1842.3
      -0.11
      did
      69771
      0.077
      2098
      0.193
    
    
      32
      1832.5
      -0.02
      of
      439072
      0.485
      8407
      0.774
    
    
      33
      1786.3
      -0.10
      were
      89275
      0.099
      2474
      0.228
    
    
      34
      1782.1
      -0.01
      broke in
      368
      0.000
      105
      0.010
    
    
      35
      1729.2
      -0.02
      told me
      3427
      0.004
      320
      0.029
    
    
      36
      1716.1
      -0.00
      hazard and
      103
      0.000
      59
      0.005
    
    
      37
      1691.5
      -0.00
      repair
      829
      0.001
      151
      0.014
    
    
      38
      1675.5
      -0.02
      completely
      17743
      0.020
      824
      0.076
    
    
      39
      1673.6
      -0.06
      problem
      41948
      0.046
      1437
      0.132
    
    
      40
      1618.6
      -0.01
      was not
      15572
      0.017
      750
      0.069
    
    
      41
      1603.5
      -0.20
      have happened
      193
      0.000
      74
      0.007
    
    
      42
      1593.8
      -0.01
      started
      29525
      0.033
      1120
      0.103
    
    
      43
      1579.1
      -0.02
      out of
      62426
      0.069
      1853
      0.171
    
    
      44
      1579.0
      -0.01
      send me
      1097
      0.001
      168
      0.015
    
    
      45
      1563.0
      -0.05
      product
      131460
      0.145
      3167
      0.292
    
    
      46
      1549.4
      -0.21
      again
      44116
      0.049
      1450
      0.134
    
    
      47
      1546.6
      -0.13
      before
      52429
      0.058
      1631
      0.150
    
    
      48
      1513.3
      -0.02
      the hinge
      469
      0.001
      108
      0.010
    
    
      49
      1494.2
      -0.01
      would not
      17442
      0.019
      781
      0.072



In [34]:

    
def get_class_discrepancy(model, data, n):
    """
    For each of the top n features in the positive class, get the class distribution
    in the training data, and the predicted class distribution in the testing data.

    Features are visited in order of decreasing coefficient. A feature is only
    reported if it occurs more than twice in data.X_complaints.

    Parameters
    ----------
    model : object exposing get_coef() (the first row is used as the coefficient
        vector), predict_reviews(data) (labels compared against 0 and 1) and
        neg_sample_idx (row indices into data.X_reviews; presumably the reviews
        sampled as negative training examples -- confirm against the model class).
    data : object exposing X_reviews and X_complaints (matrices summed over
        axis 0, i.e. one column per feature) and vec.features (feature names).
    n : int
        Maximum number of features to report. n <= 0 yields an empty table.

    Returns
    -------
    pandas.DataFrame
        One row per reported feature with columns term, coef, pr_pos_train,
        pr_pos_test, n_pos_train, n_pos_test and diff (pr_pos_train - pr_pos_test),
        sorted by pr_pos_test in descending order. pr_pos_test is NaN for a
        feature that occurs in no review.

    Side effects
    ------------
    Sets the pandas option display.max_rows to 1000 so the whole table is shown.
    """
    columns = ['term', 'coef', 'pr_pos_train', 'pr_pos_test',
               'n_pos_train', 'n_pos_test', 'diff']

    def column_totals(matrix):
        # np.asarray(...).ravel() flattens both np.matrix and ndarray results.
        return np.asarray(matrix.sum(axis=0)).ravel()

    coef = model.get_coef()[0]
    top_coef_ind = np.argsort(coef)[::-1]
    preds = np.asarray(model.predict_reviews(data))

    # Per-feature counts in the reviews predicted positive / negative.
    ppos_counts = column_totals(data.X_reviews[np.flatnonzero(preds == 1)])
    pneg_counts = column_totals(data.X_reviews[np.flatnonzero(preds == 0)])

    # Per-feature counts in the training data: complaints vs. sampled reviews.
    train_pos = column_totals(data.X_complaints)
    train_neg = column_totals(data.X_reviews[model.neg_sample_idx])

    results = []
    for i in top_coef_ind:
        # Check the limit *before* adding a row so that n <= 0 returns nothing.
        if len(results) >= n:
            break
        if train_pos[i] <= 2:
            continue
        train_pr = train_pos[i] / (train_pos[i] + train_neg[i])
        test_total = ppos_counts[i] + pneg_counts[i]
        # A term absent from every review has no defined test distribution.
        test_pr = ppos_counts[i] / test_total if test_total > 0 else float('nan')
        results.append(
            {
                'term': data.vec.features[i],
                'coef': coef[i],
                'pr_pos_train': train_pr,
                'pr_pos_test': test_pr,
                'n_pos_train': train_pos[i],
                'n_pos_test': ppos_counts[i],
                'diff': train_pr - test_pr
            }
        )

    # Global display setting (kept from the original) so long tables are not truncated.
    pd.set_option('display.max_rows', 1000)
    if not results:
        # sort_values would raise KeyError on a frame without columns.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(results).sort_values('pr_pos_test', ascending=False)

# Class-distribution table for up to 100 top-coefficient features of `baseline_model`.
get_class_discrepancy(baseline_model, data, 100)









    Out[34]:






  
    
      
      coef
      diff
      n_pos_test
      n_pos_train
      pr_pos_test
      pr_pos_train
      term
    
  
  
    
      20
      0.909811
      0.571478
      322
      131
      0.370968
      0.942446
      recalled
    
    
      81
      0.552103
      0.676271
      105
      31
      0.292479
      0.968750
      very dangerous
    
    
      90
      0.524554
      0.753390
      88
      161
      0.240437
      0.993827
      the consumer
    
    
      13
      1.007839
      0.705103
      235
      178
      0.226834
      0.931937
      recall
    
    
      78
      0.563890
      0.756768
      66
      86
      0.209524
      0.966292
      was playing
    
    
      7
      1.198926
      0.696901
      535
      205
      0.198295
      0.895197
      hazard
    
    
      5
      1.278500
      0.569231
      560
      104
      0.184392
      0.753623
      dangerous
    
    
      79
      0.558286
      0.754362
      124
      120
      0.161669
      0.916031
      injury
    
    
      16
      0.978184
      0.669801
      255
      111
      0.152421
      0.822222
      unsafe
    
    
      19
      0.962189
      0.626890
      349
      131
      0.148258
      0.775148
      choking
    
    
      67
      0.586398
      0.738243
      54
      79
      0.139535
      0.877778
      injuries
    
    
      9
      1.146932
      0.638772
      640
      224
      0.120550
      0.759322
      happened
    
    
      17
      0.975297
      0.717757
      281
      153
      0.113765
      0.831522
      caused
    
    
      2
      1.530806
      0.455141
      1058
      219
      0.113690
      0.568831
      fell
    
    
      63
      0.593393
      0.608079
      261
      54
      0.111921
      0.720000
      snapped
    
    
      33
      0.795858
      0.697881
      290
      140
      0.111367
      0.809249
      causing
    
    
      8
      1.177096
      0.525326
      1222
      149
      0.106030
      0.631356
      broke
    
    
      0
      2.891405
      0.738541
      295
      210
      0.104833
      0.843373
      mold
    
    
      87
      0.538756
      0.467357
      777
      112
      0.104072
      0.571429
      the company
    
    
      85
      0.539605
      0.596581
      160
      48
      0.099071
      0.695652
      began to
    
    
      54
      0.657202
      0.796929
      167
      197
      0.098525
      0.895455
      consumer
    
    
      45
      0.697980
      0.836743
      40
      86
      0.098039
      0.934783
      rock and
    
    
      99
      0.513876
      0.410158
      245
      31
      0.098039
      0.508197
      cracked
    
    
      75
      0.574807
      0.693934
      231
      148
      0.097509
      0.791444
      stuck in
    
    
      91
      0.522639
      0.498877
      705
      147
      0.086780
      0.585657
      called
    
    
      22
      0.897836
      0.657861
      111
      67
      0.086583
      0.744444
      fire
    
    
      95
      0.518574
      0.600593
      49
      26
      0.083618
      0.684211
      burned
    
    
      96
      0.515046
      0.710583
      56
      34
      0.080114
      0.790698
      melted
    
    
      48
      0.691732
      0.470610
      467
      101
      0.078303
      0.548913
      please
    
    
      43
      0.713461
      0.430101
      934
      140
      0.077145
      0.507246
      the plastic
    
    
      18
      0.967269
      0.603867
      754
      290
      0.076884
      0.680751
      noticed
    
    
      50
      0.674635
      0.366967
      1080
      165
      0.075393
      0.442359
      company
    
    
      15
      0.998913
      0.635676
      833
      315
      0.075385
      0.711061
      stuck
    
    
      88
      0.537157
      0.568593
      697
      278
      0.071960
      0.640553
      between the
    
    
      37
      0.736469
      0.429907
      15
      3
      0.070093
      0.500000
      be tightened
    
    
      40
      0.718345
      0.430356
      92
      14
      0.069644
      0.500000
      tightened
    
    
      52
      0.668528
      0.589265
      358
      172
      0.062250
      0.651515
      fisher price
    
    
      51
      0.668529
      0.586946
      359
      172
      0.062111
      0.649057
      fisher
    
    
      83
      0.544205
      0.450337
      425
      157
      0.061063
      0.511401
      was in
    
    
      84
      0.543964
      0.461660
      456
      132
      0.060079
      0.521739
      today
    
    
      76
      0.573443
      0.669773
      28
      27
      0.059957
      0.729730
      delta
    
    
      11
      1.111200
      0.703367
      112
      154
      0.059009
      0.762376
      rash
    
    
      31
      0.799682
      0.613624
      330
      241
      0.053966
      0.667590
      leg
    
    
      36
      0.779019
      0.347028
      974
      243
      0.052643
      0.399671
      safety
    
    
      49
      0.688551
      0.342233
      152
      30
      0.052504
      0.394737
      hardware
    
    
      66
      0.587301
      0.693510
      50
      73
      0.051387
      0.744898
      diaper rash
    
    
      58
      0.603494
      0.421202
      382
      87
      0.049069
      0.470270
      loose
    
    
      68
      0.585479
      0.308041
      692
      112
      0.047514
      0.355556
      apart
    
    
      23
      0.893270
      0.436599
      320
      111
      0.046010
      0.482609
      cause
    
    
      32
      0.797759
      0.266252
      1787
      265
      0.045147
      0.311398
      plastic
    
    
      4
      1.319041
      0.633438
      186
      194
      0.044884
      0.678322
      rock
    
    
      47
      0.694841
      0.509107
      82
      21
      0.043524
      0.552632
      died
    
    
      77
      0.564885
      0.370774
      447
      140
      0.043428
      0.414201
      my child
    
    
      86
      0.539603
      0.176183
      30
      5
      0.041209
      0.217391
      approved
    
    
      30
      0.807275
      0.403558
      935
      336
      0.040887
      0.444444
      between
    
    
      59
      0.603360
      0.290675
      305
      58
      0.040754
      0.331429
      needs to
    
    
      92
      0.521339
      0.444299
      326
      149
      0.039467
      0.483766
      red
    
    
      35
      0.785002
      0.259019
      696
      134
      0.039422
      0.298441
      fall
    
    
      14
      1.000701
      0.465593
      369
      160
      0.039139
      0.504732
      model
    
    
      98
      0.513887
      0.328334
      264
      77
      0.038333
      0.366667
      arm
    
    
      42
      0.713793
      0.292478
      538
      127
      0.038251
      0.330729
      attached
    
    
      69
      0.581927
      0.841933
      26
      66
      0.038067
      0.880000
      http
    
    
      65
      0.588765
      0.243432
      365
      65
      0.037954
      0.281385
      bar
    
    
      72
      0.577582
      0.860868
      26
      70
      0.036568
      0.897436
      www
    
    
      10
      1.113726
      0.122459
      674
      72
      0.036482
      0.158940
      gate
    
    
      71
      0.578609
      0.399531
      79
      37
      0.035763
      0.435294
      rocker
    
    
      21
      0.907317
      0.165736
      3483
      408
      0.035745
      0.201481
      off
    
    
      25
      0.867540
      0.227407
      1726
      298
      0.035611
      0.263019
      side
    
    
      24
      0.886961
      0.770345
      35
      91
      0.034965
      0.805310
      burn
    
    
      29
      0.836493
      0.209678
      912
      149
      0.034184
      0.243863
      under
    
    
      28
      0.841278
      0.686280
      118
      113
      0.033466
      0.719745
      com
    
    
      3
      1.340289
      0.307039
      1341
      489
      0.032309
      0.339348
      crib
    
    
      6
      1.262486
      0.507288
      102
      56
      0.031174
      0.538462
      night light
    
    
      62
      0.598776
      0.238809
      181
      39
      0.030157
      0.268966
      ring
    
    
      80
      0.552995
      0.509049
      138
      131
      0.030046
      0.539095
      sleeper
    
    
      61
      0.599194
      0.380822
      128
      73
      0.029291
      0.410112
      infants
    
    
      73
      0.577034
      0.196843
      5350
      987
      0.029119
      0.225962
      of the
    
    
      94
      0.520236
      0.169915
      1092
      190
      0.029038
      0.198953
      came
    
    
      57
      0.620084
      0.156313
      482
      73
      0.028497
      0.184810
      out and
    
    
      34
      0.794253
      0.222003
      1817
      445
      0.028278
      0.250281
      child
    
    
      60
      0.599935
      0.444821
      63
      48
      0.025767
      0.470588
      jumper
    
    
      41
      0.714468
      0.123251
      574
      68
      0.025545
      0.148796
      straps
    
    
      27
      0.847496
      0.212581
      1112
      281
      0.024550
      0.237131
      found
    
    
      1
      1.536841
      0.887296
      12
      144
      0.024096
      0.911392
      pampers
    
    
      89
      0.529613
      0.155361
      530
      64
      0.023410
      0.178771
      bad
    
    
      74
      0.575606
      0.158166
      690
      124
      0.022592
      0.180758
      that is
    
    
      70
      0.580550
      0.161738
      7149
      1449
      0.022029
      0.183767
      was
    
    
      97
      0.514736
      0.303588
      37
      13
      0.021412
      0.325000
      this can
    
    
      44
      0.701729
      0.233060
      577
      205
      0.021283
      0.254342
      infant
    
    
      46
      0.697344
      0.128619
      1851
      318
      0.020817
      0.149436
      got
    
    
      26
      0.851071
      0.135159
      7304
      1337
      0.020090
      0.155248
      on
    
    
      82
      0.544560
      0.154377
      788
      204
      0.019092
      0.173469
      night
    
    
      56
      0.623273
      0.165787
      2205
      598
      0.018270
      0.184057
      son
    
    
      93
      0.520460
      0.120245
      7467
      1490
      0.016591
      0.136835
      in
    
    
      55
      0.635318
      0.301336
      143
      107
      0.016171
      0.317507
      pacifier
    
    
      12
      1.086583
      0.093082
      597
      90
      0.016141
      0.109223
      light
    
    
      64
      0.591707
      0.198794
      89
      30
      0.015492
      0.214286
      removable
    
    
      53
      0.658033
      0.203999
      88
      30
      0.014979
      0.218978
      the sun
    
    
      38
      0.730055
      0.166493
      119
      37
      0.013119
      0.179612
      sun
    
    
      39
      0.721197
      0.226562
      177
      173
      0.006907
      0.233468
      diapers



In [35]:

    
# Same table for `best_model`, for comparison with the `baseline_model` output.
get_class_discrepancy(best_model, data, 100)









    Out[35]:






  
    
      
      coef
      diff
      n_pos_test
      n_pos_train
      pr_pos_test
      pr_pos_train
      term
    
  
  
    
      35
      0.454837
      0.088527
      316
      31
      0.880223
      0.968750
      very dangerous
    
    
      72
      0.354745
      0.483095
      465
      178
      0.448842
      0.931937
      recall
    
    
      48
      0.398166
      0.504166
      1055
      205
      0.391030
      0.895197
      hazard
    
    
      53
      0.390238
      0.623188
      78
      10
      0.376812
      1.000000
      dangerous to
    
    
      59
      0.379280
      0.601716
      180
      45
      0.355731
      0.957447
      got stuck
    
    
      46
      0.403533
      0.471953
      586
      111
      0.350269
      0.822222
      unsafe
    
    
      86
      0.338274
      0.569225
      266
      120
      0.346806
      0.916031
      injury
    
    
      61
      0.376972
      0.626610
      107
      86
      0.339683
      0.966292
      was playing
    
    
      54
      0.389196
      0.549368
      65
      20
      0.320197
      0.869565
      was chewing
    
    
      79
      0.348577
      0.369792
      57
      8
      0.296875
      0.666667
      hazardous
    
    
      76
      0.352241
      0.603293
      109
      17
      0.291444
      0.894737
      resulting in
    
    
      19
      0.543917
      0.710526
      22
      5
      0.289474
      1.000000
      swallow it
    
    
      4
      0.697709
      0.435695
      663
      54
      0.284305
      0.720000
      snapped
    
    
      9
      0.625954
      0.537500
      22
      13
      0.275000
      0.812500
      smacked
    
    
      21
      0.536759
      0.637864
      27
      9
      0.262136
      0.900000
      exploded
    
    
      78
      0.351418
      0.596032
      64
      17
      0.253968
      0.850000
      came loose
    
    
      97
      0.328327
      0.646696
      115
      18
      0.253304
      0.900000
      resulting
    
    
      51
      0.394158
      0.770000
      23
      6
      0.230000
      1.000000
      shorted
    
    
      1
      1.005512
      0.613807
      646
      210
      0.229566
      0.843373
      mold
    
    
      82
      0.346109
      0.619035
      67
      22
      0.227119
      0.846154
      his arm
    
    
      38
      0.441069
      0.539507
      1167
      224
      0.219815
      0.759322
      happened
    
    
      40
      0.428708
      0.618161
      527
      153
      0.213360
      0.831522
      caused
    
    
      62
      0.374636
      0.650621
      95
      12
      0.206522
      0.857143
      came apart
    
    
      29
      0.476455
      0.609556
      520
      140
      0.199693
      0.809249
      causing
    
    
      16
      0.558329
      0.514939
      61
      10
      0.199346
      0.714286
      burnt
    
    
      7
      0.660024
      0.308411
      41
      3
      0.191589
      0.500000
      be tightened
    
    
      31
      0.468615
      0.506178
      306
      48
      0.189474
      0.695652
      began to
    
    
      14
      0.563678
      0.610976
      155
      32
      0.189024
      0.800000
      collapsed
    
    
      13
      0.580240
      0.642081
      56
      9
      0.176101
      0.818182
      that side
    
    
      70
      0.358205
      0.398531
      37
      4
      0.172897
      0.571429
      on product
    
    
      90
      0.335014
      0.669287
      127
      37
      0.171622
      0.840909
      this morning
    
    
      18
      0.553217
      0.462853
      1942
      149
      0.168503
      0.631356
      broke
    
    
      5
      0.670670
      0.698382
      52
      13
      0.168285
      0.866667
      allowing the
    
    
      91
      0.334277
      0.832258
      26
      3
      0.167742
      1.000000
      very unstable
    
    
      95
      0.330502
      0.565533
      154
      19
      0.165236
      0.730769
      later the
    
    
      50
      0.394640
      0.712627
      208
      35
      0.162373
      0.875000
      this issue
    
    
      93
      0.333439
      0.839506
      13
      6
      0.160494
      1.000000
      crib side
    
    
      27
      0.506716
      0.586098
      203
      67
      0.158346
      0.744444
      fire
    
    
      94
      0.331704
      0.636526
      367
      148
      0.154918
      0.791444
      stuck in
    
    
      92
      0.334080
      0.778492
      27
      27
      0.152542
      0.931034
      inc
    
    
      22
      0.525390
      0.477974
      127
      29
      0.152461
      0.630435
      flipped
    
    
      52
      0.393265
      0.419465
      1390
      219
      0.149366
      0.568831
      fell
    
    
      44
      0.412118
      0.716531
      56
      44
      0.146214
      0.862745
      bleeding
    
    
      57
      0.385689
      0.781375
      24
      38
      0.145455
      0.926829
      slat
    
    
      84
      0.341586
      0.362939
      363
      31
      0.145258
      0.508197
      cracked
    
    
      23
      0.525131
      0.634921
      8
      7
      0.142857
      0.777778
      nap nanny
    
    
      20
      0.537220
      0.359955
      185
      14
      0.140045
      0.500000
      tightened
    
    
      64
      0.366520
      0.802844
      40
      40
      0.127389
      0.930233
      bruises
    
    
      65
      0.366181
      0.667665
      86
      34
      0.123033
      0.790698
      melted
    
    
      60
      0.377685
      0.879518
      10
      5
      0.120482
      1.000000
      maker of
    
    
      37
      0.445698
      0.596939
      23
      5
      0.117347
      0.714286
      bar with
    
    
      45
      0.408971
      0.568170
      68
      26
      0.116041
      0.684211
      burned
    
    
      41
      0.426853
      0.569198
      1094
      290
      0.111553
      0.680751
      noticed
    
    
      33
      0.461944
      0.655535
      120
      29
      0.107623
      0.763158
      appeared
    
    
      66
      0.362892
      0.438857
      130
      19
      0.104000
      0.542857
      tipped
    
    
      73
      0.354199
      0.562842
      19
      6
      0.103825
      0.666667
      the maker
    
    
      6
      0.663692
      0.083950
      70
      3
      0.103550
      0.187500
      gate with
    
    
      83
      0.345788
      0.520750
      379
      121
      0.102961
      0.623711
      caught
    
    
      63
      0.369193
      0.834292
      41
      86
      0.100490
      0.934783
      rock and
    
    
      87
      0.337476
      0.407056
      1213
      140
      0.100190
      0.507246
      the plastic
    
    
      30
      0.473845
      0.570776
      77
      18
      0.095890
      0.666667
      cause the
    
    
      81
      0.348085
      0.447910
      333
      61
      0.091913
      0.539823
      sharp
    
    
      47
      0.401157
      0.479706
      164
      20
      0.091723
      0.571429
      seam
    
    
      55
      0.388989
      0.677722
      111
      70
      0.091509
      0.769231
      developed
    
    
      80
      0.348265
      0.328269
      48
      5
      0.088398
      0.416667
      fall on
    
    
      11
      0.598997
      0.644077
      40
      27
      0.085653
      0.729730
      delta
    
    
      68
      0.360688
      0.436627
      646
      132
      0.085112
      0.521739
      today
    
    
      67
      0.362887
      0.513149
      457
      130
      0.083182
      0.596330
      went to
    
    
      39
      0.437191
      0.427684
      273
      59
      0.080937
      0.508621
      finger
    
    
      17
      0.557652
      0.927273
      8
      42
      0.072727
      1.000000
      blisters
    
    
      69
      0.358842
      0.809722
      48
      66
      0.070278
      0.880000
      http
    
    
      2
      0.832210
      0.692302
      133
      154
      0.070074
      0.762376
      rash
    
    
      75
      0.352518
      0.300805
      136
      20
      0.069565
      0.370370
      to slide
    
    
      49
      0.394990
      0.828519
      49
      70
      0.068917
      0.897436
      www
    
    
      24
      0.523781
      0.150084
      49
      5
      0.067308
      0.217391
      approved
    
    
      98
      0.328126
      0.377274
      765
      163
      0.059724
      0.436997
      left
    
    
      88
      0.336868
      0.216301
      629
      70
      0.058208
      0.274510
      off and
    
    
      96
      0.329423
      0.414685
      378
      113
      0.058118
      0.472803
      placed
    
    
      71
      0.357262
      0.141935
      36
      3
      0.058065
      0.200000
      slide it
    
    
      3
      0.714554
      0.748367
      57
      91
      0.056943
      0.805310
      burn
    
    
      12
      0.595075
      0.449259
      523
      160
      0.055473
      0.504732
      model
    
    
      26
      0.507756
      0.741802
      94
      70
      0.053653
      0.795455
      chemical
    
    
      10
      0.616952
      0.500084
      99
      21
      0.052548
      0.552632
      died
    
    
      58
      0.381722
      0.363390
      523
      140
      0.050811
      0.414201
      my child
    
    
      43
      0.420521
      0.248880
      875
      134
      0.049561
      0.298441
      fall
    
    
      34
      0.459133
      0.670681
      173
      113
      0.049064
      0.719745
      com
    
    
      99
      0.327786
      0.697621
      46
      73
      0.047276
      0.744898
      diaper rash
    
    
      42
      0.423970
      0.436793
      388
      149
      0.046973
      0.483766
      red
    
    
      25
      0.513590
      0.284017
      657
      127
      0.046712
      0.330729
      attached
    
    
      36
      0.446402
      0.222332
      1972
      298
      0.040687
      0.263019
      side
    
    
      28
      0.504810
      0.324824
      17
      4
      0.038813
      0.363636
      stairway
    
    
      32
      0.467387
      0.120131
      717
      72
      0.038809
      0.158940
      gate
    
    
      74
      0.352581
      0.143280
      483
      44
      0.038538
      0.181818
      the straps
    
    
      56
      0.387544
      0.232644
      218
      39
      0.036321
      0.268966
      ring
    
    
      85
      0.340120
      0.560777
      82
      68
      0.035714
      0.596491
      was using
    
    
      77
      0.351991
      0.116442
      727
      68
      0.032354
      0.148796
      straps
    
    
      15
      0.561073
      0.311014
      1176
      489
      0.028333
      0.339348
      crib
    
    
      0
      1.104103
      0.897336
      7
      144
      0.014056
      0.911392
      pampers
    
    
      8
      0.638696
      0.525014
      44
      56
      0.013447
      0.538462
      night light
    
    
      89
      0.336818
      0.569497
      42
      81
      0.013237
      0.582734
      the diapers



In [36]:

    
def plot_reviews_by_year(model, data):
    """
    Print, per review year, the predicted-label counts and the fraction
    of reviews predicted as label 1.

    Despite the name, nothing is drawn: one line per year is written to
    stdout in ascending year order, formatted as
    ``(year, Counter) fraction``.

    model: object exposing ``predict_reviews(data)``, returning one
        predicted label per review.
    data: object whose ``reviews_df.review_time`` yields strings; the
        first four characters of each are taken as the year.
    """
    predicted = model.predict_reviews(data)
    review_years = [stamp[:4] for stamp in data.reviews_df.review_time]
    counts_by_year = defaultdict(Counter)
    for label, year in zip(predicted, review_years):
        counts_by_year[year][label] += 1
    report = []
    for entry in sorted(counts_by_year.items()):
        tally = entry[1]
        # Share of label-1 predictions among label-0 and label-1 predictions.
        positive_rate = tally[1] / (tally[0] + tally[1])
        report.append('%s %f' % (str(entry), positive_rate))
    print('\n'.join(report))
    
# Print, for every review year, the label counts predicted by best_model and the share predicted as label 1.
plot_reviews_by_year(best_model, data)









    



('2000', Counter({0: 56, 1: 1})) 0.017544
('2001', Counter({0: 845, 1: 15})) 0.017442
('2002', Counter({0: 2651, 1: 41})) 0.015230
('2003', Counter({0: 4153, 1: 74})) 0.017507
('2004', Counter({0: 7909, 1: 196})) 0.024183
('2005', Counter({0: 15724, 1: 408})) 0.025291
('2006', Counter({0: 12053, 1: 278})) 0.022545
('2007', Counter({0: 16551, 1: 257})) 0.015290
('2008', Counter({0: 23735, 1: 323})) 0.013426
('2009', Counter({0: 32511, 1: 473})) 0.014340
('2010', Counter({0: 54200, 1: 874})) 0.015870
('2011', Counter({0: 98804, 1: 1377})) 0.013745
('2012', Counter({0: 135303, 1: 1807})) 0.013179
('2013', Counter({0: 299410, 1: 2840})) 0.009396
('2014', Counter({0: 200684, 1: 1893})) 0.009345



In [37]:

    
# Plot Figure 7.
# Amazon series: the label-1 counts for 2011-2013 as printed by
# plot_reviews_by_year(best_model, data). They are hard-coded, so they must be
# updated by hand if the model or data change.
# CPSC series: NOTE(review): the provenance of these counts is not shown in
# this cell -- confirm against the complaints data.
plt.figure()
plt.plot([1377, 1807, 2840], 'go-', label='detected complaints, Amazon')
plt.plot([502, 447, 432], 'bo-', label='submitted complaints, CPSC')
# The x positions 0..2 are relabelled with the corresponding years.
plt.xticks([0, 1, 2], [2011, 2012, 2013])
plt.xlim(-.1, 2.1)
plt.legend(loc='best')
plt.xlabel('year', size=16)
plt.ylabel('count', size=16)
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout.
os.makedirs('paper/figs', exist_ok=True)
plt.savefig('paper/figs/years.pdf')
plt.show()

	ASIN	review_time	review_score	reviewText
0	0188399313	05 27, 2013	5.0	They work very well. Easy to clean, we wash th...
1	0188399399	04 9, 2013	5.0	it came early and was not disappointed. i love...
2	0188399518	02 14, 2014	4.0	I ended up with a variety of different brands ...
3	0188399518	07 8, 2013	3.0	These flannel wipes are OK, but in my opinion ...
4	0316967297	09 6, 2013	4.0	Cute quilt, the colors are perfect and my litt...

	0
count	915104.000000
mean	81.951987
std	87.139825
min	1.000000
25%	31.000000
50%	55.000000
75%	100.000000
max	4546.000000

	Report No.	Report Date	Sent to Manufacturer / Importer / Private Labeler	Publication Date	Category of Submitter	Product Description	Product Category	Product Sub Category	Product Type	Product Code	...	Submitter Has Product	Product Was Damaged Before Incident	Damage Description	Damage Repaired	Product Was Modified Before Incident	Have You Contacted The Manufacturer	If Not Do You Plan To	Answer Explanation	Company Comments	Associated Report Numbers
0	20160509-F1AD6-2147419650	5/9/2016	5/17/2016	6/8/2016	Consumer	Munchkin pacifier clip	Baby	Nursery Equipment & Supplies	Pacifiers or Teething Rings	1525	...	NaN	Yes	NaN	NaN	NaN	Yes	Yes	NaN	NaN	NaN
1	20160506-66663-2147419715	5/6/2016	5/16/2016	5/31/2016	Consumer	Baby Einstein Bouncer, Multicolor, has a piano...	Baby	Nursery Equipment & Supplies	Baby Bouncer Seats (Excl. Jumpers)	1558	...	NaN	Yes	NaN	NaN	NaN	Yes	Yes	I thought I was contacting them. the link on t...	NaN	NaN
2	20160429-84BCB-2147419859	4/29/2016	5/9/2016	5/31/2016	Consumer	Luxury teether toys for happy baby teething.\r...	Baby	Nursery Equipment & Supplies	Pacifiers or Teething Rings	1525	...	NaN	Yes	NaN	NaN	NaN	Yes	Yes	I still have the product and plan on reaching ...	NaN	NaN
3	20160505-69C2D-2147419760	5/5/2016	5/13/2016	5/27/2016	Consumer	Graco Lauren Classic Crib, model #2354497, pro...	Baby	Nursery Equipment & Supplies	Cribs	1543	...	NaN	Yes	NaN	NaN	NaN	Yes	NaN	I still have this product. Graco said they are...	NaN	NaN
4	20160504-C585C-2147419771	5/4/2016	5/12/2016	5/26/2016	Consumer	Baby swing savanah model CMH84	Baby	Nursery Equipment & Supplies	Portable Baby Swings (For Home Use)	1553	...	NaN	NaN	NaN	NaN	NaN	Yes	Yes	Tryin to figure out who to contact about incid...	NaN	NaN

	0
count	2010.000000
mean	124.513930
std	114.461285
min	4.000000
25%	56.000000
50%	98.000000
75%	159.000000
max	1683.000000

	label	ASIN	text
0	1	B002NU50LO	We purchased this dresser 2 years ago and were...
1	1	B004C43JJ4	I just got my order today and put my six month...
2	1	B00020L78M	I personally didn't buy this gate, specificall...
3	1	B00HVSVPQ2	I bought it for my son who is only six months ...
4	1	B0091DHACS	These are currently being recalled, and the mu...

	label	RecallNumber	RecallName	AmazonTitle	AmazonAsin	NumReviews	Score	RecallTitle	RecallDescription	RecallDate
0	1	8263	Munchkin Deluxe Bottle and Food Warmers with P...	Munchkin Deluxe Bottle And Food Warmer With P...	B00007C65S	32	9	Baby Bottle and Food Warmers Recalled by Munch...	Baby Bottle and Food Warmers Recalled by Munch...	2008-04-08T00:00:00
1	1	11056	The First Years American Red Cross Cabinet Swi...	The First Years American Red Cross Cabinet And...	B001ODU26E	4	8	The First Years® Recalls American Red Cross® C...	This recall involves The First Years American ...	2010-12-02T00:00:00
2	0	14072	Zoom Car Seat Adapter, Adapter clips can loose...	Phil&Teds Car Seat Adapter For Peg Perego ...	B003BNTNLA	5	7	Joovy Recalls Zoom Car Seat Stroller Adapter d...	This recall involves all Joovy’s Zoom gray met...	2013-12-30T00:00:00
3	0	13061	Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd...	Dream On Me 2 in 1 Baby Tunes Musical Activity...	B005GU18FU	11	7	Dream On Me Recalls Bath Seats Due to Drowning...	The recall includes all Dream On Me bath seats...	2012-12-06T00:00:00
4	0	13061	Dream On Me Ultra 2 in 1 Infant Bath Tub; Todd...	Dream On Me 2 In 1 Baby Bather and Changing St...	B003ZUXWNE	7	7	Dream On Me Recalls Bath Seats Due to Drowning...	The recall includes all Dream On Me bath seats...	2012-12-06T00:00:00

	coef	term	transform
0	0.000309	safety commission	61.523784
1	0.000702	consumer product	55.231579
2	0.094143	have choked	47.130948
3	0.006806	product safety	45.903579
4	0.362624	cpsc	44.895067
5	0.193792	be recalled	41.799259
6	0.105946	dangerous product	40.503158
7	0.005696	commission	40.193974
8	0.035305	extremely dangerous	30.973003
9	0.005788	seriously injured	28.229474
10	0.182547	leaned forward	27.576618
11	0.995862	recalled	27.002105
12	0.016994	to recall	27.002105
13	0.046329	he leaned	26.241483
14	0.087320	her throat	25.888617
15	0.079764	choked on	25.684930
16	0.009330	plastic broke	25.016656
17	0.014789	happened if	25.001949
18	0.183882	been recalled	24.558476
19	0.001871	face first	24.109023

	coef	term	transform
0	-0.054946	super easy	0.756345
1	-0.075466	are great	0.756289
2	-0.046282	cup holders	0.756272
3	-0.065321	so cute	0.756248
4	-0.034513	love love	0.756227
5	-0.060443	are soft	0.756226
6	-0.034254	work great	0.756214
7	-0.022103	really love	0.756213
8	-0.025111	it super	0.756210
9	-0.143647	love that	0.756204
10	-0.017637	will love	0.756188
11	-0.028556	but overall	0.756187
12	-0.028631	great quality	0.756183
13	-0.009706	how well	0.756173
14	-0.045497	still loves	0.756171
15	-0.011078	colors are	0.756168
16	-0.030595	and cute	0.756160
17	-0.117890	works great	0.756160
18	-0.026571	vibrant	0.756158
19	-0.024115	are perfect	0.756152

	coef	term	transform
0	0.000322	safety commission	63.198062
1	0.000671	consumer product	56.734624
2	0.003853	product safety	46.973828
3	0.091019	have choked	46.851818
4	0.358254	cpsc	45.599379
5	0.197924	be recalled	41.873813
6	0.005214	commission	41.800048
7	0.095590	dangerous product	41.562097
8	0.024793	seriously injured	31.722585
9	0.038375	extremely dangerous	29.605354
10	0.010754	plastic broke	28.421140
11	0.161353	leaned forward	28.269964
12	0.068356	her throat	28.225806
13	0.151812	arm stuck	27.767780
14	0.020216	to recall	26.842188
15	0.978791	recalled	26.657069
16	0.047072	he leaned	24.951893
17	0.104567	choked on	24.059766
18	0.007896	happened if	23.859722
19	0.190692	been recalled	23.319910

	coef	term	transform
0	-0.040329	so cute	0.756491
1	-0.026346	super easy	0.756487
2	-0.102125	are great	0.756431
3	-0.053281	love love	0.756370
4	-0.022434	are soft	0.756368
5	-0.021039	really love	0.756355
6	-0.014006	it super	0.756353
7	-0.072784	great to	0.756342
8	-0.006480	will love	0.756331
9	-0.010272	colors are	0.756311
10	-0.158837	love that	0.756305
11	-0.037007	and cute	0.756303
12	-0.166537	works great	0.756303
13	-0.066330	vibrant	0.756301
14	-0.006770	just love	0.756291
15	-0.096246	neutral	0.756279
16	-0.102921	great price	0.756276
17	-0.013747	are nice	0.756271
18	-0.015055	great gift	0.756270
19	-0.037904	these work	0.756270

	coef	term	transform
0	0.001510	safety commission	62.198775
1	0.000437	consumer product	54.922167
2	0.117785	have choked	46.866916
3	0.290730	cpsc	45.614073
4	0.004052	product safety	44.303881
5	0.007271	commission	41.813518
6	0.204041	be recalled	41.242886
7	0.079250	dangerous product	40.276256
8	0.024925	seriously injured	30.512315
9	0.152220	leaned forward	29.136015
10	0.036079	extremely dangerous	28.430298
11	0.180001	been recalled	28.065807
12	0.968364	recalled	27.869317
13	0.007379	serious injury	27.199549
14	0.031881	to recall	26.850837
15	0.086748	her throat	25.743586
16	0.188896	arm stuck	24.999055
17	0.033067	he leaned	24.959933
18	0.024676	face first	24.932920
19	0.098794	choked on	24.558693

	coef	term	transform
0	-0.050110	so cute	0.752067
1	-0.068935	super easy	0.752063
2	-0.085507	are perfect	0.752019
3	-0.101070	are great	0.752008
4	-0.131239	love that	0.752004
5	-0.040556	also love	0.752000
6	-0.076168	great price	0.751958
7	-0.024094	love love	0.751947
8	-0.021744	are soft	0.751945
9	-0.030350	really love	0.751932
10	-0.010568	will love	0.751908
11	-0.028522	but overall	0.751906
12	-0.080791	love these	0.751904
13	-0.084411	love them	0.751893
14	-0.024971	and cute	0.751880
15	-0.134213	works great	0.751880
16	-0.014960	just love	0.751868
17	-0.052358	are easy	0.751868
18	-0.008053	great gift	0.751848
19	-0.042026	these work	0.751848

	coef	term	transform
0	0.001109	safety commission	56.016696
1	0.000945	consumer product	48.691281
2	0.328170	cpsc	41.468853
3	0.074076	have choked	40.868747
4	0.006838	product safety	40.389816
5	0.213595	be recalled	36.526443
6	0.003579	commission	36.462098
7	0.084593	dangerous product	36.254534
8	0.023888	seriously injured	26.607257
9	0.173409	leaned forward	26.154368
10	0.005332	plastic broke	25.824691
11	0.039767	extremely dangerous	24.791703
12	0.945911	recalled	24.221779
13	0.042526	he leaned	23.744166
14	0.017658	to recall	23.414386
15	0.170728	been recalled	23.202491
16	0.082703	her throat	23.173001
17	0.081996	choked on	23.128845
18	0.168018	arm stuck	23.010690
19	0.027412	first into	22.964110

	coef	term	transform
0	-0.038867	are perfect	0.757949
1	-0.080609	so cute	0.757897
2	-0.037098	super easy	0.757889
3	-0.047753	love love	0.757876
4	-0.116802	are great	0.757817
5	-0.166313	love that	0.757811
6	-0.014396	just love	0.757797
7	-0.013212	great gift	0.757776
8	-0.055244	these work	0.757776
9	-0.099074	love these	0.757766
10	-0.022087	in great	0.757758
11	-0.033653	works very	0.757748
12	-0.039976	great to	0.757724
13	-0.010060	pricey but	0.757724
14	-0.026124	great quality	0.757698
15	-0.011436	great little	0.757689
16	-0.171237	works great	0.757684
17	-0.030785	great price	0.757677
18	-0.016034	buying more	0.757658
19	-0.022507	are soft	0.757651

	coef	term	transform
0	0.087824	have choked	44.026268
1	0.350106	cpsc	40.010102
2	0.182726	be recalled	36.252487
3	0.085475	dangerous product	35.707941
4	0.027400	seriously injured	28.302601
5	0.042119	he leaned	26.309460
6	0.176600	leaned forward	25.760050
7	0.025582	face first	24.708620
8	0.036379	extremely dangerous	24.417930
9	0.008237	plastic broke	24.417930
10	0.012195	to recall	24.342566
11	0.907292	recalled	24.254208
12	0.086792	choked on	24.045706
13	0.143552	arm stuck	23.856598
14	0.179617	been recalled	23.165729
15	0.056270	her throat	22.823632
16	0.032190	first into	22.617890
17	0.009580	happened if	22.207253
18	0.214492	is dangerous	21.720601
19	0.005191	serious injury	21.563886

	coef	term	transform
0	-0.068405	are perfect	0.758899
1	-0.051351	also love	0.758880
2	-0.229245	love that	0.758843
3	-0.052369	super easy	0.758839
4	-0.011543	love love	0.758826
5	-0.057430	are soft	0.758824
6	-0.093075	are great	0.758807
7	-0.088662	great to	0.758799
8	-0.021812	will love	0.758787
9	-0.029720	and cute	0.758759
10	-0.016050	vibrant	0.758757
11	-0.014625	great gift	0.758726
12	-0.018091	these work	0.758726
13	-0.045323	works very	0.758698
14	-0.051409	cup holders	0.758694
15	-0.013936	pricey but	0.758674
16	-0.031808	can beat	0.758650
17	-0.119674	love these	0.758650
18	-0.011255	great little	0.758639
19	-0.102801	great price	0.758626

	coef	term	transform
0	0.097777	have choked	44.586278
1	0.348655	cpsc	39.972815
2	0.187012	be recalled	38.081927
3	0.093058	dangerous product	37.225735
4	0.027220	seriously injured	28.412824
5	0.170727	leaned forward	28.389573
6	0.150988	arm stuck	27.357706
7	0.006212	plastic broke	26.516493
8	0.031533	extremely dangerous	25.455833
9	0.944766	recalled	24.704838
10	0.099355	choked on	24.188216
11	0.074376	her throat	23.793769
12	0.074566	bruise on	23.647496
13	0.032826	he leaned	23.364392
14	0.196156	been recalled	23.171336
15	0.079805	an unsafe	22.868859
16	0.016743	to recall	22.705975
17	0.005195	happened if	22.260760
18	0.197837	is dangerous	21.805191
19	0.084945	hazard to	21.510924

	coef	term	transform
0	-0.019607	also love	0.765346
1	-0.089914	are great	0.765313
2	-0.040619	love love	0.765291
3	-0.019955	will love	0.765252
4	-0.043356	how well	0.765237
5	-0.030687	still loves	0.765234
6	-0.176153	love that	0.765226
7	-0.036675	and cute	0.765223
8	-0.032474	vibrant	0.765221
9	-0.022013	just love	0.765212
10	-0.062500	super easy	0.765199
11	-0.011823	are nice	0.765191
12	-0.005157	great gift	0.765191
13	-0.070969	these work	0.765191
14	-0.158091	works great	0.765182
15	-0.094045	love these	0.765180
16	-0.055547	in great	0.765172
17	-0.021649	works very	0.765162
18	-0.065848	great to	0.765138
19	-0.027989	pricey but	0.765138

	coef	term	transform
0	0.000489	safety commission	51.185920
1	0.001652	consumer product	45.950996
2	0.104694	have choked	39.058347
3	0.315255	cpsc	38.061819
4	0.005814	product safety	37.909572
5	0.181024	be recalled	34.371345
6	0.005105	commission	33.761705
7	0.094375	dangerous product	33.629459
8	0.027799	seriously injured	27.762060
9	0.040540	extremely dangerous	26.945529
10	0.163109	leaned forward	26.214066
11	0.071244	her throat	23.449220
12	0.933195	recalled	23.239584
13	0.010669	plastic broke	23.228904
14	0.177406	been recalled	23.157431
15	0.039774	he leaned	23.137297
16	0.163501	arm stuck	22.876466
17	0.010156	to recall	22.230922
18	0.084141	choked on	21.574553
19	0.016158	face first	21.060873

	coef	term	transform
0	-0.040616	super easy	0.780839
1	-0.031266	love love	0.780825
2	-0.035879	are soft	0.780824
3	-0.125359	are great	0.780806
4	-0.035414	are perfect	0.780748
5	-0.011094	great gift	0.780723
6	-0.020518	these work	0.780723
7	-0.124954	love that	0.780717
8	-0.034575	works very	0.780694
9	-0.150607	works great	0.780671
11	-0.022074	pricey but	0.780669
10	-0.078288	great to	0.780669
12	-0.084417	so cute	0.780640
13	-0.006706	great little	0.780633
14	-0.079870	great price	0.780620
15	-0.026168	can carry	0.780606
16	-0.024982	glad bought	0.780594
17	-0.029822	they wash	0.780592
18	-0.019830	nice quality	0.780591
19	-0.056346	love these	0.780576

	coef	term	transform
0	0.000605	safety commission	48.350297
1	0.000552	consumer product	42.027432
2	0.321763	cpsc	36.523938
3	0.085365	have choked	35.275484
4	0.005135	product safety	34.862099
5	0.183927	be recalled	32.497539
6	0.005528	commission	31.471925
7	0.098098	dangerous product	31.292768
8	0.025386	seriously injured	25.721707
10	0.005084	plastic broke	24.965186
9	0.045568	extremely dangerous	24.965186
11	0.154176	leaned forward	23.219900
12	0.151239	arm stuck	22.997487
13	0.845105	recalled	22.230904
14	0.183242	been recalled	22.221759
15	0.037975	he leaned	22.202439
16	0.004158	happened if	21.706943
17	0.021354	face first	20.931695
18	0.057167	bruise on	20.872533
19	0.087009	her throat	20.626612

	coef	term	transform
0	-0.056875	are perfect	0.778423
1	-0.067761	super easy	0.778361
2	-0.026278	love love	0.778348
3	-0.020138	are soft	0.778346
4	-0.127362	loves these	0.778322
5	-0.103464	are great	0.778287
6	-0.019045	just love	0.778267
7	-0.006922	great gift	0.778246
8	-0.083266	these work	0.778245
9	-0.032004	in great	0.778226
10	-0.015455	pricey but	0.778192
11	-0.007597	definitely buy	0.778167
12	-0.116372	love these	0.778167
13	-0.032289	great little	0.778156
14	-0.040527	great price	0.778143
15	-0.029445	buying more	0.778124
16	-0.046686	they wash	0.778115
17	-0.037464	nice quality	0.778114
18	-0.031284	bibs are	0.778095
19	-0.008702	really love	0.778089

	coef	term	transform
0	0.000681	safety commission	49.783817
1	0.085111	have choked	39.105754
2	0.305678	cpsc	38.499985
3	0.005806	product safety	36.359040
4	0.204428	be recalled	33.675641
5	0.007200	commission	33.305991
6	0.078297	dangerous product	32.708269
7	0.031247	extremely dangerous	27.111132
8	0.021551	seriously injured	25.139413
9	0.007024	plastic broke	24.400019
10	0.167001	leaned forward	24.188513
11	0.909811	recalled	22.814900
12	0.022167	to recall	22.759963
13	0.043061	he leaned	22.503512
14	0.182853	been recalled	22.245031
15	0.006363	happened if	22.001297
16	0.163226	arm stuck	21.190310
17	0.059252	bruise on	21.155572
18	0.056202	choked on	20.983575
19	0.072693	her throat	20.906316

	coef	term	transform
0	-0.045964	are perfect	0.784348
1	-0.025519	love love	0.784272
2	-0.047174	are soft	0.784270
3	-0.007944	really love	0.784256
4	-0.103692	loves these	0.784246
5	-0.029826	will love	0.784231
6	-0.028986	how well	0.784216
7	-0.078673	still loves	0.784213
8	-0.136279	are great	0.784211
9	-0.017515	just love	0.784190
10	-0.042817	these work	0.784169
11	-0.110932	love these	0.784158
12	-0.041703	also love	0.784154
13	-0.113129	loves them	0.784118
14	-0.060624	great to	0.784114
15	-0.025287	pricey but	0.784114
16	-0.012199	great little	0.784079
17	-0.024881	super easy	0.784069
18	-0.005174	buying more	0.784046
19	-0.018044	they wash	0.784037

	coef	term	transform
0	0.000887	safety commission	20.133736
1	0.030602	have choked	17.861962
2	0.004502	consumer product	17.808801
3	0.142718	cpsc	16.063468
4	0.006279	product safety	15.983621
5	0.039004	dangerous product	15.845289
6	0.117738	be recalled	14.782900
7	0.002023	commission	14.105820
8	0.021048	seriously injured	13.821756
9	0.033536	extremely dangerous	13.415234
10	0.058837	leg stuck	13.024347
11	0.078349	arm stuck	12.905194
12	0.033588	choked on	12.408462
13	0.000557	crib were	12.357332
14	0.078417	leaned forward	11.944169
15	0.005050	plastic broke	11.695332
16	0.066169	been recalled	11.430732
17	0.002227	happened if	11.262172
18	0.000084	got hurt	11.245512
19	0.016341	had happened	11.177840

	coef	term	transform
0	-0.022151	just great	2.961582
1	-0.006894	great love	2.960185
2	-0.004003	best stroller	2.959723
3	-0.025013	cute too	2.958532
4	-0.065863	great quality	2.958349
5	-0.000256	great stroller	2.958033
6	-0.002675	perfect love	2.957760
7	-0.000945	much love	2.957712
8	-0.004824	just love	2.957543
9	-0.000142	far love	2.957269
10	-0.103704	love these	2.957081
11	-0.011254	great gift	2.957060
12	-0.000105	re great	2.957034
13	-0.002634	great fit	2.956966
14	-0.010690	quality easy	2.956946
15	-0.000903	too highly	2.956846
16	-0.000043	time great	2.956785
17	-0.000672	well love	2.956605
18	-0.007872	colors they	2.956529
19	-0.001934	great don	2.956485

	coef	term	transform
0	0.000872	safety commission	17.618103
1	0.002292	consumer product	16.048843
2	0.017139	have choked	15.258031
3	0.118388	cpsc	14.549606
4	0.040189	dangerous product	14.195611
5	0.006078	product safety	14.157096
6	0.114750	be recalled	13.672684
7	0.077787	arm stuck	12.704332
8	0.004272	commission	12.343352
9	0.024803	extremely dangerous	12.341054
10	0.010991	seriously injured	12.094781
11	0.061572	leaned forward	11.540519
12	0.078326	leg stuck	11.164413
13	0.009245	hinge broke	11.021279
14	0.018246	had happened	10.686879
15	0.003831	plastic broke	10.535046
16	0.073682	been recalled	10.465585
17	0.030703	choked on	10.358851
18	0.009745	was caused	10.234045
19	0.062147	was strapped	10.234045

	coef	term	transform
0	-0.001160	great easy	3.084866
1	-0.000029	re great	3.083841
2	-0.000001	too love	3.082497
3	-0.029517	kids love	3.082373
4	-0.032480	them easy	3.081706
5	-0.000026	girls love	3.081534
6	-0.004637	cute too	3.081457
7	-0.051376	great quality	3.081267
8	-0.000485	perfect love	3.080653
9	-0.021444	comfortable easy	3.080553
10	-0.007746	clean love	3.080362
11	-0.008106	use easy	3.080225
12	-0.001460	far love	3.080141
13	-0.012911	children love	3.080065
14	-0.040314	great gift	3.079924
15	-0.000045	too highly	3.079701
16	-0.000066	cuddly and	3.079255
17	-0.009100	much stuff	3.079111
18	-0.009641	great too	3.078952
19	-0.000570	great all	3.078886

	coef	term	transform
0	0.001153	safety commission	18.662355
1	0.001477	consumer product	16.753705
2	0.030728	have choked	16.083557
3	0.005414	product safety	14.743261
4	0.148264	cpsc	14.717885
5	0.044863	dangerous product	14.607378
6	0.139361	be recalled	14.237777
7	0.003083	commission	13.344500
8	0.028408	extremely dangerous	12.699027
9	0.010930	seriously injured	12.445610
10	0.063387	leaned forward	11.427147
11	0.039924	choked on	10.916177
12	0.003181	plastic broke	10.840633
13	0.081311	been recalled	10.769156
14	0.084687	arm stuck	10.530900
15	0.013225	hinge broke	10.530900
16	0.061636	leg stuck	10.530900
17	0.025749	wedged between	10.530900
18	0.487445	recalled	10.434065
19	0.030352	we woke	10.211782

	coef	term	transform
0	-0.003210	great easy	3.129270
1	-0.040028	cute too	3.129022
2	-0.013174	re great	3.128231
3	-0.001136	great love	3.127559
4	-0.007227	just love	3.127255
5	-0.010193	cute love	3.126922
6	-0.013057	great quality	3.126721
7	-0.030617	works perfect	3.126248
8	-0.003089	girls love	3.125890
9	-0.000159	person than	3.125719
10	-0.009842	definitely good	3.125609
11	-0.007634	made great	3.125255
12	-0.011515	great bag	3.125055
13	-0.001026	seat easy	3.124878
14	-0.010255	are perfect	3.124857
15	-0.014045	clean love	3.124701
16	-0.007827	use easy	3.124563
17	-0.003820	much cuter	3.124053
18	-0.001372	time great	3.123966
19	-0.046951	great little	3.123788