In [1]:
import os
import json
from csv import DictReader, DictWriter
import numpy as np
from numpy import array
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.metrics import accuracy_score
SEED = 5
In [74]:
'''
The ItemSelector class was created by Matt Terry to help with using
Feature Unions on Heterogeneous Data Sources
All credit goes to Matt Terry for the ItemSelector class below
For more information:
http://scikit-learn.org/stable/auto_examples/hetero_feature_union.html
'''
class ItemSelector(BaseEstimator, TransformerMixin):
def __init__(self, key):
self.key = key
def fit(self, x, y=None):
#print ("SelectorFit", x['text'][:1][:10])
print ("SelectorFit")
return self
def transform(self, data_dict):
print (data_dict.keys())
return data_dict[self.key]
In [75]:
"""
This is an example of a custom feature transformer. The constructor is used
to store the state (e.g like if you need to store certain words/vocab), the
fit method is used to update the state based on the training data, and the
transform method is used to transform the data into the new feature(s). In
this example, we simply use the length of the movie review as a feature. This
requires no state, so the constructor and fit method do nothing.
"""
class TextLengthTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
pass
def fit(self, examples):
print ("TextLengthFit", len(examples[0]))
return self
def transform(self, examples):
features = np.zeros((len(examples), 1))
i = 0
for ex in examples:
features[i, 0] = len(ex)
i += 1
return features
In [76]:
# TODO: Add custom feature transformers for the movie review data
class CountTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
self.vectorizer = CountVectorizer()
self.x_train = None
def fit(self, examples):
print(examples[:1])
self.tranformer = self.vectorizer.fit(examples)
print ( "Count train",self.x_train[0] )
return self
def transform(self, examples):
features = None
features = self.transformer.transform(examples)
#print (features[0])
return features
In [77]:
# TODO: Add custom feature transformers for the movie review data
class TfidfTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
self.tfidf_vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
self.tranformer = None
def fit(self, examples):
print("tfidf fit", examples[:1])
self.transformer = self.tfidf_vectorizer.fit(examples)
return self
def transform(self, examples):
print("tfidf transform", examples[:1])
features = None
features = self.transformer.transform(examples)
print (features[0])
return features
In [ ]:
In [78]:
class Featurizer:
def __init__(self):
# To add new features, just add a new pipeline to the feature union
# The ItemSelector is used to select certain pieces of the input data
# In this case, we are selecting the plaintext of the input data
# TODO: Add any new feature transformers or other features to the FeatureUnion
self.all_features = FeatureUnion([
('text_stats', Pipeline([
('selector', ItemSelector(key='text')),
('text_length', TextLengthTransformer())
]))
,
('text_stats2', Pipeline([
('selector', ItemSelector(key='text')),
('tfidf', TfidfTransformer())
])),
])
def train_feature(self, examples):
return self.all_features.fit_transform(examples)
def test_feature(self, examples):
return self.all_features.transform(examples)
In [ ]:
In [79]:
# Read in data
dataset_x = []
dataset_y = []
with open('../data/movie_review_data.json') as f:
data = json.load(f)
for d in data['data']:
dataset_x.append(d['text'])
dataset_y.append(d['label'])
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(dataset_x, dataset_y, test_size=0.3, random_state=SEED)
In [80]:
X_train[0]
Out[80]:
'note : some may consider portions of the following text to be spoilers . be forewarned . " all the world\'s a stage and all the men and women merely players they have their exits and their entrances and one man in his time plays many parts " - excerpt from as you like it , act ii , scene 7 when william shakespeare penned this passage , he could not have possibly envisioned a world in which the domestic activites in an abode would be broadcast across the continent , or where women would install webcams in their apartments in order to convert voyeurism into cash . this is the world of today , and it is the perfect climate to unveil a prototypical high-concept project like the truman show . truman burbank ( jim carrey ) seems to have the perfect life . he has a pretty , doting wife meryl ( laura linney ) , a comfortable insurance sales position , an immaculate suburban home in the idyllic island community of seahaven , a reliable childhood buddy marlon ( noah emmerich ) -- except for the dog that paws a greeting to him every morning and his hydrophobia , this is paradise . or so it would seem . bizarre , inexplicable incidents begin to crop up in truman\'s life . a lighting fixture unexpectedly plummets from the sky . a rainstorm which gives new meaning to the term \'isolated\' follows him around . a radio broadcast appears to be describing his every move . for the first time in his life , it is beginning to dawn on truman that things are not what they appear to be . the truman show , directed by peter weir and written by andrew niccol , is a remarkably well-constructed film , paced perfectly with every scene fluidly leading into the next . balancing drama with humour , thoughtfulness with abandon , this is a film which addresses an intriguing and intricate concept with just the right mixture of sermonizing and whimsical fancy . in many ways , mr . niccol\'s screenplay mirrors the spirit of his previous gattaca -- both films focus on the theme of triumph of the human spirit over oppressive adversity , with pivotal , character-defining moments occurring at sea . although the film\'s rather conventional climax doesn\'t sustain the inspired dynamic achieved in the first two-thirds , and ultimately just falls short on delivering the intended emotional punch , there are moments through the truman show which are genuinely stirring and moving as truman attempts to make sense of his unraveling world and grasp the implications behind his discoveries of deception . despite the multitude of pertinent issues being broached by the film , it remains foremost a finely-crafted piece of entertainment , constantly light and accessible . the tone achieved is far too sunny for any sense of fearful paranoia to legitimately intrude , and as the carefully-stacked seahaven house of cards tumbles down , the film eschews any psychological ramifications for a man discovering that the fundamentals of his entire existence have been false . the end act of the truman show essentially boils down to a rehash of the reliable , crowdpleasing underdog vs . the system formula . while the film is an impressive realisation of an inspired concept , some elements are lacking . the device of mysterious lauren/sylvia ( natascha mcelhone ) as a contributing impetus to truman\'s growing awareness isn\'t exactly convincing . there\'s not much of a discernable spark between either the two characters or the actors themselves , and the scenes in which she indignantly confronts the megalomaniac christoff ( ed harris ) with regards to the moral ambiguity of his actions unnecessarily spells things out for the audience , thankfully avoided elsewhere in the film . in fact , although the film smartly restricts the number of scenes depicting the real world outside truman\'s artificial utopia to a mere handful of select reaction shots from enraptured gazers , it might have perhaps been even more indicting and damning to immerse itself even * more * deeply in the insulatory seahaven world , with the resultant effect that the repercussions felt as the film finally shifts outside the imposed bubble are all the more startling . still , the scathing commentary issued by this film is on target , and its insidiously oblique manner of delivery is far more effective in conveying the message than the more forthright anti-voyeuristic tirades such as that of the recent costa gavras film mad city . i\'ve always believed that the versatile ms . linney possesses a tremendous comic gift -- she has always seems to have a impish gleam in her eyes -- and in the truman show she runs rampant with her duplicitous character , mischievously flashing a glazed , insanely jovial grin as she perkily recites impromptu product placement slogans . cheerfully going over the top , it\'s hilarious to watch her -- who would have ever guessed that she\'d have the opportunity to outshine the reigning king of comedy in the laughs department of a film ? ms . linney is a delight in the film . but it is mr . carrey who rules the show here . he may not have been intuitively the most obvious choice to portray underdog hero truman burbank -- the role is seriously lacking in any of the frenetic comic interludes which typify his traditional parts -- but he brings to the character tremendous energy and eminent appeal : it\'s difficult not to root for truman . in a commanding performance , mr . carrey lends our protagonist an inherent sense of decency and integrity which makes him a genuinely engaging presence , and acquits himself impressively in this dramatic turn , demonstrating admirable restraint -- there\'s an early scene where he\'s down on his knees gardening , and given his past track record , one almost instinctively anticipates mr . carrey to pull one of more infamous , and in this case , wholly inappropriate sight gags . it\'s almost poetic how this notorious look-at-me actor successfully tones it down to play a character who\'s constantly the centre of attention . in addition to the fine performances by the bulk of the cast -- mr . emmerich is particularly noteworthy for issuing his character a sense of sturdy trustworthiness -- the film is immeasurably aided by wonderful production design by dennis gassner in creating truman\'s antiseptic , white picket-fenced smalltown utopia , and by burkhard dallwitz\'s original score . technical credits are first-rate all around . while the truman show is clearly a definitive high-concept film , it\'s not exactly a dazzlingly innovative one -- obvious predecessors utilising common elements abound , ranging from 1965\'s secret agent through to the groundbreaking british 1967 tv series the prisoner . all the same , this is a visionary , award-calibre film : entertaining , provocative , and intelligent . the curious ( and possibly disturbing ) thing may be that although upon first glance the plausibility of the film\'s concept seems unfathomably outrageous , it\'s abundantly clear that our society is rapidly approaching , if not actualisation , at least permissiveness of such a scenario . perhaps the highest compliment which can be paid to mr . weir is that his depiction of this bizarro state is so convincing that we accept it without question . perhaps we recognize a bit of ourselves in this world . '
In [81]:
feat = Featurizer()
labels = []
for l in y_train:
if not l in labels:
labels.append(l)
print("Label set: %s\n" % str(labels))
# Here we collect the train features
# The inner dictionary contains certain pieces of the input data that we
# would like to be able to select with the ItemSelector
# The text key refers to the plaintext
feat_train = feat.train_feature({
'text': [t for t in X_train]
})
# Here we collect the test features
feat_test = feat.test_feature({
'text': [t for t in X_test]
})
print(feat_train)
print(set(y_train))
Label set: [1, 0]
SelectorFit
dict_keys(['text'])
TextLengthFit 7257
SelectorFit
dict_keys(['text'])
tfidf fit ['note : some may consider portions of the following text to be spoilers . be forewarned . " all the world\'s a stage and all the men and women merely players they have their exits and their entrances and one man in his time plays many parts " - excerpt from as you like it , act ii , scene 7 when william shakespeare penned this passage , he could not have possibly envisioned a world in which the domestic activites in an abode would be broadcast across the continent , or where women would install webcams in their apartments in order to convert voyeurism into cash . this is the world of today , and it is the perfect climate to unveil a prototypical high-concept project like the truman show . truman burbank ( jim carrey ) seems to have the perfect life . he has a pretty , doting wife meryl ( laura linney ) , a comfortable insurance sales position , an immaculate suburban home in the idyllic island community of seahaven , a reliable childhood buddy marlon ( noah emmerich ) -- except for the dog that paws a greeting to him every morning and his hydrophobia , this is paradise . or so it would seem . bizarre , inexplicable incidents begin to crop up in truman\'s life . a lighting fixture unexpectedly plummets from the sky . a rainstorm which gives new meaning to the term \'isolated\' follows him around . a radio broadcast appears to be describing his every move . for the first time in his life , it is beginning to dawn on truman that things are not what they appear to be . the truman show , directed by peter weir and written by andrew niccol , is a remarkably well-constructed film , paced perfectly with every scene fluidly leading into the next . balancing drama with humour , thoughtfulness with abandon , this is a film which addresses an intriguing and intricate concept with just the right mixture of sermonizing and whimsical fancy . in many ways , mr . niccol\'s screenplay mirrors the spirit of his previous gattaca -- both films focus on the theme of triumph of the human spirit over oppressive adversity , with pivotal , character-defining moments occurring at sea . although the film\'s rather conventional climax doesn\'t sustain the inspired dynamic achieved in the first two-thirds , and ultimately just falls short on delivering the intended emotional punch , there are moments through the truman show which are genuinely stirring and moving as truman attempts to make sense of his unraveling world and grasp the implications behind his discoveries of deception . despite the multitude of pertinent issues being broached by the film , it remains foremost a finely-crafted piece of entertainment , constantly light and accessible . the tone achieved is far too sunny for any sense of fearful paranoia to legitimately intrude , and as the carefully-stacked seahaven house of cards tumbles down , the film eschews any psychological ramifications for a man discovering that the fundamentals of his entire existence have been false . the end act of the truman show essentially boils down to a rehash of the reliable , crowdpleasing underdog vs . the system formula . while the film is an impressive realisation of an inspired concept , some elements are lacking . the device of mysterious lauren/sylvia ( natascha mcelhone ) as a contributing impetus to truman\'s growing awareness isn\'t exactly convincing . there\'s not much of a discernable spark between either the two characters or the actors themselves , and the scenes in which she indignantly confronts the megalomaniac christoff ( ed harris ) with regards to the moral ambiguity of his actions unnecessarily spells things out for the audience , thankfully avoided elsewhere in the film . in fact , although the film smartly restricts the number of scenes depicting the real world outside truman\'s artificial utopia to a mere handful of select reaction shots from enraptured gazers , it might have perhaps been even more indicting and damning to immerse itself even * more * deeply in the insulatory seahaven world , with the resultant effect that the repercussions felt as the film finally shifts outside the imposed bubble are all the more startling . still , the scathing commentary issued by this film is on target , and its insidiously oblique manner of delivery is far more effective in conveying the message than the more forthright anti-voyeuristic tirades such as that of the recent costa gavras film mad city . i\'ve always believed that the versatile ms . linney possesses a tremendous comic gift -- she has always seems to have a impish gleam in her eyes -- and in the truman show she runs rampant with her duplicitous character , mischievously flashing a glazed , insanely jovial grin as she perkily recites impromptu product placement slogans . cheerfully going over the top , it\'s hilarious to watch her -- who would have ever guessed that she\'d have the opportunity to outshine the reigning king of comedy in the laughs department of a film ? ms . linney is a delight in the film . but it is mr . carrey who rules the show here . he may not have been intuitively the most obvious choice to portray underdog hero truman burbank -- the role is seriously lacking in any of the frenetic comic interludes which typify his traditional parts -- but he brings to the character tremendous energy and eminent appeal : it\'s difficult not to root for truman . in a commanding performance , mr . carrey lends our protagonist an inherent sense of decency and integrity which makes him a genuinely engaging presence , and acquits himself impressively in this dramatic turn , demonstrating admirable restraint -- there\'s an early scene where he\'s down on his knees gardening , and given his past track record , one almost instinctively anticipates mr . carrey to pull one of more infamous , and in this case , wholly inappropriate sight gags . it\'s almost poetic how this notorious look-at-me actor successfully tones it down to play a character who\'s constantly the centre of attention . in addition to the fine performances by the bulk of the cast -- mr . emmerich is particularly noteworthy for issuing his character a sense of sturdy trustworthiness -- the film is immeasurably aided by wonderful production design by dennis gassner in creating truman\'s antiseptic , white picket-fenced smalltown utopia , and by burkhard dallwitz\'s original score . technical credits are first-rate all around . while the truman show is clearly a definitive high-concept film , it\'s not exactly a dazzlingly innovative one -- obvious predecessors utilising common elements abound , ranging from 1965\'s secret agent through to the groundbreaking british 1967 tv series the prisoner . all the same , this is a visionary , award-calibre film : entertaining , provocative , and intelligent . the curious ( and possibly disturbing ) thing may be that although upon first glance the plausibility of the film\'s concept seems unfathomably outrageous , it\'s abundantly clear that our society is rapidly approaching , if not actualisation , at least permissiveness of such a scenario . perhaps the highest compliment which can be paid to mr . weir is that his depiction of this bizarro state is so convincing that we accept it without question . perhaps we recognize a bit of ourselves in this world . ']
tfidf transform ['note : some may consider portions of the following text to be spoilers . be forewarned . " all the world\'s a stage and all the men and women merely players they have their exits and their entrances and one man in his time plays many parts " - excerpt from as you like it , act ii , scene 7 when william shakespeare penned this passage , he could not have possibly envisioned a world in which the domestic activites in an abode would be broadcast across the continent , or where women would install webcams in their apartments in order to convert voyeurism into cash . this is the world of today , and it is the perfect climate to unveil a prototypical high-concept project like the truman show . truman burbank ( jim carrey ) seems to have the perfect life . he has a pretty , doting wife meryl ( laura linney ) , a comfortable insurance sales position , an immaculate suburban home in the idyllic island community of seahaven , a reliable childhood buddy marlon ( noah emmerich ) -- except for the dog that paws a greeting to him every morning and his hydrophobia , this is paradise . or so it would seem . bizarre , inexplicable incidents begin to crop up in truman\'s life . a lighting fixture unexpectedly plummets from the sky . a rainstorm which gives new meaning to the term \'isolated\' follows him around . a radio broadcast appears to be describing his every move . for the first time in his life , it is beginning to dawn on truman that things are not what they appear to be . the truman show , directed by peter weir and written by andrew niccol , is a remarkably well-constructed film , paced perfectly with every scene fluidly leading into the next . balancing drama with humour , thoughtfulness with abandon , this is a film which addresses an intriguing and intricate concept with just the right mixture of sermonizing and whimsical fancy . in many ways , mr . niccol\'s screenplay mirrors the spirit of his previous gattaca -- both films focus on the theme of triumph of the human spirit over oppressive adversity , with pivotal , character-defining moments occurring at sea . although the film\'s rather conventional climax doesn\'t sustain the inspired dynamic achieved in the first two-thirds , and ultimately just falls short on delivering the intended emotional punch , there are moments through the truman show which are genuinely stirring and moving as truman attempts to make sense of his unraveling world and grasp the implications behind his discoveries of deception . despite the multitude of pertinent issues being broached by the film , it remains foremost a finely-crafted piece of entertainment , constantly light and accessible . the tone achieved is far too sunny for any sense of fearful paranoia to legitimately intrude , and as the carefully-stacked seahaven house of cards tumbles down , the film eschews any psychological ramifications for a man discovering that the fundamentals of his entire existence have been false . the end act of the truman show essentially boils down to a rehash of the reliable , crowdpleasing underdog vs . the system formula . while the film is an impressive realisation of an inspired concept , some elements are lacking . the device of mysterious lauren/sylvia ( natascha mcelhone ) as a contributing impetus to truman\'s growing awareness isn\'t exactly convincing . there\'s not much of a discernable spark between either the two characters or the actors themselves , and the scenes in which she indignantly confronts the megalomaniac christoff ( ed harris ) with regards to the moral ambiguity of his actions unnecessarily spells things out for the audience , thankfully avoided elsewhere in the film . in fact , although the film smartly restricts the number of scenes depicting the real world outside truman\'s artificial utopia to a mere handful of select reaction shots from enraptured gazers , it might have perhaps been even more indicting and damning to immerse itself even * more * deeply in the insulatory seahaven world , with the resultant effect that the repercussions felt as the film finally shifts outside the imposed bubble are all the more startling . still , the scathing commentary issued by this film is on target , and its insidiously oblique manner of delivery is far more effective in conveying the message than the more forthright anti-voyeuristic tirades such as that of the recent costa gavras film mad city . i\'ve always believed that the versatile ms . linney possesses a tremendous comic gift -- she has always seems to have a impish gleam in her eyes -- and in the truman show she runs rampant with her duplicitous character , mischievously flashing a glazed , insanely jovial grin as she perkily recites impromptu product placement slogans . cheerfully going over the top , it\'s hilarious to watch her -- who would have ever guessed that she\'d have the opportunity to outshine the reigning king of comedy in the laughs department of a film ? ms . linney is a delight in the film . but it is mr . carrey who rules the show here . he may not have been intuitively the most obvious choice to portray underdog hero truman burbank -- the role is seriously lacking in any of the frenetic comic interludes which typify his traditional parts -- but he brings to the character tremendous energy and eminent appeal : it\'s difficult not to root for truman . in a commanding performance , mr . carrey lends our protagonist an inherent sense of decency and integrity which makes him a genuinely engaging presence , and acquits himself impressively in this dramatic turn , demonstrating admirable restraint -- there\'s an early scene where he\'s down on his knees gardening , and given his past track record , one almost instinctively anticipates mr . carrey to pull one of more infamous , and in this case , wholly inappropriate sight gags . it\'s almost poetic how this notorious look-at-me actor successfully tones it down to play a character who\'s constantly the centre of attention . in addition to the fine performances by the bulk of the cast -- mr . emmerich is particularly noteworthy for issuing his character a sense of sturdy trustworthiness -- the film is immeasurably aided by wonderful production design by dennis gassner in creating truman\'s antiseptic , white picket-fenced smalltown utopia , and by burkhard dallwitz\'s original score . technical credits are first-rate all around . while the truman show is clearly a definitive high-concept film , it\'s not exactly a dazzlingly innovative one -- obvious predecessors utilising common elements abound , ranging from 1965\'s secret agent through to the groundbreaking british 1967 tv series the prisoner . all the same , this is a visionary , award-calibre film : entertaining , provocative , and intelligent . the curious ( and possibly disturbing ) thing may be that although upon first glance the plausibility of the film\'s concept seems unfathomably outrageous , it\'s abundantly clear that our society is rapidly approaching , if not actualisation , at least permissiveness of such a scenario . perhaps the highest compliment which can be paid to mr . weir is that his depiction of this bizarro state is so convincing that we accept it without question . perhaps we recognize a bit of ourselves in this world . ']
(0, 33752) 0.0226150081821
(0, 33659) 0.0528809217987
(0, 33594) 0.0276334603728
(0, 33587) 0.0449546537009
(0, 33415) 0.0296708263845
(0, 33389) 0.0211023855811
(0, 33342) 0.0460467913122
(0, 33317) 0.0256968017505
(0, 33280) 0.0481737365846
(0, 33180) 0.076946783351
(0, 33139) 0.0612211932042
(0, 33093) 0.0276334603728
(0, 33046) 0.0198773889676
(0, 32846) 0.0401216027803
(0, 32842) 0.0537929702931
(0, 32841) 0.0579341522019
(0, 32734) 0.0556019594956
(0, 32529) 0.0499827257871
(0, 32437) 0.0183500553103
(0, 32293) 0.0864609730615
(0, 32286) 0.0612211932042
(0, 32155) 0.0556019594956
(0, 32054) 0.0579341522019
(0, 32001) 0.0474010721489
(0, 31846) 0.0612211932042
: :
(0, 1643) 0.034064054727
(0, 1510) 0.0392674618738
(0, 1372) 0.0490278774911
(0, 1108) 0.0438720179328
(0, 1059) 0.03045176733
(0, 956) 0.0579341522019
(0, 877) 0.0448866955823
(0, 842) 0.0454460097943
(0, 834) 0.0323985531748
(0, 792) 0.0612211932042
(0, 787) 0.0189225406716
(0, 786) 0.0203349496787
(0, 783) 0.0612211932042
(0, 777) 0.0351262799651
(0, 769) 0.0427573093934
(0, 759) 0.0556019594956
(0, 727) 0.0664855923594
(0, 662) 0.0474010721489
(0, 653) 0.0363489770917
(0, 625) 0.0537929702931
(0, 578) 0.0466956847848
(0, 561) 0.0537929702931
(0, 524) 0.0434086437826
(0, 139) 0.0510652435029
(0, 137) 0.0556019594956
dict_keys(['text'])
dict_keys(['text'])
tfidf transform ['every year , hollywood crowns a new " it boy " --a young actor pegged for major movie stardom . inheriting the mantle from last year\'s " winner , " matthew mcconaughey , is matt damon , and , like mcconaughey , he proves to be more than just a fresh young face , as evidenced in two radically different projects currently in release--john grisham\'s the rainmaker and good will hunting . damon has his first major starring role , a la mcconaughey , in a john grisham adaptation--in this case , francis ford coppola\'s take on the rainmaker . damon plays wet-behind-the-ears attorney rudy baylor , who , immediately after passing the bar exam , finds himself representing the mother ( mary kay place ) of a terminally ill young man ( john whitworth ) in a big-league suit against a negligent insurance company . while rudy\'s ( and the film\'s ) main concern is this case , he also finds time to protect a young wife ( claire danes ) from her abusive husband ( melrose placer andrew shue , in a mercifully brief role ) . written for the screen and directed by coppola , the rainmaker is the best grisham film yet mostly because it does not take itself too seriously . coppola\'s most notable--and effective--contribution to the tried-and-true grisham formula is a sense of humor about itself , which largely comes in the presence of danny devito ( as rudy\'s unlicensed co-counsel ) and golden globe nominee jon voight ( as the insurance company\'s hotshot attorney ) . the inclusion of the battered wife subplot feels rather superfluous , but danes is as superb as always . then , of course , there is damon , who nicely juggles the weighty ( the insurance case , the spousal abuse ) and the humorous ( rudy\'s often comical naivete ) requirements of his role without missing a beat . as good as he is in the rainmaker , damon showcases the depth of his talent in good will hunting , directed by gus van sant and written by actor ben affleck and damon himself . damon plays the title character , will hunting , a troubled young construction worker/janitor at mit who also happens to be a supergenius . in an attempt to steer this brilliant young mind in the right direction , an mit math professor ( stellan skarsgard ) taps his old college friend , community college psychologist sean mcguire ( robin williams ) , to counsel the abrasive , standoffish will and try to help him come to terms with his turbulent life . good will hunting is the touchy-feely enterprise its plot synopsis suggests , but to simply dismiss it as that would be to discount the true emotional chords affleck and damon\'s intelligent script touches . even though no one ( and , if so , _very_ few people ) can directly relate to will\'s burden of superhuman intelligence , the insecurities he suffers are universal . the material is brought to life by the terrific ensemble of actors . williams delivers a nice dramatic turn ; affleck , a hot up-and-coming actor himself ( chasing amy ) , turns up in a warm and charming performance as will\'s best friend ; and the ever-appealing minnie driver shines as will\'s harvard-schooled love interest . the clear standout in the cast , though , is damon , who bravely does not soften will\'s prickly nature but has such a natural ease with the audience that it is hard _not_ to care for him . so many names come and go with the fluctuations of the hollywood hype machine , but based on his impressive work in john grisham\'s the rainmaker and especially good will hunting , it is a safe bet that matt damon is one name we will be hearing a lot more of in the years to come . ']
(0, 33922) 0.0782778859019
(0, 33860) 0.0254673582918
(0, 33853) 0.0429825392825
(0, 33752) 0.0551489095671
(0, 33644) 0.0523525285789
(0, 33639) 0.0233839010695
(0, 33473) 0.0508273967356
(0, 33416) 0.0766158897023
(0, 33389) 0.0514602313955
(0, 33240) 0.0625203911094
(0, 33177) 0.0753478772901
(0, 32999) 0.0515650787148
(0, 32790) 0.0682705864896
(0, 32372) 0.047001291995
(0, 31952) 0.0565559496979
(0, 31401) 0.0325220429263
(0, 31395) 0.0336150350298
(0, 31382) 0.0834411199851
(0, 31320) 0.0360983555727
(0, 31290) 0.0554897684213
(0, 31266) 0.055802392691
(0, 31189) 0.0475281218567
(0, 30880) 0.0800821207759
(0, 30876) 0.0541072357697
(0, 30700) 0.035700268796
: :
(0, 4017) 0.0437284635172
(0, 3918) 0.0800821207759
(0, 3825) 0.0368654187479
(0, 3220) 0.0253592456573
(0, 3152) 0.055802392691
(0, 3144) 0.0360779606048
(0, 3065) 0.0458572648007
(0, 2881) 0.0501335640589
(0, 2814) 0.0800821207759
(0, 2757) 0.0325718343924
(0, 2662) 0.0518217062121
(0, 2263) 0.0253592456573
(0, 2237) 0.106986207125
(0, 2210) 0.0370104650988
(0, 1730) 0.0478930290892
(0, 1510) 0.0565559496979
(0, 1456) 0.0561730930674
(0, 1009) 0.122195492184
(0, 815) 0.0508273967356
(0, 787) 0.0272536651801
(0, 786) 0.0614639946562
(0, 632) 0.0654547587557
(0, 626) 0.06072051527
(0, 586) 0.0753478772901
(0, 458) 0.0774766785736
(0, 0) 7257.0
(0, 138) 0.0556019594956
(0, 140) 0.0510652435029
(0, 525) 0.0434086437826
(0, 562) 0.0537929702931
(0, 579) 0.0466956847848
(0, 626) 0.0537929702931
(0, 654) 0.0363489770917
(0, 663) 0.0474010721489
(0, 728) 0.0664855923594
(0, 760) 0.0556019594956
(0, 770) 0.0427573093934
(0, 778) 0.0351262799651
(0, 784) 0.0612211932042
(0, 787) 0.0203349496787
(0, 788) 0.0189225406716
(0, 793) 0.0612211932042
(0, 835) 0.0323985531748
(0, 843) 0.0454460097943
(0, 878) 0.0448866955823
(0, 957) 0.0579341522019
(0, 1060) 0.03045176733
(0, 1109) 0.0438720179328
(0, 1373) 0.0490278774911
(0, 1511) 0.0392674618738
: :
(1399, 29094) 0.0764747675028
(1399, 29174) 0.0760894946923
(1399, 29569) 0.0705953506189
(1399, 29583) 0.0446871131389
(1399, 29991) 0.113758533904
(1399, 30437) 0.0385479201461
(1399, 30934) 0.071738702112
(1399, 31296) 0.053711104079
(1399, 31321) 0.0530029949903
(1399, 31401) 0.0720368647327
(1399, 31521) 0.10368156719
(1399, 31874) 0.0484213958232
(1399, 32438) 0.0657038094454
(1399, 32693) 0.0552216078978
(1399, 32729) 0.0987495691691
(1399, 32748) 0.0614009863633
(1399, 32751) 0.0726492126319
(1399, 33035) 0.0524184866077
(1399, 33047) 0.0420356528832
(1399, 33170) 0.207437766932
(1399, 33524) 0.0868663103555
(1399, 33604) 0.253691047709
(1399, 33640) 0.034334439106
(1399, 33687) 0.0509507314034
(1399, 33861) 0.0373935666193
{0, 1}
In [ ]:
In [82]:
# Train classifier
#lr = SGDClassifier(loss='log', penalty='l2', alpha=0.0001, max_iter=15000, shuffle=True, verbose=2)
lr = SGDClassifier(loss='log', penalty='l2', alpha=0.01, max_iter=100, shuffle=True, verbose=2)
lr.fit(feat_train, y_train)
y_pred = lr.predict(feat_train)
accuracy = accuracy_score(y_pred, y_train)
print("Accuracy on training set =", accuracy)
y_pred = lr.predict(feat_test)
accuracy = accuracy_score(y_pred, y_test)
print("Accuracy on test set =", accuracy)
# EXTRA CREDIT: Replace the following code with scikit-learn cross validation
-- Epoch 1
Norm: 218.87, NNZs: 25217, Bias: -0.144211, T: 1400, Avg. loss: 1287714.812042
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 100.14, NNZs: 30378, Bias: -0.166552, T: 2800, Avg. loss: 222468.445218
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 56.11, NNZs: 32307, Bias: -0.178242, T: 4200, Avg. loss: 127309.596050
Total training time: 0.01 seconds.
-- Epoch 4
Norm: 50.14, NNZs: 33128, Bias: -0.184788, T: 5600, Avg. loss: 88911.818882
Total training time: 0.01 seconds.
-- Epoch 5
Norm: 36.19, NNZs: 33506, Bias: -0.192887, T: 7000, Avg. loss: 66764.622624
Total training time: 0.02 seconds.
-- Epoch 6
Norm: 23.68, NNZs: 33745, Bias: -0.197856, T: 8400, Avg. loss: 53592.746203
Total training time: 0.02 seconds.
-- Epoch 7
Norm: 15.41, NNZs: 33854, Bias: -0.203774, T: 9800, Avg. loss: 51355.720944
Total training time: 0.03 seconds.
-- Epoch 8
Norm: 2.81, NNZs: 33897, Bias: -0.206870, T: 11200, Avg. loss: 41088.726185
Total training time: 0.03 seconds.
-- Epoch 9
Norm: 24.92, NNZs: 33991, Bias: -0.211461, T: 12600, Avg. loss: 39142.473945
Total training time: 0.03 seconds.
-- Epoch 10
Norm: 6.51, NNZs: 34064, Bias: -0.214832, T: 14000, Avg. loss: 32854.378432
Total training time: 0.03 seconds.
-- Epoch 11
Norm: 6.72, NNZs: 34064, Bias: -0.217499, T: 15400, Avg. loss: 30608.952077
Total training time: 0.04 seconds.
-- Epoch 12
Norm: 20.97, NNZs: 34064, Bias: -0.221284, T: 16800, Avg. loss: 27224.474708
Total training time: 0.04 seconds.
-- Epoch 13
Norm: 6.98, NNZs: 34064, Bias: -0.223984, T: 18200, Avg. loss: 25071.758777
Total training time: 0.04 seconds.
-- Epoch 14
Norm: 18.85, NNZs: 34064, Bias: -0.226202, T: 19600, Avg. loss: 23553.277853
Total training time: 0.05 seconds.
-- Epoch 15
Norm: 27.58, NNZs: 34064, Bias: -0.228938, T: 21000, Avg. loss: 21556.146849
Total training time: 0.05 seconds.
-- Epoch 16
Norm: 8.66, NNZs: 34064, Bias: -0.230476, T: 22400, Avg. loss: 20397.805451
Total training time: 0.05 seconds.
-- Epoch 17
Norm: 17.31, NNZs: 34064, Bias: -0.232330, T: 23800, Avg. loss: 19914.031489
Total training time: 0.05 seconds.
-- Epoch 18
Norm: 5.30, NNZs: 34064, Bias: -0.234438, T: 25200, Avg. loss: 18341.952977
Total training time: 0.06 seconds.
-- Epoch 19
Norm: 3.97, NNZs: 34064, Bias: -0.236677, T: 26600, Avg. loss: 17153.538798
Total training time: 0.06 seconds.
-- Epoch 20
Norm: 22.88, NNZs: 34064, Bias: -0.238325, T: 28000, Avg. loss: 16540.189936
Total training time: 0.06 seconds.
-- Epoch 21
Norm: 14.76, NNZs: 34064, Bias: -0.240102, T: 29400, Avg. loss: 15083.690058
Total training time: 0.06 seconds.
-- Epoch 22
Norm: 7.22, NNZs: 34064, Bias: -0.241067, T: 30800, Avg. loss: 15061.259548
Total training time: 0.06 seconds.
-- Epoch 23
Norm: 10.33, NNZs: 34064, Bias: -0.242437, T: 32200, Avg. loss: 14423.142812
Total training time: 0.07 seconds.
-- Epoch 24
Norm: 17.28, NNZs: 34064, Bias: -0.243624, T: 33600, Avg. loss: 13697.594228
Total training time: 0.07 seconds.
-- Epoch 25
Norm: 6.86, NNZs: 34064, Bias: -0.244994, T: 35000, Avg. loss: 12999.241146
Total training time: 0.07 seconds.
-- Epoch 26
Norm: 14.19, NNZs: 34064, Bias: -0.246165, T: 36400, Avg. loss: 11587.731146
Total training time: 0.07 seconds.
-- Epoch 27
Norm: 11.73, NNZs: 34064, Bias: -0.247110, T: 37800, Avg. loss: 11886.357328
Total training time: 0.07 seconds.
-- Epoch 28
Norm: 13.07, NNZs: 34064, Bias: -0.247961, T: 39200, Avg. loss: 10843.521192
Total training time: 0.08 seconds.
-- Epoch 29
Norm: 4.95, NNZs: 34064, Bias: -0.248812, T: 40600, Avg. loss: 11279.396708
Total training time: 0.08 seconds.
-- Epoch 30
Norm: 9.92, NNZs: 34064, Bias: -0.250117, T: 42000, Avg. loss: 10742.709764
Total training time: 0.08 seconds.
-- Epoch 31
Norm: 3.43, NNZs: 34064, Bias: -0.251428, T: 43400, Avg. loss: 10098.822006
Total training time: 0.08 seconds.
-- Epoch 32
Norm: 5.17, NNZs: 34064, Bias: -0.252585, T: 44800, Avg. loss: 10184.077456
Total training time: 0.08 seconds.
-- Epoch 33
Norm: 8.68, NNZs: 34064, Bias: -0.253553, T: 46200, Avg. loss: 9531.128132
Total training time: 0.08 seconds.
-- Epoch 34
Norm: 8.02, NNZs: 34064, Bias: -0.254380, T: 47600, Avg. loss: 9194.223659
Total training time: 0.09 seconds.
-- Epoch 35
Norm: 2.05, NNZs: 34064, Bias: -0.255370, T: 49000, Avg. loss: 9209.607293
Total training time: 0.09 seconds.
-- Epoch 36
Norm: 8.40, NNZs: 34064, Bias: -0.256257, T: 50400, Avg. loss: 9488.787375
Total training time: 0.09 seconds.
-- Epoch 37
Norm: 2.06, NNZs: 34064, Bias: -0.257042, T: 51800, Avg. loss: 8786.837288
Total training time: 0.09 seconds.
-- Epoch 38
Norm: 5.25, NNZs: 34064, Bias: -0.257674, T: 53200, Avg. loss: 7893.822924
Total training time: 0.09 seconds.
-- Epoch 39
Norm: 2.02, NNZs: 34064, Bias: -0.258451, T: 54600, Avg. loss: 7805.084830
Total training time: 0.10 seconds.
-- Epoch 40
Norm: 12.29, NNZs: 34064, Bias: -0.259352, T: 56000, Avg. loss: 8694.466155
Total training time: 0.10 seconds.
-- Epoch 41
Norm: 2.52, NNZs: 34064, Bias: -0.260165, T: 57400, Avg. loss: 8458.915753
Total training time: 0.10 seconds.
-- Epoch 42
Norm: 2.32, NNZs: 34064, Bias: -0.260990, T: 58800, Avg. loss: 7859.021957
Total training time: 0.10 seconds.
-- Epoch 43
Norm: 2.98, NNZs: 34064, Bias: -0.261714, T: 60200, Avg. loss: 7194.682475
Total training time: 0.10 seconds.
-- Epoch 44
Norm: 4.32, NNZs: 34064, Bias: -0.262501, T: 61600, Avg. loss: 7647.202650
Total training time: 0.11 seconds.
-- Epoch 45
Norm: 4.87, NNZs: 34064, Bias: -0.263191, T: 63000, Avg. loss: 7203.897145
Total training time: 0.11 seconds.
-- Epoch 46
Norm: 6.05, NNZs: 34064, Bias: -0.264007, T: 64400, Avg. loss: 7227.156552
Total training time: 0.11 seconds.
-- Epoch 47
Norm: 2.05, NNZs: 34064, Bias: -0.264575, T: 65800, Avg. loss: 6242.300343
Total training time: 0.11 seconds.
-- Epoch 48
Norm: 2.62, NNZs: 34064, Bias: -0.265386, T: 67200, Avg. loss: 6620.688269
Total training time: 0.11 seconds.
-- Epoch 49
Norm: 2.08, NNZs: 34064, Bias: -0.266240, T: 68600, Avg. loss: 6288.116197
Total training time: 0.11 seconds.
-- Epoch 50
Norm: 2.46, NNZs: 34064, Bias: -0.267091, T: 70000, Avg. loss: 6487.131374
Total training time: 0.12 seconds.
-- Epoch 51
Norm: 2.56, NNZs: 34064, Bias: -0.267699, T: 71400, Avg. loss: 5846.040269
Total training time: 0.12 seconds.
-- Epoch 52
Norm: 6.68, NNZs: 34064, Bias: -0.268282, T: 72800, Avg. loss: 5978.743829
Total training time: 0.12 seconds.
-- Epoch 53
Norm: 4.74, NNZs: 34064, Bias: -0.268853, T: 74200, Avg. loss: 6116.698868
Total training time: 0.12 seconds.
-- Epoch 54
Norm: 3.74, NNZs: 34064, Bias: -0.269346, T: 75600, Avg. loss: 5843.702299
Total training time: 0.12 seconds.
-- Epoch 55
Norm: 4.38, NNZs: 34064, Bias: -0.269935, T: 77000, Avg. loss: 6028.951504
Total training time: 0.13 seconds.
-- Epoch 56
Norm: 3.18, NNZs: 34064, Bias: -0.270591, T: 78400, Avg. loss: 5704.874710
Total training time: 0.13 seconds.
-- Epoch 57
Norm: 2.89, NNZs: 34064, Bias: -0.271211, T: 79800, Avg. loss: 5982.074316
Total training time: 0.13 seconds.
-- Epoch 58
Norm: 3.59, NNZs: 34064, Bias: -0.271719, T: 81200, Avg. loss: 5130.978828
Total training time: 0.13 seconds.
-- Epoch 59
Norm: 4.07, NNZs: 34064, Bias: -0.272402, T: 82600, Avg. loss: 5433.203162
Total training time: 0.13 seconds.
-- Epoch 60
Norm: 2.66, NNZs: 34064, Bias: -0.272738, T: 84000, Avg. loss: 5377.065418
Total training time: 0.14 seconds.
-- Epoch 61
Norm: 5.40, NNZs: 34064, Bias: -0.273293, T: 85400, Avg. loss: 5040.723464
Total training time: 0.14 seconds.
-- Epoch 62
Norm: 2.01, NNZs: 34064, Bias: -0.273652, T: 86800, Avg. loss: 5090.105952
Total training time: 0.14 seconds.
-- Epoch 63
Norm: 3.45, NNZs: 34064, Bias: -0.274064, T: 88200, Avg. loss: 5247.323229
Total training time: 0.14 seconds.
-- Epoch 64
Norm: 2.17, NNZs: 34064, Bias: -0.274582, T: 89600, Avg. loss: 5100.543143
Total training time: 0.15 seconds.
-- Epoch 65
Norm: 2.22, NNZs: 34064, Bias: -0.275058, T: 91000, Avg. loss: 4617.276598
Total training time: 0.15 seconds.
-- Epoch 66
Norm: 4.90, NNZs: 34064, Bias: -0.275626, T: 92400, Avg. loss: 4826.084735
Total training time: 0.15 seconds.
-- Epoch 67
Norm: 4.27, NNZs: 34064, Bias: -0.276184, T: 93800, Avg. loss: 4969.400684
Total training time: 0.15 seconds.
-- Epoch 68
Norm: 3.27, NNZs: 34064, Bias: -0.276880, T: 95200, Avg. loss: 4793.829218
Total training time: 0.15 seconds.
-- Epoch 69
Norm: 2.07, NNZs: 34064, Bias: -0.277266, T: 96600, Avg. loss: 4710.832527
Total training time: 0.16 seconds.
-- Epoch 70
Norm: 2.02, NNZs: 34064, Bias: -0.277719, T: 98000, Avg. loss: 4864.092304
Total training time: 0.16 seconds.
-- Epoch 71
Norm: 4.59, NNZs: 34064, Bias: -0.278175, T: 99400, Avg. loss: 4521.275411
Total training time: 0.16 seconds.
-- Epoch 72
Norm: 6.76, NNZs: 34064, Bias: -0.278634, T: 100800, Avg. loss: 4799.373745
Total training time: 0.17 seconds.
-- Epoch 73
Norm: 2.78, NNZs: 34064, Bias: -0.279077, T: 102200, Avg. loss: 4081.690189
Total training time: 0.17 seconds.
-- Epoch 74
Norm: 4.96, NNZs: 34064, Bias: -0.279544, T: 103600, Avg. loss: 4507.801844
Total training time: 0.17 seconds.
-- Epoch 75
Norm: 2.01, NNZs: 34064, Bias: -0.279949, T: 105000, Avg. loss: 4074.571352
Total training time: 0.17 seconds.
-- Epoch 76
Norm: 3.19, NNZs: 34064, Bias: -0.280261, T: 106400, Avg. loss: 4093.696834
Total training time: 0.17 seconds.
-- Epoch 77
Norm: 2.67, NNZs: 34064, Bias: -0.280588, T: 107800, Avg. loss: 4183.741830
Total training time: 0.18 seconds.
-- Epoch 78
Norm: 5.36, NNZs: 34064, Bias: -0.280985, T: 109200, Avg. loss: 4107.464490
Total training time: 0.18 seconds.
-- Epoch 79
Norm: 2.01, NNZs: 34064, Bias: -0.281322, T: 110600, Avg. loss: 4322.262151
Total training time: 0.18 seconds.
-- Epoch 80
Norm: 4.45, NNZs: 34064, Bias: -0.281600, T: 112000, Avg. loss: 3762.108972
Total training time: 0.18 seconds.
-- Epoch 81
Norm: 4.45, NNZs: 34064, Bias: -0.281858, T: 113400, Avg. loss: 3733.370012
Total training time: 0.19 seconds.
-- Epoch 82
Norm: 2.36, NNZs: 34064, Bias: -0.282331, T: 114800, Avg. loss: 4075.734892
Total training time: 0.19 seconds.
-- Epoch 83
Norm: 2.60, NNZs: 34064, Bias: -0.282721, T: 116200, Avg. loss: 3814.406979
Total training time: 0.20 seconds.
-- Epoch 84
Norm: 2.08, NNZs: 34064, Bias: -0.283115, T: 117600, Avg. loss: 3624.583720
Total training time: 0.20 seconds.
-- Epoch 85
Norm: 2.08, NNZs: 34064, Bias: -0.283572, T: 119000, Avg. loss: 3600.108959
Total training time: 0.21 seconds.
-- Epoch 86
Norm: 2.24, NNZs: 34064, Bias: -0.284031, T: 120400, Avg. loss: 3884.967846
Total training time: 0.22 seconds.
-- Epoch 87
Norm: 2.74, NNZs: 34064, Bias: -0.284493, T: 121800, Avg. loss: 3786.596196
Total training time: 0.22 seconds.
-- Epoch 88
Norm: 7.50, NNZs: 34064, Bias: -0.284836, T: 123200, Avg. loss: 3401.150210
Total training time: 0.23 seconds.
-- Epoch 89
Norm: 2.06, NNZs: 34064, Bias: -0.285239, T: 124600, Avg. loss: 3611.987490
Total training time: 0.23 seconds.
-- Epoch 90
Norm: 2.45, NNZs: 34064, Bias: -0.285622, T: 126000, Avg. loss: 3491.906820
Total training time: 0.23 seconds.
-- Epoch 91
Norm: 3.32, NNZs: 34064, Bias: -0.286067, T: 127400, Avg. loss: 3427.519416
Total training time: 0.24 seconds.
-- Epoch 92
Norm: 3.59, NNZs: 34064, Bias: -0.286453, T: 128800, Avg. loss: 3487.428547
Total training time: 0.24 seconds.
-- Epoch 93
Norm: 2.59, NNZs: 34064, Bias: -0.286761, T: 130200, Avg. loss: 3220.351968
Total training time: 0.25 seconds.
-- Epoch 94
Norm: 3.37, NNZs: 34064, Bias: -0.287059, T: 131600, Avg. loss: 3356.773123
Total training time: 0.25 seconds.
-- Epoch 95
Norm: 2.23, NNZs: 34064, Bias: -0.287384, T: 133000, Avg. loss: 3398.589063
Total training time: 0.26 seconds.
-- Epoch 96
Norm: 2.85, NNZs: 34064, Bias: -0.287720, T: 134400, Avg. loss: 3599.481901
Total training time: 0.27 seconds.
-- Epoch 97
Norm: 4.69, NNZs: 34064, Bias: -0.287957, T: 135800, Avg. loss: 3153.925242
Total training time: 0.27 seconds.
-- Epoch 98
Norm: 2.70, NNZs: 34064, Bias: -0.288309, T: 137200, Avg. loss: 3156.514823
Total training time: 0.28 seconds.
-- Epoch 99
Norm: 2.99, NNZs: 34064, Bias: -0.288624, T: 138600, Avg. loss: 3161.938154
Total training time: 0.28 seconds.
-- Epoch 100
Norm: 2.74, NNZs: 34064, Bias: -0.288883, T: 140000, Avg. loss: 3190.355351
Total training time: 0.29 seconds.
Accuracy on training set = 0.499285714286
Accuracy on test set = 0.501666666667
In [ ]:
In [ ]:
In [ ]:
Content source: bdmckean/MachineLearning
Similar notebooks: