Mostly adapted from: http://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html
In [1]:
%matplotlib inline
import numpy as np
from scipy.io import arff
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import patsy
import statsmodels.api as sm
from sklearn import tree, linear_model, metrics, dummy, naive_bayes, neighbors
from IPython.display import Image
import pydotplus
import nltk
import gensim
import wordcloud
import pyLDAvis
pyLDAvis.enable_notebook()
import pyLDAvis.gensim
In [2]:
sns.set_context("paper")
sns.set_style("ticks")
def get_confusion_matrix(clf, X, y, verbose=True, classes=None):
    # Predict on X, then return a classification report and the
    # confusion matrix as a labeled DataFrame.
    y_pred = clf.predict(X)
    cm = metrics.confusion_matrix(y_true=y, y_pred=y_pred)
    clf_report = metrics.classification_report(y, y_pred)
    if classes is None:
        classes = clf.classes_
    df_cm = pd.DataFrame(cm, columns=classes, index=classes)
    if verbose:
        print(clf_report)
        print(df_cm)
    return clf_report, df_cm
In [4]:
from sklearn.datasets import fetch_20newsgroups
In [5]:
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)
In [6]:
len(twenty_train.data), len(twenty_test.data)
Out[6]:
In [11]:
twenty_train.target_names
Out[11]:
In [13]:
print(twenty_train.data[0])
In [18]:
"\n".join(twenty_train.data[0].splitlines()[6:-5])
Out[18]:
In [21]:
twenty_train.target[0]
Out[21]:
In [22]:
twenty_train.target_names
Out[22]:
In [23]:
classification_categories = ["soc.religion.christian", "sci.med"]
In [24]:
classes = np.array(twenty_train.target_names)
In [25]:
classes
Out[25]:
In [26]:
from sklearn.feature_extraction.text import CountVectorizer
In [27]:
CountVectorizer?
In [28]:
count_vec = CountVectorizer(stop_words="english", token_pattern="[a-z]+")
In [29]:
X_train_counts = count_vec.fit_transform(twenty_train.data)
X_test_counts = count_vec.transform(twenty_test.data)
In [30]:
print "X_train_counts.shape =", X_train_counts.shape
print "X_test_counts.shape =", X_test_counts.shape
In [31]:
list(count_vec.vocabulary_.items())[:10]
Out[31]:
In [32]:
count_vec.get_feature_names()[100:110]
Out[32]:
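As a quick sanity check on the bag-of-words matrix, this illustrative cell (not in the original notebook) sums each column to find the most frequent vocabulary terms in the training set; it assumes `count_vec` and `X_train_counts` from the cells above.
In [ ]:
# Illustrative: the ten most frequent tokens across the training set.
word_totals = np.asarray(X_train_counts.sum(axis=0)).ravel()
vocab = np.array(count_vec.get_feature_names())
top = word_totals.argsort()[::-1][:10]
list(zip(vocab[top], word_totals[top]))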
In [33]:
clf = naive_bayes.MultinomialNB()
clf.fit(X_train_counts, twenty_train.target)
report, df_cm = get_confusion_matrix(clf, X_train_counts, twenty_train.target, classes=twenty_train.target_names)
df_cm
Out[33]:
In [34]:
report, df_cm = get_confusion_matrix(clf, X_test_counts, twenty_test.target, classes=twenty_train.target_names)
df_cm
Out[34]:
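A single summary number can be handy alongside the confusion matrix; a hedged one-liner using the same `metrics` module:
In [ ]:
# Illustrative: overall test accuracy of the count-based naive Bayes model.
metrics.accuracy_score(twenty_test.target, clf.predict(X_test_counts))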
In [35]:
from sklearn.pipeline import Pipeline
In [36]:
clf = Pipeline([
    ("vect", CountVectorizer(stop_words="english", token_pattern="[a-z]+")),
    ("nb_clf", naive_bayes.MultinomialNB())
])
In [37]:
X = twenty_train.data
y = twenty_train.target
classes = twenty_train.target_names
clf.fit(X, y)
report, df_cm = get_confusion_matrix(clf, X, y, classes=classes)
df_cm
Out[37]:
In [38]:
clf.classes_
Out[38]:
In [39]:
clf.predict(twenty_test.data[:10])
Out[39]:
In [40]:
report, df_cm = get_confusion_matrix(clf, twenty_test.data, twenty_test.target, classes=classes)
df_cm
Out[40]:
In [41]:
clf = Pipeline([
    ("vect", CountVectorizer(stop_words="english", token_pattern="[a-z]+")),
    ("logreg_clf", linear_model.LogisticRegression(multi_class="multinomial", solver="lbfgs"))
])
In [42]:
X = twenty_train.data
y = twenty_train.target
classes = twenty_train.target_names
clf.fit(X, y)
report, df_cm = get_confusion_matrix(clf, X, y, classes=classes)
df_cm
Out[42]:
In [43]:
report, df_cm = get_confusion_matrix(clf, twenty_test.data, twenty_test.target, classes=classes)
df_cm
Out[43]:
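To see what the logistic-regression pipeline has learned, a hedged sketch: pull the fitted vectorizer and classifier out of `named_steps` (step names as defined above) and list the strongest positive weights per class.
In [ ]:
# Illustrative: ten most positively weighted tokens per class in the fitted pipeline.
vect = clf.named_steps["vect"]
logreg = clf.named_steps["logreg_clf"]
feature_names = np.array(vect.get_feature_names())
for i, name in enumerate(classes):
    top = logreg.coef_[i].argsort()[::-1][:10]
    print(name, list(feature_names[top]))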
In [44]:
text = " ".join(twenty_train.data)
In [45]:
wc = wordcloud.WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
In [46]:
plt.figure()
plt.imshow(wc)
plt.axis("off")
plt.show()
In [47]:
def get_words_of_class(data, labels, c=0, ax=None):
    # Build a word cloud from all documents whose label equals c.
    if ax is None:
        fig, ax = plt.subplots()
    labels = np.array(labels)
    idx = np.where(labels == c)[0]
    text = " ".join(data[i] for i in idx)
    wc = wordcloud.WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
    ax.imshow(wc)
    ax.axis("off")
    return ax
In [48]:
fig, ax = plt.subplots(2, 2, figsize=(10, 10))
ax = ax.flatten()
labels = twenty_train.target
data = twenty_train.data
classes = twenty_train.target_names
for i, axi in enumerate(ax):
    get_words_of_class(data, labels, c=i, ax=axi)
    axi.set_title(classes[i])
fig.tight_layout()
In [49]:
from nltk.corpus import movie_reviews
In [50]:
movie_reviews.categories()
Out[50]:
In [51]:
movie_reviews.words()
Out[51]:
In [52]:
sents = movie_reviews.sents()
In [53]:
len(sents)
Out[53]:
In [55]:
sents[0]
Out[55]:
In [57]:
movie_reviews.categories()[0]
Out[57]:
In [58]:
for i, s in enumerate(sents[:10]):
    print("S[%s]:\t%s" % (i, " ".join(s)))
In [61]:
bigrams = gensim.models.Phrases(sents[:1000])
In [62]:
list(bigrams.vocab.items())[:10]
Out[62]:
In [63]:
sorted(bigrams.vocab.items(), key=lambda x: x[1], reverse=True)[:10]
Out[63]:
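Applying the fitted phrase model to a tokenized sentence joins detected collocations with an underscore; a small illustrative check:
In [ ]:
# Illustrative: run one tokenized sentence through the phrase model;
# any bigram above the scoring threshold comes back joined with "_".
print(bigrams[sents[0]])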
In [64]:
word_frequencies = list(bigrams.vocab.values())
In [65]:
plt.hist(word_frequencies, bins=range(0,100), log=True)
plt.xscale("symlog")
In [66]:
sorted(((w, c) for w, c in bigrams.vocab.items() if isinstance(w, str) and "_" in w),
       key=lambda x: x[1], reverse=True)[:30]
Out[66]:
In [68]:
corpus = bigrams[sents[:1000]]
id2word = gensim.corpora.Dictionary(corpus)
In [69]:
len(id2word.keys())
Out[69]:
In [70]:
corpus_processed = [id2word.doc2bow(k) for k in corpus]
print(len(corpus_processed))
In [71]:
corpus_processed[0]
Out[71]:
In [72]:
corpus[0]
Out[72]:
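Each `doc2bow` entry is a `(token_id, count)` pair; the dictionary maps ids back to tokens, as this illustrative cell shows.
In [ ]:
# Illustrative: translate the first document's (token_id, count) pairs back to words.
[(id2word[i], n) for i, n in corpus_processed[0]]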
In [73]:
LDA_model = gensim.models.ldamodel.LdaModel(corpus_processed, num_topics=10, id2word=id2word)
In [76]:
LDA_model.print_topics(num_words=20)
Out[76]:
In [75]:
LDA_model.get_document_topics(corpus_processed[0])
Out[75]:
In [77]:
doc_topics = LDA_model[corpus_processed]
In [78]:
doc_topics[1]
Out[78]:
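Since each document gets a sparse topic distribution, a hedged sketch of pulling out the dominant topic for the first few documents:
In [ ]:
# Illustrative: the most probable topic for each of the first five documents.
for i in range(5):
    topic, prob = max(doc_topics[i], key=lambda t: t[1])
    print("doc %d -> topic %d (p=%.2f)" % (i, topic, prob))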
In [79]:
pyLDAvis.gensim.prepare(LDA_model, corpus_processed, id2word)
Out[79]:
In [81]:
text = nltk.word_tokenize("And now for something completely different")
nltk.pos_tag(text)
Out[81]:
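NLTK can also describe an unfamiliar Penn Treebank tag (requires the `tagsets` resource; illustrative):
In [ ]:
# Illustrative: describe the "RB" (adverb) tag; needs nltk.download("tagsets").
nltk.help.upenn_tagset("RB")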
In [82]:
text = nltk.word_tokenize("US president Barack Obama signed a new treaty with the Indian prime minister Narendra Modi, in New Delhi.")
pos_tags = nltk.pos_tag(text)
print(pos_tags)
In [84]:
try:
    chunk_tags = nltk.ne_chunk(pos_tags, binary=False)
    print(chunk_tags)
except LookupError as e:
    # ne_chunk needs the "maxent_ne_chunker" and "words" NLTK resources.
    print(e)
In [85]:
from nltk.corpus import wordnet as wn
In [88]:
# synsets() takes a plain word, not a sense key, and lists all senses of "dog".
wn.synsets('dog')
Out[88]:
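A specific sense can be fetched with `wn.synset` (singular) using its `lemma.pos.nn` key; an illustrative look at its gloss and hypernyms:
In [ ]:
# Illustrative: definition and hypernyms of the first noun sense of "dog".
dog = wn.synset('dog.n.01')
print(dog.definition())
print(dog.hypernyms())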