In [6]:
import pandas as pd
import fastparquet
from gensim.models.wrappers import FastText
import csv
import matplotlib.pyplot as plt
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
import nltk
nltk.download('punkt')
nltk.download("stopwords")
from nltk.corpus import stopwords
import re
import string
In [2]:
pd.options.mode.chained_assignment = None
In [3]:
# Load the precomputed dataframe from a local parquet file.
# Columns used downstream: 'sentiment' and 'text_clean1'
# (presumably a label column and pre-cleaned text — confirm schema).
pfile = fastparquet.ParquetFile('5col_DFrame.parq')
df = pfile.to_pandas() # all columns
#df2 = pfile.to_pandas(columns=['floats', 'times']) # pick some columns
In [4]:
df.tail()
Out[4]:
In [7]:
# Write fastText-formatted data files: one example per line, formatted
# as "__label__<sentiment> <cleaned text>".
TRAIN_SIZE = 10000  # first TRAIN_SIZE examples -> train.txt, rest -> test.txt

dftrain = pd.DataFrame()
dftrain.loc[:, 'data'] = "__label__" + df.loc[:, 'sentiment'] + " " + df.loc[:, 'text_clean1']

# If either source column is NaN the concatenation is NaN, which the old
# to_csv-based export serialized as a blank line and fastText would choke
# on / mis-learn from; drop those rows instead.
labeled = dftrain['data'].dropna()

def _write_lines(path, lines):
    # Write raw lines with no quoting/escaping — fastText expects plain
    # text lines, not CSV. This replaces the fragile
    # to_csv(sep='\n', quoting=QUOTE_NONE, quotechar='') hack, which is
    # rejected by the csv layer in modern pandas.
    with open(path, 'w') as f:
        for line in lines:
            f.write(line + '\n')

_write_lines('train.txt', labeled.iloc[:TRAIN_SIZE])
_write_lines('test.txt', labeled.iloc[TRAIN_SIZE:])
In [8]:
###FAST TEXT###
# Train a supervised fastText classifier on train.txt; the second
# argument is the output prefix (presumably writes model.bin — confirm
# against the installed fasttext version's docs).
# NOTE(review): `fasttext.supervised` is the legacy pre-0.9 Python API;
# the current package uses `fasttext.train_supervised` and returns a
# different result object. The evaluation cell below depends on the
# legacy API (`result.precision` / `.recall` / `.nexamples`).
import fasttext
classifier = fasttext.supervised('train.txt', 'model')
In [ ]:
# Evaluate the trained classifier on the held-out file and report
# precision@1, recall@1, and the number of test examples.
result = classifier.test('test.txt')
for label, value in (('P@1', result.precision),
                     ('R@1', result.recall),
                     ('Number of examples', result.nexamples)):
    print(f'{label}: {value}')
In [ ]:
# Spot-check predictions on two hand-written examples (one clearly
# negative token, one positive-sounding sentence).
sample_inputs = [
    'motherfucking',
    'I really like what senator mccain said about the future of our great nation',
]
predicted = classifier.predict(sample_inputs)
print(predicted)