notebook.community

Edit and run



In [2]:

    
import sqlalchemy as sa
import pandas as pd



In [3]:

    
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer



In [8]:

    
# Create the SQLAlchemy engine for the `tweet_harvester` MySQL database,
# using the PyMySQL driver. The URL's `charset=utf8mb4` option requests
# MySQL's full 4-byte UTF-8 character set on the connection.
import os

# NOTE(review): the fallback URL below embeds a plain-text password. Set the
# TWEET_HARVESTER_DB_URL environment variable to supply the connection URL
# without committing credentials; the literal is kept only so the notebook
# keeps working unchanged where the variable is not set.
db_url = os.environ.get(
    "TWEET_HARVESTER_DB_URL",
    "mysql+pymysql://will:melody11@192.168.1.200/tweet_harvester?charset=utf8mb4&use_unicode=True",
)

# `encoding=` is deliberately not passed: it was deprecated in SQLAlchemy 1.4
# and removed in 2.0 (where create_engine rejects it), and on Python 3 its
# default was already UTF-8, so omitting it does not change behaviour.
db_engine = sa.create_engine(db_url)



In [664]:

    
# One-off seeding of the `stopword` table with NLTK's English stopword list.
# Each row carries the raw word, a language tag, a 1-based id derived from the
# row position, and the Snowball-stemmed form of the word.
stemmer = SnowballStemmer("english")

stopwords_df = pd.DataFrame({'stopword': stopwords.words('english')}).assign(
    language='en',
    # ids start at 1 rather than at the frame's 0-based index
    stopword_id=lambda frame: frame.index + 1,
    word_stemmed=lambda frame: frame['stopword'].map(stemmer.stem),
)

# Rows are appended to the existing table; the DataFrame index is not written.
stopwords_df.to_sql(name='stopword', con=db_engine, if_exists='append', index=False)