In [2]:
import os

import pandas as pd
import sqlalchemy as sa

In [3]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer

In [8]:
# Engine for the tweet_harvester MySQL database.
# The full SQLAlchemy URL (credentials included) is read from the environment so
# the password is never stored in the notebook or its version history.
# Expected form of TWEET_HARVESTER_DB_URL:
#   mysql+pymysql://<user>:<password>@<host>/tweet_harvester?charset=utf8mb4&use_unicode=True
# A missing variable raises KeyError in this cell instead of surfacing later.
# The former `encoding="utf8"` argument is dropped: it only restated SQLAlchemy's
# default, and the parameter no longer exists in SQLAlchemy 2.0.
db_engine = sa.create_engine(os.environ["TWEET_HARVESTER_DB_URL"])

In [664]:
# One-off seed of the `stopword` table with NLTK's English stopword list.
# NOTE: to_sql is called with if_exists='append', so running this cell again
# inserts the same rows a second time.
stemmer = SnowballStemmer("english")

stopwords_df = (
    pd.DataFrame({'stopword': stopwords.words('english')})
    .assign(
        language='en',
        # 1-based id derived from the row position in the NLTK list
        stopword_id=lambda frame: frame.index + 1,
        word_stemmed=lambda frame: frame['stopword'].map(stemmer.stem),
    )
)

stopwords_df.to_sql(name='stopword', con=db_engine, if_exists='append', index=False)