In [1]:
import matplotlib.pyplot as plt
from seaborn import pairplot
from seaborn import set as sns_set
from context import *
from util.classifiers import *
from util.dfmgmt import initSet, wrangle
In [2]:
sns_set(style="whitegrid", font_scale=1)
%matplotlib inline
target = 'charted' # main feature to be predicted
bestFeatures = ['density', 'unique_words', 'sentiment', 'explicit']
In [3]:
# Build the initial frame with initSet() and keep only the rows whose
# 'decade' value is not 2010.
df = initSet().loc[lambda frame: frame['decade'] != 2010]
In [4]:
# Preview the first 10 rows of df; as the last expression in the cell, the
# result is what the cell displays.
df.head(10)
Out[4]:
In [5]:
# Run 1 (no time columns): both 'year' and 'decade' are listed in removeList,
# together with the target column 'charted'.
# NOTE(review): wrangle() is a project helper that is not visible here.
# Presumably dropList columns are dropped from the frame and removeList columns
# are excluded from the returned feature list — confirm in util.dfmgmt.
dropList = ['most_used_term']
removeList = ['decade', 'year', 'charted']
# With the fourth argument set to True the result is unpacked as a
# (frame, feature-name list) pair; df is rebound to the wrangled frame.
df, features = wrangle(df, dropList, removeList, True)
df.head()
Out[5]:
In [6]:
# Run 1: select the feature columns returned by wrangle() and the target
# column, then hand both to Classifiers.
# NOTE(review): Classifiers is not defined in this notebook; presumably it
# comes from the star import of util.classifiers — confirm.
X = df[features]
y = df[target]
model = Classifiers(X, y)
In [7]:
# The method calls in the following cells are opaque from here; they are kept
# one per cell and in this exact order. Their names suggest: preprocessing,
# baseline parameters, grid search, parameter refresh, plotting — TODO confirm
# against the Classifiers implementation.
model.initProc()
In [8]:
model.defaultParams()
In [9]:
model.gridSearch()
In [10]:
# Update model params (presumably with the grid-search results — confirm).
model.updateParams()
In [11]:
model.plotModels()
In [12]:
# The argument looks like an output file name for the best parameters of the
# run without time columns — verify what getBestParams does with it.
model.getBestParams('best_param_no_time.txt')
In [13]:
# Run 2: start again from a fresh initSet() frame, without the 2010 decade.
df = initSet().loc[lambda frame: frame['decade'] != 2010]
# Unlike run 1, 'year' is not listed in removeList here; 'decade' and the
# target column 'charted' still are.
# NOTE(review): the earlier note on this cell said both year and decade were
# kept, which does not match removeList — confirm the intended feature set.
dropList, removeList = ['most_used_term'], ['charted', 'decade']
# Fourth argument True: result is unpacked as (frame, feature-name list).
df, features = wrangle(df, dropList, removeList, True)
df.head()
Out[13]:
In [14]:
# Run 2: same model-fitting sequence as before, applied to the frame and
# feature list produced by the preceding wrangle() call.
X = df[features]
y = df[target]
model = Classifiers(X, y)
In [15]:
# The calls below are opaque project methods, kept one per cell and in this
# order. NOTE(review): their exact effects are not visible from this notebook.
model.initProc()
In [16]:
model.defaultParams()
In [17]:
model.gridSearch()
In [18]:
# Update model params (presumably with the grid-search results — confirm).
model.updateParams()
In [19]:
model.plotModels()
In [20]:
# The argument looks like an output file name for the best parameters of the
# run that keeps a time column — verify what getBestParams does with it.
model.getBestParams('best_param_time.txt')
In [21]:
# Run 3: train on the hand-picked bestFeatures, with the raw 'sentiment'
# column replaced by a derived 'sentiment_polarity' column.
#
# NOTE(review): this star import sits mid-notebook; `sentiment` used below
# presumably comes from it. Consider moving it to the import cell.
from util.polarize import *
df = initSet()
df = df[df['decade'] != 2010]
# 'decade' and the target column 'charted' are listed in removeList; 'year'
# is not. NOTE(review): wrangle() is not visible here — confirm how it uses
# dropList/removeList.
dropList = ['most_used_term']
removeList = ['charted', 'decade']
# Swap 'sentiment' for 'sentiment_polarity' by rebuilding the list rather than
# assigning through bestFeatures.index('sentiment'). The in-place version
# raised ValueError when this cell was executed a second time, because
# 'sentiment' had already been replaced. This form is safe to re-run and gives
# the same list on a fresh kernel.
bestFeatures = ['sentiment_polarity' if name == 'sentiment' else name
                for name in bestFeatures]
# Called without the fourth argument, wrangle()'s result is bound to df alone.
df = wrangle(df, dropList, removeList)
# Row-wise apply (axis=1): `sentiment` receives each row and its return value
# becomes that row's 'sentiment_polarity' entry.
df['sentiment_polarity'] = df.apply(sentiment, axis=1)
df.head()
Out[21]:
In [22]:
# Run 3: fit on the bestFeatures columns only (the list is updated in the
# preceding cell to reference 'sentiment_polarity').
X = df[bestFeatures]
y = df[target]
model = Classifiers(X, y)
In [23]:
# The calls below are opaque project methods, kept one per cell and in this
# order. NOTE(review): their exact effects are not visible from this notebook.
model.initProc()
In [24]:
model.defaultParams()
In [25]:
model.gridSearch()
In [26]:
# Update model params (presumably with the grid-search results — confirm).
model.updateParams()
In [27]:
model.plotModels()
In [28]:
# The argument looks like an output file name for the best parameters of the
# bestFeatures run — verify what getBestParams does with it.
model.getBestParams('default_best_param.txt')