In [1]:
import sqlite3
import numpy as np
import pandas as pd
import scipy.stats as stats
import scipy as sci
import matplotlib.pyplot as plt
import sklearn as skl
import statsmodels.api as sma
import statsmodels as sm
import statsmodels.formula.api as smf
from sklearn import decomposition
import statsmodels.stats.multicomp as comp
import seaborn as sns
%matplotlib inline
In [18]:
# Load the raw stats table; every later cell derives from `mons`.
mons = pd.read_csv(
    filepath_or_buffer="../Datasets/pokemon_preUSUM_data_with_total.csv",
)
In [19]:
# Peek at the first two rows to confirm the load worked.
mons.iloc[:2]
Out[19]:
In [21]:
# One row per DEXID: sort so the largest STAT_TOTAL comes first, keep the
# first row seen for each DEXID, then re-sort by index label.
filtered = (
    mons
    .sort_values(by="STAT_TOTAL", ascending=False)
    .drop_duplicates(subset="DEXID", keep="first")
    .sort_index()
)
In [24]:
# (rows, columns) of the de-duplicated frame.
(len(filtered.index), len(filtered.columns))
Out[24]:
In [25]:
# (rows, columns) of the raw frame, for comparison with `filtered`.
(len(mons.index), len(mons.columns))
Out[25]:
In [26]:
# Persist the max-STAT_TOTAL-per-DEXID table (the index is written too,
# since `index` is left at its default).
filtered.to_csv(
    path_or_buf="../Datasets/pokemon_stats_maxbyID.csv",
)
In [44]:
# Rows whose NAME contains any of these literal substrings are dropped
# before de-duplicating.
# `regex=False` matches the needles literally, and `na=False` keeps the mask
# strictly boolean when a NAME is missing; with the default, `~` and boolean
# indexing would fail on NaN.
alt_form_mask = (
    mons["NAME"].str.contains("-MEGA", regex=False, na=False)
    | mons["NAME"].str.contains("PRIMAL", regex=False, na=False)
    | mons["NAME"].str.contains("ALOLA", regex=False, na=False)
)

# Among the remaining rows keep, for each DEXID, the one with the largest
# STAT_TOTAL, then order the result by DEXID.
# NOTE: this rebinds `filtered`, replacing the frame built in the earlier cell.
filtered = (
    mons[~alt_form_mask]
    .sort_values("STAT_TOTAL", ascending=False)
    .drop_duplicates("DEXID")
    .sort_values("DEXID", ascending=True)
)
In [45]:
# Inspect the first 100 rows of the filtered, DEXID-ordered table.
filtered.iloc[:100]
Out[45]:
In [47]:
# Persist the table with MEGA / PRIMAL / ALOLA names excluded (the index is
# written too, since `index` is left at its default).
filtered.to_csv(
    path_or_buf="../Datasets/pokemon_nomegas_byid.csv",
)
In [ ]: