In [1]:
import sqlite3
import numpy as np
import pandas as pd
import scipy.stats as stats
import scipy as sci
import matplotlib.pyplot as plt
import sklearn as skl
import statsmodels.api as sma
import statsmodels as sm
import statsmodels.formula.api as smf
from sklearn import decomposition
import statsmodels.stats.multicomp as comp
import seaborn as sns

%matplotlib inline


C:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

In [18]:
mons = pd.read_csv("../Datasets/pokemon_preUSUM_data_with_total.csv")

In [19]:
mons.head(2)


Out[19]:
DEXID NAME HP ATTACK DEFENSE SPECIAL_ATTACK SPECIAL_DEFENSE SPEED STAT_TOTAL GEN
0 1 BULBASAUR 45 49 49 65 65 45 318 1
1 2 IVYSAUR 60 62 63 80 80 60 405 1

In [21]:
filtered = mons.sort_values("STAT_TOTAL", ascending=False).drop_duplicates("DEXID").sort_index()

In [24]:
filtered.shape


Out[24]:
(802, 10)

In [25]:
mons.shape


Out[25]:
(1025, 10)

In [26]:
filtered.to_csv("../Datasets/pokemon_stats_maxbyID.csv")

In [44]:
filtered = mons[~mons["NAME"].str.contains("-MEGA") & 
                ~mons["NAME"].str.contains("PRIMAL") &
               ~mons["NAME"].str.contains("ALOLA")].sort_values("STAT_TOTAL", ascending=False).drop_duplicates("DEXID").sort_values("DEXID", ascending=True)

In [45]:
filtered.head(100)


Out[45]:
DEXID NAME HP ATTACK DEFENSE SPECIAL_ATTACK SPECIAL_DEFENSE SPEED STAT_TOTAL GEN
0 1 BULBASAUR 45 49 49 65 65 45 318 1
1 2 IVYSAUR 60 62 63 80 80 60 405 1
2 3 VENUSAUR 80 82 83 100 100 80 525 1
3 4 CHARMANDER 39 52 43 60 50 65 309 1
4 5 CHARMELEON 58 64 58 80 65 80 405 1
5 6 CHARIZARD 78 84 78 109 85 100 534 1
6 7 SQUIRTLE 44 48 65 50 64 43 314 1
7 8 WARTORTLE 59 63 80 65 80 58 405 1
8 9 BLASTOISE 79 83 100 85 105 78 530 1
9 10 CATERPIE 45 30 35 20 20 45 195 1
10 11 METAPOD 50 20 55 25 25 30 205 1
11 12 BUTTERFREE 60 45 50 90 80 70 395 1
12 13 WEEDLE 40 35 30 20 20 50 195 1
13 14 KAKUNA 45 25 50 25 25 35 205 1
14 15 BEEDRILL 65 90 40 45 80 75 395 1
15 16 PIDGEY 40 45 40 35 35 56 251 1
16 17 PIDGEOTTO 63 60 55 50 50 71 349 1
17 18 PIDGEOT 83 80 75 70 70 101 479 1
18 19 RATTATA 30 56 35 25 35 72 253 1
810 20 RATICATE-1 75 71 70 40 80 77 413 1
20 21 SPEAROW 40 60 30 31 31 70 262 1
21 22 FEAROW 65 90 65 61 61 100 442 1
22 23 EKANS 35 60 44 40 54 55 288 1
23 24 ARBOK 60 95 69 65 79 80 448 1
24 25 PIKACHU 35 55 40 50 50 90 320 1
25 26 RAICHU 60 90 55 90 80 110 485 1
26 27 SANDSHREW 50 75 85 20 30 40 300 1
27 28 SANDSLASH 75 100 110 45 55 65 450 1
28 29 NIDORAN-F 55 47 52 40 40 41 275 1
29 30 NIDORINA 70 62 67 55 55 56 365 1
... ... ... ... ... ... ... ... ... ... ...
70 71 VICTREEBEL 80 105 65 100 70 70 490 1
71 72 TENTACOOL 40 40 35 50 100 70 335 1
72 73 TENTACRUEL 80 70 65 80 120 100 515 1
73 74 GEODUDE 40 80 100 30 30 20 300 1
74 75 GRAVELER 55 95 115 45 45 35 390 1
75 76 GOLEM 80 120 130 55 65 45 495 1
76 77 PONYTA 50 85 55 65 65 90 410 1
77 78 RAPIDASH 65 100 70 80 80 105 500 1
78 79 SLOWPOKE 90 65 65 40 40 15 315 1
79 80 SLOWBRO 95 75 110 100 80 30 490 1
80 81 MAGNEMITE 25 35 70 95 55 45 325 1
81 82 MAGNETON 50 60 95 120 70 70 465 1
82 83 FARFETCH�D 52 90 55 58 62 60 377 1
83 84 DODUO 35 85 45 35 35 75 310 1
84 85 DODRIO 60 110 70 60 60 100 460 1
85 86 SEEL 65 45 55 45 70 45 325 1
86 87 DEWGONG 90 70 80 70 95 70 475 1
87 88 GRIMER 80 80 50 40 50 25 325 1
88 89 MUK 105 105 75 65 100 50 500 1
89 90 SHELLDER 30 65 100 45 25 40 305 1
90 91 CLOYSTER 50 95 180 85 45 70 525 1
91 92 GASTLY 30 35 30 100 35 80 310 1
92 93 HAUNTER 45 50 45 115 55 95 405 1
93 94 GENGAR 60 65 60 130 75 110 500 1
94 95 ONIX 35 45 160 30 45 70 385 1
95 96 DROWZEE 60 48 45 43 90 42 328 1
96 97 HYPNO 85 73 70 73 115 67 483 1
97 98 KRABBY 30 105 90 25 25 50 325 1
98 99 KINGLER 55 130 115 50 50 75 475 1
99 100 VOLTORB 40 30 50 55 55 100 330 1

100 rows × 10 columns


In [47]:
filtered.to_csv("../Datasets/pokemon_nomegas_byid.csv")

In [ ]: