In [403]:
import pandas as pd
import numpy as np
import re
# AllCards.json is the MTGJSON card dump, keyed by card name; transpose so each row is a card
data = pd.read_json("AllCards.json")
data = data.T
data.index = range(len(data))
data.head()
Out[403]:
In [404]:
data.shape
Out[404]:
In [405]:
# keep only cards with a numeric cmc (NaN fails both comparisons) and drop extreme outliers
data = data[data['cmc'] >= 0]
data = data[data['cmc'] < 100]
len(data)
Out[405]:
In [406]:
def hasItem(c, x):
    # x is either a list of strings or NaN; NaN means the card has no entry for this field
    if type(x) is not list and np.isnan(x):
        return False
    return c in x

for col in ['White', 'Black', 'Red', 'Green', 'Blue']:
    data[col.lower()] = [1 if hasItem(col, x) else 0 for x in data['colors']]
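A quick sanity check, not part of the original run: the one-hot color flags just created should sum to plausible per-color card counts.
In [ ]:
# count cards flagged for each color (uses the lowercase column names created above)
data[['white', 'black', 'red', 'green', 'blue']].sum()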
In [407]:
del data['colorIdentity']
del data['colors']
del data['hand']
del data['imageName']
del data['names']
del data['mciNumber'] # card number on magiccards.info; not useful for modeling
del data['supertypes']
del data['life']
data.head()
Out[407]:
In [408]:
np.unique(data['power'])
Out[408]:
In [409]:
data['manaCost'].head()
Out[409]:
In [410]:
costList = "{2}{R}{R}"  # example mana cost string to test the generic-cost parse on
sum([int(x) if x.isnumeric() else 0 for x in costList])
Out[410]:
In [411]:
# count each colored mana symbol; the generic cost is the sum of the digits in the cost string
data['manaCost'] = data['manaCost'].fillna('')  # any card still missing a mana cost becomes an empty string
data['redCost'] = [x.count('R') for x in data['manaCost']]
data['blueCost'] = [x.count('U') for x in data['manaCost']]
data['greenCost'] = [x.count('G') for x in data['manaCost']]
data['blackCost'] = [x.count('B') for x in data['manaCost']]
data['whiteCost'] = [x.count('W') for x in data['manaCost']]
data['xCost'] = [x.count('X') for x in data['manaCost']]
data['colorlessCost'] = [sum([int(x) if x.isnumeric() else 0 for x in costList]) for costList in data['manaCost']]
data.head()
Out[411]:
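The digit-by-digit sum above undercounts generic costs written with two digits (for example "{10}" contributes 1 + 0 = 1) and simply ignores {X}. A regex-based alternative for the generic part, using the re module imported at the top, is sketched here as an optional refinement; it is not what the notebook actually ran.
In [ ]:
# sketch: sum whole numbers inside mana symbols such as {10} or {2}
def genericCost(cost):
    if not isinstance(cost, str):
        return 0
    return sum(int(n) for n in re.findall(r'\{(\d+)\}', cost))

genericCost("{10}{R}{R}")  # 10, where the digit-by-digit version returns 1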
In [412]:
del data['manaCost']
del data['cmc']
data.head()
Out[412]:
In [413]:
uniq_subtypes = []
for i in data['subtypes']:
    if type(i) is list:
        for j in i:
            if j not in uniq_subtypes:
                uniq_subtypes.append(j)
    else:
        if i not in uniq_subtypes:
            uniq_subtypes.append(i)
uniq_subtypes = [x for x in uniq_subtypes if x is not np.nan]
uniq_subtypes
Out[413]:
In [414]:
for i in uniq_subtypes:
    data[i] = [1 if hasItem(i, x) else 0 for x in data['subtypes']]
del data['subtypes']
data.head()
Out[414]:
In [415]:
np.unique(data['power'])
Out[415]:
In [416]:
def hasAsterisk(x):
    # flag power/toughness values like '*' or '1+*'; NaN (no power/toughness) also lands in the except branch
    try:
        if '*' in x:
            return 1
        else:
            return 0
    except:
        return 1

def cleanPowerToughness(x):
    # convert power/toughness to an int; anything non-numeric ('*', '1+*', NaN) becomes 0
    if x is np.nan:
        return 0
    try:
        return int(x)
    except:
        return 0

data['power_has_*'] = [hasAsterisk(x) for x in data['power']]
data['toughness_has_*'] = [hasAsterisk(x) for x in data['toughness']]
data['power'] = [cleanPowerToughness(x) for x in data['power']]
data['toughness'] = [cleanPowerToughness(x) for x in data['toughness']]
data.head()
data.head()
Out[416]:
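A couple of spot checks on the two helpers above, added here for illustration: starred values are flagged and zeroed out, plain numbers pass through.
In [ ]:
# hypothetical spot checks, not in the original run
hasAsterisk('1+*'), cleanPowerToughness('1+*'), hasAsterisk('3'), cleanPowerToughness('3')  # (1, 0, 0, 3)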
In [417]:
data['starter'] = [1 if x is True else 0 for x in data['starter']]  # boolean flag (True or missing) becomes 1/0
data.head()
Out[417]:
In [418]:
np.unique(data['layout'])
Out[418]:
In [419]:
uniq_layouts = list(set(data['layout']))
uniq_layouts
Out[419]:
In [420]:
for i in uniq_layouts:
    data["layout_" + i] = [1 if x == i else 0 for x in data['layout']]
del data['layout']
data.head()
Out[420]:
In [421]:
np.unique(data['types'])
Out[421]:
In [422]:
text_keywords = "Deathtouch Defender Double Strike Enchant Equip First Strike Flash Flying \
Haste Hexproof Indestructible Lifelink Menace Prowess Reach Trample Vigilance Absorb Affinity \
Amplify Annihilator Aura Swap Awaken Banding Battle Cry Bestow Bloodthirst Bushido Buyback \
Cascade Champion Changeling Cipher Conspire Convoke Cumulative Upkeep Cycling Dash Delve Dethrone \
Devoid Devour Dredge Echo Entwine Epic Evoke Evolve Exalted Exploit Extort Fading Fear Flanking \
Flashback Forecast Fortify Frenzy Fuse Graft Gravestorm Haunt Hidden Agenda Hideaway Horsemanship \
Infect Ingest Intimidate Kicker Landwalk Level Up Living Weapon Madness Miracle Modular Morph Myriad \
Ninjutsu Offering Outlast Overload Persist Phasing Poisonous Protection Provoke Prowl Rampage \
Rebound Recover Reinforce Renown Replicate Retrace Ripple Scavenge Skulk Shadow Shroud Soulbond \
Soulshift Splice Split Second Storm Sunburst Surge Suspend Totem Armor Transfigure Transmute \
Tribute Undying Unearth Unleash Vanishing Wither Activate Attach Cast Counter Destroy Discard \
Exchange Exile Fight Play Regenerate Reveal Sacrifice Scry Search Shuffle Tap Untap"
text_keywords.split()
Out[422]:
In [423]:
for keyword in text_keywords.split():
    # flag cards whose rules text mentions the keyword (substring match; NaN text counts as no match)
    data['keyword_' + keyword] = [1 if isinstance(x, str) and keyword in x else 0 for x in data['text']]
data.head()
Out[423]:
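To see which of these flags actually fire, a quick tally of the most common keyword columns, added here as a hypothetical check:
In [ ]:
# most common keyword flags; relies only on the 'keyword_' prefix used above
keyword_cols = [c for c in data.columns if c.startswith('keyword_')]
data[keyword_cols].sum().sort_values(ascending=False).head(10)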
In [424]:
del data['text']
data.head()
Out[424]:
In [425]:
uniq_types = []
for i in np.unique(data['types']):
    for j in i:
        if j not in uniq_types:
            uniq_types.append(j)
for uniq_type in uniq_types:
    data['type_' + uniq_type] = [int(hasItem(uniq_type, x)) for x in data['types']]
data.head()
Out[425]:
In [426]:
del data['types']
In [427]:
np.unique(data['loyalty'])
Out[427]:
In [428]:
def removeNan(x):
    # loyalty is NaN for non-planeswalkers; map it to 0, otherwise keep the integer value
    if x is np.nan:
        return 0
    else:
        return int(x)
data['loyalty'] = [removeNan(x) for x in data['loyalty']]
data.head()
Out[428]:
In [429]:
def isLegendary(x):
    # 'type' is the full type line string, e.g. 'Legendary Creature'
    if "Legendary" in x:
        return 1
    else:
        return 0

data['isLegendary'] = [isLegendary(x) for x in data['type']]
del data['type']
del data['name']
data.head()
Out[429]:
In [430]:
data.to_csv("cleaned_AllCards.csv")
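As a final, optional check (not in the original notebook), the CSV can be read back to confirm the round trip; to_csv above wrote the index as the first column, so it is used as index_col here.
In [ ]:
# hypothetical round-trip check
check = pd.read_csv("cleaned_AllCards.csv", index_col=0)
check.shape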
In [ ]: