Our labels are located in the 'genres' column


In [35]:
from __future__ import print_function,division
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("../../data/steam/data.clean.csv")
df.shape,df.columns


Out[3]:
((8520, 6),
 Index([u'appid', u'name', u'detailed_description', u'about_the_game',
        u'categories', u'genres'],
       dtype='object'))

In [5]:
df['num_labels'] = df['genres'].apply(lambda txt: len(txt.split(',')))
df


Out[5]:
appid name detailed_description about_the_game categories genres num_labels
0 30 Day of Defeat enlist in an intense brand of axis vs. allied ... enlist in an intense brand of axis vs. allied ... Multi-player,Valve Anti-Cheat enabled Action 1
1 40 Deathmatch Classic enjoy fast-paced multiplayer gaming with death... enjoy fast-paced multiplayer gaming with death... Multi-player,Valve Anti-Cheat enabled Action 1
2 50 Half-Life: Opposing Force return to the black mesa research facility as ... return to the black mesa research facility as ... Single-player,Multi-player,Valve Anti-Cheat en... Action 1
3 60 Ricochet a futuristic action game that challenges your ... a futuristic action game that challenges your ... Multi-player,Valve Anti-Cheat enabled Action 1
4 70 Half-Life named game of the year by over 50 publications... named game of the year by over 50 publications... Single-player,Multi-player,Valve Anti-Cheat en... Action 1
5 80 Counter-Strike: Condition Zero with its extensive tour of duty campaign, a ne... with its extensive tour of duty campaign, a ne... Single-player,Multi-player,Valve Anti-Cheat en... Action 1
6 130 Half-Life: Blue Shift made by gearbox software and originally releas... made by gearbox software and originally releas... Single-player Action 1
7 220 Half-Life 2 1998. half-life sends a shock through the game... 1998. half-life sends a shock through the game... Single-player,Steam Achievements,Steam Trading... Action 1
8 240 Counter-Strike: Source the next installment of the world's # 1 online... the next installment of the world's # 1 online... Multi-player,Cross-Platform Multiplayer,Steam ... Action 1
9 280 Half-Life: Source winner of over 50 game of the year awards, hal... winner of over 50 game of the year awards, hal... Single-player Action 1
10 300 Day of Defeat: Source day of defeat offers intense online action gam... day of defeat offers intense online action gam... Multi-player,Cross-Platform Multiplayer,Steam ... Action 1
11 320 Half-Life 2: Deathmatch fast multiplayer action set in the half-life 2... fast multiplayer action set in the half-life 2... Multi-player,Valve Anti-Cheat enabled,Includes... Action 1
12 340 Half-Life 2: Lost Coast originally planned as a section of the highway... originally planned as a section of the highway... Single-player,Commentary available Action 1
13 360 Half-Life Deathmatch: Source half-life deathmatch: source is a recreation o... half-life deathmatch: source is a recreation o... Multi-player,Valve Anti-Cheat enabled Action 1
14 380 Half-Life 2: Episode One half-life 2 has sold over 4 million copies wor... half-life 2 has sold over 4 million copies wor... Single-player,Steam Achievements,Captions avai... Action 1
15 400 Portal portal is a new single player game from valve.... portal is a new single player game from valve.... Single-player,Steam Achievements,Captions avai... Action 1
16 420 Half-Life 2: Episode Two half-life 2: episode two is the second in a tr... half-life 2: episode two is the second in a tr... Single-player,Steam Achievements,Captions avai... Action 1
17 440 Team Fortress 2 the tough break update is now available!about ... "the most fun you can have online" - pc gamer ... Multi-player,Cross-Platform Multiplayer,Steam ... Action,Free to Play 2
18 500 Left 4 Dead steam big pictureabout the gamefrom valve (the... from valve (the creators of counter-strike, ha... Single-player,Multi-player,Co-op,Steam Achieve... Action 1
19 550 Left 4 Dead 2 set in the zombie apocalypse, left 4 dead 2 (l... set in the zombie apocalypse, left 4 dead 2 (l... Single-player,Multi-player,Co-op,Steam Achieve... Action 1
20 570 Dota 2 dota is a competitive game of action and strat... dota is a competitive game of action and strat... Multi-player,Co-op,Steam Trading Cards,Steam W... Action,Free to Play,Strategy 3
21 620 Portal 2 portal 2 draws from the award-winning formula ... portal 2 draws from the award-winning formula ... Single-player,Co-op,Steam Achievements,Full co... Action,Adventure 2
22 630 Alien Swarm alien swarm is a game and source sdk release f... alien swarm is a game and source sdk release f... Single-player,Multi-player,Co-op,Steam Achieve... Action 1
23 730 Counter-Strike: Global Offensive counter-strike: global offensive (cs: go) will... counter-strike: global offensive (cs: go) will... Multi-player,Steam Achievements,Full controlle... Action 1
24 1002 Rag Doll Kung Fu featuring a wide collection of single and mult... featuring a wide collection of single and mult... Single-player,Multi-player Indie 1
25 1200 Red Orchestra: Ostfront 41-45 fight in the theatre of war that changed the w... fight in the theatre of war that changed the w... Multi-player,Steam Achievements,Valve Anti-Che... Action 1
26 1250 Killing Floor steam halloween sale - kf and all dlc - 75% of... killing floor is a co-op survival horror fps s... Single-player,Multi-player,Cross-Platform Mult... Action 1
27 1300 SiN Episodes: Emergence you are john blade, commander of hardcorps, an... you are john blade, commander of hardcorps, an... Single-player,Stats Action 1
28 1500 Darwinia combining fast-paced action with strategic bat... combining fast-paced action with strategic bat... Single-player Indie,Strategy 2
29 1510 Uplink you play an uplink agent who makes a living by... you play an uplink agent who makes a living by... Single-player Indie,Strategy 2
... ... ... ... ... ... ... ...
8490 457230 Atlas Reactor VR Character Viewer outwit to outlive in atlas reactor, the genre-... outwit to outlive in atlas reactor, the genre-... Single-player Strategy 1
8491 457420 Surgeon Simulator VR: Meet The Medic surgeon simulator vr: meet the medic be the me... surgeon simulator vr: meet the medic be the me... Single-player Action,Free to Play,Indie,Simulation 4
8492 457440 Watch This! watch this! is a first-person single player pl... watch this! is a first-person single player pl... Single-player,Steam Achievements,Partial Contr... Action,Adventure,Indie 3
8493 457450 Defend your Crypt defend your crypt is a strategy and puzzle gam... defend your crypt is a strategy and puzzle gam... Single-player,Steam Achievements,Steam Trading... Indie,Strategy 2
8494 457480 Domino Sky domino sky is a physics based game where you p... domino sky is a physics based game where you p... Single-player,Steam Achievements,Steam Trading... Casual,Indie,Simulation,Strategy 4
8495 457490 Forgotten, Not Lost - A Kinetic Novel an old farmer lives with his wife - however, h... an old farmer lives with his wife - however, h... Single-player,Steam Trading Cards Casual,RPG,Simulation 3
8496 457520 Moustache Mountain it is said, that an ancient civilization once ... it is said, that an ancient civilization once ... Single-player,Steam Achievements,Full controll... Action,Casual,Indie 3
8497 457530 My Lady you are miss bauxmont, the heiress to the baux... you are miss bauxmont, the heiress to the baux... Single-player,Steam Achievements,Captions avai... Casual,Indie,Simulation 3
8498 457570 Camp Sunshine camp sunshine is a 16-bit blood-soaked horror ... camp sunshine is a 16-bit blood-soaked horror ... Single-player,Steam Achievements,Partial Contr... Action,Adventure,Casual,Indie,RPG 5
8499 457580 The Visitor the visitor is a vr only horror experience abo... the visitor is a vr only horror experience abo... Single-player Casual,Indie 2
8500 457690 Hotel Blind hotel blind is a simulator of a blind person i... hotel blind is a simulator of a blind person i... Single-player,Steam Achievements,Partial Contr... Casual,Indie,Simulation 3
8501 457710 Road Madness in the 21st century, the world is occurred by ... in the 21st century, the world is occurred by ... Single-player,Steam Achievements,Steam Trading... Action,Racing 2
8502 457790 Capria: Magic of the Elements capria: magic of the elements is a first-perso... capria: magic of the elements is a first-perso... Single-player Action,Casual,Indie,Early Access 4
8503 457820 Outrage outrage is a short cyberpunk dungeon crawler. ... outrage is a short cyberpunk dungeon crawler. ... Single-player,Steam Achievements,Steam Trading... Adventure,Indie,RPG 3
8504 457860 Apollo 11 VR apollo 11 vr is the story of the greatest jour... apollo 11 vr is the story of the greatest jour... Single-player Adventure,Simulation 2
8505 457870 Minigame Party VR what is minigame party vr?minigame party vr is... what is minigame party vr?minigame party vr is... Single-player Action,Casual,Indie 3
8506 457930 Starship: Nova Strike we have been waiting for you commander. the dz... we have been waiting for you commander. the dz... Single-player Action,Casual,Indie 3
8507 457940 Krog Wars a new action game that can be played in 2d mod... a new action game that can be played in 2d mod... Single-player Action,Casual,Indie 3
8508 457960 Holopoint holopoint is pure archery madness. fight your ... holopoint is pure archery madness. fight your ... Single-player,Full controller support Action,Indie,Simulation,Sports 4
8509 458030 StarFringe: Adversus this is the story about the beginning of the c... this is the story about the beginning of the c... Single-player,Steam Achievements Strategy,Early Access 2
8510 458290 Space Bit Attack notice: made for vrspace bit attack was made w... the galaxy needs you! transport yourself into ... Single-player,Full controller support Action,Indie 2
8511 458370 VR Baseball - Home Run Derby there is no feeling greater than stepping up t... there is no feeling greater than stepping up t... Single-player,Full controller support,VR Support Indie,Sports 2
8512 458420 Last Heroes 3 presentationlast heroes is an rpg developed by... presentationlast heroes is an rpg developed by... Single-player,Full controller support,Steam Tr... Adventure,Casual,Indie,RPG,Strategy 5
8513 458700 Cursor Challenge do you want to test your skills and your refle... do you want to test your skills and your refle... Single-player,Co-op Casual,Indie 2
8514 458900 Julai julai - is an arcade flight shooter.the action... julai - is an arcade flight shooter.the action... Single-player Action,Casual,Indie,Simulation 4
8515 459100 Epsilon corp. epsilon is a secret community. there are a lot... epsilon is a secret community. there are a lot... Single-player Action,Adventure,Indie 3
8516 459260 Bowslinger disclaimer: this game requires an htc vive to ... disclaimer: this game requires an htc vive to ... Single-player Casual,Indie,Sports 3
8517 459310 The Hero Project: Redemption Season america's #1 reality show for heroes is back f... america's #1 reality show for heroes is back f... Single-player,Steam Achievements,Captions avai... Adventure,Indie,RPG 3
8518 459630 R.C. Bot Inc. energy and money: iridium. the race has begun!... energy and money: iridium. the race has begun!... Single-player,Steam Achievements,Full controll... Action,Casual,Indie,Simulation,Strategy 5
8519 460150 One Last Chance do you remember that high school crush you nev... do you remember that high school crush you nev... Single-player Casual,Indie,Simulation 3

8520 rows × 7 columns


In [14]:
# label cardinality
cardinality = df['num_labels'].mean()
cardinality


Out[14]:
2.4605633802816902

In [41]:
df['labelset'] = df['genres'].map(lambda txt: set(txt.split(',')))

label_list_list = df['labels'].tolist()
flattened_list_of_labels = [label for label_list in label_list_list for label in label_list]
unique_labels = set(flattened_list_of_labels)
L = len(unique_labels)
L,unique_labels


Out[41]:
(23,
 {'Accounting',
  'Action',
  'Adventure',
  'Animation & Modeling',
  'Audio Production',
  'Casual',
  'Design & Illustration',
  'Early Access',
  'Education',
  'Free to Play',
  'Indie',
  'Massively Multiplayer',
  'Photo Editing',
  'RPG',
  'Racing',
  'Simulation',
  'Software Training',
  'Sports',
  'Strategy',
  'Uncategorized',
  'Utilities',
  'Video Production',
  'Web Publishing'})

In [42]:
# label density
label_density = cardinality / L
label_density


Out[42]:
0.10698101653398653

In [55]:
# number of distinct labelsets
distinct_labelsets = []

for labelset in df['labelset'].tolist():
    
    x = str(sorted(tuple(labelset)))
    
    if x not in distinct_labelsets:
        distinct_labelsets.append(x)

num_distinct_labelsets = len(distinct_labelsets)
num_distinct_labelsets


Out[55]:
585

In [56]:
# number of labelsets appearing in a single instance

# this code is far from optimal; crude quick solution only

occurrences = dict()
for x in distinct_labelsets:
    occurrences[x] = 0
    
for index,row in df.iterrows():
    labelset_str = str(sorted(tuple(row['labelset'])))
    occurrences[labelset_str] += 1
    
one_hit_wonders = 0    
    
for k,v in occurrences.iteritems():
    if v == 1:
        one_hit_wonders += 1
        
one_hit_wonders


Out[56]:
237