In [122]:
%load_ext autoreload
%autoreload 2
import re
import aphrodite.results
import sklearn.metrics
import pandas as pd

import vislab
import vislab.results
import vislab.datasets


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [123]:
label_df = vislab.datasets.behance.get_illustration_df()

In [124]:
label_df


/Users/mmt/anaconda/envs/aphrodite/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[124]:
<class 'pandas.core.frame.DataFrame'>
Index: 24798 entries, behance_4501765_bad5b807dca87bbe144f5b0942986995 to behance_11643227_0aefea58304581fb511fe79f97e40874
Data columns (total 23 columns):
image_url         24798  non-null values
owner             24798  non-null values
project_id        24798  non-null values
tag_3d            24798  non-null values
tag_animals       24798  non-null values
tag_city          24798  non-null values
tag_fantasy       24798  non-null values
tag_food          24798  non-null values
tag_girl          24798  non-null values
tag_ink           24798  non-null values
tag_lettering     24798  non-null values
tag_logo          24798  non-null values
tag_minimal       24798  non-null values
tag_nature        24798  non-null values
tag_pencil        24798  non-null values
tag_portrait      24798  non-null values
tag_retro         24798  non-null values
tag_skull         24798  non-null values
tag_surreal       24798  non-null values
tag_vector        24798  non-null values
tag_vintage       24798  non-null values
tag_watercolor    24798  non-null values
tag_wood          24798  non-null values
dtypes: bool(20), int64(1), object(2)

Total number of elements


In [125]:
n_elem = len( label_df.index )
print n_elem


24798

Extract the tags


In [126]:
tags = label_df.columns
tags = [ tag for tag in tags if 'tag' in tag ]
print tags


['tag_3d', 'tag_animals', 'tag_city', 'tag_fantasy', 'tag_food', 'tag_girl', 'tag_ink', 'tag_lettering', 'tag_logo', 'tag_minimal', 'tag_nature', 'tag_pencil', 'tag_portrait', 'tag_retro', 'tag_skull', 'tag_surreal', 'tag_vector', 'tag_vintage', 'tag_watercolor', 'tag_wood']

In [127]:
n_tags = len( tags )
print n_tags


20

Create the 2D histogram


In [128]:
H = np.zeros( (n_tags, n_tags) )

In [129]:
for i, t1 in enumerate( tags ):
    for j, t2 in enumerate( tags ):
        H[i,j] = len( label_df[ label_df[t1]==True ][ label_df[t2]==True ] )

In [130]:
H[:5,:5]


Out[130]:
array([[ 1451.,    16.,    28.,    18.,    16.],
       [   16.,   565.,     0.,    29.,    24.],
       [   28.,     0.,   410.,     4.,     4.],
       [   18.,    29.,     4.,   389.,     4.],
       [   16.,    24.,     4.,     4.,   645.]])

Single-tag counts


In [148]:
main_counts = zip( tags, list( H.diagonal() ) )
for e in main_counts:
    print e[0], '\t', int( e[1] )


tag_3d 	1451
tag_animals 	565
tag_city 	410
tag_fantasy 	389
tag_food 	645
tag_girl 	311
tag_ink 	582
tag_lettering 	3191
tag_logo 	2821
tag_minimal 	354
tag_nature 	513
tag_pencil 	548
tag_portrait 	630
tag_retro 	652
tag_skull 	280
tag_surreal 	338
tag_vector 	790
tag_vintage 	721
tag_watercolor 	332
tag_wood 	451

List of most commonly co-occuring tag pairs


In [131]:
Ht = np.triu( H, 1 )

In [132]:
t_pairs = []
for i in range( 1, Ht.shape[0] ):
    for j in range( i, Ht.shape[1] ):
        t_pairs += [ ( tags[i], tags[j], Ht[i,j] ) ]

In [133]:
taglist1, taglist2, scorelist = zip( *t_pairs )

In [134]:
frame = pd.DataFrame( { 'tag1':taglist1, 'tag2':taglist2, 'score':scorelist } )

In [135]:
frame = frame.sort_index( by='score', ascending=False )

In [136]:
frame.head(20)


/Users/mmt/anaconda/envs/aphrodite/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
/Users/mmt/anaconda/envs/aphrodite/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[136]:
score tag1 tag2
100 691 tag_lettering tag_logo
166 280 tag_retro tag_vintage
105 173 tag_lettering tag_retro
109 158 tag_lettering tag_vintage
121 139 tag_logo tag_vintage
58 112 tag_food tag_logo
117 108 tag_logo tag_retro
113 101 tag_logo tag_minimal
120 85 tag_logo tag_vector
108 76 tag_lettering tag_vector
9 68 tag_animals tag_nature
57 68 tag_food tag_lettering
123 68 tag_logo tag_wood
101 62 tag_lettering tag_minimal
111 61 tag_lettering tag_wood
165 55 tag_retro tag_vector
97 49 tag_ink tag_watercolor
152 48 tag_pencil tag_watercolor
90 44 tag_ink tag_pencil
34 44 tag_city tag_vintage

Printing full histogram of all tag pair occurances


In [137]:
from jinja2 import Template
TEMPLATE='''
<table>

<tr>
<td></td>
{% for row in table %}
<td>{{ row.label }}</td>
{% endfor %}
</tr>

{% for row in table %}
<tr>
<td>{{ row.label }}</td>
{% for element in row.elements %}
<td style="background-color:
    rgb( {{ (128*element/hmax + 127) | round | int }},
         {{ (128*element/hmax + 127) | round | int }}, 
         {{ (128*element/hmax + 127) | round | int }} );
">{{ element | int }}</td>
{% endfor %}
</tr>
{% endfor %}

</table>
'''

def hist_to_table( h, labels ):
    
    h = h.copy()
    for i in range( h.shape[0] ):
        h[i,i] = 0
    
    hmax = h.max()
    
    table = []
#     table += [ [ '' ] + labels ]
    for i in range( h.shape[0] ):
        table += [ {'label':labels[i], 'elements':list(h[i,:]) } ]
        
    template = Template( TEMPLATE )
    result = template.render( table=table, hmax=hmax )
    
    from IPython.display import HTML
    return HTML( result )

DIAGONAL ELEMENTS HAVE BEEN ZEROED-OUT TO BETTER VISUALIZE


In [138]:
res = hist_to_table( H, tags ); res


Out[138]:
tag_3d tag_animals tag_city tag_fantasy tag_food tag_girl tag_ink tag_lettering tag_logo tag_minimal tag_nature tag_pencil tag_portrait tag_retro tag_skull tag_surreal tag_vector tag_vintage tag_watercolor tag_wood
tag_3d 0 16 28 18 16 4 8 222 70 32 16 0 4 36 29 16 23 14 0 37
tag_animals 16 0 0 29 24 0 20 27 28 8 68 15 4 21 4 12 19 21 9 8
tag_city 28 0 0 4 4 0 12 19 32 8 19 4 8 32 0 12 31 44 11 12
tag_fantasy 18 29 4 0 4 7 22 0 12 4 11 0 12 8 12 24 24 12 8 7
tag_food 16 24 4 4 0 12 0 68 112 4 20 4 0 28 0 4 28 28 8 12
tag_girl 4 0 0 7 12 0 11 8 4 0 17 23 43 9 4 8 17 12 11 4
tag_ink 8 20 12 22 0 11 0 32 12 8 8 44 32 4 24 12 12 12 49 4
tag_lettering 222 27 19 0 68 8 32 0 691 62 11 36 4 173 13 4 76 158 8 61
tag_logo 70 28 32 12 112 4 12 691 0 101 12 24 12 108 24 4 85 139 12 68
tag_minimal 32 8 8 4 4 0 8 62 101 0 0 0 8 13 0 4 18 24 0 4
tag_nature 16 68 19 11 20 17 8 11 12 0 0 15 16 8 12 12 16 12 25 15
tag_pencil 0 15 4 0 4 23 44 36 24 0 15 0 41 0 9 23 4 27 48 8
tag_portrait 4 4 8 12 0 43 32 4 12 8 16 41 0 4 8 32 28 4 19 0
tag_retro 36 21 32 8 28 9 4 173 108 13 8 0 4 0 4 4 55 280 0 16
tag_skull 29 4 0 12 0 4 24 13 24 0 12 9 8 4 0 7 36 0 0 0
tag_surreal 16 12 12 24 4 8 12 4 4 4 12 23 32 4 7 0 4 8 0 4
tag_vector 23 19 31 24 28 17 12 76 85 18 16 4 28 55 36 4 0 40 4 4
tag_vintage 14 21 44 12 28 12 12 158 139 24 12 27 4 280 0 8 40 0 8 36
tag_watercolor 0 9 11 8 8 11 49 8 12 0 25 48 19 0 0 0 4 8 0 0
tag_wood 37 8 12 7 12 4 4 61 68 4 15 8 0 16 0 4 4 36 0 0

In [138]: