In [1]:
import pandas as pd
import numpy as np
import glob, os
from utils import *
from augment import *
import seaborn as sns
import matplotlib.pyplot as plt
import cPickle

sns.set_style("whitegrid")

%matplotlib inline

label_numbers =  {"Button":0, "Icon":1, "Image":2, "Input field":3, "Line":4, "Text":5, "Navigation Menu":6}
number_label = {0:"Button", 1:"Icon", 2:"Image", 3:"Input field", 4:"Line", 5:"Text", 6: "Navigation Menu"}

In [2]:
#df = read_txts_and_combine() #uncomment if you want to reread all .txt logs into one .csv
df = pd.read_csv("img_labeled/logs/log_all.csv")
df.head()


Out[2]:
id x_abs y_abs x_rel y_rel width height label filename
0 0 233 517 0.21181818181818182 0.7230769230769231 94 41 Image 04
1 1 359 522 0.32636363636363636 0.73006993006993 362 32 Navigation Menu 04
2 2 748 520 0.68 0.7272727272727273 23 31 Icon 04
3 3 776 524 0.7054545454545454 0.7328671328671329 15 26 Icon 04
4 4 796 525 0.7236363636363636 0.7342657342657343 18 25 Icon 04

In [3]:
print "Number of labeled webpages: %i"%(len(df.groupby("filename")))
print "Number of labels: %i"%(len(df))


Number of labeled webpages: 103
Number of labels: 1809

In [4]:
print "Number of labels per category:"
print df.groupby("label").count().id


Number of labels per category:
label
Button             190
Form                70
Icon               273
Image              259
Input field        164
Line               111
List                95
Navigation Menu    119
Rating bar           1
Slider              12
Text               465
Video               18
label               32
Name: id, dtype: int64

In [5]:
df.groupby("label").count().id.plot(kind="bar")#, color=sns.color_palette()[0])


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0xbf73198>

In [6]:
#only take the labels for which we have at least 100 entries
df = filter_top_100(df)
print("%i labels left"%len(df))


1581 labels left
C:\Users\feita1\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py:549: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value

In [7]:
#pickle_images("img_labeled/logs/", df)

In [8]:


In [ ]:


In [ ]:


In [11]:




In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: