In [ ]:
import glob
import pandas as pd
import seaborn as sns
from raspberry_pi.grain_counter import (
    GrainCounter,
    StrawCounter,
    BeanCounter,
    QuinoaCounter,
    LentilCounter,
    RockCounter,
)

In [ ]:
# Root folder holding the raw photos; change this one constant to point the
# notebook at a different copy of the data set.
RAW_DATA_DIR = "/hackzurich2017/raw_data"

# glob.glob() yields matches in a filesystem-dependent order, so the lists are
# sorted to make the photo order (and every result derived from it) the same
# on each run and on each machine.
clean_photos = sorted(glob.glob(RAW_DATA_DIR + "/clean/*.jpg"))
dirty_photos = sorted(glob.glob(RAW_DATA_DIR + "/dirt/*.jpg"))

In [ ]:
# Run every counter over each photo of the dirty set and collect the results
# as a nested mapping: photo path -> {label -> value returned by count()}.
gc = GrainCounter()
sc = StrawCounter()
bc = BeanCounter()
qc = QuinoaCounter()
lc = LentilCounter()
rc = RockCounter()

# (label, counter) pairs; the tuple order fixes both the order in which the
# counters are invoked and the order in which labels are stored per photo.
labelled_counters = (
    ('grains', gc),
    ('straws', sc),
    ('beans', bc),
    ('quinoa', qc),
    ('lentils', lc),
    ('rocks', rc),
)

res_dirty = {}
for photo in dirty_photos:
    # Register the (still empty) entry first so that a failing counter leaves
    # the partial results for this photo in place.
    res_dirty[photo] = photo_counts = {}
    for label, counter in labelled_counters:
        photo_counts[label] = counter.count(photo)

In [ ]:
# Outer keys (photo paths) become the row index, inner keys the columns.
df_dirty = pd.DataFrame.from_dict(data=res_dirty, orient="index")

In [ ]:
# Run every counter over each photo of the clean set and collect the results
# as a nested mapping: photo path -> {label -> value returned by count()}.
gc = GrainCounter()
sc = StrawCounter()
bc = BeanCounter()
qc = QuinoaCounter()
lc = LentilCounter()
rc = RockCounter()

# (label, counter) pairs; the tuple order fixes both the order in which the
# counters are invoked and the order in which labels are stored per photo.
labelled_counters = (
    ('grains', gc),
    ('straws', sc),
    ('beans', bc),
    ('quinoa', qc),
    ('lentils', lc),
    ('rocks', rc),
)

res_clean = {}
for photo in clean_photos:
    # Register the (still empty) entry first so that a failing counter leaves
    # the partial results for this photo in place.
    res_clean[photo] = photo_counts = {}
    for label, counter in labelled_counters:
        photo_counts[label] = counter.count(photo)

In [ ]:
# Outer keys (photo paths) become the row index, inner keys the columns.
df_clean = pd.DataFrame.from_dict(data=res_clean, orient="index")

In [ ]:
# Tag each frame (in place) with the set its photos belong to, so the rows can
# still be told apart after the two frames are combined.
for frame, tag in ((df_dirty, 'dirty'), (df_clean, 'clean')):
    frame['defect'] = tag

In [ ]:
# Stack the clean rows on top of the dirty rows in one frame.
df = pd.concat(objs=[df_clean, df_dirty], axis=0)

In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
sns.set(style="whitegrid", palette="pastel", color_codes=True)
sns.set_context("talk")


df_melt = pd.melt(df, id_vars='defect')
plt.figure(figsize=(10,10))
sns.violinplot(x='variable', y='value', hue='defect',
               data=df_melt, split=True, inner='quart')

In [ ]:
# Same plot without the 'grains' column, so the remaining counters are shown
# on their own y-axis scale.
df_nograin = df.drop('grains', axis=1)

# Long format: one row per (photo, counter label) with the columns 'defect',
# 'variable' (the former column name) and 'value' (the stored count).
df_melt = pd.melt(df_nograin, id_vars='defect')

# Create the figure and axes explicitly rather than relying on pyplot's
# implicit "current figure", so the plot is drawn on the axes labelled below.
fig, ax = plt.subplots(figsize=(10, 10))
sns.violinplot(x='variable', y='value', hue='defect',
               data=df_melt, split=True, inner='quart', ax=ax)

# Title and axis labels so the figure can be read on its own; the trailing
# semicolon keeps the return value of ax.set() out of the cell output.
ax.set(title='Counts per photo by counter (grains excluded): clean vs. dirty',
       xlabel='counter', ylabel='count per photo');

In [ ]:
# Display pandas' default summary statistics for the clean-set frame.
df_clean.describe()

In [ ]:
# Display pandas' default summary statistics for the dirty-set frame.
df_dirty.describe()