In [34]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

import LoadDataset
%matplotlib inline
In [35]:
# Folder that is scanned for '*.png' images and handed to
# LoadDataset.getNextImage further down.
# Set the DATA_SET_FOLDER environment variable to point the notebook at a
# different location; when it is unset the original hard-coded path is used,
# so existing runs behave exactly as before.
DATA_SET_FOLDER = os.environ.get(
    'DATA_SET_FOLDER',
    '/home/ankdesh/explore/DeepLearning-UdacityCapston/data/train',
)
In [36]:
# Gather the pixel height and width of every PNG directly inside the
# dataset folder.
heights = []
widths = []
for fileName in glob.glob(DATA_SET_FOLDER + '/*.png'):
    # Only the size attributes are read, so the pixel data is never loaded and
    # Pillow would otherwise keep the file handle open. The context manager
    # closes it as soon as the dimensions have been recorded.
    with Image.open(fileName) as img:
        heights.append(img.height)
        widths.append(img.width)

numLen = []  # Num of digits in the number in image
# getNextImage yields (imgs, labels) pairs; only the length of `labels` is
# needed here.
# NOTE(review): presumably `labels` holds one entry per digit - confirm in LoadDataset.
for (imgs, labels) in LoadDataset.getNextImage(DATA_SET_FOLDER):
    numLen.append(len(labels))
In [37]:
# Histogram of the values collected in numLen (KDE overlay disabled).
# The Series name is what seaborn picks up as the x-axis label.
sns.distplot(
    pd.Series(data=numLen, name="Histogram - Len of number"),
    kde=False,
    label="Histogram",
)
Out[37]:
In [38]:
# Histogram of the image widths collected in `widths` (KDE overlay disabled).
# The Series name is what seaborn picks up as the x-axis label.
sns.distplot(
    pd.Series(data=widths, name="Histogram - Width of images"),
    kde=False,
    label="Histogram",
)
Out[38]:
In [39]:
# Histogram of the image heights collected in `heights` (KDE overlay disabled).
# The Series name is what seaborn picks up as the x-axis label.
sns.distplot(
    pd.Series(data=heights, name="Histogram - Height of images"),
    kde=False,
    label="Histogram",
)
Out[39]:
In [41]:
# Arithmetic mean of the label lengths, image widths and image heights,
# computed in one pass over the three collected lists.
lenMean, widthMean, heightMean = map(np.mean, (numLen, widths, heights))
In [43]:
# Show the mean image width and mean image height.
# NOTE(review): the output format depends on the interpreter - Python 2 treats
# this as a print statement applied to a tuple and prints "(w, h)", while
# Python 3 prints the two values separated by a space.
print (widthMean, heightMean)
In [54]:
# Cumulative distribution of the label lengths: entry k is the percentage of
# samples in numLen whose length is <= k, for k = 0 .. 9.
cummHistNumLen = []
for maxDigits in range(10):
    withinLimit = sum(1 for digits in numLen if digits <= maxDigits)
    # float() keeps this a true division even under Python 2.
    cummHistNumLen.append(float(withinLimit) / len(numLen) * 100)
In [55]:
# Show the cumulative percentages, one entry per length threshold.
print(cummHistNumLen)
In [ ]: