In [22]:
%matplotlib inline
from query.scripts.script_util import *
from pprint import pprint

In [20]:
from django.db import models
faces = Face.objects.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(frame__video__id=791, labeler__name='mtcnn', height__gte=0.3, track=OuterRef('pk')).annotate(min_frame=Min('frame__number'), max_frame=Max('frame__number'))
print(FaceTrack.objects.annotate(min_frame=Subquery(faces.values('min_frame'), output_field=models.IntegerField()), max_frame=Subquery(faces.values('max_frame'), output_field=models.IntegerField()))).query


SELECT "query_tvnews_facetrack"."id", "query_tvnews_facetrack"."gender_id", "query_tvnews_facetrack"."identity_id", (SELECT MAX(U3."number") AS "max_frame" FROM "query_tvnews_face" U0 INNER JOIN "query_tvnews_labeler" U2 ON (U0."labeler_id" = U2."id") INNER JOIN "query_tvnews_frame" U3 ON (U0."frame_id" = U3."id") WHERE (U0."track_id" = ("query_tvnews_facetrack"."id") AND U2."name" = mtcnn AND U3."video_id" = 791 AND (U0."bbox_y2" - U0."bbox_y1") >= 0.3) GROUP BY U0."id", (U0."bbox_y2" - U0."bbox_y1")) AS "max_frame", (SELECT MIN(U3."number") AS "min_frame" FROM "query_tvnews_face" U0 INNER JOIN "query_tvnews_labeler" U2 ON (U0."labeler_id" = U2."id") INNER JOIN "query_tvnews_frame" U3 ON (U0."frame_id" = U3."id") WHERE (U0."track_id" = ("query_tvnews_facetrack"."id") AND U2."name" = mtcnn AND U3."video_id" = 791 AND (U0."bbox_y2" - U0."bbox_y1") >= 0.3) GROUP BY U0."id", (U0."bbox_y2" - U0."bbox_y1")) AS "min_frame" FROM "query_tvnews_facetrack"

In [60]:
print(FaceTrack.objects.all().values('id').query)
print(FaceTrack.objects.all().filter(face__frame__video__channel='CNN').values('id').query)
print(FaceTrack.objects.annotate(height=F('face__bbox_y2')-F('face__bbox_y1')).values('id').query)


SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack"
SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack" INNER JOIN "query_tvnews_face" ON ("query_tvnews_facetrack"."id" = "query_tvnews_face"."track_id") INNER JOIN "query_tvnews_frame" ON ("query_tvnews_face"."frame_id" = "query_tvnews_frame"."id") INNER JOIN "query_tvnews_video" ON ("query_tvnews_frame"."video_id" = "query_tvnews_video"."id") WHERE "query_tvnews_video"."channel" = CNN
SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack" LEFT OUTER JOIN "query_tvnews_face" ON ("query_tvnews_facetrack"."id" = "query_tvnews_face"."track_id")

In [61]:
print(FaceTrack.objects.all().count())
print(FaceTrack.objects.all().filter(face__frame__video__channel='CNN').count())
print(FaceTrack.objects.annotate(height=F('face__bbox_y2')-F('face__bbox_y1')).count())


761091
504839
870872

In [24]:
# Get total number of detected female vs male faces
gender_frames = Gender.objects.annotate(count=Count('tvnews_face__frame')).values('name', 'count')
total = gender_frames[0]['count'] + gender_frames[1]['count']
print('Female: {:.1f}%, Male: {:.1f}%'.format(gender_frames[1]['count'] / float(total) * 100, gender_frames[0]['count'] / float(total) * 100))
pd.DataFrame(list(gender_frames))


Female: 42.1%, Male: 57.9%
Out[24]:
count name
0 569779 male
1 414019 female
2 512 unknown

In [3]:
# Get # of frames with a man vs. woman on them
shows = [t['show'] for t in Video.objects.distinct('show').values('show')]
counts = {}
for show in shows:
    print 'Computing for {}'.format(show)
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = Frame.objects.filter(
            video__show=show, faceinstance__gender__name=gender).distinct('id').count()
    counts[show]['total'] = Frame.objects.filter(video__show=show).annotate(c=Count('faceinstance')).filter(c__gt=0).count()

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
pd.DataFrame(counts)


Computing for Americas News Headquarters
Computing for CNN Newsroom
Computing for CNN Newsroom With Poppy Harlow
Computing for Fareed Zakaria GPS
Computing for On the Record With Brit Hume
Computing for Shepard Smith Reporting
Computing for Situation Room With Wolf Blitzer
Computing for Special Report With Bret Baier
Computing for The Five
Computing for The Lead With Jake Tapper
Computing for The Real Story With Gretchen Carlson
Computing for The Situation Room

Special Report With Bret Baier: female 31.9%, male 86.2%
The Real Story With Gretchen Carlson: female 61.0%, male 68.4%
On the Record With Brit Hume: female 42.8%, male 74.4%
Fareed Zakaria GPS: female 30.2%, male 78.2%
Shepard Smith Reporting: female 44.9%, male 75.5%
The Five: female 49.7%, male 61.6%
CNN Newsroom With Poppy Harlow: female 66.9%, male 69.5%
The Situation Room: female 47.2%, male 71.8%
Americas News Headquarters: female 47.8%, male 68.3%
Situation Room With Wolf Blitzer: female 47.6%, male 77.8%
The Lead With Jake Tapper: female 49.6%, male 71.5%
CNN Newsroom: female 59.2%, male 65.7%
Out[3]:
Americas News Headquarters CNN Newsroom CNN Newsroom With Poppy Harlow Fareed Zakaria GPS On the Record With Brit Hume Shepard Smith Reporting Situation Room With Wolf Blitzer Special Report With Bret Baier The Five The Lead With Jake Tapper The Real Story With Gretchen Carlson The Situation Room
female 21540 24188 32230 10786 20424 21052 28376 17113 24827 24010 27893 20404
male 30809 26844 33461 27944 35533 35379 46349 46213 30767 34600 31284 31040
total 45096 40831 48158 35725 47766 46878 59607 53591 49981 48407 45761 43212

In [20]:
# Get # of total faces per show that are man vs. woman
def talking_heads(qs):
    return qs.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(height__gte=0.25)

shows = [t['show'] for t in Video.objects.distinct('show').values('show')]
counts = {}
for show in shows:
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, gender__name=gender)).count()
    counts[show]['total'] = talking_heads(FaceInstance.objects.filter(frame__video__show=show)).count()    

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
    
pd.DataFrame(counts)


Special Report With Bret Baier: female 11.9%, male 88.1%
The Real Story With Gretchen Carlson: female 44.1%, male 55.9%
On the Record With Brit Hume: female 27.8%, male 72.2%
Fareed Zakaria GPS: female 17.7%, male 82.3%
Shepard Smith Reporting: female 24.6%, male 75.4%
The Five: female 39.8%, male 60.2%
CNN Newsroom With Poppy Harlow: female 40.3%, male 59.7%
The Situation Room: female 31.7%, male 68.3%
Americas News Headquarters: female 34.8%, male 65.2%
Situation Room With Wolf Blitzer: female 29.5%, male 70.5%
The Lead With Jake Tapper: female 32.0%, male 68.0%
CNN Newsroom: female 41.0%, male 59.0%
Out[20]:
Americas News Headquarters CNN Newsroom CNN Newsroom With Poppy Harlow Fareed Zakaria GPS On the Record With Brit Hume Shepard Smith Reporting Situation Room With Wolf Blitzer Special Report With Bret Baier The Five The Lead With Jake Tapper The Real Story With Gretchen Carlson The Situation Room
female 13314 10516 12973 4358 12173 8451 16786 6917 15401 12898 20558 9136
male 24925 15161 19222 20223 31549 25889 40166 51373 23336 27364 26099 19649
total 38239 25677 32195 24581 43722 34340 56969 58313 38737 40262 46657 28785

In [9]:
# Get # of total faces per show that are man vs. woman
def talking_heads(qs):
    return qs.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(height__gte=0.25)

id = 4457280
FaceFeatures.dropTempFeatureModel()
FaceFeatures.getTempFeatureModel([id])

shows = ['CNN Newsroom With Poppy Harlow']
counts = {}
for show in shows:
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, gender__name=gender, facefeaturestemp__distto_4457280__gte=1.7)).count()
    counts[show]['total'] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, facefeaturestemp__distto_4457280__gte=1.7)).count()    

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
    
fropd.DataFrame(counts)


CNN Newsroom With Poppy Harlow: female 29.9%, male 70.1%
Out[9]:
CNN Newsroom With Poppy Harlow
female 2584
male 6062
total 8646

In [70]:
from collections import defaultdict
labeler_names = [l['labeler__name'] for l in Face.objects.values('labeler__name').distinct()]
print(labeler_names)

videos = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for frame in Frame.objects.filter(Q(video__show='Situation Room With Wolf Blitzer') | Q(video__show='Special Report With Bret Baier')).select_related('video')[:10000:10]:
    faces = Face.objects.filter(frame=frame).select_related('labeler')
    for face in faces:
        videos[frame.video.id][frame.id][face.labeler.name].append(face)
        
print(dict(videos).keys())
        
AREA_THRESHOLD = 0.02
DIST_THRESHOLD = 0.10

mistakes = defaultdict(lambda: defaultdict(tuple))
for video, frames in videos.iteritems():
    for frame, labelers in frames.iteritems():
        for labeler, faces in labelers.iteritems():
            for face in faces:
                if bbox_area(face) < AREA_THRESHOLD:
                    continue

                mistake = True
                for other_labeler in labeler_names:
                    if labeler == other_labeler: continue
                    other_faces = labelers[other_labeler] if other_labeler in labelers else []
                    for other_face in other_faces:
                        if bbox_dist(face, other_face) < DIST_THRESHOLD:
                            mistake = False
                            break

                    if mistake and len(other_faces) > 0:
                        mistakes[video][frame] = (faces, other_faces)
                        break
                else:
                    continue
                break

result = []
for video, frames in list(mistakes.iteritems())[:100]:
    for frame, (faces, other_faces) in frames.iteritems():
        result.append({
            'video': video,
            'start_frame': frame,
            'bboxes': [bbox_to_dict(f) for f in faces + other_faces]
        })
print(len(result))


[u'handlabeled', u'mtcnn']
[786]

NameErrorTraceback (most recent call last)
<ipython-input-70-ea43500a03ee> in <module>()
     19         for labeler, faces in labelers.iteritems():
     20             for face in faces:
---> 21                 if bbox_area(face) < AREA_THRESHOLD:
     22                     continue
     23 

NameError: name 'bbox_area' is not defined