notebook.community

Edit and run



In [22]:

    
%matplotlib inline
from query.scripts.script_util import *
from pprint import pprint



In [20]:

    
from django.db import models
faces = Face.objects.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(frame__video__id=791, labeler__name='mtcnn', height__gte=0.3, track=OuterRef('pk')).annotate(min_frame=Min('frame__number'), max_frame=Max('frame__number'))
print(FaceTrack.objects.annotate(min_frame=Subquery(faces.values('min_frame'), output_field=models.IntegerField()), max_frame=Subquery(faces.values('max_frame'), output_field=models.IntegerField()))).query









    



SELECT "query_tvnews_facetrack"."id", "query_tvnews_facetrack"."gender_id", "query_tvnews_facetrack"."identity_id", (SELECT MAX(U3."number") AS "max_frame" FROM "query_tvnews_face" U0 INNER JOIN "query_tvnews_labeler" U2 ON (U0."labeler_id" = U2."id") INNER JOIN "query_tvnews_frame" U3 ON (U0."frame_id" = U3."id") WHERE (U0."track_id" = ("query_tvnews_facetrack"."id") AND U2."name" = mtcnn AND U3."video_id" = 791 AND (U0."bbox_y2" - U0."bbox_y1") >= 0.3) GROUP BY U0."id", (U0."bbox_y2" - U0."bbox_y1")) AS "max_frame", (SELECT MIN(U3."number") AS "min_frame" FROM "query_tvnews_face" U0 INNER JOIN "query_tvnews_labeler" U2 ON (U0."labeler_id" = U2."id") INNER JOIN "query_tvnews_frame" U3 ON (U0."frame_id" = U3."id") WHERE (U0."track_id" = ("query_tvnews_facetrack"."id") AND U2."name" = mtcnn AND U3."video_id" = 791 AND (U0."bbox_y2" - U0."bbox_y1") >= 0.3) GROUP BY U0."id", (U0."bbox_y2" - U0."bbox_y1")) AS "min_frame" FROM "query_tvnews_facetrack"



In [60]:

    
print(FaceTrack.objects.all().values('id').query)
print(FaceTrack.objects.all().filter(face__frame__video__channel='CNN').values('id').query)
print(FaceTrack.objects.annotate(height=F('face__bbox_y2')-F('face__bbox_y1')).values('id').query)









    



SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack"
SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack" INNER JOIN "query_tvnews_face" ON ("query_tvnews_facetrack"."id" = "query_tvnews_face"."track_id") INNER JOIN "query_tvnews_frame" ON ("query_tvnews_face"."frame_id" = "query_tvnews_frame"."id") INNER JOIN "query_tvnews_video" ON ("query_tvnews_frame"."video_id" = "query_tvnews_video"."id") WHERE "query_tvnews_video"."channel" = CNN
SELECT "query_tvnews_facetrack"."id" FROM "query_tvnews_facetrack" LEFT OUTER JOIN "query_tvnews_face" ON ("query_tvnews_facetrack"."id" = "query_tvnews_face"."track_id")



In [61]:

    
print(FaceTrack.objects.all().count())
print(FaceTrack.objects.all().filter(face__frame__video__channel='CNN').count())
print(FaceTrack.objects.annotate(height=F('face__bbox_y2')-F('face__bbox_y1')).count())



In [24]:

    
# Get total number of detected female vs male faces
gender_frames = Gender.objects.annotate(count=Count('tvnews_face__frame')).values('name', 'count')
total = gender_frames[0]['count'] + gender_frames[1]['count']
print('Female: {:.1f}%, Male: {:.1f}%'.format(gender_frames[1]['count'] / float(total) * 100, gender_frames[0]['count'] / float(total) * 100))
pd.DataFrame(list(gender_frames))









    



Female: 42.1%, Male: 57.9%






    Out[24]:







  
    
      
      count
      name
    
  
  
    
      0
      569779
      male
    
    
      1
      414019
      female
    
    
      2
      512
      unknown



In [3]:

    
# Get # of frames with a man vs. woman on them
shows = [t['show'] for t in Video.objects.distinct('show').values('show')]
counts = {}
for show in shows:
    print 'Computing for {}'.format(show)
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = Frame.objects.filter(
            video__show=show, faceinstance__gender__name=gender).distinct('id').count()
    counts[show]['total'] = Frame.objects.filter(video__show=show).annotate(c=Count('faceinstance')).filter(c__gt=0).count()

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
pd.DataFrame(counts)









    



Computing for Americas News Headquarters
Computing for CNN Newsroom
Computing for CNN Newsroom With Poppy Harlow
Computing for Fareed Zakaria GPS
Computing for On the Record With Brit Hume
Computing for Shepard Smith Reporting
Computing for Situation Room With Wolf Blitzer
Computing for Special Report With Bret Baier
Computing for The Five
Computing for The Lead With Jake Tapper
Computing for The Real Story With Gretchen Carlson
Computing for The Situation Room

Special Report With Bret Baier: female 31.9%, male 86.2%
The Real Story With Gretchen Carlson: female 61.0%, male 68.4%
On the Record With Brit Hume: female 42.8%, male 74.4%
Fareed Zakaria GPS: female 30.2%, male 78.2%
Shepard Smith Reporting: female 44.9%, male 75.5%
The Five: female 49.7%, male 61.6%
CNN Newsroom With Poppy Harlow: female 66.9%, male 69.5%
The Situation Room: female 47.2%, male 71.8%
Americas News Headquarters: female 47.8%, male 68.3%
Situation Room With Wolf Blitzer: female 47.6%, male 77.8%
The Lead With Jake Tapper: female 49.6%, male 71.5%
CNN Newsroom: female 59.2%, male 65.7%






    Out[3]:







  
    
      
      Americas News Headquarters
      CNN Newsroom
      CNN Newsroom With Poppy Harlow
      Fareed Zakaria GPS
      On the Record With Brit Hume
      Shepard Smith Reporting
      Situation Room With Wolf Blitzer
      Special Report With Bret Baier
      The Five
      The Lead With Jake Tapper
      The Real Story With Gretchen Carlson
      The Situation Room
    
  
  
    
      female
      21540
      24188
      32230
      10786
      20424
      21052
      28376
      17113
      24827
      24010
      27893
      20404
    
    
      male
      30809
      26844
      33461
      27944
      35533
      35379
      46349
      46213
      30767
      34600
      31284
      31040
    
    
      total
      45096
      40831
      48158
      35725
      47766
      46878
      59607
      53591
      49981
      48407
      45761
      43212



In [20]:

    
# Get # of total faces per show that are man vs. woman
def talking_heads(qs):
    return qs.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(height__gte=0.25)

shows = [t['show'] for t in Video.objects.distinct('show').values('show')]
counts = {}
for show in shows:
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, gender__name=gender)).count()
    counts[show]['total'] = talking_heads(FaceInstance.objects.filter(frame__video__show=show)).count()    

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
    
pd.DataFrame(counts)









    



Special Report With Bret Baier: female 11.9%, male 88.1%
The Real Story With Gretchen Carlson: female 44.1%, male 55.9%
On the Record With Brit Hume: female 27.8%, male 72.2%
Fareed Zakaria GPS: female 17.7%, male 82.3%
Shepard Smith Reporting: female 24.6%, male 75.4%
The Five: female 39.8%, male 60.2%
CNN Newsroom With Poppy Harlow: female 40.3%, male 59.7%
The Situation Room: female 31.7%, male 68.3%
Americas News Headquarters: female 34.8%, male 65.2%
Situation Room With Wolf Blitzer: female 29.5%, male 70.5%
The Lead With Jake Tapper: female 32.0%, male 68.0%
CNN Newsroom: female 41.0%, male 59.0%






    Out[20]:







  
    
      
      Americas News Headquarters
      CNN Newsroom
      CNN Newsroom With Poppy Harlow
      Fareed Zakaria GPS
      On the Record With Brit Hume
      Shepard Smith Reporting
      Situation Room With Wolf Blitzer
      Special Report With Bret Baier
      The Five
      The Lead With Jake Tapper
      The Real Story With Gretchen Carlson
      The Situation Room
    
  
  
    
      female
      13314
      10516
      12973
      4358
      12173
      8451
      16786
      6917
      15401
      12898
      20558
      9136
    
    
      male
      24925
      15161
      19222
      20223
      31549
      25889
      40166
      51373
      23336
      27364
      26099
      19649
    
    
      total
      38239
      25677
      32195
      24581
      43722
      34340
      56969
      58313
      38737
      40262
      46657
      28785



In [9]:

    
# Get # of total faces per show that are man vs. woman
def talking_heads(qs):
    return qs.annotate(height=F('bbox_y2')-F('bbox_y1')).filter(height__gte=0.25)

id = 4457280
FaceFeatures.dropTempFeatureModel()
FaceFeatures.getTempFeatureModel([id])

shows = ['CNN Newsroom With Poppy Harlow']
counts = {}
for show in shows:
    counts[show] = {}
    for gender in ['male', 'female']:
        counts[show][gender] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, gender__name=gender, facefeaturestemp__distto_4457280__gte=1.7)).count()
    counts[show]['total'] = talking_heads(FaceInstance.objects.filter(frame__video__show=show, facefeaturestemp__distto_4457280__gte=1.7)).count()    

print ''
    
for show, count in counts.iteritems():
    print('{}: female {:.1f}%, male {:.1f}%'.format(show, (count['female'] / float(count['total']) * 100), (count['male'] / float(count['total']) * 100)))
    
fropd.DataFrame(counts)









    



CNN Newsroom With Poppy Harlow: female 29.9%, male 70.1%






    Out[9]:







  
    
      
      CNN Newsroom With Poppy Harlow
    
  
  
    
      female
      2584
    
    
      male
      6062
    
    
      total
      8646



In [70]:

    
from collections import defaultdict
labeler_names = [l['labeler__name'] for l in Face.objects.values('labeler__name').distinct()]
print(labeler_names)

videos = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for frame in Frame.objects.filter(Q(video__show='Situation Room With Wolf Blitzer') | Q(video__show='Special Report With Bret Baier')).select_related('video')[:10000:10]:
    faces = Face.objects.filter(frame=frame).select_related('labeler')
    for face in faces:
        videos[frame.video.id][frame.id][face.labeler.name].append(face)
        
print(dict(videos).keys())
        
AREA_THRESHOLD = 0.02
DIST_THRESHOLD = 0.10

mistakes = defaultdict(lambda: defaultdict(tuple))
for video, frames in videos.iteritems():
    for frame, labelers in frames.iteritems():
        for labeler, faces in labelers.iteritems():
            for face in faces:
                if bbox_area(face) < AREA_THRESHOLD:
                    continue

                mistake = True
                for other_labeler in labeler_names:
                    if labeler == other_labeler: continue
                    other_faces = labelers[other_labeler] if other_labeler in labelers else []
                    for other_face in other_faces:
                        if bbox_dist(face, other_face) < DIST_THRESHOLD:
                            mistake = False
                            break

                    if mistake and len(other_faces) > 0:
                        mistakes[video][frame] = (faces, other_faces)
                        break
                else:
                    continue
                break

result = []
for video, frames in list(mistakes.iteritems())[:100]:
    for frame, (faces, other_faces) in frames.iteritems():
        result.append({
            'video': video,
            'start_frame': frame,
            'bboxes': [bbox_to_dict(f) for f in faces + other_faces]
        })
print(len(result))









    



[u'handlabeled', u'mtcnn']
[786]






    




NameErrorTraceback (most recent call last)
<ipython-input-70-ea43500a03ee> in <module>()
     19         for labeler, faces in labelers.iteritems():
     20             for face in faces:
---> 21                 if bbox_area(face) < AREA_THRESHOLD:
     22                     continue
     23 

NameError: name 'bbox_area' is not defined

	Americas News Headquarters	CNN Newsroom	CNN Newsroom With Poppy Harlow	Fareed Zakaria GPS	On the Record With Brit Hume	Shepard Smith Reporting	Situation Room With Wolf Blitzer	Special Report With Bret Baier	The Five	The Lead With Jake Tapper	The Real Story With Gretchen Carlson	The Situation Room
female	21540	24188	32230	10786	20424	21052	28376	17113	24827	24010	27893	20404
male	30809	26844	33461	27944	35533	35379	46349	46213	30767	34600	31284	31040
total	45096	40831	48158	35725	47766	46878	59607	53591	49981	48407	45761	43212

	Americas News Headquarters	CNN Newsroom	CNN Newsroom With Poppy Harlow	Fareed Zakaria GPS	On the Record With Brit Hume	Shepard Smith Reporting	Situation Room With Wolf Blitzer	Special Report With Bret Baier	The Five	The Lead With Jake Tapper	The Real Story With Gretchen Carlson	The Situation Room
female	13314	10516	12973	4358	12173	8451	16786	6917	15401	12898	20558	9136
male	24925	15161	19222	20223	31549	25889	40166	51373	23336	27364	26099	19649
total	38239	25677	32195	24581	43722	34340	56969	58313	38737	40262	46657	28785