In [ ]:
import pandas as pd
import bisect
import numpy as np
import pickle
import csv
In [ ]:
def get_percentile(current_observation, distribution_df):
    """Return the fraction of the reference distribution lying below the observation.

    The reference values are assumed to be sorted; bisect gives the insertion
    rank, which divided by the sample size is the empirical percentile in [0, 1].
    """
    reference = np.array(distribution_df)
    rank = bisect.bisect(reference, float(current_observation))
    return float(rank) / float(len(reference))
In [ ]:
def get_strongest_factors(feature_influence_scores, feature_category, variable='interview'):
    """Map each feature name to its signed contribution for one category/variable.

    Each influence entry is a (weight, influence, feature_name) triple; the
    contribution is weight * influence. Later entries with the same name
    overwrite earlier ones, as in the original loop.
    """
    relevant_scores = feature_influence_scores[feature_category][variable]
    return {entry[2]: entry[0] * entry[1] for entry in relevant_scores}
In [ ]:
def mention_if_strong_factor(feature_key, strongest_factors):
    """Return a one-line note on this feature's direction of influence.

    Returns '' when the feature is not among the strong factors or when its
    contribution is exactly zero.
    """
    if feature_key not in strongest_factors:
        return ''
    contribution = strongest_factors[feature_key]
    if contribution > 0:
        return 'In our model, a higher score on this feature typically leads to a higher overall assessment score.\n'
    if contribution < 0:
        return 'In our model, a higher score on this feature typically leads to a lower overall assessment score.\n'
    return ''
In [ ]:
def describe_individual_feature(scores, training_data, strongest_factors, feature_key, description_function, feature_human_explanations):
    """Build the report paragraph for one feature.

    The paragraph contains: the human-readable explanation header, the feature's
    observed range in the training data, this video's score and percentile, an
    optional strong-factor note, and the feature-specific commentary produced
    by description_function(score, percentile).
    """
    observed_score = scores[feature_key]
    reference_values = training_data[feature_key]
    percentile = get_percentile(observed_score, reference_values)
    paragraph = '*** %s ***\n' % feature_human_explanations[feature_key]
    paragraph += 'This feature typically ranges between %f and %f. ' % (np.min(reference_values), np.max(reference_values))
    paragraph += 'The score for this video is %f (percentile: %d).\n' % (observed_score, 100 * percentile)
    paragraph += mention_if_strong_factor(feature_key, strongest_factors)
    paragraph += description_function(observed_score, percentile)
    paragraph += '\n'
    return paragraph
In [ ]:
def describe_entropy(score, percentile):
    """Interpret the motion-energy entropy as a proxy for face-detection quality.

    Scores between 1.0 and 2.0 get no comment.
    """
    if score < 1.0:
        return "I have some problems detecting the person's face. Probably the person is not always looking into the camera, or the video is of low quality.\n"
    if score > 2.0:
        return "It looks like the person is consistently facing the camera.\n"
    return ''
In [ ]:
def describe_mei_median(score, percentile):
    """Comment on movement when the median motion energy is in an extreme quartile.

    A low percentile means a LOW median motion energy, i.e. little movement
    (cf. describe_word_count, which follows the same convention). The original
    text said 'moves a lot' in BOTH branches, contradicting the low-percentile
    case; the low branch is fixed to say the person does not move a lot.
    """
    mei_median_description = ''
    if percentile < 0.25:
        mei_median_description += 'When taking the median of the degree of movement, this person does not move a lot.\n'
    elif percentile > 0.75:
        mei_median_description += 'When taking the median of the degree of movement, this person moves quite a lot.\n'
    return mei_median_description
In [ ]:
def describe_mei_mean(score, percentile):
    """Comment on movement when the mean motion energy is in an extreme quartile.

    A low percentile means a LOW mean motion energy, i.e. little movement
    (cf. describe_word_count, which follows the same convention). The original
    text said 'moves a lot' in BOTH branches, contradicting the low-percentile
    case; the low branch is fixed to say the person does not move a lot.
    """
    mei_mean_description = ''
    if percentile < 0.25:
        mei_mean_description += 'When averaging the degree of movement, this person does not move a lot.\n'
    elif percentile > 0.75:
        mei_mean_description += 'When averaging the degree of movement, this person moves quite a lot.\n'
    return mei_mean_description
In [ ]:
def describe_nothing(score, percentile):
    """Placeholder describer for features that get no score-specific commentary."""
    return ''
In [ ]:
def describe_mei(mei_scores, training_data, feature_influence, feature_human_explanations):
    """Build the motion-energy section of the report (Entropy, Median, Mean)."""
    strongest_factors = get_strongest_factors(feature_influence, 'mei')
    parts = ['** FEATURES OBTAINED FROM MOTION ENERGY ANALYSIS **\n',
             'I focused on the person\'s face and verified how much movement was detected over time.\n\n']
    feature_describers = (('Entropy', describe_entropy),
                          ('Median', describe_mei_median),
                          ('Mean', describe_mei_mean))
    for feature_key, describer in feature_describers:
        parts.append(describe_individual_feature(mei_scores, training_data, strongest_factors, feature_key, describer, feature_human_explanations))
    return ''.join(parts)
In [ ]:
def _au_feature_keys(au_codes, presence_only=()):
    """Expand Action Unit codes into their feature keys, in report order.

    Each AU contributes a presence key plus max/mean intensity keys, except
    codes listed in presence_only, which only have a presence feature.
    """
    keys = []
    for code in au_codes:
        keys.append('%% presence (AU%s_c)' % code)
        if code not in presence_only:
            keys.append('max intensity (AU%s_r)' % code)
            keys.append('mean intensity (AU%s_r)' % code)
    return keys

def describe_openface(openface_scores, training_data, feature_influence, feature_human_explanations):
    """Build the facial Action Unit section of the report, grouped by face region.

    Refactors the original 50+ copy-pasted describe_individual_feature calls
    into loops over AU code lists; the output text is identical.
    """
    strongest_factors = get_strongest_factors(feature_influence, 'openface')

    def _section(header, au_codes, presence_only=()):
        # One region header followed by one paragraph per feature key.
        text = header + '\n'
        for feature_key in _au_feature_keys(au_codes, presence_only):
            text += describe_individual_feature(openface_scores, training_data, strongest_factors, feature_key, describe_nothing, feature_human_explanations)
        return text

    openface_description = '** FEATURES OBTAINED FROM FACIAL ACTION UNIT ANALYSIS **\n'
    openface_description += 'I focused on Action Units in the person\'s face: activity of dedicated face muscles.\n'
    openface_description += 'These values may say something about how expressive the person is.\n\n'
    openface_description += _section('FEATURES FROM THE EYES', ('01', '02', '04', '05', '07'))
    openface_description += '\n'
    # AU28 (lip suck) only has a presence feature in the data, no intensities.
    openface_description += _section('FEATURES FROM THE MOUTH', ('10', '12', '15', '20', '23', '25', '28'), presence_only=('28',))
    openface_description += '\n'
    openface_description += _section('FEATURES FROM THE CHIN', ('17', '14', '26'))
    openface_description += '\n'
    openface_description += _section('FEATURES FROM OTHER AREAS', ('06', '09', '45'))
    return openface_description
In [ ]:
def describe_visual_aspects(mei_scores, openface_scores, training_data, feature_influence, feature_human_explanations):
    """Assemble the visual portion of the report (motion energy + Action Units)."""
    visual_report = '*******************\n* VISUAL FEATURES *\n*******************\n'
    visual_report += 'Here is the report on what I could \'see\':\n\n'
    # Catch for detection bug: a near-zero motion-energy entropy means the
    # detector locked onto a static background object instead of the person.
    if mei_scores['Entropy'] < 0.5:
        visual_report += '...oops. It looks like the person had posters in the background, and I actually focused on a poster instead of the person. Nothing more to assess visually...'
        return visual_report
    visual_report += describe_mei(mei_scores, training_data, feature_influence, feature_human_explanations)
    visual_report += '---------------------------------------------------\n\n'
    visual_report += describe_openface(openface_scores, training_data, feature_influence, feature_human_explanations)
    visual_report += '\n'
    return visual_report
In [ ]:
def describe_word_count(score, percentile):
    """Comment on talkativeness when the word count is in an extreme quartile."""
    if percentile < 0.25:
        return 'This person does not speak a lot.\n'
    if percentile > 0.75:
        return 'This person speaks a lot.\n'
    return ''
In [ ]:
def describe_unique_words(score, percentile):
    """Comment on vocabulary size when the unique-word count is in an extreme quartile.

    A low percentile means FEW unique words, i.e. a SMALL vocabulary (same
    convention as describe_word_count). The original had the two messages
    swapped, reporting a 'large vocabulary' for the bottom quartile.
    """
    unique_word_description = ''
    if percentile < 0.25:
        unique_word_description = 'This person has an unusually small vocabulary.'
    elif percentile > 0.75:
        unique_word_description = 'This person has an unusually large vocabulary.'
    if len(unique_word_description) > 0:
        unique_word_description += '\n'
    return unique_word_description
In [ ]:
def describe_txt(txt_scores, training_data, feature_influence, feature_human_explanations):
    """Build the simple text-statistics section of the report."""
    strongest_factors = get_strongest_factors(feature_influence, 'txt')
    header = '** FEATURES OBTAINED FROM SIMPLE TEXT ANALYSIS **\n'
    header += 'Cognitive capability may be important for the job. I looked at a few very simple text statistics first.\n\n'
    paragraphs = [
        describe_individual_feature(txt_scores, training_data, strongest_factors, feature_key, describer, feature_human_explanations)
        for feature_key, describer in (('word_count', describe_word_count),
                                       ('word_count_unique', describe_unique_words))
    ]
    return header + ''.join(paragraphs)
In [ ]:
def describe_ari(score, percentile):
    """Translate an ARI score into a US school grade level.

    Mapping taken from https://en.wikipedia.org/wiki/Automated_readability_index
    """
    # Grade names for rounded scores 2..13; below that is Kindergarten,
    # above is College.
    grade_names = (
        'First Grade', 'Second Grade', 'Third Grade', 'Fourth Grade',
        'Fifth Grade', 'Sixth Grade', 'Seventh Grade', 'Eighth Grade',
        'Ninth Grade', 'Tenth Grade', 'Eleventh Grade', 'Twelfth Grade',
    )
    rounded = int(np.ceil(score))
    if rounded <= 1:
        score_interpretation = 'Kindergarten'
    elif rounded >= 14:
        score_interpretation = 'College'
    else:
        score_interpretation = grade_names[rounded - 2]
    return 'According to the ARI score, the estimated educational level needed to understand this person is %s.\n' % score_interpretation
In [ ]:
def describe_gunning_fog(score, percentile):
    """Translate a Gunning Fog index into an educational level.

    Mapping taken from https://en.wikipedia.org/wiki/Gunning_fog_index
    Fix: the original chain jumped from 14 straight to 16, so a score of 15
    ('College Junior' in the cited mapping) wrongly fell through to
    'College Graduate'.
    """
    score = np.ceil(score)
    if score < 6:
        score_interpretation = 'Under Sixth Grade'
    elif score == 6:
        score_interpretation = 'Sixth Grade'
    elif score == 7:
        score_interpretation = 'Seventh Grade'
    elif score == 8:
        score_interpretation = 'Eighth Grade'
    elif score == 9:
        score_interpretation = 'High School Freshman'
    elif score == 10:
        score_interpretation = 'High School Sophomore'
    elif score == 11:
        score_interpretation = 'High School Junior'
    elif score == 12:
        score_interpretation = 'High School Senior'
    elif score == 13:
        score_interpretation = 'College Freshman'
    elif score == 14:
        score_interpretation = 'College Sophomore'
    elif score == 15:
        score_interpretation = 'College Junior'
    elif score == 16:
        score_interpretation = 'College Senior'
    else:
        score_interpretation = 'College Graduate'
    gunning_fog_description = 'According to the Gunning Fog Index, the estimated educational level needed to understand this person is %s.\n' % score_interpretation
    return gunning_fog_description
In [ ]:
def describe_flesch_kincaid_ease(score, percentile):
    """Translate a Flesch reading-ease score into a readability verdict.

    Mapping taken from https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    # (exclusive lower bound, verdict) pairs, checked from easiest to hardest;
    # anything at or below 30 gets the hardest verdict.
    bands = (
        (90, 'very easy. Easily understood by an average 11-year-old student.'),
        (80, 'easy. Conversational English for consumers.'),
        (70, 'fairly easy to understand.'),
        (60, 'plain English. Easily understood by 13- to 15-year-old students.'),
        (50, 'fairly difficult.'),
        (30, 'difficult.'),
    )
    score_interpretation = 'very difficult to understand. Best understood by university graduates.'
    for lower_bound, verdict in bands:
        if score > lower_bound:
            score_interpretation = verdict
            break
    return 'According to the Flesch-Kincaid reading ease score, this person\'s text is %s' % score_interpretation
In [ ]:
def describe_lix(score, percentile):
    """Translate a LIX score into a school grade level.

    Mapping taken from (Anderson, 1983).
    """
    # Inclusive upper bounds per grade, applied to the rounded-up score
    # (np.ceil returns integer-valued floats, so '<= 9' matches the
    # original's '< 10' first branch).
    grade_bounds = (
        (9, 'First Grade'), (14, 'Second Grade'), (19, 'Third Grade'),
        (23, 'Fourth Grade'), (27, 'Fifth Grade'), (31, 'Sixth Grade'),
        (35, 'Seventh Grade'), (39, 'Eighth Grade'), (43, 'Ninth Grade'),
        (47, 'Tenth Grade'), (51, 'Eleventh Grade'), (55, 'Twelfth Grade'),
    )
    rounded = np.ceil(score)
    score_interpretation = 'College'
    for upper_bound, grade in grade_bounds:
        if rounded <= upper_bound:
            score_interpretation = grade
            break
    return 'According to the LIX score, the estimated educational level needed to understand this person is %s.\n' % score_interpretation
In [ ]:
def describe_rix(score, percentile):
    """Translate a RIX score into a school grade level.

    Mapping taken from (Anderson, 1983).
    """
    # Exclusive upper bounds per grade, in ascending order; anything at or
    # above 7.2 is College.
    grade_bounds = (
        (0.2, 'First Grade'), (0.5, 'Second Grade'), (0.8, 'Third Grade'),
        (1.3, 'Fourth Grade'), (1.8, 'Fifth Grade'), (2.4, 'Sixth Grade'),
        (3.0, 'Seventh Grade'), (3.7, 'Eighth Grade'), (4.5, 'Ninth Grade'),
        (5.3, 'Tenth Grade'), (6.2, 'Eleventh Grade'), (7.2, 'Twelfth Grade'),
    )
    score_interpretation = 'College'
    for upper_bound, grade in grade_bounds:
        if score < upper_bound:
            score_interpretation = grade
            break
    return 'According to the RIX score, the estimated educational level needed to understand this person is %s.\n' % score_interpretation
In [ ]:
def describe_readability(readability_scores, training_data, feature_influence, feature_human_explanations):
    """Build the readability-metrics section of the report.

    Metrics without a dedicated interpreter use describe_nothing, so only
    their score and percentile are reported.
    """
    strongest_factors = get_strongest_factors(feature_influence, 'readability')
    text = '** FEATURES OBTAINED FROM READABILITY ANALYSIS **\n'
    text += 'As slightly more sophisticated measure, I ran several readability metrics.\n'
    text += 'Note that several of these were originally designed for larger, written texts. This is why metrics may disagree.\n\n'
    metric_describers = (
        ('ARI', describe_ari),
        ('Coleman Liau Index', describe_nothing),
        ('Flesch-Kincaid Grade Level', describe_nothing),
        ('SMOG Index', describe_nothing),
        ('LIX', describe_lix),
        ('RIX', describe_rix),
        ('Gunning Fog Index', describe_gunning_fog),
        ('Flesch Reading Ease', describe_flesch_kincaid_ease),
    )
    for feature_key, describer in metric_describers:
        text += describe_individual_feature(readability_scores, training_data, strongest_factors, feature_key, describer, feature_human_explanations)
    return text
In [ ]:
def describe_linguistic_use(txt_scores, readability_scores, training_data, feature_influence, feature_human_explanations):
    """Assemble the language-use portion of the report (text stats + readability)."""
    sections = [
        '*******************\n* USE OF LANGUAGE *\n*******************\n',
        'Here is the report on the person\'s language use:\n\n',
        describe_txt(txt_scores, training_data, feature_influence, feature_human_explanations),
        '---------------------------------------------------\n\n',
        describe_readability(readability_scores, training_data, feature_influence, feature_human_explanations),
    ]
    return ''.join(sections)
In [ ]:
def assess_video(txt_scores, readability_scores, mei_scores, openface_scores, training_data, feature_influence, feature_human_explanations):
    """Combine the linguistic and visual reports into the full assessment text."""
    linguistic_report = describe_linguistic_use(txt_scores, readability_scores, training_data, feature_influence, feature_human_explanations)
    visual_report = describe_visual_aspects(mei_scores, openface_scores, training_data, feature_influence, feature_human_explanations)
    return '%s\n%s\n' % (linguistic_report, visual_report)
In [ ]:
def feature_csv_to_dict(path):
    """Read a per-video feature CSV into {video_id: {feature_name: float}}.

    The CSV's unnamed first column (header '') holds the video id; every other
    column is parsed as a float.

    Fix: the original passed a bare open(path) to DictReader and never closed
    the file handle; a 'with' block now guarantees closure.
    """
    feature_dict = {}
    with open(path) as csv_file:
        for entry in csv.DictReader(csv_file):
            video_id = entry['']
            feature_dict[video_id] = {key: float(value)
                                      for key, value in entry.items()
                                      if key != ''}
    return feature_dict
In [ ]:
def generate_qualitative_descriptions(predictions, test_data_txt, test_data_readability, test_data_mei, test_data_openface, train_data, feature_influence, feature_human_explanations):
    """Build the full textual report for every predicted video.

    Returns {video_id: (report_text, None)}; the second tuple element is a
    None placeholder (not used elsewhere in this file).
    """
    descriptions = {}
    for video_id in predictions:
        report = '****************************************************\n'
        report += '* ASSESSMENT REPORT FOR VIDEO %s: *\n' % video_id
        report += '****************************************************\n\n'
        report += 'On a scale from 0.0 to 1.0, I would rate this person\'s interviewability as %f.\n' % predictions[video_id]
        report += 'Below, I will report on linguistic and visual assessment of the person.\nPercentiles are obtained by comparing the person against scores of 6000 earlier assessed people.\n\n'
        report += '---------------------------------------------------\n\n'
        report += assess_video(test_data_txt[video_id], test_data_readability[video_id], test_data_mei[video_id], test_data_openface[video_id], train_data, feature_influence, feature_human_explanations)
        descriptions[video_id] = (report, None)
    return descriptions
In [ ]:
# Map of human descriptions: feature key -> human-readable explanation used
# as the header of each feature paragraph in the generated reports.
# We do not use the Centers of Mass, as our motion energy images are on segmented faces.
# CoM would likely be a useful features on 'full' videos though.
human_descriptions = \
{'% presence (AU01_c)': 'Action Unit 1: how often was the inner brow raised?',
 '% presence (AU02_c)': 'Action Unit 2: how often was the outer brow raised?',
 '% presence (AU04_c)': 'Action Unit 4: how often was the brow lowered?',
 '% presence (AU05_c)': 'Action Unit 5: how often was the upper lid raised?',
 '% presence (AU06_c)': 'Action Unit 6: how often was the cheek raised?',
 '% presence (AU07_c)': 'Action Unit 7: how often was the eyelid tightened?',
 '% presence (AU09_c)': 'Action Unit 9: how often did the nose wrinkle?',
 '% presence (AU10_c)': 'Action Unit 10: how often was the upper lip raised?',
 '% presence (AU12_c)': 'Action Unit 12: how often was the lip corner pulled?',
 '% presence (AU14_c)': 'Action Unit 14: how often was the dimple present?',
 '% presence (AU15_c)': 'Action Unit 15: how often was the lip corner depressed?',
 '% presence (AU17_c)': 'Action Unit 17: how often was the chin raised?',
 '% presence (AU20_c)': 'Action Unit 20: how often was the lip stretched?',
 '% presence (AU23_c)': 'Action Unit 23: how often was the lip tightened?',
 '% presence (AU25_c)': 'Action Unit 25: how often did the lips part?',
 '% presence (AU26_c)': 'Action Unit 26: how often did the jaw drop?',
 '% presence (AU28_c)': 'Action Unit 28: how often was the lip sucked?',
 '% presence (AU45_c)': 'Action Unit 45: how often did the person blink?',
 'ARI': 'US grade level required for comprehension according to the ARI score',
 'Coleman Liau Index': 'US grade level required for comprehension according to the Coleman Liau score',
 'Entropy': 'Motion energy entropy: how varied is the degree of movement across the person\'s face?',
 'Flesch Reading Ease': 'Reading ease according to the Flesch score',
 'Flesch-Kincaid Grade Level': 'US grade level required for comprehension according to the Flesch-Kincaid score',
 'Gunning Fog Index': 'US grade level required for comprehension according to the Gunning-Fog score',
 #'Horizontal CoM': 'Horizontal center of face',
 'LIX': 'Readability assessment according to the Lix score',
 'Mean': 'Mean motion energy: how much does the person move on average?',
 'Median': 'Median motion energy: what is the typical degree of movement of this person?',
 'RIX': 'Readability assessment according to the RIX score',
 'SMOG Index': 'Years of reading required to understand the text according to the SMOG score',
 #'Vertical CoM': 'Vertical center of face',
 'max intensity (AU01_r)': 'Action Unit 1: how much was the inner brow raised at most?',
 'max intensity (AU02_r)': 'Action Unit 2: how much was the outer brow raised at most?',
 'max intensity (AU04_r)': 'Action Unit 4: how much was the brow lowered at most?',
 'max intensity (AU05_r)': 'Action Unit 5: how much was the upper lid raised at most?',
 'max intensity (AU06_r)': 'Action Unit 6: how much was the cheek raised at most?',
 'max intensity (AU07_r)': 'Action Unit 7: how much was the eyelid tightened at most?',
 'max intensity (AU09_r)': 'Action Unit 9: how much did the nose wrinkle at most?',
 'max intensity (AU10_r)': 'Action Unit 10: how much was the upper lip raised at most?',
 'max intensity (AU12_r)': 'Action Unit 12: how much was the lip corner pulled at most?',
 'max intensity (AU14_r)': 'Action Unit 14: how much was the dimple present at most?',
 'max intensity (AU15_r)': 'Action Unit 15: how much was the lip corner depressed at most?',
 'max intensity (AU17_r)': 'Action Unit 17: how much was the chin raised at most?',
 'max intensity (AU20_r)': 'Action Unit 20: how much was the lip stretched at most?',
 'max intensity (AU23_r)': 'Action Unit 23: how much was the lip tightened at most?',
 'max intensity (AU25_r)': 'Action Unit 25: how much did the lips part at most?',
 'max intensity (AU26_r)': 'Action Unit 26: how much did the jaw drop at most?',
 'max intensity (AU28_r)': 'Action Unit 28: how much was the lip sucked at most?',
 'max intensity (AU45_r)': 'Action Unit 45: how much did the person blink at most?',
 'mean intensity (AU01_r)': 'Action Unit 1: how much was the inner brow raised on average?',
 'mean intensity (AU02_r)': 'Action Unit 2: how much was the outer brow raised on average?',
 'mean intensity (AU04_r)': 'Action Unit 4: how much was the brow lowered on average?',
 'mean intensity (AU05_r)': 'Action Unit 5: how much was the upper lid raised on average?',
 'mean intensity (AU06_r)': 'Action Unit 6: how much was the cheek raised on average?',
 'mean intensity (AU07_r)': 'Action Unit 7: how much was the eyelid tightened on average?',
 'mean intensity (AU09_r)': 'Action Unit 9: how much did the nose wrinkle on average?',
 'mean intensity (AU10_r)': 'Action Unit 10: how much was the upper lip raised on average?',
 'mean intensity (AU12_r)': 'Action Unit 12: how much was the lip corner pulled on average?',
 'mean intensity (AU14_r)': 'Action Unit 14: how much was the dimple present on average?',
 'mean intensity (AU15_r)': 'Action Unit 15: how much was the lip corner depressed on average?',
 'mean intensity (AU17_r)': 'Action Unit 17: how much was the chin raised on average?',
 'mean intensity (AU20_r)': 'Action Unit 20: how much was the lip stretched on average?',
 'mean intensity (AU23_r)': 'Action Unit 23: how much was the lip tightened on average?',
 'mean intensity (AU25_r)': 'Action Unit 25: how much did the lips part on average?',
 'mean intensity (AU26_r)': 'Action Unit 26: how much did the jaw drop on average?',
 'mean intensity (AU28_r)': 'Action Unit 28: how much was the lip sucked on average?',
 'mean intensity (AU45_r)': 'Action Unit 45: how much did the person blink on average?',
 'word_count': 'Amount of spoken words',
 'word_count_unique': 'Amount of unique words'}
In [ ]:
# Read training data from combined csv: we will use this as comparison data
# for assessing percentiles (one column per feature key).
train_data = pd.read_csv('../feature extraction/features/Train/combined.csv')
In [ ]:
# Load the per-feature influence scores used to flag strong factors.
# Fix: open in binary mode ('rb') — pickle requires a bytes stream under
# Python 3 — and use 'with' so the handle is closed (the original leaked it).
with open('../regression/feature_influence.pkl', 'rb') as f:
    feature_influence = pickle.load(f)
In [ ]:
# Per-modality feature tables for the validation videos, keyed by video id.
val_data_txt = feature_csv_to_dict('../feature extraction/features/Validation/Txt.csv')
val_data_readability = feature_csv_to_dict('../feature extraction/features/Validation/Readability.csv')
val_data_openface = feature_csv_to_dict('../feature extraction/features/Validation/OpenFace.csv')
val_data_mei = feature_csv_to_dict('../feature extraction/features/Validation/MEI.csv')
In [ ]:
# The ten validation videos selected for the qualitative submission.
validation_subset = [
    'cT3oyHhUznw.000.mp4',
    'sHVXhr7_EOs.000.mp4',
    'ax8wm9K41og.002.mp4',
    'B2riMsP8LD8.002.mp4',
    'kSk-rf7a1Ig.004.mp4',
    'o2wtRccAgjE.005.mp4',
    'DVh_7dO2cWY.001.mp4',
    '2SzC9dm4Yy4.001.mp4',
    '7fOxteINSUg.002.mp4',
    'EvZ0esZgPK4.005.mp4',
]
In [ ]:
# Load all validation-set predictions (per variable, per video id).
# Fix: open in binary mode ('rb') — pickle requires a bytes stream under
# Python 3 — and use 'with' so the handle is closed (the original leaked it).
with open('../submission/Validation/prediction_all.pkl', 'rb') as f:
    all_predictions = pickle.load(f)
In [ ]:
# Generate a textual report for every validation video, using the
# 'interview' predictions and the human-readable feature descriptions.
all_descriptions = generate_qualitative_descriptions(all_predictions['interview'], val_data_txt, val_data_readability, val_data_mei, val_data_openface, train_data, feature_influence, human_descriptions)
In [ ]:
# Keep only the hand-picked validation videos, carrying their descriptions
# and per-variable predictions over into fresh dicts.
predictions = {}
descriptions = {}
subset = set(validation_subset)
for video_id in all_descriptions:
    if video_id not in subset:
        continue
    descriptions[video_id] = all_descriptions[video_id]
    for variable in all_predictions:
        predictions.setdefault(variable, {})[video_id] = all_predictions[variable][video_id]
In [ ]:
# Persist the filtered validation predictions and descriptions.
# Idiom fix: 'with' guarantees each handle is flushed and closed even if
# pickling raises, replacing the manual open/close pairs.
with open('../submission/Validation/prediction.pkl', 'wb') as f:
    pickle.dump(predictions, f)
with open('../submission/Validation/description.pkl', 'wb') as f:
    pickle.dump(descriptions, f)
In [ ]:
# Per-modality feature tables for the test videos, keyed by video id.
test_data_txt = feature_csv_to_dict('../feature extraction/features/Test/Txt.csv')
test_data_readability = feature_csv_to_dict('../feature extraction/features/Test/Readability.csv')
test_data_openface = feature_csv_to_dict('../feature extraction/features/Test/OpenFace.csv')
test_data_mei = feature_csv_to_dict('../feature extraction/features/Test/MEI.csv')
In [ ]:
# Load the test-set predictions.
# Fix: open in binary mode ('rb') — pickle requires a bytes stream under
# Python 3 — and use 'with' so the handle is closed (the original leaked it).
with open('../submission/Test/prediction.pkl', 'rb') as f:
    predictions = pickle.load(f)
In [ ]:
# Generate the textual reports for all test videos ('interview' variable).
descriptions = generate_qualitative_descriptions(predictions['interview'], test_data_txt, test_data_readability, test_data_mei, test_data_openface, train_data, feature_influence, human_descriptions)
In [ ]:
# Persist the test-set descriptions for submission.
# Idiom fix: 'with' guarantees the handle is flushed and closed even if
# pickling raises, replacing the manual open/close pair.
with open('../submission/Test/description.pkl', 'wb') as f:
    pickle.dump(descriptions, f)
In [ ]:
# Reload the saved validation artifacts to check what was written.
with open('../submission/Validation/prediction.pkl', 'rb') as f:
    prediction_val = pickle.load(f)
with open('../submission/Validation/description.pkl', 'rb') as f:
    description_val = pickle.load(f)
In [ ]:
# Notebook cell: display the reloaded validation predictions for inspection.
prediction_val