In [1]:

    
import sys
import pandas as pd



In [2]:

    
# Directory holding the per-batch combined CSVs that are read below.
# The trailing slash is required: file names are appended to this value
# by plain string concatenation.
aggregated_data_path = '../../CF_output/combined/'



In [3]:

    
# Batch numbers to merge; each one selects a
# 'batch<N>_constructiveness_and_toxicity_combined.csv' input file.
batches = [3, 4, 5, 6]

Combine constructiveness and toxicity annotations from different batches



In [4]:

    
# Read every batch's combined constructiveness/toxicity CSV into its own
# DataFrame, keeping the frames in the same order as `batches`.
dfs = []
for batch in batches:
    filename = ''.join([
        aggregated_data_path,
        'batch',
        str(batch),
        '_constructiveness_and_toxicity_combined.csv',
    ])
    dfs.append(pd.read_csv(filename))



In [5]:

    
# Stack the per-batch frames into one dataframe.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# batch keeps its own row labels, so the merged frame would carry duplicate
# index labels and label-based lookups (.loc) could return several rows.
combined_annotations_df = pd.concat(dfs, ignore_index=True)



In [6]:

    
# Sanity check: display the (rows, columns) shape of the merged dataframe.
# No sorting is performed in this cell.
combined_annotations_df.shape









    Out[6]:





(4158, 25)



In [7]:

    
# Columns written to the output CSV, in this order.
cols = [
    # article_* fields
    'article_id',
    'article_author',
    'article_published_date',
    'article_title',
    'article_url',
    'article_text',
    # comment_* fields
    'comment_author',
    'comment_counter',
    'comment_text',
    # constructiveness / toxicity annotation fields
    'agree_constructiveness_expt',
    'agree_toxicity_expt',
    'constructive',
    'constructive_internal_gold',
    'crowd_toxicity_level',
    'crowd_toxicity_level_internal_gold',
    'has_content',
    'crowd_discard',
    # characteristic fields
    'constructive_characteristics',
    'non_constructive_characteristics',
    'toxicity_characteristics',
    # crowd_comments_* fields
    'crowd_comments_constructiveness_expt',
    'crowd_comments_toxicity_expt',
    # other_* fields
    'other_con_chars',
    'other_noncon_chars',
    'other_toxic_chars',
]

Write the combined constructiveness and toxicity annotations to a CSV file



In [8]:

    
# Directory the merged annotations CSV is written to.
# The trailing slash is required: the output file name is appended to this
# value by plain string concatenation.
output_dir = '../../CF_output/annotated_data/'



In [9]:

    
# Export the merged annotations: only the columns listed in `cols` (in that
# order) are written, and the dataframe index is left out of the file.
combined_annotations_df.to_csv(
    output_dir + 'constructiveness_and_toxicity_annotations.csv',
    columns=cols,
    index=False,
)