In [ ]:
import os

from IPython.display import display

from rmgpy.chemkin import loadChemkinFile
from rmgpy.data.kinetics.family import TemplateReaction
from rmgpy.data.kinetics.library import LibraryReaction

Load chemkin files


In [ ]:
# Directory holding one sub-directory per RMG run. Edit MODEL_DIR (and the two
# run directories below) to compare a different pair of models; every path in
# this cell is derived from them.
MODEL_DIR = '/home/mjliu/Documents/Models/Hexylbenzene'
RUN_DIR_1 = os.path.join(MODEL_DIR, 'e_run2')
RUN_DIR_2 = os.path.join(MODEL_DIR, 'e_run3')

# Annotated Chemkin mechanism and species dictionary of the first model.
chem_path_1 = os.path.join(RUN_DIR_1, 'chem_annotated.inp')
dict_path_1 = os.path.join(RUN_DIR_1, 'species_dictionary.txt')

# Same two files for the second model.
chem_path_2 = os.path.join(RUN_DIR_2, 'chem_annotated.inp')
dict_path_2 = os.path.join(RUN_DIR_2, 'species_dictionary.txt')

In [ ]:
# Parse each Chemkin mechanism together with its species dictionary; the two
# return values are unpacked as (species list, reaction list).
spc_list_1, rxn_list_1 = loadChemkinFile(chem_path_1, dict_path_1)
spc_list_2, rxn_list_2 = loadChemkinFile(chem_path_2, dict_path_2)

In [ ]:
# Model sizes, one line per model: number of species, number of reactions.
print len(spc_list_1), len(rxn_list_1)
print len(spc_list_2), len(rxn_list_2)

In [ ]:


In [ ]:
common = 0
for spc1 in spc_list_1:
    for spc2 in spc_list_2:
        if spc1.isIsomorphic(spc2):
            common += 1
            break
print common

In [ ]:


In [ ]:
common = 0
rxn_list_2_copy = list(rxn_list_2)
for rxn1 in rxn_list_1:
    i = 0
    while i < len(rxn_list_2_copy):
        if rxn1.isIsomorphic(rxn_list_2_copy[i]):
            common += 1
            del rxn_list_2_copy[i]
            break
        else:
            i += 1
print common

In [ ]:
common = 0
rxn_list_1_copy = list(rxn_list_1)
for rxn2 in rxn_list_2:
    i = 0
    while i < len(rxn_list_1_copy):
        if rxn2.isIsomorphic(rxn_list_1_copy[i]):
            common += 1
            del rxn_list_1_copy[i]
            break
        else:
            i += 1
print common

In [ ]:
# Number of model 2 reactions left unmatched after the model 1 -> model 2 pass
# (matched entries were deleted from rxn_list_2_copy as they were found).
len(rxn_list_2_copy)

In [ ]:
# NOTE: `common` is reassigned by several cells above, so this shows the value
# from whichever of them ran last (the model 2 -> model 1 reaction pass when
# the notebook is run top to bottom).
common

Analyze reactions


In [ ]:
def analyze_reaction_list(rxn_list):
    """Sort a list of reactions by origin.

    Args:
        rxn_list: iterable of reaction objects.

    Returns:
        A 3-tuple ``(families, family_counts, library_rxns)`` where

        * ``families`` maps each ``rxn.family`` value to the list of
          ``TemplateReaction`` objects carrying it, in input order,
        * ``family_counts`` maps the same keys to the length of that list,
        * ``library_rxns`` is the list of ``LibraryReaction`` objects.

        Reactions that are neither a ``TemplateReaction`` nor a
        ``LibraryReaction`` are left out of all three results.
    """
    library_rxns = []
    families = {}
    for reaction in rxn_list:
        if isinstance(reaction, TemplateReaction):
            # setdefault creates the bucket the first time a family is seen.
            families.setdefault(reaction.family, []).append(reaction)
        elif isinstance(reaction, LibraryReaction):
            library_rxns.append(reaction)

    family_counts = {name: len(members) for name, members in families.items()}

    return families, family_counts, library_rxns

In [ ]:
# Break model 1 down by kinetics family; print the per-family counts followed
# by the number of library reactions.
families_1, family_counts_1, library_rxns_1 = analyze_reaction_list(rxn_list_1)
print family_counts_1, len(library_rxns_1)

In [ ]:
# Break model 2 down by kinetics family; print the per-family counts followed
# by the number of library reactions.
families_2, family_counts_2, library_rxns_2 = analyze_reaction_list(rxn_list_2)
print family_counts_2, len(library_rxns_2)

Get reaction counts for each kinetics family (sorted, largest first)


In [ ]:
counts_1 = [(family, count) for family, count in family_counts_1.iteritems()]
counts_1.sort(key=lambda x: x[1], reverse=True)
for family, c in counts_1:
    print '{0} {1}'.format(family, c)

In [ ]:
counts_1 = [(family, count) for family, count in family_counts_2.iteritems()]
counts_1.sort(key=lambda x: x[1], reverse=True)
for family, c in counts_1:
    print '{0} {1}'.format(family, c)

Count all duplicates


In [ ]:
def _group_isomorphic_reactions(reactions):
    """Partition ``reactions`` into lists of mutually isomorphic reactions.

    Groups are returned in order of first appearance, and members keep their
    input order within a group.

    Each reaction is compared only against the first member of every existing
    group. This assumes ``isIsomorphic`` behaves as an equivalence relation
    (anything isomorphic to one member is isomorphic to all of them), which
    avoids repeating the isomorphism check for every member of a group.
    """
    groups = []
    for reaction in reactions:
        for group in groups:
            if reaction.isIsomorphic(group[0]):
                group.append(reaction)
                break
        else:
            # No existing group matched (or there are no groups yet).
            groups.append([reaction])
    return groups


def count_duplicates(families):
    """Count duplicate (isomorphic) reactions within each family.

    Args:
        families: dict mapping a family key to a list of reaction objects
            that provide an ``isIsomorphic(other)`` method.

    Returns:
        dict mapping each family key to a stats dict with the entries

        * ``'rxn_num'``: number of distinct (non-isomorphic) reactions,
        * ``'total_rxns'``: total number of reactions in the family,
        * ``'rxns'``: list of groups, each a list of isomorphic reactions.
    """
    all_stats = {}
    # .items() (rather than .iteritems()) works on both Python 2 and 3.
    for family, reactions in families.items():
        groups = _group_isomorphic_reactions(reactions)
        all_stats[family] = {
            'rxn_num': len(groups),
            'total_rxns': sum(len(group) for group in groups),
            'rxns': groups,
        }

    return all_stats

In [ ]:
# Group the model 1 template reactions into sets of isomorphic duplicates,
# family by family.
stats_1 = count_duplicates(families_1)

In [ ]:
total_total = 0
total_rxn = 0
for family, stats in stats_1.iteritems():
    print family
    print stats['total_rxns'], stats['rxn_num']
    total_total += stats['total_rxns']
    total_rxn += stats['rxn_num']
print total_total, total_rxn

In [ ]:
# Group the model 2 template reactions into sets of isomorphic duplicates,
# family by family.
stats_2 = count_duplicates(families_2)

In [ ]:
total_total = 0
total_rxn = 0
for family, stats in stats_2.iteritems():
    print family
    print stats['total_rxns'], stats['rxn_num']
    total_total += stats['total_rxns']
    total_rxn += stats['rxn_num']
print total_total, total_rxn

In [ ]:

Count duplicates in the Intra_R_Add families (Endocyclic, Exocyclic, Polycyclic)


In [ ]:
def analyze_intra_add_rxns(families):
    """Find duplicate reactions within and across the Intra_R_Add families.

    Args:
        families: dict mapping a family key to a list of reaction objects
            that provide an ``isIsomorphic(other)`` method. Only the keys
            ``'Intra_R_Add_Endocyclic'``, ``'Intra_R_Add_Exocyclic'`` and
            ``'Intra_R_Add_Polycyclic'`` are analysed; all others are ignored.

    Returns:
        A 2-tuple ``(output, cross_duplicates)``:

        * ``output`` maps each analysed family present in ``families`` to a
          list of groups, each group being a list of isomorphic reactions
          (groups in order of first appearance).
        * ``cross_duplicates`` has one entry per Endocyclic group that is
          isomorphic to an Exocyclic group: the Endocyclic group's reactions
          followed by those of the first matching Exocyclic group. The
          Polycyclic family takes no part in this cross-family check.
    """
    target = ('Intra_R_Add_Endocyclic', 'Intra_R_Add_Exocyclic', 'Intra_R_Add_Polycyclic')

    output = {}
    # .items() (rather than .iteritems()) works on both Python 2 and 3.
    for family, reactions in families.items():
        if family not in target:
            continue
        groups = []
        for reaction in reactions:
            for group in groups:
                # Comparing with one representative is enough, assuming
                # isIsomorphic behaves as an equivalence relation; this avoids
                # re-checking every member of every group.
                if reaction.isIsomorphic(group[0]):
                    group.append(reaction)
                    break
            else:
                # No existing group matched (or there are no groups yet).
                groups.append([reaction])
        output[family] = groups

    # If either family is absent the corresponding list is empty and the
    # loops below simply do nothing.
    endo_groups = output.get('Intra_R_Add_Endocyclic', [])
    exo_groups = output.get('Intra_R_Add_Exocyclic', [])

    cross_duplicates = []
    for endo_group in endo_groups:
        for exo_group in exo_groups:
            if endo_group[0].isIsomorphic(exo_group[0]):
                cross_duplicates.append(endo_group + exo_group)
                break

    return output, cross_duplicates

In [ ]:
# Duplicate analysis restricted to the Intra_R_Add families of model 1.
output_1, cross_1 = analyze_intra_add_rxns(families_1)

In [ ]:
total_num = 0
for rxn_list in cross_1:
    display(rxn_list[0])
    print len(rxn_list)
    total_num += len(rxn_list)
print len(cross_1), total_num

# display(cross_1[22][0])
# for rxn in cross_1[22]:
#     print rxn.kinetics.comment

In [ ]:
for family, sorted_reactions in output_1.iteritems():
    print family, len(sorted_reactions)
    total_num = 0
    for rxn_list in sorted_reactions:
        total_num += len(rxn_list)
        display(rxn_list[0])
        print len(rxn_list)
    print 'Total reactions = {}'.format(total_num)

In [ ]:
# Duplicate analysis restricted to the Intra_R_Add families of model 2.
output_2, cross_2 = analyze_intra_add_rxns(families_2)

In [ ]:
# For each cross-family duplicate group of model 2, show its first reaction
# followed by the number of reactions in the group.
for rxn_list in cross_2:
    display(rxn_list[0])
    print len(rxn_list)

In [ ]:
for family, sorted_reactions in output_2.iteritems():
    print family
    for rxn_list in sorted_reactions:
        display(rxn_list[0])
        print len(rxn_list)

In [ ]:
display(output_2['Intra_R_Add_Polycyclic'][5][0])
for rxn in output_2['Intra_R_Add_Polycyclic'][5]:
    print rxn.kinetics.comment

In [ ]:
total_num = 0
for rxn_list in output_2['Intra_R_Add_Polycyclic']:
    total_num += len(rxn_list)
print len(output_2['Intra_R_Add_Polycyclic']), total_num

Find reactions which are no longer generated


In [ ]:
# Flatten the per-family groups into one list per model, keeping only the
# first reaction of each duplicate group as its representative.
# "old" comes from output_1 and "new" from output_2.
old_intra_add = [group[0] for groups in output_1.values() for group in groups]
new_intra_add = [group[0] for groups in output_2.values() for group in groups]

In [ ]:
# Representatives from the old model that have no isomorphic counterpart among
# the new model's representatives.
diff_reactions = [
    rxn for rxn in old_intra_add
    if not any(rxn.isIsomorphic(rxn2) for rxn2 in new_intra_add)
]

In [ ]:
# Render every reaction currently held in diff_reactions.
for rxn in diff_reactions:
    display(rxn)

In [ ]:
# Reverse direction: representatives from the new model that have no
# isomorphic counterpart among the old model's representatives.
# NOTE: this reuses (and overwrites) the name diff_reactions.
diff_reactions = [
    rxn for rxn in new_intra_add
    if not any(rxn.isIsomorphic(rxn2) for rxn2 in old_intra_add)
]

In [ ]:
# Render every reaction currently held in diff_reactions.
for rxn in diff_reactions:
    display(rxn)

In [ ]: