In [1]:
import sys

In [2]:
import re

In [3]:
import glob
import pandas as pd

In [4]:
# Put ./support_files/ on the module search path so modules located there can
# be imported. The membership check keeps repeated runs of this cell from
# appending duplicate entries to sys.path.
SUPPORT_DIR = './support_files/'
if SUPPORT_DIR not in sys.path:
    sys.path.append(SUPPORT_DIR)

In [5]:
import aggregate_mummer_results as amr
import name_extractions

In [6]:
import plotting

In [7]:
import matplotlib.pyplot as plt
import matplotlib as mpl

% matplotlib inline

In [8]:
import seaborn as sns
# Select seaborn's 'ticks' style; this is a global setting applied from here on.
sns.set_style('ticks')

In [9]:
# Load the tab-separated percent-identity table into a DataFrame.
summary_df = pd.read_csv(
    "./percent_identities.tsv",
    delimiter='\t',
)

In [10]:
# Spot-check the summary_stat_type helper on one column name. The recorded
# output for this cell shows it returns the name without the trailing " (2)".
name_extractions.summary_stat_type('estimated % identity (2)')


Out[10]:
'estimated % identity'

In [11]:
# Preview the first rows of the loaded table to check its columns.
summary_df.head()


Out[11]:
Unnamed: 0 % identity (1) % identity (2) estimated % identity (1) estimated % identity (2) frac of query aligned (1) frac of query aligned (2) mummer file number alignments aggregated (1) number alignments aggregated (2) query alignment length total (1) query alignment length total (2) query bp query contigs query id query name ref bp ref contigs ref id ref name
0 0 89.512390 89.512390 1.101441 1.101441 0.012305 0.012305 elviz-contigs-1056229.Burkholderiales-1/elviz-... 94 94 33702 33702 2738909 4662 Ga0066473 elviz-contigs-1056229.Burkholderiales-1 2022224 1990 Ga0066401 elviz-contigs-1056013.Comamonadaceae-1
1 0 90.094688 90.087233 9.281476 10.257853 0.103019 0.113866 elviz-contigs-1056229.Burkholderiales-1/elviz-... 799 918 282160 311868 2738909 4662 Ga0066473 elviz-contigs-1056229.Burkholderiales-1 4860710 5582 Ga0066479 elviz-contigs-1056247.Burkholderiales-1
2 0 91.209037 91.226886 3.687345 3.793253 0.040427 0.041580 elviz-contigs-1056229.Burkholderiales-1/elviz-... 280 293 110727 113885 2738909 4662 Ga0066473 elviz-contigs-1056229.Burkholderiales-1 2881263 2858 Ga0066437 elviz-contigs-1056121.Comamonadaceae-2
3 0 100.000000 100.000000 0.005769 0.005769 0.000058 0.000058 elviz-contigs-1056229.Burkholderiales-1/elviz-... 1 1 158 158 2738909 4662 Ga0066473 elviz-contigs-1056229.Burkholderiales-1 2904105 102 Ga0066438 elviz-contigs-1056124.Methylophilus-1
4 0 93.064607 92.872315 5.462180 5.595582 0.058692 0.060250 elviz-contigs-1056229.Burkholderiales-1/elviz-... 270 282 160753 165020 2738909 4662 Ga0066473 elviz-contigs-1056229.Burkholderiales-1 3012106 445 Ga0081644 Acidovorax-69

In [14]:
# Pairs of (version 1, version 2) column names handed to
# plotting.plot_old_versus_new_ani.
column_pairs = [
    ('% identity (1)', '% identity (2)'),
    ('% of query aligned (1)', '% of query aligned (2)'),
    ('estimated % identity (1)', 'estimated % identity (2)'),
]

# The table preview only shows 'frac of query aligned (n)' columns, and the
# execution counts skip from 11 to 14, so the '% of query aligned (n)' columns
# appear to have been created in cells that are no longer in the notebook.
# Derive them here when absent so the notebook runs on a fresh kernel.
# NOTE(review): assumes percent = 100 * fraction -- confirm against the
# original derivation.
for version in ('(1)', '(2)'):
    percent_col = '% of query aligned ' + version
    if percent_col not in summary_df.columns:
        summary_df[percent_col] = 100 * summary_df['frac of query aligned ' + version]

In [15]:
# Hand the table and the column pairs to the project plotting helper. Per the
# recorded output, it prints the pair index, the pair, and an Axes object for
# each entry -- presumably one subplot per pair; confirm in plotting.py.
plotting.plot_old_versus_new_ani(summary_df, column_pairs)


0
('% identity (1)', '% identity (2)')
Axes(0.125,0.125;0.227941x0.775)
1
('% of query aligned (1)', '% of query aligned (2)')
Axes(0.398529,0.125;0.227941x0.775)
2
('estimated % identity (1)', 'estimated % identity (2)')
Axes(0.672059,0.125;0.227941x0.775)

In [ ]: