In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import brewer2mpl
import colorsys
import math
import dendropy as dp
import json

from datetime import datetime
from Bio import AlignIO, SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Levenshtein import distance
from itertools import combinations, product, permutations
from time import time
from __future__ import division
from collections import Counter, defaultdict
from copy import deepcopy
from random import shuffle, choice, sample
from scipy.stats.mstats import mquantiles
from scipy.stats import norm, expon, poisson, binom
from scipy.misc import comb
from IPython.display import Math
from networkx.readwrite import json_graph 
from dendropy import Tree

%matplotlib inline

In [2]:
# Is gene flow structured by HA and NA distribution? Let's take a look.

# Load one tree per segment, keyed by its 1-based segment number.
# All 8 segments are listed here; trim this list if a segment file is missing.
segments = [1, 2, 3, 4, 5, 6, 7, 8]
trees = dict()

for segment in segments:
    # Each segment's tree is read from its own NEXUS file (relative path).
    tree_path = '20141201 KY H3N8 Segment {0}.nexus'.format(segment)
    trees[segment] = Tree.get_from_path(tree_path, 'nexus')

In [3]:
# Pairwise Euclidean distance between the segment trees.
# Segment numbers are 1-based, so segment k maps to matrix index k-1.
# Only the upper triangle (seg1 < seg2) is filled; the diagonal and the
# lower triangle are left at 0.
tree_distances = np.zeros((8, 8))

# sorted() keeps seg1 < seg2 regardless of the dict's key order.
for seg1, seg2 in combinations(sorted(trees), 2):
    # Compare each pair of trees once and reuse the value for both the
    # printed log and the matrix entry.
    pair_distance = trees[seg1].euclidean_distance(trees[seg2])
    print(seg1, seg2)
    print(pair_distance)
    tree_distances[seg1-1, seg2-1] = pair_distance


(1, 2)
173.246111148
(1, 3)
179.726746862
(1, 4)
181.354074937
(1, 5)
178.430946575
(1, 6)
182.304839262
(1, 7)
178.966334688
(1, 8)
182.233396293
(2, 3)
27.7328385834
(2, 4)
27.5554198993
(2, 5)
28.5274575756
(2, 6)
28.4791866939
(2, 7)
28.1037815326
(2, 8)
30.2159531658
(3, 4)
18.2890764664
(3, 5)
20.7623459486
(3, 6)
18.5631941281
(3, 7)
20.3708825065
(3, 8)
21.0539440917
(4, 5)
18.8433379793
(4, 6)
14.6168582877
(4, 7)
17.5860824539
(4, 8)
18.9348795572
(5, 6)
18.4847563993
(5, 7)
19.5152263271
(5, 8)
20.5645313929
(6, 7)
16.9878685396
(6, 8)
18.0072832945
(7, 8)
20.4291902619

In [4]:
# Heatmap of the segment-by-segment tree distance matrix (`tree_distances`).
fig, ax = plt.subplots()
mesh = ax.pcolor(tree_distances)
# Set the aspect on the axes that actually holds the heatmap; a bare
# plt.axes() call adds a new, empty axes on current matplotlib releases.
ax.set_aspect('equal')
fig.colorbar(mesh, ax=ax)

# Cell k of the heatmap spans [k-1, k], so the 1-based segment label is
# centred at k - 0.5.
segment_labels = np.arange(1, tree_distances.shape[0] + 1)
ax.set_xticks(segment_labels - 0.5)
ax.set_xticklabels(segment_labels)
ax.set_yticks(segment_labels - 0.5)
ax.set_yticklabels(segment_labels)

ax.set_title('Segment-wise \nTree Euclidean Distance')
ax.set_xlabel('Segment A')
ax.set_ylabel('Segment B')
fig.savefig('Segment-Wise Tree Euclidean Distance.pdf', bbox_inches='tight')



In [4]: