In [3]:
import re
from collections import defaultdict
import itertools
import pandas as pd
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import collections as mc
In [4]:
with open("blast_tsv_columns.txt") as fh:
blast_cols = next(fh).rstrip(" \n").split(" ")
blast_cols
Out[4]:
In [5]:
blast_hits = pd.read_csv("metaspades1_to_candidates_filt.tsv",header=None,names=blast_cols,sep="\t")
In [6]:
blast_hits.head()
Out[6]:
In [17]:
def plot_hits(df, sseqid,out_file):
#Calculate reference line (subject sequence)
main_line = [(0,0),(df.slen.iloc[0],0)]
#Calculate lines of query hits, shifted with respect to their alignment to subject sequence
full_queries = []
aligned_queries = []
labels = [(0,-0.05,sseqid)]
for hit_number,(_,row) in enumerate(df.iterrows()):
y_pos = (hit_number + 1) * 0.1
#Positive strand
if row["sstart"] < row["send"]:
shifted_start = row["sstart"] - row["qstart"]
#Negative strand
else:
shifted_start = row["sstart"] - row["qend"]
#Full length of the query
new_full_query = [(shifted_start,y_pos),(shifted_start+row["qlen"],y_pos)]
#Segment of the query that aligns to the reference
new_aligned_query = [(row["sstart"],y_pos),(row["send"],y_pos)]
new_label = (shifted_start,y_pos,row["qseqid"])
full_queries.append(new_full_query)
aligned_queries.append(new_aligned_query)
labels.append(new_label)
pad_left_down = [(-df.slen.iloc[0]/4,-len(new_full_query)*0.2), (0,-len(new_full_query)*0.2)]
pad_right_up = [(df.slen.iloc[0],(1+len(new_full_query))*0.1), (5*df.slen.iloc[0]/4,(1+len(new_full_query))*0.1)]
#Matplotlib plot
mpl.rcParams['figure.figsize'] = (10.0, 8.0)
fig,ax = plt.subplots()
lc1 = mc.LineCollection([main_line],linewidths=10,colors=["blue"])
lc2 = mc.LineCollection(full_queries,linewidths=3,colors=["black"])
lc3 = mc.LineCollection(aligned_queries,linewidths=3,colors=["green"])
pad = mc.LineCollection([pad_left_down,pad_right_up],linewidths=3,colors=["white"])
ax.add_collection(lc1)
ax.add_collection(lc2)
ax.add_collection(lc3)
ax.add_collection(pad)
#Add labels
for lbl in labels:
ax.text(lbl[0],lbl[1]+0.01,lbl[2])
#Set title
ax.autoscale()
ax.set_title(sseqid)
fig.savefig(out_file)
plt.close()
In [18]:
for sseqid,df in blast_hits.groupby("sseqid"):
plot_hits(df,sseqid,"{}.png".format(sseqid))