In [1]:
import pyparanoid.genomedb as gdb
import pyparanoid.genomeplot as gplot
In [2]:
### Load a list of strains that have been added to a genome database
### you've built using pyparanoid.genomedb methods.
strains = [line.rstrip() for line in open("../src/Brass.txt","r")]
In [3]:
### First, use the download_genbank_files() function to check if all
### of the strains have genbank files and download the ones that are
### missing from RefSeq and Ensembl.
gdb.download_genbank_files(strains,"../../data/genomedb")
In [4]:
### Now let's generate a list of strains and locus tags for a specific
### PyParanoid group that we are interested in.
from Bio import SeqIO
o = open("../src/locus_tag_list.txt",'w')
for seq in SeqIO.parse(open("../../data/Pseudo/prop_homolog_faa/group_22008.faa",'r'),'fasta'):
vals = seq.id.split("|")
if vals[0] in strains:
o.write("{}\t{}\n".format(vals[0],vals[1]))
o.close()
!head -n 5 ../src/locus_tag_list.txt > ../src/plot_list.txt
## You can modify this file to add strains
In [5]:
## This function will plot genomic regions from the strains in "locus_tag_list.txt", use the
## genbank files in "genomedb", and color the results based on PyParanoid group membership.
## This step can take a while...eventually I'm going to come back to this to optimize the
## homolog searching. You can also dump this GenomeDiagram object to a python pickle for
## future work.
GD = gplot.plot_genomic_regions("../src/plot_list.txt","../../data/genomedb", \
"../../data/Pseudo")
In [6]:
GD.draw(x=0.02,format="linear", orientation = "landscape", track_size=0.35, fragments=1)
GD.write("all_group22008.pdf", "pdf")
In [7]:
from IPython.display import IFrame
IFrame("all_group22008.pdf", width=600, height=300)
Out[7]:
In [8]:
## Optionally, you can specify a set of gene groups to highlight in green as well as a custom
## size window span to view a larger chunk of the genome.
## Setting labels=True is sometimes helpful for smaller plots, but generally leads to clutter
## when you are looking at many genomes.
highlight = ["group_16803","group_22008","group_15086","group_06700","group_10985",\
"group_09490","group_10988"]
GD2 = gplot.plot_genomic_regions("../src/plot_list.txt","../../data/genomedb",\
"../../data/Pseudo",span=90000,hl_groups=highlight,labels=True)
In [9]:
GD2.draw(x=0.02,format="linear", orientation = "landscape", track_size=0.35, fragments=1, \
start=0, end=100000, pagesize=(1000,4000))
GD2.write("highlighted_group22008.pdf", "pdf")
IFrame("highlighted_group22008.pdf", width=600, height=300)
Out[9]: