In [1]:
# import everything
import os
import random
import sys
import gzip
import matplotlib.lines
import matplotlib.colors
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
In [2]:
# read the gzipped files
# Build explib: run accession (column 0) -> experiment library type (column 1)
# from a tab-separated, gzip-compressed file.
explib = {}
# 'rt' opens the gzip stream in text mode. With plain 'r' Python 3 yields
# bytes, and l.strip().split("\t") then raises TypeError.
with gzip.open('run_accession-experiment_lib.tsv.gz', 'rt') as f:
    for l in f:
        p = l.strip().split("\t")
        if not p[0]:
            # blank line: would otherwise register an empty accession as 'OTHER'
            continue
        if len(p) == 1:
            # accession with no library column: file it under 'OTHER'
            p.append('OTHER')
        explib[p[0]] = p[1]
In [3]:
# Read SRA.partie, Kate's file
# data is in these columns:
# 0: SRA id (needs to have .sra removed)
# 1: title (percent unique kmer)
# 2: percent unique kmer
# 3: title (percent 16S)
# 4: percent 16S
# 5: title (percent PHAGE)
# 6: percent PHAGE
# 7: title (percent PROKARYOTE)
# 8: percent PROKARYOTE
#
# Produces two dicts:
#   data:                run id -> [unique kmers, 16S, phage, prok, prok + phage]
#   experimentlibraries: library type (looked up in explib) -> list of run ids
data = {}
experimentlibraries = {}
# 'rt' = text mode; with 'r' Python 3 yields bytes and split("\t") raises TypeError.
with gzip.open('SRA.partie.tsv.gz', 'rt') as f:
    for l in f:
        p = l.strip().split("\t")
        if not p[0]:
            # blank line: indexing p[2] below would raise IndexError
            continue
        p[0] = p[0].replace('.sra', '')
        percent_phage = float(p[6])
        percent_prok = float(p[8])
        # data is unique kmers, percent 16S, percent phage, percent prok, percent prok + phage
        data[p[0]] = [float(p[2]), float(p[4]), percent_phage, percent_prok,
                      percent_phage + percent_prok]
        if p[0] in explib:
            # group run ids by their experiment library type
            experimentlibraries.setdefault(explib[p[0]], []).append(p[0])
        else:
            sys.stderr.write("No " + p[0] + " in exp\n")
In [4]:
# initiate the figure
# Palette of named colors; the plotting cell takes colors from the front of
# this list, one per library type.
allcolors = [
    'indigo', 'gold', 'hotpink', 'firebrick', 'indianred', 'yellow',
    'mistyrose', 'olive', 'pink', 'tomato', 'orangered', 'navajowhite', 'lime', 'palegreen', 'greenyellow',
    'burlywood', 'seashell', 'mediumspringgreen', 'fuchsia', 'papayawhip', 'blanchedalmond', 'chartreuse',
    'dimgray', 'black', 'peachpuff', 'springgreen', 'aquamarine', 'white', 'orange', 'brown', 'ivory',
    'dodgerblue', 'peru', 'lawngreen', 'chocolate', 'crimson', 'forestgreen', 'slateblue', 'cyan',
    'mintcream', 'silver', 'antiquewhite', 'mediumorchid', 'skyblue', 'gray', 'goldenrod', 'floralwhite',
    'moccasin', 'saddlebrown', 'grey', 'mediumvioletred', 'slategrey', 'red', 'deeppink', 'limegreen',
    'palegoldenrod', 'plum', 'turquoise', 'lavender', 'maroon', 'yellowgreen', 'sandybrown', 'thistle',
    'violet', 'navy', 'magenta', 'dimgrey', 'tan', 'rosybrown', 'olivedrab', 'blue', 'ghostwhite',
    'honeydew', 'cornflowerblue', 'linen', 'powderblue', 'seagreen', 'snow', 'sienna', 'mediumblue',
    'royalblue', 'green', 'mediumpurple', 'midnightblue', 'cornsilk', 'paleturquoise', 'bisque',
    'slategray', 'khaki', 'wheat', 'teal', 'deepskyblue', 'salmon', 'steelblue', 'palevioletred',
    'aliceblue', 'orchid', 'gainsboro', 'mediumseagreen', 'mediumturquoise', 'lemonchiffon', 'cadetblue',
    'lavenderblush', 'coral', 'purple', 'aqua', 'whitesmoke', 'mediumslateblue', 'mediumaquamarine',
    'beige', 'blueviolet', 'azure', 'oldlace',
]
# one figure holding a single Axes
fig, ax = plt.subplots()
# color -> library type; filled by the plotting cell and read when building the legend
labels = {}
In [5]:
# plot the scatter plot
# For each library type, draw a random sample of runs (with replacement) and
# plot percent phage (x) against percent prokaryote (y).
# data is unique kmers, percent 16S, percent phage, percent prok, percent prok + phage
PHAGE_COL = 2     # position of percent phage in each data[...] row
PROK_COL = 3      # position of percent prokaryote in each data[...] row
N_SAMPLES = 1000  # points drawn per library type
# Private seeded generator: the sample is the same on every run and the
# global random state is left alone.
rng = random.Random(42)

library_types = ['WGS', 'AMPLICON', 'CLONE', 'OTHER', 'RNA-Seq', 'WGA']
# Pair each type with a fixed palette entry instead of popping from allcolors,
# so re-running this cell neither shifts the colors nor grows the legend.
for e, col in zip(library_types, allcolors):
    runs = experimentlibraries.get(e, [])
    print(e + "\t" + str(len(runs)))
    if not runs:
        # nothing to sample from; choosing from an empty list would raise
        sys.stderr.write("No runs for " + e + ", skipping\n")
        continue
    labels[col] = e
    sampled = [data[rng.choice(runs)] for _ in range(N_SAMPLES)]
    phage = [row[PHAGE_COL] for row in sampled]
    prok = [row[PROK_COL] for row in sampled]
    ax.scatter(phage, prok, label=e, c=col)
In [6]:
# generate the legend
# One marker-only Line2D proxy per plotted color, paired with the library
# name stored for that color in labels.
# NOTE(review): the earlier comment said proxies were needed "to get the legend
# on a 3D plot"; ax comes from a plain 111 subplot here and scatter() was given
# label=, so presumably ax.legend() alone would do -- confirm before simplifying.
scatterproxy = [
    matplotlib.lines.Line2D([0], [0], linestyle="none", c=color, marker='o')
    for color in labels
]
labeltexts = [labels[color] for color in labels]
ax.legend(scatterproxy, labeltexts, numpoints=1)
Out[6]:
In [7]:
# show the figure
# Label both axes and pin them to 0-100, then display.
# NOTE(review): under the inline notebook backend a figure created in an
# earlier cell may already be closed, in which case plt.show() renders
# nothing and `fig` would have to be displayed explicitly -- confirm for the
# backend in use.
ax.set(
    xlabel='Percent phage',
    ylabel='Percent prokaryote',
    ylim=[0, 100],
    xlim=[0, 100],
)
plt.show()
In [ ]: