In [1]:
# Enable inline matplotlib rendering for this notebook.
# NOTE(review): %pylab also star-imports numpy/matplotlib names into the
# notebook namespace; `%matplotlib inline` would avoid that — confirm no cell
# relies on the implicitly imported names before switching.
%pylab inline
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format = 'retina'
In [2]:
import pandas as pd
import seaborn as sns
from ficus import FigureManager
from IPython.display import FileLink
import glob
import os
from collections import OrderedDict
import matplotlib.pyplot as plt
from grave import plot_network
In [3]:
import networkx as nx
In [8]:
# Load ../sacPom.csv (relative to the notebook) into the `meta` DataFrame.
meta = pd.read_csv('../sacPom.csv')
In [10]:
# Show the last rows of `meta` as a quick check that the CSV loaded.
meta.tail()
Out[10]:
In [12]:
# Read the GraphML export into a networkx graph; `G` is drawn by plot_network below.
# NOTE(review): this file is named "fugu" while every other input in the
# notebook is "sacPom" — confirm this is the intended graph.
G = nx.read_graphml('../fugu.cdbg.graphml')
In [7]:
# Draw the loaded graph `G` on a single 16x16 figure using the
# Kamada-Kawai layout; FigureManager supplies the axes and shows the figure
# when the block exits.
with FigureManager(show=True, figsize=(16, 16)) as (fig, ax):
    plot_network(
        G,
        layout='kamada_kawai',
        ax=ax,
    )
In [34]:
from goetia.dbg import make_dBG
from khmer._oxli.parsing import FastxParser
In [36]:
# Build the de Bruijn graph object that the FASTA records are loaded into.
# NOTE(review): the positional arguments are presumably (k-mer size, table
# size, number of tables) — confirm against goetia.dbg.make_dBG. The value 41
# matches the k-mer size passed to S.kmers(41) later in this notebook.
graph = make_dBG(41, 2e9, 4, storage='_ByteStorage')
In [37]:
# Stream the FASTA file and insert the sequence string of every record
# into `graph`.
for record in FastxParser('sacPom/sacPom.cdbg.fasta.5138.FASTA'):
    graph.add_sequence(record.sequence)
In [59]:
# Index every record of the FASTA file by its header name. The values are the
# parser's record objects (not bare strings), so use `.sequence` on a value
# to get the sequence text.
sequences = {}
for record in FastxParser('sacPom/sacPom.cdbg.fasta.5138.FASTA'):
    sequences[record.name] = record
In [41]:
# Look up the k-mer counts of one record in `graph`.
# `sequences` maps header names to parser record objects, while the graph was
# populated with `record.sequence` strings, so query with the sequence string
# rather than the record object itself.
graph.get_counts(sequences['ID=6823 L=391 type=FULL'].sequence)
Out[41]:
In [73]:
# Load the tab-separated table named after the FASTA file; cells holding the
# literal text "None" are read as missing values.
counts = pd.read_csv(
    'sacPom/sacPom.cdbg.fasta.5138.FASTA.tsv',
    sep='\t',
    quotechar='"',
    na_values='None',
)
In [27]:
# Load the tab-separated table for the bcalm unitigs file; cells holding the
# literal text "None" are read as missing values.
bcalm = pd.read_csv(
    'sacPom/bcalm-cdna/files.unitigs.fa.tsv',
    sep='\t',
    quotechar='"',
    na_values='None',
)
In [29]:
# Show the bcalm rows whose `max` column exceeds 1.
bcalm.loc[bcalm['max'].gt(1)]
Out[29]:
In [74]:
# Each `name` is a whitespace-separated header such as
# "ID=6823 L=391 type=FULL". Split it once, then keep the text after "=" of
# each field as its own column (columns are added in the order L, type, ID;
# values stay strings).
name_fields = counts.name.str.split(expand=True)
for column, position in (('L', 1), ('type', 2), ('ID', 0)):
    counts[column] = name_fields[position].str.partition('=', expand=True)[2]
In [75]:
# Keep only the rows whose `internal_max` column exceeds 1.
bad = counts.loc[counts['internal_max'].gt(1)]
In [76]:
# Display the rows selected into `bad` (internal_max > 1).
bad
Out[76]:
In [60]:
# Select a single record by its header name for closer inspection.
S = sequences['ID=5173 L=106 type=FULL']
In [64]:
# Display the selected record.
S
Out[64]:
In [70]:
# For every 41-mer of S except the first and the last, scan the indexed
# records in dictionary order and report the first record whose sequence
# contains that k-mer, together with the offset of the hit.
interior_kmers = list(S.kmers(41))[1:-1]
for kmer in interior_kmers:
    for seq in sequences.values():
        offset = seq.sequence.find(kmer)
        if offset == -1:
            # k-mer not present in this record; try the next one
            continue
        print(kmer, seq.name, seq, offset)
        # only the first containing record is reported per k-mer
        break
In [72]:
# Check whether this 106-base string occurs verbatim inside the sequence of
# record 'ID=631 L=4064 type=ISLAND'.
# NOTE(review): the literal has the same length as the L=106 record selected as
# `S`; presumably it is S's full sequence — confirm, and consider using
# `S.sequence` instead of the pasted literal.
'CAGGGAAAGTACAAGGATCCAACAAAGGTGATCGTTTAACTAAAACATTTGAAGGTTTTAGAAATCAATTGGACAAAGTTCAATTTATAAGGAAACTCATGTCAAA' in sequences['ID=631 L=4064 type=ISLAND'].sequence
Out[72]:
In [ ]: