In [2]:
# List the contents of the 2_hmmer directory (shell command run through IPython's `!` escape).
!ls ../../1_find_poophage/2_hmmer/
In [9]:
import pandas as pd
In [26]:
#1. Load poophage seqids
# Collect one sequence ID per FASTA record (every line starting with ">").
# Only the first whitespace-delimited token of the header is kept: by FASTA
# convention that token is the ID and anything after it is a free-text
# description. Keeping the full header line would prevent these IDs from ever
# matching an ID-only column in another tool's output.
# NOTE(review): presumably the Kaiju "seqid" column holds the bare read name —
# confirm against both input files.
poophage_seqs = []
with open("../../1_find_poophage/2_hmmer/454_seqs_poophage.fa") as fh:
    for line in fh:
        if line.startswith(">"):
            header_fields = line.lstrip(">").split(None, 1)
            # A header with no ID at all is still counted, as an empty string.
            poophage_seqs.append(header_fields[0] if header_fields else "")
# Number of FASTA records read (duplicates included).
print(len(poophage_seqs))
# Freeze into a set for fast, immutable membership tests in later cells.
poophage_seqs = frozenset(poophage_seqs)
In [23]:
# Read the Kaiju virus table: tab-separated with no header row, so the four
# column names are supplied explicitly.
kaiju_columns = ["classified","seqid","taxid","tax"]
kaiju_virus_df = pd.read_csv(
    "./454_seqs_kaiju.names.virus.txt",
    sep="\t",
    header=None,
    names=kaiju_columns,
)
# Preview the first rows as the cell output.
kaiju_virus_df.head()
Out[23]:
In [31]:
# Keep only the Kaiju rows whose seqid is one of the poophage sequence IDs and
# show the (rows, columns) shape of that subset.
# Series.isin is the vectorised membership test and always yields a boolean mask.
#It's empty !
in_poophage = kaiju_virus_df["seqid"].isin(poophage_seqs)
kaiju_virus_df[in_poophage].shape
Out[31]:
In [ ]: