In [1]:
import kvector
In [2]:
# Read the example motif file that ships with the kvector tests,
# passing 'ACGT' as the residue alphabet.
motifs = kvector.read_motifs(
    'kvector/tests/data/example_rbps.motif',
    residues='ACGT',
)

# Preview the first few entries.
motifs.head()
Out[2]:
You can access individual motifs with the usual pandas indexing:
In [3]:
# The 4th motif, i.e. position 3 when counting from 0.
# Use .iloc for positional access: the motifs are labelled by name (strings),
# and relying on plain [] to treat an integer as a position on a
# string-labelled pandas Series is deprecated.
motifs.iloc[3]
Out[3]:
In [4]:
# Look up one motif by its full label. The label is three tab-separated
# fields, so the literal is split at the tabs purely for readability;
# adjacent string literals are concatenated into the same single key.
motifs[
    'M004_0.6_BRUNOL4_ENSG00000101489_Homo_sapiens\t'
    'M004_0.6_BRUNOL4_ENSG00000101489_Homo_sapiens\t'
    '5.0'
]
Out[4]:
In [5]:
%pdb
In [6]:
# Build k-mer vectors from the loaded motifs, using residues 'ACGT'
# and k-mer lengths 3 and 4.
motif_kmer_vectors = kvector.motifs_to_kmer_vectors(
    motifs,
    residues='ACGT',
    kmer_lengths=(3, 4),
)

# Display the result.
motif_kmer_vectors
Out[6]:
In [7]:
# NOTE(review): throwaway test string; the only visible use is the
# `asdf.replace(...)` call in the following cell. Looks like leftover
# scratch work unrelated to the kvector walkthrough — confirm before keeping.
asdf = 'akjsdhfkjahsf klasjdfk asdfasdf'
In [8]:
# Swap every tab in the scratch string for a single space.
# This yields a new string; `asdf` itself is left untouched.
' '.join(asdf.split('\t'))
Out[8]:
In [9]:
# Count k-mers of lengths 3 and 4 in the example FASTA file
# that ships with the kvector tests.
kmer_vector = kvector.count_kmers(
    'kvector/tests/data/example.fasta',
    kmer_lengths=(3, 4),
)

# Preview the first few rows.
kmer_vector.head()
Out[9]:
Since this is a pandas DataFrame, you can do convenient things like compute the mean and standard deviation.
In [10]:
# Mean of the k-mer counts. With pandas' default axis this is computed
# per column — TODO confirm that is the intended axis.
kmer_vector.mean()
Out[10]:
In [11]:
# Standard deviation of the k-mer counts. With pandas' defaults this is the
# per-column sample standard deviation (ddof=1) — TODO confirm that is intended.
kmer_vector.std()
Out[11]: