In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from ggplot import *
import matplotlib.pyplot as plt
In [2]:
# ------------------------------------------------------------------
# Read in Data
#
# Loads the tab-separated table at the path below into `all_df`.
# The first column of the file is used as the row index, and the
# literal string 'NA' is parsed as a missing value.
# ------------------------------------------------------------------
all_df = pd.read_table(
    "../data/outputs/TFBS_map_DF_all_bicoid_test.csv",
    sep="\t",
    index_col=0,
    na_values='NA',
)
In [3]:
# Remove every row that contains a missing value (NA).
all_df = all_df.dropna()
# Check: print the cleaned frame. The call form `print(...)` with a single
# argument behaves the same on Python 2 and Python 3, whereas the bare
# print statement is a SyntaxError on Python 3.
print(all_df)
position score species raw_position strand motif_found \
0 10 5.013668 0 10 positive ataatttt
2 -751 8.946094 0 154 negative tcctcgcc
1 157 10.457056 0 157 positive ttcctcgc
3 -684 5.243600 0 221 negative tcgttccc
4 -649 3.285077 0 256 negative tattgccg
5 -616 3.594098 0 289 negative ttggtacc
6 -598 6.417715 0 307 negative ctacattt
7 334 3.702168 0 334 positive gaacggaa
8 404 3.491528 0 404 positive gcaaaagt
9 451 3.794091 0 451 positive gtttttgc
10 -450 9.909568 0 455 negative tgggatta
12 -438 5.787577 0 467 negative agggcttg
11 481 4.094957 0 481 positive ttggtacc
14 -385 4.736640 0 520 negative gtaccgat
13 523 5.020957 0 523 positive tgtaccga
15 -338 9.909568 0 567 negative gaagggat
16 593 3.509995 0 593 positive caaggcag
17 603 3.346478 0 603 positive agcataac
18 -296 4.374594 0 609 negative tgctgggt
19 -291 5.257062 0 614 negative ggttattt
20 -281 4.983569 0 624 negative tgtttttt
21 -264 4.389700 0 641 negative ttaaccct
24 -242 8.959556 0 663 negative taagccca
22 668 5.016483 0 668 positive aagcccag
23 675 10.016483 0 675 positive gcacagca
25 690 6.349059 0 690 positive ttttggtg
26 -115 5.920559 0 790 negative atgattat
28 -59 6.380240 0 846 negative tttcaaat
27 859 3.391992 0 859 positive aagtccca
29 873 5.612093 0 873 positive ggggccgt
.. ... ... ... ... ... ...
204 89 4.894493 7 89 positive cgaagcct
205 169 10.457056 7 169 positive ttaatccg
206 -720 8.946094 7 185 negative cagattat
207 -656 5.243600 7 249 negative tagatttt
208 -621 3.391992 7 284 negative aagttttg
209 -589 3.594098 7 316 negative tggctttc
210 -571 6.417715 7 334 negative ggaattaa
211 342 3.702168 7 342 positive ttaaacgg
212 -542 4.431520 7 363 negative aggagtag
213 392 3.491528 7 392 positive aaaaaccg
214 431 3.794091 7 431 positive ctaaccca
215 -449 9.909568 7 456 negative gggattag
216 463 4.094957 7 463 positive attagccg
217 -437 5.787577 7 468 negative gggcttga
218 506 8.959556 7 506 positive ttaagctg
219 -382 4.736640 7 523 negative ccgatttt
220 -335 9.909568 7 570 negative gggattag
221 -307 3.374593 7 598 negative cagcataa
222 -268 4.389700 7 637 negative tgacttaa
223 645 5.016483 7 645 positive ttaaccct
224 651 9.909568 7 651 positive ctaatccc
225 -247 8.959556 7 658 negative cagcttaa
226 666 8.997031 7 666 positive ttaagccc
227 -222 3.285077 7 683 negative gagttttg
228 -198 3.391992 7 707 negative aagttttg
229 -115 5.920559 7 790 negative atgattat
230 -56 6.380240 7 849 negative caaattaa
231 857 5.505177 7 857 positive ttaagtcc
232 858 3.285077 7 858 positive taagtccc
233 899 4.422532 7 899 positive attatctg
align_position
0 10
2 220
1 223
3 320
4 355
5 388
6 414
7 442
8 530
9 584
10 591
12 603
11 617
14 684
13 687
15 743
16 771
17 781
18 787
19 792
20 809
21 827
24 850
22 855
23 867
25 892
26 1015
28 1075
27 1088
29 1102
.. ...
204 100
205 206
206 222
207 327
208 362
209 401
210 420
211 429
212 450
213 491
214 533
215 575
216 582
217 587
218 625
219 646
220 698
221 726
222 796
223 810
224 817
225 824
226 833
227 856
228 892
229 984
230 1046
231 1054
232 1055
233 1122
[231 rows x 7 columns]
In [6]:
# Bar plot built from all_df with 'species' mapped to x and 'strand' to y.
# The expression is wrapped in parentheses so it can span several lines
# without a backslash continuation; as the last expression in the cell,
# its value is what the notebook displays.
# NOTE(review): 'strand' is a categorical column; confirm that geom_bar()
# with a y aesthetic draws the intended chart.
(
    ggplot(aes(x='species', y='strand'), data=all_df)
    + geom_bar()
)
Out[6]:
<ggplot: (300979097)>
In [ ]:
Content source: iamciera/montium_analyses_2_conservation
Similar notebooks: