Script to find additional organisms to add to the tree, namely from crocodiles and birds. Not used in final paper.


In [2]:
import Bio
from Bio import Entrez

In [3]:
Entrez.email = "k.lyons@utexas.edu"

List of species we are interested in adding.


In [4]:
sp = ["Gallus gallus", "Struthio camelus", "Chrysemys picta", "Pelomedusa subrufa", "Crocodylus porosus", "Alligator mississippiensis"]

List of loci in our dataset that we would like to add for these species.


In [5]:
ge = ["16S", "18S", "BDNF", "CMOS", "CYTB", "ND2", "ND4", "NT3", "PDC", "R35", "RAG1", "RAG2"]

In [6]:
results = []

Search loop: using Entrez database, pull DNA records matching our organisms and genes.


In [7]:
for i in sp:
    for j in ge:
        handle = Entrez.esearch(db="nucleotide", term = "{0}[Orgn] & {1}[Gene]".format(i, j))
        record = Entrez.read(handle)
        handle.close()
        if len(record["IdList"])!=0:
            print record["IdList"]
            results.append(record["IdList"])
print results


['71979934', '358485507', '358485152', '500050309', '409191702', '409191700', '409191698', '409191696', '211239', '193879672', '71384184']
['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '574957866', '565384511', '565384505', '565384499', '565384497', '565384493', '565384491', '565384489', '565384487', '565384484', '565384482', '565384478', '565384468', '565384464']
['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '578896092', '578896090', '578896088', '578896086', '578896084', '578896082', '578896080', '578896078', '574957866', '283049385', '283049371', '283049357', '283049343', '283049329']
['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '574957866', '283049385', '283049371', '283049357', '283049343', '283049329', '283049315', '283049301', '283049287', '283049273', '283049259', '283049245', '283049231', '283049217']
['157952221', '157952219', '358485511', '358485419']
['358485504', '358485068', '513196845', '513196843', '513196839', '513196836']
['45382798', '358485503', '358485057', '12188807']
['71895136', '358485507', '358485115']
['358485507', '358485115', '513187109', '513187106', '38324843']
['697496001', '697495999', '697495996', '697495993', '697049309', '193879660', '23451066']
['405133320']
['14141905', '17467162', '17467158', '225638976', '225638974', '306441043', '31322895', '3668119']
['14141905']
['14141905']
['405133506']
['697476454', '697476452', '697049810']
['12232600', '12188815']
['697521815', '697043917']
['697521820', '697043917', '405133692']
['636526422', '641774930', '641774929', '641774928', '641774927', '452114041', '264681758']
['605669481', '576098184', '5902189', '256259930', '256259928', '225194916', '225194914', '225194912', '225194910']
['605669481', '576098184', '5902189', '379054485']
['605669481', '576098184', '5902189', '482667294', '109657805']
['636528841', '636526569', '530594328']
['452114054', '225195035', '225195034', '225195033', '225195032', '264681993', '109657819']
['636528845', '636526584', '641754715']
['636528845', '636526584', '530635409']
['374115355', '374115353', '264681782', '213494327', '72256191']
['658131273', '658131271', '658131269', '658131267', '658131265', '658131263', '658131261', '658131259', '658131257', '658131255', '658131253', '658130936', '5835582', '359385410', '359385408', '359385406', '359385404', '359385402', '359385400', '359385398']
['5835582']
['658131423', '658131421', '658131419', '658131417', '658131415', '658131413', '658131411', '658131409', '658131407', '658131405', '658131403', '5835582', '359815882', '359815880', '359815878', '359815876', '359815874', '359815872', '359815870', '359815868']
['37704784', '326368000', '307698079', '307698078', '307698077', '307698076', '307698075', '307698074', '307698073', '307698072', '307698071', '307698070', '307698069', '307698068', '307698067', '307698066', '307698065', '297372970', '297372969', '297372968']
['375280221', '375280219', '297372880', '297372878', '297372876', '297372874', '297372872', '297372870', '297372868', '297372866', '297372864', '297372862', '297372860', '297372858', '297372856', '297372854', '297372852', '297372850', '66276625']
['297372936', '297372934', '297372932', '297372930', '297372928', '297372926', '297372924', '297372922', '297372920', '297372918', '297372916', '297372914', '297372912', '297372910', '297372908', '297372906', '297372904', '297372902', '297372900', '297372898']
['399144113', '157060762']
['108802842', '300959532', '300959530', '300959528', '300959526', '300959524', '300959522', '300959516', '300959514', '300959512', '16551126', '81295111']
['108802842']
['397454952', '397454950', '397454948', '397454946', '108802842', '298253660', '298253658', '21310124']
['392936214']
['399144237']
['561050211', '564232710', '399144109', '193879642', '23451064', '161705108']
['405133284']
['5835540', '193735576', '13123654', '13123652', '13123650', '13123648', '13123646', '13123644', '13123642', '13123640', '13123638', '13123636', '13123634', '13123632', '13123630', '13123628', '13123626', '13123624', '13123622', '13123620']
['5835540']
['5835540', '298253722']
['405133470']
['561045407', '564257361']
['392936190']
['561038604', '564238249', '399144233']
['561038604', '564238245', '564238243', '405133656']
[['71979934', '358485507', '358485152', '500050309', '409191702', '409191700', '409191698', '409191696', '211239', '193879672', '71384184'], ['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '574957866', '565384511', '565384505', '565384499', '565384497', '565384493', '565384491', '565384489', '565384487', '565384484', '565384482', '565384478', '565384468', '565384464'], ['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '578896092', '578896090', '578896088', '578896086', '578896084', '578896082', '578896080', '578896078', '574957866', '283049385', '283049371', '283049357', '283049343', '283049329'], ['700587922', '672106662', '662117077', '589913147', '584458524', '584298116', '574957866', '283049385', '283049371', '283049357', '283049343', '283049329', '283049315', '283049301', '283049287', '283049273', '283049259', '283049245', '283049231', '283049217'], ['157952221', '157952219', '358485511', '358485419'], ['358485504', '358485068', '513196845', '513196843', '513196839', '513196836'], ['45382798', '358485503', '358485057', '12188807'], ['71895136', '358485507', '358485115'], ['358485507', '358485115', '513187109', '513187106', '38324843'], ['697496001', '697495999', '697495996', '697495993', '697049309', '193879660', '23451066'], ['405133320'], ['14141905', '17467162', '17467158', '225638976', '225638974', '306441043', '31322895', '3668119'], ['14141905'], ['14141905'], ['405133506'], ['697476454', '697476452', '697049810'], ['12232600', '12188815'], ['697521815', '697043917'], ['697521820', '697043917', '405133692'], ['636526422', '641774930', '641774929', '641774928', '641774927', '452114041', '264681758'], ['605669481', '576098184', '5902189', '256259930', '256259928', '225194916', '225194914', '225194912', '225194910'], ['605669481', '576098184', '5902189', '379054485'], ['605669481', '576098184', '5902189', '482667294', '109657805'], ['636528841', '636526569', '530594328'], ['452114054', '225195035', '225195034', '225195033', '225195032', '264681993', '109657819'], ['636528845', '636526584', '641754715'], ['636528845', '636526584', '530635409'], ['374115355', '374115353', '264681782', '213494327', '72256191'], ['658131273', '658131271', '658131269', '658131267', '658131265', '658131263', '658131261', '658131259', '658131257', '658131255', '658131253', '658130936', '5835582', '359385410', '359385408', '359385406', '359385404', '359385402', '359385400', '359385398'], ['5835582'], ['658131423', '658131421', '658131419', '658131417', '658131415', '658131413', '658131411', '658131409', '658131407', '658131405', '658131403', '5835582', '359815882', '359815880', '359815878', '359815876', '359815874', '359815872', '359815870', '359815868'], ['37704784', '326368000', '307698079', '307698078', '307698077', '307698076', '307698075', '307698074', '307698073', '307698072', '307698071', '307698070', '307698069', '307698068', '307698067', '307698066', '307698065', '297372970', '297372969', '297372968'], ['375280221', '375280219', '297372880', '297372878', '297372876', '297372874', '297372872', '297372870', '297372868', '297372866', '297372864', '297372862', '297372860', '297372858', '297372856', '297372854', '297372852', '297372850', '66276625'], ['297372936', '297372934', '297372932', '297372930', '297372928', '297372926', '297372924', '297372922', '297372920', '297372918', '297372916', '297372914', '297372912', '297372910', '297372908', '297372906', '297372904', '297372902', '297372900', '297372898'], ['399144113', '157060762'], ['108802842', '300959532', '300959530', '300959528', '300959526', '300959524', '300959522', '300959516', '300959514', '300959512', '16551126', '81295111'], ['108802842'], ['397454952', '397454950', '397454948', '397454946', '108802842', '298253660', '298253658', '21310124'], ['392936214'], ['399144237'], ['561050211', '564232710', '399144109', '193879642', '23451064', '161705108'], ['405133284'], ['5835540', '193735576', '13123654', '13123652', '13123650', '13123648', '13123646', '13123644', '13123642', '13123640', '13123638', '13123636', '13123634', '13123632', '13123630', '13123628', '13123626', '13123624', '13123622', '13123620'], ['5835540'], ['5835540', '298253722'], ['405133470'], ['561045407', '564257361'], ['392936190'], ['561038604', '564238249', '399144233'], ['561038604', '564238245', '564238243', '405133656']]

In [ ]:
print

If sequences are of appropriate length, print the names.


In [8]:
from Bio import SeqIO
for sp in results:
    if len(sp)==0:
        pass
    elif len(sp)!=0:    
        for num in sp:
            handle = Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id="{0}".format(num))
            for seq_record in SeqIO.parse(handle, "gb"):
                if len(seq_record)<2699:
                    print num
                    print seq_record.id, seq_record.description[:50] + "..."
                    print "Sequence length %i," % len(seq_record)
            handle.close()


71979934
NM_001031616.1 Gallus gallus brain-derived neurotrophic factor (B...
Sequence length 840,
500050309
KC594110.1 Gallus gallus brain-derived neurotrophic factor (B...
Sequence length 629,
409191702
JX507084.1 Gallus gallus brain-derived neurotrophic factor tr...
Sequence length 1189,
409191700
JX507083.1 Gallus gallus brain-derived neurotrophic factor tr...
Sequence length 1125,
409191698
JX507082.1 Gallus gallus brain-derived neurotrophic factor tr...
Sequence length 1248,
409191696
JX507081.1 Gallus gallus brain-derived neurotrophic factor tr...
Sequence length 1338,
211239
M83377.1 Gallus gallus brain-derived neurotrophic factor (B...
Sequence length 738,
193879672
EU737949.1 Gallus gallus brain-derived neurotrophic factor (B...
Sequence length 675,
71384184
DQ124361.1 Gallus gallus brain derived neurotrophic factor (B...
Sequence length 840,
565384511
KF650607.1 Gallus gallus clone 16 black strain cytochrome b (...
Sequence length 488,
565384505
KF650606.1 Gallus gallus clone 16 dark brown strain cytochrom...
Sequence length 488,
565384499
KF650605.1 Gallus gallus clone 15 dark brown strain cytochrom...
Sequence length 488,
565384497
KF650604.1 Gallus gallus clone 14 dark brown strain cytochrom...
Sequence length 488,
565384493
KF650603.1 Gallus gallus clone 15 black strain cytochrome b (...
Sequence length 488,
565384491
KF650602.1 Gallus gallus clone 14 black strain cytochrome b (...
Sequence length 488,
565384489
KF650601.1 Gallus gallus clone 13 black strain cytochrome b (...
Sequence length 488,
565384487
KF650600.1 Gallus gallus clone 13 dark brown strain cytochrom...
Sequence length 488,
565384484
KF650599.1 Gallus gallus clone 12 dark brown strain cytochrom...
Sequence length 488,
565384482
KF650598.1 Gallus gallus clone 12 black strain cytochrome b (...
Sequence length 488,
565384478
KF650597.1 Gallus gallus clone 11 dark brown strain cytochrom...
Sequence length 488,
565384468
KF650596.1 Gallus gallus clone 10 dark brown strain cytochrom...
Sequence length 488,
565384464
KF650595.1 Gallus gallus clone 9 dark brown strain cytochrome...
Sequence length 488,
578896092
KF792740.1 Gallus gallus voucher UAM24741 NADH dehydrogenase ...
Sequence length 1033,
578896090
KF792739.1 Gallus gallus voucher UAM21484 NADH dehydrogenase ...
Sequence length 1033,
578896088
KF792738.1 Gallus gallus voucher UAM24801 NADH dehydrogenase ...
Sequence length 1033,
578896086
KF792737.1 Gallus gallus voucher UAM24862 NADH dehydrogenase ...
Sequence length 1033,
578896084
KF792736.1 Gallus gallus voucher UAM21483 NADH dehydrogenase ...
Sequence length 1033,
578896082
KF792735.1 Gallus gallus voucher UAM21485 NADH dehydrogenase ...
Sequence length 1033,
578896080
KF792734.1 Gallus gallus voucher UAM27321 NADH dehydrogenase ...
Sequence length 1033,
578896078
KF792733.1 Gallus gallus voucher UAM27320 NADH dehydrogenase ...
Sequence length 1033,
157952221
NM_001109762.1 Gallus gallus neurotrophin 3 (NTF3), transcript va...
Sequence length 956,
157952219
NM_001031488.1 Gallus gallus neurotrophin 3 (NTF3), transcript va...
Sequence length 896,
513196845
XM_004943304.1 PREDICTED: Gallus gallus phosducin (PDC), transcri...
Sequence length 1574,
513196843
XM_426634.4 PREDICTED: Gallus gallus phosducin (PDC), transcri...
Sequence length 1663,
513196839
XM_004943303.1 PREDICTED: Gallus gallus phosducin (PDC), transcri...
Sequence length 1669,
513196836
XM_004943302.1 PREDICTED: Gallus gallus phosducin (PDC), transcri...
Sequence length 1847,
513187106
XM_004941550.1 PREDICTED: Gallus gallus recombination activating ...
Sequence length 2552,
38324843
AY443150.1 Gallus gallus recombination activating protein 2 (...
Sequence length 1142,
697496001
XM_009679450.1 PREDICTED: Struthio camelus australis brain-derive...
Sequence length 1510,
697495999
XM_009679449.1 PREDICTED: Struthio camelus australis brain-derive...
Sequence length 1498,
697495996
XM_009679448.1 PREDICTED: Struthio camelus australis brain-derive...
Sequence length 1761,
697495993
XM_009679447.1 PREDICTED: Struthio camelus australis brain-derive...
Sequence length 1504,
193879660
EU737943.1 Struthio camelus brain-derived neurotrophic factor...
Sequence length 688,
23451066
AF416632.1 Struthio camelus brain-derived neurotrophic factor...
Sequence length 600,
405133320
JX533090.1 Struthio camelus voucher mOst oocyte maturation fa...
Sequence length 993,
17467162
L78809.1 Struthio camelus cytochrome b (cytb) gene, partial...
Sequence length 924,
17467158
L77906.1 Struthio camelus cytochrome b (cytb) gene, partial...
Sequence length 924,
225638976
FJ785365.1 Struthio camelus isolate 2 cytochrome b (cytb) gen...
Sequence length 321,
225638974
FJ785364.1 Struthio camelus isolate 1 cytochrome b (cytb) gen...
Sequence length 321,
306441043
HQ122573.1 Struthio camelus cytochrome b (cytb) gene, partial...
Sequence length 517,
31322895
AY239165.1 Struthio camelus cytochrome b (cytb) gene, partial...
Sequence length 240,
405133506
JX533281.1 Struthio camelus voucher mOst neurotrophin-3 (NT3)...
Sequence length 645,
697476454
XM_009672651.1 PREDICTED: Struthio camelus australis phosducin (P...
Sequence length 756,
697476452
XM_009672650.1 PREDICTED: Struthio camelus australis phosducin (P...
Sequence length 750,
12232600
AJ293976.1 Struthio camelus partial r35 gene for orphan G pro...
Sequence length 805,
12188815
AJ293977.1 Struthio camelus partial r35 gene for orphan G pro...
Sequence length 102,
697521820
XM_009688448.1 PREDICTED: Struthio camelus australis recombinatio...
Sequence length 1605,
405133692
JX533396.1 Struthio camelus voucher mOst recombination activa...
Sequence length 1446,
641774930
XM_005306372.2 PREDICTED: Chrysemys picta bellii brain-derived ne...
Sequence length 1089,
641774929
XM_005306371.2 PREDICTED: Chrysemys picta bellii brain-derived ne...
Sequence length 1271,
641774928
XM_005306370.2 PREDICTED: Chrysemys picta bellii brain-derived ne...
Sequence length 1218,
641774927
XM_005306369.2 PREDICTED: Chrysemys picta bellii brain-derived ne...
Sequence length 1074,
452114041
KC181173.1 Chrysemys picta voucher MVZ:230532 brain-derived n...
Sequence length 670,
264681758
GU085549.1 Chrysemys picta brain-derived neurotrophic factor ...
Sequence length 662,
256259930
GQ395733.1 Chrysemys picta isolate CPUC04 cytochrome b (cytb)...
Sequence length 801,
256259928
GQ395732.1 Chrysemys picta isolate CPUC01 cytochrome b (cytb)...
Sequence length 721,
225194916
FJ770589.1 Chrysemys picta voucher HBS27448 cytochrome b (cyt...
Sequence length 1044,
225194914
FJ770588.1 Chrysemys picta voucher HBS27134 cytochrome b (cyt...
Sequence length 1070,
225194912
FJ770587.1 Chrysemys picta voucher HBS26210 cytochrome b (cyt...
Sequence length 1070,
225194910
FJ770586.1 Chrysemys picta voucher HBS23173 cytochrome b (cyt...
Sequence length 1070,
379054485
JN993981.1 Chrysemys picta isolate CpicB NADH dehydrogenase s...
Sequence length 2105,
482667294
KC688173.1 Chrysemys picta voucher RCT428 NADH dehydrogenase ...
Sequence length 768,
109657805
DQ646419.1 Chrysemys picta marginata NADH dehydrogenase subun...
Sequence length 684,
530594328
XM_005290693.1 PREDICTED: Chrysemys picta bellii phosducin (PDC),...
Sequence length 750,
452114054
KC181185.1 Chrysemys picta voucher MVZ:230532 orphan G protei...
Sequence length 998,
225195035
FJ770672.1 Chrysemys picta voucher HBS27448 RNA fingerprint p...
Sequence length 915,
225195034
FJ770671.1 Chrysemys picta voucher HBS27134 RNA fingerprint p...
Sequence length 944,
225195033
FJ770670.1 Chrysemys picta voucher HBS26210 RNA fingerprint p...
Sequence length 960,
225195032
FJ770669.1 Chrysemys picta voucher HBS23173 RNA fingerprint p...
Sequence length 960,
264681993
GU085683.1 Chrysemys picta orphan G protein-coupled receptor ...
Sequence length 890,
109657819
DQ649454.1 Chrysemys picta marginata RNA fingerprint protein ...
Sequence length 851,
530635409
XM_005304514.1 PREDICTED: Chrysemys picta bellii recombination ac...
Sequence length 1581,
374115355
JQ073096.1 Pelomedusa subrufa voucher MTD_T 5509 brain-derive...
Sequence length 679,
374115353
JQ073095.1 Pelomedusa subrufa voucher FGZC324 brain-derived n...
Sequence length 625,
264681782
GU085561.1 Pelomedusa subrufa brain-derived neurotrophic fact...
Sequence length 610,
213494327
FJ230875.1 Pelomedusa subrufa brain-derived neurotrophic fact...
Sequence length 718,
72256191
AY988081.1 Pelomedusa subrufa brain-derived neurotrophic fact...
Sequence length 717,
658131273
HG973239.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 571,
658131271
HG973238.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131269
HG973237.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131267
HG973236.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 672,
658131265
HG973235.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 619,
658131263
HG973234.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131261
HG973233.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131259
HG973232.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131257
HG973231.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131255
HG973230.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658131253
HG973229.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 674,
658130936
HG934050.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 319,
359385410
FN645272.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385408
FN645271.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385406
FN645270.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385404
FN645269.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385402
FN645268.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385400
FN645267.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
359385398
FN645266.1 Pelomedusa subrufa mitochondrial partial cytb gene...
Sequence length 687,
658131423
HG973316.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 792,
658131421
HG973315.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 205,
658131419
HG973314.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 799,
658131417
HG973313.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 779,
658131415
HG973312.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 794,
658131413
HG973311.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 231,
658131411
HG973310.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 790,
658131409
HG973309.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 810,
658131407
HG973308.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 810,
658131405
HG973307.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 810,
658131403
HG973306.1 Pelomedusa subrufa mitochondrial ND4 gene (partial...
Sequence length 810,
359815882
FN645331.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815880
FN645330.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815878
FN645329.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815876
FN645328.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815874
FN645327.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815872
FN645326.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815870
FN645325.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
359815868
FN645324.1 Pelomedusa subrufa mitochondrial nd4 gene (partial...
Sequence length 813,
37704784
AY339639.1 Pelomedusa subrufa RNA fingerprint fragment 35 (R3...
Sequence length 927,
326368000
FR717085.1 Pelomedusa subrufa R35 gene intron, specimen vouch...
Sequence length 1029,
307698079
GU213879.1 Pelomedusa subrufa voucher MVZ241332 orphan G prot...
Sequence length 1045,
307698078
GU213878.1 Pelomedusa subrufa voucher MVZ241331 orphan G prot...
Sequence length 1040,
307698077
GU213877.1 Pelomedusa subrufa voucher MVZ241330 orphan G prot...
Sequence length 1034,
307698076
GU213876.1 Pelomedusa subrufa voucher MVZ241329 orphan G prot...
Sequence length 1033,
307698075
GU213875.1 Pelomedusa subrufa voucher MVZ236628 orphan G prot...
Sequence length 1102,
307698074
GU213874.1 Pelomedusa subrufa voucher MVZ238887 orphan G prot...
Sequence length 1008,
307698073
GU213873.1 Pelomedusa subrufa voucher MVZ238883 orphan G prot...
Sequence length 1077,
307698072
GU213872.1 Pelomedusa subrufa voucher MVZ238879 orphan G prot...
Sequence length 1015,
307698071
GU213871.1 Pelomedusa subrufa voucher MVZ238878 orphan G prot...
Sequence length 1108,
307698070
GU213870.1 Pelomedusa subrufa voucher MVZ245229 orphan G prot...
Sequence length 1039,
307698069
GU213869.1 Pelomedusa subrufa voucher MVZ245226 orphan G prot...
Sequence length 1036,
307698068
GU213868.1 Pelomedusa subrufa voucher LSU20315 orphan G prote...
Sequence length 1009,
307698067
GU213867.1 Pelomedusa subrufa voucher LSU20145 orphan G prote...
Sequence length 968,
307698066
GU213866.1 Pelomedusa subrufa voucher FGZC324 orphan G protei...
Sequence length 998,
307698065
GU213865.1 Pelomedusa subrufa voucher R-184287 orphan G prote...
Sequence length 1020,
297372970
FN645409.1 Pelomedusa subrufa partial r35 gene, intron region...
Sequence length 700,
297372969
FN645408.1 Pelomedusa subrufa partial r35 gene, intron region...
Sequence length 700,
297372968
FN645407.1 Pelomedusa subrufa partial r35 gene, intron region...
Sequence length 700,
375280221
JQ073217.1 Pelomedusa subrufa voucher MTD_T 5509 recombinatio...
Sequence length 1389,
375280219
JQ073216.1 Pelomedusa subrufa voucher FGZC324 recombination a...
Sequence length 1444,
297372880
FN645348.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372878
FN645347.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372876
FN645346.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372874
FN645345.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372872
FN645344.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372870
FN645343.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372868
FN645342.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372866
FN645341.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372864
FN645340.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372862
FN645339.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372860
FN645338.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372858
FN645337.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372856
FN645336.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372854
FN645335.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372852
FN645334.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
297372850
FN645333.1 Pelomedusa subrufa partial rag1 gene for recombina...
Sequence length 593,
66276625
AY988102.1 Pelomedusa subrufa recombination activating protei...
Sequence length 862,
297372936
FN645376.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372934
FN645375.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372932
FN645374.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372930
FN645373.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372928
FN645372.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372926
FN645371.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372924
FN645370.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372922
FN645369.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372920
FN645368.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372918
FN645367.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372916
FN645366.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372914
FN645365.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372912
FN645364.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372910
FN645363.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372908
FN645362.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372906
FN645361.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372904
FN645360.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372902
FN645359.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372900
FN645358.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
297372898
FN645357.1 Pelomedusa subrufa partial rag2 gene for recombina...
Sequence length 644,
399144113
JN654790.1 Crocodylus porosus brain-derived neurotrophic fact...
Sequence length 670,
157060762
EF646312.1 Crocodylus porosus brain-derived neurotrophic fact...
Sequence length 600,
300959532
GU331912.1 Crocodylus porosus isolate MAS3 cytochrome b (cytb...
Sequence length 579,
300959530
GU331911.1 Crocodylus porosus isolate MAS2 cytochrome b (cytb...
Sequence length 579,
300959528
GU331910.1 Crocodylus porosus isolate MAS1 cytochrome b (cytb...
Sequence length 579,
300959526
GU331909.1 Crocodylus porosus isolate AP3 cytochrome b (cytb)...
Sequence length 579,
300959524
GU331908.1 Crocodylus porosus isolate AP2 cytochrome b (cytb)...
Sequence length 579,
300959522
GU331907.1 Crocodylus porosus isolate AP1 cytochrome b (cytb)...
Sequence length 579,
300959516
GU331904.1 Crocodylus porosus isolate MAS4 cytochrome b (cytb...
Sequence length 1156,
300959514
GU331903.1 Crocodylus porosus isolate AP5 cytochrome b (cytb)...
Sequence length 1156,
300959512
GU331902.1 Crocodylus porosus isolate AP4 cytochrome b (cytb)...
Sequence length 1156,
16551126
AF306452.1 Crocodylus porosus cytochrome b (cytb) gene, parti...
Sequence length 400,
397454952
JN588518.1 Crocodylus porosus isolate YPM14751 NADH dehydroge...
Sequence length 958,
397454950
JN588517.1 Crocodylus porosus isolate YPM14727 NADH dehydroge...
Sequence length 958,
397454948
JN588516.1 Crocodylus porosus isolate L064 NADH dehydrogenase...
Sequence length 958,
397454946
JN588515.1 Crocodylus porosus isolate L063 NADH dehydrogenase...
Sequence length 958,
298253660
GU064612.1 Crocodylus porosus isolate LD149 NADH dehydrogenas...
Sequence length 612,
298253658
GU064611.1 Crocodylus porosus isolate MF954 NADH dehydrogenas...
Sequence length 612,
21310124
AF380946.1 Crocodylus porosus NADH dehydrogenase subunit 4 (N...
Sequence length 736,
392936214
JN568503.1 Crocodylus porosus isolate CP 4 35 G protein-coupl...
Sequence length 702,
399144237
JN654852.1 Crocodylus porosus recombination activating protei...
Sequence length 1046,
564232710
XM_006260669.1 PREDICTED: Alligator mississippiensis brain-derive...
Sequence length 881,
399144109
JN654788.1 Alligator mississippiensis brain-derived neurotrop...
Sequence length 670,
193879642
EU737934.1 Alligator mississippiensis brain-derived neurotrop...
Sequence length 688,
23451064
AF416631.1 Alligator mississippiensis brain-derived neurotrop...
Sequence length 600,
161705108
EU275888.1 Alligator mississippiensis brain-derived neurotrop...
Sequence length 704,
405133284
JX533072.1 Alligator mississippiensis oocyte maturation facto...
Sequence length 996,
193735576
EU496863.1 Alligator mississippiensis cytochrome b (cytb) gen...
Sequence length 1227,
13123654
AF318572.1 Alligator mississippiensis isolate TX_1147 cytochr...
Sequence length 693,
13123652
AF318571.1 Alligator mississippiensis isolate MS_RB1 cytochro...
Sequence length 693,
13123650
AF318570.1 Alligator mississippiensis isolate TX_1229 cytochr...
Sequence length 708,
13123648
AF318569.1 Alligator mississippiensis isolate GA_Ossabaw cyto...
Sequence length 708,
13123646
AF318568.1 Alligator mississippiensis isolate MS_RB3_mut cyto...
Sequence length 709,
13123644
AF318567.1 Alligator mississippiensis isolate FL_Apopka_8 cyt...
Sequence length 709,
13123642
AF318566.1 Alligator mississippiensis isolate FL_Apopka_6 cyt...
Sequence length 709,
13123640
AF318565.1 Alligator mississippiensis isolate FL_Apopka_5 cyt...
Sequence length 709,
13123638
AF318564.1 Alligator mississippiensis isolate SC_SRS_G/H/A cy...
Sequence length 1163,
13123636
AF318563.1 Alligator mississippiensis isolate SC_SRS_A/A/0 cy...
Sequence length 1163,
13123634
AF318562.1 Alligator mississippiensis isolate SC_SRS_BC/F/I c...
Sequence length 1162,
13123632
AF318561.1 Alligator mississippiensis isolate GA_TE_1 cytochr...
Sequence length 1162,
13123630
AF318560.1 Alligator mississippiensis isolate GA_BM_1 cytochr...
Sequence length 1162,
13123628
AF318559.1 Alligator mississippiensis isolate AL_MC_4 cytochr...
Sequence length 1162,
13123626
AF318558.1 Alligator mississippiensis isolate AL_MC_1 cytochr...
Sequence length 1162,
13123624
AF318557.1 Alligator mississippiensis isolate SC_H/H/I cytoch...
Sequence length 1199,
13123622
AF318556.1 Alligator mississippiensis isolate SC_93_424 cytoc...
Sequence length 1199,
13123620
AF318555.1 Alligator mississippiensis isolate SC_93-365 cytoc...
Sequence length 1199,
298253722
GU064643.1 Alligator mississippiensis isolate Alli01 NADH deh...
Sequence length 612,
405133470
JX533263.1 Alligator mississippiensis neurotrophin-3 (NT3) ge...
Sequence length 645,
564257361
XM_006267573.1 PREDICTED: Alligator mississippiensis phosducin (P...
Sequence length 753,
392936190
JN568491.1 Alligator mississippiensis voucher JK 613 35 G pro...
Sequence length 702,
399144233
JN654850.1 Alligator mississippiensis recombination activatin...
Sequence length 1091,
564238245
XM_006275933.1 PREDICTED: Alligator mississippiensis recombinatio...
Sequence length 1732,
564238243
XM_006275932.1 PREDICTED: Alligator mississippiensis recombinatio...
Sequence length 1827,
405133656
JX533378.1 Alligator mississippiensis recombination activatin...
Sequence length 1443,

In [9]:
import pandas as pd
seqs = pd.read_csv("sequences.csv",index_col=0)
print seqs
print type(seqs)


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-9-5f97d79f6d78> in <module>()
      1 import pandas as pd
----> 2 seqs = pd.read_csv("sequences.csv",index_col=0)
      3 print seqs
      4 print type(seqs)

/home/april/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format)
    441                     infer_datetime_format=infer_datetime_format)
    442 
--> 443         return _read(filepath_or_buffer, kwds)
    444 
    445     parser_f.__name__ = name

/home/april/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    226 
    227     # Create the parser.
--> 228     parser = TextFileReader(filepath_or_buffer, **kwds)
    229 
    230     if nrows is not None:

/home/april/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    531             self.options['has_index_names'] = kwds['has_index_names']
    532 
--> 533         self._make_engine(self.engine)
    534 
    535     def _get_options_with_defaults(self, engine):

/home/april/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    668     def _make_engine(self, engine='c'):
    669         if engine == 'c':
--> 670             self._engine = CParserWrapper(self.f, **self.options)
    671         else:
    672             if engine == 'python':

/home/april/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1030         kwds['allow_leading_cols'] = self.index_col is not False
   1031 
-> 1032         self._reader = _parser.TextReader(src, **kwds)
   1033 
   1034         # XXX

/home/april/anaconda/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3213)()

/home/april/anaconda/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:5595)()

IOError: File sequences.csv does not exist

In [55]:
bdnf = seqs.loc['BDNF',:]
print type(bdnf)


<class 'pandas.core.series.Series'>

In [58]:
for x in bdnf:
    handle = Entrez.efetch(db="nucleotide", rettype="fasta", retmode="text", id="{0}".format(int(x)))
    seq_record = SeqIO.read(handle, "fasta")
    handle.close()
    print("%s with %i features" % (seq_record.id, len(seq_record.features)))


gi|193879672|gb|EU737949.1| with 0 features
gi|193879660|gb|EU737943.1| with 0 features
gi|452114041|gb|KC181173.1| with 0 features
gi|374115355|gb|JQ073096.1| with 0 features
gi|399144113|gb|JN654790.1| with 0 features
gi|161705108|gb|EU275888.1| with 0 features

In [54]:
for x in bdnf:
    print (x)
    print type(x)


0
<type 'str'>
1
<type 'str'>
2
<type 'str'>
3
<type 'str'>
4
<type 'str'>
5
<type 'str'>
6
<type 'str'>
7
<type 'str'>
8
<type 'str'>
9
<type 'str'>
10
<type 'str'>
11
<type 'str'>

In [ ]:
Copyright (c) <2014> <Katie Lyons, k.lyons@gmail.com>


Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.