In [5]:
def search(seq, threshold=1, base_url="http://34.251.101.117:8081/search", timeout=None):
    """Query the remote sequence-search service and return the matching sample IDs.

    Parameters
    ----------
    seq : str
        Query sequence, sent as the ``seq`` query parameter.
    threshold : float, optional
        Sent as the ``threshold`` query parameter, formatted with ``%f``
        (e.g. ``1`` -> ``"1.000000"``).
    base_url : str, optional
        Endpoint of the search service. Defaults to the address this notebook
        has always used.
    timeout : float or None, optional
        Passed straight to ``requests.get``; ``None`` (the default) waits
        indefinitely, as before.

    Returns
    -------
    list of str
        The keys of the ``"results"`` mapping found under the first entry of
        the JSON response, in response order. Empty list if the response is an
        empty JSON object.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    """
    # Let requests build (and URL-encode) the query string instead of
    # interpolating the raw sequence into the URL by hand.
    query = {"threshold": "%f" % float(threshold), "seq": seq}
    response = requests.get(base_url, params=query, timeout=timeout)
    # Fail loudly on HTTP errors rather than trying to parse an error page.
    response.raise_for_status()

    payload = response.json()
    if not payload:
        return []

    # Only the first top-level entry is used; the response appears to be
    # keyed by the query sequence.
    first_entry = next(iter(payload.values()))
    return list(first_entry["results"])

In [6]:
import requests

In [33]:
## TN4401
## http://www.ebi.ac.uk/ena/data/view/AKT72966
TN4401_seq="ATGAGCAAGAAGAACAAGCTACTCAGCGTCCTATCCGACGCCGAACAGGAGGCCTTGTATGGCCTGCCGGAGTTCGATGATGCGCAGCAGCTGGAATATCTGGCTGTGACAGAAACCGAGTTGGCGCTTGCCAACAGCCGGCCCAGTCTTCATGCCAAGGTCTATTGCCTCTTGCAGATCGGCTACTTCAAGGCCAAGCATGCCTTCTTCAGTTTCGACTGGGACGAAGTCGAGGACGATTGCGCTTTCGTGCTGAGCCGCTATTTCCACGGCGAGGTGTTCGAACCCAAGGCGATCACCAAGCATGAACGCTACACCCAGCGCGAGCAGATCGCCAAACTATTCGGCTACTGGCCGTGGTCGGCCGCCTTCCTGCCGCAGCTCGAGCAGCAGGCCGCACGGACCGTGCGGCGTGACGTAACGCCGGGGTTCGTCGCCGCCGAGCTGATCGTCTGGCTCAATGAGCACAAGATTATCCGGCCCGGCTATACGACCTTGCAAGAGTTGGTCAGCGAAACGCTATCCGCCGAGCGCCGGCGCCTGGGCAATCTGCTGGAGCAAGCGTTGGACGAATCTGCCAAGGCCGCGCTGAGCCAGCTTCTGGTCCGTGATGACACCCTGTCGCAACTGGCGGCGCTCAAGCAGGACGCCAAGAATTTCGGCTGGCGCCAGATGGTCCGCGAGCGCGAAAAGCGCGCCACGCTGGAGCCGCTGCACGCAATCGCCAAGACACTGCTGCCTAAACTCGACGTCTCGCAGCAGAACCTGCTGTACTACGCGAGCCTGGCAAACTTCTACACCGTCCACGACCTGCGCAATCTGAAGGCCGATCAGACCCAACTCTACCTGCTGTGCTATGCCTGGGTACGCTACCGGCAGCTCTCCGACAACCTGGTCGATGCGATGGCGTACCACATGAAGCAGTTGGAGGACGAAAGCAGCGCTGGTGCCAAACAATCCTTTGCCGACGAGCAGGTGCGCCGTCAGCAGGAAACGCCGCGGGTAGGCCGCCTGCTGTCACTCTACGTTGATGATAGCGTGGCCGATCCCACGCCCTTCGGCGAGGTGCGGCAGCGCGCCTACAAGATCATGCCTCGCGATACGCTGCAAACCACGGCGCAGCGTATGAGCGTGAAGCCGGCGAGCAAGCTGGCCTTGCAGTGGCAGGCGGTGGACGGGCTGGCCGACCGCATGCGCCGCCATCTTCGTCCACTGTACGTTGCGCTCGACCTCTCCGGCTCCAATCCAGACAGTCCATGGCTTGCCGCGCTGGCCTGGGCCAAAGGTGTTTTTGCCAAGCAGCAGCGGCTGTCGCAACGGCCACTCGACGAGTGGCCCGCGGCGACACTTCCGAAACGCTTGCGGCCGTACCTGCTGACCTTCGACGCCGACGGTAAACCAACCGGCCTGCACGCTGACCGCTACGAGTTCTGGCTGTACCGCCAGATCAGGAAGCGTCTCCAATCGGGTGAACTCTACCTCGACGACAGCCTGCAACACCGGCACTTCTCAGATGAACTGGTTGAGATGGACAGGAAGATCGATGTGCTCGCAAAAATGGAGATCCCCTTCCTGCAGCAACCCGTCCATGCCCAACTCGATGCGCTGACTGCCGAACTGCGCACGCAGTGGCTGGCGTTCAACCGCGAGCTGAAACAGGGCAAGCTGACGCACCTCGAATACAACAAGGACACGCAGAAACTGACCTGGCGCAAGCCCAAGAGCGAGAATGAGAAAGCGCGCGAAAAGGCATTCTACGAGAAGCTTCCGTTCTGCGACGTGGCCGACGTGTTTCGCTTCGTCAACGACGAGTGCAAGTTTCTGTCGGCGTTCACGCCCATGCAGCCGCGCTACGCGAAGAAGGTCGCCGACGCTGACAGCCTGATGGCGGTCATCATCGCGCAAGCGATGAACCACGGCAACCACGTCATGGCGCGCACCAGCGACATCCCGTACCACGTGCTGGATAGCGGCTACCAGCAGTACCT
GCGCCAGGCAACGCTGCACGCGGCCAACGACTGCATCAGCAACGCCATCGCCACGCTACCGATCTTCCCATACTACTCGTTCGACCTCGAAACGCTGTACGGTGCCGTCGACGGGCAGAAATTCAGCGTCGAACGGCCGACCGTGAAGGCGCGCTATTCGCGCAAGTACTTCGGGCGCGGCAAGGGCGTGGTCGCCTACACGCTGCTGTGCAACCACATCCCGCTCAACGGCTACCTGATCGGCGCGCACGACTACGAGGCCCACCACGTGTTCGACATCTGGTATCGCAACACGTCGGACATCATGCCGACCGCGATCACCGGCGACATGCACAGCGTCAACAAGGCCAACTTCGCCATCCTGCATTGGTTTGGCCGGCGGTTCGAGCCGCGCTTCACCGACCTCAACGCGCAATTGAAAGAACTCTACTGCGCTGACGATCCAGCACAGTATCAGGCGTGCCTGATTCGGCCAGTCGGGAAAATCGACTGCGATCTCATCCATCGCGAGAAGCCGAACATCGACCGGATCGTCGCCACACTCGGGCTGAAGGAAATGACGCAGGGCACGCTGATCCGCAAGCTGTGCACCTACACCACGACGAACCCGACGCGGCGCGCGATCTTCGAGTTCGACAAGCTCATCCGCAGCATCTACACGCTTCGCTACCTGCGCGATCCGCAACTGGAACGCAACGTGCACCGCTCGCAAAATCGCCTCGAGTCCTACCATCAGCTACGCTCGGCCATTGCCCAGGTCGGCGGTAAGAAGGAACTGACCGGTCGCACCGACATTGAAATCGAGATCAGCAACCAGTGCGCACGGCTGATCGCCAACACGATCATTTTCTACAACTCGGCAATCTTGTCGCGGCTGGTGACCAAGTACGAGGCGGCCGGCAACAGCAAGGCGCTGGCACTCATCACAAAAATTTCGCCTGCGGCATGGCGGCACATCCTGCTCAACGGGCACTACACCTTTCAAAGCAGCGGCAAGACGATCGATCTCGATGCGATCGTCGCAGGCCTGGAACTGGAGTGA"

In [34]:
# Look up the sample IDs the search service reports for the TN4401 query sequence.
# threshold=1 is forwarded as the service's `threshold` parameter
# (presumably "require a full match" — confirm against the service docs).
TN4401_samples = search(TN4401_seq, threshold=1)

In [35]:
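## KPC-2 (query sequence stored in `kpc2` below)
## NOTE(review): this link is identical to the one in the TN4401 cell and may be a
## copy-paste leftover — confirm the source accession for the kpc2 sequence.
## http://www.ebi.ac.uk/ena/data/view/AKT72966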
kpc2="CGTTGATGTCACTGTATCGCCGTCTAGTTCTGCTGTCTTGTCTCTCATGGCCGCTGGCTGGCTTTTCTGCCACCGCGCTGACCAACCTCGTCGCGGAACCATTCGCTAAACTCGAACAGGACTTTGGCGGCTCCATCGGTGTGTACGCGATGGATACCGGCTCAGGCGCAACTGTAAGTTACCGCGCTGAGGAGCGCTTCCCACTGTGCAGCTCATTCAAGGGCTTTCTTGCTGCCGCTGTGCTGGCTCGCAGCCAGCAGCAGGCCGGCTTGCTGGACACACCCATCCGTTACGGCAAAAATGCGCTGGTTCCGTGGTCACCCATCTCGGAAAAATATCTGACAACAGGCATGACGGTGGCGGAGCTGTCCGCGGCCGCCGTGCAATACAGTGATAACGCCGCCGCCAATTTGTTGCTGAAGGAGTTGGGCGGCCCGGCCGGGCTGACGGCCTTCATGCGCTCTATCGGCGATACCACGTTCCGTCTGGACCGCTGGGAGCTGGAGCTGAACTCCGCCATCCCAGGCGATGCGCGCGATACCTCATCGCCGCGCGCCGTGACGGAAAGCTTACAAAAACTGACACTGGGCTCTGCACTGGCTGCGCCGCAGCGGCAGCAGTTTGTTGATTGGCTAAAGGGAAACACGACCGGCAACCACCGCATCCGCGCGGCGGTGCCGGCAGACTGGGCAGTCGGAGACAAAACCGGAACCTGCGGAGTGTATGGCACGGCAAATGACTATGCCGTCGTCTGGCCCACTGGGCGCGCACCTATTGTGTTGGCCGTCTACACCCGGGCGCCTAACAAGGATGACAAGCACAGCGAGGCCGTCATCGCCGCTGCGGCTAGACTCGCGCTCGAGGGATTGGGCGTCAACGGGCAGTAAGGCTCTGAAAATCATCTATTGGCCCACCACC"
# Same lookup for the kpc2 sequence, using a lower threshold (0.9) than the TN4401 query.
kpc2_samples = search(kpc2, threshold=.9)

In [38]:
# Three counts, one per line: samples matching both queries, then the number
# of kpc2 hits, then the number of TN4401 hits.
print(
    len(set(kpc2_samples).intersection(TN4401_samples)),
    len(kpc2_samples),
    len(TN4401_samples),
    sep="\n",
)


2531
3011
2533

In [37]:
# Samples that match both the kpc2 and the TN4401 queries.
# Uses TN4401_samples explicitly: the bare name `samples` only exists as a
# local inside search(), so the previous form depended on stale kernel state.
set(kpc2_samples) & set(TN4401_samples)


Out[37]:
{'SRR2135155',
 'SRR1196711',
 'SRR1561267',
 'SRR2135070',
 'SRR1196942',
 'SRR1166954',
 'SRR1173048',
 'SRR849546',
 'SRR2127616',
 'SRR2131493',
 'ERR1217328',
 'SRR1181015',
 'SRR5093488',
 'SRR5168390',
 'SRR2965804',
 'SRR2131398',
 'SRR1180715',
 'SRR3996238',
 'SRR2131443',
 'SRR2132029',
 'SRR1203983',
 'SRR3996252',
 'SRR1810045',
 'SRR1204039',
 'ERR564181',
 'SRR5092817',
 'SRR4198748',
 'SRR2965616',
 'ERR1217192',
 'SRR4198709',
 'SRR2048071',
 'SRR849527',
 'SRR1635683',
 'SRR2127574',
 'SRR3371709',
 'SRR2135474',
 'SRR999522',
 'SRR1166950',
 'SRR1180855',
 'SRR1187841',
 'SRR2127582',
 'SRR2131473',
 'ERR564151',
 'SRR1203955',
 'SRR2135212',
 'SRR2965785',
 'SRR2127549',
 'SRR2127304',
 'SRR2135251',
 'SRR1582889',
 'SRR2965811',
 'SRR5168836',
 'SRR2127473',
 'SRR5168393',
 'SRR1561215',
 'SRR5201504',
 'SRR2135163',
 'SRR2127310',
 'SRR5168512',
 'SRR3112338',
 'SRR1196919',
 'SRR515575',
 'SRR2127365',
 'SRR4025860',
 'SRR2131906',
 'SRR1183628',
 'SRR2724088',
 'ERR1334623',
 'ERR1024623',
 'SRR2135140',
 'SRR2965607',
 'SRR2965747',
 'ERR1024575',
 'SRR4198676',
 'SRR4198698',
 'SRR1817646',
 'SRR4017916',
 'SRR3982253',
 'SRR2965594',
 'SRR2965710',
 'SRR2965704',
 'ERR1334652',
 'SRR1167020',
 'SRR2135442',
 'SRR2135436',
 'ERR1217125',
 'SRR4198716',
 'SRR4198682',
 'ERR1289727',
 'SRR2965665',
 'SRR2127355',
 'SRR2724089',
 'SRR2965793',
 'SRR1196846',
 'SRR2127454',
 'SRR4198815',
 'SRR1187688',
 'SRR999542',
 'ERR1217392',
 'SRR2127819',
 'SRR2965816',
 'SRR2915809',
 'SRR4414058',
 'SRR4198683',
 'SRR4198814',
 'SRR4198861',
 'SRR2965688',
 'SRR1817399',
 'ERR1334705',
 'ERR1217124',
 'ERR1334685',
 'SRR4198754',
 'SRR2965679',
 'SRR2135168',
 'SRR2965765',
 'SRR2048069',
 'SRR1187697',
 'SRR4017837',
 'SRR1187814',
 'SRR4198841',
 'SRR1180775',
 'ERR1217341',
 'ERR1228211',
 'ERR1217415',
 'SRR5092821',
 'ERR1217019',
 'ERR564162',
 'SRR1203988',
 'SRR1183547',
 'ERR564192',
 'ERR1024581',
 'ERR1334491',
 'ERR1334696',
 'SRR2965801',
 'SRR2135088',
 'SRR1166976',
 'SRR5093539',
 'ERR1334595',
 'SRR2127627',
 'SRR2127622',
 'ERR727151',
 'SRR3110254',
 'SRR2960139',
 'SRR1166978',
 'SRR1203942',
 'SRR2965590',
 'SRR4035118',
 'SRR2127470',
 'SRR2127648',
 'ERR1024594',
 'SRR2127418',
 'SRR2127592',
 'SRR1197658',
 'SRR1582878',
 'SRR1561340',
 'SRR4025853',
 'SRR3982229',
 'SRR2915803',
 'SRR2127797',
 'SRR2127846',
 'SRR2131779',
 'SRR1203966',
 'SRR2550679',
 'SRR1203937',
 'SRR1166993',
 'SRR1820179',
 'SRR2033765',
 'SRR1204068',
 'SRR4017919',
 'SRR1166958',
 'SRR1180875',
 'SRR1610095',
 'SRR4017914',
 'SRR2127558',
 'SRR1010900',
 'SRR2135221',
 'SRR2915814',
 'ERR1289725',
 'ERR1217430',
 'SRR1203972',
 'SRR2127808',
 'SRR2135262',
 'SRR2127318',
 'SRR1810051',
 'SRR3987119',
 'SRR849555',
 'SRR4025857',
 'SRR1165879',
 'SRR1203932',
 'SRR2048084',
 'SRR1180908',
 'SRR1196936',
 'SRR515502',
 'SRR2131909',
 'SRR1187828',
 'SRR2965691',
 'SRR1582880',
 'ERR1217331',
 'SRR1187768',
 'SRR1187830',
 'ERR1024634',
 'SRR2127696',
 'SRR1187691',
 'SRR1196771',
 'SRR2965789',
 'SRR2127366',
 'SRR4198747',
 'SRR2135429',
 'SRR2965591',
 'SRR4198680',
 'SRR2965818',
 'SRR1167027',
 'SRR3242004',
 'ERR1024558',
 'ERR1334444',
 'SRR1635678',
 'SRR2127769',
 'SRR1817264',
 'SRR1166957',
 'SRR1166948',
 'SRR4414028',
 'SRR4198769',
 'ERR1024601',
 'SRR5168872',
 'SRR2131437',
 'SRR2965652',
 'SRR2135049',
 'SRR1582886',
 'SRR2127389',
 'SRR2550662',
 'SRR2965627',
 'SRR2965636',
 'ERR1217394',
 'SRR2135165',
 'SRR849553',
 'SRR2965618',
 'SRR1203970',
 'ERR1289745',
 'ERR1334460',
 'SRR2135089',
 'SRR1817645',
 'SRR5168884',
 'SRR2127377',
 'SRR1187823',
 'SRR2048076',
 'SRR5168387',
 'ERR564169',
 'ERR1217459',
 'SRR4025985',
 'SRR2965664',
 'ERR1217317',
 'ERR1334583',
 'SRR2127789',
 'SRR2033763',
 'ERR1217475',
 'SRR1203944',
 'SRR1817398',
 'SRR2965694',
 'ERR1217444',
 'ERR1024642',
 'SRR2127487',
 'SRR2965766',
 'SRR1817642',
 'SRR2135100',
 'SRR1196962',
 'SRR1187746',
 'SRR1196696',
 'SRR2965721',
 'SRR3467249',
 'SRR5092816',
 'SRR4017847',
 'SRR2724126',
 'ERR1024600',
 'SRR2135233',
 'SRR5168835',
 'SRR2132021',
 'SRR1203931',
 'SRR4198870',
 'SRR1582896',
 'SRR3996254',
 'SRR2048218',
 'SRR2048106',
 'SRR5093432',
 'SRR2131497',
 'SRR515586',
 'SRR2127501',
 'SRR2135172',
 'SRR5168516',
 'SRR3996240',
 'SRR4035116',
 'SRR1817595',
 'SRR1582875',
 'SRR5168881',
 'SRR2135259',
 'ERR1228188',
 'SRR5093413',
 'ERR1217281',
 'ERR564138',
 'SRR4017902',
 'SRR1167000',
 'ERR1217241',
 'SRR1166981',
 'SRR3953715',
 'SRR4026002',
 'ERR1374934',
 'SRR2724122',
 'ERR1024587',
 'SRR2127735',
 'ERR1228190',
 'SRR3982101',
 'SRR1166989',
 'SRR5092826',
 'SRR2135095',
 'ERR1228215',
 'SRR515594',
 'SRR2965812',
 'SRR1204064',
 'SRR1610116',
 'ERR1217103',
 'SRR2131449',
 'SRR4017856',
 'SRR2131480',
 'SRR2127384',
 'SRR2127500',
 'SRR2127709',
 'SRR2048108',
 'SRR2127368',
 'SRR5093718',
 'SRR849787',
 'SRR4198889',
 'SRR999488',
 'SRR4198819',
 'SRR1181016',
 'SRR2127293',
 'SRR2127638',
 'SRR2965729',
 'SRR4017906',
 'SRR3112254',
 'SRR2127589',
 'SRR2127706',
 'ERR1228142',
 'SRR4414031',
 'SRR1172772',
 'SRR1197700',
 'SRR1187806',
 'ERR720256',
 'SRR849529',
 'SRR1203957',
 'SRR1203943',
 'SRR4036832',
 'SRR1197650',
 'SRR5093758',
 'SRR1172946',
 'SRR1203967',
 'SRR2127315',
 'SRR2135223',
 'SRR2132018',
 'SRR2127663',
 'SRR2127475',
 'SRR2135268',
 'ERR1217147',
 'SRR1203974',
 'SRR4198699',
 'ERR1217353',
 'SRR4025861',
 'SRR1187813',
 'SRR2135460',
 'ERR1024597',
 'SRR1197655',
 'SRR2135195',
 'SRR1204009',
 'SRR2131858',
 'ERR1374918',
 'ERR1217411',
 'SRR5168373',
 'SRR2135487',
 'ERR1334584',
 'SRR2965640',
 'SRR1187761',
 'ERR564186',
 'SRR1167009',
 'SRR2135083',
 'SRR5167857',
 'SRR1196912',
 'ERR1228180',
 'SRR1204050',
 'ERR1415614',
 'ERR1334713',
 'SRR2135156',
 'SRR2127788',
 'SRR1196886',
 'SRR2127803',
 'SRR1166953',
 'SRR2135113',
 'SRR4025977',
 'SRR1167023',
 'SRR1203992',
 'SRR5168370',
 'ERR1024602',
 'SRR2965628',
 'SRR5168850',
 'SRR1180754',
 'SRR2048211',
 'SRR4017924',
 'SRR2550693',
 'SRR2965809',
 'SRR1180706',
 'SRR2127890',
 'SRR2965613',
 'SRR4414062',
 'SRR1582900',
 'ERR1217239',
 'SRR1582861',
 'SRR849536',
 'SRR849786',
 'SRR1180981',
 'ERR1415540',
 'SRR2965682',
 'SRR2965752',
 'SRR2048075',
 'SRR5168847',
 'SRR4414053',
 'SRR5093492',
 'SRR5093482',
 'ERR1217021',
 'SRR5168882',
 'SRR1183503',
 'ERR1024591',
 'SRR2127394',
 'SRR2965686',
 'SRR4017892',
 'SRR5092830',
 'ERR1217477',
 'ERR1217257',
 'ERR1217215',
 'SRR2965699',
 'SRR1204030',
 'SRR4414042',
 'SRR4414060',
 'SRR5092825',
 'SRR2127546',
 'SRR1010933',
 'SRR4198719',
 'ERR1024554',
 'ERR1228228',
 'ERR1024604',
 'SRR2135484',
 'SRR1196779',
 'ERR1514977',
 'SRR2131476',
 'ERR1334714',
 'SRR4198875',
 'SRR1635684',
 'SRR2965634',
 'SRR2127379',
 'SRR2127800',
 'ERR1289834',
 'SRR2135173',
 'SRR1206021',
 'SRR1196772',
 'SRR1510963',
 'SRR1561328',
 'SRR1582866',
 'SRR4414054',
 'SRR1561309',
 'SRR1180869',
 'SRR2724084',
 'ERR1334625',
 'SRR1204041',
 'SRR849784',
 'SRR5092815',
 'SRR3112255',
 'SRR1187742',
 'SRR2965795',
 'SRR1187833',
 'SRR2965781',
 'ERR1334676',
 'SRR4026001',
 'SRR4198714',
 'SRR2135146',
 'ERR1289751',
 'SRR1187750',
 'SRR2915823',
 'SRR4198735',
 'SRR1187780',
 'SRR2127848',
 'SRR1166951',
 'SRR2965728',
 'ERR1415497',
 'SRR2965673',
 'SRR4414016',
 'SRR4025995',
 'ERR1334468',
 'SRR2965646',
 'SRR2135267',
 'SRR2131789',
 'SRR5092558',
 'SRR2127511',
 'SRR1817836',
 'SRR2127609',
 'ERR1334433',
 'SRR1561315',
 'SRR2965705',
 'SRR3996236',
 'SRR1187809',
 'ERR1024590',
 'SRR2048095',
 'SRR1820165',
 'SRR2965604',
 'ERR1217291',
 'ERR1289755',
 'SRR5093540',
 'SRR3982283',
 'ERR1334691',
 'SRR4198862',
 'SRR4198743',
 'SRR3057156',
 'SRR2135136',
 'ERR1228205',
 'SRR1197657',
 'SRR2965597',
 'SRR1561330',
 'SRR1635680',
 'SRR1582892',
 'ERR564175',
 'SRR1196976',
 'SRR1167007',
 'ERR1024557',
 'ERR1334682',
 'ERR1024641',
 'SRR1818025',
 'SRR2127481',
 'ERR1541568',
 'SRR5168515',
 'SRR1196705',
 'SRR1196679',
 'SRR1203987',
 'SRR1204070',
 'SRR1196726',
 'SRR2127443',
 'SRR2135137',
 'SRR2135468',
 'SRR2131827',
 'SRR5092538',
 'SRR2965799',
 'SRR1561216',
 'SRR849679',
 'SRR1610139',
 'ERR1217419',
 'SRR2131447',
 'SRR2127829',
 'SRR2048077',
 'SRR1222430',
 'SRR4026006',
 'SRR5168842',
 'SRR4198722',
 'ERR1334620',
 'SRR4017890',
 'SRR2965692',
 'SRR1180955',
 'ERR1334486',
 'ERR1415564',
 'SRR5167856',
 'SRR1196943',
 'SRR2135067',
 'ERR1024584',
 'ERR1415558',
 'SRR2127439',
 'SRR2135055',
 'ERR1217382',
 'ERR1334471',
 'SRR1181017',
 'SRR4414008',
 'ERR1541570',
 'SRR5093483',
 'SRR1203953',
 'SRR2135447',
 'SRR2915829',
 'SRR1203994',
 'SRR1203929',
 'ERR1217293',
 'SRR2135106',
 'SRR1203986',
 'SRR2965791',
 'SRR1582885',
 'SRR1166963',
 'SRR515505',
 'ERR1216997',
 'SRR2135243',
 'SRR2965624',
 'SRR2127675',
 'SRR2131823',
 'SRR1818047',
 'SRR2965719',
 'SRR1197684',
 'SRR4198729',
 'SRR2135181',
 'SRR3987123',
 'SRR1166961',
 'SRR2127768',
 'ERR1217379',
 'SRR1204022',
 'ERR1514900',
 'SRR1196806',
 'SRR4414046',
 'SRR2127606',
 'SRR1196821',
 'SRR1817382',
 'SRR2131416',
 'SRR515506',
 'SRR4198728',
 'SRR2965693',
 'SRR1561294',
 'ERR1217370',
 'SRR1204078',
 'SRR1196983',
 'SRR2135485',
 'SRR2135226',
 'SRR1181068',
 'SRR4025841',
 'SRR2965620',
 'SRR849827',
 'SRR2135122',
 'SRR4414038',
 'SRR5093534',
 'SRR4017843',
 'SRR1561325',
 'SRR1817647',
 'SRR5168876',
 'SRR1010943',
 'SRR2127739',
 'SRR2127576',
 'SRR4198744',
 'SRR2131783',
 'SRR4017811',
 'SRR1204003',
 'SRR3982153',
 'SRR2127621',
 'ERR1415555',
 'ERR1334694',
 'SRR1180888',
 'SRR5146438',
 'SRR2127744',
 'ERR1289765',
 'SRR5168873',
 'ERR1514887',
 'ERR314421',
 'SRR1196680',
 'SRR1010939',
 'SRR1820181',
 'SRR2135483',
 'SRR1204042',
 'SRR5093493',
 'SRR1561270',
 'SRR4198686',
 'SRR4017848',
 'SRR4198725',
 'SRR4198869',
 'SRR1582894',
 'SRR849544',
 'SRR1197699',
 'SRR2127644',
 'SRR4198712',
 'ERR1217232',
 'SRR1181005',
 'SRR1817408',
 'SRR5093686',
 'SRR1166977',
 'SRR1180858',
 'SRR4414047',
 'SRR4198670',
 'ERR1024628',
 'SRR515551',
 'SRR3996250',
 'ERR1514913',
 'SRR1196837',
 'SRR2965779',
 'SRR4017898',
 'SRR2048210',
 'SRR2135482',
 'SRR1204004',
 'SRR4198741',
 'SRR2131454',
 'ERR1334496',
 'ERR1638919',
 'SRR999518',
 'SRR1187711',
 'SRR2127804',
 'ERR1024564',
 'SRR2135492',
 'SRR3982095',
 'SRR1187753',
 'SRR3982284',
 'SRR2127417',
 'SRR1181038',
 'SRR2135431',
 'SRR2127595',
 'SRR4198742',
 'SRR1180805',
 'SRR849537',
 'SRR5093489',
 'ERR1217432',
 'ERR1217240',
 'ERR1334655',
 'SRR1180740',
 'SRR2127628',
 'SRR1501127',
 'ERR1217269',
 'SRR4035117',
 'SRR1561301',
 'SRR2960067',
 'SRR4017918',
 'ERR1217279',
 'SRR2965623',
 'SRR2127325',
 'SRR1187765',
 'SRR5168525',
 'SRR2127728',
 'SRR2965738',
 'SRR2131428',
 'SRR3982097',
 'SRR2135174',
 'SRR2127715',
 'SRR849805',
 'SRR2127597',
 'SRR2135266',
 'SRR1561198',
 'ERR1334619',
 'SRR2135123',
 'SRR1582855',
 'ERR1514895',
 'SRR1180844',
 'SRR2127801',
 'SRR2135102',
 'SRR2131923',
 'SRR1561247',
 'SRR1204069',
 'ERR1334621',
 'SRR1810036',
 'SRR2135261',
 'SRR1166997',
 'ERR1228219',
 'SRR4198672',
 'SRR2127599',
 'SRR1196871',
 'ERR564188',
 'SRR2924449',
 'SRR2127584',
 'SRR5092579',
 'SRR2965787',
 'SRR4198684',
 'SRR1820707',
 'SRR2127773',
 'SRR2915824',
 'SRR5093673',
 'SRR2131782',
 'ERR1217218',
 'SRR1820714',
 'SRR2127580',
 'SRR4414013',
 'SRR1180930',
 'SRR1196822',
 'SRR4017911',
 'SRR515908',
 'SRR1196887',
 'SRR4198715',
 'SRR1204054',
 'ERR1217204',
 'SRR1196743',
 'SRR2915812',
 'SRR849808',
 'SRR2135120',
 'ERR1228152',
 'SRR5168477',
 'SRR4198740',
 'SRR1166962',
 'ERR1334592',
 'SRR1010925',
 'ERR1217461',
 'SRR2915813',
 'SRR2135116',
 'SRR1187824',
 'SRR5168883',
 'SRR1166973',
 'ERR1024563',
 'SRR1010942',
 'SRR2724111',
 'ERR1024589',
 'SRR3226932',
 'SRR1204060',
 'SRR2132878',
 'SRR4017907',
 'ERR564195',
 'SRR1501128',
 'SRR4414024',
 'SRR2135197',
 'SRR1196950',
 'SRR2965732',
 'SRR1187710',
 'ERR1289775',
 'SRR2131453',
 'SRR2965775',
 'SRR2127560',
 'SRR2127839',
 'SRR1167014',
 'SRR2048078',
 'SRR2127665',
 'ERR1217428',
 'SRR1610125',
 'SRR2127643',
 'SRR2965651',
 'SRR1203965',
 'SRR1582884',
 'SRR2127358',
 'SRR1203968',
 'ERR1228197',
 'ERR502553',
 'ERR1024627',
 'SRR1582859',
 'SRR1167019',
 'SRR2131470',
 'SRR3226936',
 'SRR5168494',
 'SRR4414049',
 'SRR2132017',
 'ERR1217308',
 'SRR2033764',
 'SRR2127623',
 'SRR1206028',
 'SRR1196665',
 'SRR1635681',
 'SRR2131793',
 'SRR4414036',
 'ERR1334612',
 'SRR5168216',
 'ERR1217283',
 'SRR1196734',
 'SRR2135444',
 'SRR849803',
 'SRR1204046',
 'SRR1206020',
 'SRR1187770',
 'SRR2127802',
 'SRR849534',
 'SRR2131452',
 'SRR1561336',
 'SRR2965796',
 'SRR1166987',
 'SRR2127484',
 'SRR1203991',
 'SRR3982102',
 'SRR2135162',
 'SRR1196829',
 'SRR2048112',
 'ERR1217474',
 'ERR1228203',
 'ERR564144',
 'SRR1010917',
 'SRR1196968',
 'SRR2965718',
 'SRR2965798',
 'SRR1166972',
 'SRR1010921',
 'ERR1024576',
 'SRR2965744',
 'SRR2965740',
 'SRR1196935',
 'ERR1334420',
 'SRR2724078',
 'SRR1820708',
 'SRR2127559',
 'SRR1818071',
 'SRR2965648',
 'SRR1167003',
 'SRR1582901',
 'SRR2965690',
 'SRR2127771',
 'SRR2127357',
 'ERR1228199',
 'ERR1217235',
 'SRR2127602',
 'SRR2048109',
 'SRR1166975',
 'SRR1197660',
 'SRR849872',
 'SRR1173007',
 'SRR2135084',
 'SRR2135461',
 'SRR1196805',
 'SRR1010920',
 'SRR2965706',
 'SRR4198678',
 'SRR3987106',
 'SRR1180897',
 'SRR2135499',
 'SRR2048082',
 'SRR2048117',
 'SRR2127625',
 'SRR2127345',
 'SRR3996269',
 'SRR2965762',
 'SRR1610133',
 'SRR1634583',
 'SRR1196797',
 'ERR1217267',
 'SRR2135501',
 'SRR849806',
 'SRR1817403',
 'SRR2127478',
 'SRR4414051',
 'SRR2819358',
 'SRR2127458',
 'SRR1180889',
 'SRR1010924',
 'ERR1024592',
 'SRR3112257',
 'SRR1818046',
 'SRR1582879',
 'ERR1228182',
 'SRR4198675',
 'SRR3996267',
 'SRR1561261',
 'ERR1217337',
 'SRR4198736',
 'ERR474302',
 'SRR1820173',
 'SRR3110108',
 'SRR4198681',
 'SRR2965644',
 'SRR2127850',
 'SRR2965659',
 'SRR3095383',
 'SRR1582873',
 'SRR1196755',
 'SRR2127382',
 'ERR1334498',
 'SRR2048085',
 'SRR1582864',
 'SRR1187747',
 'SRR1167028',
 'SRR2915801',
 'SRR2131856',
 'SRR999537',
 'SRR1197695',
 'ERR1217404',
 'SRR4198727',
 'SRR2965668',
 'SRR4414018',
 'SRR1204019',
 'SRR1582874',
 'SRR2965756',
 'SRR4198817',
 'ERR1217433',
 'SRR1582869',
 'SRR2135433',
 'ERR1334502',
 'SRR5093756',
 'SRR999486',
 'SRR2724116',
 'SRR1561343',
 'SRR999541',
 'SRR2127299',
 'SRR1197645',
 'SRR2135446',
 'ERR1217143',
 'SRR4017889',
 'SRR4198749',
 'SRR1166994',
 'SRR5168476',
 'SRR1180863',
 'SRR1817412',
 'SRR2127838',
 'SRR4017851',
 'ERR1334470',
 'ERR1334443',
 'SRR4198871',
 'SRR1187818',
 'SRR1806632',
 'SRR2965703',
 'SRR2915807',
 'SRR2965635',
 'SRR2127414',
 'SRR1166992',
 'SRR2135237',
 'ERR1228164',
 'ERR1448242',
 'SRR2965743',
 'SRR1203939',
 'SRR1166952',
 'SRR1610123',
 'SRR2127636',
 'SRR2127290',
 'SRR1204063',
 'ERR1289761',
 'SRR1817255',
 'SRR1203938',
 'SRR2127872',
 'SRR2127298',
 'SRR1203996',
 'SRR2127676',
 ...}

In [22]:
from Bio import Entrez
import time
Entrez.email ="phelimb@well.ox.ac.uk"
def search_genbank(GI):
    """Fetch the nucleotide record(s) for a GenBank GI and search each sequence.

    The GI is posted to the NCBI history server with ``Entrez.epost``, the
    matching records are fetched as XML with ``Entrez.efetch``, and for every
    record that carries a ``GBSeq_sequence`` the upper-cased sequence is
    passed to ``search`` (default threshold). Each result list is printed.

    Parameters
    ----------
    GI : str
        Identifier passed as ``id`` to ``Entrez.epost`` for the
        ``nucleotide`` database.

    Returns
    -------
    None
        Results are printed, one list per record, not returned.
    """
    post_handle = Entrez.epost("nucleotide", id=GI)
    try:
        posted = Entrez.read(post_handle)
    finally:
        # Always release the HTTP handle, even if parsing fails.
        post_handle.close()

    fetch_handle = Entrez.efetch(
        db="nucleotide",
        retmode="xml",
        webenv=posted["WebEnv"],
        query_key=posted["QueryKey"],
    )
    try:
        for record in Entrez.parse(fetch_handle):
            sequence = record.get("GBSeq_sequence")
            if not sequence:
                # Record has no sequence data; calling .upper() on None
                # would raise, so skip it instead.
                continue
            print(search(sequence.upper()))
    finally:
        fetch_handle.close()

In [24]:
## Search for a GI from genbank
## e.g. https://www.ncbi.nlm.nih.gov/nuccore/1150750917

## REPLACE THIS WITH GI OF INTEREST ##
GI="1150750917"
############
# Prints one list of matching sample IDs per record fetched for this GI.
search_genbank(GI)


['ERR1681620', 'ERR1163291', 'ERR1609376', 'ERR1623360', 'ERR1622314', 'ERR1656135', 'ERR1609380', 'ERR1622709', 'ERR1623341', 'ERR1163331', 'ERR1622651', 'ERR1623390', 'ERR1366473', 'SRR2053338', 'ERR1622310', 'ERR1759157', 'ERR197184', 'ERR956997', 'ERR1623239', 'ERR1681650', 'SRR1645608', 'ERR1623521', 'SRR5184275', 'SRR2767734', 'ERR1623227', 'ERR1759167', 'ERR1622655', 'ERR1623229', 'ERR1759128', 'ERR1681717', 'ERR1656450', 'ERR1623113', 'ERR715581', 'SRR3745275', 'ERR1622312', 'ERR1759166', 'ERR1622305', 'ERR1622034', 'SRR1788031', 'ERR1622541', 'ERR1407280', 'ERR1623222', 'SRR2053340', 'SRR1788026', 'ERR1681704', 'SRR1967426', 'SRR1958396', 'SRR1788027', 'ERR1681654', 'ERR1623579', 'SRR5129179', 'ERR1681785', 'ERR1622079', 'ERR1623230', 'ERR1609434', 'ERR1623215', 'ERR1407279', 'ERR1623237', 'ERR1623346', 'ERR1407278', 'ERR1622547', 'ERR1360255', 'ERR1609449', 'ERR1623228', 'ERR1622435', 'ERR1623394', 'SRR3322633', 'ERR1622708', 'ERR135710', 'SRR1957973', 'ERR1622928', 'ERR1681839', 'ERR1681783', 'ERR1623457', 'ERR1622654', 'ERR1609312', 'ERR1622881', 'ERR1609378', 'SRR3584989', 'ERR1622946', 'ERR1759207', 'ERR1623525', 'ERR1622370', 'ERR1229301', 'ERR1046133', 'ERR1622670', 'ERR1609197', 'ERR1622309', 'SRR3882972', 'SRR1788028', 'SRR3707448', 'ERR1622739', 'ERR1229297', 'ERR1609196', 'SRR4302136', 'SRR1788024', 'ERR1623088', 'ERR1623225', 'ERR1622311', 'SRR2075991', 'ERR1149371', 'ERR1218581', 'ERR1622769', 'SRR1960364', 'ERR1622930', 'ERR1229302', 'SRR5201504', 'ERR1622883', 'ERR702345', 'ERR1218720', 'ERR1623226', 'SRR1965341', 'SRR3168915', 'ERR1623212', 'ERR1623214', 'SRR4302224', 'ERR1623594', 'SRR3407159', 'SRR1788032', 'ERR1622648', 'ERR1609215', 'SRR1814872', 'SRR3170531', 'SRR2054237', 'ERR1432660', 'ERR1609201', 'ERR1622653', 'SRR3452849', 'SRR2010693', 'ERR1432659', 'ERR1622107', 'ERR1609245', 'SRR4302302', 'ERR1622060', 'ERR1681606', 'ERR1622545', 'ERR1622729', 'SRR4302214', 'SRR2015698', 'ERR1622839', 'ERR1035693', 'ERR135713', 'ERR1623241', 
'ERR1609249', 'ERR1218643', 'SRR4289227', 'ERR1622589', 'ERR1544012', 'ERR1623223', 'ERR1562562', 'ERR1622058', 'ERR1681602', 'ERR1623444', 'ERR1622987', 'ERR1622770', 'ERR1623342', 'ERR1360256', 'SRR3170679', 'ERR1681601', 'ERR1622725', 'ERR1623523', 'ERR1681781', 'SRR2054248', 'ERR1229300', 'ERR1622840', 'SRR1969022', 'ERR1359224', 'ERR1681854', 'ERR1623279']

In [ ]:


In [ ]: