In [1]:
import Bio
from Bio import Entrez

Entrez.email = "vm@gmail.com"

In [2]:
handle = Entrez.einfo()
record = Entrez.read(handle)

print(record)


{u'DbList': ['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']}

In [3]:
print(record.keys())

record['DbList']


[u'DbList']
Out[3]:
['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']

In [3]:
#import xml.etree.cElementTree as ET
import bs4

In [6]:
dbname = 'sra'
handle = Entrez.esearch(db=dbname, retmax=10000, term='16S sequence bacteria')

records = Entrez.read(handle)

#print(records)

#with open('')
for recid  in records['IdList']:
    h = Entrez.efetch(db=dbname, id=recid, rettype="gb", retmode="xml")
    rec = h.read()
    #xml.sax.parseString(rec, SeqHandler())
    soup = bs4.BeautifulSoup(rec, 'xml')
    #print(soup.find_all('GBSeq_sequence').contents)
    print (soup.GBSeq_sequence.contents[0].upper())


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-6-19d4ae9bbd8e> in <module>()
     13     soup = bs4.BeautifulSoup(rec, 'xml')
     14     #print(soup.find_all('GBSeq_sequence').contents)
---> 15     print (soup.GBSeq_sequence.contents[0].upper())

AttributeError: 'NoneType' object has no attribute 'contents'

In [8]:
hsearch = Entrez.esearch(db='pubmed', term='Cypripedioideae')

record = Entrez.read(hsearch)

record


Out[8]:
{u'Count': '10', u'RetMax': '10', u'IdList': ['24001522', '22685605', '21718793', '21241312', '19168860', '21642160', '15120407', '21653371', '21680361', '21684950'], u'TranslationStack': [{u'Count': '10', u'Field': 'All Fields', u'Term': 'Cypripedioideae[All Fields]', u'Explode': 'N'}, 'GROUP'], u'TranslationSet': [], u'RetStart': '0', u'QueryTranslation': 'Cypripedioideae[All Fields]'}

In [11]:
for rec_id in record['IdList'][0:1]:
    h = Entrez.efetch(db="nucleotide", id=rec_id, rettype="gb", retmode="xml")
    print(h.read())


<?xml version="1.0"?>
 <!DOCTYPE GBSet PUBLIC "-//NCBI//NCBI GBSeq/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_GBSeq.dtd">
 <GBSet>
<GBSeq>
  <GBSeq_locus>BZ277537</GBSeq_locus>
  <GBSeq_length>732</GBSeq_length>
  <GBSeq_strandedness>double</GBSeq_strandedness>
  <GBSeq_moltype>DNA</GBSeq_moltype>
  <GBSeq_topology>linear</GBSeq_topology>
  <GBSeq_division>GSS</GBSeq_division>
  <GBSeq_update-date>15-OCT-2002</GBSeq_update-date>
  <GBSeq_create-date>15-OCT-2002</GBSeq_create-date>
  <GBSeq_definition>CH230-392F5.TJ CHORI-230 Segment 2 Rattus norvegicus genomic clone CH230-392F5, genomic survey sequence</GBSeq_definition>
  <GBSeq_primary-accession>BZ277537</GBSeq_primary-accession>
  <GBSeq_accession-version>BZ277537.1</GBSeq_accession-version>
  <GBSeq_other-seqids>
    <GBSeqid>gnl|dbGSS|4084010</GBSeqid>
    <GBSeqid>gb|BZ277537.1|</GBSeqid>
    <GBSeqid>gi|24001522</GBSeqid>
  </GBSeq_other-seqids>
  <GBSeq_keywords>
    <GBKeyword>GSS</GBKeyword>
  </GBSeq_keywords>
  <GBSeq_source>Rattus norvegicus (Norway rat)</GBSeq_source>
  <GBSeq_organism>Rattus norvegicus</GBSeq_organism>
  <GBSeq_taxonomy>Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Muridae; Murinae; Rattus</GBSeq_taxonomy>
  <GBSeq_references>
    <GBReference>
      <GBReference_reference>1</GBReference_reference>
      <GBReference_position>1..732</GBReference_position>
      <GBReference_authors>
        <GBAuthor>Zhao,S.</GBAuthor>
        <GBAuthor>Shetty,J.</GBAuthor>
        <GBAuthor>Shatsman,S.</GBAuthor>
        <GBAuthor>Tsegaye,G.</GBAuthor>
        <GBAuthor>Geer,K.</GBAuthor>
        <GBAuthor>Shvartsbeyn,A.</GBAuthor>
        <GBAuthor>Gebregeorgis,E.</GBAuthor>
        <GBAuthor>Overton,L.</GBAuthor>
        <GBAuthor>Russell,D.</GBAuthor>
        <GBAuthor>Chen,D.</GBAuthor>
        <GBAuthor>Riggs,F.</GBAuthor>
        <GBAuthor>de Jong,P.</GBAuthor>
        <GBAuthor>Fraser,C.M.</GBAuthor>
      </GBReference_authors>
      <GBReference_title>Rat BAC End Sequences from Library CHORI-230 MboI segment</GBReference_title>
      <GBReference_journal>Unpublished</GBReference_journal>
    </GBReference>
  </GBSeq_references>
  <GBSeq_comment>Other_GSSs: CH230-392F5.TV~Contact: Shaying Zhao~Department of Eukaryotic Genomics~The Institute for Genomic Research~9712 Medical Center Dr., Rockville, MD 20850, USA~Tel: 301 838 0200~Fax: 301 838 0208~Email: szhao@tigr.org~Clones are derived from the rat BAC library CHORI-230 (http://www.chori.org/bacpac/rat230.htm). For BAC library availability, please contact Pieter de Jong (pdejong@mail.cho.org). Clones may be purchased from BACPAC Resources (http://www.chori.org/bacpac/or ering_information.htm). BAC end page: http://www.tigr.org/tdb/bac_ends/rat/bac_end_intro.html~Plate: 392 row: F column: 5~Seq primer: SP6~Class: BAC ends</GBSeq_comment>
  <GBSeq_feature-table>
    <GBFeature>
      <GBFeature_key>source</GBFeature_key>
      <GBFeature_location>1..732</GBFeature_location>
      <GBFeature_intervals>
        <GBInterval>
          <GBInterval_from>1</GBInterval_from>
          <GBInterval_to>732</GBInterval_to>
          <GBInterval_accession>BZ277537.1</GBInterval_accession>
        </GBInterval>
      </GBFeature_intervals>
      <GBFeature_quals>
        <GBQualifier>
          <GBQualifier_name>organism</GBQualifier_name>
          <GBQualifier_value>Rattus norvegicus</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>mol_type</GBQualifier_name>
          <GBQualifier_value>genomic DNA</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>strain</GBQualifier_name>
          <GBQualifier_value>BN/SsNHsd/MCW</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>db_xref</GBQualifier_name>
          <GBQualifier_value>taxon:10116</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>clone</GBQualifier_name>
          <GBQualifier_value>CH230-392F5</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>sex</GBQualifier_name>
          <GBQualifier_value>Female</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>cell_type</GBQualifier_name>
          <GBQualifier_value>Brain</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>clone_lib</GBQualifier_name>
          <GBQualifier_value>LIBGSS_003368 CHORI-230 Segment 2</GBQualifier_value>
        </GBQualifier>
        <GBQualifier>
          <GBQualifier_name>note</GBQualifier_name>
          <GBQualifier_value>Vector: pTARBAC1.3; Site_1: MboI; Site_2: MboI; CHORI-230 Rat (BN/SsNHsd/MCW) BAC library produced by Pieter de Jong</GBQualifier_value>
        </GBQualifier>
      </GBFeature_quals>
    </GBFeature>
  </GBSeq_feature-table>
  <GBSeq_sequence>tccaacttagaaaaaactcaaacatgcacagtacaacttaacaatgagtttgctttatgtcattttgatatattttgactttgtgggccaaatttggaaaagaaccaacaaacaaggcaattacctttccaaaaaacacctttaccactatgcagtcattcttttgagcctttctgacagctaagactataatttgggtgggtctgaaagggatgatagagatagaaagttggttttggaaaccagagaaccataccctaaaaatatgactggctagagaaatttgtctaaggccagtaagccgcttgattccacttgtacaatcaccccaatgaggcctatgtgaacggatgagtttgtcaggaggaagaacaccttggtttacaggagctctatagaacgtgatttctagaatcctttctaagagacagaaggtcatactggcgtcctcactgtacctttacttagctcatttgcatacagaaccatctggtacttcagttctattctgagaaatgaataccaaggtcttacatgtcacatgaatgtaggtatgaaagactcttctacaaagagatactgtgcactatacaactgtgcccaactgcccacgacatccactgatccatcagaatgggcatttcttcttgggttttcatgcaggccatctagtcaatgaacttaaccaccaccggcgcaaccgtgagtagactttttctgtcaccccaattt</GBSeq_sequence>
  <GBSeq_xrefs>
    <GBXref>
      <GBXref_dbname>BioSample</GBXref_dbname>
      <GBXref_id>LIBGSS_003368</GBXref_id>
    </GBXref>
  </GBSeq_xrefs>
</GBSeq>

</GBSet>



In [5]:
print(record['sra'])


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-5-d37043a56955> in <module>()
----> 1 print(record['sra'])

KeyError: 'sra'

In [ ]: