In [1]:
from Bio import Entrez, Medline, SeqIO

Do not forget to tell NCBI your email address (replace the placeholder in the cell below with your own).


In [2]:
# NCBI should be told who is making the requests: replace this placeholder
# with your own address before running the rest of the notebook.
Entrez.email = "put@your_email.here"

In [3]:
# List the Entrez databases that are available for querying
# (EInfo called without arguments).
handle = Entrez.einfo()
try:
    rec = Entrez.read(handle)
finally:
    # Release the connection handle even if parsing the reply fails.
    handle.close()
print(rec)


{u'DbList': ['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']}

In [4]:
# Search the nucleotide database for Plasmodium falciparum CRT gene records.
crt_term = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'
handle = Entrez.esearch(db="nucleotide", term=crt_term)
try:
    rec_list = Entrez.read(handle)
finally:
    handle.close()

# The parsed 'RetMax' and 'Count' values are string-like, so convert them to
# integers before comparing: a lexicographic comparison gets e.g.
# '20' < '100' wrong and would silently skip the full re-query.
if int(rec_list['RetMax']) < int(rec_list['Count']):
    # The first reply was truncated; repeat the search asking for every hit.
    handle = Entrez.esearch(db="nucleotide", term=crt_term,
                            retmax=rec_list['Count'])
    try:
        rec_list = Entrez.read(handle)
    finally:
        handle.close()

In [5]:
# Take the IDs returned by the search and request the matching records from
# the nucleotide database with rettype 'gb' (parsed as GenBank in the next cell).
id_list = rec_list['IdList']
hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb')

In [6]:
# Read every GenBank record from the efetch response into memory, then
# release the handle: it is fully consumed once the list has been built.
try:
    recs = list(SeqIO.parse(hdl, 'gb'))
finally:
    hdl.close()

In [7]:
# Pick the record of interest by name. Fail loudly when it is missing:
# a plain for/break loop would otherwise leave `rec` pointing at whatever
# record happened to be last and the notebook would carry on with it.
target_name = 'KM288867'
found = next((candidate for candidate in recs
              if candidate.name == target_name), None)
if found is None:
    raise LookupError('%s not found among the %d fetched records'
                      % (target_name, len(recs)))
rec = found
print(rec.name)
print(rec.description)


KM288867
Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds.

In [8]:
# Walk the record's features: show the name of each gene, the coordinates
# and strand of each exon, and dump every other feature type verbatim.
for feat in rec.features:
    kind = feat.type
    if kind == 'gene':
        print(feat.qualifiers['gene'])
        continue
    if kind == 'exon':
        span = feat.location
        print('Exon', span.start, span.end, span.strand)
        continue
    # Anything that is neither a gene nor an exon is only reported.
    print('not processed:\n%s' % feat)


not processed:
type: source
location: [0:10000](+)
qualifiers:
    Key: clone, Value: ['PF3D7_0709000']
    Key: db_xref, Value: ['taxon:5833']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Plasmodium falciparum']

['CRT']
not processed:
type: mRNA
location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)}
qualifiers:
    Key: gene, Value: ['CRT']
    Key: product, Value: ['chloroquine resistance transporter']
Sub-Features
type: mRNA
location: [2751:3543](+)
qualifiers:

type: mRNA
location: [3720:3989](+)
qualifiers:

type: mRNA
location: [4168:4341](+)
qualifiers:

type: mRNA
location: [4513:4646](+)
qualifiers:

type: mRNA
location: [4799:4871](+)
qualifiers:

type: mRNA
location: [4994:5070](+)
qualifiers:

type: mRNA
location: [5166:5249](+)
qualifiers:

type: mRNA
location: [5376:5427](+)
qualifiers:

type: mRNA
location: [5564:5621](+)
qualifiers:

type: mRNA
location: [5769:5862](+)
qualifiers:

type: mRNA
location: [6055:6100](+)
qualifiers:

type: mRNA
location: [6247:6302](+)
qualifiers:

type: mRNA
location: [6471:7598](+)
qualifiers:


not processed:
type: 5'UTR
location: [2751:3452](+)
qualifiers:
    Key: gene, Value: ['CRT']

not processed:
type: primer_bind
location: [2935:2958](+)
qualifiers:

not processed:
type: primer_bind
location: [3094:3121](+)
qualifiers:

not processed:
type: CDS
location: join{[3452:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:6548](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:706072609']
    Key: gene, Value: ['CRT']
    Key: product, Value: ['chloroquine resistance transporter']
    Key: protein_id, Value: ['AIW62921.1']
    Key: translation, Value: ['MKFASKKNNQKNSSKNDERYRELDNLVQEGNGSRLGGGSCLGKCAHVFKLIFKEIKDNIFIYILSIIYLSVCVMNKIFAKRTLNKIGNYSFVTSETHNFICMIMFFIVYSLFGNKKGNSKERHRSFNLQFFAISMLDACSVILAFIGLTRTTGNIQSFVLQLSIPINMFFCFLILRYRYHLYNYLGAVIIVVTIALVEMKLSFETQEENSIIFNLVLISALIPVCFSNMTREIVFKKYKIDILRLNAMVSFFQLFTSCLILPVYTLPFLKQLHLPYNEIWTNIKNGFACLFLGRNTVVENCGLGMAKLCDDCDGAWKTFALFSFFNICDNLITSYIIDKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDVVREPRLLDFVTLFGYLFGSIIYRVGNIILERKKMRNEENEDSEGELTNVDSIITQ']
Sub-Features
type: CDS
location: [3452:3543](+)
qualifiers:

type: CDS
location: [3720:3989](+)
qualifiers:

type: CDS
location: [4168:4341](+)
qualifiers:

type: CDS
location: [4513:4646](+)
qualifiers:

type: CDS
location: [4799:4871](+)
qualifiers:

type: CDS
location: [4994:5070](+)
qualifiers:

type: CDS
location: [5166:5249](+)
qualifiers:

type: CDS
location: [5376:5427](+)
qualifiers:

type: CDS
location: [5564:5621](+)
qualifiers:

type: CDS
location: [5769:5862](+)
qualifiers:

type: CDS
location: [6055:6100](+)
qualifiers:

type: CDS
location: [6247:6302](+)
qualifiers:

type: CDS
location: [6471:6548](+)
qualifiers:


('Exon', ExactPosition(3452), ExactPosition(3543), 1)
('Exon', ExactPosition(3720), ExactPosition(3989), 1)
('Exon', ExactPosition(4168), ExactPosition(4341), 1)
not processed:
type: primer_bind
location: [4288:4323](-)
qualifiers:

('Exon', ExactPosition(4513), ExactPosition(4646), 1)
('Exon', ExactPosition(4799), ExactPosition(4871), 1)
('Exon', ExactPosition(4994), ExactPosition(5070), 1)
('Exon', ExactPosition(5166), ExactPosition(5249), 1)
('Exon', ExactPosition(5376), ExactPosition(5427), 1)
('Exon', ExactPosition(5564), ExactPosition(5621), 1)
('Exon', ExactPosition(5769), ExactPosition(5862), 1)
('Exon', ExactPosition(6055), ExactPosition(6100), 1)
('Exon', ExactPosition(6247), ExactPosition(6302), 1)
('Exon', ExactPosition(6471), ExactPosition(6548), 1)
not processed:
type: 3'UTR
location: [6548:7598](+)
qualifiers:
    Key: gene, Value: ['CRT']

not processed:
type: primer_bind
location: [7833:7856](-)
qualifiers:


In [9]:
# Show each record-level annotation as key=value.
annotations = rec.annotations
for key in annotations:
    print('%s=%s' % (key, annotations[key]))


sequence_version=1
source=Plasmodium falciparum (malaria parasite P. falciparum)
taxonomy=['Eukaryota', 'Alveolata', 'Apicomplexa', 'Aconoidasida', 'Haemosporida', 'Plasmodium', 'Plasmodium (Laverania)']
keywords=['']
references=[Reference(title='Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction', ...), Reference(title='Direct Submission', ...)]
accessions=['KM288867']
data_file_division=INV
date=12-NOV-2014
organism=Plasmodium falciparum
gi=706072608

In [10]:
# Length of the selected record's sequence.
print(len(rec.seq))


10000

In [11]:
# For every literature reference attached to the record that has a PubMed ID,
# download its MEDLINE entry and print all of its fields.
refs = rec.annotations['references']
for ref in refs:
    # References without a PubMed ID (empty string) have nothing to look up.
    if not ref.pubmed_id:
        continue
    print(ref.pubmed_id)
    handle = Entrez.efetch(db="pubmed", id=[ref.pubmed_id],
                           rettype="medline", retmode="text")
    try:
        # The handle is kept open until the loop has consumed every record,
        # in case the parser reads from it lazily.
        records = Medline.parse(handle)
        for med_rec in records:
            for k, v in med_rec.items():
                print('%s: %s' % (k, v))
    finally:
        # Release the connection even if parsing or printing fails.
        handle.close()


25370483
LID: 10.1038/ncomms6329 [doi]
STAT: In-Process
DEP: 20141105
MID: ['NIHMS630149']
DA: 20141105
AID: ['ncomms6329 [pii]', '10.1038/ncomms6329 [doi]']
CRDT: ['2014/11/06 06:00']
DP: 2014
GR: ['1DP2OD007124/OD/NIH HHS/United States', '5-T32-ES007020/ES/NIEHS NIH HHS/United States', '5-T32-GM08334/GM/NIGMS NIH HHS/United States', 'DP2 OD007124/OD/NIH HHS/United States', 'P30 ES002109/ES/NIEHS NIH HHS/United States']
OWN: NLM
PT: ['Journal Article', 'Research Support, N.I.H., Extramural', "Research Support, Non-U.S. Gov't"]
LA: ['eng']
FAU: ['Goldfless, Stephen J', 'Wagner, Jeffrey C', 'Niles, Jacquin C']
JT: Nature communications
LR: 20150117
PG: 5329
TI: Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction.
PMCR: ['2015/05/05 00:00']
PL: England
TA: Nat Commun
JID: 101528555
AB: The available tools for conditional gene expression in Plasmodium falciparum are limited. Here, to enable reliable control of target gene expression, we build a system to efficiently modulate translation. We overcame several problems associated with other approaches for regulating gene expression in P. falciparum. Specifically, our system functions predictably across several native and engineered promoter contexts, and affords control over reporter and native parasite proteins irrespective of their subcellular compartmentalization. Induction and repression of gene expression are rapid, homogeneous and stable over prolonged periods. To demonstrate practical application of our system, we used it to reveal direct links between antimalarial drugs and their native parasite molecular target. This is an important outcome given the rapid spread of resistance, and intensified efforts to efficiently discover and optimize new antimalarial drugs. Overall, the studies presented highlight the utility of our system for broadly controlling gene expression and performing functional genetics in P. falciparum.
AD: Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA.
VI: 5
IS: 2041-1723 (Electronic) 2041-1723 (Linking)
PMC: PMC4223869
AU: ['Goldfless SJ', 'Wagner JC', 'Niles JC']
MHDA: 2014/11/06 06:00
PHST: ['2014/04/15 [received]', '2014/09/20 [accepted]']
OID: ['NLM: NIHMS630149 [Available on 05/05/15]', 'NLM: PMC4223869 [Available on 05/05/15]']
EDAT: 2014/11/06 06:00
SI: ['GENBANK/KM288848', 'GENBANK/KM288849', 'GENBANK/KM288850', 'GENBANK/KM288851', 'GENBANK/KM288852', 'GENBANK/KM288853', 'GENBANK/KM288854', 'GENBANK/KM288855', 'GENBANK/KM288856', 'GENBANK/KM288857', 'GENBANK/KM288858', 'GENBANK/KM288859', 'GENBANK/KM288860', 'GENBANK/KM288861', 'GENBANK/KM288862', 'GENBANK/KM288863', 'GENBANK/KM288864', 'GENBANK/KM288865', 'GENBANK/KM288866', 'GENBANK/KM288867']
SO: Nat Commun. 2014 Nov 5;5:5329. doi: 10.1038/ncomms6329.
SB: IM
PMID: 25370483
PST: epublish

In [11]: