In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# Request the ESummary record for nuccore id 935523605.
# - https:// is used because NCBI requires HTTPS for E-utilities traffic.
# - Query arguments go through `params` so requests builds and encodes the
#   query string instead of it being hand-written into the URL.
# - `timeout` keeps a stalled connection from hanging the kernel forever.
# The status code is deliberately not checked here; the next cell does that.
r = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
    params={"db": "nuccore", "id": "935523605"},
    timeout=30,
)

In [3]:
# Print the Title and TaxId <Item> values from the ESummary XML held in `r`.
if r.status_code == 200:
    # Parse the payload once and reuse the tree for both lookups.
    summary_xml = BeautifulSoup(r.content, "xml")
    for item_name in ("Title", "TaxId"):
        # Match <Item Name="..."> elements; `attrs` makes it explicit that
        # "Name" is an XML attribute filter and not the tag-name argument.
        item = summary_xml.find("Item", attrs={"Name": item_name})
        if item is None:
            # A 200 response can still lack the expected item; say so rather
            # than failing with AttributeError on None.
            print("<{} not found in ESummary response>".format(item_name))
        else:
            print(item.text)
else:
    # Report the failure instead of producing no output at all.
    print("ESummary request failed with HTTP status {}".format(r.status_code))


gut metagenome genome assembly P9E90-k21-2014-09-20, contig contig-6000102, whole genome shotgun sequence
749906

In [10]:
# ESearch: look up a single protein accession.
# `usehistory=y` is sent so the response carries the WebEnv / QueryKey values
# that the following cells read back out of the XML.
term = "XP_004967523.1[accn]"
db = "protein"
r = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",  # HTTPS is required by NCBI
    # Let requests encode the values; `term` contains '[' and ']' which
    # should not be spliced into the URL unescaped.
    params={"db": db, "term": term, "usehistory": "y"},
    timeout=30,  # do not let a stalled connection hang the kernel
)
# Fail here, with a clear HTTP error, rather than later while parsing an
# error page as if it were the expected XML.
r.raise_for_status()

In [13]:
# Parse the ESearch response body as XML and echo the two elements that the
# later EFetch cell needs, in the order WebEnv then QueryKey.
doc_xml = BeautifulSoup(r.content, "xml")
for tag_name in ("WebEnv", "QueryKey"):
    print(doc_xml.find(tag_name).text)


NCID_1_61086232_165.112.9.37_9001_1461339738_678317138_0MetA0_S_MegaStore_F_1
1

In [16]:
# EFetch: retrieve the records referenced by the QueryKey / WebEnv pair from
# the parsed ESearch response (`doc_xml`), as FASTA in plain-text mode.
query_key = doc_xml.find("QueryKey").text
webenv = doc_xml.find("WebEnv").text
# https:// because NCBI requires HTTPS for E-utilities traffic.
uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db={}&query_key={}&WebEnv={}&rettype=fasta&retmode=text".format(db, query_key, webenv)
print(uri)  # show the exact request URL for reproducibility
# A timeout keeps a stalled connection from hanging the kernel.
r = requests.get(uri, timeout=30)
# Surface HTTP errors now instead of decoding an error page as sequence data.
r.raise_for_status()


http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&query_key=1&WebEnv=NCID_1_61086232_165.112.9.37_9001_1461339738_678317138_0MetA0_S_MegaStore_F_1&rettype=fasta&retmode=text

In [17]:
# Show the raw response body as text; the bytes are decoded as UTF-8
# (the same codec bytes.decode() uses by default).
str(r.content, "utf-8")


Out[17]:
'>gi|514772549|ref|XP_004967523.1| PREDICTED: dual specificity protein phosphatase PHS1-like [Setaria italica]\nMEQREATQPEEAAEARGREQPSSILPKENEDKDLKLSSRVVSLFFGGDISTPAQTFEKWLSLVRKRSGAF\nRPSGFPHRGSRIEVMPSGSFSLFGSGDLSEHLVREESVGKDPLTCDQPPEISLWERLGNASTLDIESSEF\nSWDVLSSLHHTEHSSGSEHSEDEMNKALEVTVNSGGVVFFALFSSSSNSELPEEAAAVIKFSSSKMATQA\nERLGYEFARLLGVQTPQARVVYNSSPEWQGIKHAAENARAVAVSNNDEVGEMTCSELMEALELSRCLILM\nSYIHGSPLLESSKAFNLREAACVTASSLGRVLMLDLILRNEDRLPCRQLGWRGNPANLMISDKSSSPNVD\nRLQDSISTTESSNRLIREILLREKRSHSTNGRLDSVELNPMSQKLEALKNERENTESTNDTFHIVAIDTG\nVPRRPPAGRRMKDHERYPKVVELILNCSDYSANILYEISGGKLGHPGPDEFTCTDSCVSLSDEDNAVAIH\nEFRGSFRAALRDLEGFHLFLLQLYQKLDGLLRVFLSIITKSSEEPDNNDCVLSDFPSPGASYSTPCKQLN\nNELHSDSEMLKSTTKSSSAGSRGSSDSVSPLSRDSWSNKFFKGSAEAPRNLRMTMKLRDFYKNPKVDPEL\nLKEIEQWNEALKTDVIKFCQENNFHSGFFDGTENNMVADAYELKVRLEHIIERIALISDAANTERPSLVV\nNNLFIGGALAARSKYTLQHLGITHVLCLCSNEIGQSDSQFPDLFEYKNFSIRDDDDANISDLFEEASDFI\nDHVNHIGGKVLVHCFEGKSRSATVVLAFLMLRMGFTLAKAWNLLKKVHRRAQPNDGFAKALLALDKKLHG\nKVSMDWQHKRPEMKVCPICSKNVGLSTSSLKLHLQKAHKRLSAGSVDSAMTMEIQKSIESLRISRGGSLS\nPSQKLTKAFANELSF\n\n'