In [1]:
import ga4gh_client.client as client
# Base URL of the GA4GH server that every later cell queries through `c`.
BASE_URL = "http://1kgenomes.ga4gh.org"
c = client.HttpClient(BASE_URL)
Reference sets collect together named reference sequences as part of released assemblies. The API provides methods for accessing reference sequences.
The Thousand Genomes data presented here are mapped to GRCh37, and so this server makes that reference genome available. Datasets and reference genomes are decoupled in the data model, so it is possible to use the same reference set in multiple datasets.
Here, we list the details of the reference set.
In [2]:
# Walk the reference sets the server exposes and print a short summary of each.
for reference_set in c.search_reference_sets():
    # Keep a handle on the set; later cells read `ncbi37.id`.
    # If the server returns several sets, the last one wins.
    ncbi37 = reference_set
    print("name: {}".format(ncbi37.name))
    print("ncbi_taxon_id: {}".format(ncbi37.ncbi_taxon_id))
    print("description: {}".format(ncbi37.description))
    print("source_uri: {}".format(ncbi37.source_uri))
In [3]:
# Fetch a single reference set directly by its ID (instead of searching)
# and print the returned record.
reference_set = c.get_reference_set(reference_set_id=ncbi37.id)
print(reference_set)
In [4]:
# List the references that belong to the reference set, printing only the
# first five to keep the output short.
#
# The set is addressed through `ncbi37.id` rather than a pasted copy of the
# ID string, so the cell follows whatever reference set the server actually
# returned instead of silently depending on one specific server's encoding.
for counter, reference in enumerate(
        c.search_references(reference_set_id=ncbi37.id), start=1):
    if counter > 5:
        # Stop after five records; the remaining results are not needed here.
        break
    print(reference)
In [5]:
# Fetch one reference directly by its ID and print the returned record.
# NOTE(review): the ID looks like base64 for ["NCBI37","1"], i.e. sequence
# "1" of the NCBI37 set - confirm against the server's ID scheme.
reference = c.get_reference(
    reference_id="WyJOQ0JJMzciLCIxIl0",
)
print(reference)
In [6]:
# Request the bases of the reference between positions 15000 and 16000,
# then print the sequence followed by its length.
# NOTE(review): presumably start is inclusive and end exclusive, which would
# give 1000 bases - confirm against the printed length.
reference_bases = c.list_reference_bases(
    "WyJOQ0JJMzciLCIxIl0",
    start=15000,
    end=16000,
)
print(reference_bases)
print(len(reference_bases))
For the full schema of the reference service, see the GA4GH schemas documentation: https://ga4gh-schemas.readthedocs.io/en/latest/schemas/reference_service.proto.html