In [2]:
import json, requests
import pandas as pd

# BASE = "http://localhost:3000/" # Local machine
BASE = "http://192.168.99.100:3000/" # Local Docker instance
# BASE = "http://192.168.99.100:8080/idmapping/v1/" # Agent on Docker
# BASE = "http://52.33.174.107:3000/" # EC2

# BASE = 'http://52.35.61.6:8080/idmapping/v1/'

def jprint(data):
    print(json.dumps(data, indent=4))

# Mixed species query allowed - human, mouse, yeast, and fly
query1 = {
    "ids": ["rAd5", "p53", "mapk1"]
}

res1 = requests.post(BASE + 'map', json=query1)
res_json = res1.json()

jprint(res_json)


{
    "unmatched": [],
    "matched": [
        {
            "matches": {
                "Symbol": "RAD5",
                "EMBL-CDS": [
                    "AAA34951.1",
                    "CAA97556.1",
                    "AAB23590.1",
                    "DAA09350.1"
                ],
                "EMBL": [
                    "M96644",
                    "Z73204",
                    "S46103",
                    "BK006945"
                ],
                "PIR": "S64859",
                "UniRef100": "UniRef100_P32849",
                "chromosome": "XII",
                "GO": [
                    "GO:0000781",
                    "GO:0005737",
                    "GO:0000790",
                    "GO:0005524",
                    "GO:0008094",
                    "GO:0000400",
                    "GO:0009378",
                    "GO:0000403",
                    "GO:0008270",
                    "GO:0032508",
                    "GO:0006302",
                    "GO:0042276",
                    "GO:0010994",
                    "GO:0006301",
                    "GO:0000209"
                ],
                "type_of_gene": "protein-coding",
                "GI": [
                    "6323060",
                    "417587",
                    "172347",
                    "257212",
                    "1360348"
                ],
                "UniParc": "UPI000013307E",
                "UniProtKB-AC": "P32849",
                "UniRef50": "UniRef50_P32849",
                "Synonyms": [
                    "REV2",
                    "SNM2"
                ],
                "RefSeq": "NP_013132.1",
                "dbXrefs": "SGD:S000004022",
                "tax_id": "559292",
                "UniRef90": "UniRef90_P32849",
                "UniProtKB-ID": "RAD5_YEAST",
                "LocusTag": "YLR032W",
                "description": "DNA helicase RAD5",
                "GeneID": "850719"
            },
            "in": "rAd5",
            "species": "yeast",
            "inType": "Symbol"
        },
        {
            "matches": {
                "GO": [
                    "GO:0005737",
                    "GO:0005874",
                    "GO:0005525",
                    "GO:0003924",
                    "GO:0005200",
                    "GO:0007017"
                ],
                "UniRef100": "UniRef100_A0A0B4LGH1",
                "chromosome": "2R",
                "EMBL-CDS": "AHN56630.1",
                "UniProtKB-AC": "A0A0B4LGH1",
                "map_location": "60C6-60C6",
                "RefSeq": "NP_001286835.1",
                "type_of_gene": "protein-coding",
                "UniRef90": "UniRef90_P08841",
                "dbXrefs": "FLYBASE:FBgn0003888",
                "Symbol": "betaTub60D",
                "EMBL": "AE013599",
                "UniProtKB-ID": "A0A0B4LGH1_DROME",
                "Full_name_from_nomenclature_authority": "beta-Tubulin at 60D",
                "GeneID": "37888",
                "GI": [
                    "599127420",
                    "665403429"
                ],
                "UniParc": "UPI00001E1043",
                "UniRef50": "UniRef50_P07437",
                "description": "beta-Tubulin at 60D",
                "Synonyms": [
                    "143391_i_at",
                    "3t",
                    "B3t",
                    "BETA 60D",
                    "CG3401",
                    "D.m.BETA-60D",
                    "DTB3",
                    "Dmbeta3",
                    "Dmel\\CG3401",
                    "T",
                    "Tub",
                    "Tub60D",
                    "beta-Tub60D",
                    "beta-Tub6D",
                    "beta-tub",
                    "beta3",
                    "beta3 TU",
                    "beta3-Tub",
                    "beta3-tubulin",
                    "beta3TUB",
                    "beta3Tub",
                    "beta3t",
                    "beta3tub",
                    "beta60C",
                    "betaTub",
                    "betaTub3",
                    "betaTub60C",
                    "beta[[3]] tubulin",
                    "beta[[3]]-Tub",
                    "beta[[3]]-tubulin",
                    "betatub60D",
                    "p50",
                    "p50/tubulin",
                    "p53",
                    "p53/tubulin"
                ],
                "tax_id": "7227",
                "LocusTag": "Dmel_CG3401",
                "UniGene": "Dm.21376"
            },
            "in": "p53",
            "species": "fly",
            "inType": "Synonyms"
        },
        {
            "matches": {
                "GO": [
                    "GO:0005737",
                    "GO:0005634",
                    "GO:0005657",
                    "GO:0003684",
                    "GO:0003690",
                    "GO:0046872",
                    "GO:0002039",
                    "GO:0043565",
                    "GO:0003700",
                    "GO:0044212",
                    "GO:0006915",
                    "GO:0007049",
                    "GO:0034644",
                    "GO:0031065",
                    "GO:0043525",
                    "GO:0010165",
                    "GO:0006351"
                ],
                "map_location": "17p13.1",
                "UniRef100": "UniRef100_A0A087WXZ1",
                "chromosome": "17",
                "EMBL-CDS": [],
                "UniProtKB-AC": "A0A087WXZ1",
                "Synonyms": [
                    "BCC7",
                    "LFS1",
                    "P53",
                    "TRP53"
                ],
                "RefSeq": "NP_001263627.1",
                "type_of_gene": "protein-coding",
                "UniRef90": "UniRef90_P04637",
                "dbXrefs": [
                    "MIM:191170",
                    "HGNC:HGNC:11998",
                    "Ensembl:ENSG00000141510",
                    "HPRD:01859",
                    "Vega:OTTHUMG00000162125"
                ],
                "Symbol": "TP53",
                "Ensembl_TRS": "ENST00000618944",
                "EMBL": [
                    "AC007421",
                    "AC087388"
                ],
                "UniProtKB-ID": "A0A087WXZ1_HUMAN",
                "Full_name_from_nomenclature_authority": "tumor protein p53",
                "GI": "454545207",
                "UniParc": "UPI0002B83575",
                "Ensembl_PRO": "ENSP00000481401",
                "UniRef50": "UniRef50_P04637",
                "description": "tumor protein p53",
                "Ensembl": "ENSG00000141510",
                "tax_id": "9606",
                "GeneID": "7157",
                "UniGene": [
                    "Hs.437460",
                    "Hs.740601"
                ]
            },
            "in": "p53",
            "species": "human",
            "inType": "Synonyms"
        },
        {
            "matches": {
                "Symbol": "Trp53-ps",
                "Synonyms": "p53",
                "description": "transformation related protein 53, pseudogene",
                "chromosome": "17",
                "tax_id": "10090",
                "Full_name_from_nomenclature_authority": "transformation related protein 53, pseudogene",
                "dbXrefs": "MGI:MGI:98835",
                "type_of_gene": "pseudo",
                "map_location": [
                    "17 C",
                    "17 28.3 cM"
                ],
                "GeneID": "22060"
            },
            "in": "p53",
            "species": "mouse",
            "inType": "Synonyms"
        },
        {
            "matches": {
                "GO": [
                    "GO:0005622",
                    "GO:0005524",
                    "GO:0004707"
                ],
                "UniRef100": "UniRef100_Q499G7",
                "chromosome": "22",
                "EMBL-CDS": "AAH99905.1",
                "UniProtKB-AC": "Q499G7",
                "map_location": "22q11.21",
                "RefSeq": "NP_002736.3",
                "type_of_gene": "protein-coding",
                "UniRef90": "UniRef90_P28482",
                "dbXrefs": [
                    "MIM:176948",
                    "HGNC:HGNC:6871",
                    "Ensembl:ENSG00000100030",
                    "HPRD:01496",
                    "Vega:OTTHUMG00000030508"
                ],
                "Symbol": "MAPK1",
                "EMBL": "BC099905",
                "UniProtKB-ID": "Q499G7_HUMAN",
                "Full_name_from_nomenclature_authority": "mitogen-activated protein kinase 1",
                "GeneID": "5594",
                "GI": [
                    "66932916",
                    "71533985"
                ],
                "UniParc": "UPI00005725B4",
                "UniRef50": "UniRef50_P28482",
                "Synonyms": [
                    "ERK",
                    "ERK-2",
                    "ERK2",
                    "ERT1",
                    "MAPK2",
                    "P42MAPK",
                    "PRKM1",
                    "PRKM2",
                    "p38",
                    "p40",
                    "p41",
                    "p41mapk",
                    "p42-MAPK"
                ],
                "tax_id": "9606",
                "description": "mitogen-activated protein kinase 1",
                "UniGene": "Hs.431850"
            },
            "in": "mapk1",
            "species": "human",
            "inType": "Symbol"
        },
        {
            "matches": {
                "GO": [
                    "GO:0030424",
                    "GO:0005901",
                    "GO:0005737",
                    "GO:0005856",
                    "GO:0005829",
                    "GO:0032839",
                    "GO:0005769",
                    "GO:0070062",
                    "GO:0005925",
                    "GO:0005794",
                    "GO:0005770",
                    "GO:0015630",
                    "GO:0005815",
                    "GO:0005739",
                    "GO:0072686",
                    "GO:0005654",
                    "GO:0005634",
                    "GO:0043204",
                    "GO:0043234",
                    "GO:0031143",
                    "GO:0005524",
                    "GO:0016301",
                    "GO:0004707",
                    "GO:0019902",
                    "GO:0001784",
                    "GO:0004672",
                    "GO:0004674",
                    "GO:0008353",
                    "GO:0006915",
                    "GO:0050853",
                    "GO:0060020",
                    "GO:0061308",
                    "GO:0072584",
                    "GO:0007049",
                    "GO:0006974",
                    "GO:0097011",
                    "GO:0019858",
                    "GO:0038127",
                    "GO:0070371",
                    "GO:0060324",
                    "GO:0007507",
                    "GO:0060716",
                    "GO:0031663",
                    "GO:0060291",
                    "GO:0060425",
                    "GO:0033598",
                    "GO:0000165",
                    "GO:0000189",
                    "GO:0045596",
                    "GO:0014032",
                    "GO:0009887",
                    "GO:0042473",
                    "GO:0018105",
                    "GO:0018107",
                    "GO:0030335",
                    "GO:0008284",
                    "GO:0010800",
                    "GO:0045893",
                    "GO:0045727",
                    "GO:0006468",
                    "GO:0051493",
                    "GO:2000641",
                    "GO:0090170",
                    "GO:0031647",
                    "GO:0051090",
                    "GO:0032872",
                    "GO:0070849",
                    "GO:0043627",
                    "GO:0043330",
                    "GO:0032496",
                    "GO:0009636",
                    "GO:0019233",
                    "GO:0050852",
                    "GO:0048538",
                    "GO:0030878",
                    "GO:0060440",
                    "GO:0006351"
                ],
                "map_location": [
                    "16 A3",
                    "16 10.53 cM"
                ],
                "PIR": "S16444",
                "UniRef100": "UniRef100_P63085",
                "chromosome": "16",
                "EMBL-CDS": [
                    "CAA41548.1",
                    "BAC29053.1",
                    "BAC33251.1",
                    "BAC40044.1",
                    "BAE21053.1",
                    "AAH58258.1",
                    "BAA01733.1"
                ],
                "UniProtKB-AC": "P63085",
                "Synonyms": [
                    "9030612K14Rik",
                    "AA407128",
                    "AU018647",
                    "C78273",
                    "ERK",
                    "Erk2",
                    "MAPK2",
                    "PRKM2",
                    "Prkm1",
                    "p41mapk",
                    "p42mapk"
                ],
                "RefSeq": [
                    "NP_001033752.1",
                    "NP_036079.1",
                    "XP_006522210.1"
                ],
                "type_of_gene": "protein-coding",
                "UniRef90": "UniRef90_P28482",
                "GeneID": "26413",
                "Symbol": "Mapk1",
                "Ensembl_TRS": [
                    "ENSMUST00000023462",
                    "ENSMUST00000069107",
                    "ENSMUST00000115731"
                ],
                "EMBL": [
                    "X58712",
                    "AK035386",
                    "AK048127",
                    "AK087925",
                    "AK132241",
                    "BC058258",
                    "D10939"
                ],
                "UniProtKB-ID": "MK01_MOUSE",
                "Full_name_from_nomenclature_authority": "mitogen-activated protein kinase 1",
                "GI": [
                    "52001076",
                    "53002",
                    "34849482",
                    "26352828",
                    "148665021",
                    "26330646",
                    "74205494",
                    "26339160",
                    "84579909",
                    "6754632",
                    "286076",
                    "568995370"
                ],
                "UniParc": "UPI0000003FEA",
                "Ensembl_PRO": [
                    "ENSMUSP00000023462",
                    "ENSMUSP00000065983",
                    "ENSMUSP00000111396"
                ],
                "UniRef50": "UniRef50_P28482",
                "description": "mitogen-activated protein kinase 1",
                "Ensembl": "ENSMUSG00000063358",
                "tax_id": "10090",
                "dbXrefs": [
                    "MGI:MGI:1346858",
                    "Ensembl:ENSMUSG00000063358",
                    "Vega:OTTMUSG00000020203"
                ],
                "UniGene": "Mm.196581"
            },
            "in": "mapk1",
            "species": "mouse",
            "inType": "Symbol"
        }
    ]
}

In [3]:
# Mixed species query allowed - human, mouse, yeast, and fly
query1 = {
    "ids": ["7157", "P04637"],
    "species": "human"
}

res1 = requests.post(BASE + 'labels', json=query1)
res_json = res1.json()

# print(len(res_json["matched"]))
jprint(res_json)


{
    "P04637": "TP53",
    "7157": "TP53"
}

In [4]:
import json, requests

# Utility function to display JSON
def jprint(data):
    print(json.dumps(data, indent=4))

# Mixed species query allowed - human, mouse, yeast, and fly
query = {
    "ids": ["Antp", "HOXA7"],
    "idTypes": ["GeneID", "Symbol", "UniProtKB-ID", "Synonyms"]
}

res = requests.post(BASE + 'map', json=query1)
jprint(res1.json())


{
    "P04637": "TP53",
    "7157": "TP53"
}

In [5]:
yeast_genes = pd.read_csv("./yeast_genes.txt", names=["GeneID"], dtype={"GeneID": str})
print(len(yeast_genes))

id_list = yeast_genes["GeneID"].tolist()

query_heavy = {
    "ids": id_list, # List of yeast genes
    "species": "yeast"
}

jprint(query_heavy)


---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-5-e681e912b721> in <module>()
----> 1 yeast_genes = pd.read_csv("./yeast_genes.txt", names=["GeneID"], dtype={"GeneID": str})
      2 print(len(yeast_genes))
      3 
      4 id_list = yeast_genes["GeneID"].tolist()
      5 

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    273 
    274     # Create the parser.
--> 275     parser = TextFileReader(filepath_or_buffer, **kwds)
    276 
    277     if (nrows is not None) and (chunksize is not None):

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    588             self.options['has_index_names'] = kwds['has_index_names']
    589 
--> 590         self._make_engine(self.engine)
    591 
    592     def _get_options_with_defaults(self, engine):

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    729     def _make_engine(self, engine='c'):
    730         if engine == 'c':
--> 731             self._engine = CParserWrapper(self.f, **self.options)
    732         else:
    733             if engine == 'python':

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1101         kwds['allow_leading_cols'] = self.index_col is not False
   1102 
-> 1103         self._reader = _parser.TextReader(src, **kwds)
   1104 
   1105         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()

OSError: File b'./yeast_genes.txt' does not exist

In [ ]:
q2 = {
    "ids": [
        "YAL003W",
        "YAL030W",
        "YAL038W",
        "YAL040C",
        "YAR007C",
        "YBL005W",
        "YBL021C",
        "YBL026W",
        "YBL050W",
        "YBL069W",
        "YBL079W",
        "YBR018C",
        "YBR019C",
        "YBR020W",
        "YBR043C",
        "YBR045C",
        "YBR050C",
        "YBR072W",
        "YBR093C",
        "YBR109C",
        "YBR112C",
        "YBR118W",
        "YBR135W",
        "YBR155W",
        "YBR160W"],
    "species": "yeast"
}

res_large = requests.post(BASE + 'labels', json=q2)

jprint(res_large.json())

test with very large input

Such file can be created from BioGRID networks...

cat BIOGRID-ORGANISM-Homo_sapiens-3.4.129.mitab | awk -F"\t" '{print $1 "\n" $2}' | awk -F":" '{print $2}' | uniq > ~/Desktop/human_genes_list_large.txt

In [ ]:
import pandas as pd

large_gene_list = pd.read_csv("./human_genes_list_large.txt", names=["GeneID"], dtype={"GeneID": str})

In [ ]:
len(large_gene_list)

In [ ]:
id_list = large_gene_list["GeneID"].tolist()

query_heavy = {
    "ids": id_list, # Huge list!
}

res_large = requests.post(BASE + 'map', data=json.dumps(query_heavy), headers=HEADERS)

In [ ]:
largeJS = res_large.json()

print(len(largeJS))

In [ ]:
# Randomly pick 100 IDs from original list
import random

list_size = len(id_list)

def call_random(server_location):
    random_ids = []

    for i in range(0, 2000):
        next_id = id_list[random.randint(0, list_size-1)]
        random_ids.append(next_id)

    query_rand = {
        "ids": random_ids,
    }

    res_rand = requests.post(server_location + 'map', data=json.dumps(query_rand), headers=HEADERS)

In [ ]:
%%timeit -n 100

call_random("http://192.168.99.100:3000/")

In [ ]:
%%timeit -n 100

call_random("http://192.168.99.100:8080/idmapping/v1/")