In [1]:
import json, requests
import pandas as pd

# BASE = "http://localhost:3000/" # Local machine
#BASE = "http://192.168.99.100:3000/" # Local Docker instance
# BASE = "http://192.168.99.100:8080/idmapping/v1/" # Agent on Docker
BASE = "http://ec2-52-37-236-192.us-west-2.compute.amazonaws.com:3000/" # EC2

# BASE = 'http://52.35.61.6:8080/idmapping/v1/'

def jprint(data):
    print(json.dumps(data, indent=4))

# Mixed-species queries are allowed - e.g., human, mouse, yeast, and fly IDs in one request
query1 = {
    "ids": ["rAd5", "p53", "mapk1"]
}

res1 = requests.post(BASE + 'map', json=query1)
res_json = res1.json()

# jprint(res_json)
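
The full response can be large, so the dump above is commented out; a quick check of the HTTP status and the top-level keys (assuming the response parses to the same "matched"/"unmatched" object shown in the later cells) is often enough:

In [ ]:
# Quick sanity check: HTTP status and top-level keys of the response,
# without printing the whole payload.
print(res1.status_code)
print(list(res_json.keys()))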

In [3]:
# Arabidopsis (arath) gene symbols, with the result restricted to selected ID types
query1 = {
    "ids": ["TT5", "TT4"],
    "idTypes": ["GeneID", "Symbol", "UniProtKB-ID", "Synonyms"]
}

res1 = requests.post(BASE + 'map', json=query1)
res_json = res1.json()

# print(len(res_json["matched"]))
jprint(res_json)


{
    "matched": [
        {
            "matches": {
                "UniProtKB-ID": "CFI1_ARATH",
                "Symbol": "TT5",
                "GeneID": "824678",
                "Synonyms": [
                    "A11",
                    "CFI",
                    "CHALCONE FLAVANONE ISOMERASE",
                    "CHI",
                    "TRANSPARENT TESTA 5"
                ]
            },
            "in": "TT5",
            "inType": "Symbol",
            "species": "arath"
        },
        {
            "matches": {
                "UniProtKB-ID": "Q460R0_ARATH",
                "Symbol": "TT4",
                "GeneID": "831241",
                "Synonyms": [
                    "ATCHS",
                    "CHALCONE SYNTHASE",
                    "CHALCONE/STILBENE SYNTHASE",
                    "CHS",
                    "MAC12.28",
                    "MAC12_28",
                    "NARINGENIN-CHALCONE SYNTHASE",
                    "TRANSPARENT TESTA 4"
                ]
            },
            "in": "TT4",
            "inType": "Symbol",
            "species": "arath"
        }
    ],
    "unmatched": []
}
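
Because each matched entry has the same shape, the result is easy to flatten into a pandas DataFrame. A minimal sketch, assuming the response structure shown above (the multi-valued "Synonyms" field is dropped to keep one row per input ID):

In [ ]:
# Flatten the "matched" entries into a table (sketch; assumes the
# response structure shown above).
rows = []
for m in res_json["matched"]:
    row = {"in": m["in"], "inType": m["inType"], "species": m["species"]}
    row.update({k: v for k, v in m["matches"].items() if k != "Synonyms"})
    rows.append(row)

pd.DataFrame(rows)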

In [11]:
import json, requests

# Utility function to display JSON
def jprint(data):
    print(json.dumps(data, indent=4))

# Mixed-species query: Antp (fly), HOXA7 (human/mouse), and TT4 (Arabidopsis)
query = {
    "ids": ["Antp", "HOXA7", "TT4"],
    "idTypes": ["GeneID", "Symbol", "UniProtKB-ID", "Synonyms"]
}

res = requests.post(BASE + 'map', json=query)
jprint(res.json())


{
    "unmatched": [],
    "matched": [
        {
            "inType": "Synonyms",
            "matches": {
                "GeneID": "3204",
                "Symbol": "HOXA7",
                "Synonyms": [
                    "ANTP",
                    "HOX1",
                    "HOX1.1",
                    "HOX1A"
                ],
                "UniProtKB-ID": "HXA7_HUMAN"
            },
            "in": "Antp",
            "species": "human"
        },
        {
            "inType": "Synonyms",
            "matches": {
                "GeneID": "40835",
                "Symbol": "Antp",
                "Synonyms": [
                    "3.4",
                    "ANT-C",
                    "ANT-P",
                    "ANTC",
                    "ANTP",
                    "Ant",
                    "AntP",
                    "AntP1",
                    "Antp P1",
                    "Antp P2",
                    "Antp1",
                    "Aus",
                    "BG:DS07700.1",
                    "CG1028",
                    "DMANTPE1",
                    "DRO15DC96Z",
                    "DmAntp",
                    "Dmel\\CG1028",
                    "Hu",
                    "Ns",
                    "Scx",
                    "antp",
                    "l(3)84Ba"
                ],
                "UniProtKB-ID": "Q7KSY7_DROME"
            },
            "in": "Antp",
            "species": "fly"
        },
        {
            "inType": "Symbol",
            "matches": {
                "GeneID": "3204",
                "Symbol": "HOXA7",
                "Synonyms": [
                    "ANTP",
                    "HOX1",
                    "HOX1.1",
                    "HOX1A"
                ],
                "UniProtKB-ID": "HXA7_HUMAN"
            },
            "in": "HOXA7",
            "species": "human"
        },
        {
            "inType": "Symbol",
            "matches": {
                "GeneID": "15404",
                "Symbol": "Hoxa7",
                "Synonyms": [
                    "AV118143",
                    "Hox-1.1",
                    "M6"
                ],
                "UniProtKB-ID": "Q8JZW2_MOUSE"
            },
            "in": "HOXA7",
            "species": "mouse"
        },
        {
            "inType": "Symbol",
            "matches": {
                "GeneID": "831241",
                "Symbol": "TT4",
                "Synonyms": [
                    "ATCHS",
                    "CHALCONE SYNTHASE",
                    "CHALCONE/STILBENE SYNTHASE",
                    "CHS",
                    "MAC12.28",
                    "MAC12_28",
                    "NARINGENIN-CHALCONE SYNTHASE",
                    "TRANSPARENT TESTA 4"
                ],
                "UniProtKB-ID": "Q460R0_ARATH"
            },
            "in": "TT4",
            "species": "arath"
        }
    ]
}
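
Note that one input ID can produce several matches: "Antp" above hits both the human synonym and the fly symbol, and "HOXA7" hits human and mouse. A small sketch (assuming the same response structure) that groups the hits per input ID:

In [ ]:
# Group matches by input ID to make multi-species hits explicit
# (sketch; assumes the "matched" structure shown above).
from collections import defaultdict

by_input = defaultdict(list)
for m in res.json()["matched"]:
    by_input[m["in"]].append(m["species"])

for input_id, species in by_input.items():
    print(input_id, "->", species)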

In [5]:
yeast_genes = pd.read_csv("./yeast_genes.txt", names=["GeneID"], dtype={"GeneID": str})
print(len(yeast_genes))

id_list = yeast_genes["GeneID"].tolist()

query_heavy = {
    "ids": id_list, # List of yeast genes
    "species": "yeast"
}

jprint(query_heavy)


---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-5-e681e912b721> in <module>()
----> 1 yeast_genes = pd.read_csv("./yeast_genes.txt", names=["GeneID"], dtype={"GeneID": str})
      2 print(len(yeast_genes))
      3 
      4 id_list = yeast_genes["GeneID"].tolist()
      5 

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    273 
    274     # Create the parser.
--> 275     parser = TextFileReader(filepath_or_buffer, **kwds)
    276 
    277     if (nrows is not None) and (chunksize is not None):

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    588             self.options['has_index_names'] = kwds['has_index_names']
    589 
--> 590         self._make_engine(self.engine)
    591 
    592     def _get_options_with_defaults(self, engine):

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    729     def _make_engine(self, engine='c'):
    730         if engine == 'c':
--> 731             self._engine = CParserWrapper(self.f, **self.options)
    732         else:
    733             if engine == 'python':

/Users/kono/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1101         kwds['allow_leading_cols'] = self.index_col is not False
   1102 
-> 1103         self._reader = _parser.TextReader(src, **kwds)
   1104 
   1105         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()

OSError: File b'./yeast_genes.txt' does not exist

In [ ]:
q2 = {
    "ids": [
        "YAL003W",
        "YAL030W",
        "YAL038W",
        "YAL040C",
        "YAR007C",
        "YBL005W",
        "YBL021C",
        "YBL026W",
        "YBL050W",
        "YBL069W",
        "YBL079W",
        "YBR018C",
        "YBR019C",
        "YBR020W",
        "YBR043C",
        "YBR045C",
        "YBR050C",
        "YBR072W",
        "YBR093C",
        "YBR109C",
        "YBR112C",
        "YBR118W",
        "YBR135W",
        "YBR155W",
        "YBR160W"],
    "species": "yeast"
}

res_large = requests.post(BASE + 'labels', json=q2)

jprint(res_large.json())
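
The pd.read_csv cell above failed because ./yeast_genes.txt was missing; one way to recreate a small stand-in from the ID list in this cell (a sketch, not the original file):

In [ ]:
# Write the yeast ORF IDs above to ./yeast_genes.txt so the earlier
# pd.read_csv cell can be re-run (sketch; stand-in for the missing file).
with open("./yeast_genes.txt", "w") as f:
    f.write("\n".join(q2["ids"]) + "\n")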

Test with very large input

Such a file can be created from a BioGRID MITAB network file, for example:

cat BIOGRID-ORGANISM-Homo_sapiens-3.4.129.mitab | awk -F"\t" '{print $1 "\n" $2}' | awk -F":" '{print $2}' | uniq > ~/Desktop/human_genes_list_large.txt
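
A rough pandas equivalent of the shell pipeline above (a sketch; it assumes the first two MITAB columns hold "prefix:geneid"-style interactor IDs and drops all duplicates, not just adjacent ones as uniq does):

In [ ]:
# Sketch: extract interactor IDs from the first two MITAB columns and
# write one unique ID per line (assumes "prefix:geneid"-style values).
mitab = pd.read_csv("BIOGRID-ORGANISM-Homo_sapiens-3.4.129.mitab",
                    sep="\t", header=None, usecols=[0, 1],
                    dtype=str, comment="#")
ids = pd.concat([mitab[0], mitab[1]]).str.split(":").str[1]

with open("./human_genes_list_large.txt", "w") as f:
    f.write("\n".join(ids.drop_duplicates()) + "\n")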

In [ ]:
import pandas as pd

large_gene_list = pd.read_csv("./human_genes_list_large.txt", names=["GeneID"], dtype={"GeneID": str})

In [ ]:
len(large_gene_list)

In [ ]:
id_list = large_gene_list["GeneID"].tolist()

query_heavy = {
    "ids": id_list, # Huge list!
}

res_large = requests.post(BASE + 'map', json=query_heavy)

In [ ]:
largeJS = res_large.json()

print(len(largeJS))
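
len() on the response object only counts its top-level keys; the more informative numbers are the sizes of the "matched" and "unmatched" lists (assuming the large query returns the same structure as the small ones above):

In [ ]:
# Count mapped vs. unmapped IDs (assumes the same "matched"/"unmatched"
# response structure as the smaller queries).
print(len(largeJS["matched"]), "matched")
print(len(largeJS["unmatched"]), "unmatched")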

In [ ]:
# Randomly pick 2000 IDs from the original list and map them
import random

list_size = len(id_list)

def call_random(server_location):
    random_ids = []

    for i in range(0, 2000):
        next_id = id_list[random.randint(0, list_size-1)]
        random_ids.append(next_id)

    query_rand = {
        "ids": random_ids,
    }

    res_rand = requests.post(server_location + 'map', json=query_rand)

In [ ]:
%%timeit -n 100

call_random("http://192.168.99.100:3000/")

In [ ]:
%%timeit -n 100

call_random("http://192.168.99.100:8080/idmapping/v1/")