DAVID Web Service

For more information, please see: http://david.abcc.ncifcrf.gov/content.jsp?file=WS.html

Available functions:

authenticate()
- authenticate user by email address
- return true if user has registered email with DAVID knowledge base
addList(inputIds, idType, listName, listType)
- add a gene list or background list to current session
getAllAnnotationCategoryNames()
- return all available annotation category names
getAllListNames()
- return all list names
getAllPopulationNames()
- return background names
getChartReport(threshold, count)
- generate chart report
getConversionTypes()
- return all acceptable idTypes
getCurrentList()
- return the position of current list
getCurrentSpecies()
- return current species of the current list
getCurrentPopulation()
- return the position of current background list
getDefaultCategoryNames()
- return all default category names
getGeneClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
- generate gene cluster report
getGeneReportCategories()
- return gene report categories
- no argument needed
getListName(pos)
- get the name of a list
- argument is the position of the list
getListReport()
- generate list report
getSpecies()
- return species of the current list
getSummaryReport()
- return a summary report
getTableReport()
- generate table report
getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
- generate term cluster report
setCurrentList(pos)
- switch between gene lists
- argument is the position of the list
setCurrentPopulation(pos)
- switch between background lists
- argument is the position of the list
setCurrentSpecies(string)
- select the species to use; argument is a string of integers delimited by commas
setCategories()
- let user select categories
- argument is a string with category names delimited by commas
- return a list of validated category names



In [26]:

    
# we need to install suds (SOAP client used to talk to the DAVID web service) and python-nvd3 (used to plot the results)
!pip install --user --quiet suds
!pip install --user --quiet python-nvd3



In [27]:

    
# set your registered email address here;
# it is passed to the DAVID authenticate() call when the client is created below
email = ''



In [28]:

    
import sys
import pandas
from StringIO import StringIO
from suds.client import Client
# WSDL location of the DAVID web service; the suds Client is built from it
david_wsdl_url = 'http://david.abcc.ncifcrf.gov/webservice/services/DAVIDWebService?wsdl'
client = Client(david_wsdl_url)
# authenticate() is described above as returning true when the email is
# registered with the DAVID knowledge base.
# NOTE(review): `registered` is never checked in the visible code, so an
# unregistered (or empty) email is not reported here -- confirm whether
# that is intended.
registered = client.service.authenticate(email)



In [29]:

    
# set your input data here; for example, in Galaxy you could enter `get(4)`
# The file is read as tab-separated; its 'Entry' column is used further down
# as the list of UniProt accessions submitted to DAVID.
# NOTE(review): the path below is specific to one machine -- adjust it.
uniprot = pandas.read_csv('/home/bag/Downloads/uniprot-cytochrome.tab', sep='\t')



In [30]:

    
%%javascript
require.config({paths: {d3: "//d3js.org/d3.v3.min"}});



In [31]:

    
from IPython.display import HTML
from nvd3 import pieChart
import nvd3
# NOTE(review): presumably loads the d3/nvd3 JavaScript assets into the
# notebook from remote URLs (use_remote=True) so the charts can render --
# confirm against the python-nvd3 documentation.
nvd3.ipynb.initialize_javascript(use_remote=True)



In [32]:

    
# define a plotting function based on d3.js and nvd3
def pie_chart(x, y, name='piechart'):
    """
    Build an nvd3 pie chart and return its generated HTML content.

    x and y are the two data series given to the chart; in this notebook
    x holds the labels (cluster names) and y the matching scores.
    name must differ between plots, otherwise one plot overwrites the other.
    """
    # tooltip values are shown with a trailing " score"
    tooltip_options = {"tooltip": {"y_start": "", "y_end": " score"}}

    pie = pieChart(name=name, color_category='category20c', height=650, width=650)
    pie.set_containerheader("\n\n<h2>PieChart</h2>\n\n")
    pie.add_serie(y=y, x=x, extra=tooltip_options)

    # buildcontent() has to run before htmlcontent is read
    pie.buildcontent()
    return pie.htmlcontent



In [33]:

    
def david_setup(input_ids, id_type='UNIPROT_ACCESSION',
                bg_ids=None, bg_name='IPython_bg_name',
                list_name='IPython_example_list', category=''):
    """
    Upload a gene list (and optionally a background list) to DAVID and
    select the annotation categories for the following reports.

    Uses the module-level suds ``client``; every call goes through
    ``client.service``.

    input_ids: iterable of identifier strings, joined with commas and
        submitted with list type 0.
    id_type: identifier type of both lists (see getConversionTypes()).
    bg_ids: optional iterable of identifier strings for the background
        list, submitted with list type 1. ``None`` or an empty iterable
        means no background list is uploaded.
    bg_name: name under which the background list is stored.
    list_name: name under which the gene list is stored.
    category: category names delimited by commas, passed to setCategories().

    possible categories:
        * BBID,GOTERM_CC_FAT,BIOCARTA,GOTERM_MF_FAT,SMART,COG_ONTOLOGY,SP_PIR_KEYWORDS,
        KEGG_PATHWAY,INTERPRO,UP_SEQ_FEATURE,OMIM_DISEASE,GOTERM_BP_FAT,PIR_SUPERFAMILY

    Returns the suds service object, ready for the report calls.
    """
    david = client.service
    input_ids = ','.join(input_ids)

    # Materialise the background ids before testing for emptiness: truth-testing
    # a pandas Series / numpy array directly raises ValueError, and a mutable
    # list must not be used as the default argument.
    background = list(bg_ids) if bg_ids is not None else []
    joined_bg_ids = ','.join(background)

    # single-argument print(...) behaves the same on Python 2 and Python 3
    list_type = 0
    print('Percentage mapped: %s' % david.addList(input_ids, id_type, list_name, list_type))
    if background:
        list_type = 1
        print('Percentage mapped (background): %s' % david.addList(joined_bg_ids, id_type, bg_name, list_type))

    david.setCategories(category)
    return david



In [34]:

    
def report_to_table(request):
    """
    Converts a DAVID report to a pandas DataFrame.

    request: iterable of report rows. Each row is converted with
        ``dict(row)``, so it must be a mapping or an iterable of
        (key, value) pairs.

    Returns a DataFrame with one row per report row and one column per key.
    An empty report gives an empty DataFrame.
    """
    records = [dict(row) for row in request]
    # The constructor is the documented way to build a frame from a list of
    # records; no throwaway instance or from_dict() detour is needed.
    return pandas.DataFrame(records)



In [35]:

    
# Submit the first 100 values of the 'Entry' column as a UniProt accession
# list and restrict the annotation categories to GOTERM_CC_FAT.
david = david_setup(uniprot['Entry'][:100], 'UNIPROT_ACCESSION', category='GOTERM_CC_FAT')









    



Percentage mapped: 0.99



In [36]:

    
# Chart report: per the function list above the signature is
# getChartReport(threshold, count), so thd is the threshold and ct the count.
ct = 2
thd = 0.1
request = david.getChartReport(thd, ct)
table = report_to_table(request)
# display only the selected columns of the report
table[['categoryName','termName', 'listHits', 'percent', 'ease', 'foldEnrichment', 'benjamini']]









    Out[36]:






  
    
      
      categoryName
      termName
      listHits
      percent
      ease
      foldEnrichment
      benjamini
    
  
  
    
      0 
       GOTERM_CC_FAT
                                GO:0005739~mitochondrion
       59
       60.204082
       1.250867e-40
        7.708658
       1.676161e-38
    
    
      1 
       GOTERM_CC_FAT
                       GO:0005740~mitochondrial envelope
       44
       44.897959
       2.134157e-40
       14.914028
       1.429885e-38
    
    
      2 
       GOTERM_CC_FAT
                       GO:0031966~mitochondrial membrane
       42
       42.857143
       1.408322e-38
       15.139425
       6.290504e-37
    
    
      3 
       GOTERM_CC_FAT
                           GO:0031090~organelle membrane
       56
       57.142857
       1.448194e-36
        7.256610
       4.851449e-35
    
    
      4 
       GOTERM_CC_FAT
                           GO:0044429~mitochondrial part
       45
       45.918367
       3.648952e-35
       10.741176
       9.779191e-34
    
    
      5 
       GOTERM_CC_FAT
                           GO:0031967~organelle envelope
       45
       45.918367
       2.176204e-34
       10.308065
       4.860189e-33
    
    
      6 
       GOTERM_CC_FAT
                                     GO:0031975~envelope
       45
       45.918367
       2.501831e-34
       10.274920
       4.789220e-33
    
    
      7 
       GOTERM_CC_FAT
                 GO:0005743~mitochondrial inner membrane
       29
       29.591837
       3.294510e-24
       13.459622
       5.518305e-23
    
    
      8 
       GOTERM_CC_FAT
                     GO:0019866~organelle inner membrane
       29
       29.591837
       2.454137e-23
       12.518676
       3.653938e-22
    
    
      9 
       GOTERM_CC_FAT
                            GO:0070469~respiratory chain
       14
       14.285714
       3.276517e-15
       26.510815
       4.463097e-14
    
    
      10
       GOTERM_CC_FAT
               GO:0005789~endoplasmic reticulum membrane
       17
       17.346939
       4.233412e-11
        8.975382
       5.157067e-10
    
    
      11
       GOTERM_CC_FAT
                  GO:0044455~mitochondrial membrane part
       13
       13.265306
       5.626435e-11
       14.770311
       6.282850e-10
    
    
      12
       GOTERM_CC_FAT
       GO:0042175~nuclear envelope-endoplasmic reticu...
       17
       17.346939
       9.522033e-11
        8.501330
       9.815013e-10
    
    
      13
       GOTERM_CC_FAT
              GO:0005746~mitochondrial respiratory chain
       10
       10.204082
       5.125087e-10
       22.190972
       4.905440e-09
    
    
      14
       GOTERM_CC_FAT
                   GO:0044432~endoplasmic reticulum part
       17
       17.346939
       1.800255e-09
        6.957861
       1.608228e-08
    
    
      15
       GOTERM_CC_FAT
                                    GO:0005792~microsome
       14
       14.285714
       8.898764e-09
        8.389498
       7.452714e-08
    
    
      16
       GOTERM_CC_FAT
                 GO:0005741~mitochondrial outer membrane
       10
       10.204082
       1.134508e-08
       15.780247
       8.942589e-08
    
    
      17
       GOTERM_CC_FAT
                           GO:0042598~vesicular fraction
       14
       14.285714
       1.261559e-08
        8.148816
       9.391605e-08
    
    
      18
       GOTERM_CC_FAT
                     GO:0031968~organelle outer membrane
       10
       10.204082
       4.077342e-08
       13.655983
       2.875599e-07
    
    
      19
       GOTERM_CC_FAT
                               GO:0019867~outer membrane
       10
       10.204082
       5.674731e-08
       13.150206
       3.802069e-07
    
    
      20
       GOTERM_CC_FAT
                        GO:0005783~endoplasmic reticulum
       23
       23.469388
       4.543511e-07
        3.402616
       2.899189e-06
    
    
      21
       GOTERM_CC_FAT
                          GO:0012505~endomembrane system
       19
       19.387755
       5.859228e-06
        3.450668
       3.568749e-05
    
    
      22
       GOTERM_CC_FAT
       GO:0005750~mitochondrial respiratory chain com...
        4
        4.081633
       6.428897e-06
       94.681481
       3.745473e-05
    
    
      23
       GOTERM_CC_FAT
                GO:0045275~respiratory chain complex III
        4
        4.081633
       6.428897e-06
       94.681481
       3.745473e-05
    
    
      24
       GOTERM_CC_FAT
                                GO:0000267~cell fraction
       22
       22.448980
       1.239361e-05
        2.885031
       6.919571e-05
    
    
      25
       GOTERM_CC_FAT
                            GO:0005624~membrane fraction
       17
       17.346939
       1.289663e-04
        2.984398
       6.910649e-04
    
    
      26
       GOTERM_CC_FAT
                           GO:0005626~insoluble fraction
       17
       17.346939
       1.961841e-04
        2.877685
       1.010691e-03
    
    
      27
       GOTERM_CC_FAT
                        GO:0019898~extrinsic to membrane
       11
       11.224490
       2.235796e-03
        3.162438
       1.104712e-02
    
    
      28
       GOTERM_CC_FAT
                                      GO:0005829~cytosol
       18
       18.367347
       9.570662e-03
        1.922105
       4.498004e-02
    
    
      29
       GOTERM_CC_FAT
                        GO:0043020~NADPH oxidase complex
        2
        2.040816
       6.096751e-02
       31.560494
       2.522341e-01
    
    
      30
       GOTERM_CC_FAT
           GO:0032592~integral to mitochondrial membrane
        2
        2.040816
       8.046160e-02
       23.670370
       3.124904e-01



In [37]:

    
# Gene cluster report; the five settings are passed positionally in the order
# getGeneClusterReport(overlap, initialSeed, finalSeed, linkage, kappa).
overlap = 2
initialSeed = 2
finalSeed = 1
linkage = 1
kappa = 1
request = david.getGeneClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# display only the cluster name and its score
table[['name', 'score']]









    Out[37]:






  
    
      
      name
      score
    
  
  
    
      0 
        Gene Cluster 2
       33.474752
    
    
      1 
        Gene Cluster 5
       33.474752
    
    
      2 
       Gene Cluster 25
       33.385599
    
    
      3 
       Gene Cluster 12
       31.105503
    
    
      4 
       Gene Cluster 24
       28.330592
    
    
      5 
       Gene Cluster 21
       27.484442
    
    
      6 
        Gene Cluster 6
       25.763687
    
    
      7 
       Gene Cluster 16
       25.763687
    
    
      8 
       Gene Cluster 10
       24.133724
    
    
      9 
        Gene Cluster 9
       23.591595
    
    
      10
       Gene Cluster 13
       23.391020
    
    
      11
       Gene Cluster 19
       23.227674
    
    
      12
       Gene Cluster 20
       23.227674
    
    
      13
       Gene Cluster 15
       22.794789
    
    
      14
        Gene Cluster 1
       22.590638
    
    
      15
        Gene Cluster 4
       22.129459
    
    
      16
        Gene Cluster 7
       21.967561
    
    
      17
       Gene Cluster 14
       21.967561
    
    
      18
       Gene Cluster 23
       21.967561
    
    
      19
       Gene Cluster 18
       13.502007
    
    
      20
       Gene Cluster 17
       11.906495
    
    
      21
       Gene Cluster 22
       11.247333
    
    
      22
        Gene Cluster 3
       10.775179
    
    
      23
        Gene Cluster 8
        2.359300
    
    
      24
       Gene Cluster 11
        0.783642



In [42]:

    
# Term cluster report; the five settings are passed positionally in the order
# getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa).
# This run uses overlap/seed values of 3; a later cell repeats it with 5.
overlap = 3
initialSeed = 3
finalSeed = 3
linkage = 0.5
kappa = 50
request = david.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# display only the cluster name and its score
table[['name', 'score']]









    Out[42]:






  
    
      
      name
      score
    
  
  
    
      0
                         GO:0005739~mitochondrion
       33.450913
    
    
      1
                     GO:0070469~respiratory chain
       11.341551
    
    
      2
       GO:0005746~mitochondrial respiratory chain
        6.558009
    
    
      3
        GO:0005789~endoplasmic reticulum membrane
        6.528910
    
    
      4
          GO:0005741~mitochondrial outer membrane
        6.149982
    
    
      5
                             GO:0043025~cell soma
        0.597256
    
    
      6
                   GO:0031410~cytoplasmic vesicle
        0.058873
    
    
      7
                           GO:0005654~nucleoplasm
        0.007770
    
    
      8
                   GO:0005615~extracellular space
        0.007698
    
    
      9
           GO:0005887~integral to plasma membrane
        0.000484



In [43]:

    
# Plot the term cluster scores by name; the explicit chart name "relaxed"
# keeps this plot from being overwritten by a later plot using the default name.
HTML(pie_chart(table['name'], table['score'], name="relaxed"))









    Out[43]:





    

PieChart



In [44]:

    
# Same term cluster report as before, with overlap and both seed values
# raised from 3 to 5; linkage and kappa are unchanged.
overlap = 5
initialSeed = 5
finalSeed = 5
linkage = 0.5
kappa = 50 
request = david.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# Python 2 print statement: prints the name/score columns as plain text
print table[['name', 'score']]









    



                                        name      score
0                   GO:0005739~mitochondrion  33.450913
1  GO:0005789~endoplasmic reticulum membrane   6.528910



In [45]:

    
# Plot the current table's scores by name, using pie_chart's default chart
# name ('piechart').
HTML(pie_chart(table['name'], table['score']))









    Out[45]:





    

PieChart

	categoryName	termName	listHits	percent	ease	foldEnrichment	benjamini
0	GOTERM_CC_FAT	GO:0005739~mitochondrion	59	60.204082	1.250867e-40	7.708658	1.676161e-38
1	GOTERM_CC_FAT	GO:0005740~mitochondrial envelope	44	44.897959	2.134157e-40	14.914028	1.429885e-38
2	GOTERM_CC_FAT	GO:0031966~mitochondrial membrane	42	42.857143	1.408322e-38	15.139425	6.290504e-37
3	GOTERM_CC_FAT	GO:0031090~organelle membrane	56	57.142857	1.448194e-36	7.256610	4.851449e-35
4	GOTERM_CC_FAT	GO:0044429~mitochondrial part	45	45.918367	3.648952e-35	10.741176	9.779191e-34
5	GOTERM_CC_FAT	GO:0031967~organelle envelope	45	45.918367	2.176204e-34	10.308065	4.860189e-33
6	GOTERM_CC_FAT	GO:0031975~envelope	45	45.918367	2.501831e-34	10.274920	4.789220e-33
7	GOTERM_CC_FAT	GO:0005743~mitochondrial inner membrane	29	29.591837	3.294510e-24	13.459622	5.518305e-23
8	GOTERM_CC_FAT	GO:0019866~organelle inner membrane	29	29.591837	2.454137e-23	12.518676	3.653938e-22
9	GOTERM_CC_FAT	GO:0070469~respiratory chain	14	14.285714	3.276517e-15	26.510815	4.463097e-14
10	GOTERM_CC_FAT	GO:0005789~endoplasmic reticulum membrane	17	17.346939	4.233412e-11	8.975382	5.157067e-10
11	GOTERM_CC_FAT	GO:0044455~mitochondrial membrane part	13	13.265306	5.626435e-11	14.770311	6.282850e-10
12	GOTERM_CC_FAT	GO:0042175~nuclear envelope-endoplasmic reticu...	17	17.346939	9.522033e-11	8.501330	9.815013e-10
13	GOTERM_CC_FAT	GO:0005746~mitochondrial respiratory chain	10	10.204082	5.125087e-10	22.190972	4.905440e-09
14	GOTERM_CC_FAT	GO:0044432~endoplasmic reticulum part	17	17.346939	1.800255e-09	6.957861	1.608228e-08
15	GOTERM_CC_FAT	GO:0005792~microsome	14	14.285714	8.898764e-09	8.389498	7.452714e-08
16	GOTERM_CC_FAT	GO:0005741~mitochondrial outer membrane	10	10.204082	1.134508e-08	15.780247	8.942589e-08
17	GOTERM_CC_FAT	GO:0042598~vesicular fraction	14	14.285714	1.261559e-08	8.148816	9.391605e-08
18	GOTERM_CC_FAT	GO:0031968~organelle outer membrane	10	10.204082	4.077342e-08	13.655983	2.875599e-07
19	GOTERM_CC_FAT	GO:0019867~outer membrane	10	10.204082	5.674731e-08	13.150206	3.802069e-07
20	GOTERM_CC_FAT	GO:0005783~endoplasmic reticulum	23	23.469388	4.543511e-07	3.402616	2.899189e-06
21	GOTERM_CC_FAT	GO:0012505~endomembrane system	19	19.387755	5.859228e-06	3.450668	3.568749e-05
22	GOTERM_CC_FAT	GO:0005750~mitochondrial respiratory chain com...	4	4.081633	6.428897e-06	94.681481	3.745473e-05
23	GOTERM_CC_FAT	GO:0045275~respiratory chain complex III	4	4.081633	6.428897e-06	94.681481	3.745473e-05
24	GOTERM_CC_FAT	GO:0000267~cell fraction	22	22.448980	1.239361e-05	2.885031	6.919571e-05
25	GOTERM_CC_FAT	GO:0005624~membrane fraction	17	17.346939	1.289663e-04	2.984398	6.910649e-04
26	GOTERM_CC_FAT	GO:0005626~insoluble fraction	17	17.346939	1.961841e-04	2.877685	1.010691e-03
27	GOTERM_CC_FAT	GO:0019898~extrinsic to membrane	11	11.224490	2.235796e-03	3.162438	1.104712e-02
28	GOTERM_CC_FAT	GO:0005829~cytosol	18	18.367347	9.570662e-03	1.922105	4.498004e-02
29	GOTERM_CC_FAT	GO:0043020~NADPH oxidase complex	2	2.040816	6.096751e-02	31.560494	2.522341e-01
30	GOTERM_CC_FAT	GO:0032592~integral to mitochondrial membrane	2	2.040816	8.046160e-02	23.670370	3.124904e-01

	name	score
0	Gene Cluster 2	33.474752
1	Gene Cluster 5	33.474752
2	Gene Cluster 25	33.385599
3	Gene Cluster 12	31.105503
4	Gene Cluster 24	28.330592
5	Gene Cluster 21	27.484442
6	Gene Cluster 6	25.763687
7	Gene Cluster 16	25.763687
8	Gene Cluster 10	24.133724
9	Gene Cluster 9	23.591595
10	Gene Cluster 13	23.391020
11	Gene Cluster 19	23.227674
12	Gene Cluster 20	23.227674
13	Gene Cluster 15	22.794789
14	Gene Cluster 1	22.590638
15	Gene Cluster 4	22.129459
16	Gene Cluster 7	21.967561
17	Gene Cluster 14	21.967561
18	Gene Cluster 23	21.967561
19	Gene Cluster 18	13.502007
20	Gene Cluster 17	11.906495
21	Gene Cluster 22	11.247333
22	Gene Cluster 3	10.775179
23	Gene Cluster 8	2.359300
24	Gene Cluster 11	0.783642

	name	score
0	GO:0005739~mitochondrion	33.450913
1	GO:0070469~respiratory chain	11.341551
2	GO:0005746~mitochondrial respiratory chain	6.558009
3	GO:0005789~endoplasmic reticulum membrane	6.528910
4	GO:0005741~mitochondrial outer membrane	6.149982
5	GO:0043025~cell soma	0.597256
6	GO:0031410~cytoplasmic vesicle	0.058873
7	GO:0005654~nucleoplasm	0.007770
8	GO:0005615~extracellular space	0.007698
9	GO:0005887~integral to plasma membrane	0.000484