notebook.community

Edit and run



In [1]:

    
import sys,pprint
import os
import json
sys.path.append(os.path.join(os.getcwd(),'..'))
import watson_developer_cloud



In [2]:

    
# Credentials for the Discovery service. They are read from the environment
# so real secrets never have to be typed into (and saved with) the notebook;
# the 'CHANGE_ME' placeholder is used when a variable is not set.
DISCOVERY_USERNAME = os.environ.get('DISCOVERY_USERNAME', 'CHANGE_ME')
DISCOVERY_PASSWORD = os.environ.get('DISCOVERY_PASSWORD', 'CHANGE_ME')

# Shared pretty-printer used by the cells below to display API responses.
pp = pprint.PrettyPrinter(indent=4)



In [10]:

    
# Build the Discovery client. '2016-11-07' is passed as the first positional
# argument (presumably the API version date -- confirm against the SDK).
discovery = watson_developer_cloud.DiscoveryV1(
    '2016-11-07',
    username=DISCOVERY_USERNAME,
    password=DISCOVERY_PASSWORD)

# Show every environment returned for these credentials.
environments = discovery.get_environments()
pp.pprint(environments)

# Keep only the environment(s) named 'Watson News Environment'.
news_environments = [x for x in environments['environments'] if
                     x['name'] == 'Watson News Environment']
if not news_environments:
    # Indexing an empty list would fail with a bare "list index out of range";
    # raise the same exception type with a message that names the problem.
    raise IndexError("no environment named 'Watson News Environment' in "
                     "the get_environments() response")
news_environment_id = news_environments[0]['environment_id']
pp.pprint(news_environment_id)

# List the collections of the news environment. environment_id is passed by
# keyword, matching the other list_collections call in this notebook.
collections = discovery.list_collections(environment_id=news_environment_id)
news_collections = list(collections['collections'])
pp.pprint(collections)









    



{   'environments': [   {   'created': '2016-12-15T20:14:15.132Z',
                            'description': 'Watson News cluster environment',
                            'environment_id': 'eeb606be-b79a-442b-8612-68ff81d8e46f',
                            'name': 'Watson News Environment',
                            'read_only': True,
                            'status': 'active',
                            'updated': '2016-12-15T20:14:15.132Z'}]}
'eeb606be-b79a-442b-8612-68ff81d8e46f'
{   'collections': [   {   'collection_id': 'e5c88c17-a12d-4403-bf70-fa76ef0cd97e',
                           'configuration_id': '7b379eee-a7ae-4ab8-8008-66d00bc978a0',
                           'created': '2016-12-15T20:14:15.153Z',
                           'description': 'Watson News pre-enriched collection '
                                          'of curated news sources',
                           'language': 'en_us',
                           'name': 'watson_news',
                           'status': 'active',
                           'updated': '2016-12-15T20:14:15.153Z'}]}



In [11]:

    
# Display the configurations listed for the news environment, then fetch and
# display the id returned by get_default_configuration_id for it.
pp.pprint(
    discovery.list_configurations(environment_id=news_environment_id)
)
default_config_id = discovery.get_default_configuration_id(
    environment_id=news_environment_id
)
pp.pprint(default_config_id)









    



{   'configurations': [   {   'configuration_id': '7b379eee-a7ae-4ab8-8008-66d00bc978a0',
                              'created': '2016-12-15T20:14:15.138Z',
                              'description': 'Default configuration for Watson '
                                             'News cluster',
                              'name': 'Default Configuration',
                              'updated': '2016-12-15T20:14:15.138Z'}]}
'7b379eee-a7ae-4ab8-8008-66d00bc978a0'



In [12]:

    
# Fetch the configuration identified by default_config_id and display it.
default_config = discovery.get_configuration(
    environment_id=news_environment_id,
    configuration_id=default_config_id,
)
pp.pprint(default_config)









    



{   'configuration_id': '7b379eee-a7ae-4ab8-8008-66d00bc978a0',
    'created': '2016-12-15T20:14:15.138Z',
    'description': 'Default configuration for Watson News cluster',
    'name': 'Default Configuration',
    'updated': '2016-12-15T20:14:15.138Z'}



In [ ]:



In [13]:

    
# Request a new environment; the response is kept in new_environment so the
# next cell can read its 'environment_id'.
new_environment = discovery.create_environment(
    name="new env",
    description="bogus env",
)



In [ ]:

    
pp.pprint(new_environment)

# Bind the id unconditionally so the cells below never hit a NameError (or
# silently reuse a stale id from an earlier run) when the environment is not
# active yet.
writable_environment_id = new_environment['environment_id']
environment_status = discovery.get_environment(
    environment_id=writable_environment_id)['status']

if environment_status == 'active':
    # The example collection is only created once the environment is active.
    new_collection = discovery.create_collection(environment_id=writable_environment_id,
                                                 name='Example Collection',
                                                 description="just a test")
    pp.pprint(new_collection)
else:
    # This case used to be skipped without any feedback; report it instead.
    print("Environment %s has status %r; re-run this cell once it is active."
          % (writable_environment_id, environment_status))



In [23]:

    
# Rebind `collections` (earlier the news environment's listing) to the
# collections of the writable environment, and display the response.
collections = discovery.list_collections(
    environment_id=writable_environment_id
)
pp.pprint(collections)









    



{   'collections': [   {   'collection_id': 'dfdf5d04-b828-4feb-828e-0da223bfb145',
                           'configuration_id': '6eca556e-1805-4f92-a9ef-b39e87069c53',
                           'created': '2016-12-16T20:01:18.246Z',
                           'description': 'just a test',
                           'language': 'en_us',
                           'name': 'Example Collection',
                           'status': 'active',
                           'updated': '2016-12-16T20:01:18.246Z'}]}



In [21]:

    
# Pass ../resources/simple.html (relative to the working directory) to
# test_document and pretty-print the response.
with open(os.path.join(os.getcwd(), '..', 'resources', 'simple.html')) as fileinfo:
    pp.pprint(
        discovery.test_document(
            environment_id=writable_environment_id,
            fileinfo=fileinfo,
        )
    )









    



{   'enriched_field_units': 1,
    'notices': [],
    'original_media_type': 'text/html',
    'snapshots': [   {   'snapshot': {   'html': '<html>\n'
                                                 '<head>\n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '</body>\n'
                                                 '</html>'},
                         'step': 'html_input'},
                     {   'snapshot': {   'html': "<?xml version='1.0' "
                                                 "encoding='UTF-8' "
                                                 "standalone='yes'?><html>\n"
                                                 '<head>\n'
                                                 '    <meta '
                                                 'content="text/html; '
                                                 'charset=UTF-8" '
                                                 'http-equiv="Content-Type"/>\n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '\n'
                                                 '</body></html>'},
                         'step': 'html_output'},
                     {   'snapshot': {   'extracted_metadata': {   'title': 'Simple '
                                                                            'HTML '
                                                                            'Page'},
                                         'html': "<?xml version='1.0' "
                                                 "encoding='UTF-8' "
                                                 "standalone='yes'?><html>\n"
                                                 '<head>\n'
                                                 '    <meta '
                                                 'content="text/html; '
                                                 'charset=UTF-8" '
                                                 'http-equiv="Content-Type"/>\n'
                                                 '    \n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '\n'
                                                 '\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '\n'
                                                 '\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '\n'
                                                 '</body></html>',
                                         'metadata': {},
                                         'text': 'Simple HTML Page\n'
                                                 '\n'
                                                 'Chapter 1\n'
                                                 '\n'
                                                 'The content of the first '
                                                 'chapter.'},
                         'step': 'json_output'},
                     {   'snapshot': {   'extracted_metadata': {   'title': 'Simple '
                                                                            'HTML '
                                                                            'Page'},
                                         'html': "<?xml version='1.0' "
                                                 "encoding='UTF-8' "
                                                 "standalone='yes'?><html>\n"
                                                 '<head>\n'
                                                 '    <meta '
                                                 'content="text/html; '
                                                 'charset=UTF-8" '
                                                 'http-equiv="Content-Type"/>\n'
                                                 '    \n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '\n'
                                                 '\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '\n'
                                                 '\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '\n'
                                                 '</body></html>',
                                         'metadata': {},
                                         'text': 'Simple HTML Page\n'
                                                 '\n'
                                                 'Chapter 1\n'
                                                 '\n'
                                                 'The content of the first '
                                                 'chapter.'},
                         'step': 'json_normalizations_output'},
                     {   'snapshot': {   'enriched_field_units': 1,
                                         'enriched_text': {   'concepts': [   {   'dbpedia': 'http://dbpedia.org/resource/HTML',
                                                                                  'freebase': 'http://rdf.freebase.com/ns/m.03g20',
                                                                                  'opencyc': 'http://sw.opencyc.org/concept/Mx4rvWVukJwpEbGdrcN5Y29ycA',
                                                                                  'relevance': 0.932431,
                                                                                  'text': 'HTML',
                                                                                  'yago': 'http://yago-knowledge.org/resource/HTML'}],
                                                              'docSentiment': {   'mixed': False,
                                                                                  'type': 'neutral'},
                                                              'entities': [],
                                                              'keywords': [   {   'relevance': 0.915478,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'Simple '
                                                                                          'HTML '
                                                                                          'Page'},
                                                                              {   'relevance': 0.539264,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'content'},
                                                                              {   'relevance': 0.511578,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'chapter'}],
                                                              'language': 'english',
                                                              'relations': [],
                                                              'status': 'OK',
                                                              'taxonomy': [   {   'confident': False,
                                                                                  'label': '/technology '
                                                                                           'and '
                                                                                           'computing/programming '
                                                                                           'languages/javascript',
                                                                                  'score': 0.577361},
                                                                              {   'confident': False,
                                                                                  'label': '/business '
                                                                                           'and '
                                                                                           'industrial/company/bankruptcy',
                                                                                  'score': 0.57735},
                                                                              {   'confident': False,
                                                                                  'label': '/art '
                                                                                           'and '
                                                                                           'entertainment/books '
                                                                                           'and '
                                                                                           'literature',
                                                                                  'score': 0.367906}]},
                                         'extracted_metadata': {   'title': 'Simple '
                                                                            'HTML '
                                                                            'Page'},
                                         'html': "<?xml version='1.0' "
                                                 "encoding='UTF-8' "
                                                 "standalone='yes'?><html>\n"
                                                 '<head>\n'
                                                 '    <meta '
                                                 'content="text/html; '
                                                 'charset=UTF-8" '
                                                 'http-equiv="Content-Type"/>\n'
                                                 '    \n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '\n'
                                                 '\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '\n'
                                                 '\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '\n'
                                                 '</body></html>',
                                         'metadata': {},
                                         'text': 'Simple HTML Page\n'
                                                 '\n'
                                                 'Chapter 1\n'
                                                 '\n'
                                                 'The content of the first '
                                                 'chapter.'},
                         'step': 'enrichments_output'},
                     {   'snapshot': {   'enriched_field_units': 1,
                                         'enriched_text': {   'concepts': [   {   'dbpedia': 'http://dbpedia.org/resource/HTML',
                                                                                  'freebase': 'http://rdf.freebase.com/ns/m.03g20',
                                                                                  'opencyc': 'http://sw.opencyc.org/concept/Mx4rvWVukJwpEbGdrcN5Y29ycA',
                                                                                  'relevance': 0.932431,
                                                                                  'text': 'HTML',
                                                                                  'yago': 'http://yago-knowledge.org/resource/HTML'}],
                                                              'docSentiment': {   'mixed': False,
                                                                                  'type': 'neutral'},
                                                              'entities': [],
                                                              'keywords': [   {   'relevance': 0.915478,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'Simple '
                                                                                          'HTML '
                                                                                          'Page'},
                                                                              {   'relevance': 0.539264,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'content'},
                                                                              {   'relevance': 0.511578,
                                                                                  'sentiment': {   'mixed': False,
                                                                                                   'type': 'neutral'},
                                                                                  'text': 'chapter'}],
                                                              'language': 'english',
                                                              'relations': [],
                                                              'status': 'OK',
                                                              'taxonomy': [   {   'confident': False,
                                                                                  'label': '/technology '
                                                                                           'and '
                                                                                           'computing/programming '
                                                                                           'languages/javascript',
                                                                                  'score': 0.577361},
                                                                              {   'confident': False,
                                                                                  'label': '/business '
                                                                                           'and '
                                                                                           'industrial/company/bankruptcy',
                                                                                  'score': 0.57735},
                                                                              {   'confident': False,
                                                                                  'label': '/art '
                                                                                           'and '
                                                                                           'entertainment/books '
                                                                                           'and '
                                                                                           'literature',
                                                                                  'score': 0.367906}]},
                                         'extracted_metadata': {   'title': 'Simple '
                                                                            'HTML '
                                                                            'Page'},
                                         'html': "<?xml version='1.0' "
                                                 "encoding='UTF-8' "
                                                 "standalone='yes'?><html>\n"
                                                 '<head>\n'
                                                 '    <meta '
                                                 'content="text/html; '
                                                 'charset=UTF-8" '
                                                 'http-equiv="Content-Type"/>\n'
                                                 '    \n'
                                                 '  <title>Simple HTML '
                                                 'Page</title>\n'
                                                 '\n'
                                                 '\n'
                                                 '</head>\n'
                                                 '<body>\n'
                                                 '\n'
                                                 '\n'
                                                 '  <h1>Chapter 1</h1>\n'
                                                 '  <p>The content of the '
                                                 'first chapter.</p>\n'
                                                 '\n'
                                                 '</body></html>',
                                         'metadata': {},
                                         'text': 'Simple HTML Page\n'
                                                 '\n'
                                                 'Chapter 1\n'
                                                 '\n'
                                                 'The content of the first '
                                                 'chapter.'},
                         'step': 'normalizations_output'}],
    'status': 'completed'}



In [25]:

    
# Pass ../resources/simple.html to add_document for the first collection in
# the `collections` listing, then display the response.
with open(os.path.join(os.getcwd(), '..', 'resources', 'simple.html')) as fileinfo:
    res = discovery.add_document(
        environment_id=writable_environment_id,
        collection_id=collections['collections'][0]['collection_id'],
        fileinfo=fileinfo,
    )
    pp.pprint(res)









    



{'document_id': '10ea31bd-70f1-4fcd-a635-869ae8a06771', 'status': 'processing'}



In [26]:

    
# Pass ../resources/simple.html to update_document under a caller-chosen
# document id, for the first collection in the `collections` listing.
with open(os.path.join(os.getcwd(), '..', 'resources', 'simple.html')) as fileinfo:
    res = discovery.update_document(
        environment_id=writable_environment_id,
        collection_id=collections['collections'][0]['collection_id'],
        document_id='some_user_specified_id',
        file_info=fileinfo,
    )
    pp.pprint(res)









    



{'document_id': 'some_user_specfied_id', 'status': 'processing'}



In [29]:

    
# Fetch the first listed collection and display only the 'document_counts'
# entry of the response.
res = discovery.get_collection(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
)
pp.pprint(res['document_counts'])









    



{'available': 1, 'failed': 0, 'processing': 0}



In [30]:

    
# Delete the writable environment and display the service's response.
# NOTE(review): cells further down still pass writable_environment_id, and
# their execution counts are lower than this cell's -- confirm the intended
# run order before relying on a top-to-bottom run.
res = discovery.delete_environment(
    environment_id=writable_environment_id
)
pp.pprint(res)









    



{'environment_id': '68f1d470-7e02-466d-b79c-446d3e544bf9', 'status': 'deleted'}



In [10]:

    
# Call delete_training_data for the first listed collection and print the
# raw response object.
res = discovery.delete_training_data(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
)
print(res)









    



<Response [204]>



In [14]:

    
# A single example entry (document id, relevance value, cross reference)
# sent along with the training query below.
examples = [
    {'document_id': 'my_id', 'relevance': 0, 'cross_reference': 'my_cross_id'},
]

# Add the training query to the first listed collection and dump the
# response as indented JSON.
res = discovery.add_training_data_query(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    natural_language_query='foo',
    query_id='some_unique_id',
    filter='bar',
    examples=examples,
)
print(json.dumps(res, indent=4))









    



{
    "filter": "bar", 
    "query_id": "some_unique_id", 
    "examples": [
        {
            "relevance": 0, 
            "cross_reference": "my_cross_id", 
            "document_id": "my_id"
        }
    ], 
    "natural_language_query": "foo"
}



In [12]:

    
# Call list_training_data for the first listed collection and dump the
# response as indented JSON.
res = discovery.list_training_data(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
)

print(json.dumps(res, indent=4))









    



{
    "collection_id": "cb86efe7-d5b1-42c6-8c7b-2ca248dc3fb0", 
    "environment_id": "1cf6702f-d6f4-40f3-97cf-fb99eb110ef8", 
    "queries": [
        {
            "filter": "bar", 
            "query_id": "86d74eeba3b2b2175617dcbbd7dabab7db840a1", 
            "examples": [
                {
                    "relevance": 0, 
                    "cross_reference": "my_cross_id", 
                    "document_id": "my_id"
                }
            ], 
            "natural_language_query": "foo"
        }
    ]
}



In [13]:

    
# Delete one training query, addressed by its query id, from the first
# listed collection; print the raw response object.
res = discovery.delete_training_data_query(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
)

print(res)









    



<Response [204]>



In [15]:

    
# Fetch one training query by its query id from the first listed collection
# and dump the response as indented JSON.
res = discovery.get_training_data_query(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
)

print(json.dumps(res, indent=4))









    



{
    "filter": "bar", 
    "query_id": "86d74eeba3b2b2175617dcbbd7dabab7db840a1", 
    "examples": [
        {
            "relevance": 0, 
            "cross_reference": "my_cross_id", 
            "document_id": "my_id"
        }
    ], 
    "natural_language_query": "foo"
}



In [16]:

    
# Call list_training_data_query_examples for the given query id and dump the
# response as indented JSON.
res = discovery.list_training_data_query_examples(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
)

print(json.dumps(res, indent=4))









    



[
    {
        "relevance": 0, 
        "cross_reference": "my_cross_id", 
        "document_id": "my_id"
    }
]



In [22]:

    
# Add a further example (document id, relevance value, cross reference) to
# the given training query and dump the response as indented JSON.
res = discovery.add_training_data_query_example(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
    document_id='my_other_id',
    relevance=0,
    cross_reference='my_other_cross_id',
)

print(json.dumps(res, indent=4))









    



{
    "relevance": 0, 
    "cross_reference": "my_other_cross_id", 
    "document_id": "my_other_id"
}



In [23]:

    
# Fetch a single example of the given training query by its example id and
# dump the response as indented JSON.
res = discovery.get_training_data_query_example(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
    example_id='my_other_id',
)

print(json.dumps(res, indent=4))









    



{
    "relevance": 0, 
    "cross_reference": "my_other_cross_id", 
    "document_id": "my_other_id"
}



In [24]:

    
# Update the example 'my_other_id' of the given training query, sending
# relevance=1 and the same cross reference; dump the response as JSON.
res = discovery.update_training_data_query_example(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
    example_id='my_other_id',
    relevance=1,
    cross_reference='my_other_cross_id',
)

print(json.dumps(res, indent=4))









    



{
    "relevance": 1, 
    "cross_reference": "my_other_cross_id", 
    "document_id": "my_other_id"
}



In [21]:

    
# Delete the example 'my_other_id' from the given training query and print
# the raw response object.
res = discovery.delete_training_data_query_example(
    environment_id=writable_environment_id,
    collection_id=collections['collections'][0]['collection_id'],
    query_id='86d74eeba3b2b2175617dcbbd7dabab7db840a1',
    example_id='my_other_id',
)

print(res)









    



<Response [204]>