California Proposition 65 requires labeling for chemicals recognized by the state of California as causing cancer or reproductive/developmental toxicity. This enables us to categorize the chemicals on the Prop 65 list as instances of 'carcinogen', 'reproductive toxicant', 'male reproductive toxicant', 'female reproductive toxicant', and 'developmental toxicant' in Wikidata, adding some very basic (and somewhat indirect) chemical–disease relationships.
Note that California's OEHHA allows users to download a pre-exported .csv file of chemicals listed under Prop 65 (does not include de-listed chemicals). Alternatively, users can export the complete list of chemicals from OEHHA which will include chemicals that are under consideration, currently listed, or formerly listed.
This notebook partially explores both exports for the best way for loading the data into Wikidata. The final bot will likely NOT include both methods.
In [103]:
from wikidataintegrator import wdi_core, wdi_login, wdi_helpers
from wikidataintegrator.ref_handlers import update_retrieved_if_new_multiple_refs
import pandas as pd
from pandas import read_csv
import requests
from tqdm.notebook import trange, tqdm
import ipywidgets
import widgetsnbextension
import time
import os
import datetime
In [2]:
## Here are the object QIDs, assuming that a chemical is the subject.
## Wikidata items for each Prop 65 hazard category.
object_qid = {'femrep': 'Q55427776',   # female reproductive toxicant
              'menrep': 'Q55427774',   # male reproductive toxicant
              'devtox': 'Q72941151',   # developmental toxicant
              'cancer': 'Q187661',     # carcinogen
              'reptox': 'Q55427767'}   # reproductive toxicant
## Column in the OEHHA export holding each category's listing date
## ('None' when the export has no date column for that category).
list_date = {'femrep': 'Female Reproductive Toxicity - Date of Listing',
             'menrep': 'Male Reproductive Toxicity - Date of Listing',
             ## fixed: previously pointed at the Male Reproductive column
             'devtox': 'Developmental Toxicity - Date of Listing',
             'cancer': 'None',
             'reptox': 'None'}
list_prop = "P31"
In [3]:
## Note that the property start date is used for list date.
## When placed in the references, Deltabot moved it out as a qualifier
from datetime import datetime
import copy
def create_reference(prop65_url):
    """Assemble a Wikidata reference block for a Prop 65 claim.

    stated in (P248) = CA Prop 65 list (Q28455381), retrieved (P813) = today,
    reference URL (P854) = the chemical's OEHHA page.
    """
    today = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID(value="Q28455381", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(today, prop_nr="P813", is_reference=True),
        wdi_core.WDUrl(value=prop65_url, prop_nr="P854", is_reference=True),
    ]
In [ ]:
## Login for Scheduled bot
print("Logging in...")
try:
    ## Prefer credentials checked into a local (untracked) module
    from scheduled_bots.local import WDUSER, WDPASS
except ImportError:
    ## Fall back to environment variables
    if "WDUSER" in os.environ and "WDPASS" in os.environ:
        WDUSER, WDPASS = os.environ['WDUSER'], os.environ['WDPASS']
    else:
        raise ValueError("WDUSER and WDPASS must be specified in local.py or as environment variables")
In [4]:
## Login for local run
print("Logging in...")
import wdi_user_config ## Credentials stored in a wdi_user_config file
login_dict = wdi_user_config.get_credentials()
## 'login' is the session object every item.write() call below depends on
login = wdi_login.WDLogin(login_dict['WDUSER'], login_dict['WDPASS'])
The manually triggered export of the chemical list from the OEHHA site has less header junk, fewer random title notes, and fewer other things that disrupt the structure, which makes the name conversion easier. Additionally, the data on cancer, reproductive toxicity, etc. is more structured and doesn't have random blank spaces.
In [5]:
## Files should be downloaded to data/downloads
downloadpath = 'data/downloads/'
## Pick the most recently created file in the download directory
filelist = [downloadpath + fname for fname in os.listdir(downloadpath)]
datasrc = max(filelist, key=os.path.getctime)
In [58]:
## Load the OEHHA export; unicode_escape copes with its mixed encoding
chem_list = read_csv(datasrc, encoding='unicode_escape', header=0)
## Drop columns that are entirely empty, then make missing values explicit
chem_list.dropna(axis='columns', how='all', inplace=True)
chem_list.fillna("None", inplace=True)
## Pull out only columns of interest for our task
cols_of_interest = chem_list[['Title', 'CAS Number', 'Cancer', 'Cancer - Listing Mechanism',
                              'Reproductive Toxicity', 'Chemical listed under Proposition 65 as causing',
                              'Developmental Toxicity - Date of Listing', 'Developmental Toxicity - Listing Mechanism',
                              'Female Reproductive Toxicity - Date of Listing',
                              'Female Reproductive Toxicity - Listing Mechanism',
                              'Male Reproductive Toxicity - Date of Listing',
                              'Male Reproductive Toxicity - Listing Mechanism']]
## Rows with no hazard information at all are not Prop 65 listings
no_hazard_info = ((cols_of_interest['Cancer'] == "None") &
                  (cols_of_interest['Reproductive Toxicity'] == "None") &
                  (cols_of_interest['Chemical listed under Proposition 65 as causing'] == "None"))
prop_65_irrelevant = cols_of_interest.loc[no_hazard_info]
non_prop_chems = prop_65_irrelevant['Title'].tolist()
prop65_chems = cols_of_interest.loc[~cols_of_interest['Title'].isin(non_prop_chems)].copy()
The property in Wikidata uses the URL stub as its ID, so we'll need to convert the chemical names to URL stubs that work with the Prop 65 website. The URLs will then be mapped to Wikidata entries that already carry the property, which were added via Mix'n'match. Normally, URLs can be tested directly, but the CA Prop 65 website has captcha protection and blocks scrapers.
Example conversion: "OEHHA listing" --> "OEHHA url" | "Prop 65 listing" --> "Prop 65 url"
In [59]:
## To convert the title to a url stub: lower-case it, strip out brackets,
## commas, parentheses, periods, ampersands and apostrophes, and replace
## spaces with dashes.
## A single character-class regex replaces the original chain of
## .str.replace calls: '(', '[' and '.' are regex metacharacters, and the
## old chain relied on pandas' version-dependent `regex` default
## (regex=True before pandas 2.0, regex=False after), so its behavior was
## fragile across versions. The explicit flags pin the intended behavior.
prop65_chems['url_stub'] = (prop65_chems['Title']
                            .str.lower()
                            .str.replace(r"[\[\],().&']", "", regex=True)
                            .str.replace(" ", "-", regex=False))
#print(prop65_chems.head())
## Check the look of the url stub
#print(prop65_chems.loc[prop65_chems['Title']=="Allyl Chloride"])
#print(prop65_chems.loc[prop65_chems['Title']=="Trp-P-1 (Tryptophan-P-1)"])
#print(prop65_chems.loc[prop65_chems['Title']=="MeIQx (2-Amino-3,8-dimethylimidazo[4,5-f]quinoxaline)"])
## Build a Mix'n'match catalog: Entry ID (url stub), Entry name (title),
## and a description combining name and CAS number
mixnmatch_cat = prop65_chems[['url_stub','Title','CAS Number']].copy()
mixnmatch_cat.rename(columns={'url_stub':'Entry ID','Title':'Entry name'}, inplace=True)
mixnmatch_cat['Entry description'] = mixnmatch_cat['Entry name'].astype(str).str.cat(mixnmatch_cat['CAS Number'].astype(str),sep=", CAS Number: ")
print(mixnmatch_cat.head(n=2))
#mixnmatch_cat.to_csv('data/mixnmatch_cat.tsv',sep='\t', header=True)
In [17]:
## Persist the cleaned Prop 65 table so later cells can reload it from disk
prop65_chems.to_csv('data/prop65_chems.tsv',sep='\t',header=True, encoding='utf-8')
In [93]:
## Pull every Wikidata item carrying a CAS Registry Number (P231)
sparqlQuery = "SELECT * WHERE {?item wdt:P231 ?CAS}"
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)
cas_in_wd_list = []
## Iterate the bindings directly instead of the original manual index loop
for binding in result["results"]["bindings"]:
    wdid = binding["item"]["value"].replace("http://www.wikidata.org/entity/", "")
    cas_in_wd_list.append({'WDID': wdid, 'CAS Number': binding["CAS"]["value"]})
cas_in_wd = pd.DataFrame(cas_in_wd_list)
## keep=False drops BOTH rows of any duplicate pair, so ambiguous
## CAS<->QID mappings are excluded entirely rather than arbitrarily resolved
cas_in_wd.drop_duplicates(subset='CAS Number', keep=False, inplace=True)
cas_in_wd.drop_duplicates(subset='WDID', keep=False, inplace=True)
print(cas_in_wd.head(n=2))
In [94]:
## Join the Prop 65 catalog to Wikidata items via shared CAS numbers;
## inner join keeps only chemicals that matched exactly one item above
prop_65_matches = mixnmatch_cat.merge(cas_in_wd,on='CAS Number',how='inner')
print(prop_65_matches.head(n=2))
print(len(prop_65_matches))
#prop_65_matches.to_csv('data/mixnmatch_cat_with_cas.tsv',sep='\t', header=True)
In [95]:
## Pull things matched via mix n match (items already holding a P7524 ID)
sparqlQuery = "SELECT ?item ?CA65 WHERE {?item wdt:P7524 ?CA65}"
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)
CA65_in_wd_list = []
## Iterate the bindings directly instead of the original manual index loop
for binding in result["results"]["bindings"]:
    wdid = binding["item"]["value"].replace("http://www.wikidata.org/entity/", "")
    CA65_in_wd_list.append({'WDID': wdid, 'Entry ID': binding["CA65"]["value"]})
CA65_in_wd = pd.DataFrame(CA65_in_wd_list)
print(len(CA65_in_wd))
In [96]:
## Load the previously saved table of entries whose generated url stubs
## don't resolve (see the 'Remove entries which have odd urls' step below)
bad_urls = read_csv('data/bad_urls.tsv',delimiter='\t',header=0, encoding='utf-8',index_col=0)
bad_urls_cas = bad_urls['CAS Number'].tolist()
print(bad_urls_cas)
In [97]:
## Remove items matched via mix n match from update
## (they already carry a P7524 ID in Wikidata)
already_matched_ids = CA65_in_wd['Entry ID'].tolist()
prop_65_less_mixnmatch = prop_65_matches.loc[~prop_65_matches['Entry ID'].isin(already_matched_ids)]
## Remove entries which have odd urls
prop_65_less_bad_urls = prop_65_less_mixnmatch.loc[~prop_65_less_mixnmatch['CAS Number'].isin(bad_urls_cas)]
print(prop_65_less_bad_urls.head(n=2))
In [69]:
#bad_urls.to_csv('data/bad_urls.tsv',sep='\t',header=True, encoding='utf-8')
In [98]:
## Write the CA Prop 65 ID (P7524) to a small batch of matched items.
## Each write appends a string statement with a full reference block.
prop65_to_add = prop_65_less_bad_urls[0:10]
url_base = 'https://oehha.ca.gov/chemicals/'
list_prop = "P7524"
## iterrows replaces the original range(len)/repeated .iloc[i] pattern;
## total= keeps the tqdm progress bar accurate
for _, row in tqdm(prop65_to_add.iterrows(), total=len(prop65_to_add)):
    prop_65_qid = row['WDID']
    prop_65_id = row['Entry ID']
    prop_65_url = url_base + prop_65_id
    reference = create_reference(prop_65_url)
    prop65_statement = [wdi_core.WDString(value=prop_65_id, prop_nr=list_prop,
                                          references=[copy.deepcopy(reference)])]
    item = wdi_core.WDItemEngine(wd_item_id=prop_65_qid, data=prop65_statement, append_value=list_prop,
                                 global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
    item.write(login, edit_summary="added CA prop 65 id")
    print(prop_65_id, prop_65_qid, prop_65_url)
In [116]:
## Reload the Prop 65 chemical table exported earlier in the notebook
prop65_chems = read_csv('data/prop65_chems.tsv',delimiter='\t',header=0, index_col=0, encoding='utf-8')
In [117]:
## Run sparql query to pull all entities with Prop 65 ID (Read Only Run)
sparqlQuery = "SELECT ?item ?CA65 WHERE {?item wdt:P7524 ?CA65}"
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)
CA65_in_wd_list = []
## Iterate the bindings directly instead of the original manual index loop
for binding in result["results"]["bindings"]:
    wdid = binding["item"]["value"].replace("http://www.wikidata.org/entity/", "")
    CA65_in_wd_list.append({'WDID': wdid, 'url_stub': binding["CA65"]["value"]})
## Inspect the results for mapping or coverage issues
CA65_in_wd = pd.DataFrame(CA65_in_wd_list)
print("resulting mapping table has: ",len(CA65_in_wd)," rows.")
In [121]:
## Account for bad urls: where a chemical's CAS number appears in the
## bad-url table, overwrite its generated url_stub with the stub already
## recorded in Wikidata for that item.
## (the original computed bad_CA65_in_wd twice on identical lines; the
## duplicate is removed)
bad_CA65_in_wd = CA65_in_wd.loc[CA65_in_wd['WDID'].isin(bad_urls['WDID'].tolist())]
bad_url_stubs = bad_CA65_in_wd.merge(bad_urls, on='WDID')
## single .loc[row_mask, column] assignment replaces the original chained
## indexing (df['col'].loc[mask] = ...), which raises SettingWithCopyWarning
## and silently fails under pandas copy-on-write
prop65_chems.loc[prop65_chems['CAS Number'].isin(bad_url_stubs['CAS Number']), 'url_stub'] = bad_url_stubs['url_stub'].values
In [13]:
## Perform left merge for currently listed and partially delisted items
## (left join keeps every Prop 65 chemical; WDID is missing when the url
## stub has no Wikidata match)
prop_65_mapped = prop65_chems.merge(CA65_in_wd, on='url_stub', how='left')
#print(prop_65_mapped.head(n=2))
In [14]:
## Flag which hazard categories each chemical is CURRENTLY listed under.
## The 'listed under Proposition 65 as causing' column names the specific
## reproductive hazards; the Cancer / Reproductive Toxicity status columns
## contain the word 'Current' for active listings.
current_flag_sources = {
    'devtox current': ('Chemical listed under Proposition 65 as causing', 'Development'),
    'menrep current': ('Chemical listed under Proposition 65 as causing', 'Male'),
    'femrep current': ('Chemical listed under Proposition 65 as causing', 'Female'),
    'cancer current': ('Cancer', 'Current'),
    'reptox current': ('Reproductive Toxicity', 'Current'),
}
for flag_column, (source_column, keyword) in current_flag_sources.items():
    prop_65_mapped[flag_column] = prop_65_mapped[source_column].str.contains(keyword)
In [15]:
## Flag which hazard categories each chemical has been DELISTED from.
## Cancer and general reproductive toxicity carry 'Formerly' in their status
## columns; the three specific hazards are inferred: a listing date or
## mechanism exists but the category is absent from the current flags.
prop_65_mapped['cancer delisted'] = prop_65_mapped['Cancer'].str.contains("Formerly")
prop_65_mapped['reptox delisted'] = prop_65_mapped['Reproductive Toxicity'].str.contains("Formerly")
for column_prefix, hazard_key in (('Developmental Toxicity', 'devtox'),
                                  ('Female Reproductive Toxicity', 'femrep'),
                                  ('Male Reproductive Toxicity', 'menrep')):
    has_history = ((prop_65_mapped[column_prefix + ' - Date of Listing'] != "None") |
                   (prop_65_mapped[column_prefix + ' - Listing Mechanism'] != "None"))
    not_current = (prop_65_mapped[hazard_key + ' current'] == False)
    prop_65_mapped.loc[has_history & not_current, hazard_key + ' delisted'] = True
## Rows never marked True above become False rather than NaN
prop_65_mapped.fillna(False, inplace=True)
When using SPARQL queries, the 't' in 'wdt' represents 'truthy' and prioritizes higher ranks and non-deprecated items/statements. Since we're looking for statements which are deprecated, we'll use 'ps' (property statement) and 'pq' (property qualifier) for the sparql query involving deprecated items.
In [17]:
## For each hazard class, collect (a) statements already deprecated in
## Wikidata and (b) items currently listed as instances of the class.
deprecated_results = []
current_results = []
for object_type in object_qid.keys():
    ## wdt: is truthy-only, so deprecated statements must be reached at the
    ## statement level: ps: (value) + pq:P2241 (reason for deprecated rank)
    deprecated_query = ("SELECT ?item {?item ps:P31 wd:" + object_qid[object_type] +
                        ". ?item pq:P2241 wd:Q56478729. }")
    depresult = wdi_core.WDItemEngine.execute_sparql_query(deprecated_query)
    ## direct iteration replaces the original manual index loops
    for binding in depresult["results"]["bindings"]:
        wdi_uri = binding["item"]["value"].replace("http://www.wikidata.org/entity/statement/", "")
        ## statement URIs look like 'Q1234-<uuid>'; the QID is the prefix
        WDID = wdi_uri.split('-')[0]
        deprecated_results.append({'WDID': WDID, 'deprecated_type': object_type + ' delisted'})
    sparqlQuery = "SELECT ?item WHERE {?item wdt:P31 wd:" + object_qid[object_type] + ".}"
    result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)
    for binding in result["results"]["bindings"]:
        wdid = binding["item"]["value"].replace("http://www.wikidata.org/entity/", "")
        current_results.append({'WDID': wdid, 'ObjectType': object_type + ' current'})
In [18]:
## Collect the query results into DataFrames and eyeball them
deprecated_df = pd.DataFrame(deprecated_results)
current_df = pd.DataFrame(current_results)
print(len(deprecated_df),len(current_df))
print(deprecated_df)
print(current_df)
In [19]:
## all prop_65_CA WDIDs
## (from the left merge above, so entries without a Wikidata match may be
## NaN/False here — TODO confirm how those should be handled downstream)
all_entities = prop_65_mapped['WDID'].tolist()
In [20]:
#### Determine if the status of a chemical as a particular hazard has been changed in prop 65 list
#### and if a corresponding change needs to be made in Wikidata.
def check_statement_status(wd_item, object_qid):
    """Classify, per hazard type, which edit (if any) the chemical needs.

    :param wd_item: WDID (QID string) of the chemical being checked
    :param object_qid: dict mapping hazard key -> hazard-class QID
    :return: dict of hazard-key lists:
        new_statetypes  - add a new normal-rank statement
        new_dep_types   - add a new deprecated-rank statement
        dep_no_change / no_change - Wikidata already agrees, nothing to do
        rank_delist / rank_relist - existing statement needs its rank flipped
    Reads the module-level prop_65_mapped, deprecated_df and current_df tables.

    BUGFIX: the original body ignored the wd_item parameter and referenced
    the caller's global loop variable 'eachitem'; it only worked because the
    caller happened to use that name. The parameter is now used throughout.
    """
    new_statetypes = []
    new_dep_types = []
    dep_no_change = []
    no_change = []
    rank_delist = []
    rank_relist = []
    for object_type in object_qid.keys():
        ## If the item is delisted per the Prop 65 export
        if wd_item in prop_65_mapped['WDID'].loc[prop_65_mapped[object_type + ' delisted'] == True].tolist():
            ## already recorded as deprecated in Wikidata -> no write needed
            if wd_item in deprecated_df['WDID'].loc[deprecated_df['deprecated_type'] == object_type + ' delisted'].tolist():
                dep_no_change.append(object_type)
            ## still current in Wikidata -> rank change needed, no new statement
            elif wd_item in current_df['WDID'].loc[current_df['ObjectType'] == object_type + ' current'].tolist():
                rank_delist.append(object_type)
            ## delisted but not in Wikidata as such -> new deprecated statement
            else:
                new_dep_types.append(object_type)
        ## If the item is currently listed per the Prop 65 export
        if wd_item in prop_65_mapped['WDID'].loc[prop_65_mapped[object_type + ' current'] == True].tolist():
            ## deprecated in Wikidata -> appears re-listed, rank change may be needed
            if wd_item in deprecated_df['WDID'].loc[deprecated_df['deprecated_type'] == object_type + ' delisted'].tolist():
                rank_relist.append(object_type)
            ## already current in Wikidata -> no change needed
            elif wd_item in current_df['WDID'].loc[current_df['ObjectType'] == object_type + ' current'].tolist():
                no_change.append(object_type)
            ## currently listed but not in Wikidata as such -> new statement
            else:
                new_statetypes.append(object_type)
    comparison_dict = {'new_statetypes': new_statetypes,
                       'new_dep_types': new_dep_types,
                       'dep_no_change': dep_no_change,
                       'no_change': no_change,
                       'rank_delist': rank_delist,
                       'rank_relist': rank_relist}
    return comparison_dict
In [21]:
#### Male reproductive toxicants, Female reproductive toxicants, and developmental toxicants
#### are all subclasses of reproductive toxicants in Wikidata, so it would be redundant/unnecessary
#### to include 'instance of reproductive toxicant' if it's already an instance of one of the other three.
#### Drop 'reptox' from each action list whenever a more specific category already covers the item.
def rep_redundancy_check(repcheck, comparison_dict):
    """Mutate and return comparison_dict with redundant 'reptox' entries removed.

    :param repcheck: set of the specific hazard keys that imply reptox
    :param comparison_dict: output of check_statement_status
    """
    def covered(*list_names):
        ## True when any specific hazard from repcheck appears in the named lists
        return any(repcheck & set(comparison_dict[name]) for name in list_names)

    if 'reptox' in comparison_dict['new_statetypes'] and covered('new_statetypes', 'no_change'):
        comparison_dict['new_statetypes'].remove('reptox')
    if 'reptox' in comparison_dict['new_dep_types'] and covered('new_dep_types', 'dep_no_change'):
        comparison_dict['new_dep_types'].remove('reptox')
    for list_name in ('rank_delist', 'rank_relist'):
        if 'reptox' in comparison_dict[list_name] and covered(list_name):
            comparison_dict[list_name].remove('reptox')
    return comparison_dict
In [66]:
#### Iteratively create 'instance of' statements for each type of toxicity listed in CA prop 65
#### Include qualifiers for 'date listed' ('start date') if available
#### Include statement on why it's deprecated if it's deprecated
def generate_statements(statetype_set, dep_list, eachitem_row):
    """Build one wdi_core.WDItemID statement per hazard type in statetype_set.

    :param statetype_set: hazard keys needing statements (e.g. ['cancer'])
    :param dep_list: subset of those keys whose statements get deprecated rank
    :param eachitem_row: single-row DataFrame for the chemical
    :return: list of statements ready for WDItemEngine
    Relies on module-level object_qid, list_date, list_prop, delist_reason
    and reference (set by the main loop before calling).
    """
    statements_to_add = []
    ## direct iteration replaces the original index loop; the stray
    ## 'j=j+1' at the end of the old for-loop body (a leftover from a
    ## while loop) did nothing and has been removed
    for run_type in statetype_set:
        run_object_wdid = object_qid[run_type]
        date_type = list_date[run_type]
        qualifier_list = []
        if date_type != 'None':
            runlist_date = str(eachitem_row.iloc[0][date_type])
            if runlist_date != 'None':
                ## listing date goes in as a 'start time' (P580) qualifier
                list_qualifier = wdi_core.WDTime(
                    datetime.strptime(runlist_date, '%m/%d/%Y').strftime("+%Y-%m-%dT00:00:00Z"),
                    prop_nr='P580', is_qualifier=True)
                qualifier_list.append(list_qualifier)
        if run_type in dep_list:
            qualifier_list.append(delist_reason)
            state_rank = 'deprecated'
        else:
            state_rank = 'normal'
        prop65_statement = wdi_core.WDItemID(value=run_object_wdid, prop_nr=list_prop, rank=state_rank,
                                             qualifiers=qualifier_list, references=[copy.deepcopy(reference)])
        statements_to_add.append(prop65_statement)
    return statements_to_add
In [38]:
#### Not sure how to change the rank of a specific statement using WDI.
#### Since the reference date will need to change anyway, we'll just replace the entire statement.
#### But because these relationships are 'instance of' relations, we risk losing important classifications
#### Eg- instance of chemical compound, etc.
#### To ensure this doesn't happen, retrieve the previous statements and include them when doing a replacement.
def retrieve_prev_state_list(subject_qid, dep_object_qid_list):
    """Copy every existing statement on the subject's list_prop claim EXCEPT
    those whose object QID is about to be regenerated.

    :param subject_qid: QID of the chemical item
    :param dep_object_qid_list: object QIDs whose statements will be replaced
    :return: list of wdi_core.WDItemID statements to carry over unchanged
    """
    wd_item = wdi_core.WDItemEngine(wd_item_id=subject_qid)
    mass_statement = wd_item.get_wd_json_representation()['claims'][list_prop]
    states_to_keep = []
    ## direct iteration replaces the original index loop
    for statement in mass_statement:
        sub_qid = statement['mainsnak']['datavalue']['value']['id']
        if sub_qid in dep_object_qid_list:
            ## this one gets a freshly generated replacement elsewhere
            continue
        saved_statement = wdi_core.WDItemID(
            value=sub_qid, prop_nr=list_prop, rank=statement['rank'],
            qualifiers=statement['qualifiers'],
            references=[copy.deepcopy(statement['references'])])
        states_to_keep.append(saved_statement)
    return states_to_keep
In [81]:
#### Main function
#### Strategy for changing ranks: pull all P31 statements, copy them all, but replace the one that's different.
#### Then do a complete replacement instead of appending.
#### For completely new statements, append them.
delist_reason = wdi_core.WDItemID('Q56478729', prop_nr='P2241', is_qualifier=True)
edit_log = []
## Specific hazard keys that make a generic 'reptox' statement redundant.
## BUGFIX: was {'femrep','menrep','devrep'} -- 'devrep' is not a key in
## object_qid; the developmental-toxicant key is 'devtox', so the reptox
## redundancy check never fired for developmental toxicants.
repcheck = set(['femrep', 'menrep', 'devtox'])
for eachitem in all_entities[0:5]:
    eachitem_row = prop_65_mapped.loc[prop_65_mapped['WDID'] == eachitem]
    prop65_url = url_base + eachitem_row.iloc[0]['url_stub']
    reference = create_reference(prop65_url)
    comparison_dict = check_statement_status(eachitem, object_qid)
    comparison_dict = rep_redundancy_check(repcheck, comparison_dict)
    ## Identify existing statements to change rank
    dep_states = comparison_dict['rank_delist']
    change_states = comparison_dict['rank_delist'] + comparison_dict['rank_relist']
    if len(change_states) > 0:
        change_object_qid_list = [object_qid[eachhaz] for eachhaz in change_states]
        ## Pull the existing statements
        states_to_keep = retrieve_prev_state_list(eachitem, change_object_qid_list)
        ## generate rank-changed statements to replace existing statements
        changes_to_add = generate_statements(change_states, dep_states, eachitem_row)
        ## Combine existing statements with generated statements
        states_to_write = states_to_keep + changes_to_add
        ## Write the statements with append_value=None, to replace the entire claim
        print(eachitem, len(states_to_write))
        #item = wdi_core.WDItemEngine(wd_item_id=eachitem, data=states_to_write, append_value=None,
        #                             global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
        #item.write(login, edit_summary="added CA prop 65 relation info")
    ## Generate new statements to write
    dep_list = comparison_dict['new_dep_types']
    statetype_set = comparison_dict['new_statetypes'] + dep_list
    if len(statetype_set) > 0:
        statements_to_add = generate_statements(statetype_set, dep_list, eachitem_row)
        ## Write the statements using append method
        print(eachitem, len(statements_to_add))
        #item = wdi_core.WDItemEngine(wd_item_id=eachitem, data=statements_to_add, append_value=list_prop,
        #                             global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
        #item.write(login, edit_summary="added CA prop 65 relation info")
    edit_log.append({'WDID': eachitem, 'new statements': len(statetype_set),
                     'no_change': (len(comparison_dict['dep_no_change']) + len(comparison_dict['no_change'])),
                     'rank_changes': len(change_states)})
## BUGFIX: edit_log_df was never defined before being written out (NameError)
edit_log_df = pd.DataFrame(edit_log)
edit_log_df.to_csv('data/log_' + str(datetime.now().strftime("+%Y-%m-%dT00:00:00Z")) + '.tsv', sep='\t', header=True)
In [82]:
## Summary of what the main loop did (or would do) per item
print(pd.DataFrame(edit_log))
In [ ]:
#### Test scripts
In [ ]:
#### Try changing the rank in an existing WD statement
## sandbox Q4115189
## P31 instance of
## Carcinogen: Q187661
deprec_update_list = ['cancer', 'devtox']
dep_object_qid_list = [object_qid[eachhaz] for eachhaz in deprec_update_list]
subject_qid = 'Q4115189'
wd_item = wdi_core.WDItemEngine(wd_item_id=subject_qid)
mass_statement = wd_item.get_wd_json_representation()['claims'][list_prop]
statementlist = []
## Keep every existing P31 statement except the ones we are about to replace
## (direct iteration replaces the original range(len) index loop)
for statement in mass_statement:
    sub_qid = statement['mainsnak']['datavalue']['value']['id']
    state_rank = statement['rank']
    qualifier_list = statement['qualifiers']
    reference = statement['references']
    if sub_qid in dep_object_qid_list:
        print(sub_qid)
    else:
        saved_statement = wdi_core.WDItemID(value=sub_qid, prop_nr="P31", rank=state_rank,
                                            qualifiers=qualifier_list, references=[copy.deepcopy(reference)])
        statementlist.append(saved_statement)
## Re-add the two targeted classes as fresh normal-rank statements
state_rank = 'normal'
qualifier_list = None
reference = create_reference('https://wikidata.org')
prop65_statement1 = wdi_core.WDItemID(value=dep_object_qid_list[0], prop_nr="P31", rank=state_rank,
                                      qualifiers=qualifier_list, references=[copy.deepcopy(reference)])
prop65_statement2 = wdi_core.WDItemID(value=dep_object_qid_list[1], prop_nr="P31", rank=state_rank,
                                      qualifiers=qualifier_list, references=[copy.deepcopy(reference)])
statementlist.append(prop65_statement1)
statementlist.append(prop65_statement2)
print(statementlist)
## append_value=None replaces the whole claim rather than appending
item = wdi_core.WDItemEngine(wd_item_id='Q4115189', data=statementlist, append_value=None)
item.write(login, edit_summary="test deprecations")
In [ ]:
In [33]:
## Sample query for deprecated item
"""
#instances of carcinogen delisted
SELECT ?item ?itemLabel {
?item ps:P31 wd:Q187661 .
?item pq:P2241 wd:Q56478729.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
"""
## ps: (statement value) + pq:P2241 (reason for deprecated rank) reach
## deprecated-rank statements, which truthy wdt: queries would skip
sparqlQuery = "SELECT ?item {?item ps:P31 wd:Q187661. ?item pq:P2241 wd:Q56478729. }"
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)
print(result)
In [ ]:
In [ ]: