In [1]:
import pickle
In [2]:
# Load the pickled object stored in "arabicEntity.local" into `arabic_senlist`.
# NOTE(review): later cells call .items() on it and slice its values, so it is
# presumably a dict of entity -> sequence — confirm against whatever wrote the file.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open("arabicEntity.local", mode="rb") as entity_file:
    arabic_senlist = pickle.load(entity_file)
In [7]:
import pickle
# Load the pickled spaCy NER results from "stuff_makesense_spacy_ner".
# NOTE(review): later cells index it and call .items() on its elements, so it is
# presumably a list of {entity: label} dicts — confirm against the producer.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open("stuff_makesense_spacy_ner", mode="rb") as ner_file:
    spacy_entity = pickle.load(ner_file)
In [25]:
# For every entry of the first spaCy record, print whether its label is 'PER'.
for label in spacy_entity[0].values():
    print(label == 'PER')
In [10]:
# Peek at the first (key, value) pair of arabic_senlist: print the key and the
# first element of its value, then stop as soon as a second pair is reached.
count = 0
for count, (key, value) in enumerate(arabic_senlist.items(), start=1):
    if count >= 2:
        break
    print(key, value[0])
In [15]:
# Small scratch list used to try out slicing behaviour.
test = list(range(4))
In [18]:
# A slice whose upper bound exceeds the list length is clamped rather than raising.
test[:8]
Out[18]:
In [14]:
# Build the person (PER) and organization (ORG) entity dictionaries, each mapping an entity to its first five sentence entries
In [59]:
%%time
person_sentences_dic={}
org_sentences_dic={}
count=0
for entity in spacy_entity:
if(count%5000==0):
print(str(count)+" processed");
try:
for key,value in entity.items():
if(value=='PER'):
sentences_list=arabic_senlist[key][0:5]
person_sentences_dic[key]=sentences_list
count=count+1
elif(value=='ORG'):
sentences_list1=arabic_senlist[key][0:5]
org_sentences_dic[key]=sentences_list1
count=count+1
else:
continue;
except:
continue;
In [47]:
# Number of entries collected in person_sentences_dic.
len(person_sentences_dic)
Out[47]:
In [34]:
# Peek at the first spaCy record only: for each of its entities, print the
# first four entries stored for that entity in arabic_senlist.
count = 0
for count, entity in enumerate(spacy_entity, start=1):
    if count >= 2:
        break
    for key in entity.keys():
        print(arabic_senlist[key][0:4])
In [64]:
# Show the first two entries of person_sentences_dic (key, then its stored list).
# The loop stops once both have been printed instead of walking the rest of the
# dictionary, matching the other "peek" cells in this notebook.
count = 0
for count, (key, value) in enumerate(person_sentences_dic.items(), start=1):
    if count > 2:
        break
    print(key)
    print(value)
In [76]:
# Verify that the word appears in the sentence referenced by the sentence ID stored for it
In [77]:
# Sample Arabic sentence used for the manual membership check in the cells below.
sentence="ويقول الادعاء العام الايطالي إن الإمام المصري اختطف على أيدي عملاء السي.آي.إيه الذين نقلوه إلى قاعدة «آفيانو» الجوية والتي تقيم فيها وحدات إيطالية واميركية، ثم نقل إلى ألمانيا وبعدها إلى مصر، حيث تعرض للتعذيب، بحسب قوله."
In [78]:
# Word to look for in the sample sentence.
word="العام"
In [79]:
# Substring test: evaluates to True whenever `word` occurs anywhere in `sentence`,
# including inside a longer token — it does not check word boundaries.
word in sentence
Out[79]:
In [65]:
import pickle
# Persist person_sentences_dic to "per_entities_sentences_dic.data" using the
# highest pickle protocol. Any failure is printed and the notebook carries on.
try:
    with open("per_entities_sentences_dic.data", mode='wb') as out_file:
        pickle.dump(person_sentences_dic, out_file, protocol=pickle.HIGHEST_PROTOCOL)
except Exception as err:
    print(err)
In [66]:
import pickle
# Persist org_sentences_dic to "org_entities_sentences_dic.data" using the
# highest pickle protocol. Any failure is printed and the notebook carries on.
try:
    with open("org_entities_sentences_dic.data", mode='wb') as out_file:
        pickle.dump(org_sentences_dic, out_file, protocol=pickle.HIGHEST_PROTOCOL)
except Exception as err:
    print(err)
In [80]:
from pymongo import MongoClient
import time
import pickle
# Connect to the MongoDB server and select the collection that will receive the
# person-entity documents.
# The connection string must start with "mongodb://" (two slashes); the previous
# "mongodb:/..." form is not a valid MongoDB URI. The extra default
# MongoClient() that was created and immediately overwritten has been dropped.
client = MongoClient('mongodb://portland.cs.ou.edu')
db = client['lexisnexis']
table = db["fast_per_entities"]
In [82]:
# Number of person entries about to be written to MongoDB.
len(person_sentences_dic)
Out[82]:
In [85]:
# Write one document per person entity to the current `table` collection,
# printing the running total every 5000 inserts.
# Each document has the shape {"word": <entity>, "sentenceids": <stored list>}.
count = 0
for count, (key, value) in enumerate(person_sentences_dic.items(), start=1):
    if count % 5000 == 0:
        print(count)
    table.insert_one({"word": key, "sentenceids": value})
In [86]:
# Number of organization entries about to be written to MongoDB.
len(org_sentences_dic)
Out[86]:
In [ ]:
# Write one document per organization entity to the "fast_org_entities" collection.
# Uses insert_one, as the person-entity cell does: Collection.insert is deprecated
# and was removed in PyMongo 4.
table = db["fast_org_entities"]
for key, value in org_sentences_dic.items():
    table.insert_one({"word": key, "sentenceids": value})
In [ ]: