In [148]:
import pandas as pd
import spacy
import pdb
from nltk.corpus import stopwords

In [152]:
list_stopWords=list(set(stopwords.words('english')))

In [4]:
nlp = spacy.load('en_core_web_md')

In [5]:
df_demo = pd.read_csv('demo.csv',index_col=0)

In [6]:
df_demo.head()


Out[6]:
class date description title
0 Disasters and accidents 2017/9/1 Hurricane Irma, now a Category 2 hurricane wit... 2017 Atlantic hurricane season
1 Health and medicine 2017/9/1 Researchers report, in the Environmental Scien... Great Lakes
2 International relations 2017/9/1 South Korean President Moon Jae-in and U.S. Pr... South Korea鈥揢nited States relations
3 Law and crime 2017/9/1 United States federal judge Richard Posner has... United States Court of Appeals for the Seventh...
4 Politics and elections 2017/9/1 The Labour Party, led by Jacinda Ardern, surge... New Zealand general election, 2017

In [7]:
def class_code(type_str):
    type_str = type_str.lower()
    if 'armed' in type_str or 'attack' in type_str or 'conflict' in type_str:
        return 1
    elif 'disaster' in type_str or 'accident' in type_str:
        return 2
    elif 'law' in type_str or 'crime' in type_str:
        return 3
    elif 'politic' in type_str or 'election' in type_str:
        return 4
    elif 'international' in type_str or 'relation' in type_str:
        return 5
    elif 'science' in type_str or 'technology' in type_str:
        return 6
    elif 'business' in type_str or 'econom' in type_str:
        return 7
    elif 'art' in type_str or 'culture' in type_str:
        return 8
    elif 'sport' in type_str:
        return 9
    elif 'health' in type_str or 'environment' in type_str:
        return 10
    else:
        return 0

In [8]:
df_demo['class_code'] = df_demo['class'].apply(class_code)

In [9]:
df_demo.head()


Out[9]:
class date description title class_code
0 Disasters and accidents 2017/9/1 Hurricane Irma, now a Category 2 hurricane wit... 2017 Atlantic hurricane season 2
1 Health and medicine 2017/9/1 Researchers report, in the Environmental Scien... Great Lakes 10
2 International relations 2017/9/1 South Korean President Moon Jae-in and U.S. Pr... South Korea鈥揢nited States relations 5
3 Law and crime 2017/9/1 United States federal judge Richard Posner has... United States Court of Appeals for the Seventh... 3
4 Politics and elections 2017/9/1 The Labour Party, led by Jacinda Ardern, surge... New Zealand general election, 2017 4

In [10]:
def description_clean(description):
    description = description.split('. (')[0]+'.'
    return description

In [11]:
df_demo['des_clean'] = df_demo['description'].apply(description_clean)
for i in df_demo.itertuples(): print(i[3]) print(i[6])

In [12]:
from spacy import displacy
from IPython.display import display, HTML

In [13]:
def class_similarity(class_text,span):
    return nlp(class_text).similarity(nlp(span))

In [14]:
def print_pos_(doc,class_text):
    doc_pos_ = []
    for token in doc:
        if token.pos_ in ['VERB']:   #['AUX','NOUN','PROPN','VERB']:
            doc_pos_.extend([token.text,token.pos_,str(class_similarity(class_text,token.text))])
    print('|'.join(doc_pos_))

In [15]:
def print_noun_chunks(doc,class_text):
    noun_chunks =[]
    for i in doc.noun_chunks:
        noun_chunks.extend([i.text,str(class_similarity(class_text,i.text))])
    print('|'.join(noun_chunks))

In [16]:
def expand_with_noun_chuncks(noun_chuncks,ent):
    for noun_chunck in noun_chuncks:
        if ent in noun_chunck:
            return noun_chunck
    return ent

In [32]:
def argument_candidate(doc,class_text):
    arguments = []
    noun_chuncks = [i.text for i in doc.noun_chunks]
    for i in doc.ents:
        #arguments.append((expand_with_noun_chuncks(noun_chuncks,i.text),i.label_,str(class_similarity(class_text,expand_with_noun_chuncks(noun_chuncks,i.text)))))
        arguments.append((i.text,i.label_))
    related_when = [i for i in arguments if i[1] in ['TIME','DATE']]
    related_where = [i for i in arguments if i[1] in ['GPE','LOC','FACILITY']]
    related_who = [i for i in arguments if i[1] in ['PERSON','NORP','ORG','']]
    related_what = [i for i in arguments if i[1] in ['PRODUCT','EVENT','WORK_OF_ART','LAW','LANGUAGE','PERCENT','MONEY','QUANTITY','ORDINAL','CARDINAL']]
    return arguments,{
        'related_when':related_when,
        'related_where':related_where,
        'related_who':related_who,
        'related_what':related_what,
    }
def trigger_candidate(doc,arguments,class_text): triggers = [] arguments = [argu[0] for argu in arguments] noun_chuncks = [i.text for i in doc.noun_chunks] triggers.extend([(i,str(class_similarity(class_text,i))) for i in set(noun_chuncks)-set(arguments)]) for token in doc: if token.tag_.startswith('V'): triggers.append((token.text,token.tag_,str(class_similarity(class_text,token.text)))) return triggers
def trigger_candidate(doc,class_text): triggers_NN = [] triggers_V = [] for token in doc: if not token.is_stop: if token.tag_.startswith('NN'): triggers_NN.append((token.text,token.tag_,str(class_similarity(class_text,token.text)))) if token.tag_.startswith('V'): triggers_V.append((token.text,token.tag_,str(class_similarity(class_text,token.text)))) triggers = sorted(triggers_NN,key=lambda x:x[2],reverse=True)[:3]+sorted(triggers_V,key=lambda x:x[2],reverse=True)[:3] return triggers

In [84]:
def trigger_candidate(doc,class_text,doc_index):
    triggers = []
    for token in doc:
        #if not token.is_stop and token.tag_.startswith('NN') or token.tag_.startswith('V'):
        if not token.is_stop and token.tag_.startswith('V'):
            if token.text.lower() in EFITF[doc_index].keys():
                triggers.append((token.text,token.tag_,str(class_similarity(class_text,token.text)+EFITF[doc_index][token.text.lower()])))
    triggers = sorted(triggers,key=lambda x:x[2],reverse=True)[:3]
    return triggers

In [85]:
def print_arguments_triggers(arguments_dict,triggers):
    print('Arguments:')
    #pdb.set_trace()
    for k,v in arguments_dict.items():
        print(k+' : '+'|'+'|'.join(['|'.join(i) for i in v]))
    triggers_str = ''
    for i in triggers:
        triggers_str+='|'+'|'.join(i)
    print('Triggers:',triggers_str)

In [174]:
def get_kw_set(doc,class_text,doc_index):
    kws = []
    for i in doc.ents:
        #kws.extend(i.text.split(' '))
        kws.append(i.text)
    triggers = []
    for token in doc:
        if not token.is_stop and token.tag_.startswith('V'):
            if token.text.lower() in EFITF[doc_index].keys():
                triggers.append((token.text,token.tag_,str(class_similarity(class_text,token.text)+EFITF[doc_index][token.text.lower()])))
    triggers = sorted(triggers,key=lambda x:x[2],reverse=True)[:3]
    for i in triggers:
        kws.append(i[0])
    noun_chunks = []
    for i in doc.noun_chunks:
        noun_chunks.append((i.text,str(class_similarity(class_text,i.text))))
    #noun_chunks = sorted(noun_chunks,key=lambda x:x[1],reverse=True)[:1]
    #for i in noun_chunks:
     #   kws.extend(i[0].split(' '))
    kws.append(sorted(noun_chunks,key=lambda x:x[1],reverse=True)[0][0].split(' ')[-1])
    kws = [w for w in kws if not w in list_stopWords]
    return list(set(kws))

In [178]:
def print_kws_(kws):
    kws_str = ''
    for i in kws:
        kws_str+='|'+i
    print('kws:',str(len(kws)),kws_str)

In [179]:
def display_doc(doc,style='ent',class_text=None,doc_index=None):
    doc = nlp(doc)
    display(HTML(displacy.render(doc, style=style)))
    arguments,arguments_dict = argument_candidate(doc,class_text)
    triggers = trigger_candidate(doc,class_text,doc_index)
    print_arguments_triggers(arguments_dict,triggers)
    #print_pos_(doc,class_text)
    print_noun_chunks(doc,class_text)
    kws  = get_kw_set(doc,class_text,doc_index)
    print_kws_(kws)

In [180]:
for event in df_demo.iterrows():
    print(event[0])
    print(event[1]['class'],'\t',event[1]['date'],'\t',event[1]['title'])
    display_doc(event[1]['des_clean'],class_text=event[1]['class'].replace('and',''),doc_index=event[0])


0
Disasters and accidents 	 2017/9/1 	 2017 Atlantic hurricane season
Hurricane Irma PERSON , now a Category 2 QUANTITY hurricane with maximum sustained winds of 110 miles per hour QUANTITY ( 175 kilometers per hour QUANTITY ), is expected to resume strengthening this weekend DATE increasing the danger when it nears the Leeward Islands LOC in the Caribbean LOC next Thursday DATE .
Arguments:
related_when : |this weekend|DATE|next Thursday|DATE
related_where : |the Leeward Islands|LOC|Caribbean|LOC
related_who : |Hurricane Irma|PERSON
related_what : |Category 2|QUANTITY|110 miles per hour|QUANTITY|175 kilometers per hour|QUANTITY
Triggers: |increasing|VBG|0.576451752579|expected|VBN|0.476465759179|strengthening|VBG|0.427861711087|resume|VB|0.36165819714|nears|VBZ|0.354343659864
Hurricane Irma|0.357908785797|now a Category 2 hurricane|0.380235448992|maximum sustained winds|0.374320373452|110 miles|0.13337395046|hour|0.162428970276|175 kilometers|0.0666239555169|hour|0.162428970276|the danger|0.462647103084|it|0.264386930191|the Leeward Islands|0.247873127015|the Caribbean|0.208435926875
kws: 12 |175 kilometers per hour|Category 2|strengthening|danger|this weekend|Caribbean|increasing|next Thursday|the Leeward Islands|Hurricane Irma|expected|110 miles per hour
1
Health and medicine 	 2017/9/1 	 Great Lakes
Researchers report, in the Environmental Science & Technology ORG journal, the discovery of antidepressant concentrations in 10 CARDINAL kinds of fish in the Niagara River LOC , which links to the Great Lakes LOC via Lakes Erie LOC and Ontario GPE . Speculated causes include inadequate treatment of water re-introduced into said bodies of water.
Arguments:
related_when : |
related_where : |the Niagara River|LOC|the Great Lakes|LOC|Lakes Erie|LOC|Ontario|GPE
related_who : |the Environmental Science & Technology|ORG
related_what : |10|CARDINAL
Triggers: |include|VBP|0.516214151193|report|VBP|0.511450318452|introduced|VBN|0.438623985499|said|VBN|0.42566708155|links|VBZ|0.425454435351
Researchers|0.486778847535|the Environmental Science & Technology journal|0.626985301713|the discovery|0.409114996831|antidepressant concentrations|0.621126117878|10 kinds|0.338455982602|fish|0.307684981229|the Niagara River|0.388616151142|the Great Lakes|0.358734376058|Lakes Erie|0.32894182904|Ontario|0.32870366923|Speculated causes|0.402148572707|inadequate treatment|0.606032998365|water|0.354915577487|said bodies|0.362389242075|water|0.354915577487
kws: 10 |report|the Environmental Science & Technology|include|the Niagara River|Ontario|10|the Great Lakes|Lakes Erie|introduced|journal
2
International relations 	 2017/9/1 	 South Korea鈥揢nited States relations
South Korean NORP President Moon Jae PERSON -in and U.S. GPE President Donald Trump PERSON agree to revise the South Korea GPE Ballistic Missile Range Guidelines which caps South Korea's GPE missile development.
Arguments:
related_when : |
related_where : |U.S.|GPE|South Korea|GPE|South Korea's|GPE
related_who : |South Korean|NORP|Moon Jae|PERSON|Donald Trump|PERSON
related_what : |
Triggers: |revise|VB|0.53351519271|agree|VBP|0.503212040706|caps|VBZ|0.317745383302
South Korean President Moon Jae-in|0.40294889442|U.S. President Donald Trump|0.413748916657|the South Korea Ballistic Missile Range Guidelines|0.486171632835|South Korea's missile development|0.51063238924
kws: 10 |South Korea's|agree|South Korean|Donald Trump|Moon Jae|South Korea|U.S.|development|revise|caps
3
Law and crime 	 2017/9/1 	 United States Court of Appeals for the Seventh Circuit
United States GPE federal judge Richard Posner PERSON has announced his retirement after three decades DATE of service on the bench of the 7th ORDINAL U.S. Circuit Court of Appeals ORG , headquartered in Chicago GPE , Illinois GPE . Appointed by President Ronald Reagan PERSON in 1981 DATE , Justice Posner PERSON has written more than 3,300 CARDINAL opinions from the bench.(AP ORG ).
Arguments:
related_when : |three decades|DATE|1981|DATE
related_where : |United States|GPE|Chicago|GPE|Illinois|GPE
related_who : |Richard Posner|PERSON|U.S. Circuit Court of Appeals|ORG|Ronald Reagan|PERSON|Posner|PERSON|bench.(AP|ORG
related_what : |7th|ORDINAL|more than 3,300|CARDINAL
Triggers: |written|VBN|0.547143896013|Appointed|VBN|0.456781502092|headquartered|VBN|0.414551141092|announced|VBN|0.369286792484
United States federal judge Richard Posner|0.631050195264|his retirement|0.423290169946|three decades|0.423120644064|service|0.284943002868|the bench|0.343527492927|the 7th U.S. Circuit Court|0.529322510022|Appeals|0.462955921122|Chicago|0.29550988158|Illinois|0.342008780656|President Ronald Reagan|0.265534860052|Justice Posner|0.45301231031|more than 3,300 opinions|0.398155457616|the bench.(AP|0.403803427413
kws: 15 |1981|three decades|Appointed|Illinois|7th|Ronald Reagan|U.S. Circuit Court of Appeals|United States|Richard Posner|Chicago|Posner|more than 3,300|written|headquartered|bench.(AP
4
Politics and elections 	 2017/9/1 	 New Zealand general election, 2017
The Labour Party ORG , led by Jacinda Ardern PERSON , surges in the polls and is neck-and-neck with the National Party ORG , led by Bill English PERSON , for the race for Prime Minister of New Zealand GPE .
Arguments:
related_when : |
related_where : |New Zealand|GPE
related_who : |The Labour Party|ORG|Jacinda Ardern|PERSON|the National Party|ORG|Bill English|PERSON
related_what : |
Triggers: |led|VBN|0.697825518784|led|VBN|0.697825518784|surges|VBZ|0.473210110132
The Labour Party|0.574051702776|Jacinda Ardern|0.021859615811|the polls|0.690135010383|neck|0.0705136826232|-neck|0.0|the National Party|0.589717172463|Bill English|0.352255377966|the race|0.450207550477|Prime Minister|0.48925934282|New Zealand|0.254721162151
kws: 8 |polls|The Labour Party|New Zealand|Jacinda Ardern|surges|led|Bill English|the National Party
5
Politics and elections 	 2017/9/1 	 Kenyan general election, 2017
The Supreme Court ORG of Kenya GPE annuls the results of the recent presidential election, that indicated President Uhuru Kenyatta PERSON was reelected, due to irregularities, and orders a new election.
Arguments:
related_when : |
related_where : |Kenya|GPE
related_who : |The Supreme Court|ORG|Uhuru Kenyatta|PERSON
related_what : |
Triggers: |reelected|VBN|0.903135476102|indicated|VBD|0.486114605134|annuls|VBP|0.443648421403
The Supreme Court|0.446410985937|Kenya|0.257459323197|the results|0.359382037781|the recent presidential election|0.793275848827|President Uhuru Kenyatta|0.274939847061|irregularities|0.186572255|a new election|0.653051979632
kws: 7 |reelected|Uhuru Kenyatta|election|Kenya|annuls|The Supreme Court|indicated
6
Politics and elections 	 2017/9/1 	 Presidency of Donald Trump
Paul Ryan PERSON (R-WI), U.S. GPE Speaker of the House of Representatives ORG , urges President Donald Trump PERSON not to rescind the Deferred Action for Childhood Arrivals ORG ( DACA ORG ) program that protects immigrants who illegally entered the U.S. GPE as children from deportation.
Arguments:
related_when : |
related_where : |U.S.|GPE|U.S.|GPE
related_who : |Paul Ryan|PERSON|the House of Representatives|ORG|Donald Trump|PERSON|the Deferred Action for Childhood Arrivals|ORG|DACA|ORG
related_what : |
Triggers: |urges|VBZ|0.561677213396|rescind|VB|0.52508074443|entered|VBD|0.444119911808|protects|VBZ|0.295272475197
Paul Ryan|0.250817680569|R-WI|0.147229129185|U.S. Speaker|0.369958426959|the House|0.325866711781|Representatives|0.431843478778|President Donald Trump|0.482175436017|the Deferred Action|0.393154236352|Childhood Arrivals (DACA) program|0.242730180572|immigrants|0.396761080045|who|0.298166567757|the U.S.|0.437061635613|children|0.219449375835|deportation|0.290521074462
kws: 10 |entered|urges|Donald Trump|the House of Representatives|DACA|rescind|Trump|the Deferred Action for Childhood Arrivals|U.S.|Paul Ryan
7
Science and technology 	 2017/9/1 	 2017 in astronomy
Asteroid 3122 Florence PERSON , which is roughly 2.7 miles QUANTITY ( 4.4 kilometers QUANTITY ) wide, comes within 4.4 million miles QUANTITY ( 7 million km QUANTITY ) of Earth LOC 鈥?approximately 18 CARDINAL times the distance from our planet to the Moon PERSON .
Arguments:
related_when : |
related_where : |Earth|LOC
related_who : |Asteroid 3122 Florence|PERSON|Moon|PERSON
related_what : |roughly 2.7 miles|QUANTITY|4.4 kilometers|QUANTITY|4.4 million miles|QUANTITY|7 million km|QUANTITY|18|CARDINAL
Triggers: |comes|VBZ|0.622712346803
Asteroid|0.20533818023|3122 Florence|0.08808609462|4.4 kilometers|-0.00234738796058|4.4 million miles|0.143568291562|7 million km|0.119574371354|Earth|0.415404542524|our planet|0.471326009147|the Moon|0.362654722181
kws: 10 |comes|7 million km|roughly 2.7 miles|Asteroid 3122 Florence|planet|18|4.4 million miles|Moon|4.4 kilometers|Earth
8
Armed attacks and conflicts 	 2017/9/2 	 Iraqi Civil War
Seven CARDINAL people are killed and 13 CARDINAL are injured after suicide bombers hit a state-run power station near the northern city of Samarra GPE , Iraq GPE . The Islamic State of ORG Iraq GPE and the Levant ORG claims responsibility for the attack.
Arguments:
related_when : |
related_where : |Samarra|GPE|Iraq|GPE|Iraq|GPE
related_who : |The Islamic State of|ORG|Levant|ORG
related_what : |Seven|CARDINAL|13|CARDINAL
Triggers: |killed|VBN|0.756264779291|injured|VBN|0.635162439802|claims|VBZ|0.630417833733|hit|VBD|0.516748470061|run|VBN|0.501444536372
Seven people|0.499983535135|suicide bombers|0.620420973623|a state-run power station|0.42076337093|the northern city|0.429933340975|Samarra|0.0|Iraq|0.371648268077|The Islamic State|0.480012098147|Iraq|0.371648268077|the Levant|0.0788780730697|responsibility|0.483108068312|the attack|0.703502175466
kws: 10 |Samarra|killed|Levant|injured|claims|attack|The Islamic State of|Iraq|13|Seven
9
Business and economy 	 2017/9/2 	 Trump Tower wiretapping allegations
Both the FBI ORG and NSD ORG declare that they possess no records indicating that Trump Tower ORG in New York City GPE , New York GPE was wiretapped earlier in March DATE .
Arguments:
related_when : |earlier in March|DATE
related_where : |New York City|GPE|New York|GPE
related_who : |FBI|ORG|NSD|ORG|Trump Tower|ORG
related_what : |
Triggers: |possess|VBP|0.515733289038|indicating|VBG|0.498797109917|wiretapped|VBN|0.481607520016|declare|VB|0.47655761179
Both the FBI|0.414553402528|NSD|-0.110817321485|they|0.433423319239|no records|0.340568678527|Trump Tower|0.292076124271|New York City|0.455834564546|March|0.189796103938
kws: 10 |possess|earlier in March|City|New York City|wiretapped|indicating|NSD|New York|Trump Tower|FBI
10
Disasters and accidents 	 2017/9/2 	 2017 disasters in Kenya
Seven CARDINAL schoolgirls are killed and ten CARDINAL hospitalised after a fire at the Moi Girls School ORG in Nairobi GPE , Kenya GPE .
Arguments:
related_when : |
related_where : |Nairobi|GPE|Kenya|GPE
related_who : |the Moi Girls School|ORG
related_what : |Seven|CARDINAL|ten|CARDINAL
Triggers: |hospitalised|VBN|0.774052316353|killed|VBN|0.676026278259
Seven schoolgirls|0.218349128256|a fire|0.330839454957|the Moi Girls School|0.22810133241|Nairobi|0.222126340306|Kenya|0.222126340306
kws: 8 |ten|fire|Kenya|the Moi Girls School|killed|Nairobi|hospitalised|Seven
11
International relations 	 2017/9/2 	 Timor Gap
Australia GPE and East Timor GPE settle a dispute between the two CARDINAL countries in the Timor Sea LOC .
Arguments:
related_when : |
related_where : |Australia|GPE|East Timor|GPE|the Timor Sea|LOC
related_who : |
related_what : |two|CARDINAL
Triggers: |settle|VBP|0.642970629479
Australia|0.313285475626|East Timor|0.278371843071|a dispute|0.509121296394|the two countries|0.584559746707|the Timor Sea|0.365749520623
kws: 6 |two|countries|Australia|East Timor|the Timor Sea|settle
12
Law and crime 	 2017/9/2 	 Law enforcement in Cambodia
Cambodian NORP opposition leader Kem Sokha PERSON is arrested for alleged treason.
Arguments:
related_when : |
related_where : |
related_who : |Cambodian|NORP|Kem Sokha|PERSON
related_what : |
Triggers: |alleged|VBN|0.891774744782|arrested|VBN|0.834817634431
Cambodian opposition leader Kem Sokha|0.342590643695|alleged treason|0.581041341985
kws: 5 |treason|Kem Sokha|Cambodian|arrested|alleged
13
Business and economy 	 2017/9/3 	 Economy of Cambodia
The Cambodia Daily newspaper ORG announces it will publish its final edition after being ordered to pay USD$6.3 million CARDINAL in taxes as a result of an investigation, initiated by Prime Minister of Cambodia GPE Hun Sen PERSON , into private companies operating in Cambodia GPE .
Arguments:
related_when : |
related_where : |Cambodia|GPE|Cambodia|GPE
related_who : |The Cambodia Daily newspaper|ORG|Hun Sen|PERSON
related_what : |USD$6.3 million|CARDINAL
Triggers: |operating|VBG|0.662886759688|pay|VB|0.65640424901|initiated|VBN|0.444462779822|publish|VB|0.427345580133|announces|VBZ|0.42408346855
The Cambodia Daily newspaper|0.470491825123|it|0.471583147914|its final edition|0.367476797751|taxes|0.545024875318|a result|0.443271208147|an investigation|0.380493395073|Prime Minister|0.382542511416|Cambodia Hun Sen|0.0984757782364|private companies|0.624084544149|Cambodia|0.222366604159
kws: 8 |The Cambodia Daily newspaper|Hun Sen|operating|USD$6.3 million|pay|companies|initiated|Cambodia
14
Disasters and accidents 	 2017/9/3 	 Hurricane Harvey
Hurricane Harvey PERSON is projected to possibly become the second ORDINAL costliest hurricane in the United States's GPE history, with estimates ranging from US$72 billion MONEY to over $125 billion MONEY . In comparison, Hurricane Katrina's EVENT total damage is estimated to be around $118 to $160 billion MONEY .
Arguments:
related_when : |
related_where : |the United States's|GPE
related_who : |Hurricane Harvey|PERSON
related_what : |second|ORDINAL|US$72 billion|MONEY|over $125 billion|MONEY|Hurricane Katrina's|EVENT|around $118 to $160 billion|MONEY
Triggers: |estimated|VBN|0.530604267815|ranging|VBG|0.502124779847|projected|VBN|0.477080701963
Hurricane Harvey|0.392259451014|the second costliest hurricane|0.35900132965|the United States's history|0.351613673761|estimates|0.318047078796|comparison|0.175073361951|Hurricane Katrina's total damage|0.565762162693
kws: 11 |damage|over $125 billion|US$72 billion|around $118 to $160 billion|projected|ranging|Hurricane Katrina's|the United States's|second|Hurricane Harvey|estimated
15
Disasters and accidents 	 2017/9/3 	 2017 California wildfires
The La Tuna Fire ORG continues to burn in Los Angeles GPE , California GPE , becoming the largest fire in the city's history at around 5,800 acres QUANTITY . Mayor Eric Garcetti PERSON declares a state of emergency.
Arguments:
related_when : |
related_where : |Los Angeles|GPE|California|GPE
related_who : |The La Tuna Fire|ORG|Eric Garcetti|PERSON
related_what : |around 5,800 acres|QUANTITY
Triggers: |continues|VBZ|0.525916660585|burn|VB|0.511758263486|declares|VBZ|0.450082252701
The La Tuna Fire|0.288991587071|Los Angeles|0.203899121292|California|0.266263241727|the largest fire|0.366123665129|the city's history|0.315794170104|around 5,800 acres|0.234410498845|Mayor Eric Garcetti|0.146733920305|a state|0.260954882189|emergency|0.507407083596
kws: 9 |The La Tuna Fire|continues|California|burn|emergency|Eric Garcetti|declares|around 5,800 acres|Los Angeles
16
Disasters and accidents 	 2017/9/3 	 Unexploded ordnance
Large portions of Frankfurt GPE , Germany GPE , are evacuated as local authorities work to defuse a bomb left over from a Royal Air Force ORG raid during World War II EVENT . The evacuation is the largest to occur in Europe LOC since World War II EVENT .
Arguments:
related_when : |
related_where : |Frankfurt|GPE|Germany|GPE|Europe|LOC
related_who : |Royal Air Force|ORG
related_what : |World War II|EVENT|World War II|EVENT
Triggers: |occur|VB|0.736911283715|defuse|VB|0.578842760554|work|VBP|0.453727826499|evacuated|VBN|0.439276559012|left|VBN|0.333185006519
Large portions|0.22377748531|Frankfurt|0.108952229158|Germany|0.169187339897|local authorities|0.351156004567|a bomb|0.27832351347|a Royal Air Force raid|0.270302638226|World War II|0.361291829215|The evacuation|0.474433452575|Europe|0.217309333547|World War II|0.361291829215
kws: 9 |work|World War II|defuse|Europe|evacuation|occur|Germany|Frankfurt|Royal Air Force
17
International relations 	 2017/9/3 	 2017 North Korea crisis
2017 CARDINAL North Korean NORP nuclear test FAC A 6.3 CARDINAL magnitude earthquake is detected near the Punggye-ri Nuclear Test Site LOC in Kilju County GPE , North Korea GPE , after North Korea GPE tests its sixth ORDINAL and most powerful nuclear weapon to date. North Korean NORP state media claims the country tested a hydrogen bomb that can be fitted on an ICBM NORP .
Arguments:
related_when : |
related_where : |the Punggye-ri Nuclear Test Site|LOC|Kilju County|GPE|North Korea|GPE|North Korea|GPE
related_who : |North Korean|NORP|North Korean|NORP|ICBM|NORP
related_what : |2017|CARDINAL|6.3|CARDINAL|sixth|ORDINAL
Triggers: |claims|VBZ|0.496924830016|tests|VBZ|0.380416228299|tested|VBD|0.308671368003|detected|VBN|0.236958337819|fitted|VBN|0.202453237052
2017 North Korean nuclear test|0.427116084126|A 6.3 magnitude earthquake|0.266066753153|the Punggye-ri Nuclear Test Site|0.436814241617|Kilju County|0.215681359642|North Korea|0.343575725129|North Korea|0.343575725129|its sixth and most powerful nuclear weapon|0.484554837886|date|0.206718475618|North Korean state media|0.528687920285|the country|0.538792147084|a hydrogen bomb|0.253070026941|an ICBM|0.319470720851
kws: 13 |country|North Korea|

|ICBM|tested|tests|the Punggye-ri Nuclear Test Site|claims|6.3|Kilju County|North Korean|2017|sixth
18
Science and technology 	 2017/9/3 	 Free-electron laser
The European NORP X-ray free-electron laser is inaugurated in Hamburg GPE , Germany GPE .
Arguments:
related_when : |
related_where : |Hamburg|GPE|Germany|GPE
related_who : |European|NORP
related_what : |
Triggers: |inaugurated|VBN|0.578445420642
The European X-ray free-electron laser|0.424491526623|Hamburg|0.16417236916|Germany|0.22523847438
kws: 5 |European|Hamburg|Germany|inaugurated|laser
19
Armed attacks and conflicts 	 2017/9/4 	 Syrian Civil War
Syrian NORP state television reports the Syrian Army ORG reaches a point 3 kilometers QUANTITY from Deir GPE ez-Zor, a city besieged by the Islamic NORP State of Iraq GPE and the Levant ORG since 2014 DATE .
Arguments:
related_when : |2014|DATE
related_where : |Deir|GPE|Iraq|GPE
related_who : |Syrian|NORP|the Syrian Army|ORG|Islamic|NORP|Levant|ORG
related_what : |3 kilometers|QUANTITY
Triggers: |besieged|VBN|0.772017395008|reports|VBZ|0.636829825263|reaches|VBZ|0.464214869605
Syrian state television|0.438559804509|the Syrian Army|0.573829628291|a point|0.370069577379|Deir ez-Zor|-0.0590066802067|a city|0.363040196838|the Islamic State|0.480012098147|Iraq|0.371648268077|the Levant|0.0788780730697
kws: 12 |3 kilometers|2014|besieged|Levant|the Syrian Army|reports|Deir|reaches|Iraq|Syrian|Army|Islamic
20
Business and economy 	 2017/9/4 	 Economy of the United States
United Technologies Corp ORG will buy airplane parts maker Rockwell Collins ORG for USD$30 Billion FAC , including seven billion CARDINAL in debt previously incurred by Rockwell Collins PERSON .
Arguments:
related_when : |
related_where : |
related_who : |United Technologies Corp|ORG|Rockwell Collins|ORG|Rockwell Collins|PERSON
related_what : |seven billion|CARDINAL
Triggers: |incurred|VBN|0.680928468114|including|VBG|0.553071667154|buy|VB|0.498794526875
United Technologies Corp|0.506393096195|airplane parts maker|0.383601014482|Rockwell Collins|0.105153675115|USD$30 Billion|0.431948473239|debt|0.546465344851|Rockwell Collins|0.105153675115
kws: 8 |including|incurred|seven billion|USD$30 Billion|United Technologies Corp|Rockwell Collins|buy|debt
21
Business and economy 	 2017/9/4 	 Media of the United States
Tronc Inc. ORG , the Los Angeles Times ORG and the Chicago Tribune ORG publisher, buys the New York Daily News ORG .
Arguments:
related_when : |
related_where : |
related_who : |Tronc Inc.|ORG|the Los Angeles Times|ORG|the Chicago Tribune|ORG|the New York Daily News|ORG
related_what : |
Triggers: |buys|VBZ|0.604804845472
Tronc Inc.|0.186215667038|the Los Angeles Times|0.333799038299|the Chicago Tribune publisher|0.434655137008|the New York Daily News|0.516343650488
kws: 6 |the Los Angeles Times|buys|the Chicago Tribune|News|the New York Daily News|Tronc Inc.
22
Disasters and accidents 	 2017/9/4 	 2017 Atlantic hurricane season
Hurricane Irma EVENT Hurricane Irma PERSON is now a Category 4 QUANTITY hurricane with maximum sustained winds of 130 mph QUANTITY ( 215 km/h QUANTITY ). Hurricane EVENT warnings are issued for the Leeward Islands LOC in the Caribbean LOC , which are expected to be affected Tuesday DATE . Rainfalls PERSON of up to 10 inches QUANTITY ( 25 centimeters QUANTITY ) are possible. Irma PERSON is forecast to strengthen over the next 48 hours TIME . The governors of Puerto Rico GPE and Florida GPE issue states of emergency.
Arguments:
related_when : |Tuesday|DATE|the next 48 hours|TIME
related_where : |the Leeward Islands|LOC|Caribbean|LOC|Puerto Rico|GPE|Florida|GPE
related_who : |Hurricane Irma|PERSON|Rainfalls|PERSON|Irma|PERSON
related_what : |Hurricane Irma|EVENT|Category 4|QUANTITY|130 mph|QUANTITY|215 km/h|QUANTITY|Hurricane|EVENT|up to 10 inches|QUANTITY|25 centimeters|QUANTITY
Triggers: |affected|VBN|0.697770825858|forecast|VBN|0.560290394748|expected|VBN|0.436268096129|strengthen|VB|0.361954692989|issued|VBN|0.345393822104
Hurricane Irma|0.357908785797|Hurricane Irma|0.357908785797|a Category 4 hurricane|0.375361751773|maximum sustained winds|0.374320373452|130 mph|0.179235986107|215 km|0.0646489319549|h|0.0384976888206|Hurricane warnings|0.561845313666|the Leeward Islands|0.247873127015|the Caribbean|0.208435926875|Rainfalls|0.0|up to 10 inches|0.162286805315|25 centimeters|0.0562783222763|Irma|-0.00522683117666|the next 48 hours|0.203661716508|The governors|0.203248037528|Puerto Rico|0.136524945534|Florida|0.247440250985|states|0.327626734376|emergency|0.507407083596
kws: 19 |forecast|affected|25 centimeters|Caribbean|Tuesday|Rainfalls|the next 48 hours|Puerto Rico|Florida|warnings|215 km/h|up to 10 inches|the Leeward Islands|Hurricane Irma|Category 4|expected|Irma|130 mph|Hurricane
23
International relations 	 2017/9/4 	 2017 North Korea crisis
South Korean NORP Defense Minister Song Young PERSON -moo says it is worth reviewing deployment of U.S. GPE strategic assets (aircraft carriers, nuclear submarines, and B-52 PRODUCT bombers) to South Korea GPE more regularly.
Arguments:
related_when : |
related_where : |U.S.|GPE|South Korea|GPE
related_who : |South Korean|NORP|Song Young|PERSON
related_what : |B-52|PRODUCT
Triggers: |says|VBZ|0.492164525813|reviewing|VBG|0.483029829469
South Korean Defense Minister Song Young-moo|0.408137876699|it|0.310377940963|deployment|0.254030296323|U.S. strategic assets|0.584852217979|aircraft carriers|0.312171874531|nuclear submarines|0.334960129915|B-52 bombers|0.145975781535|South Korea|0.351919548799
kws: 8 |B-52|says|South Korean|assets|Song Young|South Korea|U.S.|reviewing
24
International relations 	 2017/9/4 	 Crisis in Venezuela
The opposition movement in Venezuela GPE seeks help from France GPE .
Arguments:
related_when : |
related_where : |Venezuela|GPE|France|GPE
related_who : |
related_what : |
Triggers: |seeks|VBZ|0.815549345108
The opposition movement|0.482940938463|Venezuela|0.196721537552|help|0.342547838316|France|0.315508702161
kws: 4 |France|Venezuela|seeks|movement
25
Law and crime 	 2017/9/4 	 Crime in Italy
Italian NORP fugitive and 'Ndrangheta member Rocco Morabito PERSON is arrested in Montevideo GPE , Uruguay GPE , after 23 years DATE on the run. He is now expected to be extradited to Italy GPE in the coming months DATE .
Arguments:
related_when : |23 years|DATE|the coming months|DATE
related_where : |Montevideo|GPE|Uruguay|GPE|Italy|GPE
related_who : |Italian|NORP|Rocco Morabito|PERSON
related_what : |
Triggers: |arrested|VBN|0.733222805235|extradited|VBN|0.713772662533|coming|VBG|0.583167302493|expected|VBN|0.518465246371
Italian fugitive and 'Ndrangheta member Rocco Morabito|0.559902132756|Montevideo|0.12830496218|Uruguay|0.12830496218|23 years|0.295314025423|the run|0.394003976412|He|0.446581606332|Italy|0.17969688972|the coming months|0.385382241268
kws: 11 |Rocco Morabito|Italy|Italian|Montevideo|extradited|the coming months|arrested|Morabito|Uruguay|23 years|coming
26
Politics and elections 	 2017/9/4 	 Politics of Taiwan
Premier of the Republic of China GPE Lin Chuan PERSON offers his resignation as head of the Executive Branch ORG of Taiwan GPE .
Arguments:
related_when : |
related_where : |the Republic of China|GPE|Taiwan|GPE
related_who : |Lin Chuan|PERSON|the Executive Branch|ORG
related_what : |
Triggers: |offers|VBZ|0.359036736913
Premier|0.199533175525|the Republic|0.534595955881|China|0.156207825947|Lin Chuan|0.0236051202478|his resignation|0.443677278273|head|0.20214664283|the Executive Branch|0.39457847627|Taiwan|0.15698466789
kws: 6 |Lin Chuan|offers|Taiwan|the Republic of China|Republic|the Executive Branch
27
Armed conflicts and attacks 	 2017/9/5 	 Syrian Civil War
Siege of Deir ez-Zor ( 2014鈥?7 PERSON ) GPE The Syrian Army ORG lifts the 28-month CARDINAL -long siege of Deir PRODUCT ez-Zor by the Islamic State of Iraq ORG and the Levant ORG .
Arguments:
related_when : |
related_where : |

|GPE
related_who : |2014鈥?7|PERSON|Syrian Army|ORG|the Islamic State of Iraq|ORG|Levant|ORG
related_what : |28-month|CARDINAL|Deir|PRODUCT
Triggers: |lifts|VBZ|0.330900978673
Siege|0.521823476154|Deir ez-Zor|-0.0590066792325|2014鈥?7|0.0|The Syrian Army|0.573829618817|the 28-month-long siege|0.470934522013|Deir ez-Zor|-0.0590066792325|the Islamic State|0.480012090222|Iraq|0.371648261941|the Levant|0.0788780717674
kws: 9 |

|Syrian Army|the Islamic State of Iraq|28-month|Levant|2014鈥?7|lifts|Deir|Army
28
Armed conflicts and attacks 	 2017/9/5 	 2016鈥?7 Rohingya persecution in Myanmar
More than 123,000 CARDINAL Rohingya PERSON refugees have fled Myanmar GPE and crossed into Bangladesh GPE due to escalating violence by the Myanmar Army ORG .
Arguments:
related_when : |
related_where : |Myanmar|GPE|Bangladesh|GPE
related_who : |Rohingya|PERSON|the Myanmar Army|ORG
related_what : |More than 123,000|CARDINAL
Triggers: |escalating|VBG|0.824328611006|fled|VBN|0.740865661052|crossed|VBD|0.615631019066
More than 123,000 Rohingya refugees|0.476712677498|Myanmar|0.110897563343|Bangladesh|0.243876441692|escalating violence|0.70722532895|the Myanmar Army|0.516522867508
kws: 9 |the Myanmar Army|escalating|crossed|Myanmar|Bangladesh|Rohingya|fled|violence|More than 123,000
29
Disasters and accidents 	 2017/9/5 	 2017 Atlantic hurricane season
Hurricane Irma EVENT Hurricane Irma EVENT strengthens to a maximum Category 5 QUANTITY hurricane, becoming the strongest Atlantic LOC hurricane since 2005 DATE 's Hurricane Wilma EVENT in terms of maximum sustained winds, described as "extremely dangerous." The National Hurricane Center WORK_OF_ART ( NHC ORG ) predicts that Irma PERSON could strengthen even more due to favorable conditions.
Arguments:
related_when : |2005|DATE
related_where : |Atlantic|LOC
related_who : |NHC|ORG|Irma|PERSON
related_what : |Hurricane Irma|EVENT|Hurricane Irma|EVENT|Category 5|QUANTITY|Hurricane Wilma|EVENT|The National Hurricane Center|WORK_OF_ART
Triggers: |predicts|VBZ|0.531750704924|strengthens|VBZ|0.404163109524|described|VBN|0.392599268983|strengthen|VB|0.384446616556
Hurricane Irma|0.357908785797|Hurricane Irma|0.357908785797|a maximum Category 5 hurricane|0.360076674442|the strongest Atlantic hurricane|0.420068582962|2005's Hurricane Wilma|0.345699893042|terms|0.242992989322|maximum sustained winds|0.374320373452|The National Hurricane Center|0.418866405429|(NHC|-0.00701237427904|Irma|-0.00522683117666|favorable conditions|0.380056787407
kws: 12 |strengthens|NHC|predicts|Category 5|Hurricane Wilma|hurricane|described|Atlantic|Hurricane Irma|The National Hurricane Center|Irma|2005
30
International relations 	 2017/9/5 	 India鈥揗yanmar relations
Indian NORP Prime Minister Narendra Modi PERSON heads to Myanmar GPE for a state visit.
Arguments:
related_when : |
related_where : |Myanmar|GPE
related_who : |Indian|NORP|Narendra Modi|PERSON
related_what : |
Triggers: 
Indian Prime Minister Narendra Modi|0.333002686707|Myanmar|0.268959022181|a state visit|0.433511615993
kws: 4 |Myanmar|visit|Narendra Modi|Indian
31
International relations 	 2017/9/5 	 2017 North Korea crisis
Japan鈥揢nited States GPE relations, South Korea鈥揢nited States GPE relations FAC Amid tensions from South Korea GPE , U.S. GPE President Donald Trump PERSON announces that he is "allowing Japan GPE and South Korea GPE to buy a substantially increased amount of highly sophisticated military equipment from the United States GPE ." ( The Independent ORG ).
Arguments:
related_when : |
related_where : |Japan鈥揢nited States|GPE|South Korea鈥揢nited States|GPE|South Korea|GPE|U.S.|GPE|Japan|GPE|South Korea|GPE|the United States|GPE
related_who : |Donald Trump|PERSON|The Independent|ORG
related_what : |
Triggers: |increased|VBN|0.523349314278|allowing|VBG|0.514175296079|announces|VBZ|0.450125527366|buy|VB|0.349803020058
Japan鈥揢nited States relations|0.793477104429|South Korea鈥揢nited States relations|0.708819330836|tensions|0.306615237557|South Korea|0.351919548799|U.S. President Donald Trump|0.413748916657|he|0.329023423376|Japan|0.286642157745|South Korea|0.351919548799|a substantially increased amount|0.356462363157|highly sophisticated military equipment|0.500948522712|the United States|0.554561069776|(The Independent|0.442440109051
kws: 13 |South Korea鈥揢nited States|

|announces|Japan鈥揢nited States|Japan|Donald Trump|relations|allowing|increased|The Independent|South Korea|U.S.|the United States
32
Law and crime 	 2017/9/5 	 Journalists killed in India
Prominent Indian NORP journalist and Lankesh Patrike PERSON editor Gauri Lankesh PERSON is shot dead by unidentified men outside her house in the city of Bengaluru GPE .
Arguments:
related_when : |
related_where : |Bengaluru|GPE
related_who : |Indian|NORP|Lankesh Patrike|PERSON|Gauri Lankesh|PERSON
related_what : |
Triggers: |shot|VBN|0.515839042646
Prominent Indian journalist|0.42397370554|Lankesh Patrike editor|0.17564787189|Gauri Lankesh|0.0|unidentified men|0.388254146727|her house|0.409643719279|the city|0.500354653334|Bengaluru|0.0651254167868
kws: 6 |Gauri Lankesh|city|Indian|Bengaluru|shot|Lankesh Patrike
33
Law and crime 	 2017/9/5 	 Australian Marriage Law Postal Survey (Same-sex marriage)
The High Court of Australia ORG receives a complaint against the Australian NORP Marriage Law Postal Survey citing the postal survey as "unique and offensive." (The Guardian) WORK_OF_ART .
Arguments:
related_when : |
related_where : |
related_who : |The High Court of Australia|ORG|Australian|NORP
related_what : |(The Guardian)|WORK_OF_ART
Triggers: |citing|VBG|0.572195904063|receives|VBZ|0.425439919036
The High Court|0.593857575155|Australia|0.25587200476|a complaint|0.515442505477|the Australian Marriage Law Postal Survey|0.711241499187|the postal survey|0.453356289133|(The Guardian|0.413507017033
kws: 6 |The High Court of Australia|citing|Survey|Australian|receives|(The Guardian)
34
Politics and elections 	 2017/9/5 	 Politics of Taiwan
Tainan GPE Mayor William Lai Ching-te PERSON is appointed as the new Premier by the President of the Republic of China GPE Tsai Ing-wen PERSON .
Arguments:
related_when : |
related_where : |Tainan|GPE|the Republic of China|GPE
related_who : |William Lai Ching-te|PERSON|Tsai Ing-wen|PERSON
related_what : |
Triggers: |appointed|VBN|0.551664357862
Tainan Mayor William Lai Ching-te|0.269547018921|the new Premier|0.332105353284|the President|0.521588055935|the Republic|0.534595955881|China|0.156207825947|Tsai Ing-wen|0.1266557292
kws: 6 |Tsai Ing-wen|appointed|the Republic of China|Republic|Tainan|William Lai Ching-te
35
Politics and elections 	 2017/9/5 	 Deferred Action for Childhood Arrivals
The Trump Administration ORG announces that, during the next six months DATE , it will be ending the DACA ORG program that has halted the deportation of about 800,000 CARDINAL people who were brought to the U.S. GPE illegally as children. Congress ORG is called upon to pass legislation to correct the situation.
Arguments:
related_when : |the next six months|DATE
related_where : |U.S.|GPE
related_who : |The Trump Administration|ORG|DACA|ORG|Congress|ORG
related_what : |about 800,000|CARDINAL
Triggers: |brought|VBN|0.551158832215|ending|VBG|0.533632073921|halted|VBN|0.500590097025|correct|VB|0.484616554518|called|VBN|0.478593183908
The Trump Administration|0.591466005224|the next six months|0.338907360369|it|0.359504523576|the DACA program|0.280110416205|the deportation|0.395922391098|about 800,000 people|0.327572046668|who|0.298166567757|the U.S.|0.437061635613|children|0.219449375835|Congress|0.57855267145|legislation|0.497035700057|the situation|0.45906628286
kws: 10 |The Trump Administration|halted|Congress|DACA|brought|ending|about 800,000|the next six months|Administration|U.S.
36
Sports 	 2017/9/5 	 2017鈥?8 NBA season
Billionaire investor Tilman Fertitta PERSON buys the Houston Rockets NBA ORG basketball team for US$2.2 billion MONEY .
Arguments:
related_when : |
related_where : |
related_who : |Tilman Fertitta|PERSON|Houston Rockets NBA|ORG
related_what : |US$2.2 billion|MONEY
Triggers: |buys|VBZ|0.472414864905
Billionaire investor Tilman Fertitta|0.11165074302|the Houston Rockets NBA basketball team|0.599674816765
kws: 5 |buys|team|US$2.2 billion|Houston Rockets NBA|Tilman Fertitta

In [181]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from collections import defaultdict

In [97]:
def efitf(X):
    count = CountVectorizer(stop_words='english')
    X_train_count = count.fit_transform(X)
    tfidf = TfidfTransformer(use_idf=True,smooth_idf=True,sublinear_tf=True)
    X_train_tfidf = tfidf.fit_transform(X_train_count)
    tf_feature_names = count.get_feature_names()
    X_train_tfidf = [list(i) for i in list(X_train_tfidf.toarray())]
    EFITF = defaultdict(dict)
    for Type,values in enumerate(X_train_tfidf):
        for index,value in enumerate(values):
            if value > 0.0:
                EFITF[Type].update({tf_feature_names[index]:value}) 
    return EFITF

In [98]:
X = []

In [99]:
X = df_demo['des_clean'].tolist()

In [100]:
EFITF = efitf(X)
EFITF[0]

In [104]:
'demo'


Out[104]:
['demo']

In [144]:
a = [1,2,3,4,5]

In [146]:
a


Out[146]:
[1, 3, 4, 5]

In [167]:
doc = nlp('arrested')

In [168]:
doc[0].pos_,doc[0].tag_


Out[168]:
('VERB', 'VBN')

In [ ]: