In [55]:
import os
import html5lib
import requests
import lxml
import lxml.html.html5parser
# from lxml.html import tostring, html5parser
# from lxml import etree
In [10]:
from acl.lxml_dom import Node, NodeList
In [29]:
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
In [4]:
data_dirpath = os.path.expanduser('~/github/acl/data')
In [5]:
url = 'http://www.aclweb.org/anthology/P/P03/'
In [6]:
p03_response = requests.get(url)
# print p03_response.text[:1000]
In [53]:
xml_string = p03_response.text
In [65]:
# html5parser.fromstring("<table><td>foo")
# document = lxml.html.html5parser.fromstring(xml_string, )
# encoding='utf-8',
document = html5lib.parse(xml_string, treebuilder="lxml", namespaceHTMLElements=False)
root = Node.wrap(document.getroot())
In [63]:
document.docinfo.encoding
Out[63]:
'UTF-16LE'
In [51]:
dir(document.docinfo)
Out[51]:
['URL',
'__class__',
'__delattr__',
'__doc__',
'__format__',
'__getattribute__',
'__hash__',
'__init__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'doctype',
'encoding',
'externalDTD',
'internalDTD',
'public_id',
'root_name',
'standalone',
'system_url',
'xml_version']
In [52]:
document.docinfo.encoding
Out[52]:
'UTF-16LE'
In [40]:
content = root.first('//div[@id="content"]')
In [75]:
for child in content:
#print child.tag, child.text
#if child.tag == 'h1':
# break
if 'Lionel Cl' in str(child):
break
In [91]:
b = child.first('b')
# dir(child._element)
# list(child._element.iter())
# print list(child._element)
# [repr(item) for item in child]
el = b._element
print lxml.etree.tostring(el, encoding='unicode')
<b>Lionel Clément; Alexandra Kinyon</b>
In [89]:
repr(b)
Out[89]:
'<b>Lionel Clément; Alexandra Kinyon</b>\n'
In [84]:
print b.text, unicode(b), el.text.decode('utf8')
Lionel Clément; Alexandra Kinyon <b>Lionel Clément; Alexandra Kinyon</b>
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-84-075929486f12> in <module>()
----> 1 print b.text, unicode(b), el.text.decode('utf8')
/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/encodings/utf_8.pyc in decode(input, errors)
14
15 def decode(input, errors='strict'):
---> 16 return codecs.utf_8_decode(input, errors, True)
17
18 class IncrementalEncoder(codecs.IncrementalEncoder):
UnicodeEncodeError: 'ascii' codec can't encode characters in position 9-10: ordinal not in range(128)
In [43]:
section = None
for child in content:
if child.tag == 'h1':
section = child.text
elif section is not None:
#anchors = child.find('a')
pdf_href = child.first("a[contains(@href, '.pdf')]").getAttribute('href')
#pdf_url = pdf_a.attributes['href'] if pdf_a else None
bib_href = child.first("a[contains(@href, '.bib')]").getAttribute('href')
# bib_href = None # bib_a.attributes['href'] if bib_a else None
author = child.first('b').text
title = child.first('i').text
print section, author, title, pdf_href, bib_href
# entry = dict(pdf_href=pdf_anchors[0].attrib['href'])
# if len(bib_anchors) > 0:
# entry['bib_href'] = bib_anchors[0].attrib['href']
# print dict(pdf_href=pdf_href, bib_href=bib_href, author=author, title=title)
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics None Front Matter P03-1000.pdf None
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Michael Fleischman; Eduard Hovy; Abdessamad Echihabi Offline Strategies for Online Question Answering: Answering Questions Before They Are Asked P03-1001.pdf P03-1001.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Mihai Surdeanu; Sanda Harabagiu; John Williams; Paul Aarseth Using Predicate-Argument Structures for Information Extraction P03-1002.pdf P03-1002.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Abdessamad Echihabi; Daniel Marcu A Noisy-Channel Approach to Question Answering P03-1003.pdf P03-1003.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Taku Kudo; Yuji Matsumoto Fast Methods for Kernel-Based Text Analysis P03-1004.pdf P03-1004.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Jun Suzuki; Tsutomu Hirao; Yutaka Sasaki; Eisaku Maeda Hierarchical Directed Acyclic Graph Kernel: Methods for Structured Natural Language Data P03-1005.pdf P03-1005.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Cyril Allauzen; Mehryar Mohri; Brian Roark Generalized Algorithms for Constructing Statistical Language Models P03-1006.pdf P03-1006.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Anna Korhonen; Judita Preiss Improving Subcategorization Acquisition Using Word Sense Disambiguation P03-1007.pdf P03-1007.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Malvina Nissim; Katja Markert Syntactic Features and Word Similarity for Supervised Metonymy Resolution P03-1008.pdf P03-1008.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Anna Korhonen; Yuval Krymolowski; Zvika Marx Clustering Polysemic Subcategorization Frame Distributions Semantically P03-1009.pdf P03-1009.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Masao Utiyama; Hitoshi Isahara Reliable Measures for Aligning Japanese-English News Articles and Sentences P03-1010.pdf P03-1010.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Daniel Gildea Loosely Tree-Based Alignment for Machine Translation P03-1011.pdf P03-1011.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Colin Cherry; Dekang Lin A Probability Model to Improve Word Alignment P03-1012.pdf P03-1012.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Amit Dubey; Frank Keller Probabilistic Parsing for German Using Sister-Head Dependencies P03-1013.pdf P03-1013.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Anette Frank; Markus Becker; Berthold Crysmann; Bernd Kiefer; Ulrich Schäfer Integrated Shallow and Deep Parsing: TopP Meets HPSG P03-1014.pdf P03-1014.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Michael Schiehlen Combining Deep and Shallow Approaches in Parsing German P03-1015.pdf P03-1015.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Hua Wu; Ming Zhou Synonymous Collocation Extraction Using Translation Information P03-1016.pdf P03-1016.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Sebastian Padó; Mirella Lapata Constructing Semantic Space Models from Parsed Corpora P03-1017.pdf P03-1017.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Dominic Widdows Orthogonal Negation in Vector Spaces for Modelling Word-Meanings and Document Retrieval P03-1018.pdf P03-1018.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Richard Zens; Hermann Ney A Comparative Study on Reordering Constraints in Statistical Machine Translation P03-1019.pdf P03-1019.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Lucian Vlad Lita; Abe Ittycheriah; Salim Roukos; Nanda Kambhatla tRuEcasIng P03-1020.pdf P03-1020.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Franz Josef Och Minimum Error Rate Training in Statistical Machine Translation P03-1021.pdf P03-1021.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Michael Strube; Christoph Müller A Machine Learning Approach to Pronoun Resolution in Spoken Dialogue P03-1022.pdf P03-1022.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Xiaofeng Yang; Guodong Zhou; Jian Su; Chew Lim Tan Coreference Resolution Using Competition Learning Approach P03-1023.pdf P03-1023.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Lionel Clément; Alexandra Kinyon Generating Parallel Multilingual LFG-TAG Grammars from a MetaGrammar P03-1024.pdf P03-1024.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Jonas Kuhn Compounding and Derivational Morphology in a Finite-State Setting P03-1025.pdf P03-1025.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Gerald Penn; Cosmin Munteanu A Tabulation-Based Parsing Method that Reduces Copying P03-1026.pdf P03-1026.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Andrew Gordon; Abe Kazemzadeh; Anish Nair; Milena Petrova Recognizing Expressions of Commonsense Psychology in English Text P03-1027.pdf P03-1027.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Hai Leong Chieu; Hwee Tou Ng; Yoong Keok Lee Closing the Gap: Learning-Based Information Extraction Rivaling Knowledge-Engineering Methods P03-1028.pdf P03-1028.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Kiyoshi Sudo; Satoshi Sekine; Ralph Grishman An Improved Extraction Pattern Representation Model for Automatic IE Pattern Acquisition P03-1029.pdf P03-1029.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Ayman Farahat; Francine Chen; Thorsten Brants Optimizing Story Link Detection is not Equivalent to Optimizing New Event Detection P03-1030.pdf P03-1030.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Ryuichiro Higashinaka; Mikio Nakano; Kiyoaki Aikawa Corpus-Based Discourse Understanding in Spoken Dialogue Systems P03-1031.pdf P03-1031.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Gang Wang; Tat-Seng Chua; Yong-Cheng Wang Extracting Key Semantic Terms from Chinese Speech Query for Web Searches P03-1032.pdf P03-1032.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Kazunori Komatani; Shinichi Ueno; Tatsuya Kawahara; Hiroshi G. Okuno Flexible Guidance Generation Using User Model in Spoken Dialogue Systems P03-1033.pdf P03-1033.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Charles B. Callaway Integrating Discourse Markers into a Pipelined Natural Language Generation Architecture P03-1034.pdf P03-1034.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Jianfeng Gao; Mu Li; Chang-Ning Huang Improved Source-Channel Models for Chinese Word Segmentation P03-1035.pdf P03-1035.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Mathias Creutz Unsupervised Segmentation of Words Using Prior Distributions of Morph Length and Frequency P03-1036.pdf P03-1036.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Martin Jansche Parametric Models of Linguistic Count Data P03-1037.pdf P03-1037.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Jin-Dong Kim; Hae-Chang Rim; Jun'ichi Tsujii Self-Organizing Markov Models and Their Application to Part-of-Speech Tagging P03-1038.pdf P03-1038.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Taro Watanabe; Eiichiro Sumita; Hiroshi G. Okuno Chunk-Based Statistical Translation P03-1039.pdf P03-1039.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Philipp Koehn; Kevin Knight Feature-Rich Statistical Translation of Noun Phrases P03-1040.pdf P03-1040.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Ashish Venugopal; Stephan Vogel; Alex Waibel Effective Phrase Translation Extraction from Alignment Models P03-1041.pdf P03-1041.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Yunbo Cao; Hang Li; Li Lian Uncertainty Reduction in Collaborative Bootstrapping: Measure and Algorithm P03-1042.pdf P03-1042.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Cheng Niu; Wei Li; Jihong Ding; Rohini Srihari A Bootstrapping Approach to Named Entity Classification Using Successive Learners P03-1043.pdf P03-1043.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Roman Yangarber Counter-Training in Discovery of Semantic Patterns P03-1044.pdf P03-1044.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Denis Béchet; Annie Foret k-Valued Non-Associative Lambek Categorial Grammars are not Learnable from Strings P03-1045.pdf P03-1045.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Julia Hockenmaier Parsing with Generative Models of Predicate-Argument Structure P03-1046.pdf P03-1046.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Joachim Niehren; Stefan Thater Bridging the Gap Between Underspecification Formalisms: Minimal Recursion Semantics as Dominance Constraints P03-1047.pdf P03-1047.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Dragomir R. Radev; Simone Teufel; Horacio Saggion; Wai Lam; John Blitzer; Hong Qi; Arda Ãelebi; Danyu Liu; Elliott Drabek Evaluation Challenges in Large-Scale Document Summarization P03-1048.pdf P03-1048.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Akiko Aizawa Analysis of Source Identified Text Corpora: Exploring the Statistics of the Reused Text and Authorship P03-1049.pdf P03-1049.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Monica Rogati; Scott McCarley; Yiming Yang Unsupervised Learning of Arabic Stemming Using a Parallel Corpus P03-1050.pdf P03-1050.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Young-Suk Lee; Kishore Papineni; Salim Roukos; Ossama Emam; Hany Hassan Language Model Based Arabic Word Segmentation P03-1051.pdf P03-1051.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Kumiko Tanaka-Ishii; Daichi Hayakawa; Masato Takeichi Acquiring Vocabulary for Predictive Text Entry through Dynamic Reuse of a Small User Corpus P03-1052.pdf P03-1052.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics William Gregory Sakas A Word-Order Database for Testing Computational Models of Language Acquisition P03-1053.pdf P03-1053.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Dan Klein; Christopher D. Manning Accurate Unlexicalized Parsing P03-1054.pdf P03-1054.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Péter Dienes; Amit Dubey Deep Syntactic Processing by Combining Shallow Methods P03-1055.pdf P03-1055.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Roger Levy; Christopher D. Manning Is it Harder to Parse Chinese, or the Chinese Treebank? P03-1056.pdf P03-1056.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Kenji Imamura; Eiichiro Sumita; Yuji Matsumoto Feedback Cleaning of Machine Translation Rules Using Automatic Evaluation P03-1057.pdf P03-1057.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Hwee Tou Ng; Bin Wang; Yee Seng Chan Exploiting Parallel Texts for Word Sense Disambiguation: An Empirical Study P03-1058.pdf P03-1058.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Timothy Baldwin; Francis Bond Learning the Countability of English Nouns from Corpus Data P03-1059.pdf P03-1059.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Do-Gil Lee; Hae-Chang Rim; Heui-Seok Lim A Syllable Based Word Recognition Model for Korean Noun Extraction P03-1060.pdf P03-1060.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Kiyotaka Uchimoto; Chikashi Nobata; Atsushi Yamada; Satoshi Sekine; Hitoshi Isahara Morphological Analysis of a Large Spontaneous Speech Corpus in Japanese P03-1061.pdf P03-1061.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Erwin Marsi; Martin Reynaert; Antal van den Bosch; Walter Daelemans; Véronique Hoste Learning to Predict Pitch Accents and Prosodic Boundaries in Dutch P03-1062.pdf P03-1062.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Seong-Bae Park; Byoung-Tak Zhang Text Chunking by Combining Hand-Crafted Rules and Memory-Based Learning P03-1063.pdf P03-1063.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Libin Shen; Aravind K. Joshi A SNoW Based Supertagger with Application to NP Chunking P03-1064.pdf P03-1064.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Wei Li; Xiuhong Zhang; Cheng Niu; Yuankai Jiang; Rohini K. Srihari An Expert Lexicon Approach to Identifying English Phrasal Verbs P03-1065.pdf P03-1065.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Jianfeng Gao; Hisami Suzuki Unsupervised Learning of Dependency Structure for Language Modeling P03-1066.pdf P03-1066.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics William Schuler Using Model-Theoretic Semantic Interpretation to Guide Statistical Parsing and Word Recognition in a Spoken Language Interface P03-1067.pdf P03-1067.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Katrin Erk; Andrea Kowalski; Sebastian Padó; Manfred Pinkal Towards a Resource for Lexical Semantics: A Large German Corpus with Extensive Semantic Annotation P03-1068.pdf P03-1068.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Mirella Lapata Probabilistic Text Structuring: Experiments with Sentence Ordering P03-1069.pdf P03-1069.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Yukiko Nakano; Gabe Reinstein; Tom Stocky; Justine Cassell Towards a Model of Face-to-Face Grounding P03-1070.pdf P03-1070.bib
Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics Michel Galley; Kathleen R. McKeown; Eric Fosler-Lussier; Hongyan Jing Discourse Segmentation of Multi-Party Conversation P03-1071.pdf P03-1071.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics None Proceedings of the ACL-2003 Student Research Workshop P03-2001.pdf None
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Narjès Boufaden An Ontology-based Semantic Tagger for IE system P03-2002.pdf P03-2002.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics José M. Castaño On the Applicability of Global Index Grammars P03-2003.pdf P03-2003.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Malte Gabsdil Classifying Recognition Results for Spoken Dialog Systems P03-2004.pdf P03-2004.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Anurag Gupta An Adaptive Approach to Collecting Multimodal Input P03-2005.pdf P03-2005.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Valentin Jijkoun Finding Non-local Dependencies: Beyond Pattern Matching P03-2006.pdf P03-2006.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Bo-Yeong Kang A Novel Approach to Semantic Indexing Based on Concept P03-2007.pdf P03-2007.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Katsuya Masuda A Ranking Model of Proximal and Structural Text Retrieval Based on Region Algebra P03-2008.pdf P03-2008.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics T. Daniel Midgley Discourse Chunking: A Tool in Dialogue Act Tagging P03-2009.pdf P03-2009.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Hyunjung Son A Computational Treatment of Korean Temporal Markers, OE and DONGAN P03-2010.pdf P03-2010.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Huihsin Tseng Semantic Classification of Chinese Unknown Words P03-2011.pdf P03-2011.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Olga Uryupina High-precision Identification of Discourse New and Unique Noun Phrases P03-2012.pdf P03-2012.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Mitsuko Yamura-Takei Approaches to Zero Adnominal Recognition P03-2013.pdf P03-2013.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics None ACL-03 Interactive Posters and Demonstrations P03-2014.pdf None
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Jun Goto; Yeun-Bae Kim; Masaru Miyazaki; Kazuteru Komine; Noriyoshi Uratani A Spoken Dialogue Interface for TV Operations based on Data Collected by using WOZ Method P03-2015.pdf P03-2015.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Kumiko Tanaka-Ishii; Masato Yamamoto; Hiroshi Nakagawa Kiwi: A Multilingual Usage Consultation Tool based on Internet Searching P03-2016.pdf P03-2016.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Marc Dymetman; Aurélien Max; Kenji Yamada Towards Interactive Text Understanding P03-2017.pdf P03-2017.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Ian Marshall; Ãva Sáfár A Prototype Text to British Sign Language (BSL) Translation System P03-2018.pdf P03-2018.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Stephan Busemann; Witold Drozdzynski; Hans-Ulrich Krieger; Jakub Piskorski; Ulrich Schaefer; Hans Uszkoreit; Feiyu Xu Integrating Information Extraction and Automatic Hyperlinking P03-2019.pdf P03-2019.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Satoshi Sato; Yasuhiro Sasaki Automatic Collection of Related Terms from the Web P03-2020.pdf P03-2020.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Anton Leuski; Chin-Yew Lin; Eduard Hovy iNeATS: Interactive Multi-Document Summarization P03-2021.pdf P03-2021.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics El-Sayed Atlam; Masaki Oono; Jun-ichi Aoe An Evaluation Method of Words Tendency using Decision Tree P03-2022.pdf P03-2022.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Kiyoshi Yamabana; Ken Hanazawa; Ryosuke Isotani; Seiya Osada; Akitoshi Okumura; Takao Watanabe A Speech Translation System with Mobile Wireless Clients P03-2023.pdf P03-2023.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Manny Rayner; Pierrette Bouillon; Vol Van Dalsem III; Hitoshi Isahara; Kyoko Kanzaki; Beth Ann Hockey A Limited-Domain English to Japanese Medical Speech Translator Built Using REGULUS 2 P03-2024.pdf P03-2024.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Fatiha Sadat; Masatoshi Yoshikawa; Shunsuke Uemura Bilingual Terminology Acquisition from Comparable Corpora and Phrasal Translation to Cross-Language Information Retrieval P03-2025.pdf P03-2025.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Emi Izumi; Kiyotaka Uchimoto; Toyomi Saiga; Thepchai Supnithi; Hitoshi Isahara Automatic Error Detection in the Japanese Learners' English Spoken Data P03-2026.pdf P03-2026.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Yoji Kiyota; Sadao Kurohashi; Teruhisa Misu; Kazunori Komatani; Tatsuya Kawahara; Fuyuko Kido Dialog Navigator : A Spoken Dialog Q-A System based on Large Text Knowledge Base P03-2027.pdf P03-2027.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Chiori Hori; Takaaki Hori; Hajime Tsukada; Hideki Isozaki; Yutaka Sasaki; Eisaku Maeda Spoken Interactive ODQA System: SPIQA P03-2028.pdf P03-2028.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Koichi Yamashita; Keiichi Yoshida; Yukihiro Itoh Word Sense Disambiguation Using Pairwise Alignment P03-2029.pdf P03-2029.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Collin F. Baker; Hiroaki Sato The FrameNet Data and Software P03-2030.pdf P03-2030.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Joohui An; Seungwoo Lee; Gary Geunbae Lee Automatic Acquisition of Named Entity Tagged Corpus from World Wide Web P03-2031.pdf P03-2031.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Atsuko Kida; Eiko Yamamoto; Kyoko Kanzaki; Hitoshi Isahara Extraction and Verification of KO-OU Expressions from Large Corpora P03-2032.pdf P03-2032.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Akane Yakushiji; Yuka Tateisi; Yusuke Miyao; Naoki Yoshinaga; Jun'ichi Tsujii A Debug Tool for Practical Grammar Development P03-2033.pdf P03-2033.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Edward Schofield; Zhiping Zheng A Speech Interface for Open-Domain Question-Answering P03-2034.pdf P03-2034.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Koichi Takeuchi; Kyo Kageura; Teruo Koyama Deverbal Compound Noun Analysis Based on Lexical Conceptual Structure P03-2035.pdf P03-2035.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Naoki Yoshinaga; Kentaro Torisawa; Jun'ichi Tsujii Comparison between CFG Filtering Techniques for LTAG and HPSG P03-2036.pdf P03-2036.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Masatoshi Tsuchiya; Satoshi Sato Automatic Detection of Grammar Elements that Decrease Readability P03-2037.pdf P03-2037.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Manny Rayner; Beth Ann Hockey; Jim Hieronymus; John Dowding; Greg Aist; Susana Early An Intelligent Procedure Assistant Built Using REGULUS 2 and ALTERF P03-2038.pdf P03-2038.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Chooi Ling Goh; Masayuki Asahara; Yuji Matsumoto Chinese Unknown Word Identification Using Character-based Tagging and Chunking P03-2039.pdf P03-2039.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Jian-Cheng Wu; Kevin C. Yeh; Thomas C. Chuang; Wen-Chi Shei; Jason S. Chang TotalRecall: A Bilingual Concordance for Computer Assisted Translation and Language Learning P03-2040.pdf P03-2040.bib
The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics Jason Eisner Learning Non-Isomorphic Tree Mappings for Machine Translation P03-2041.pdf P03-2041.bib
In [92]:
# child.findall('.//a')
a = child.xpath("a[contains(@href, 'bib')]")[0]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-92-a53982264792> in <module>()
1 # child.findall('.//a')
----> 2 a = child.xpath("a[contains(@href, 'bib')]")[0]
IndexError: list index out of range
In [35]:
# child._element.attrib
In [26]:
# for element in document.findall('//div[@id="content"]//a'):
# print etree.tostring(element, pretty_print=True)
In [38]:
# document.xpath('//a')
In [ ]:
lxml.etree.tostring()