This notebook retrieves abstracts from arXiv.


In [1]:
%%writefile functionsq.py
def make_url(query, n=1, max_results=1000):
    """
    Build the full arXiv API request URL for a search query.

    The returned string is meant to be passed as the argument of
    feedparser.parse.

    inputs:
    query: string already formatted as an API search_query; it is
           inserted verbatim (no URL-encoding is applied here)
    n: integer, value of the `start` parameter of the request.
       NOTE: the arXiv API documents `start` as a 0-based index, so the
       default n=1 skips the first hit; the default is kept for
       backward compatibility -- pass n=0 to begin at the first result.
    max_results: integer, value of the `max_results` parameter (page
                 size). Defaults to 1000, the value that used to be
                 hard-coded in the URL.

    returns:
    string with the complete query URL
    """
    base = 'http://export.arxiv.org/api/query'
    return '{0}?search_query={1}&start={2}&max_results={3}'.format(
        base, query, n, max_results)


Overwriting functionsq.py

In [3]:
import feedparser
import urllib.request
import functionsq as fff # may be not a good name convention

#data = urllib.request.urlopen(url).read()
# Page counter and search expression; both names are read again by the
# paging cell below, so they are kept as notebook-level variables.
ind = 0
query = 'abs:+novel+OR+ti:+novel/0/1/0/2016/0/1/'

# Show the request URL for the first page as a quick sanity check.
first_page_url = fff.make_url(query, ind)
print(first_page_url)
#d=feedparser.parse(url)


http://export.arxiv.org/api/query?search_query=abs:+novel+OR+ti:+novel/0/1/0/2016/0/1/&start=0&max_results=1000

In [4]:
# Page through the query results and count them.
# leng_query must equal the page size (max_results) that make_url puts in
# the request URL; it is defined before first use so the paging arithmetic
# below has a single source of truth.
leng_query = 1000
ind = 0  # reset the page counter so re-running this cell starts from page 0

url = fff.make_url(query, ind)
d = feedparser.parse(url)
print(len(d.entries))

# A completely full page means there may be more results: fetch the next
# page (start offset = page size * page number) until a short page arrives.
while len(d.entries) == leng_query:
    ind += 1
    url = fff.make_url(query, leng_query*ind)
    d = feedparser.parse(url)

# `ind` full pages of leng_query entries each, plus the last (partial) page.
# The original multiplied by 100 here, which under-counted every full page.
total_entries = leng_query*ind + len(d.entries)
print('total number of publications in 2016 which have the word\n "novel" in their title or astract is', total_entries)

#ee[0].get('title')


1000
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-4-6d24528a0ce8> in <module>()
      6     ind+=1
      7     url=fff.make_url(query,leng_query*ind)
----> 8     d=feedparser.parse(url)
      9 
     10 print('total number of publications in 2016 which have the word\n "novel" in their title or astract is', 100*(ind)+len(d.entries))

/opt/conda/lib/python3.5/site-packages/feedparser.py in parse(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, response_headers)
   3955         source.setByteStream(_StringIO(data))
   3956         try:
-> 3957             saxparser.parse(source)
   3958         except xml.sax.SAXException as e:
   3959             result['bozo'] = 1

/opt/conda/lib/python3.5/xml/sax/expatreader.py in parse(self, source)
    108         self.reset()
    109         self._cont_handler.setDocumentLocator(ExpatLocator(self))
--> 110         xmlreader.IncrementalParser.parse(self, source)
    111 
    112     def prepareParser(self, source):

/opt/conda/lib/python3.5/xml/sax/xmlreader.py in parse(self, source)
    123         buffer = file.read(self._bufsize)
    124         while buffer:
--> 125             self.feed(buffer)
    126             buffer = file.read(self._bufsize)
    127         self.close()

/opt/conda/lib/python3.5/xml/sax/expatreader.py in feed(self, data, isFinal)
    208             # document. When feeding chunks, they are not normally final -
    209             # except when invoked from close.
--> 210             self._parser.Parse(data, isFinal)
    211         except expat.error as e:
    212             exc = SAXParseException(expat.ErrorString(e.code), e, self)

/home/ilan/minonda/conda-bld/work/Python-3.5.2/Modules/pyexpat.c in StartElement()

/opt/conda/lib/python3.5/xml/sax/expatreader.py in start_element_ns(self, name, attrs)
    357 
    358         self._cont_handler.startElementNS(pair, None,
--> 359                                           AttributesNSImpl(newattrs, qnames))
    360 
    361     def end_element_ns(self, name):

/opt/conda/lib/python3.5/site-packages/feedparser.py in startElementNS(self, name, qname, attrs)
   2030                 attrsD[str(qname).lower()] = attrs.getValueByQName(qname)
   2031             localname = str(localname).lower()
-> 2032             self.unknown_starttag(localname, list(attrsD.items()))
   2033 
   2034         def characters(self, text):

/opt/conda/lib/python3.5/site-packages/feedparser.py in unknown_starttag(self, tag, attrs)
    604         # uses a whitelisted URI scheme (e.g. not `javscript:`)
    605         if self.baseuri:
--> 606             self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri
    607         else:
    608             self.baseuri = _urljoin(self.baseuri, baseuri)

/opt/conda/lib/python3.5/site-packages/feedparser.py in _makeSafeAbsoluteURI(base, rel)
   2348             return base
   2349         return ''
-> 2350     uri = _urljoin(base, rel)
   2351     if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
   2352         return ''

/opt/conda/lib/python3.5/site-packages/feedparser.py in _urljoin(base, uri)
    436 _urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)')
    437 def _urljoin(base, uri):
--> 438     uri = _urifixer.sub(r'\1\3', uri)
    439     if not isinstance(uri, str):
    440         uri = uri.decode('utf-8', 'ignore')

KeyboardInterrupt: 

In [132]:



Out[132]:
'http://export.arxiv.org/api/query?search_query=abs:+novel+OR+ti:+novel/0/1/0/2016/0/1/&start=0&max_results=100'

In [ ]:


In [131]:
# Fetch the feed for the current `url` and print the titles of at most the
# first 100 entries. Slicing (instead of indexing 0..99) avoids an
# IndexError when the feed returns fewer than 100 entries.
d = feedparser.parse(url)
for entry in d.entries[:100]:
    print(entry.title)


Large angle Beamstrahlung as a beam-beam monitoring tool
Design and Modeling of a Mems-Based Valveless Pump Driven by an
  Electromagnetic Force
On the Laplacian of 1/r
Protein Structure Determination Using Chemical Shifts
Numerical solution to the Bloch equations, paramagnetic solutions under
  wideband continuous radio frequency irradiation in a pulsed magnetic field
A perturbative approach to the confinement-deconfinement phase
  transition
Particle Physics challenges to the Bohm Picture of Relativistic Quantum
  Field Theory
The Large N Harmonic Oscillator as a String Theory
Design and Analysis of a Novel $\mathcal{L}_1$ Adaptive Control
  Architecture with Guaranteed Transient Performance
Beyond the `Pentagon Identity'
A novel isospectral deformation chain in supersymmetric quantum
  mechanics
WDVV Equations
Haantjes Manifolds and Veselov Systems
Polyhedron Volume-Ratio-based Classification for Image Recognition
Mapping axonal density and average diameter using non-monotonic
  time-dependent gradient-echo MRI
Ivan Franko's novel Dlja domashnjoho ohnyshcha (For the Hearth) in the
  light of the frequency dictionary
Bayesian Prediction for The Winds of Winter
Graph Kernels exploiting Weisfeiler-Lehman Graph Isomorphism Test
  Extensions
Wide field aplanatic two-mirror telescopes for ground-based gamma-ray
  astronomy
Comment on "Novel Convective Instabilities in a Magnetic Fluid"
Comment on "Classifying Novel Phases of Spinor Atoms"
Correlation over Decomposed Signals: A Non-Linear Approach to Fast and
  Effective Sequences Comparison
The Immortal Bel-Robinson Tensor
Gravitoelectromagnetic inflation from a 5D vacuum state: a new formalism
Special functions as structure constants for new infinite-dimensional
  algebras
Large N Matrix Field Theories
A confining string theory derived from QCD
A q-deformed Quantum Mechanics
Divided Differences
Combinatorial identities for binary necklaces from exact ray-splitting
  trace formulae
Curves and The Photon
A Novel Scaled Boundary Finite Element Method in Computational
  Electromagnetics
Organization of Ecosystems in the Vicinity of a Novel Phase Transition
Entanglement Purification through Zeno-like Measurements
A Novel Piezoelectric Microtransformer for Autonmous Sensors
  Applications
Quantum Interference: an experimental proposal, and possible Engineering
  applications
General method of solution of Schwinger-Dyson equations in Minkowski
  space
The quantum spin Hall effect and topological insulators
Using electrowetting to control interface motion in patterned
  microchannels
Scaling conditional tail probability and quantile estimators
Modelling outliers and structural breaks in dynamic linear models with a
  novel use of a heavy tailed prior for the variances: An alternative to the
  Inverted Gamma
On a Variation of the Definition of Limit: Some Analytic Consequences
A novel type of spiral wave with trapped ions
Glass-forming photoactive cholesteric materials doped by quantum dots:
  phototunable circularly-polarized emission
Massless particles and arrow of time in relativistic quantum field
  theory
Symmetry problem
Hydex waveguides for nonlinear optics
Fast, memory efficient low-rank approximation of SimRank
GPU accelerated image reconstruction in a two-strip J-PET tomograph
Exploiting Correlation among Data Items for Cache Replacement in Ad-hoc
  Networks
Thermodynamic expansion to arbitrary moduli
Novel nonlinear kinetic terms for gravitons
Comment on decoherence by time dilation
Separating Topological Noise from Features using Persistent Entropy
Exotic Quantum Order in Low-Dimensional Systems
Fractional-flux vortices and spin superfluidity in triplet
  superconductors
Noise in Disordered Systems: Higher Order Spectra in Avalanche Models
Quadratic solitons as nonlocal solitons
Strange Disoriented Chiral Condensate
Control momentum entanglement with atomic spontaneously generated
  coherence
Atom--photon momentum entanglement with quantum interference
Drug-therapy networks and the predictions of novel drug targets
Equations for hidden Markov models
Coherent-state phase concentration by quantum probabilistic
  amplification
Non-volatile Complementary Resistive Switch-based Content Addressable
  Memory
Analytic Continuation of weighted q-Genocchi numbers and polynomials
Planning of Cellular Networks Enhanced by Energy Harvesting
Perceptron Mistake Bounds
Color entanglement for $γ$-jet production in polarized pA
  collisions
Comparing Writing Styles using Word Embedding and Dynamic Time Warping
Matroid Filtrations and Computational Persistent Homology
Learning like a Child: Fast Novel Visual Concept Learning from Sentence
  Descriptions of Images
Quantum superposition of multiple clones and the novel cloning machine
A Derivative-Free Approach to Total Variation Regularization
Wave Energy Amplification in a Metamaterial based Traveling Wave
  Structure
Anisotropic magnetoresistance in topological insulator
  Bi1.5Sb0.5Te1.8Se1.2/CoFe heterostructures
On the graph-theoretical interpretation of Pearson correlations in a
  multivariate process and a novel partial correlation measure
Pose Induction for Novel Object Categories
Noise Robustness of a Combined Phase Retrieval and Reconstruction Method
  for Phase-Contrast Tomography
Parallel Metropolis chains with cooperative adaptation
The Nature of Dark Matter
The Dark Side of the Universe
Neutron Star Masses, Radii and Equation of State
Diffusive conductors as Andreev interferometers
High Temperature Superconductivity
The Gradient Expansion for the Free-Energy of a Clean Superconductor
Ground-State Properties of the Two-Band Model for Halogen-Bridged Metal
  Complexes
The dimer-RVB State of the Four-Leg Heisenberg Ladder: Interference
  among Resonances
Q-Factor Measurement of Nonlinear Superconducting Resonators
Broken Symmetry and Josephson-like Tunneling in Quantum Hall Bilayers
Superstring representation of Hubbard model
Large grain boundary area superconductors
Dephasing of Atomic Tunneling by Nuclear Quadrupoles
Approaches to Network Classification
A Novel Approach Applied to the Largest Clique Problem
Generation of spatially-separated spin entanglement in a triple quantum
  dot system
Inhomogeneous superconductivity and the "pseudogap state of novel
  superconductors
All Superlinear Inverse Schemes are coNP-Hard
Statistical Parameters of the Novel "Perekhresni stezhky" ("The
  Cross-Paths") by Ivan Franko
Finding Cliques of a Graph using Prime Numbers