CherryPick prototype

Scrape a page for DOIs, clean and check them, then get some metadata from the Crossref API


In [147]:
import re
import scrapy
from scrapy.http import TextResponse
import requests
import json
# DOI regex adapted from Alix Axel's Stack Overflow answer:
#   http://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page
#   (author: http://stackoverflow.com/users/89771/alix-axel)
# It matches "10.<registrant>/<suffix>", where the suffix runs up to the next
# whitespace, double quote, ampersand, apostrophe or parenthesis.
doi_re = re.compile(r'\b(10[.][0-9]{3,}(?:[.][0-9]+)*/(?:(?!["&\'()])\S)+)')
target = 'http://www.cardiff.ac.uk/chemistry/research/publications/2014-publications'
r = requests.get(target)
response = TextResponse(r.url, body=r.text, encoding='utf-8')

# NOTE: clean_doi is defined in a later cell of this notebook (In [145]); that
# cell must have been run before this one or the next statement raises NameError.
#
# DOIs are matched against the raw HTML. The alternative kept below searches
# only the page's text content instead:
#all_dois = doi_re.findall(response.xpath('string(.)').extract()[0])
all_dois = doi_re.findall(r.text)
all_dois = [clean_doi(d) for d in all_dois]
all_dois = list(set(all_dois)) #uniqify
print(all_dois)

# Look every DOI up on the works endpoint and print the title it returns.
api_stub = 'http://api.crossref.org/works/'
reqs = [api_stub + d for d in all_dois]
fails = 0
for req in reqs:
    api_reply = requests.get(req)
    try:
        payload = json.loads(api_reply.text)
        title = payload["message"]['title']
    except (ValueError, KeyError, TypeError):
        # ValueError: the body was not JSON; KeyError/TypeError: the JSON did
        # not have the expected message -> title structure.
        fails += 1
    else:
        print(title)

print(str(fails) + ' failed dois from ' + str(len(reqs)) + ' requests')
if reqs:
    print('failure rate: ' + str(float(fails)/len(reqs)))
else:
    # No DOIs were found on the page, so there is no rate to report.
    print('failure rate: n/a (no DOIs found)')


[u'10.1021/ic501411w', u'10.1038/ncomms4332', u'10.1039/c4cy00027g', u'10.1016/j.tet.2014.02.043', u'10.1016/j.jcat.2013.04.005', u'10.1039/C4OB01916D', u'10.1039/C4CE00308J', u'10.1063/1.4903961', u'10.1002/cphc.201402271', u'10.1021/op500221s', u'10.1021/bi500507v', u'10.1021/bi500238q', u'10.1002/chem.201304005', u'10.1063/1.4889780', u'10.1002/anie.201308997', u'10.1016/j.ccr.2014.02.003', u'10.1039/C3SM52877D', u'10.1016/j.jorganchem.2013.08.031', u'10.1039/c4ta02521k', u'10.1021/bi500508z', u'10.1007/s11244-013-0229-5', u'10.1002/chem.201303736', u'10.1021/ja5066366', u'10.1107/S1600536814019035', u'10.1039/C4MD00265B', u'10.1021/om500847j', u'10.1002/chem.201302053', u'10.1002/9781118695708.ch1', u'10.1021/ic403011h', u'10.1039/C4SC00545G', u'10.1039/c4cy00387j', u'10.1039/c3dt52972j', u'10.1039/c4gc00087k', u'10.1016/j.tet.2014.03.078', u'10.1556/JFC-D-13-00030', u'10.1039/C3SC52745J', u'10.1039/c4dt00519h', u'10.1007/s11243-013-9789-2', u'10.1016/B978-0-08-097742-3.00821-1', u'10.1016/j.tetlet.2014.08.036', u'10.1021/nn405757q', u'10.1021/op500195e', u'10.1016/j.apsusc.2014.06.088', u'10.1039/C4CC04024D', u'10.1021/jo402591v', u'10.1016/j.apsusc.2014.09.078', u'10.1039/c4ce00070f', u'10.1021/jp409522q', u'10.1107/S1600536814003729', u'10.1002/cssc.201300834', u'10.1002/chem.201303355', u'10.1107/S1600536814020017', u'10.1055/s-0033-1340835', u'10.1002/anie.201404266', u'10.1021/jp5078664', u'10.1021/cs400683e', u'10.1007/s13203-014-0059-9', u'10.1021/jp505847g', u'10.1016/j.jinorgbio.2014.02.015', u'10.1039/c3dt50874a', u'10.1021/ja5021348', u'10.1371/journal.pone.0107462', u'10.1002/jcp.24632', u'10.1142/9781848167513_0003', u'10.1016/j.electacta.2013.08.169', u'10.1039/C4CY01213E', u'10.1021/ja5006256', u'10.1107/S1600536814011350', u'10.1016/j.electacta.2013.10.028', u'10.1107/S160053681401664X', u'10.1021/op500213j', u'10.1016/j.susc.2013.11.013', u'10.1007/s00214-014-1525-2', u'10.1021/ja4110842', u'10.1039/C4CP00529E', u'10.1039/c4cc03259d', 
u'10.1039/C4DT02203C', u'10.1039/C3DT52524D', u'10.1002/ange.201400405', u'10.1021/ar5002457', u'10.1002/asia.201301582', u'10.1021/ol500591q', u'10.1021/ol502201d', u'10.1039/C4FD00215F', u'10.14233/ajchem.2014.15918', u'10.1039/c3ra47573e', u'10.1016/j.jinorgbio.2014.07.011', u'10.1093/bja/aet373', u'10.1002/chem.201402174', u'10.1039/c4cy00184b', u'10.1021/jp5081753', u'10.1002/ejic.201402606', u'10.1016/j.ccr.2014.03.037', u'10.1107/S1600536814005996', u'10.1039/C4DT02239D', u'10.1016/j.jelechem.2013.09.018', u'10.1039/C4CS00042K', u'10.1002/elan.201400085', u'10.1039/c3ce42267d', u'10.1063/1.4885217', u'10.1002/chem.201402446', u'10.1039/C3OB41869C', u'10.1021/ic402597s', u'10.1039/c4fd00061g', u'10.1016/B978-0-08-097742-3.00735-7', u'10.1002/ejic.201402248', u'10.1021/ja5102536', u'10.1039/C4CP04046E', u'10.1016/j.tet.2014.06.070', u'10.1016/j.jinorgbio.2014.06.019', u'10.1021/ic403097s', u'10.1002/chem.201404762', u'10.1107/S160053681401633X', u'10.1002/anie.201308395', u'10.1039/C4CP00564C', u'10.1002/cmdc.201400056', u'10.1063/1.4894746', u'10.1039/c4dt00114a', u'10.1002/anie.201308067', u'10.1002/hc.21164', u'10.1080/00268976.2013.822592', u'10.1021/ic500142z', u'10.1016/j.jsb.2014.07.004', u'10.1021/ct4009969', u'10.1039/C4OB01788A', u'10.1071/CH14170', u'10.1021/om500518r', u'10.1021/cm503174z', u'10.1002/ejic.201402595', u'10.1021/op500155f', u'10.1021/jp412726z', u'10.1016/j.polymer.2013.07.035', u'10.3390/catal4020089', u'10.1039/C4RA03102D', u'10.1021/om500734b', u'10.1002/open.201300039', u'10.1016/j.tet.2014.02.003', u'10.1039/c4cy00044g', u'10.1021/ja502673h', u'10.14293/S2199-1006.1.SOR-CHEM.AALL9P.v1', u'10.1021/ar400177c', u'10.1107/S1600536813033266', u'10.1039/C4OB01396D', u'10.1021/ic402955e', u'10.1007/s11030-013-9498-y', u'10.1016/j.tet.2014.04.083', u'10.1107/S1600536814019990', u'10.1021/ic501236j', u'10.1126/science.1257158', u'10.1039/C4CP04693E', u'10.1002/chem.201403891', u'10.1016/j.mex.2014.08.007', u'10.1021/jm5001216', 
u'10.1039/9781782620037-00218', u'10.1002/chem.201302348', u'10.1039/c3dt52841c', u'10.1055/s-0034-1379721', u'10.1039/c3cp53691b', u'10.1021/sc400492x', u'10.1039/c4cc01370k', u'10.1002/9783527658985.ch12', u'10.1002/anie.201400405', u'10.11113/jt.v69.3198', u'10.1016/j.apcata.2014.07.029', u'10.1107/S1600536814019321', u'10.1016/j.poly.2013.09.033', u'10.3390/molecules191015584', u'10.1016/j.cplett.2014.02.049', u'10.1002/anie.201405755', u'10.1002/9781118778173', u'10.1002/cbic.201402103', u'10.1039/c3ra46386a', u'10.1107/S1600536814015657', u'10.1016/j.ccr.2014.05.021', u'10.1126/science.1253537', u'10.1002/anie.201406706']
[u'Relationships between Electron Density and Magnetic Properties in Water-Bridged Dimetal Complexes']
[u'The benzaldehyde oxidation paradox explained by the interception of peroxy radical by benzyl alcohol']
[u'Deactivation studies of a carbon supported AuPt nanoparticulate catalyst in the liquid-phase aerobic oxidation of 1,2-propanediol']
[u'Heterogeneously catalyzed oxidation of butanediols in base free aqueous media']
[u'Hydrogen production by photoreforming of biofuels using Au, Pd and Au\u2013Pd/TiO2 photocatalysts']
[u'Improving catalyst activity in secondary amine catalysed transformations']
[u' Weakening of the \u03c0*\u2013\u03c0* dimerisation in 1,2,3,5-dithiadiazolyl radicals: structural, EPR, magnetic and computational studies of dichlorophenyl dithiadiazolyls, Cl 2 C 6 H 3 CNSSN ']
[u'Direct production of OH radicals upon CH overtone activation of (CH3)2COO Criegee intermediates']
[u'Oxygen Reduction Reaction Activity on Pt{111} Surface Alloys']
[u' Reconfiguration of a Continuous Flow Platform for Extended Operation: Application to a Cryogenic Fluorine-Directed ortho -Lithiation Reaction ']
[u'Role of the Occluded Conformation in Bacterial Dihydrofolate Reductases']
[u' Thermal Adaptation of Dihydrofolate Reductase from the Moderate Thermophile Geobacillus stearothermophilus ']
[u' Analysis of High and Selective Uptake of CO 2 in an Oxamide-Containing {Cu 2 (OOCR) 4 }-Based Metal-Organic Framework ']
[u'Nonstatistical dynamics on the caldera']
[u' The Trifluoromethylating Sandmeyer Reaction: A Method for Transforming C\uf8ffN into C\uf8ffCF 3 ']
[u'The coordination chemistry of substituted anthraquinones: Developments and applications']
[u'The interfacial structure of polymeric surfactant stabilised air-in-water foams']
[u'Luminescent rhenium(I) complexes of substituted imidazole[4,5-f]-1,10-phenanthroline derivatives']
[u'Porous macromolecular dihydropyridyl frameworks exhibiting catalytic and halochromic activity']
[u' Loop Interactions during Catalysis by Dihydrofolate Reductase from Moritella profunda ']
[u'Gold-Based Nanoparticulate Catalysts for the Oxidative Esterification of 1,4-Butanediol to Dimethyl Succinate']
[u'Investigation of a Lithium-Halogen Exchange Flow Process for the Preparation of Boronates by Using a Cryo-Flow Reactor']
[u'Evolutionary and Mechanistic Insights from the Reconstruction of \u03b1-Humulene Synthases from a Modern (+)-Germacrene A Synthase']
[u' Crystal structure of bis{ N -[2-(dimethylamino)ethyl]quinolin-8-amine-\u03ba 3 N , N \u2032, N \u2032\u2032}nickel(II) dichloride 3.5-hydrate ']
[u'Gold compounds as aquaporin inhibitors: new opportunities for therapy and imaging']
[u'Iron-Catalyzed Borylation of Alkyl, Allyl, and Aryl Halides: Isolation of an Iron(I) Boryl Complex']
[u'Solid-State Interconversions: Unique 100 % Reversible Transformations between the Ground and Metastable States in Single-Crystals of a Series of Nickel(II) Nitro Complexes']
[u'Powder Diffraction']
[u' Caffeine-Based Gold(I) N -Heterocyclic Carbenes as Possible Anticancer Agents: Synthesis and Biological Properties ']
[u'Light alkane oxidation using catalysts prepared by chemical vapour impregnation: tuning alcohol selectivity through catalyst pre-treatment']
[u'Solvent-free aerobic oxidation of alcohols using supported gold palladium nanoalloys prepared by a modified impregnation method']
[u'Post-synthetic preparation of Sn-, Ti- and Zr-beta: a facile route to water tolerant, highly active Lewis acidic zeolites']
[u'Base-free glucose oxidation using air with supported gold catalysts']
[u'An investigation of the scope of the 1,7-electrocyclization of\xa0\u03b1,\u03b2:\u03b3,\u03b4-conjugated azomethine ylides']
[u'Electrochemical Synthesis in Microreactors']
[u'Modification of coordination networks through a photoinduced charge transfer process']
[u'A benzimidazole functionalised DO3A chelator showing pH switchable coordination modes with lanthanide ions']
[u'Silver nanoparticles functionalised with a luminescent iridium complex: phosphorescent hybrid materials']
[u'8.19 Partial Reduction of Benzenoid Aromatic Rings by Dissolving Metals and by Other Methods']
[u'A facile regioselective 1,3-dipolar cycloaddition protocol for the synthesis of new class of quinolinyl dispiro heterocycles']
[u'High Activity Redox Catalysts Synthesized by Chemical Vapor Impregnation']
[u'Oxidation of Benzyl Alcohol using in Situ Generated Hydrogen Peroxide']
[u'Study of the magnetic\u2013Alq3 interface in organic spin-valves']
[u'Selective photocatalytic oxidation of benzene for the synthesis of phenol using engineered Au\u2013Pd alloy nanoparticles supported on titanium dioxide']
[u'Isothiourea-Mediated Asymmetric Functionalization of 3-Alkenoic Acids']
[u'Surface state modulation through wet chemical treatment as a route to controlling the electrical properties of ZnO nanowire arrays investigated with XPS']
[u'An organometallic complex revealing an unexpected, reversible, temperature induced SC\u2013SC transformation']
[u' Catalytic Dissociation of Water on the (001), (011), and (111) Surfaces of Violarite, FeNi 2 S 4 : A DFT-D2 Study ']
[u' 2,2-Dimethyl- N -(4-methylpyridin-2-yl)propanamide ']
[u'Base-Free Oxidation of Glycerol Using Titania-Supported Trimetallic Au-Pd-Pt Nanoparticles']
[u' Oxidation of Benzyl Alcohol and Carbon Monoxide Using Gold Nanoparticles Supported on MnO 2 Nanowire Microspheres ']
[u'Crystal structure of 4,4-dibutyl-2-phenyl-3,4-dihydroquinazoline']
[u'Safe Generation and Direct Use of Diazoesters in Flow Chemistry']
[u'\u201cCLASSIC NMR\u201d: An In-Situ NMR Strategy for Mapping the Time-Evolution of Crystallization Processes by Combined Liquid-State and Solid-State Measurements']
[u'Density Functional Theory Study of the Adsorption of Hydrazine on the Perfect and Defective Copper (100), (110), and (111) Surfaces']
[u' Molybdenum Oxide on Fe 2 O 3 Core\u2013Shell Catalysts: Probing the Nature of the Structural Motifs Responsible for Methanol Oxidation Catalysis ']
[u'Catalysis using colloidal-supported gold-based nanoparticles']
[u' New Insights into the Structure of the C-Terminated \u03b2-Mo 2 C (001) Surface from First-Principles Calculations ']
[u'Nitrate as a probe of cytochrome c surface: Crystallographic identification of crucial \u201chot spots\u201d for protein\u2013protein recognition']
[u'Photophysics and electrochemistry of a platinum-acetylide disubstituted perylenediimide']
[u'A Two-Coordinate Manganese(0) Complex with an Unsupported Mn\u2013Mg Bond: Allowing Access to Low Coordinate Homo- and Heterobimetallic Compounds']
[u'Variation in Capsidiol Sensitivity between Phytophthora infestans and Phytophthora capsici Is Consistent with Their Host Range']
[u'Functional Inhibition of Aquaporin-3 With a Gold-Based Compound Induces Blockage of Cell Proliferation']
[u'The Catalytic Oxidation of Hydrocarbon Volatile Organic Compounds']
[u'Polymers of intrinsic microporosity in electrocatalysis: Novel pore rigidity effects and lamella palladium growth']
[u'Molybdenum blue nano-rings: an effective catalyst for the partial oxidation of cyclohexane']
[u'Photoinitiated Synthesis of Self-Assembled Vesicles']
[u'1-(2-Bromo-4-chlorophenyl)-3,3-dimethylthiourea']
[u'A novel cobalt complex for enhancing amperometric and impedimetric DNA detection']
[u' Crystal structure of 2-ethylquinazoline-4(3 H )-thione ']
[u'Design and Application of a Low-Temperature Continuous Flow Chemistry Platform']
[u'Adsorption of hydrazine on the perfect and defective copper (111) surface: A dispersion-corrected DFT study']
[u'Effect of a chiral electrostatic cavity on product selection in a reaction with a bifurcating reaction path']
[u' Activation of Alkynes with B(C 6 F 5 ) 3 \u2013 Boron Allylation Reagents Derived from Propargyl Esters ']
[u' A DFT study of the structures, stabilities and redox behaviour of the major surfaces of magnetite Fe 3 O 4 ']
[u' Recent developments in gold( i ) coordination chemistry: luminescence properties and bioimaging opportunities ']
[u' Manganese( i ) templates for the construction of benzannulated triphosphamacrocycles ']
[u'A golden future in medicinal inorganic chemistry: the promise of anticancer gold organometallic compounds']
[u'Flexible stereoselektive Funktionalisierung von Ketonen durch Umpolung mit hypervalenten Iodreagentien']
[u'Anionic Chiral Tridentate N-Donor Pincer Ligands in Asymmetric Catalysis']
[u'Hypervalent Iodine-Catalyzed Oxidative Functionalizations Including Stereoselective Reactions']
[u'Triptycene-Based Organic Molecules of Intrinsic Microporosity']
[u'Expedient Preparation of Nazlinine and a Small Library of Indole Alkaloids Using Flow Electrochemistry as an Enabling Technology']
[u'New in situ solid-state NMR techniques for probing the evolution of crystallization processes: pre-nucleation, nucleation and growth']
[u'Pyrazolopyridines II: Synthesis and Antibacterial Screening of 6-Aryl-3-methyl-1-phenyl-1H-pyrazolo[3,4-b]pyridine-4-carboxylic Acids']
[u'A single rapid route for the synthesis of reduced graphene oxide with antibacterial activities']
[u'New heteronuclear gold(I)\u2013platinum(II) complexes with cytotoxic properties: Are two metals better than one?']
[u'Abstracts of the Winter Anaesthetic Research Society Meeting (ARS): The Royal College of Anaesthetists, London, UK, October 1-2, 2013']
[u'Expedient Iron-Catalyzed Coupling of Alkyl, Benzyl and Allyl Halides with Arylboronic Esters']
[u'Conversion of furfuryl alcohol into 2-methylfuran at room temperature using Pd/TiO2 catalyst']
[u'The Nature of the Molybdenum Surface in Iron Molybdate. The Active Phase in Selective Methanol Oxidation']
[u'Non-Nuclear Attractor in a Molecular Compound under External Pressure']
[u'Self-assembled M2L4 coordination cages: Synthesis and potential applications']
[u' 2-Ethyl-3-[( R )-2-phenylbutanamido]quinazolin-4(3 H )-one monohydrate ']
[u' Aspects of the coordination chemistry of rac-trans-1,2-diphosphinocyclohexane and the preparation of reinforced 9aneP 3 and 9anePN 2 macrocycles ']
[u'Characterisation and electrocatalytic activity of PtNi alloys on Pt{111} electrodes formed using different thermal treatments']
[u'Organocatalytic Lewis base functionalisation of carboxylic acids, esters and anhydrides via C1-ammonium or azolium enolates']
[u'Intrinsically Porous Polymer Protects Catalytic Gold Particles for Enzymeless Glucose Oxidation']
[u'A series of Ln-p-chlorobenzoic acid\u2013terpyridine complexes: lanthanide contraction effects, supramolecular interactions and luminescent behavior']
[u'Comparison of Au and TiO2 based catalysts for the synthesis of chalcogenide nanowires']
[u' Highly Efficient Chiral Resolution of dl -Arginine by Cocrystal Formation Followed by Recrystallization under Preferential-Enrichment Conditions ']
[u'2-Arylacetic anhydrides as ammonium enolate precursors']
[u' Bifunctional Zn II Ln III Dinuclear Complexes Combining Field Induced SMM Behavior and Luminescence: Enhanced NIR Lanthanide Emission by 9-Anthracene Carboxylate Bridging Ligands ']
[u'Spectroscopic and atomic force studies of the functionalisation of carbon surfaces: new insights into the role of the surface topography and specific chemical states']
[u'7.29 Oxidative Functionalization with Hypervalent Halides']
[u'New Gold(I) Organometallic Compounds with Biological Activity in Cancer Cells']
[u' Protein Isotope Effects in Dihydrofolate Reductase From Geobacillus stearothermophilus Show Entropic\u2013Enthalpic Compensatory Effects on the Rate Constant ']
[u'Modelling analysis of the structure and porosity of covalent triazine-based frameworks']
[u'Base-promoted new C\u2013C bond formation: an expedient route for the preparation of thiazolo- and imidazolo-pyridinones via Michael addition']
[u'Light-stable bis(norharmane)silver(I) compounds: Synthesis, characterization and antiproliferative effects in cancer cells']
[u' Closely-Related Zn II 2 Ln III 2 Complexes (Ln III = Gd, Yb) with Either Magnetic Refrigerant or Luminescent Single-Molecule Magnet Properties ']
[u'Organocatalytic Stereoselective Iodoamination of Alkenes']
[u' Crystal structure of 2- tert -butyl-1,3-thiazolo[4,5- b ]pyridine ']
[u'TMEDA in Iron-Catalyzed Kumada Coupling: Amine Adduct versus Homoleptic \u201cate\u201d Complex Formation']
[u'Specific adsorption of perchlorate anions on Pt{hkl} single crystal electrodes']
[u'Cytotoxic Gold(I) N-heterocyclic Carbene Complexes with Phosphane Ligands as Potent Enzyme Inhibitors']
[u'Quantum dynamical investigation of the simplest Criegee intermediate CH2OO and its O\u2013O photodissociation channels']
[u'Chiral lanthanide complexes: coordination chemistry, spectroscopy, and catalysis']
[u'The Direct Synthesis of Hydrogen Peroxide Using Platinum-Promoted Gold-Palladium Catalysts']
[u'Synthesis and Antioxidant Activities of Novel Chiral Ebselen Analogues']
[u' 1+1\u2032 resonant multiphoton ionisation of OH radicals via the A 2 \u03a3 + state: insights from direct comparison with A-X laser-induced fluorescence detection ']
[u'Fluorescent Rhenium-Naphthalimide Conjugates as Cellular Imaging Agents']
[u'The structural basis of differential inhibition of human calpain by indole and phenyl \u03b1-mercaptoacrylic acids']
[u'Ion Binding to Quadruplex DNA Stems. Comparison of MM and QM Descriptions Reveals Sizable Polarization Effects Not Included in Contemporary Simulations']
[u'Organocatalytic Michael addition\u2013lactonisation of carboxylic acids using \u03b1,\u03b2-unsaturated trichloromethyl ketones as \u03b1,\u03b2-unsaturated ester equivalents']
[u'Formation of an Unusual Bis(diguanidinate) Ligand via Nucleophilic Attack of a Guanidinate onto a Carbodiimide']
[u'Iron Phosphine Catalyzed Cross-Coupling of Tetraorganoborates and Related Group 13 Nucleophiles with Alkyl Halides']
[u'Active Nature of Primary Amines during Thermal Decomposition of Nickel Dithiocarbamates to Nickel Sulfide Nanoparticles']
[u'Chromophores, Fluorophores and Robust Ancillary Ligands for Molecular Catalysts: 1,3-Bis(2-pyridylimino)isoindolines']
[u'Rapid Electrochemical Deprotection of the Isonicotinyloxycarbonyl Group from Carbonates and Thiocarbonates in a Microfluidic Reactor']
[u'UV Spectroscopic Characterization of Dimethyl- and Ethyl-Substituted Carbonyl Oxides']
[u'Centrotriindane- and triptindane-based polymers of intrinsic microporosity']
[u'Inhibition of a Gold-Based Catalyst in Benzyl Alcohol Oxidation: Understanding and Remediation']
[u'Nanostructures by self-assembly of polyglycidol-derivatized lipids']
[u' Peripheral Methyl Activation in \u03b7 4 -1,2,3,4-Tetramethylcyclobutadienylcobalt Complexes: Template Synthesis and Subsequent Reactivity of Triphosphamacrocycles ']
[u'Difluoro- and Trifluoromethylation of Electron-Deficient Alkenes in an Electrochemical Microreactor']
[u'A rapid and efficient protocol for the synthesis of novel nitrothiazolo[3,2-c]pyrimidines via microwave-mediated Mannich cyclisation']
[u'Novel cobalt zinc oxide Fischer\u2013Tropsch catalysts synthesised using supercritical anti-solvent precipitation']
[u'Different Dynamical Effects in Mesophilic and Hyperthermophilic Dihydrofolate Reductases']
[u'Synthesis of Quinolinequinone Derivatives and related Carbocyclic Compounds']
[u'Strategies for Designing Supported Gold\u2013Palladium Bimetallic Catalysts for the Direct Synthesis of Hydrogen Peroxide']
[u' (2 E )-2-(1,3-Benzothiazol-2-yl)-3-(dimethylamino)prop-2-enenitrile ']
[u'Theoretical insight into the antioxidant properties of melatonin and derivatives']
[u' Photochemistry in a 3D Metal\u2013Organic Framework (MOF): Monitoring Intermediates and Reactivity of the fac -to- mer Photoisomerization of Re(diimine)(CO) 3 Cl Incorporated in a MOF ']
[u'A regioselective multicomponent protocol for the synthesis of novel bioactive 4-hydroxyquinolin-2(1H)-one grafted monospiropyrrolidine and thiapyrrolizidine hybrids']
[u'[1,3]-Dipolar cycloaddition of N-aryl sydnones to benzothiophene 1,1-dioxide, 1-cyclopropylprop-2-yn-1-ol and 1-(prop-2-ynyl)-1H-indole']
[u'Crystal structure of 2-[4-(methylsulfanyl)quinazolin-2-yl]-1-phenylethanol']
[u'Thiocyanate Complexes of Uranium in Multiple Oxidation States: A Combined Structural, Magnetic, Spectroscopic, Spectroelectrochemical, and Theoretical Study']
[u'Infrared-driven unimolecular reaction of CH3CHOO Criegee intermediates to OH radical products']
[u'Optimised photocatalytic hydrogen production using core\u2013shell AuPd promoters with controlled shell thickness']
[u'Enantioselective Diamination with Novel Chiral Hypervalent Iodine Catalysts']
[u'Comparative analysis and validation of the malachite green assay for the high throughput biochemical characterization of terpene synthases']
[u'Efficacious Inhaled PDE4 Inhibitors with Low Emetic Potential and Long Duration of Action for the Treatment of COPD']
[u'Chapter 7. Catalyst preparation using supercritical fluid precipitation']
[u'The Effect of Grafting Zirconia and Ceria onto Alumina as a Support for Silicotungstic Acid for the Catalytic Dehydration of Glycerol to Acrolein']
[u"It's all about Me: methyl-induced control of coordination stereochemistry by a flexible tridentate N,C,N\u2032 ligand"]
[u'Cluster Preface: Progress in Organo-Fluorine Chemistry']
[u'Impact of co-adsorbed oxygen on crotonaldehyde adsorption over gold nanoclusters: a computational study']
[u'Nanoporous Aluminosilicate-Mediated Synthesis of Ethers by a Dehydrative Etherification Approach']
[u'B(C6F5)3 promoted cyclisation of internal propargyl esters: structural characterisation of 1,3-dioxolium compounds']
[u' Recent Trends in Operando and In Situ Characterization: Techniques for Rational Design of Catalysts ']
[u'Flexible Stereoselective Functionalizations of Ketones through Umpolung with Hypervalent Iodine Reagents']
[u'Highly Active Aluminosilicates with a Hierarchical Porous Structure for Acetalization of 3,4-dimethoxybenzaldehyde']
[u'Vanadium promoted molybdenum phosphate catalysts for the vapour phase partial oxidation of methanol to formaldehyde']
[u' Crystal structure of 4-(2,2-dimethylpropanamido)pyridin-3-yl N , N -diisopropyldithiocarbamate ']
[u'Experimental and theoretical characterisation of phosphorescence from rhenium polypyridyl tricarbonyl complexes']
[u'Cellular Transport Mechanisms of Cytotoxic Metallodrugs: An Overview beyond Cisplatin']
[u'Early time detection of OH radical products from energized Criegee intermediates CH2OO and CH3CHOO']
[u'Metastable Ionic Diodes Derived from an Amine-Based Polymer of Intrinsic Microporosity']
[u'Methods and Applications of Cycloaddition Reactions in Organic Syntheses']
[u'A Gold Coordination Compound as a Chemical Probe to Unravel Aquaporin-7 Function']
[u'All-atom molecular dynamics simulation of HPMA polymers']
[u'Crystal structure of 4-methylsulfanyl-2-phenylquinazoline']
[u'Re(VII) and Tc(VII) trioxo complexes stabilized by tridentate ligands and their potential use as radiopharmaceuticals']
[u'X-ray birefringence imaging']
[u'Diagnosis of Toxoplasmosis Using a Synthetic Glycosylphosphatidylinositol Glycan']
0 failed dois from 176 requests
failure rate: 0.0

In [153]:
# Spot check of clean_doi on a scraped match that still carries a closing tag
# and a trailing comma. Per the recorded output, the trailing "</a>," is removed
# while the "<d>" in the middle of the string is left alone.
d =u'10.1002/9781118<d>778173</a>,'
clean_doi(d)


Out[153]:
u'10.1002/9781118<d>778173'

In [145]:
def clean_doi(d):
    """Strip trailing scrape debris from a DOI string.

    Removes any run of trailing '.' or ',' characters and any trailing
    HTML-style tags (``<...>``), repeating until nothing more can be
    removed, so inputs such as ``doi.</a>`` or ``doi</a></li>,`` are fully
    cleaned. Tags that are not at the end of the string are left untouched.

    Parameters
    ----------
    d : str
        Candidate DOI as matched in the page source. May be empty.

    Returns
    -------
    str
        The DOI without trailing punctuation or tags; an empty string is
        returned unchanged.
    """
    cleaned = d
    while True:
        # One pass: drop trailing punctuation, then one trailing tag.
        stripped = re.sub(r'<[^>]+>$', '', cleaned.rstrip('.,'))
        if stripped == cleaned:
            return cleaned
        cleaned = stripped

In [150]:
# Re-fetch the publications page and wrap it so it can be queried with XPath.
target = 'http://www.cardiff.ac.uk/chemistry/research/publications/2014-publications'
r = requests.get(target)
response = TextResponse(r.url, body=r.text, encoding='utf-8')

# Positional path to the element whose text is wanted.
target_space = '//body/div/div/div/div[2]/div[2]'
matched_nodes = response.xpath(target_space)
# From here on `target` holds the first extracted text value, not the URL.
target = matched_nodes.xpath('string(.)').extract()[0]

In [152]:
# Size of the value extracted into `target` by the XPath cell.
# NOTE(review): the recorded output is 1, which suggests the XPath did not
# capture the intended section of the page -- TODO confirm the path.
len(target)


Out[152]:
1

In [ ]: