In [ ]:
import datetime
from allofplos.plos_regex import (validate_doi, show_invalid_dois, find_valid_dois)
from allofplos.samples.corpus_analysis import (get_random_list_of_dois, get_all_local_dois,
get_all_plos_dois)
from allofplos.corpus.plos_corpus import (get_uncorrected_proofs, get_all_solr_dois)
from allofplos import Article
In [ ]:
# Ask for ten DOIs and use the first one as the running example in later cells.
example_dois = get_random_list_of_dois(count=10)
example_doi = example_dois[0]

# The same article can be referred to by DOI, by file path, or by URL;
# the latter two are read off the Article built from the DOI.
article = Article(example_doi)
example_file = article.filepath
example_url = article.url

print(
    "Three ways to represent an article\nArticle as DOI: {}\nArticle as local file: {}\nArticle as url: {}".format(
        example_doi, example_file, example_url
    )
)
In [ ]:
# Three hard-coded DOIs used by later cells as correction examples
# (presumably correction notices, going by the variable name -- verify).
example_corrections_dois = [
    '10.1371/journal.pone.0166537',
    '10.1371/journal.ppat.1005301',
    '10.1371/journal.pone.0100397',
]
In [ ]:
# Three hard-coded DOIs used by later cells as retraction examples
# (presumably retraction notices, going by the variable name -- verify).
example_retractions_dois = [
    '10.1371/journal.pone.0180272',
    '10.1371/journal.pone.0155388',
    '10.1371/journal.pone.0102411',
]
In [ ]:
# Fixed DOI used by a later cell when inspecting `Article.proof`
# (presumably a version-of-record article, going by the name -- verify).
example_vor_doi = '10.1371/journal.ppat.1006307'
# Collection of DOIs returned by get_uncorrected_proofs(); a later cell takes
# one item from it. NOTE(review): may be empty depending on the corpus -- confirm.
example_uncorrected_proofs = get_uncorrected_proofs()
In [ ]:
# Validate a single DOI string; as the cell's last expression, the result is
# shown as the cell output.
validate_doi('10.1371/journal.pbio.2000797')
In [ ]:
# Same check on a DOI whose numeric suffix has eight digits instead of seven.
validate_doi('10.1371/journal.pone.12345678') # too many trailing digits
In [ ]:
# A mixed batch of DOI strings: the middle entry has one digit more in its
# numeric suffix than the other two.
doi_list = [
    '10.1371/journal.pbio.2000797',
    '10.1371/journal.pone.12345678',
    '10.1371/journal.pmed.1234567',
]
# Pass the whole batch to show_invalid_dois; its result is the cell output.
show_invalid_dois(doi_list)
In [ ]:
# Build an Article from a journal-article DOI and run its DOI-resolution check;
# the check's result is the cell output.
article = Article('10.1371/journal.pbio.2000797') # working DOI
article.check_if_doi_resolves()
In [ ]:
# Same check for a DOI in the '10.1371/annotation/<uuid>' form rather than the
# '10.1371/journal.<code>.<number>' form.
article = Article('10.1371/annotation/b8b66a84-4919-4a3e-ba3e-bb11f3853755') # working DOI
article.check_if_doi_resolves()
In [ ]:
# Same check for a DOI that is well-formed but, per the inline note, has no
# article behind it.
article = Article('10.1371/journal.pone.1111111') # valid DOI structure, but article doesn't exist
article.check_if_doi_resolves()
In [ ]:
# Take the first item yielded when iterating example_uncorrected_proofs and
# inspect the `proof` attribute of the corresponding Article.
# NOTE(review): next() is called without a default, so this raises
# StopIteration if example_uncorrected_proofs is empty -- confirm the corpus
# contains at least one uncorrected proof before running this cell.
article = Article(next(iter(example_uncorrected_proofs)))
article.proof
In [ ]:
# Same `proof` attribute, this time for the fixed example_vor_doi, for
# comparison with the uncorrected-proof cell.
article = Article(example_vor_doi)
article.proof
In [ ]:
# Run find_valid_dois over free text; the sample sentence embeds two
# DOI-shaped substrings. The result is the cell output.
find_valid_dois("ever seen 10.1371/journal.pbio.2000797, it's great! or maybe 10.1371/journal.pone.1234567?")
In [ ]:
# `pubdate` of the example article; returns a datetime object
article = Article(example_doi)
article.pubdate
In [ ]:
# datetime object can be transformed into any string format:
# get_dates is asked for strings (string_=True) rendered with '%Y-%b-%d',
# and the 'epub' entry of the result is printed.
article = Article(example_doi)
dates = article.get_dates(string_=True, string_format='%Y-%b-%d')
print(dates['epub'])
In [ ]:
# Read the `authors` attribute of the example article; its value is the cell output.
article = Article(example_doi)
article.authors
Out[ ]:
In [ ]:
# `type_` attribute of the first example correction DOI.
article = Article(example_corrections_dois[0])
article.type_
In [ ]:
# `type_` attribute of the first example retraction DOI.
article = Article(example_retractions_dois[0])
article.type_
In [ ]:
# `related_dois` attribute of the first example correction DOI
# (presumably the article(s) the notice refers to -- verify).
article = Article(example_corrections_dois[0])
article.related_dois
In [ ]:
# `related_dois` attribute of the first example retraction DOI
# (presumably the article(s) the notice refers to -- verify).
article = Article(example_retractions_dois[0])
article.related_dois
In [ ]:
# Fetch the DOI collection from get_all_solr_dois and report its size.
# NOTE(review): presumably queries the PLOS Solr index over the network -- confirm.
solr_dois = get_all_solr_dois()
print(len(solr_dois), "articles indexed on Solr.")
In [ ]:
# Fetch the DOI collection from get_all_local_dois and report its size.
all_articles = get_all_local_dois()
print(len(all_articles), "articles on local computer.")
In [ ]:
# Keep the result of get_all_plos_dois for later use; nothing is displayed
# because the cell ends in an assignment.
plos_articles = get_all_plos_dois()
In [ ]:
# `download_updated_xml` is not among the names imported in this notebook's
# import cell, so calling it directly fails with a NameError. Import it here so
# the cell runs on its own.
# NOTE(review): assumes the function lives in allofplos.corpus.plos_corpus, the
# module already used above for get_uncorrected_proofs -- confirm.
from allofplos.corpus.plos_corpus import download_updated_xml

# Call it with the path of one article XML file; the return value, if any, is
# the cell output.
download_updated_xml('allofplos_xml/journal.pcbi.0030158.xml')