Processing metadata

  • Kristian Garza
  • kgarza@datacite.org
  • kjgarza

In [1]:
library(jsonlite)
library(dplyr)
library(stringr)
library(httr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


In [2]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=crossref&citation-type=dataset-scholarly-article'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_source_crossref_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_source_crossref_meta.Rda"

In [ ]:


In [ ]:


In [3]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=datacite-crossref&citation-type=dataset-scholarly-article'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_source_datacite-crossref_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_source_datacite-crossref_meta.Rda"

In [4]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=datacite-crossref'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_source_datacite-crossref_all_citations_types_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_source_datacite-crossref_all_citations_types_meta.Rda"

In [5]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=crossref'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_source_crossref_all_citations_types_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_source_crossref_all_citations_types_meta.Rda"

In [6]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=datacite-crossref'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_source_datacite_all_citations_types_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_source_datacite_all_citations_types_meta.Rda"

In [7]:
# full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0&source-id=datacite-usage'))
# meta<-full_query$meta
# file<-paste0("../data/",Sys.Date(),"_source_crossref_all_citations_types_meta.Rda")
# print(file)
# save(meta,file=file)

In [8]:
full_query<-fromJSON(paste0('https://api.datacite.org/events?extra=true&page[size]=0'))
meta<-full_query$meta
file<-paste0("../data/",Sys.Date(),"_all_meta.Rda")
print(file)
save(meta,file=file)


[1] "../data/2018-10-28_all_meta.Rda"

In [9]:
full_query<-content(GET(paste0('https://api.crossref.org/members?rows=1000')),"parsed")
full_query_2<-content(GET(paste0('https://api.crossref.org/members?rows=1000&offset=1000')),"parsed")
full_query_3<-content(GET(paste0('https://api.crossref.org/members?rows=1000&offset=2000')),"parsed")
full_query<-rbind(full_query,full_query_2)
parsed<-rbind(full_query,full_query_3)
# parsed <- content(full_query, "parsed")
items <-parsed$message$items
print(class(parsed))

registrants_names<-data.frame("registrant_name"="","id"="", stringsAsFactors = FALSE)


for (row in (items)) {
    registrant_name <- row$`primary-name`
    registrant_id <- row$id
    if(is.null(row$`primary-name`)){
        registrant_name<-""
    }
#     registrants_names<-rbind(registrants_names,c(substr(registrant_name,0,13),paste0('crossref.',registrant_id)))
    registrants_names<-rbind(registrants_names,c(word(registrant_name, start = -2, end = -1),paste0('crossref.',registrant_id)))

}
file<-paste0("../data/",Sys.Date(),"_crossref_registrants.Rda")
print(file)
save(registrants_names,file=file)


[1] "matrix"
[1] "../data/2018-10-28_crossref_registrants.Rda"

In [10]:
full_query<-fromJSON(paste0('https://api.datacite.org/clients?page[size]=1000'))
# full_query_2<-fromJSON(paste0('https://api.datacite.org/clients?page[size]=500$page[number]=2'))
# full_query<-rbind(full_query,full_query_2)
# parsed <- content(full_query, "parsed")
items <-full_query$data
print(nrow(full_query))

registrants_names<-data.frame("registrant_name"="","id"="", stringsAsFactors = FALSE)

for (row in 1:nrow(items)) { 
    registrant_name <- (items$attributes$name[row])
    registrant_id <- (items$id[row])
    if(is.null(items$attributes$name[row])){
        registrant_name<-""
    }
    
    registrants_names<-rbind(registrants_names,c(word(registrant_name, start = 1, end = 2),paste0('datacite.',registrant_id)))


}
file<-paste0("../data/",Sys.Date(),"_datacite_registrants.Rda")
print(file)
save(registrants_names,file=file)


NULL
[1] "../data/2018-10-28_datacite_registrants.Rda"

In [9]:
head(registrants_names)


registrant_nameid
'Health Monitoring' datacite.gesis.rki
027.7 - datacite.ethz.ubasojs
4TU.Centre for datacite.delft.data4tu
Aalborg University datacite.dk.aau
Aalto University datacite.csc.aalto

In [ ]: