In [1]:
#' Ask the Wayback Machine to archive a set of URLs.
#'
#' For every element of `urls` a GET request is sent to
#' `https://web.archive.org/save/<url>`. Progress and the response status are
#' printed to the console, and the function pauses after each request.
#'
#' @param urls Character vector (or list of strings) with the addresses to
#'   archive.
#' @param wait Whole number of seconds to pause after each request. Defaults
#'   to 12, the delay the function has always used.
#' @return A data frame with one row per input URL and two character columns:
#'   `original_url` and `saved_url`. `saved_url` holds the archived address
#'   for a 200 response, the HTTP status code (as text) for any other
#'   response, and `NA` when the request itself failed with an error. Rows
#'   are ordered most-recent-first (i.e. the reverse of `urls`). Empty input
#'   yields a zero-row data frame with the same columns.
save2wm <- function(urls, wait = 12) {
  stopifnot(
    is.numeric(wait), length(wait) == 1L, !is.na(wait),
    wait >= 0, wait == trunc(wait)
  )

  save_endpoint <- 'https://web.archive.org/save/'
  archive_host <- 'https://web.archive.org'
  separator <- '-----------------------------------------\n'

  urls <- as.character(urls)
  total <- length(urls)
  # Preallocated so that every URL keeps a slot, even when its request fails.
  saved_urls <- rep(NA_character_, total)

  for (i in seq_along(urls)) {
    u <- urls[[i]]
    cat(paste0(
      'Przetwarzam adres ', i, '/', total,
      ' (', round(i / total * 100), '%)\n'
    ))
    cat(paste0(u, '\n'))

    saved_urls[[i]] <- tryCatch(
      # Warnings are logged by a calling handler and then muffled, so a
      # warning no longer throws away an otherwise usable response.
      withCallingHandlers(
        {
          r <- httr::GET(paste0(save_endpoint, u))
          outcome <- if (r$status_code == 200) {
            location <- r$headers$`content-location`
            if (is.null(location)) {
              # Without the header only the bare host would be recorded;
              # use the final URL reported by httr instead.
              # NOTE(review): confirm this is the snapshot address.
              r$url
            } else {
              paste0(archive_host, location)
            }
          } else {
            as.character(r$status_code)
          }
          cat(paste0('Status odpowiedzi: ', r$status_code, '\n'))
          # Only announce a saved response when one was actually received.
          message('Odpowiedź serwera zapisana')
          outcome
        },
        warning = function(cond) {
          message('Ostrzeżenie!')
          print(cond)
          cat(separator)
          invokeRestart('muffleWarning')
        }
      ),
      error = function(cond) {
        message('Wystąpił błąd!')
        print(cond)
        cat(separator)
        # Keep the URL in the result, flagged as not saved.
        NA_character_
      }
    )

    # Pause between requests, drawing one '#' per elapsed second.
    cat(paste0('Czekam ', wait, ' sekund '))
    for (tick in seq_len(wait)) {
      cat('#')
      Sys.sleep(1)
    }
    cat(paste0('\n', separator))
  }

  # Most recent request first, matching the order callers already receive.
  data.frame(
    original_url = rev(urls),
    saved_url = rev(saved_urls),
    stringsAsFactors = FALSE
  )
}
In [2]:
# Archive two sample pages and keep the resulting table in `t11`.
t11 <- save2wm(urls = c("http://onet.pl/", "http://wp.pl/"))
In [4]:
# Display the table returned by save2wm() for the two sample URLs.
t11
In [ ]: