Last active
May 16, 2020 21:22
-
-
Save sje30/6d906c080171eb164689501c9b0232a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## How many new articles have been deposited in biorxiv? When new
## papers get uploaded to biorxiv, this information is sent to
## crossref. So, we can use the excellent rcrossref package from the
## ropensci team to get information.
## Note, this does not include revised versions of papers.
##
## Thanks to Scott Chamberlain for providing the magic lines of code to
## grab the information efficiently from crossref.
## Note also there is some code below to grab the information from
## http://www.cshsymposium.com/biorxiv/show_all.php
## and on that site you can also generate similar graphs.
## Load rcrossref with library() rather than require(): require() only
## returns FALSE when the package is missing, so the script would carry on
## and fail later with "could not find function"; library() stops here.
library(rcrossref)

## Download the works registered under DOI prefix 10.1101, restricted by
## the filter to type "report".  Cursor paging is used: 1000 records are
## requested per call and cursor_max = 5000 is passed as the overall cap.
## system.time() reports how long the download took; `res` is assigned in
## the calling environment and read again by the plotting code further down.
system.time(
  res <- cr_prefixes(prefixes = "10.1101", works = TRUE, limit = 1000,
                     cursor = "*", cursor_max = 5000,
                     filter = list(type = "report"), .progress = "text")
)
## colours taken from http://colorbrewer.org
## One colour per group of histogram bars (see the `col` argument below).
col1 <- "#f1eef6"
col2 <- "#bdc9e1"
col3 <- "#74a9cf"
col4 <- "#0570b0"
## note problem with histogram colour changing...
## http://stackoverflow.com/questions/5649600/axis-color-of-date-histogram-in-r
## Creation dates of the downloaded records, as Date objects.
d <- as.Date(res$data$created)
##pdf(file="biorxiv_deposits.pdf", width=7, height=4)
svg(file = "biorxiv_deposits.svg", width = 7, height = 4)
par(mar = c(4.5, 4.5, 0, 0.4), cex.axis = 0.8)
## Monthly histogram of counts.  The default axes are switched off and
## redrawn in black by the Axis()/axis() calls that follow.
## The bar colours are hard-coded per monthly bin (2 + 12 + 12 + 6 = 32
## bins); the last rep() count has to be raised by hand as new months of
## data appear, otherwise the colour vector is recycled.
hist(d, "months", format = "%y-%m", main = "",
     ylab = "First submission", xlab = "Date",
     axes = FALSE,  # FALSE, not F: F is an ordinary variable and can be reassigned
     col = c(rep(col1, 2),
             rep(col2, 12),
             rep(col3, 12),
             rep(col4, 6)  # increase post June 2016...
             ),
     freq = TRUE, las = 2, ylim = c(0, 400))
Axis(d, col = "black", side = 1)
axis(2, col = "black", las = 1)
## Close the svg device so the file is written out.
dev.off()
## Quit R here; the statements after this point are not reached when the
## file is run as a script.
q()
### old code below
## Query the DOI prefix 10.1101 on its own (no works requested).
cr_prefixes(prefixes = "10.1101")
## Text-format citations for two individual DOIs.
cr_cn(
  dois = "10.1126/science.169.3946.635",
  format = "text"
)
cr_cn(
  dois = "10.1101/045104",
  format = "text"
)
## graph at: http://www.cshsymposium.com/biorxiv/usage_monthly.php
## screen scrape via: http://www.cshsymposium.com/biorxiv/show_all.php
## Offset-based paging: request 1000 "report" works, skipping the first 1000.
res <- cr_prefixes(
  prefixes = "10.1101",
  works = TRUE,
  filter = list(type = "report"),
  offset = 1000,
  limit = 1000
)
## Show the DOIs that came back, then cite one of them.
res$data$DOI
cr_cn(
  dois = "10.1101/012799",
  format = "text"
)
## Fetch one page of works for DOI prefix 10.1101 (type "report") and look
## up the `created` field of each DOI on that page.
##
## offset: number of records to skip before the page starts.
## limit:  maximum number of records to request for the page.
##
## Returns a data.frame with columns `doi` and `created`, one row per DOI
## on the page, and zero rows (same two columns) when the page is empty.
get_chunk <- function(offset = 0, limit = 20) {
  page <- cr_prefixes(prefixes = "10.1101", works = TRUE,
                      filter = list(type = "report"),
                      offset = offset, limit = limit)
  dois <- page$data$DOI
  ## An empty page must not be passed on: cr_works() called without any
  ## DOIs is no longer a lookup of this page, and data.frame(doi = NULL, ...)
  ## silently drops the doi column, which breaks a later rbind().  Return
  ## an empty frame with the expected columns instead.
  if (length(dois) == 0) {
    return(data.frame(doi = character(0), created = character(0)))
  }
  works <- cr_works(dois = dois)
  data.frame(doi = dois, created = works$data$created)
}
## Download consecutive pages with get_chunk() until a short page (fewer
## than `limit` rows) signals the end, and return all rows as one
## data.frame.
##
## offset: number of records to skip before the first page.
## limit:  page size passed to get_chunk(); defaults to 1000, the value
##         that was previously hard-coded.
##
## The offset reached after each page is printed as a progress indicator.
get_chunks <- function(offset = 0, limit = 1000) {
  ## A page size below 1 could never produce a "short" page, so the loop
  ## would not terminate.
  stopifnot(limit >= 1)
  ## Collect the pages in a list and bind them once at the end; calling
  ## rbind() inside the loop re-copies every accumulated row on each pass.
  pages <- list()
  repeat {
    page <- get_chunk(offset, limit)
    offset <- offset + limit
    print(offset)
    pages[[length(pages) + 1L]] <- page
    if (nrow(page) < limit) {
      break
    }
  }
  do.call(rbind, pages)
}
## Download every page, then draw monthly histograms of the creation dates.
f3 <- get_chunks()
dates <- as.Date(f3$created)
hist(dates, "months", format = "%d %b")
## Same plot with the year in the labels and counts on the y axis.
## TRUE is spelled out: T is an ordinary variable and can be reassigned.
hist(dates, "months", format = "%d %b %y", freq = TRUE)
## Ad-hoc fetch of a single page (default page size) starting at offset 3000.
f2 <- get_chunk(offset = 3000)
######################################################################
## w3m -dump -cols 999 http://www.cshsymposium.com/biorxiv/show_all.php > o2.txt
dat <- readLines("o2.txt")
## Drop the first six lines of the dump (presumably the page header --
## TODO confirm against a current o2.txt).
dat <- dat[-(1:6)]
## remove blank lines.
## A logical mask is used instead of dat[-grep("^$", dat)]: when no line
## is blank grep() returns integer(0), and x[-integer(0)] selects nothing,
## which would silently discard every record.
dat <- dat[nzchar(dat)]
## find last line "Total Articles, Unique: "
g <- grep("^Total Articles, Unique", dat)
## throw away tail (the footer line and everything after it).  Guarded so
## that a dump without the footer is kept whole instead of failing in
## g:length(dat); if the pattern matches more than once the first match
## is used.
if (length(g) > 0) {
  dat <- dat[seq_len(g[1] - 1)]
}
## Fixed-width fields: characters 7-20 are read as the DOI and 28-37 as
## the date (column positions assumed from the dump layout -- TODO confirm).
dois <- substring(dat, 7, 20)
dates <- as.Date(substring(dat, 28, 37))
hist(dates, "months", format = "%d-%b-%y", freq = TRUE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* Application for Frictionless data tool fund | |
Stephen Eglen, 2020-05-16 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment