| title | author | date | output | keep_md |
|---|---|---|---|---|
Gold OA output VU Amsterdam - How to |
Najko Jahn |
16. Oktober 2015 |
html_document |
true |
# Attach dplyr with library(): unlike require(), it stops with an error when
# the package is missing instead of returning FALSE and failing further down.
library(dplyr)

# Load the ISI spreadsheet and select only the columns needed.
# The file is read as semicolon-separated; empty cells become NA.
vu_amst <- read.csv(
  "pubs_2012-14 V5 CSV.csv",
  header = TRUE,
  sep = ";",
  na.strings = ""
) %>%
  select(JI, PY, SN, PU, DI, UT)
tbl_df(vu_amst)## Source: local data frame [16,438 x 6]
##
## JI PY SN PU
## (fctr) (int) (fctr) (fctr)
## 1 Retrovirology 2014 1742-4690 BIOMED CENTRAL LTD
## 2 BMC Infect. Dis. 2014 1471-2334 BIOMED CENTRAL LTD
## 3 BMC Neurol. 2014 1471-2377 BIOMED CENTRAL LTD
## 4 PLoS One 2014 1932-6203 PUBLIC LIBRARY SCIENCE
## 5 PLoS One 2014 1932-6203 PUBLIC LIBRARY SCIENCE
## 6 PLoS One 2014 1932-6203 PUBLIC LIBRARY SCIENCE
## 7 PLoS One 2014 1932-6203 PUBLIC LIBRARY SCIENCE
## 8 PLoS One 2014 1932-6203 PUBLIC LIBRARY SCIENCE
## 9 Psychiatry Res. 2014 0165-1781 ELSEVIER IRELAND LTD
## 10 Hydrol. Process. 2014 0885-6087 WILEY-BLACKWELL
## .. ... ... ... ...
## Variables not shown: DI (fctr), UT (fctr)
# Load the DOAJ spreadsheet and select only the columns needed.
# stop_for_status() turns an HTTP error into an R error right here, rather
# than letting an error page be parsed and fail obscurely in select().
doaj_response <- httr::GET("http://doaj.org/csv")
httr::stop_for_status(doaj_response)
doaj <- httr::content(doaj_response) %>%
  select(
    Journal.ISSN..print.version.,
    Journal.EISSN..online.version.,
    Journal.article.processing.charges..APCs.,
    First.calendar.year.journal.provided.online.Open.Access.content
  )
tbl_df(doaj)## Source: local data frame [10,609 x 4]
##
## Journal.ISSN..print.version. Journal.EISSN..online.version.
## (chr) (chr)
## 1 0001-3765
## 2 0001-494X 2282-0035
## 3 0001-5113 1846-0453
## 4 0001-527X 1734-154X
## 5 0001-5555 1651-2057
## 6 0001-6012
## 7 0001-625X 2353-074X
## 8 0001-6977 2083-9480
## 9 0001-7019 1846-0410
## 10 0001-7213 1801-7576
## .. ... ...
## Variables not shown: Journal.article.processing.charges..APCs. (chr),
## First.calendar.year.journal.provided.online.Open.Access.content (int)
# Join print ISSN and EISSN into one vector
doaj.issn <- c(
  as.character(doaj$Journal.ISSN..print.version.),
  as.character(doaj$Journal.EISSN..online.version.)
)
# Drop empty and missing identifiers. NA has to be removed explicitly:
# `x[!x == ""]` keeps NA entries, and because `NA %in% NA` is TRUE any record
# whose SN is NA would then be flagged as a DOAJ article.
doaj.issn <- doaj.issn[!is.na(doaj.issn) & doaj.issn != ""]
# Convert SN to class character
vu_amst$SN <- as.character(vu_amst$SN)
# Match with the vu dataset: TRUE where the record's ISSN is listed in DOAJ
vu_amst$DOAJ <- vu_amst$SN %in% doaj.issn
table(vu_amst$DOAJ)##
## FALSE TRUE
## 14514 1924
# Match the records against DOAJ twice: once on the print ISSN and once on
# the EISSN.
tt <- merge(
  x = vu_amst, y = doaj,
  by.x = "SN", by.y = "Journal.ISSN..print.version."
)
tt_2 <- merge(
  x = vu_amst, y = doaj,
  by.x = "SN", by.y = "Journal.EISSN..online.version."
)
# Give both results the same header so they can be row-bound. This relabels
# the DOAJ identifier column left over in tt_2 (the print ISSN) with the
# EISSN column name taken from tt.
names(tt_2) <- names(tt)
vu_doaj <- rbind(tt, tt_2)

It is important to check for journals that were transferred to OA and to exclude those articles that were published before the transfer. This is especially important for the SCOAP3 journals.
vu_doaj <- filter(vu_doaj, PY >= First.calendar.year.journal.provided.online.Open.Access.content)

Let's drop unused factor levels before exploring the data.
vu_doaj <- droplevels(vu_doaj)# create matrix
# Count records per publication year for the full set and the DOAJ subset,
# plus the DOAJ share in percent.
# Both tables are built over the same set of years so they stay aligned
# column by column; tabulating each PY column on its own gives tables of
# different length as soon as one year has no DOAJ article.
pub_years <- sort(unique(vu_amst$PY))
n_all <- table(factor(vu_amst$PY, levels = pub_years))
n_doaj <- table(factor(vu_doaj$PY, levels = pub_years))
by_year <- rbind(all = n_all, doaj = n_doaj, share = n_doaj / n_all * 100)
# print
knitr::kable(by_year, digits = 2)

| | 2012 | 2013 | 2014 |
|---|---|---|---|
| all | 5181.00 | 5526.00 | 5730.00 |
| doaj | 493.00 | 635.00 | 751.00 |
| share | 9.52 | 11.49 | 13.11 |
Publisher names are a bit messy in the Web of Science. Before we tabulate the OA-publishers by year, let's clean up some publisher names:
# Harmonise publisher name variants: every record whose publisher matches the
# pattern (case-insensitively) gets one canonical name.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
# NOTE(review): if PU is a factor, each canonical name must already be one of
# its levels, otherwise the assignment yields NA with a warning -- confirm.
vu_doaj$PU[grep("Wiley", vu_doaj$PU, ignore.case = TRUE)] <- "WILEY-BLACKWELL"
vu_doaj$PU[grep("FRONTIERS", vu_doaj$PU, ignore.case = TRUE)] <-
  "FRONTIERS RESEARCH FOUNDATION"
vu_doaj$PU[grep("ELSEVIER", vu_doaj$PU, ignore.case = TRUE)] <-
  "ELSEVIER SCIENCE BV"

We've identified 105 OA publishers. To calculate OA Gold publications over publishers:
count(vu_doaj, PU) %>% arrange(desc(n))## Source: local data frame [105 x 2]
##
## PU n
## (fctr) (int)
## 1 BIOMED CENTRAL LTD 621
## 2 PUBLIC LIBRARY SCIENCE 545
## 3 COPERNICUS GESELLSCHAFT MBH 101
## 4 FRONTIERS RESEARCH FOUNDATION 100
## 5 FERRATA STORTI FOUNDATION 38
## 6 NATURE PUBLISHING GROUP 35
## 7 MDPI AG 34
## 8 JMIR PUBLICATIONS, INC 30
## 9 HINDAWI PUBLISHING CORPORATION 28
## 10 WILEY-BLACKWELL 27
## .. ... ...
Plot OA Gold publications over publishers and year published
# Take only the five most popular publishers and lump the rest together.
# Levels are ordered by descending frequency. The names are read from the
# sorted table directly: data.frame() on a table returns Var1/Freq columns
# with automatic row numbers, so its row names are not a reliable source of
# the publisher names.
pu_by_freq <- rev(names(sort(table(vu_doaj$PU))))
vu_doaj$PU <- factor(vu_doaj$PU, levels = pu_by_freq)
# Collapse everything from rank six onwards into one "other" level. The guard
# avoids the backwards sequence 6:5 when there are five or fewer publishers.
n_publishers <- nlevels(vu_doaj$PU)
if (n_publishers > 5L) {
  levels(vu_doaj$PU)[6:n_publishers] <- paste0(
    "other (n=", length(unique(vu_doaj$PU)) - 5, ")"
  )
}
library(dplyr)
# Number of articles per publisher and publication year
publisher_by_yr <- group_by(vu_doaj, PU, PY) %>% tally()
publisher_by_yr## Source: local data frame [18 x 3]
## Groups: PU [?]
##
## PU PY n
## (fctr) (int) (int)
## 1 BIOMED CENTRAL LTD 2012 175
## 2 BIOMED CENTRAL LTD 2013 217
## 3 BIOMED CENTRAL LTD 2014 229
## 4 PUBLIC LIBRARY SCIENCE 2012 148
## 5 PUBLIC LIBRARY SCIENCE 2013 210
## 6 PUBLIC LIBRARY SCIENCE 2014 187
## 7 COPERNICUS GESELLSCHAFT MBH 2012 27
## 8 COPERNICUS GESELLSCHAFT MBH 2013 34
## 9 COPERNICUS GESELLSCHAFT MBH 2014 40
## 10 FRONTIERS RESEARCH FOUNDATION 2012 17
## 11 FRONTIERS RESEARCH FOUNDATION 2013 26
## 12 FRONTIERS RESEARCH FOUNDATION 2014 57
## 13 FERRATA STORTI FOUNDATION 2012 6
## 14 FERRATA STORTI FOUNDATION 2013 8
## 15 FERRATA STORTI FOUNDATION 2014 24
## 16 other (n=100) 2012 120
## 17 other (n=100) 2013 140
## 18 other (n=100) 2014 214
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE.
library(ggplot2)
# Stacked area chart of the yearly article counts, one band per publisher
ggplot(publisher_by_yr, aes(factor(PY), n, fill = PU, group = PU)) +
  geom_area(position = "stack") +
  scale_fill_manual(
    "Publisher",
    values = c(
      "#f39c12", "#2980b9", "#2ecc71", "#fb8072", "#ffffb3", "#bdc3c7"
    )
  ) +
  xlab("Year") +
  ylab("ISI OA Gold articles") +
  theme_bw()

# relevel by journal
# Levels are ordered by descending frequency. The names come from the sorted
# table itself: data.frame() on a table returns Var1/Freq columns with
# automatic row numbers, so its row names are not a reliable source of them.
ji_by_freq <- rev(names(sort(table(vu_doaj$JI))))
vu_doaj$JI <- factor(vu_doaj$JI, levels = ji_by_freq)
group_by(vu_doaj, JI, PU) %>% tally()## Source: local data frame [253 x 3]
## Groups: JI [?]
##
## JI PU n
## (fctr) (fctr) (int)
## 1 PLoS One PUBLIC LIBRARY SCIENCE 493
## 2 BMC Public Health BIOMED CENTRAL LTD 89
## 3 BMC Psychiatry BIOMED CENTRAL LTD 41
## 4 Int. J. Behav. Nutr. Phys. Act. BIOMED CENTRAL LTD 40
## 5 Front. Psychol. FRONTIERS RESEARCH FOUNDATION 34
## 6 BMC Fam. Pract. BIOMED CENTRAL LTD 34
## 7 BMC Pregnancy Childbirth BIOMED CENTRAL LTD 33
## 8 J. Med. Internet Res. other (n=100) 31
## 9 BMC Health Serv. Res. BIOMED CENTRAL LTD 31
## 10 Haematologica FERRATA STORTI FOUNDATION 30
## .. ... ... ...

