Last active
December 11, 2015 10:08
-
-
Save tts/4584793 to your computer and use it in GitHub Desktop.
Altmetrics scores by Altmetric and traditional citation metrics (WoS) of Aalto University publications published between 2007 and 2012 and with a DOI known by WoS.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###########################################################
#
# Altmetrics about Aalto University publications
# 2007-2012 with a DOI
#
# Altmetrics data provided by altmetric.com
# Web of Science data provided by Thomson Reuters
#
###########################################################
library(rAltmetric)
library(plyr)
library(reshape)
# Input: one data frame per school (arts.doi, biz.doi, chem.doi, elec.doi,
# eng.doi, sci.doi), gathered beforehand from a local source. Besides the
# DOI (di) they carry the Thomson Reuters Web of Science (WoS) citation
# count and the publication year.
#
# Merge the per-school data frames into a single one.
aalto.doi <- merge_all(
  list(arts.doi, biz.doi, chem.doi, elec.doi, eng.doi, sci.doi)
)
# Prefix every DOI with "doi/" for the rAltmetric run.
aalto.doi$di <- paste0("doi/", aalto.doi$di)
# Keep only the (prefixed) DOIs, as a one-column data frame named "doi".
dois <- data.frame(doi = aalto.doi$di, stringsAsFactors = FALSE)
# Step 1: send every DOI to the Altmetric API through altmetrics() from
# the rAltmetric package. raw_metrics is a nested list with one root
# element per DOI.
#
# Step 2: altmetric_data() turns that list into a data frame
# (metric_data) holding a number of altmetrics scores as variables.
# metric_data does have a DOI column, but for some reason its value is
# '1' on every row, so it cannot serve as a key.
#
# The goal is to combine the traditional WoS citations (and the
# publication year) with the Altmetric scores. The common denominator is
# the DOI, so further down the DOIs are picked from raw_metrics, attached
# to metric_data, and the result is merged with the WoS metrics.
raw_metrics <- llply(.data = dois$doi, .fun = altmetrics, .progress = "text")
metric_data <- ldply(.data = raw_metrics, .fun = altmetric_data)
# Number of root list elements in raw_metrics. Each element either holds
# metrics or is NULL.
N <- length(raw_metrics)
# From every root element take the second list element and store it as
# the DOI; store a real missing value (NA) when the element is NULL or
# too short to have a second entry.
# NOTE(review): position 2 is assumed to be the DOI, as in the original
# script - confirm against the structure returned by altmetrics().
# vapply() copes with an empty raw_metrics, where a 1:N loop would try to
# read elements 1 and 0.
adoi <- data.frame(
  doi = vapply(
    raw_metrics,
    function(item) {
      if (length(item) >= 2 && !is.null(item[[2]])) {
        as.character(item[[2]])[1]
      } else {
        NA_character_
      }
    },
    character(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)
# Keep only the rows with a DOI. drop = FALSE keeps the result a data
# frame with its "doi" column (as character, not factor).
adoi.full <- adoi[!is.na(adoi$doi), , drop = FALSE]
rownames(adoi.full) <- NULL
# Attach the DOIs in adoi.full to the scores in metric_data. The two are
# paired purely by position: both get a running id and are merged on it.
# NOTE(review): this assumes metric_data has exactly one row per DOI in
# adoi.full, in the same order - confirm. A differing row count means the
# pairing below is unreliable, so say so instead of merging silently.
if (nrow(metric_data) != nrow(adoi.full)) {
  warning(
    "metric_data has ", nrow(metric_data), " rows but adoi.full has ",
    nrow(adoi.full), "; DOIs may be attached to the wrong scores",
    call. = FALSE
  )
}
# seq_len() yields an empty id for an empty data frame, where
# seq(from = 1, to = 0) would yield c(1, 0).
metric_data$id <- seq_len(nrow(metric_data))
adoi.full$id <- seq_len(nrow(adoi.full))
aalto.alt.all <- merge(metric_data, adoi.full, by = "id")
# Remove the leading "doi/" again so that the WoS data carries plain DOIs.
aalto.doi$di <- sub("^doi/", "", aalto.doi$di)
# Merge the WoS data with the Altmetric data by DOI. When metric_data has
# a "doi" column of its own, merge() renames the one coming from
# adoi.full to "doi.y"; otherwise it stays "doi".
aalto.all <- merge(
  aalto.doi, aalto.alt.all,
  by.x = "di",
  by.y = if ("doi.y" %in% names(aalto.alt.all)) "doi.y" else "doi"
)
# Choose relevant columns only (di = DOI, py = publ year, tc = WoS)
aalto.all.m <- aalto.all[, c(
  "di", "url", "py",
  "tc",
  "mendeley", "connotea", "citeulike", "readers_count",
  "cited_by_gplus_count", "cited_by_fbwalls_count",
  "cited_by_posts_count", "cited_by_tweeters_count",
  "cited_by_accounts_count", "cited_by_feeds_count"
)]
# Factorize DOI
aalto.all.m$di <- factor(aalto.all.m$di)
# Five highest-ranked publications per channel, in this order:
# Twitter, Google+, Facebook, news feeds. Each expression picks the first
# five positions of the descending ordering and shows those rows.
aalto.all.m[
  head(order(aalto.all.m$cited_by_tweeters_count, decreasing = TRUE), n = 5),
  ,
  drop = FALSE
]
aalto.all.m[
  head(order(aalto.all.m$cited_by_gplus_count, decreasing = TRUE), n = 5),
  ,
  drop = FALSE
]
aalto.all.m[
  head(order(aalto.all.m$cited_by_fbwalls_count, decreasing = TRUE), n = 5),
  ,
  drop = FALSE
]
aalto.all.m[
  head(order(aalto.all.m$cited_by_feeds_count, decreasing = TRUE), n = 5),
  ,
  drop = FALSE
]
# Ten publications with the highest Mendeley count; missing values in
# the selection are shown as 0.
top10m <- aalto.all.m[
  head(order(aalto.all.m$mendeley, decreasing = TRUE), n = 10),
  ,
  drop = FALSE
]
top10m <- replace(top10m, is.na(top10m), 0)
# Ten publications with the highest WoS citation count, treated alike.
top10w <- aalto.all.m[
  head(order(aalto.all.m$tc, decreasing = TRUE), n = 10),
  ,
  drop = FALSE
]
top10w <- replace(top10w, is.na(top10w), 0)
# Reshape for barplot(): one row per metric, one column per publication.
subsetm <- t(data.frame(
  top10m$mendeley,
  top10m$tc,
  top10m$connotea,
  top10m$citeulike,
  top10m$cited_by_fbwalls_count,
  top10m$cited_by_posts_count,
  top10m$cited_by_tweeters_count,
  top10m$cited_by_feeds_count
))
subsetw <- t(data.frame(
  top10w$mendeley,
  top10w$tc,
  top10w$connotea,
  top10w$citeulike,
  top10w$cited_by_fbwalls_count,
  top10w$cited_by_posts_count,
  top10w$cited_by_tweeters_count,
  top10w$cited_by_feeds_count
))
# Eight bar colours, one per plotted metric. Each row below is one
# colour given as red, green, blue on a 0-255 scale.
color.scheme <- rgb(
  rbind(
    c(126,   0,  27),
    c(200,  64,  94),
    c(255,  71,  90),
    c(255, 149, 131),
    c(169, 124,  82),
    c(215, 124,  26),
    c(233, 172, 131),
    c(199, 174,  36)
  ),
  maxColorValue = 255
)
# Bar chart of the Mendeley top 10, written to a PNG file. Bars are
# horizontal and grouped per publication; every publication is labelled
# with its DOI and publication year.
png(filename = "aalto.top10.mendeley.png", width = 1024, height = 768, res = 72)
par(mar = c(4, 16, 7, 3), cex = 0.80)
barplot(
  height = subsetm,
  beside = TRUE,
  horiz = TRUE,
  las = 1,
  col = color.scheme,
  names.arg = paste0(top10m$di, " (", top10m$py, ")"),
  legend.text = c(
    "Mendeley", "WoS", "Connotea", "CiteULike", "Facebook",
    "Posts", "Twitter", "Feeds"
  ),
  main = "Top10 Aalto articles in Mendeley saves 2007-2012"
)
dev.off()
# The same chart for the WoS top 10.
png(filename = "aalto.top10.wos.png", width = 1024, height = 768, res = 72)
par(mar = c(4, 16, 7, 3), cex = 0.80)
barplot(
  height = subsetw,
  beside = TRUE,
  horiz = TRUE,
  las = 1,
  col = color.scheme,
  names.arg = paste0(top10w$di, " (", top10w$py, ")"),
  legend.text = c(
    "Mendeley", "WoS", "Connotea", "CiteULike", "Facebook",
    "Posts", "Twitter", "Feeds"
  ),
  main = "Top10 Aalto articles (DOI) in WoS citations with altmetrics 2007-2012"
)
dev.off()
##################################################
#
# Spearman correlation of Mendeley and WoS.
# Publications published between 2009 and 2011
#
##################################################
# Mendeley counts (m) and WoS citation counts (w) of the publications
# published in 2009-2011.
m20092011 <- with(aalto.all.m, mendeley[py >= 2009 & py <= 2011])
w20092011 <- with(aalto.all.m, tc[py >= 2009 & py <= 2011])
#################################################################
#
# Scatterplotting. Is there any relationship between these variables?
#
#################################################################
plot(
  m20092011, w20092011,
  xlab = "Mendeley",
  ylab = "WoS",
  main = "Mendeley saves and WoS citations in Aalto publications 2009-2011"
)
# Regression line for the plot above. The response is the variable on the
# y axis (WoS) and the predictor the one on the x axis (Mendeley); the
# formula has to name the vectors that actually exist.
abline(lm(w20092011 ~ m20092011))
# Both variables side by side, each column keeping its own type.
mw <- data.frame(m20092011, w20092011)
# Spearman rank correlation over the publications that have both values.
# Without `use`, a single missing value makes the result NA; lm() above
# already leaves incomplete pairs out, so this keeps the two consistent.
cor(
  mw$m20092011, mw$w20092011,
  method = "spearman",
  use = "pairwise.complete.obs"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment