-
-
Save beijaflor/e2bd8cd5a9609c3806463675ab8c3bfd to your computer and use it in GitHub Desktop.
Calculate Internal PageRank from Screaming Frog Crawl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("igraph")

# Calculate internal PageRank from a Screaming Frog "All Outlinks" export and
# write the per-URL scores for one domain to a CSV file.

# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember
# to change backslashes to forward slashes.
# `skip = 1` drops the first line of the file, so the second line is read as
# the header row (presumably the export starts with a title line -- confirm
# against your own export).
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1)

# This line of code is optional. It filters out JavaScript, CSS, and Images.
# Technically you should keep them in there.
links <- subset(links, Type == "HREF") # Optional line. Filter.

# Keep followed links only, and only the two columns that define an edge.
# read.csv() turns a column consisting solely of "true"/"false" into a logical
# vector, and TRUE == "true" is FALSE in R (TRUE is coerced to "TRUE"), which
# silently removed every row. Comparing the lower-cased character form works
# for logical, character and factor columns alike; %in% also treats NA as
# "not followed" instead of producing NA rows.
is_followed <- tolower(trimws(as.character(links$Follow))) %in% "true"
links <- links[is_followed, c("Source", "Destination")]

if (nrow(links) == 0L) {
  warning(
    "No followed links left after filtering; the output will be empty.",
    call. = FALSE
  )
}

# Build a directed graph (Source -> Destination) and score every vertex.
# graph_from_data_frame() / page_rank() are the current names of the
# deprecated graph.data.frame() / page.rank().
g <- graph_from_data_frame(links)
pr <- page_rank(
  g,
  algo = "prpack",
  vids = V(g),
  directed = TRUE,
  damping = 0.85
)

# One row per URL: the vertex name and its PageRank score.
values <- data.frame(
  url = as.character(names(pr$vector)),
  pr = unname(pr$vector),
  stringsAsFactors = FALSE
)

# Swap out 'domain' and 'com' to represent your website address.
# The pattern is anchored to the start of the URL and the optional subdomain
# part may not contain "/", so only the host is tested; external URLs that
# merely mention the domain in their path or query string are not kept.
values <- values[
  grepl("^https?://([^/]*\\.)?domain\\.com([/:?#]|$)", values$url),
] # Domain filter.

# Replace with your desired filename for the output file.
write.csv(values, file = "output-pagerank.csv") # Output file.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment