library("igraph") | |
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes. | |
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1) # CSV Path | |
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there. | |
links <- subset(links, Type=="AHREF") # Optional line. Filter. | |
links <- subset(links, Follow=="true") | |
links <- subset(links, select=c(Source,Destination)) | |
g <- graph.data.frame(links) | |
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85) | |
values <- data.frame(pr$vector) | |
values$names <- rownames(values) | |
row.names(values) <- NULL | |
values <- values[c(2,1)] | |
names(values)[1] <- "url" | |
names(values)[2] <- "pr" | |
# Swap out 'domain' and 'com' to represent your website address. | |
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter. | |
# Replace with your desired filename for the output file. | |
write.csv(values, file = "output-pagerank.csv") # Output file. |
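If the script runs and you want a quick sanity check before opening the CSV, you can preview the highest-ranked URLs (a small addition of mine, assuming the values data frame built by the script above):
# Sort descending by PageRank and preview the top ten URLs.
head(values[order(-values$pr), ], 10)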
I was curious if anyone is using this now? Any more changes that might need to be made? I am getting only errors.
Thanks
David
It used to work before but now I keep getting a blank .csv file. The only thing that's changed is that I don't have admin rights on my PC anymore -- could this be the root of the problem?
I just installed R and walked through this process.
After some experimentation, I was able to get the script to run, but it only generated a blank CSV.
Does anyone have any other ideas?
Thanks.
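When the output comes back blank, one way to find the culprit (a debugging sketch, not part of the original gist) is to count rows after each step; write.csv() will happily write an empty file if a subset() call or the domain regex matches nothing:
nrow(links)                           # right after read.csv: total outlinks in the export
nrow(subset(links, Type == "AHREF"))  # 0 here means the Type values changed in your version
nrow(values)                          # right before write.csv: 0 here means the domain regex did not match your URLs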
Hi @pshapiro,
could you please update your useful code?
With the new Screaming Frog version 12.6, there are some errors when you try to read all_outlinks.csv:
library("igraph")
links <- read.csv("all_outlinks.csv", skip = 1) # CSV Path
links <- subset(links, Type=="AHREF") # Optional line. Filter.
Error in eval(e, x, parent.frame()) : object 'Type' not found
links <- subset(links, Follow=="true")
Error in eval(e, x, parent.frame()) : object 'Follow' not found
links <- subset(links, select=c(Source,Destination))
Error in eval(substitute(select), nl, parent.frame()) :
object 'Source' not found
g <- graph.data.frame(links)
Error in graph.data.frame(links) :
the data frame should contain at least two columns
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)
values <- data.frame(pr$vector)
values$names <- rownames(values)
row.names(values) <- NULL
values <- values[c(2,1)]
names(values)[1] <- "url"
names(values)[2] <- "pr"
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter.
write.csv(values, file = "output-pagerank.csv") # Output file.
Thanks a lot
Resolved: when reading all_outlinks.csv, use skip = 0.
The first line "All Outlinks" has been removed from the newer exports, so there is no longer a title row to skip.
;-)
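If you want one script that copes with both export formats, here is a small workaround of my own (not from the gist): peek at the first line and only skip it when the old "All Outlinks" title row is present.
first_line <- readLines("all_outlinks.csv", n = 1)
skip_rows <- if (grepl("All Outlinks", first_line, fixed = TRUE)) 1 else 0
links <- read.csv("all_outlinks.csv", skip = skip_rows)  # works for old and new exports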
Any updates to SF v. 13?
Any updates for this script?
Hi guys, I'm getting these errors:
> links <- subset(links, Type=="HREF") # Optional line. Filter.
Error in eval(e, x, parent.frame()) : object 'Type' not found
> links <- subset(links, Follow=="true")
Error in eval(e, x, parent.frame()) : object 'Follow' not found
> links <- subset(links, select=c(Source,Destination))
Error in eval(substitute(select), nl, parent.frame()) :
object 'Source' not found
Any help?
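Those errors mean the columns the script expects (Type, Follow, Source) are not present under those names in your export. Before filtering, print the actual column names as a quick diagnostic:
links <- read.csv("all_outlinks.csv")  # adjust the path to your export
names(links)                           # the column names your Screaming Frog version writes
head(links, 3)                         # preview a few rows to see the values in each column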
There were some slight changes in the CSV file that Screaming Frog outputs. This should work, and you can change Type=="Hyperlink" to look at different types of links.
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes.
links <- read.csv("/YOUR/FILEPATH/all_outlinks.csv") # CSV Path
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there.
links <- subset(links, Type=="Hyperlink") # Optional line. Filter.
links <- subset(links, Follow=="true")
links <- subset(links, select=c(Source,Destination))
g <- graph.data.frame(links)
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)
values <- data.frame(pr$vector)
values$names <- rownames(values)
row.names(values) <- NULL
values <- values[c(2,1)]
names(values)[1] <- "url"
names(values)[2] <- "pr"
# Swap out 'domain' and 'com' to represent your website address.
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter.
# Replace with your desired filename for the output file.
write.csv(values, file = "output-pagerank.csv") # Output file.
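To see which link types your crawl actually contains before deciding what to filter out, run this right after read.csv, before the subset() calls (a small addition, not part of the script above):
table(links$Type)  # counts per link type, e.g. Hyperlink, Image, CSS, JS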
So good