library("igraph") | |
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes. | |
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1) # CSV Path | |
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there. | |
links <- subset(links, Type=="AHREF") # Optional line. Filter. | |
links <- subset(links, Follow=="true") | |
links <- subset(links, select=c(Source,Destination)) | |
g <- graph.data.frame(links) | |
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85) | |
values <- data.frame(pr$vector) | |
values$names <- rownames(values) | |
row.names(values) <- NULL | |
values <- values[c(2,1)] | |
names(values)[1] <- "url" | |
names(values)[2] <- "pr" | |
# Swap out 'domain' and 'com' to represent your website address. | |
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter. | |
# Replace with your desired filename for the output file. | |
write.csv(values, file = "output-pagerank.csv") # Output file. |
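A quick optional sanity check (not part of the original script, just a suggestion): sort the resulting data frame by PageRank and print the top URLs before opening the CSV.
# Optional: print the ten URLs with the highest internal PageRank.
head(values[order(-values$pr), ], 10)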
Same issues as SEOForce; I consistently receive an "object __ not found" error. Is there any further documentation for this?
I'd suggest adding the following filters too (see the combined sketch below).
Just make sure to keep only 200s anyway, as you don't want to analyse redirects:
links.href <- subset(links, Status.Code=="200")
SF marks redirects as 200 in the outbound/inbound export, so also filter on the anchor:
links.href <- subset(links, Anchor!="Redirect")
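Putting those suggestions together, a rough sketch of the combined filtering might look like the lines below. This assumes your export actually has Status.Code and Anchor columns, and that the Type value is "AHREF" (depending on your Screaming Frog version it may be "HREF" or "Hyperlink" instead). It also assigns back to links so the later steps pick up the filtered data.
# Keep only followed hyperlinks.
links <- subset(links, Type=="AHREF" & Follow=="true")
# Keep only links whose target returned a 200.
links <- subset(links, Status.Code=="200")
# Drop rows Screaming Frog labels as redirects in the outbound/inbound export.
links <- subset(links, Anchor!="Redirect")
links <- subset(links, select=c(Source,Destination))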
Another update: Screaming Frog started marking hrefs as AHREF instead of HREF. This part should also be updated:
from
links <- subset(links, Type=="HREF") # Optional line. Filter.
to
links <- subset(links, Type=="AHREF") # Optional line. Filter.
So good
I was curious whether anyone is using this now. Are there any more changes that might need to be made? I am only getting errors.
Thanks
David
It used to work before but now I keep getting a blank .csv file. The only thing that's changed is that I don't have admin rights on my PC anymore -- could this be the root of the problem?
I just installed R and walked through this process.
After some experimentation, I was able to get the script to run, but it only generated a blank CSV.
Does anyone have any other ideas?
Thanks.
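One likely cause of a blank CSV is the domain filter near the end of the script: if the regex still contains the placeholder domain.com, it matches nothing and every row is dropped. A rough way to narrow it down (purely a debugging sketch, not part of the Gist) is to count rows after each step:
nrow(links)   # rows read from the export
nrow(subset(links, Type=="AHREF"))   # rows surviving the Type filter
nrow(values)   # URLs that received a PageRank value
# Repeat with the exact regex you use in the script; 0 here means the domain filter removes everything.
sum(grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url))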
Hi @pshapiro,
could you please update your useful code?
With the new Screaming Frog version 12.6, there are some errors when you try to read all_outlinks.csv:
library("igraph")
links <- read.csv("all_outlinks.csv", skip = 1) # CSV Path
links <- subset(links, Type=="AHREF") # Optional line. Filter.
Error in eval(e, x, parent.frame()) : object 'Type' not found
links <- subset(links, Follow=="true")
Error in eval(e, x, parent.frame()) : object 'Follow' not found
links <- subset(links, select=c(Source,Destination))
Error in eval(substitute(select), nl, parent.frame()) :
object 'Source' not found
g <- graph.data.frame(links)
Error in graph.data.frame(links) :
the data frame should contain at least two columns
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)
values <- data.frame(pr$vector)
values$names <- rownames(values)
row.names(values) <- NULL
values <- values[c(2,1)]
names(values)[1] <- "url"
names(values)[2] <- "pr"
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter.
write.csv(values, file = "output-pagerank.csv") # Output file.
Thanks a lot
Resolved: read all_outlinks.csv with skip = 0.
The first line "All Outlinks" is no longer there.
;-)
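If you have to handle exports from both older and newer Screaming Frog versions, a small defensive tweak (just a sketch, not the author's code) is to peek at the first line and only skip it when it is the "All Outlinks" title row:
first_line <- readLines("all_outlinks.csv", n = 1)   # read just the first line of the export
skip_rows <- ifelse(grepl("All Outlinks", first_line), 1, 0)   # skip it only if it is the title row
links <- read.csv("all_outlinks.csv", skip = skip_rows)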
Any updates to SF v. 13?
Any updates for this script?
Hi guys, I'm getting these errors:
> links <- subset(links, Type=="HREF") # Optional line. Filter.
Error in eval(e, x, parent.frame()) : object 'Type' not found
> links <- subset(links, Follow=="true")
Error in eval(e, x, parent.frame()) : object 'Follow' not found
> links <- subset(links, select=c(Source,Destination))
Error in eval(substitute(select), nl, parent.frame()) :
object 'Source' not found
Any help?
There were some slight changes in the CSV file that Screaming Frog outputs. This should work, and you can change Type=="Hyperlink" to look at different types of links.
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes.
links <- read.csv("/YOUR/FILEPATH/all_outlinks.csv") # CSV Path
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there.
links <- subset(links, Type=="Hyperlink") # Optional line. Filter.
links <- subset(links, Follow=="true")
links <- subset(links, select=c(Source,Destination))
g <- graph.data.frame(links)
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)
values <- data.frame(pr$vector)
values$names <- rownames(values)
row.names(values) <- NULL
values <- values[c(2,1)]
names(values)[1] <- "url"
names(values)[2] <- "pr"
# Swap out 'domain' and 'com' to represent your website address.
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter.
# Replace with your desired filename for the output file.
write.csv(values, file = "output-pagerank.csv") # Output file.
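If subset() still complains that Type, Follow, or Source is not found, a quick diagnostic (just a suggestion) is to list the columns your export actually contains and adjust the filters to match, since the headers change between Screaming Frog versions:
names(links)   # show the column names read.csv actually produced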
Thanks for the brief explanation.