Calculate Internal PageRank from Screaming Frog Crawl
library("igraph") | |
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes. | |
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1) # CSV Path | |
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there. | |
links <- subset(links, Type=="AHREF") # Optional line. Filter. | |
links <- subset(links, Follow=="true") | |
links <- subset(links, select=c(Source,Destination)) | |
g <- graph.data.frame(links) | |
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85) | |
values <- data.frame(pr$vector) | |
values$names <- rownames(values) | |
row.names(values) <- NULL | |
values <- values[c(2,1)] | |
names(values)[1] <- "url" | |
names(values)[2] <- "pr" | |
# Swap out 'domain' and 'com' to represent your website address. | |
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter. | |
# Replace with your desired filename for the output file. | |
write.csv(values, file = "output-pagerank.csv") # Output file. |
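For a quick sanity check before opening the CSV, you can preview the highest-ranked URLs directly in R. A minimal sketch, assuming the values data frame produced by the script above:

# Sort descending by internal PageRank and preview the top pages.
values <- values[order(-values$pr), ]
head(values, 10)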
Resolved: for the all_outlinks.csv file, set skip = 0. The first line ("All Outlinks") is no longer in the export, so there is nothing to skip. ;-)
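If you are not sure whether your export still contains that banner row, you can detect it before reading the file. A minimal sketch with a placeholder path (not part of the original script):

# Set skip automatically depending on whether the first line is the
# "All Outlinks" banner or the real column header.
path <- "/YOUR/FILEPATH/all_outlinks.csv"  # placeholder path
first_line <- readLines(path, n = 1)
skip_rows <- if (grepl("All Outlinks", first_line)) 1 else 0
links <- read.csv(path, skip = skip_rows)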
Any updates for Screaming Frog v13?
Any updates for this script?
Hi guys, I'm getting these errors:
> links <- subset(links, Type=="HREF") # Optional line. Filter.
Error in eval(e, x, parent.frame()) : object 'Type' not found
> links <- subset(links, Follow=="true")
Error in eval(e, x, parent.frame()) : object 'Follow' not found
> links <- subset(links, select=c(Source,Destination))
Error in eval(substitute(select), nl, parent.frame()) :
object 'Source' not found
Any help?
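Those "object not found" errors usually mean the data frame has no columns named Type, Follow, or Source, either because the column names changed between Screaming Frog versions or because skip = 1 was applied to a file that no longer has the banner row, so the real header line was discarded. A quick diagnostic sketch, with a placeholder path:

# Inspect what the export actually contains before calling subset().
links <- read.csv("/YOUR/FILEPATH/all_outlinks.csv")  # placeholder path
colnames(links)   # should include Type, Follow, Source, Destination
head(links, 3)    # if the column names look like URL data, the real header was skipped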
There were some slight changes in the CSV file that Screaming Frog outputs. The version below should work, and you can change Type=="Hyperlink" to look at different types of links.
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes.
links <- read.csv("/YOUR/FILEPATH/all_outlinks.csv") # CSV Path
# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there.
links <- subset(links, Type=="Hyperlink") # Optional line. Filter.
links <- subset(links, Follow=="true")
links <- subset(links, select=c(Source,Destination))
g <- graph.data.frame(links)
pr <- page.rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)
values <- data.frame(pr$vector)
values$names <- rownames(values)
row.names(values) <- NULL
values <- values[c(2,1)]
names(values)[1] <- "url"
names(values)[2] <- "pr"
# Swap out 'domain' and 'com' to represent your website address.
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url),] # Domain filter.
# Replace with your desired filename for the output file.
write.csv(values, file = "output-pagerank.csv") # Output file.
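A side note on igraph itself: recent releases deprecate the dotted function names used here (graph.data.frame(), page.rank()) in favour of underscore names, so depending on your igraph version you may see warnings. An equivalent sketch with the newer names, assuming the same links data frame:

library(igraph)
g <- graph_from_data_frame(links)   # newer name for graph.data.frame()
pr <- page_rank(g, algo = "prpack", directed = TRUE, damping = 0.85)
values <- data.frame(url = names(pr$vector), pr = pr$vector, row.names = NULL)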
Hi @pshapiro,
could you please update your useful code?
With the new Screaming Frog version 12.6, there are some errors when you try to read all_outlinks.csv.
Thanks a lot