Skip to content

Instantly share code, notes, and snippets.

@LCHCAPITALHUMAIN
Created October 15, 2016 19:53
Show Gist options
  • Save LCHCAPITALHUMAIN/5e8c084b1ac8265894c3bdf21198b3d0 to your computer and use it in GitHub Desktop.
Save LCHCAPITALHUMAIN/5e8c084b1ac8265894c3bdf21198b3d0 to your computer and use it in GitHub Desktop.
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ratpack.site
import groovy.util.logging.Slf4j
import ratpack.site.crawl.Crawler
import ratpack.site.crawl.PrettyPrintCollection
import ratpack.util.RatpackVersion
import spock.lang.Specification
/**
 * Crawls the running Ratpack site and asserts that no reachable page contains
 * broken links or raw (unrendered) markdown link syntax. Links whose URL starts
 * with an allow-listed prefix are logged as warnings instead of failing the test.
 */
@Slf4j
class LinkCrawlSpec extends Specification {

  def "site has no bad links"() {
    given:
    def aut = new RatpackSiteUnderTest()

    // Known-flaky external links: failures on these only warn, never fail.
    def allowBroken = [
      "http://www.pac4j.org",
    ]

    def crawler = new Crawler(aut.address.toString()) {
      // Calls super.shouldUseHeadRequest, so this is definitely an override —
      // @Override added for consistency with the sibling methods below.
      @Override
      boolean shouldUseHeadRequest(Link url) {
        // bintray.com does not handle HEAD requests reliably; use GET for it.
        return url.uri.host != "bintray.com" && super.shouldUseHeadRequest(url)
      }

      @Override
      boolean isCrawlable(Link link) {
        // Only crawl the manual for the version under test; other manual
        // versions are out of scope for this build.
        if (link.uri.path.startsWith("/manual") && !link.uri.path.startsWith("/manual/${RatpackVersion.version - "-SNAPSHOT"}")) {
          false
        } else {
          super.isCrawlable(link)
        }
      }

      // NOTE(review): presumably overrides Crawler.findPageLinks — @Override
      // added to match the other overridden methods; confirm against Crawler.
      @Override
      List<String> findPageLinks(Response response) {
        def document = response.document
        // Collect every non-empty href from anchors in the page body;
        // a null document (non-HTML response) yields no links.
        document == null ? [] : document.select("body a").collect {
          it.attr("href")
        }.findAll {
          it
        }
      }

      @Override
      void addPageErrors(Link link, Response response) {
        // Flag raw markdown link syntax ("[text](url)") that leaked into the
        // rendered page, in addition to the standard error checks.
        response.document?.text()?.findAll(~$/\[.+]\(.+\)/$)?.each {
          link.errors << new BadMarkdownLinkSyntax(it)
        }
        super.addPageErrors(link, response)
      }
    }

    when:
    def visited = crawler.crawl()
    def broken = visited.findAll { it.errors.size() > 0 }

    // Partition broken links: allow-listed prefixes are "warn", the rest "error".
    def brokenByLevel = broken.groupBy { link -> allowBroken.any { link.uri.toString().startsWith(it) } ? "warn" : "error" }
    def errored = new PrettyPrintCollection(brokenByLevel["error"] ?: [])
    def warned = new PrettyPrintCollection(brokenByLevel["warn"] ?: [])
    if (!warned.empty) {
      log.warn "${warned}"
    }

    then:
    errored.empty

    cleanup:
    aut.stop()
  }

  /** Page error recorded when raw markdown link syntax survives rendering. */
  private static class BadMarkdownLinkSyntax extends Crawler.PageError {
    final String link

    BadMarkdownLinkSyntax(String link) {
      this.link = link
    }

    @Override
    String toString() {
      "Bad markdown link: $link"
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment