Skip to content

Instantly share code, notes, and snippets.

@glaforge
Last active March 13, 2023 11:20
Show Gist options
  • Save glaforge/fda1c2155bbda354c86c643a704d04ca to your computer and use it in GitHub Desktop.
Save glaforge/fda1c2155bbda354c86c643a704d04ca to your computer and use it in GitHub Desktop.
import java.nio.file.Paths
@Grab('org.jsoup:jsoup:1.15.4')
import org.jsoup.Jsoup
import org.jsoup.safety.Safelist
@Grab('org.apache.commons:commons-lang3:3.12.0')
import static org.apache.commons.lang3.StringUtils.getLevenshteinDistance as levenDist
@Grab('org.codehaus.gpars:gpars:1.2.1')
import static groovyx.gpars.GParsPool.withPool
// Slug -> URL lookup tables. Declared outside the pool closure so that the
// code following the crawl can read them once they have been filled in.
def oldSlugUrls = [:]
def newSlugUrls = [:]

// All page fetches happen inside a GPars pool; collections marked with
// makeConcurrent() are iterated by the pool instead of sequentially.
withPool {
    // --- Old blog: archive pages /archives/p1 .. /archives/p49 ---
    final oldBaseUrl = 'https://glaforge.appspot.com'
    final baseArchivePageUrl = oldBaseUrl + '/archives/p'
    final oldPageRange = (1..49).makeConcurrent()

    // Gather the href of every post title link found on each archive page.
    def oldUrls = oldPageRange.collectMany { int pageId ->
        def archiveUrl = baseArchivePageUrl + pageId
        def archivePageDoc = Jsoup.connect(archiveUrl).get()
        archivePageDoc.select('.archive-post-title > h3 > a').collect { aTag ->
            aTag.attr('href')
        }
    }

    // Old slug = the href with its first '/article/' occurrence removed.
    oldSlugUrls = oldUrls.collectEntries { url ->
        [url - '/article/', url]
    }

    // --- New blog: front page, talks page and their numbered listing pages ---
    final newBlogBaseUrl = 'https://glaforge.dev'
    final newTalkBaseUrl = newBlogBaseUrl + '/talks'
    def newBlogRange = 2..42
    def newTalkRange = 2..7

    // The unnumbered base URLs are listed explicitly; the ranges start at 2.
    def baseUrls = [
        newBlogBaseUrl,
        newTalkBaseUrl,
        *(newBlogRange.collect { pageId -> "${newBlogBaseUrl}/page/${pageId}" }),
        *(newTalkRange.collect { pageId -> "${newTalkBaseUrl}/page/${pageId}" }),
    ].makeConcurrent()

    // Gather the href of every 'h1 > a' link found on each listing page.
    def newUrls = baseUrls.collectMany { url ->
        def doc = Jsoup.connect(url).get()
        doc.select('h1 > a').collect { aTag ->
            aTag.attr('href')
        }
    }

    // New slug = last non-empty path segment of the href. Splitting on '/'
    // gives the same slug whether or not the href ends with a slash (the
    // previous index arithmetic always dropped the final character, which
    // truncated the slug when there was no trailing slash). Hrefs with no
    // path segment at all ('' or '/') contribute no entry.
    newSlugUrls = newUrls.collectEntries { url ->
        def segments = url.tokenize('/')
        segments ? [(segments.last()): url] : [:]
    }
}
// Destination file for the generated redirect route definitions.
def redirections = Paths.get("/tmp/gaelyk-blog-redirect.groovy")

// Compare every old slug with every new slug and keep the pairs that are
// close enough in Levenshtein distance to be treated as the same article.
def rules = [oldSlugUrls.keySet(), newSlugUrls.keySet()]
    .combinations()
    .collect { oldSlug, newSlug ->
        [oldSlug: oldSlug, newSlug: newSlug, dist: levenDist(oldSlug, newSlug)]
    }
    .findAll {
        // Slugs containing 'groovy-weekly' must match exactly; every other
        // slug tolerates a single-character difference.
        it.dist <= (it.oldSlug.contains('groovy-weekly') ? 0 : 1)
    }
    .collect {
        """get '${oldSlugUrls[it.oldSlug]}', \t redirect301: '${newSlugUrls[it.newSlug]}'\n"""
    }

// Write the whole file in one go, replacing any previous content. Appending
// rule by rule with '<<' never truncated the file, so running the script a
// second time duplicated every rule.
redirections.write(rules.join(''))

println "OK"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment