Created
October 10, 2012 15:42
-
-
Save harrah/3866442 to your computer and use it in GitHub Desktop.
archive Google Code wiki to HTML, replace old wiki with links to archive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.jsoup._ | |
import java.io.{File, FileWriter} | |
/* | |
libraryDependencies += "org.jsoup" % "jsoup" % "1.7.1" | |
scalaVersion := "2.10.0-M7" | |
*/ | |
object Main { | |
val WikiSuffix = ".wiki" | |
val HtmlSuffix = ".html" | |
val Href = "href" | |
val WikiAuthorClass = "#wikiauthor" | |
val WikiLinkPrefix = "/p/simple-build-tool/wiki/" | |
val SbtMoved = "sbt_has_moved" | |
val RobotsMeta = """<meta name="robots" content="noindex, nofollow"></meta>""" | |
def main(args: Array[String]) { | |
val inDir = new File(args(0)) | |
val outDir = new File(args(1)) | |
for(page <- inDir.listFiles().take(1); name <- wikiPageName(page)) { | |
write(generateHtml(name), new File(outDir, name + HtmlSuffix), name) | |
write(newWikiContent(name), page, name) | |
} | |
} | |
def write(content: String, file: File, name: String) | |
{ | |
println("Writing " + file.getAbsolutePath + "...") | |
file.getParentFile.mkdirs() | |
val out = new FileWriter(file) | |
try { out.write(content) } | |
finally { out.close() } | |
println("Transformed " + name) | |
} | |
def generateHtml(pagename: String): String = | |
{ | |
val url = wikiURL(pagename) | |
println("Retrieving " + url) | |
val doc = Jsoup.connect(url).get(); | |
trim(doc) | |
transformInternalLinks(doc) | |
addRobotsMeta(doc) | |
doc.outerHtml() | |
} | |
def addRobotsMeta(doc: nodes.Document) | |
{ | |
doc.select("head").append(RobotsMeta) | |
} | |
def trim(doc: nodes.Document) | |
{ | |
doc.select(WikiAuthorClass).remove() | |
doc.select(s"a.label[$Href*=label:Featured]").remove() | |
removeSbtMoved(doc) | |
} | |
def removeSbtMoved(doc: nodes.Document) | |
{ | |
for(moved <- select(doc, s"h1 > a[name=$SbtMoved]")) { | |
val h1 = moved.parent | |
val next = h1.nextElementSibling | |
next.nextElementSibling.remove() | |
next.remove() | |
h1.remove() | |
} | |
for(a <- select(doc, s"a[href=#$SbtMoved]")) | |
a.parent.remove() | |
} | |
def transformInternalLinks(doc: nodes.Document) | |
{ | |
for(a <- select(doc, s"a[$Href^=$WikiLinkPrefix]")) | |
a.attr(Href, a.attr(Href).stripPrefix(WikiLinkPrefix)) | |
} | |
def select(doc: nodes.Document, s: String) = | |
{ | |
import collection.JavaConverters._ | |
doc.select(s).iterator.asScala | |
} | |
def wikiPageName(file: File): Option[String] = | |
if(file.getName.endsWith(WikiSuffix)) | |
Some(file.getName.stripSuffix(WikiSuffix)) | |
else | |
None | |
def wikiURL(name: String) = s"http://code.google.com/p/simple-build-tool/wiki/$name?show=content" | |
def newWikiContent(name: String): String = s""" | |
= sbt has moved = | |
*sbt is now hosted on !GitHub*: https://github.com/harrah/xsbt. | |
This page previously documented sbt 0.7.7, which is *no longer maintained*. | |
The original content is archived at http://www.scala-sbt.org/0.7.7/docs/$name.html | |
""" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment