Skip to content

Instantly share code, notes, and snippets.

@xerz-one
Last active October 18, 2016 08:53
Show Gist options
  • Save xerz-one/c681befd02a2afe8fe60ebebac234a7b to your computer and use it in GitHub Desktop.
Save xerz-one/c681befd02a2afe8fe60ebebac234a7b to your computer and use it in GitHub Desktop.
A script that destroys Windows-1252 garbage - with garbage!
/*
Ungarbage v0.1.0.1
October 18th 2016, @espectalll
To the extent possible under law, the author has dedicated all copyright and related and neighboring rights
to this software to the public domain worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
import java.io.File
import java.nio.charset.Charset
import java.nio.file.Files
/**
 * Recursively collects the HTML files located under [dir].
 *
 * A file is selected when its path ends with ".html" or ".htm" (case-sensitive).
 * Sub-directories are descended into regardless of their name.
 *
 * @param dir directory to scan.
 * @return the matching files; empty when [dir] is not a directory or its
 *         contents cannot be listed.
 */
fun findFiles(dir: File): MutableList<File> {
    val files: MutableList<File> = arrayListOf()
    // File.listFiles() yields null (not an empty array) when the path is not a
    // directory or an I/O error occurs; treat that as "nothing to collect"
    // instead of crashing with a NullPointerException.
    val children = dir.listFiles() ?: return files
    for (child in children) {
        val childPath = child.toString()
        when {
            child.isDirectory -> files.addAll(findFiles(child))
            childPath.endsWith(".html") || childPath.endsWith(".htm") -> files.add(child)
        }
    }
    return files
}
/**
 * Returns a copy of [data] in which each accented Spanish letter from the
 * table below is replaced by its numeric HTML character reference.
 *
 * Characters that are not in the table are copied through untouched.
 *
 * @param data lines of text to convert.
 * @return a new list with the same number of lines, converted line by line.
 */
fun replaceBad(data: List<String>): List<String> {
    val entities = mapOf(
        'á' to "&#225;", 'é' to "&#233;", 'í' to "&#237;", 'ó' to "&#243;", 'ú' to "&#250;",
        'Á' to "&#193;", 'É' to "&#201;", 'Í' to "&#205;", 'Ó' to "&#211;", 'Ú' to "&#218;",
        'ñ' to "&#241;", 'Ñ' to "&#209;"
    )
    return data.map { line ->
        buildString {
            for (ch in line) {
                val entity = entities[ch]
                if (entity != null) {
                    append(entity)
                } else {
                    append(ch)
                }
            }
        }
    }
}
/**
 * Entry point: converts every HTML file found under the current working directory.
 *
 * Each file is read as Windows-1252 text, passed through replaceBad, and
 * written back to the same path encoded as UTF-8.
 *
 * @param args command-line arguments (not used).
 */
fun main(args: Array<String>) {
    findFiles(File(".")).forEach { htmlFile ->
        val lines = htmlFile.readLines(Charset.forName("windows-1252"))
        // Overwrites the source file in place.
        Files.write(htmlFile.toPath(), replaceBad(lines), Charsets.UTF_8)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment