Last active
June 29, 2022 22:38
-
-
Save basil/c4dc312c19c6ddc80d03275a5ed0f484 to your computer and use it in GitHub Desktop.
Convert `.properties` files to UTF-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.basilcrow.jenkins; | |
import java.io.IOException; | |
import java.nio.ByteBuffer; | |
import java.nio.charset.CharacterCodingException; | |
import java.nio.charset.Charset; | |
import java.nio.charset.CharsetDecoder; | |
import java.nio.charset.CodingErrorAction; | |
import java.nio.charset.StandardCharsets; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.util.List; | |
import java.util.Objects; | |
import java.util.stream.Collectors; | |
import java.util.stream.Stream; | |
import org.apache.commons.text.translate.UnicodeUnescaper; | |
public class Main { | |
public static void main(String[] args) throws Exception { | |
List<Path> list; | |
try (Stream<Path> stream = | |
Files.walk(Paths.get(Objects.requireNonNull(System.getenv("JENKINS_CORE"))))) { | |
list = | |
stream.filter(Files::isRegularFile) | |
.filter(f -> f.toString().endsWith(".properties")) | |
.sorted() | |
.collect(Collectors.toUnmodifiableList()); | |
} | |
for (Path file : list) { | |
if (!isEncoded(file, StandardCharsets.US_ASCII)) { | |
boolean isUtf8 = isEncoded(file, StandardCharsets.UTF_8); | |
boolean isIso88591 = isEncoded(file, StandardCharsets.ISO_8859_1); | |
if (isUtf8 && isIso88591) { | |
throw new IllegalStateException(file + " is valid UTF-8 and valid ISO-8859-1. To avoid problems when auto-detecting the encoding, use the lowest common denominator of ASCII encoding and express non-ASCII characters with escape sequences using a tool like `native2ascii`."); | |
} | |
} | |
if (!isEncoded(file, StandardCharsets.US_ASCII) | |
&& !isEncoded(file, StandardCharsets.ISO_8859_1)) { | |
throw new IllegalStateException(file + " is neither ASCII nor ISO-8859-1"); | |
} | |
if (isEncoded(file, StandardCharsets.ISO_8859_1)) { | |
String str = Files.readString(file, StandardCharsets.ISO_8859_1); | |
Files.writeString(file, str, StandardCharsets.UTF_8); | |
} | |
if (!isEncoded(file, StandardCharsets.US_ASCII) | |
&& !isEncoded(file, StandardCharsets.UTF_8)) { | |
throw new IllegalStateException(file + " is neither ASCII nor UTF-8"); | |
} | |
{ | |
String inStr = Files.readString(file, StandardCharsets.UTF_8); | |
UnicodeUnescaper unicodeUnescaper = new UnicodeUnescaper(); | |
String outStr = unicodeUnescaper.translate(inStr); | |
Files.writeString(file, outStr, StandardCharsets.UTF_8); | |
} | |
if (!isEncoded(file, StandardCharsets.US_ASCII) | |
&& !isEncoded(file, StandardCharsets.UTF_8)) { | |
throw new IllegalStateException(file + " is neither ASCII nor UTF-8"); | |
} | |
} | |
} | |
private static boolean isEncoded(Path file, Charset charset) throws IOException { | |
byte[] bytes = Files.readAllBytes(file); | |
CharsetDecoder decoder = charset.newDecoder(); | |
decoder.onMalformedInput(CodingErrorAction.REPORT); | |
decoder.onUnmappableCharacter(CodingErrorAction.REPORT); | |
ByteBuffer buffer = ByteBuffer.wrap(bytes); | |
try { | |
decoder.decode(buffer); | |
return true; | |
} catch (CharacterCodingException e) { | |
return false; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment