Created
August 13, 2023 13:31
-
-
Save raymyers/b5aedf87c1b35ccf02fc52b5e85aeb8d to your computer and use it in GitHub Desktop.
Wrap icu4j charset detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ... | |
dependencies { | |
// ICU4J supplies com.ibm.icu.text.CharsetDetector, which CharsetUtils imports
// to guess a file's character encoding.
// Artifact listing: https://mvnrepository.com/artifact/com.ibm.icu/icu4j
implementation 'com.ibm.icu:icu4j:73.2' | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package your.name; | |
import com.ibm.icu.text.CharsetDetector; | |
import java.io.File; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.nio.charset.Charset; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
public class CharsetUtils { | |
public static Charset detectFileCharset(File file) throws IOException { | |
CharsetDetector charsetDetector = new CharsetDetector(); | |
try (var inputStream = Files.newInputStream(Path.of(file.getAbsolutePath()))) { | |
var inputStreamIgnoringRestart = new InputStream() { | |
@Override | |
public int read() throws IOException { | |
return inputStream.read(); | |
} | |
@Override | |
public void mark(int readLimit) { | |
// ignore | |
} | |
@Override | |
public void reset() { | |
// ignore | |
} | |
}; | |
charsetDetector.setText(inputStreamIgnoringRestart); | |
} | |
return Charset.forName(charsetDetector.detect().getName()); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This is free and unencumbered software released into the public domain. | |
Unlicense: https://choosealicense.com/licenses/unlicense | |
The dependency icu4j is under the Unicode License. | |
https://www.unicode.org/faq/unicode_license.html |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment