Skip to content

Instantly share code, notes, and snippets.

@zkxs
Created July 3, 2014 07:30
Show Gist options
  • Save zkxs/e212827dbe42ee320ea9 to your computer and use it in GitHub Desktop.
Save zkxs/e212827dbe42ee320ea9 to your computer and use it in GitHub Desktop.
Extracts non-ASCII Unicode characters less than 0x1F0F1 from a file
import java.io.*;
import java.nio.CharBuffer;
/**
 * Extracts the non-ASCII Unicode characters below 0x1F0F1 from UTF-8 encoded files.
 * <br />
 * Every code point strictly greater than 0x7F and strictly less than 0x1F0F1 is
 * written to standard output in the order it is read; everything else is dropped.
 * <br />
 * Created for http://www.reddit.com/r/program/comments/1tvh50/request_unicodetxt/
 * <br />
 * Usage: java UnicodeCharExtract path/to/file > output.txt
 * <br />
 * @author reddit.com/u/zkxs Jul 3, 2014
 */
public class UnicodeCharExtract
{
    /** Charset used to decode every input file. */
    private final static String CHARACTER_ENCODING = "UTF-8";

    /** Size, in chars, of the read buffer placed in front of each input file. */
    private final static int BUFFER_SIZE = 1024;

    /** Code points must be strictly greater than this value to be printed. */
    private final static int LOWER_BOUND_EXCLUSIVE = 0x7F;

    /** Code points must be strictly less than this value to be printed. */
    private final static int UPPER_BOUND_EXCLUSIVE = 0x1F0F1;

    /**
     * Processes each file named on the command line in turn.
     * A file that cannot be opened or read is reported on standard error and
     * skipped; the remaining files are still processed.
     *
     * @param args paths of the files to extract characters from
     */
    public static void main(String[] args)
    {
        for (String filename : args)
        {
            // Both resources are declared so the file handle is released even
            // if building the reader chain fails part-way.
            try (InputStream in = new FileInputStream(filename);
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(in, CHARACTER_ENCODING), BUFFER_SIZE))
            {
                readChars(br);
            }
            catch (IOException e)
            {
                System.err.println("Could not process " + filename + ": " + e);
            }
        }
        System.out.flush();
    }

    /**
     * Prints every code point from {@code br} that lies strictly between
     * 0x7F and 0x1F0F1 to standard output.
     * <br />
     * The reader delivers UTF-16 code units, so a high surrogate is combined
     * with the low surrogate that follows it before the range check. Comparing
     * single {@code char} values against 0x1F0F1 would always succeed, because
     * a {@code char} cannot exceed 0xFFFF.
     *
     * @param br source of characters; it is read until end of stream and not closed here
     * @throws IOException if reading from {@code br} fails
     */
    private static void readChars(BufferedReader br) throws IOException
    {
        int unit;
        while ((unit = br.read()) != -1)
        {
            int codePoint = unit;
            if (Character.isHighSurrogate((char) unit))
            {
                // Peek at the next unit; mark/reset lets us push it back when
                // it does not complete the pair.
                br.mark(1);
                int next = br.read();
                if (next != -1 && Character.isLowSurrogate((char) next))
                {
                    codePoint = Character.toCodePoint((char) unit, (char) next);
                }
                else if (next != -1)
                {
                    br.reset();
                }
            }
            if (codePoint > LOWER_BOUND_EXCLUSIVE && codePoint < UPPER_BOUND_EXCLUSIVE)
            {
                System.out.print(Character.toChars(codePoint));
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment