Skip to content

Instantly share code, notes, and snippets.

@markwoon
Created July 16, 2018 23:52
Show Gist options
  • Save markwoon/5b216226363859fd1a39dc091f43eca7 to your computer and use it in GitHub Desktop.
Save markwoon/5b216226363859fd1a39dc091f43eca7 to your computer and use it in GitHub Desktop.
Utility class to check if a file is an Illustrator file based on XMP metadata.
import java.io.BufferedInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * This utility class checks if a file is an Illustrator file.
 * <p>
 * This is necessary because Illustrator files masquerade as PDF files and you have to check the XMP data to confirm
 * that it's actually an Illustrator file.
 * <p>
 * The XMP packet is located by scanning the raw bytes of the file for runs of printable characters (similar in
 * spirit to the Unix {@code strings} tool) and collecting the runs between {@code <x:xmpmeta} and
 * {@code </x:xmpmeta}.
 *
 * @author Mark Woon
 */
public class IllustratorDetector {
  /** Punctuation that, in addition to ASCII letters and digits, is accepted as part of a string run. */
  private static final String STRING_PUNCTUATION = "/-+:;.,_=\"&$%#'()[]<>";
  /** Runs shorter than this are treated as noise and discarded. */
  private static final int MIN_RUN_LENGTH = 4;
  /** Prefix of the run that opens the XMP packet. */
  private static final String XMP_OPEN_TAG = "<x:xmpmeta";
  /** Prefix of the run that closes the XMP packet. */
  private static final String XMP_CLOSE_TAG = "</x:xmpmeta";
  /** Value of {@code dc:format} that identifies an Illustrator file. */
  private static final String ILLUSTRATOR_FORMAT = "application/vnd.adobe.illustrator";
  /** Captures the content of the first {@code dc:format} element that sits on a single line. */
  private static final Pattern DC_FORMAT_PATTERN =
      Pattern.compile("<dc:format>(.*?)</dc:format>", Pattern.CASE_INSENSITIVE);


  /**
   * Checks whether a character may be part of a string run.
   *
   * @param ch the character to test
   * @return {@code true} for ASCII letters, ASCII digits and the characters in {@link #STRING_PUNCTUATION}
   */
  private static boolean isStringChar(char ch) {
    if (ch >= 'a' && ch <= 'z') {
      return true;
    }
    if (ch >= 'A' && ch <= 'Z') {
      return true;
    }
    if (ch >= '0' && ch <= '9') {
      return true;
    }
    return STRING_PUNCTUATION.indexOf(ch) >= 0;
  }


  /**
   * Extracts XMP metadata from binary file.
   * <p>
   * The file is read byte by byte and each byte is treated as one character, so only single-byte text is
   * recognized.  Consecutive string characters (plus embedded spaces) form a run; a run is kept only if it is at
   * least {@value #MIN_RUN_LENGTH} characters long.  Collection starts with the first run that begins with
   * {@code <x:xmpmeta} and stops after the first run that begins with {@code </x:xmpmeta}.  A run that is terminated
   * by a line feed (or by the end of the file) is separated from the preceding output by {@code '\n'}; leading
   * whitespace of a line is dropped.
   *
   * @param file the file to scan
   * @return the collected XMP text, or an empty string if no run starting with {@code <x:xmpmeta} was found
   * @throws IOException if the file cannot be opened or read
   */
  public static String extract(Path file) throws IOException {
    StringBuilder xmpBuilder = new StringBuilder();
    try (BufferedInputStream in = new BufferedInputStream(Files.newInputStream(file))) {
      StringBuilder run = new StringBuilder();
      int i;
      while ((i = in.read()) != -1) {
        char ch = (char)i;
        // a run that ends at a line feed starts a new line in the output
        if (xmpBuilder.length() > 0 && ch == '\n' && run.length() > 0) {
          xmpBuilder.append('\n');
        }
        // spaces are only accepted inside a run, never at its start
        if (isStringChar(ch) || (run.length() > 0 && ch == ' ')) {
          run.append(ch);
          continue;
        }
        if (run.length() == 0) {
          continue;
        }
        boolean packetClosed = appendRun(run, xmpBuilder);
        run.setLength(0);
        if (packetClosed) {
          return xmpBuilder.toString();
        }
      }
      // The file may end in the middle of a run (no terminating byte after the packet); without this flush the
      // final run - possibly the whole packet - would be lost.  End of file is treated like an end of line.
      if (run.length() >= MIN_RUN_LENGTH) {
        if (xmpBuilder.length() > 0) {
          xmpBuilder.append('\n');
        }
        appendRun(run, xmpBuilder);
      }
    }
    return xmpBuilder.toString();
  }

  /**
   * Appends a completed run to the XMP output if it belongs to the packet.
   *
   * @param run the completed run
   * @param xmpBuilder the output collected so far; empty until the opening tag has been seen
   * @return {@code true} if the run starts with the closing {@code </x:xmpmeta} tag, i.e. scanning can stop
   */
  private static boolean appendRun(StringBuilder run, StringBuilder xmpBuilder) {
    if (run.length() < MIN_RUN_LENGTH) {
      return false;
    }
    String text = run.toString();
    if (xmpBuilder.length() > 0 || text.startsWith(XMP_OPEN_TAG)) {
      xmpBuilder.append(text);
    }
    return text.startsWith(XMP_CLOSE_TAG);
  }


  /**
   * Checks if file is an Illustrator file based on XMP metadata.
   *
   * @param file the file to check
   * @return {@code true} if the file's XMP metadata contains a {@code dc:format} element whose value is
   * {@code application/vnd.adobe.illustrator} (compared case-insensitively); {@code false} if there is no XMP
   * metadata, no {@code dc:format} element, or a different format
   * @throws IOException if the file cannot be opened or read
   */
  public static boolean isIllustratorFile(Path file) throws IOException {
    String xmp = extract(file);
    if (xmp.isEmpty()) {
      return false;
    }
    Matcher m = DC_FORMAT_PATTERN.matcher(xmp);
    return m.find() && m.group(1).equalsIgnoreCase(ILLUSTRATOR_FORMAT);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment