Created
July 16, 2018 23:52
-
-
Save markwoon/5b216226363859fd1a39dc091f43eca7 to your computer and use it in GitHub Desktop.
Utility class to check if a file is an Illustrator file based on XMP metadata.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedInputStream; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/**
 * This utility class checks if a file is an Illustrator file.
 * This is necessary because Illustrator files masquerade as PDF files and you have to check the XMP data to confirm
 * that it's actually an Illustrator file.
 * <p>
 * The file is scanned byte by byte for runs of printable characters ("tokens"); the tokens between the
 * {@code <x:xmpmeta} and {@code </x:xmpmeta} markers are collected as the XMP packet.
 *
 * @author Mark Woon
 */
public class IllustratorDetector {
  /** Marker that opens the XMP packet. */
  private static final String XMP_START = "<x:xmpmeta";
  /** Marker that closes the XMP packet. */
  private static final String XMP_END = "</x:xmpmeta";
  /** Tokens shorter than this are treated as binary noise and dropped. */
  private static final int MIN_TOKEN_LENGTH = 4;
  /** Punctuation that may appear in a token, in addition to ASCII letters and digits. */
  private static final String TOKEN_PUNCTUATION = "/-+:;.,_=\"&$%#'()[]<>";
  /** Value of {@code dc:format} that identifies an Illustrator document. */
  private static final String ILLUSTRATOR_FORMAT = "application/vnd.adobe.illustrator";
  private static final Pattern DC_FORMAT_PATTERN =
      Pattern.compile("<dc:format>(.*?)</dc:format>", Pattern.CASE_INSENSITIVE);


  /**
   * Checks whether a character may be part of a token: an ASCII letter, an ASCII digit or one of the
   * characters in {@link #TOKEN_PUNCTUATION}.  A space is handled separately by the caller because it is
   * only allowed inside a token, never at its start.
   */
  private static boolean isStringChar(char ch) {
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) {
      return true;
    }
    return TOKEN_PUNCTUATION.indexOf(ch) >= 0;
  }


  /**
   * Extracts XMP metadata from binary file.
   * <p>
   * Capturing starts at the first token that contains {@code <x:xmpmeta} (anything in front of the marker
   * within that token is discarded) and stops after the first subsequent token that contains
   * {@code </x:xmpmeta}.  Tokens of fewer than four characters are dropped.  A line feed that terminates a
   * token is kept as a separator in front of that token once capturing has started.
   *
   * @param file the file to scan
   * @return the captured XMP text, or an empty string if the file contains no XMP packet
   * @throws IOException if the file cannot be read
   */
  public static String extract(Path file) throws IOException {
    StringBuilder xmp = new StringBuilder();
    try (BufferedInputStream in = new BufferedInputStream(Files.newInputStream(file))) {
      StringBuilder token = new StringBuilder();
      int b;
      while ((b = in.read()) != -1) {
        // each byte is treated as one character; only ASCII matters for the markers we look for
        char ch = (char)b;
        if (ch == '\n' && xmp.length() > 0 && token.length() > 0) {
          xmp.append('\n');
        }
        if (isStringChar(ch) || (ch == ' ' && token.length() > 0)) {
          token.append(ch);
          continue;
        }
        // delimiter byte: the current token (if any) is complete
        if (flushToken(token, xmp)) {
          return xmp.toString();
        }
      }
      // the file may end in the middle of a token; without this flush the last token would be lost
      flushToken(token, xmp);
    }
    return xmp.toString();
  }

  /**
   * Moves a completed token into the XMP buffer if it belongs to the packet, then clears the token.
   *
   * @param token the completed token; always emptied by this call
   * @param xmp the XMP text captured so far
   * @return {@code true} if the end of the XMP packet has been captured and scanning can stop
   */
  private static boolean flushToken(StringBuilder token, StringBuilder xmp) {
    boolean finished = false;
    if (token.length() >= MIN_TOKEN_LENGTH) {
      String text = token.toString();
      if (xmp.length() == 0) {
        // not capturing yet: look for the start marker anywhere in the token, since it may directly
        // follow other printable characters (e.g. "?><x:xmpmeta")
        int start = text.indexOf(XMP_START);
        text = start >= 0 ? text.substring(start) : "";
      }
      xmp.append(text);
      // the end marker may follow another closing tag in the same token (e.g. "</rdf:RDF></x:xmpmeta>")
      finished = xmp.length() > 0 && text.contains(XMP_END);
    }
    token.setLength(0);
    return finished;
  }


  /**
   * Checks if file is an Illustrator file based on XMP metadata.
   * <p>
   * The file is considered an Illustrator file if the first {@code dc:format} element found in its XMP
   * packet has the value {@code application/vnd.adobe.illustrator} (compared case-insensitively, ignoring
   * surrounding whitespace).
   *
   * @param file the file to check
   * @return {@code true} if the XMP metadata declares the Illustrator format, {@code false} otherwise
   *     (including when the file has no XMP metadata)
   * @throws IOException if the file cannot be read
   */
  public static boolean isIllustratorFile(Path file) throws IOException {
    String xmp = extract(file);
    if (xmp.isEmpty()) {
      return false;
    }
    Matcher m = DC_FORMAT_PATTERN.matcher(xmp);
    return m.find() && ILLUSTRATOR_FORMAT.equalsIgnoreCase(m.group(1).trim());
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment