Skip to content

Instantly share code, notes, and snippets.

@seralf
Last active October 13, 2015 11:18
Show Gist options
  • Save seralf/4187977 to your computer and use it in GitHub Desktop.
Save seralf/4187977 to your computer and use it in GitHub Desktop.
A simple class for RDF MIME detection using sesame Rio Parser.
package formats;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;
/**
 * A simple class for RDF MIME detection using the Sesame Rio parser.
 *
 * <p>The {@link RDFFormat} of a resource is looked up either from the textual form of its URL
 * (file-name lookup) or from the content type reported when a connection to the URL is opened.
 * Alternative MIME names can be mapped to the names used for the lookup through
 * {@link #setMimeAlias(MimeAlias...)}.
 *
 * <p>The alias table is a plain static {@link HashMap}; no synchronization is performed.
 *
 * @author seralf
 */
public class RdfMimeDetect {

    /**
     * Thrown by {@link RdfMimeDetect#getMime(URL, boolean)} when no {@link RDFFormat} can be
     * determined. Public so that callers of the public API can catch it by name.
     */
    public static class NoMIMEException extends Exception {

        private static final long serialVersionUID = 1L;

        /** Creates an exception without a message or cause. */
        public NoMIMEException() {
            super();
        }

        /**
         * @param message description of the failure
         * @param cause underlying cause
         */
        public NoMIMEException(String message, Throwable cause) {
            super(message, cause);
        }

        /** @param message description of the failure */
        public NoMIMEException(String message) {
            super(message);
        }

        /** @param cause underlying cause */
        public NoMIMEException(Throwable cause) {
            super(cause);
        }
    }

    /**
     * Pair mapping an alternative ("wrong") MIME name to the name that should be used for the
     * format lookup ("right"). Public so that callers can build arguments for
     * {@link RdfMimeDetect#setMimeAlias(MimeAlias...)}.
     */
    public static class MimeAlias {

        private final String wrong;
        private final String right;

        /**
         * @param wrongName MIME name as it may be reported for a resource
         * @param rightName MIME name to use instead when looking up the format
         */
        public MimeAlias(String wrongName, String rightName) {
            this.wrong = wrongName;
            this.right = rightName;
        }

        /** Renders the pair as {@code (wrong, right)}. */
        @Override
        public String toString() {
            return "(" + wrong + ", " + right + ")";
        }
    }

    /** Alias table: reported MIME name -> MIME name used for the lookup. */
    private final static Map<String, String> aliases = new HashMap<String, String>();

    /**
     * Registers the given aliases. An alias whose "wrong" name is already registered replaces
     * the previous entry.
     *
     * @param mimeAliases aliases to add to the table
     */
    public static void setMimeAlias(final MimeAlias... mimeAliases) {
        for (MimeAlias mimeAlias : mimeAliases) {
            aliases.put(mimeAlias.wrong, mimeAlias.right);
        }
    }

    /**
     * Determines the {@link RDFFormat} for a URL.
     *
     * @param url resource to inspect
     * @param useExtension if {@code true}, the format is looked up from the textual form of the
     *     URL via {@link RDFFormat#forFileName(String)} and no connection is opened; if
     *     {@code false}, a connection is opened and the reported content type is used
     * @return the matching format, never {@code null}
     * @throws IOException if the connection to {@code url} cannot be opened
     * @throws NoMIMEException if no content type is reported or no format matches
     */
    public static RDFFormat getMime(final URL url, boolean useExtension) throws IOException, NoMIMEException {
        final RDFFormat format;
        // the RDFFormat can be obtained from the file extension or directly from the MIME name
        if (useExtension) {
            format = RDFFormat.forFileName(url.toString());
        } else {
            final String mimeType = readContentType(url);
            if (mimeType == null) {
                // report a missing content type through the declared exception rather than
                // failing later with a NullPointerException
                throw new NoMIMEException("NO MIME found: no content type reported for " + url);
            }
            // in case we have MIME in the form: text/turtle; utf-8 or similar...
            // (indexOf/substring is used because split(";")[0] fails on an input such as ";")
            final int semicolon = mimeType.indexOf(';');
            final String mimeTypeSimple =
                    (semicolon < 0 ? mimeType : mimeType.substring(0, semicolon)).trim();
            // use the registered replacement name when the reported one is a known alias
            final String alias = aliases.get(mimeTypeSimple);
            format = RDFFormat.forMIMEType(alias != null ? alias : mimeTypeSimple);
        }
        if (format == null) {
            throw new NoMIMEException("NO MIME found");
        }
        return format;
    }

    /**
     * Opens a connection to {@code url} and returns the content type it reports.
     *
     * @param url resource to connect to
     * @return the reported content type, or {@code null} if none is available
     * @throws IOException if the connection cannot be opened
     */
    private static String readContentType(final URL url) throws IOException {
        // URLConnection (not a blind cast to HttpURLConnection) so non-HTTP URLs work as well
        final URLConnection connection = url.openConnection();
        try {
            return connection.getContentType();
        } finally {
            // HTTP connections are disconnected explicitly so they are not left open
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
        }
    }

    /**
     * Demo: registers one alias, then detects the format of a few example URLs and creates a
     * Rio parser for each detected format.
     *
     * @param args unused
     * @throws Exception if one of the example URLs is malformed
     */
    public static void main(String[] args) throws Exception {
        // map of some alternative mime names...
        final MimeAlias[] mimeAliases = {
            new MimeAlias("text/n3", "text/rdf+n3")
        };
        // list of example URLs
        final URL[] urls = {
            new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.rdf"),
            new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.json"),
            new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.n3"),
            new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.ntriples")
        };
        // set the mime name exception list
        RdfMimeDetect.setMimeAlias(mimeAliases);
        for (URL url : urls) {
            try {
                RDFFormat mime = getMime(url, false);
                System.out.printf("URL: %s, MIME: %s\n", url, mime);
                RDFParser rdfParser = Rio.createParser(mime);
                System.out.printf("TODO: SOMETHING WITH PARSER FOR: %s\n", rdfParser.getRDFFormat());
            } catch (Exception e) {
                // demo boundary: report the failure and continue with the next URL
                System.err.printf("Can't detect MIME for url: %s (%s)\n", url, e.getMessage());
            }
        }
    }
}
/*
 * Reference table: format name, MIME type, file extension(s), charset.
 *
 * RDF/XML    application/rdf+xml       .rdf .rdfs .owl  UTF-8
 * RDF/XML    application/xml           .xml             UTF-8
 *
 * N-TRIPLES  text/plain                .nt              7-bit US-ASCII
 *
 * TURTLE     text/turtle               .ttl             UTF-8
 * TURTLE     application/x-turtle      .ttl             UTF-8
 *
 * N3         text/rdf+n3               .n3              UTF-8
 *
 * TRIX       application/trix          .trix            UTF-8
 *
 * TRIG       application/x-trig        .trig            UTF-8
 *
 * BINARY     application/x-binary-rdf  .brf             binary
 *
 * NQUADS     text/x-nquads             .nq              7-bit US-ASCII
 */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment