Last active
October 13, 2015 11:18
-
-
Save seralf/4187977 to your computer and use it in GitHub Desktop.
A simple class for RDF MIME detection using sesame Rio Parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package formats; | |
import java.io.IOException; | |
import java.net.HttpURLConnection; | |
import java.net.URL; | |
import java.util.HashMap; | |
import java.util.Map; | |
import org.openrdf.rio.RDFFormat; | |
import org.openrdf.rio.RDFParser; | |
import org.openrdf.rio.Rio; | |
/** | |
* A simple class for RDF MIME detection using sesame Rio Parser. | |
* @author seralf | |
* | |
*/ | |
public class RdfMimeDetect { | |
private static class NoMIMEException extends Exception { | |
public NoMIMEException() { | |
super(); | |
} | |
public NoMIMEException(String message, Throwable cause) { | |
super(message, cause); | |
} | |
public NoMIMEException(String message) { | |
super(message); | |
} | |
public NoMIMEException(Throwable cause) { | |
super(cause); | |
} | |
} | |
private static class MimeAlias { | |
private String wrong; | |
private String right; | |
public MimeAlias(String wrongName, String rightName) { | |
this.wrong = wrongName; | |
this.right = rightName; | |
} | |
@Override | |
public String toString() { | |
return "(" + wrong + ", " + right + ")"; | |
} | |
} | |
private final static Map<String, String> aliases = new HashMap<String, String>(); | |
public static void setMimeAlias(final MimeAlias... mimeAliases){; | |
for (MimeAlias mimeAlias : mimeAliases) { | |
aliases.put(mimeAlias.wrong, mimeAlias.right); | |
} | |
} | |
public static RDFFormat getMime(final URL url, boolean useExtension) throws IOException, NoMIMEException { | |
final RDFFormat format; | |
// we could obtaind the correct RDFFormat from file extension or directly from MIME name | |
if(useExtension){ | |
format = RDFFormat.forFileName(url.toString()); | |
} else { | |
String mimeType = ((HttpURLConnection) url.openConnection()).getContentType(); | |
// in case we have MIME in the form: text/turtle; utf-8 or similar... | |
String mimeTypeSimple = mimeType.split(";")[0].trim(); | |
// check if the mime name is in the exception or not | |
format = aliases.containsKey(mimeTypeSimple) ? RDFFormat.forMIMEType(aliases.get(mimeTypeSimple)) : RDFFormat.forMIMEType(mimeTypeSimple); | |
} | |
if (format == null) throw new NoMIMEException("NO MIME found"); | |
return format; | |
} | |
public static void main(String[] args) throws Exception { | |
// map of some alternative mime names... | |
final MimeAlias[] aliases = { | |
new MimeAlias("text/n3", "text/rdf+n3") | |
}; | |
// list of example URLs | |
final URL[] urls = { | |
new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.rdf"), | |
new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.json"), | |
new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.n3"), | |
new URL("http://dbpedia.org/data/Banco_del_Mutuo_Soccorso.ntriples") | |
}; | |
// set the mime name exception list | |
RdfMimeDetect.setMimeAlias(aliases); | |
for (URL url : urls) { | |
try { | |
RDFFormat mime = getMime(url, false); | |
System.out.printf("URL: %s, MIME: %s\n", url, mime); | |
RDFParser rdfParser = Rio.createParser(mime); | |
System.out.printf("TODO: SOMETHING WITH PARSER FOR: %s\n", rdfParser.getRDFFormat()); | |
} catch (Exception e) { | |
System.err.printf("Can't detect MIME for url: %s (%s)\n", url, e.getMessage()); | |
} | |
} | |
} | |
} | |
/*
 * Format name, MIME type, file extension(s), charset:
 *
 * RDF/XML    application/rdf+xml        .rdf .rdfs .owl  UTF-8
 * RDF/XML    application/xml            .xml             UTF-8
 *
 * N-TRIPLES  text/plain                 .nt              7-bit US-ASCII
 *
 * TURTLE     text/turtle                .ttl             UTF-8
 * TURTLE     application/x-turtle       .ttl             UTF-8
 *
 * N3         text/rdf+n3                .n3              UTF-8
 *
 * TRIX       application/trix           .trix            UTF-8
 *
 * TRIG       application/x-trig         .trig            UTF-8
 *
 * BINARY     application/x-binary-rdf   .brf             binary
 *
 * NQUADS     text/x-nquads              .nq              7-bit US-ASCII
 */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment