Created
May 19, 2014 01:26
-
-
Save yoshi0309/bd93cb7b1dda6536ada0 to your computer and use it in GitHub Desktop.
Apache Tika 1.5 - AutoDetectParser example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package yoshida.tika_sample; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.IOException; | |
import org.apache.tika.exception.TikaException; | |
import org.apache.tika.metadata.Metadata; | |
import org.apache.tika.parser.AutoDetectParser; | |
import org.apache.tika.parser.ParseContext; | |
import org.apache.tika.parser.Parser; | |
import org.apache.tika.sax.BodyContentHandler; | |
import org.xml.sax.ContentHandler; | |
import org.xml.sax.SAXException; | |
/** | |
* Hello world! | |
* | |
*/ | |
public class App | |
{ | |
public static void main( String[] args) | |
{ | |
System.out.println( "Hello World!" ); | |
String target = "XXX.pdf"; | |
File document = new File(target); | |
Parser parser = new AutoDetectParser(); | |
ContentHandler handler = new BodyContentHandler(); | |
Metadata metadata = new Metadata(); | |
try { | |
parser.parse(new FileInputStream(document), handler, metadata, new ParseContext()); | |
} catch (FileNotFoundException e) { | |
e.printStackTrace(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} catch (SAXException e) { | |
e.printStackTrace(); | |
} catch (TikaException e) { | |
e.printStackTrace(); | |
} | |
System.out.println(metadata); | |
System.out.println(handler.toString()); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Maven build descriptor for the tika-sample example: artifact yoshida:tika-sample, packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | |
<modelVersion>4.0.0</modelVersion> | |
<groupId>yoshida</groupId> | |
<artifactId>tika-sample</artifactId> | |
<version>0.0.1-SNAPSHOT</version> | |
<packaging>jar</packaging> | |
<name>tika-sample</name> | |
<url>http://maven.apache.org</url> | |
<properties> | |
<!-- Source files are read as UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | |
</properties> | |
<!-- Both Apache Tika artifacts are pinned to the same release (1.5); keep the two versions in step when upgrading. -->
<!-- NOTE(review): no compiler source/target level is configured here, so the build uses the Maven compiler plugin's default. -->
<dependencies> | |
<dependency> | |
<groupId>org.apache.tika</groupId> | |
<artifactId>tika-core</artifactId> | |
<version>1.5</version> | |
</dependency> | |
<dependency> | |
<groupId>org.apache.tika</groupId> | |
<artifactId>tika-parsers</artifactId> | |
<version>1.5</version> | |
</dependency> | |
</dependencies> | |
</project> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment