Last active
December 29, 2022 21:27
-
-
Save madan712/10641676 to your computer and use it in GitHub Desktop.
Java program to read doc or docx file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.FileInputStream; | |
import java.util.List; | |
import org.apache.poi.hwpf.HWPFDocument; | |
import org.apache.poi.hwpf.extractor.WordExtractor; | |
import org.apache.poi.xwpf.usermodel.XWPFDocument; | |
import org.apache.poi.xwpf.usermodel.XWPFParagraph; | |
public class DocReader { | |
public static void readDocFile(String fileName) { | |
try { | |
File file = new File(fileName); | |
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); | |
HWPFDocument doc = new HWPFDocument(fis); | |
WordExtractor we = new WordExtractor(doc); | |
String[] paragraphs = we.getParagraphText(); | |
System.out.println("Total no of paragraph "+paragraphs.length); | |
for (String para : paragraphs) { | |
System.out.println(para.toString()); | |
} | |
fis.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void readDocxFile(String fileName) { | |
try { | |
File file = new File(fileName); | |
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); | |
XWPFDocument document = new XWPFDocument(fis); | |
List<XWPFParagraph> paragraphs = document.getParagraphs(); | |
System.out.println("Total no of paragraph "+paragraphs.size()); | |
for (XWPFParagraph para : paragraphs) { | |
System.out.println(para.getText()); | |
} | |
fis.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void main(String[] args) { | |
readDocxFile("C:\\Test.docx"); | |
readDocFile("C:\\Test.doc"); | |
} | |
} |
The pom.xml dependencies below are what worked for me, after much research piecing them together from around the internet:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
Hope this will help.
Thank you,
Vishwas Saxena, Greater Noida
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
he cyjj. El problema es que usas la librería equivocada. Debes usar poi-ooxml.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.7-beta3</version>
</dependency>