-
-
Save madan712/10641676 to your computer and use it in GitHub Desktop.
import java.io.File; | |
import java.io.FileInputStream; | |
import java.util.List; | |
import org.apache.poi.hwpf.HWPFDocument; | |
import org.apache.poi.hwpf.extractor.WordExtractor; | |
import org.apache.poi.xwpf.usermodel.XWPFDocument; | |
import org.apache.poi.xwpf.usermodel.XWPFParagraph; | |
public class DocReader { | |
public static void readDocFile(String fileName) { | |
try { | |
File file = new File(fileName); | |
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); | |
HWPFDocument doc = new HWPFDocument(fis); | |
WordExtractor we = new WordExtractor(doc); | |
String[] paragraphs = we.getParagraphText(); | |
System.out.println("Total no of paragraph "+paragraphs.length); | |
for (String para : paragraphs) { | |
System.out.println(para.toString()); | |
} | |
fis.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void readDocxFile(String fileName) { | |
try { | |
File file = new File(fileName); | |
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); | |
XWPFDocument document = new XWPFDocument(fis); | |
List<XWPFParagraph> paragraphs = document.getParagraphs(); | |
System.out.println("Total no of paragraph "+paragraphs.size()); | |
for (XWPFParagraph para : paragraphs) { | |
System.out.println(para.getText()); | |
} | |
fis.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void main(String[] args) { | |
readDocxFile("C:\\Test.docx"); | |
readDocFile("C:\\Test.doc"); | |
} | |
} |
Hi, I tried this for a .doc file and I am getting the error below.
org.apache.poi.POIXMLException: org.apache.poi.openxml4j.exceptions.InvalidFormatException: Package should contain a content type part [M1.13]
at org.apache.poi.util.PackageHelper.open(PackageHelper.java:41)
at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:120).
Please tell me how to resolve this.
I think you'd have to use HWPF for .doc files.
HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(filePath));
int noOfParagraphs = wordDoc.getParagraphTable().getParagraphs().size();
Could you tell me how you define the POI dependency in your pom? The one I pull from Maven has no module named xwpf or hwpf. Thanks.
Hey cyjj. El problema es que usas la librería equivocada. Debes usar poi-ooxml.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.7-beta3</version>
</dependency>
Below is the pom configuration that worked for me, pieced together after much research on the internet:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
Hope this will help.
Thank you,
Vishwas Saxena, Greater Noida
Hi, I tried this piece of code but it's not working: it reports that it is unable to read the header, which is expecting 162 bytes while 500 bytes are available. Can you please tell me how to resolve it?