jamespaultg · April 14, 2020 18:09
diff --git a/getworddoc.py b/getworddoc.py
 !pip3 install python-docx -q
 # python-docx is imported under the module name `docx`; the code below
 # calls docx.Document.
 import docx
 import docx2txt

 # Path of the Word document to read; replace this placeholder with the
 # location of your own .docx file.
 wordfile = "your word document.docx"

 # get the contents of the word document
 def getDocxContent(filename):
     """Return the text of every paragraph in a .docx file as one string.

     The paragraph texts are concatenated in document order with no
     separator between them, so the end of one paragraph runs directly
     into the start of the next.

     Args:
         filename: The .docx file to open; passed straight to
             ``docx.Document``.

     Returns:
         A single ``str``; empty when the document has no paragraphs.
     """
     doc = docx.Document(filename)
     # str.join builds the result in one pass instead of re-copying the
     # accumulated string on every ``+=``.
     return "".join(para.text for para in doc.paragraphs)

 # Read the document's paragraph text through getDocxContent.
 doc_content = getDocxContent(wordfile)


 # Extracting paragraph text: open the document and evaluate the text of
 # its first paragraph.
 # NOTE(review): as a bare expression this only shows a value when run
 # interactively (e.g. as the last line of a notebook cell) - confirm intent.
 doc = docx.Document(wordfile)
 doc.paragraphs[0].text

 # Inspect the document's sections: evaluate how many there are, then
 # print each section's start type.
 sections = doc.sections
 len(doc.sections)
 for section in sections:
     print(section.start_type)

 # get headings
 def iter_headings(paragraphs):
     """Yield the paragraphs whose style name starts with 'Heading'.

     The style name of every paragraph visited is printed, whether or not
     the paragraph is yielded.

     Args:
         paragraphs: Iterable of objects exposing ``style.name``.

     Yields:
         Each paragraph, in input order, whose style name begins with
         'Heading'.
     """
     for para in paragraphs:
         style_name = para.style.name
         print(style_name)
         if not style_name.startswith('Heading'):
             continue
         yield para

 # Print the text of each heading found in the slice doc.paragraphs[1:100]
 # (paragraph 0 is skipped and the slice stops before index 100).
 for heading in iter_headings(doc.paragraphs[1:100]):
    print(heading.text)
	!pip3 install python-docx -q
	import docx2txt

	# Path of the Word document to read; replace this placeholder with the
	# location of your own .docx file.
	wordfile = "your word document.docx"

	# get the contents of the word document
	def getDocxContent(filename):
	    """Return the text of every paragraph in a .docx file as one string.

	    The paragraph texts are concatenated in document order with no
	    separator between them, so the end of one paragraph runs directly
	    into the start of the next.

	    Args:
	        filename: The .docx file to open; passed straight to
	            ``docx.Document``.

	    Returns:
	        A single ``str``; empty when the document has no paragraphs.
	    """
	    doc = docx.Document(filename)
	    # str.join builds the result in one pass instead of re-copying the
	    # accumulated string on every ``+=``.
	    return "".join(para.text for para in doc.paragraphs)

	# Read the document's paragraph text through getDocxContent.
	doc_content = getDocxContent(wordfile)


	# Extracting paragraph text: open the document and evaluate the text of
	# its first paragraph.
	# NOTE(review): as a bare expression this only shows a value when run
	# interactively (e.g. as the last line of a notebook cell) - confirm intent.
	doc = docx.Document(wordfile)
	doc.paragraphs[0].text

	# Inspect the document's sections: evaluate how many there are, then
	# print each section's start type.
	sections = doc.sections
	len(doc.sections)
	for section in sections:
	    print(section.start_type)

	# get headings
	def iter_headings(paragraphs):
	    """Yield the paragraphs whose style name starts with 'Heading'.

	    The style name of every paragraph visited is printed, whether or not
	    the paragraph is yielded.

	    Args:
	        paragraphs: Iterable of objects exposing ``style.name``.

	    Yields:
	        Each paragraph, in input order, whose style name begins with
	        'Heading'.
	    """
	    for paragraph in paragraphs:
	        print(paragraph.style.name)
	        if paragraph.style.name.startswith('Heading'):
	            yield paragraph

	# Print the text of each heading found in the slice doc.paragraphs[1:100]
	# (paragraph 0 is skipped and the slice stops before index 100).
	for heading in iter_headings(doc.paragraphs[1:100]):
	    print(heading.text)