Skip to content

Instantly share code, notes, and snippets.

@itherunder
Created December 20, 2021 02:47
Show Gist options
  • Save itherunder/89372d8a72cf6c4ac2ae99a39cb722d7 to your computer and use it in GitHub Desktop.
Save itherunder/89372d8a72cf6c4ac2ae99a39cb722d7 to your computer and use it in GitHub Desktop.
Python读取docx, pdf, xlsx
from logging import info
import os, pdfplumber, docx, xlrd
import sys
from win32com import client as wc
def doc2docx(file):
    """Convert a legacy Word ``.doc`` file to ``.docx`` via Word COM automation.

    The source document is opened from ``doc_path + '/' + file`` and the
    converted copy is saved beside it under the same name with an ``x``
    appended (``report.doc`` -> ``report.docx``).

    ``doc_path`` is a module-level name that is not defined in this function;
    presumably the directory being scanned -- confirm against the caller.

    Args:
        file: File name of the document, relative to ``doc_path``.

    Raises:
        Any error raised by the Word automation calls is propagated to the
        caller; the Word instance is still shut down in that case.
    """
    source = doc_path + '/' + file
    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(source)
        # 12 is Word's wdFormatXMLDocument save format, i.e. ".docx".
        doc.SaveAs(source + 'x', 12)
        doc.Close()
    finally:
        # Quit even when opening or saving fails; otherwise every failed
        # conversion would leave a Word process running in the background.
        word.Quit()
def search_pdf(doc):
    """Report a PDF file if any of its pages contains the search term.

    Opens ``doc_path + '/' + doc`` with pdfplumber and checks the extracted
    text of each page for ``name``. On the first hit the file name is printed
    and a ``pdf: <file>`` line is written to ``info_fd``; the remaining pages
    are skipped.

    ``doc_path``, ``name`` and ``info_fd`` are module-level names that are not
    defined in this function (presumably the scanned directory, the search
    string and an open writable text file -- confirm against the caller).

    Any exception is printed rather than raised, so one unreadable file does
    not stop a larger scan.

    Args:
        doc: File name of the PDF, relative to ``doc_path``.
    """
    try:
        with pdfplumber.open(doc_path + '/' + doc) as pdf:
            for page in pdf.pages:
                # extract_text() can return None for a page without any
                # extractable text; fall back to '' so such a page is simply
                # skipped instead of aborting the search of later pages.
                text = page.extract_text() or ''
                if name in text:
                    print('pdf:', doc)
                    info_fd.write('pdf: %s\n' % doc)
                    return
    except Exception as e:
        # Best-effort scan: report the problem and carry on.
        print(e)
def search_word(doc):
    """Report a Word document if any of its paragraphs contains the search term.

    A legacy ``.doc`` file is first converted with ``doc2docx`` (unless the
    converted ``<file>x`` already exists) and the converted file is searched
    instead. On the first paragraph containing ``name`` the searched file name
    is printed and a ``word: <file>`` line is written to ``info_fd``.

    Only ``Document.paragraphs`` is inspected, exactly as before; text that
    lives elsewhere in the document is not searched.

    ``doc_path``, ``name`` and ``info_fd`` are module-level names that are not
    defined in this function (presumably the scanned directory, the search
    string and an open writable text file -- confirm against the caller).

    Any exception is printed rather than raised, so one unreadable file does
    not stop a larger scan.

    Args:
        doc: File name of the document, relative to ``doc_path``.
    """
    try:
        # Match the real ".doc" extension, ignoring case, so "REPORT.DOC" is
        # converted too and a name that merely ends in the letters "doc" is
        # not sent to Word.
        if doc.lower().endswith('.doc'):
            converted = doc + 'x'
            if not os.path.exists(doc_path + '/' + converted):
                doc2docx(doc)
            # From here on work with (and report) the converted file.
            doc = converted
        word = docx.Document(doc_path + '/' + doc)
        for para in word.paragraphs:
            if name in para.text:
                print('word:', doc)
                info_fd.write('word: %s\n' % doc)
                return
    except Exception as e:
        # Best-effort scan: report the problem and carry on.
        print(e)
def search_excel(doc):
    """Report a spreadsheet if any cell contains the search term.

    Opens ``doc_path + '/' + doc`` with xlrd and walks every sheet row by
    row, comparing ``name`` against the string form of each cell value. On
    the first hit the file name is printed and an ``excel: <file>`` line is
    written to ``info_fd``; the rest of the workbook is skipped.

    ``doc_path``, ``name`` and ``info_fd`` are module-level names that are not
    defined in this function (presumably the scanned directory, the search
    string and an open writable text file -- confirm against the caller).

    Any exception is printed rather than raised.

    Args:
        doc: File name of the workbook, relative to ``doc_path``.
    """
    try:
        workbook = xlrd.open_workbook(doc_path + '/' + doc)
        for sheet in workbook.sheets():
            for row_index in range(sheet.nrows):
                # Cell values may be numbers, so compare against their
                # string form.
                row_texts = (str(value) for value in sheet.row_values(row_index))
                if any(name in text for text in row_texts):
                    print('excel:', doc)
                    info_fd.write('excel: %s\n' % doc)
                    return
    except Exception as e:
        print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment