Created
December 20, 2021 02:47
-
-
Save itherunder/89372d8a72cf6c4ac2ae99a39cb722d7 to your computer and use it in GitHub Desktop.
Python读取docx, pdf, xlsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from logging import info | |
import os, pdfplumber, docx, xlrd | |
import sys | |
from win32com import client as wc | |
def doc2docx(file):
    """Convert a legacy ``.doc`` file to ``.docx`` through Word COM automation.

    The source document is opened from ``doc_path + '/' + file`` and saved
    next to it under the same name with an ``x`` appended (``a.doc`` ->
    ``a.docx``). ``doc_path`` is a module-level name defined outside this
    block.

    Args:
        file: File name of the document, relative to ``doc_path``.

    Raises:
        Whatever the COM layer raises when Word cannot be started, or the
        document cannot be opened or saved. Errors are not swallowed here.
    """
    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(doc_path + '/' + file)
        try:
            # 12 is Word's wdFormatXMLDocument save format, i.e. .docx.
            doc.SaveAs(doc_path + '/' + '{}x'.format(file), 12)
        finally:
            # Close the document even when saving fails so it is not left open.
            doc.Close()
    finally:
        # Always shut the Word instance down; otherwise a failed open/save
        # would leave the Word application running.
        word.Quit()
def search_pdf(doc):
    """Report the PDF ``doc`` if any of its pages contains ``name``.

    The file is opened from ``doc_path + '/' + doc``. On the first page whose
    extracted text contains ``name``, a ``pdf: <file>`` line is printed and
    written to ``info_fd``, and the search stops. ``doc_path``, ``name`` and
    ``info_fd`` are module-level names defined outside this block.

    Any exception raised while opening or reading the file is printed and
    suppressed, so one unreadable file does not abort the caller.

    Args:
        doc: File name of the PDF, relative to ``doc_path``.
    """
    try:
        with pdfplumber.open(doc_path + '/' + doc) as pdf:
            for page in pdf.pages:
                # extract_text() can yield None for a page without a text
                # layer (e.g. a scanned image). Treat that as empty text so
                # the remaining pages are still searched instead of the
                # whole file being abandoned on a TypeError.
                text = page.extract_text() or ''
                if name in text:
                    print('pdf:', doc)
                    info_fd.write('pdf: %s\n' % doc)
                    return
    except Exception as e:
        print(e)
def search_word(doc):
    """Report the Word document ``doc`` if any paragraph contains ``name``.

    A file name ending in ``doc`` is first converted with ``doc2docx`` unless
    the converted file (same name plus ``x``) already exists under
    ``doc_path``; the converted file is then the one that is searched and
    reported. On a match, a ``word: <file>`` line is printed and written to
    ``info_fd``. ``doc_path``, ``name`` and ``info_fd`` are module-level
    names defined outside this block.

    Any exception raised during conversion or reading is printed and
    suppressed.

    Args:
        doc: File name of the document, relative to ``doc_path``.
    """
    try:
        if doc.endswith('doc'):
            converted = doc + 'x'
            if not os.path.exists(doc_path + '/' + converted):
                doc2docx(doc)
            doc = converted
        document = docx.Document(doc_path + '/' + doc)
        # any() stops at the first matching paragraph, so the file is
        # reported at most once.
        if any(name in paragraph.text for paragraph in document.paragraphs):
            print('word:', doc)
            info_fd.write('word: %s\n' % doc)
    except Exception as e:
        print(e)
def search_excel(doc):
    """Report the workbook ``doc`` if any cell's text contains ``name``.

    The workbook is opened from ``doc_path + '/' + doc`` and every sheet is
    scanned row by row; each cell value is converted with ``str`` before the
    substring test. On the first match, an ``excel: <file>`` line is printed
    and written to ``info_fd``, and the search stops. ``doc_path``, ``name``
    and ``info_fd`` are module-level names defined outside this block.

    Any exception raised while opening or reading the workbook is printed
    and suppressed.

    Args:
        doc: File name of the workbook, relative to ``doc_path``.
    """
    try:
        workbook = xlrd.open_workbook(doc_path + '/' + doc)
        for sheet in workbook.sheets():
            for row_index in range(sheet.nrows):
                row_values = sheet.row_values(row_index)
                if any(name in str(value) for value in row_values):
                    print('excel:', doc)
                    info_fd.write('excel: %s\n' % doc)
                    return
    except Exception as e:
        print(e)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment