Skip to content

Instantly share code, notes, and snippets.

@CodeCouturiers
Created August 9, 2024 11:38
Show Gist options
  • Save CodeCouturiers/f92612c229beb78324bde8d78177a6c1 to your computer and use it in GitHub Desktop.
Save CodeCouturiers/f92612c229beb78324bde8d78177a6c1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
from peepdf.PDFCore import PDFParser, PDFObject, PDFDictionary
def _to_bytes(stream):
    """Return *stream* as bytes so operator tokens can be searched uniformly.

    Accepts ``None``/empty values, ``str`` and bytes-like objects; characters
    that do not fit in Latin-1 are dropped because only ASCII operators are
    searched for in the result.
    """
    if not stream:
        return b""
    if isinstance(stream, str):
        return stream.encode("latin-1", errors="ignore")
    return bytes(stream)


def _has_cyrillic_chars(text):
    """Return True if *text* has a code point in the Cyrillic block U+0400..U+04FF."""
    return any("\u0400" <= ch <= "\u04ff" for ch in text)


def _stream_has_cyrillic(stream):
    """Heuristically report whether a decoded stream carries Cyrillic text.

    Iterating over ``bytes`` yields integers in 0..255, so a per-byte
    comparison against 0x0400..0x04FF can never succeed. Instead the data is
    interpreted as UTF-8 (undecodable bytes are skipped) and the resulting
    characters are tested.

    This is a best-effort check only: text drawn with CID fonts or legacy
    single-byte encodings is not stored as UTF-8 and will not be detected.
    """
    if not stream:
        return False
    if isinstance(stream, str):
        if _has_cyrillic_chars(stream):
            return True
        stream = stream.encode("latin-1", errors="ignore")
    return _has_cyrillic_chars(bytes(stream).decode("utf-8", errors="ignore"))


def check_cyrillic_support(pdf_file):
    """Print a short report on Cyrillic-related fonts and text in a PDF.

    The file is parsed with peepdf's ``PDFParser``; only the first body
    (``pdf.body[0]``) is inspected. Two findings are printed to stdout:

    * how many ``/Font`` dictionaries declare one of the encodings this
      script treats as potentially Cyrillic-capable;
    * how many FlateDecode content streams containing both ``BT`` and ``ET``
      appear to carry Cyrillic characters.

    Args:
        pdf_file: Path of the PDF document to analyse.

    Returns:
        None. If the parser reports a non-zero status, an error message is
        printed and the function returns early.
    """
    parser = PDFParser()
    ret, pdf = parser.parse(pdf_file)
    if ret != 0:
        print("Error parsing PDF file")
        return
    pdf_body = pdf.body[0]

    # Font check.
    # NOTE(review): /WinAnsiEncoding is a Latin code page and /Unicode is not
    # a standard PDF encoding name; the list is kept as-is to preserve the
    # script's existing heuristic - confirm whether it should be narrowed.
    candidate_encodings = ['/Identity-H', '/WinAnsiEncoding', '/Unicode']
    cyrillic_fonts = []
    for obj in pdf_body.objects.values():
        dict_obj = obj.object
        if not isinstance(dict_obj, PDFDictionary):
            continue
        elements = dict_obj.elements
        if '/Type' not in elements or elements['/Type'].getValue() != '/Font':
            continue
        if '/Encoding' not in elements:
            continue
        encoding = elements['/Encoding']
        if isinstance(encoding, PDFObject):
            encoding = encoding.getValue()
        if encoding in candidate_encodings:
            cyrillic_fonts.append(dict_obj)

    if not cyrillic_fonts:
        print("Warning: No fonts with potential Cyrillic support found")
    else:
        print(f"Found {len(cyrillic_fonts)} fonts with potential Cyrillic support")

    # Text object check.
    cyrillic_text_objects = []
    for obj in pdf_body.objects.values():
        stream_obj = obj.object
        if stream_obj.type != 'stream':
            continue
        # Dictionary keys are read through ``elements``, exactly as in the
        # font pass above.
        # NOTE(review): presumably ``dictType`` only holds the /Type name, so
        # it cannot be used to look up /Filter - confirm against the
        # installed peepdf version.
        elements = stream_obj.elements
        if '/Filter' not in elements:
            continue
        if elements['/Filter'].getValue() != '/FlateDecode':
            continue
        decoded_stream = stream_obj.decodedStream
        stream_bytes = _to_bytes(decoded_stream)
        # BT ... ET are the operators that delimit a text object.
        if b'BT' not in stream_bytes or b'ET' not in stream_bytes:
            continue
        if _stream_has_cyrillic(decoded_stream):
            cyrillic_text_objects.append(stream_obj)

    if not cyrillic_text_objects:
        print("Warning: No text objects with Cyrillic characters found")
    else:
        print(f"Found {len(cyrillic_text_objects)} text objects with Cyrillic characters")
if __name__ == "__main__":
    # Script entry point: analyse the PDF named on the command line, or fall
    # back to the original sample document when no argument is supplied.
    default_pdf_file = r"C:\Users\user\source\repos\PDFGenerator\PDFGenerator\bin\Debug\MediumTermLeaseAgreement.pdf"
    pdf_file = sys.argv[1] if len(sys.argv) > 1 else default_pdf_file
    check_cyrillic_support(pdf_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment