AliRn76 · February 19, 2022 15:21
diff --git a/upload.py b/upload.py
 import os
 import shlex
 import base64
 import subprocess
 import fitz  # PyMuPDF
 from uuid import uuid4
 from fastapi import FastAPI
 from pydantic import BaseModel
 from PIL import Image, ImageDraw, ImageFont


 # FastAPI application object; the ``/upload/`` route below is registered on it.
 app = FastAPI()

 # Directory (with trailing slash) that every uploaded file is written into.
 UPLOAD_PATH = 'files/'
 # ``exist_ok`` replaces the check-then-create pair, which could raise if
 # another process created the directory between the two calls.
 os.makedirs(UPLOAD_PATH, exist_ok=True)


 class UploadSerializer(BaseModel):
    """Request payload accepted by the ``/upload/`` endpoint."""

    # Client-supplied file name; echoed back unchanged in the response.
    name: str
    # File content as base64 text; decoded by ``write_file`` before saving.
    file: str


 def pdf_to_image(file_path: str) -> None:
    """Render every page of a document to ``<base>-<page number>.jpg``.

    ``<base>`` is ``file_path`` without a trailing ``.pdf``.  When the path
    ends in ``.pdf`` the source document is deleted once all pages have been
    written; otherwise it is left in place.

    Args:
        file_path: Path of the document to open with PyMuPDF.
    """
    has_pdf_suffix = file_path.endswith('.pdf')
    base = file_path[:-4] if has_pdf_suffix else file_path

    doc = fitz.open(file_path)
    try:
        for page in doc:
            pix = page.get_pixmap(alpha=False)
            pix.save(f'{base}-{page.number}.jpg')
    finally:
        # Release the underlying file handle even if rendering fails.
        doc.close()

    # Delete only after rendering and closing: the file used to be removed
    # while the document was still open, which fails on platforms that lock
    # open files and discarded the source before any page was produced.
    if has_pdf_suffix:
        os.remove(file_path)


 def write_file(file_data: str, path: str) -> None:
    """Decode base64 text and store the resulting bytes at ``path``.

    Args:
        file_data: Base64-encoded content.
        path: Destination file; created or overwritten.
    """
    encoded = file_data.encode()
    payload = base64.decodebytes(encoded)
    with open(path, mode='wb') as sink:
        sink.write(payload)


 def get_file_extension(file_path: str) -> bytes:
    """Return the MIME type that the ``file`` utility reports for a path.

    Despite the name, the result is a MIME type such as ``b'text/plain'``,
    not a file extension.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The last whitespace-separated token printed by ``file``, as bytes.

    Raises:
        subprocess.CalledProcessError: If ``file`` exits with a non-zero
            status.
    """
    # Build an argument list instead of splitting a formatted string, so a
    # path containing spaces or quotes reaches ``file`` as one argument.
    # ``--`` keeps a path that starts with ``-`` from being read as an option.
    cmd = ['file', '--brief', '--mime-type', '--', file_path]
    result = subprocess.check_output(cmd)
    mime_type = result.split()[-1]
    print(f'{mime_type = }')
    return mime_type


 def convert_doc_to_pdf(file_path: str) -> None:
    """Convert a document to PDF with LibreOffice Writer (``lowriter``).

    The output directory is ``UPLOAD_PATH``.  The caller in this file expects
    the result to appear as ``<file_path>.pdf``.

    Args:
        file_path: Path of the document to convert.

    Raises:
        subprocess.CalledProcessError: If the conversion command exits with a
            non-zero status.
    """
    # Argument list (not a split f-string) keeps paths with spaces intact.
    cmd = [
        'lowriter',  # LibreOfficeWriter
        '--convert-to', 'pdf',
        '--outdir', f'{UPLOAD_PATH}.',
        file_path,
    ]
    # check=True reports a failed conversion here; the exit status used to be
    # ignored, deferring the failure to whichever step next needed the PDF.
    subprocess.run(cmd, check=True)


 def _render_text_preview(file_path: str, file_name: str) -> None:
    """Draw the text file at ``file_path`` onto ``UPLOAD_PATH/<file_name>.jpg``."""
    width, height, margin = 1080, 720, 10
    fnt = ImageFont.truetype('AriaTextG1-Regular.otf', 15)
    # Decode explicitly so the result does not depend on the server locale;
    # undecodable bytes are replaced instead of aborting the upload.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        text = file.read()

    # Wrap to the drawable width: text starts at x=margin, so wrapping to the
    # full image width let long lines run past the right edge.
    wrapper = TextWrapper(text=text, font=fnt, max_width=width - 2 * margin)
    wrapped_text = wrapper.wrapped_text()
    # Create an empty white canvas and draw the wrapped text onto it.
    image = Image.new(mode='RGB', size=(width, height), color='white')
    draw = ImageDraw.Draw(image)
    draw.text((margin, margin), wrapped_text, font=fnt, fill=(0, 0, 0))
    image.save(UPLOAD_PATH + file_name + '.jpg')

    # TODO: text taller than the image is clipped; write the remainder of
    #   the file to additional images.


 @app.get('/upload/')
 def upload(data: UploadSerializer):
    """Store an uploaded file and generate JPEG previews for known types.

    The payload is decoded to ``UPLOAD_PATH/<uuid hex>``, its MIME type is
    detected, and then:

    * JPEG files are only renamed;
    * Word documents are converted to PDF and each page is rendered to JPEG;
    * PDF files have each page rendered to JPEG;
    * plain text is wrapped and drawn onto a single 1080x720 JPEG;
    * anything else is renamed using the MIME subtype as its extension.

    Args:
        data: Upload payload with the client file name and base64 content.

    Returns:
        A dict holding the client-supplied name (under both ``data`` and
        ``name``) and the final stored ``path``.
    """
    # NOTE(review): the endpoint reads a request body but is registered for
    # GET; POST would be conventional — confirm clients before changing.
    original_name = data.name
    file_name = uuid4().hex
    file_path = UPLOAD_PATH + file_name

    write_file(file_data=data.file, path=file_path)
    mime_type = get_file_extension(file_path=file_path)

    if mime_type == b'image/jpeg':
        extension = '.jpeg'

    elif mime_type == b'application/msword':
        extension = '.doc'
        convert_doc_to_pdf(file_path=file_path)
        pdf_to_image(file_path=f'{file_path}.pdf')

    elif mime_type == b'application/pdf':
        extension = '.pdf'
        pdf_to_image(file_path=file_path)

    elif mime_type == b'text/plain':
        extension = '.txt'
        _render_text_preview(file_path=file_path, file_name=file_name)

    else:
        # Include the separating dot, which used to be missing
        # ("<uuid>png" instead of "<uuid>.png").
        extension = '.' + mime_type.decode().split('/')[-1]

    stored_path = file_path + extension
    os.rename(file_path, stored_path)
    # Built under its own name so the request model ``data`` is not shadowed.
    response = {
        'data': original_name,
        'path': stored_path,
        'name': original_name
    }
    return response


 class TextWrapper:
    """
    Helper class to wrap text in lines, based on given text, font
    and max allowed line width.
    """

    def __init__(self, text, font, max_width):
        """Normalise ``text`` into lines and prepare a measuring context.

        Args:
            text: Text to wrap; existing newlines separate the input lines.
            font: Pillow font used to measure rendered widths.
            max_width: Maximum rendered width allowed for one output line.
        """
        self.text = text
        self.font = font
        self.max_width = max_width
        # Runs of spaces collapse to one; each word goes through strip_word.
        self.text_lines = [
            ' '.join([self.strip_word(w) for w in line.split(' ') if w])
            for line in text.split('\n')
        ]
        # Scratch canvas: only needed to obtain a text-measuring context.
        self.draw = ImageDraw.Draw(
            Image.new(
                mode='RGB',
                size=(100, 100)
            )
        )
        self.space_width = self.get_text_width(' ')

    @staticmethod
    def strip_word(word):
        """Strip surrounding whitespace from ``word``.

        A word containing exactly one tab character is additionally prefixed
        with four spaces.
        """
        stripped = word.strip()
        # A former check ``word.strip() == '\t'`` could never be true because
        # ``strip()`` removes tabs; dropping it leaves the results unchanged.
        if word.count('\t') == 1:
            return f'    {stripped}'
        return stripped

    def get_text_width(self, text):
        """Return the rendered width of ``text`` in the wrapper's font."""
        # ``ImageDraw.textsize`` was removed in Pillow 10; prefer
        # ``textlength`` and fall back only where it is unavailable.
        measure = getattr(self.draw, 'textlength', None)
        if measure is not None:
            return measure(text, font=self.font)
        return self.draw.textsize(text=text, font=self.font)[0]

    def wrapped_text(self):
        """Return the text with line breaks inserted to respect ``max_width``.

        Input line breaks are kept.  A single word wider than ``max_width``
        is placed on a line of its own rather than being split.
        """
        wrapped_lines = []

        for line in self.text_lines:
            buf = []
            buf_width = 0

            for word in line.split(' '):
                word_width = self.get_text_width(word)

                if not buf:
                    expected_width = word_width
                else:
                    expected_width = buf_width + self.space_width + word_width

                if expected_width <= self.max_width:
                    # word fits in line
                    buf_width = expected_width
                    buf.append(word)
                else:
                    # word doesn't fit in line; flush only a non-empty
                    # buffer, otherwise an over-wide first word would emit
                    # a spurious blank line.
                    if buf:
                        wrapped_lines.append(' '.join(buf))
                    buf = [word]
                    buf_width = word_width

            if buf:
                wrapped_lines.append(' '.join(buf))

        return '\n'.join(wrapped_lines)
	import os
	import shlex
	import base64
	import subprocess
	import fitz # PyMuPDF
	from uuid import uuid4
	from fastapi import FastAPI
	from pydantic import BaseModel
	from PIL import Image, ImageDraw, ImageFont


	# FastAPI application object; the ``/upload/`` route below is registered on it.
	app = FastAPI()

	# Directory (with trailing slash) that every uploaded file is written into.
	UPLOAD_PATH = 'files/'
	# ``exist_ok`` replaces the check-then-create pair, whose body had also
	# lost its indentation in this copy.
	os.makedirs(UPLOAD_PATH, exist_ok=True)


	class UploadSerializer(BaseModel):
	    """Request payload accepted by the ``/upload/`` endpoint."""

	    # Client-supplied file name; echoed back unchanged in the response.
	    name: str
	    # File content as base64 text; decoded by ``write_file`` before saving.
	    file: str


	def pdf_to_image(file_path: str) -> None:
	    """Render every page of a document to ``<base>-<page number>.jpg``.

	    ``<base>`` is ``file_path`` without a trailing ``.pdf``.  When the path
	    ends in ``.pdf`` the source document is deleted once all pages have been
	    written; otherwise it is left in place.

	    Args:
	        file_path: Path of the document to open with PyMuPDF.
	    """
	    has_pdf_suffix = file_path.endswith('.pdf')
	    base = file_path[:-4] if has_pdf_suffix else file_path

	    doc = fitz.open(file_path)
	    try:
	        for page in doc:
	            pix = page.get_pixmap(alpha=False)
	            pix.save(f'{base}-{page.number}.jpg')
	    finally:
	        # Release the underlying file handle even if rendering fails.
	        doc.close()

	    # Delete only after rendering and closing, so the source is never
	    # removed while the document is still open.
	    if has_pdf_suffix:
	        os.remove(file_path)


	def write_file(file_data: str, path: str) -> None:
	    """Decode base64 text and store the resulting bytes at ``path``.

	    Args:
	        file_data: Base64-encoded content.
	        path: Destination file; created or overwritten.
	    """
	    payload = base64.decodebytes(file_data.encode())
	    with open(path, 'wb') as sink:
	        sink.write(payload)


	def get_file_extension(file_path: str) -> bytes:
	    """Return the MIME type that the ``file`` utility reports for a path.

	    Despite the name, the result is a MIME type such as ``b'text/plain'``,
	    not a file extension.

	    Args:
	        file_path: Path of the file to inspect.

	    Returns:
	        The last whitespace-separated token printed by ``file``, as bytes.

	    Raises:
	        subprocess.CalledProcessError: If ``file`` exits with a non-zero
	            status.
	    """
	    # Build an argument list instead of splitting a formatted string, so a
	    # path containing spaces or quotes reaches ``file`` as one argument.
	    # ``--`` keeps a path that starts with ``-`` from being read as an option.
	    cmd = ['file', '--brief', '--mime-type', '--', file_path]
	    result = subprocess.check_output(cmd)
	    mime_type = result.split()[-1]
	    print(f'{mime_type = }')
	    return mime_type


	def convert_doc_to_pdf(file_path: str) -> None:
	    """Convert a document to PDF with LibreOffice Writer (``lowriter``).

	    The output directory is ``UPLOAD_PATH``.  The caller in this file expects
	    the result to appear as ``<file_path>.pdf``.

	    Args:
	        file_path: Path of the document to convert.

	    Raises:
	        subprocess.CalledProcessError: If the conversion command exits with a
	            non-zero status.
	    """
	    # Argument list (not a split f-string) keeps paths with spaces intact.
	    cmd = [
	        'lowriter',  # LibreOfficeWriter
	        '--convert-to', 'pdf',
	        '--outdir', f'{UPLOAD_PATH}.',
	        file_path,
	    ]
	    # check=True reports a failed conversion here instead of ignoring the
	    # command's exit status.
	    subprocess.run(cmd, check=True)


	def _render_text_preview(file_path: str, file_name: str) -> None:
	    """Draw the text file at ``file_path`` onto ``UPLOAD_PATH/<file_name>.jpg``."""
	    width, height, margin = 1080, 720, 10
	    fnt = ImageFont.truetype('AriaTextG1-Regular.otf', 15)
	    # Decode explicitly so the result does not depend on the server locale;
	    # undecodable bytes are replaced instead of aborting the upload.
	    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
	        text = file.read()

	    # Wrap to the drawable width: text starts at x=margin, so wrapping to the
	    # full image width would let long lines run past the right edge.
	    wrapper = TextWrapper(text=text, font=fnt, max_width=width - 2 * margin)
	    wrapped_text = wrapper.wrapped_text()
	    # Create an empty white canvas and draw the wrapped text onto it.
	    image = Image.new(mode='RGB', size=(width, height), color='white')
	    draw = ImageDraw.Draw(image)
	    draw.text((margin, margin), wrapped_text, font=fnt, fill=(0, 0, 0))
	    image.save(UPLOAD_PATH + file_name + '.jpg')

	    # TODO: text taller than the image is clipped; write the remainder of
	    #   the file to additional images.


	@app.get('/upload/')
	def upload(data: UploadSerializer):
	    """Store an uploaded file and generate JPEG previews for known types.

	    The payload is decoded to ``UPLOAD_PATH/<uuid hex>``, its MIME type is
	    detected, and then:

	    * JPEG files are only renamed;
	    * Word documents are converted to PDF and each page is rendered to JPEG;
	    * PDF files have each page rendered to JPEG;
	    * plain text is wrapped and drawn onto a single 1080x720 JPEG;
	    * anything else is renamed using the MIME subtype as its extension.

	    Args:
	        data: Upload payload with the client file name and base64 content.

	    Returns:
	        A dict holding the client-supplied name (under both ``data`` and
	        ``name``) and the final stored ``path``.
	    """
	    # NOTE(review): the endpoint reads a request body but is registered for
	    # GET; POST would be conventional — confirm clients before changing.
	    original_name = data.name
	    file_name = uuid4().hex
	    file_path = UPLOAD_PATH + file_name

	    write_file(file_data=data.file, path=file_path)
	    mime_type = get_file_extension(file_path=file_path)

	    if mime_type == b'image/jpeg':
	        extension = '.jpeg'

	    elif mime_type == b'application/msword':
	        extension = '.doc'
	        convert_doc_to_pdf(file_path=file_path)
	        pdf_to_image(file_path=f'{file_path}.pdf')

	    elif mime_type == b'application/pdf':
	        extension = '.pdf'
	        pdf_to_image(file_path=file_path)

	    elif mime_type == b'text/plain':
	        extension = '.txt'
	        _render_text_preview(file_path=file_path, file_name=file_name)

	    else:
	        # Include the separating dot so the stored name is "<uuid>.png",
	        # not "<uuid>png".
	        extension = '.' + mime_type.decode().split('/')[-1]

	    stored_path = file_path + extension
	    os.rename(file_path, stored_path)
	    # Built under its own name so the request model ``data`` is not shadowed.
	    response = {
	        'data': original_name,
	        'path': stored_path,
	        'name': original_name
	    }
	    return response


	class TextWrapper:
	    """
	    Helper class to wrap text in lines, based on given text, font
	    and max allowed line width.
	    """

	    def __init__(self, text, font, max_width):
	        """Normalise ``text`` into lines and prepare a measuring context.

	        Args:
	            text: Text to wrap; existing newlines separate the input lines.
	            font: Pillow font used to measure rendered widths.
	            max_width: Maximum rendered width allowed for one output line.
	        """
	        self.text = text
	        self.font = font
	        self.max_width = max_width
	        # Runs of spaces collapse to one; each word goes through strip_word.
	        self.text_lines = [
	            ' '.join([self.strip_word(w) for w in line.split(' ') if w])
	            for line in text.split('\n')
	        ]
	        # Scratch canvas: only needed to obtain a text-measuring context.
	        self.draw = ImageDraw.Draw(
	            Image.new(
	                mode='RGB',
	                size=(100, 100)
	            )
	        )
	        self.space_width = self.get_text_width(' ')

	    @staticmethod
	    def strip_word(word):
	        """Strip surrounding whitespace from ``word``.

	        A word containing exactly one tab character is additionally prefixed
	        with four spaces.
	        """
	        stripped = word.strip()
	        # The four-space indent matches the intact copy of this class earlier
	        # in the file; whitespace collapsing had reduced it to one space here.
	        if word.count('\t') == 1:
	            return f'    {stripped}'
	        return stripped

	    def get_text_width(self, text):
	        """Return the rendered width of ``text`` in the wrapper's font."""
	        # ``ImageDraw.textsize`` was removed in Pillow 10; prefer
	        # ``textlength`` and fall back only where it is unavailable.
	        measure = getattr(self.draw, 'textlength', None)
	        if measure is not None:
	            return measure(text, font=self.font)
	        return self.draw.textsize(text=text, font=self.font)[0]

	    def wrapped_text(self):
	        """Return the text with line breaks inserted to respect ``max_width``.

	        Input line breaks are kept.  A single word wider than ``max_width``
	        is placed on a line of its own rather than being split.
	        """
	        wrapped_lines = []

	        for line in self.text_lines:
	            buf = []
	            buf_width = 0

	            for word in line.split(' '):
	                word_width = self.get_text_width(word)

	                if not buf:
	                    expected_width = word_width
	                else:
	                    expected_width = buf_width + self.space_width + word_width

	                if expected_width <= self.max_width:
	                    # word fits in line
	                    buf_width = expected_width
	                    buf.append(word)
	                else:
	                    # word doesn't fit in line; flush only a non-empty
	                    # buffer so an over-wide first word does not emit a
	                    # spurious blank line.
	                    if buf:
	                        wrapped_lines.append(' '.join(buf))
	                    buf = [word]
	                    buf_width = word_width

	            if buf:
	                wrapped_lines.append(' '.join(buf))

	        return '\n'.join(wrapped_lines)
No results found