Skip to content

Instantly share code, notes, and snippets.

@Sciroccogti
Last active August 20, 2020 10:17
Show Gist options
  • Save Sciroccogti/2d396cb0ac43510d437409a5dac27ae1 to your computer and use it in GitHub Desktop.
Save Sciroccogti/2d396cb0ac43510d437409a5dac27ae1 to your computer and use it in GitHub Desktop.
turn files into pdf
##############################################
## Script to convert .doc, .docx or .tmd to .pdf ##
## Written by Henry Ling ##
##############################################
import os
from time import strftime
from win32com import client
def n_files(directory):
    """Count the entries in ``directory`` that this script can convert.

    An entry counts when its name ends with ``.doc``, ``.docx`` or ``.tmd``.
    The comparison is case-sensitive, the same test the conversion loop uses.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        The number of matching directory entries.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    # str.endswith accepts a tuple, so one call covers every suffix.
    return sum(
        1
        for name in os.listdir(directory)
        if name.endswith(('.doc', '.docx', '.tmd'))
    )
def createFolder(directory):
    """Create a ``PDFs`` sub-directory inside ``directory`` if it is missing.

    Args:
        directory: Path of the directory that should contain ``PDFs``.

    Raises:
        OSError: If the directory cannot be created, including when a
            non-directory named ``PDFs`` already exists.
    """
    # exist_ok=True removes the gap between "check" and "create" that the
    # separate os.path.exists() test had; os.path.join avoids hard-coding
    # the path separator.
    os.makedirs(os.path.join(directory, 'PDFs'), exist_ok=True)
if __name__ == "__main__":
    # Converts every Word document in the current working directory to a PDF
    # stored in a "PDFs" sub-directory, driving Microsoft Word through COM.

    # Value passed as FileFormat to Document.SaveAs; 17 is wdFormatPDF in
    # Word's WdSaveFormat enumeration.
    WD_FORMAT_PDF = 17
    WORD_SUFFIXES = ('.doc', '.docx', '.tmd')

    print('\nPlease note that this will overwrite any existing PDF files')
    print('For best results, close Microsoft Word before proceeding')
    input('Press enter to continue.')
    directory = os.getcwd()
    if n_files(directory) == 0:
        print('There are no files to convert')
        # SystemExit instead of exit(): exit() is injected by the site
        # module and is not guaranteed to exist.
        raise SystemExit
    createFolder(directory)
    print('Starting conversion... \n')
    # Opens each file with Microsoft Word and saves as a PDF
    word = None  # stays None if Word could not be started
    try:
        word = client.DispatchEx('Word.Application')
        for file in os.listdir(directory):
            if not file.endswith(WORD_SUFFIXES):
                continue
            # Swap only the trailing extension. str.replace() would also
            # rewrite the same text elsewhere in the name
            # (e.g. "my.document.doc" -> "my.pdfument.pdf").
            new_name = os.path.splitext(file)[0] + '.pdf'
            in_file = os.path.abspath(os.path.join(directory, file))
            new_file = os.path.abspath(
                os.path.join(directory, 'PDFs', new_name))
            doc = word.Documents.Open(in_file)
            try:
                print(new_name)
                doc.SaveAs(new_file, FileFormat=WD_FORMAT_PDF)
            finally:
                # Close the document even when saving failed.
                doc.Close()
    except Exception as err:
        # Catch Exception rather than everything, so Ctrl+C still
        # interrupts the run, and report the cause.
        print("Error: Aborting")
        print(err)
    finally:
        # Only quit Word if it was actually started; otherwise the cleanup
        # itself would raise NameError and hide the real failure.
        if word is not None:
            word.Quit()
    print('\nConversion finished at ' + strftime("%H:%M:%S"))
import img2pdf
import os
filelist = os.listdir('./')
count = 0
converted = []
for file in filelist:
if not file[0:-5] in converted and not file[0:-4] in converted and (file.endswith('.jpg') or file.endswith('.png')):
print('found ' + file)
imglist = []
imglist.append(file)
if file[-5] == '1': # first image
filename = file[0:-5]
for i in range(2, 10):
if filename + str(i) + file[-4:] in filelist:
imglist.append(filename + str(i) + file[-4:])
print('found ' + filename + str(i) + file[-4:])
else:
break
else:
filename = file[0:-4]
with open(filename + '.pdf', 'wb') as pdf:
print('converting ' + filename + '.pdf')
pdf.write(img2pdf.convert(imglist))
count += 1
print('%d done!' % count)
converted.append(filename)
import pikepdf
import os
import sys
if __name__ == "__main__":
    # Merges every PDF in the current directory into one PDF whose name is
    # given as the first command-line argument.
    if len(sys.argv) < 2:
        print('Please enter output pdf name!')
        # Non-zero status so callers can tell the run did nothing.
        sys.exit(1)
    out_name = sys.argv[1]
    # Compare normalised absolute paths so "./out.pdf" and "out.pdf" are
    # recognised as the same file and the output is never merged into itself.
    out_key = os.path.normcase(os.path.abspath(out_name))
    outpdf = pikepdf.Pdf.new()
    count = 0
    # Sorted so the page order of the result is predictable; os.listdir()
    # returns entries in arbitrary order.
    for file in sorted(os.listdir('./')):
        if not file.endswith('.pdf'):
            continue
        if os.path.normcase(os.path.abspath(file)) == out_key:
            continue
        print('Appending ' + file + ' ...')
        # NOTE(review): the source documents are deliberately left open until
        # after save(); pikepdf may still read from them when writing the
        # merged file - confirm before closing them earlier.
        src = pikepdf.Pdf.open(file)
        outpdf.pages.extend(src.pages)
        count += 1
    outpdf.save(out_name)
    # Report success only after the file has actually been written.
    print('%d pdfs merged into 1!' % count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment