Created
September 3, 2024 05:04
-
-
Save ramSeraph/152fc8fd67a8a786460b3b3931adb975 to your computer and use it in GitHub Desktop.
Code to convert doc files to pdf using google drive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# License: UNLICENSE | |
import io | |
import json | |
import string | |
import random | |
import mimetypes | |
from pathlib import Path | |
import magic | |
from httplib2 import Http | |
from oauth2client import file, client, tools | |
from googleapiclient.discovery import build | |
from googleapiclient.errors import HttpError | |
from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload | |
def get_random_string(n):
    """Return a string of *n* characters picked at random from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=n)
    return ''.join(picked)
def export_pdf(file_id, service):
    """Export the Drive file *file_id* as PDF and return the PDF bytes.

    The export is streamed chunk by chunk into an in-memory buffer, and a
    progress line is printed after every chunk.
    """
    buffer = io.BytesIO()
    export_request = service.files().export_media(
        fileId=file_id,
        mimeType="application/pdf",
    )
    downloader = MediaIoBaseDownload(buffer, export_request)

    finished = False
    while not finished:
        progress, finished = downloader.next_chunk()
        print(f"Download {int(progress.progress() * 100)}.")

    return buffer.getvalue()
def upload_file(content, service,
                target_mimetype="application/vnd.google-apps.spreadsheet"):
    """Upload *content* to Google Drive and return the id of the new file.

    The MIME type of the payload is sniffed from the bytes with ``magic`` and
    the file is given a random 7-character name plus a matching extension.

    Args:
        content: Raw bytes of the file to upload.
        service: Drive v3 service object (see ``get_service``).
        target_mimetype: Value sent as ``mimeType`` in the file metadata,
            i.e. the Google type the upload is created as. Defaults to the
            Google Sheets type, as before.
            NOTE(review): the script's example input is a ``.doc`` file; for
            word-processing input the Google Docs type
            ``application/vnd.google-apps.document`` is presumably the
            intended target - confirm before changing the default.

    Returns:
        The ``id`` field of the created Drive file.
    """
    mimetype = magic.from_buffer(content, mime=True)
    # guess_extension() returns None for MIME types it does not know; fall
    # back to no extension instead of failing on ``str + None``.
    ext = mimetypes.guess_extension(mimetype, strict=True) or ""
    fname = get_random_string(7) + ext

    file_metadata = {
        "name": fname,
        "mimeType": target_mimetype,
    }
    media = MediaIoBaseUpload(io.BytesIO(content), mimetype=mimetype)
    # Only the id is requested back; nothing else from the response is used.
    created = (
        service.files()
        .create(body=file_metadata, media_body=media, fields="id")
        .execute()
    )
    return created.get("id")
def delete_file(file_id, service):
    """Remove the Drive file *file_id*.

    A direct ``files().delete`` is attempted first. If Drive rejects it with
    an ``HttpError``, the function falls back to the older workaround: mark
    the file as trashed, then empty the trash.

    Args:
        file_id: Id of the Drive file to remove.
        service: Drive v3 service object (see ``get_service``).
    """
    try:
        # A request object is only sent when .execute() is called. The
        # earlier direct-delete attempt built the request but never executed
        # it, which is the likely reason it appeared not to work.
        service.files().delete(fileId=file_id).execute()
    except HttpError as err:
        print('direct delete failed, trashing instead:', err)
        service.files().update(fileId=file_id, body={'trashed': True}).execute()
        # NOTE: emptyTrash() is called without a file id - it clears the
        # whole trash, not just the file trashed above.
        service.files().emptyTrash().execute()
def get_service():
    """Build and return an authorized Google Drive v3 service object.

    Credentials are read from ``storage.json``. When none come back, or they
    are flagged invalid, the OAuth flow described by ``credentials.json`` is
    run with the same store passed to ``tools.run_flow``.
    """
    scopes = 'https://www.googleapis.com/auth/drive'
    token_store = file.Storage('storage.json')

    credentials = token_store.get()
    needs_auth = not credentials or credentials.invalid
    if needs_auth:
        oauth_flow = client.flow_from_clientsecrets('credentials.json', scopes)
        credentials = tools.run_flow(oauth_flow, token_store)

    authorized_http = credentials.authorize(Http())
    return build('drive', 'v3', http=authorized_http)
def convert(from_file, to_file, service=None):
    """Convert *from_file* to PDF via Google Drive and write it to *to_file*.

    The input is uploaded to Drive, exported back as PDF, written to disk,
    and the temporary Drive file is then removed.

    Args:
        from_file: Path of the local file to convert.
        to_file: Path the resulting PDF is written to.
        service: Optional Drive v3 service object; when ``None`` one is
            created with ``get_service()``.
    """
    if service is None:
        service = get_service()

    from_bytes = Path(from_file).read_bytes()

    print('uploading file')
    file_id = upload_file(from_bytes, service)
    try:
        print('downloading file as pdf')
        pdf_content = export_pdf(file_id, service)
        Path(to_file).write_bytes(pdf_content)
    finally:
        # Always clean up the uploaded copy, even when the export or the
        # local write fails, so failed runs do not leave files in Drive.
        print('deleting file', file_id)
        delete_file(file_id, service)
if __name__ == "__main__":
    # Example run: convert the sample Word document sitting next to this
    # script into a PDF of the same base name.
    source_doc = 'ms1288-2007.doc'
    target_pdf = 'ms1288-2007.pdf'
    convert(source_doc, target_pdf)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python-magic | |
httplib2 | |
google-api-python-client | |
google-auth-httplib2 | |
google-auth-oauthlib |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
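WARNING: Deleting leftover files was not working due to (what I am guessing are) permission issues, so I move them to the trash and then empty the trash. Note that emptying the trash permanently removes everything in it, not just the converted file.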