Skip to content

Instantly share code, notes, and snippets.

@marph91
Created October 28, 2024 17:37
Show Gist options
  • Save marph91/4d069f948d58abec490d0e5699a4aa69 to your computer and use it in GitHub Desktop.
Save marph91/4d069f948d58abec490d0e5699a4aa69 to your computer and use it in GitHub Desktop.
fix the creation date of some documents in paperless-ngx
"""fix the creation date of some documents in paperless-ngx"""
import datetime as dt
from urllib.parse import urljoin
import requests
class Session(requests.Session):
    """Session that joins paths onto a base URL and returns decoded JSON.

    Every request path is resolved against ``base_url``, HTTP error statuses
    raise, and the decoded JSON body is returned instead of the ``Response``.

    Pattern taken from https://stackoverflow.com/a/51026159/7410886
    """

    def __init__(self, base_url: str):
        """Create a session whose request paths are joined onto ``base_url``."""
        super().__init__()
        self.base_url = base_url

    def request(self, method, path: str, *args, **kwargs):
        """Send a request to ``path`` resolved against the base URL.

        Args:
            method: HTTP method name, passed through to ``requests``.
            path: URL path (optionally with a query string); it is combined
                with ``base_url`` via ``urljoin``.
            *args: Forwarded unchanged to ``requests.Session.request``.
            **kwargs: Forwarded unchanged to ``requests.Session.request``.

        Returns:
            The decoded JSON body, or ``None`` when the response has no body
            (for example ``204 No Content``).

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = urljoin(self.base_url, path)
        response = super().request(method, url, *args, **kwargs)
        response.raise_for_status()
        # A successful response may be empty; calling .json() on it would
        # raise a decode error even though the request itself succeeded.
        if not response.content:
            return None
        return response.json()
def unpaginate(session: "Session", path: str) -> list:
    """Collect the ``results`` of every page of a paginated API listing.

    Args:
        session: Object whose ``get(path)`` returns the decoded JSON page, a
            mapping with a ``results`` list and a ``next`` link (``None`` on
            the last page).
        path: Path (and optional query string) of the first page.

    Returns:
        A new list holding the ``results`` entries of all pages, in order.
    """
    from urllib.parse import urlsplit

    page = session.get(path)
    # Copy so the first page's own list is not extended in place.
    items = list(page["results"])
    while (next_url := page.get("next")) is not None:
        # Keep only path and query of the absolute "next" link. The server may
        # report it under a different scheme or host than the session's base
        # URL (e.g. behind a reverse proxy), so stripping a fixed-length
        # prefix is not reliable.
        target = urlsplit(next_url)
        next_path = target.path
        if target.query:
            next_path = f"{next_path}?{target.query}"
        page = session.get(next_path)
        items.extend(page["results"])
    return items
def _created_date(value: str) -> str:
    """Return the ``YYYY-MM-DD`` calendar date of an ISO 8601 timestamp."""
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    return dt.datetime.fromisoformat(value).strftime("%Y-%m-%d")


def main(session: Session, tag_name: str = "<tag>"):
    """Set the creation date of tagged documents from their archive metadata.

    Looks up the tag called ``tag_name``, fetches every document carrying it
    and, for each document whose archive metadata has a ``CreateDate`` entry,
    writes that value back as the document's ``created`` / ``created_date``.

    Args:
        session: Session whose ``get`` / ``put`` take a path and return the
            decoded JSON response.
        tag_name: Name of the tag selecting the documents to fix. The default
            ``"<tag>"`` is a placeholder to be replaced before use.

    Raises:
        LookupError: If no tag named ``tag_name`` exists.
    """
    # get the ID of a specific tag
    tags = unpaginate(session, "/api/tags/")
    tag_id = next((tag["id"] for tag in tags if tag["name"] == tag_name), None)
    if tag_id is None:
        # Explicit raise instead of assert: asserts are stripped under -O.
        raise LookupError(f"tag {tag_name!r} not found")

    # get all documents with this tag
    # https://github.com/paperless-ngx/paperless-ngx/discussions/6937#discussioncomment-9703790
    documents = unpaginate(session, f"/api/documents/?tags__id__all={tag_id}")
    for document in documents:
        document_path = f"/api/documents/{document['id']}/"
        metadata = session.get(f"{document_path}metadata/")
        # Use the first CreateDate entry so each document is updated at most
        # once; "or ()" covers documents without archive metadata.
        create_date = next(
            (
                datum["value"]
                for datum in metadata["archive_metadata"] or ()
                if datum["key"] == "CreateDate"
            ),
            None,
        )
        if create_date is None:
            continue

        # Quotes inside the replacement fields differ from the enclosing
        # ones: reusing the same quote is a syntax error before Python 3.12.
        print(
            f'"{document["title"]}" -',
            "old date:",
            document["created"],
            "- new date:",
            create_date,
        )
        document["created_date"] = _created_date(create_date)
        document["created"] = create_date
        session.put(document_path, json=document)
if __name__ == "__main__":
    # "<base_url>", "<user>" and "<password>" look like placeholders that
    # have to be filled in before the script is run.
    credentials = requests.auth.HTTPBasicAuth("<user>", "<password>")
    with Session("<base_url>") as api:
        api.auth = credentials
        main(api)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment