Skip to content

Instantly share code, notes, and snippets.

@joffilyfe
Created April 25, 2019 16:36
Show Gist options
  • Save joffilyfe/eb6998994739e7e14be16d6101138185 to your computer and use it in GitHub Desktop.
Save joffilyfe/eb6998994739e7e14be16d6101138185 to your computer and use it in GitHub Desktop.
from documentstore_migracao.utils.xylose_converter import parse_date
def get_publication_date(document: etree.ElementTree) -> str:
    """Return the publication date found in the document's ``article-meta``.

    The ``pub-date`` children of ``article-meta`` are probed from the most
    specific selector to the most generic one; the first element that yields
    a usable date wins.

    Args:
        document: Parsed XML tree (or element) that contains ``article-meta``.

    Returns:
        The date as ``"year"``, ``"year-month"`` or ``"year-month-day"``,
        built from the ``year``/``month``/``day`` children with surrounding
        whitespace removed. An empty string is returned when no ``pub-date``
        provides at least a year.

    Raises:
        ValueError: If the document has no ``article-meta`` element.
    """
    # Candidate selectors, in order of preference.
    candidates = (
        'pub-date[@pub-type="epub"]',
        'pub-date[@date-type="pub"]',
        "pub-date",
    )

    article_meta = document.find(".//article-meta")
    if article_meta is None:
        raise ValueError("XML não possui article-meta")

    for xpath in candidates:
        pubdate = article_meta.find(xpath)
        if pubdate is None:
            continue

        parts = []
        for elem_name in ("year", "month", "day"):
            value = (pubdate.findtext(elem_name) or "").strip()
            if not value:
                # Stop at the first gap so the result is always a valid
                # prefix (never "year-day" with the month missing).
                break
            parts.append(value)

        if parts:
            return "-".join(parts)
        # An empty pub-date element is not an answer; try the next selector.

    # Always honour the declared ``str`` return type (still falsy for callers).
    return ""
def get_document_bundle_manifest(
    document: etree.ElementTree, document_url: str, assets: list
) -> dict:
    """Build a Kernel-format manifest from an XML document.

    Args:
        document: Parsed XML tree of the article.
        document_url: Location of the XML, stored as the version's ``data``.
        assets: Iterable of mappings; each one is read through
            ``.get("asset_id")`` and ``.get("asset_url")``.

    Returns:
        A dict ``{"id": <scielo-id>, "versions": [<version>]}`` where the
        single version holds ``data``, ``assets`` and ``timestamp``. Every
        asset id maps to ``[[timestamp, asset_url]]``.

    Raises:
        ValueError: If the document has no non-empty ``scielo-id`` or no
            publication date.
    """
    id_element = document.find(".//article-id[@pub-id-type='scielo-id']")
    _id = id_element.text if id_element is not None else None
    # An element that exists but carries no text must be rejected as well,
    # otherwise the manifest would be created with ``"id": None``.
    if _id is None or not _id.strip():
        raise ValueError("Document requires an scielo-id")

    date = get_publication_date(document)
    if not date:
        raise ValueError("A creation date is required")

    _creation_date = parse_date(date)

    # Each asset starts with a single history entry stamped with the
    # document's creation date; a repeated asset_id keeps the last entry.
    _assets = {
        asset.get("asset_id"): [[_creation_date, asset.get("asset_url")]]
        for asset in assets
    }
    _version = {"data": document_url, "assets": _assets, "timestamp": _creation_date}

    return {"id": _id, "versions": [_version]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment