Created
April 25, 2019 16:36
-
-
Save joffilyfe/eb6998994739e7e14be16d6101138185 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from documentstore_migracao.utils.xylose_converter import parse_date | |
def get_publication_date(document: etree.ElementTree) -> str:
    """Return the article publication date as ``year[-month[-day]]``.

    The date is read from the first ``pub-date`` child of ``article-meta``
    that matches, in order of preference: ``@pub-type="epub"``,
    ``@date-type="pub"``, then any ``pub-date``.

    Args:
        document: Parsed XML tree (or element) to search.

    Returns:
        The text of the ``year``, ``month`` and ``day`` children joined with
        ``-``. Components are taken in that order and reading stops at the
        first missing/blank one, so the result never skips a component
        (e.g. no "year-day"). An empty string is returned when there is no
        ``pub-date`` element or the selected one has no ``year``.

    Raises:
        ValueError: If the document has no ``article-meta`` element.
    """
    article_meta = document.find(".//article-meta")
    if article_meta is None:
        raise ValueError("XML não possui article-meta")

    # Ordered from the most specific to the most generic selector.
    candidates = (
        'pub-date[@pub-type="epub"]',
        'pub-date[@date-type="pub"]',
        "pub-date",
    )
    for xpath in candidates:
        pubdate = article_meta.find(xpath)
        if pubdate is None:
            continue

        parts = []
        for elem_name in ("year", "month", "day"):
            text = (pubdate.findtext(elem_name) or "").strip()
            if not text:
                # A gap makes the remaining components meaningless.
                break
            parts.append(text)
        return "-".join(parts)

    # Always honour the ``-> str`` contract; callers test for falsiness.
    return ""
def get_document_bundle_manifest(
    document: etree.ElementTree, document_url: str, assets: list
) -> dict:
    """Build a Kernel-format manifest from an XML document.

    Args:
        document: Parsed XML tree (or element) of the article.
        document_url: Value stored under the version's ``"data"`` key.
        assets: Mappings providing ``asset_id`` and ``asset_url`` (read with
            ``.get``); each becomes one entry of the version's ``"assets"``.

    Returns:
        A dict ``{"id": <scielo-id>, "versions": [version]}`` where
        ``version`` holds ``"data"``, ``"assets"`` and ``"timestamp"``. The
        timestamp, and the timestamp of every asset entry, is the parsed
        publication date of the document.

    Raises:
        ValueError: If the document has no (or an empty) ``scielo-id``
            article-id, or if no publication date can be extracted.
    """
    id_node = document.find(".//article-id[@pub-id-type='scielo-id']")
    _id = id_node.text if id_node is not None else None
    # An element that exists but is empty has ``text is None``; reject it
    # instead of silently producing a manifest with a null id.
    if not _id or not _id.strip():
        raise ValueError("Document requires an scielo-id")

    date = get_publication_date(document)
    if not date:
        raise ValueError("A creation date is required")

    _creation_date = parse_date(date)
    _version = {
        "data": document_url,
        # Each asset starts with a single [timestamp, url] history entry.
        "assets": {
            asset.get("asset_id"): [[_creation_date, asset.get("asset_url")]]
            for asset in assets
        },
        "timestamp": _creation_date,
    }
    return {"id": _id, "versions": [_version]}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment