Created
December 5, 2020 18:40
-
-
Save goerz/dd4e50862348949f8c05fce4596eb13b to your computer and use it in GitHub Desktop.
Python script for generating markdown/Obsidian summaries of arXiv papers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Create markdown summaries from arXiv identifiers. | |
Test with:: | |
pytest arxiv_summarize.py -s -x --doctest-modules | |
See ``--help`` for usage summary:: | |
python arxiv_summarize.py --help | |
""" | |
# MIT License | |
# | |
# Copyright (c) 2020 Michael Goerz | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in | |
# all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
import collections | |
import itertools | |
import logging | |
import os | |
import re | |
import select | |
import sys | |
import unicodedata | |
from pathlib import Path | |
from textwrap import indent | |
import arxiv # https://github.com/lukasschwab/arxiv.py | |
import click | |
from click.testing import CliRunner | |
__version__ = '0.1.0'

# fmt: off
# These are all the primary categories present in the OAI ArXiv metadata
CATEGORIES = [
    "acc-phys", "adap-org", "alg-geom", "ao-sci", "astro-ph", "atom-ph",
    "bayes-an", "chao-dyn", "chem-ph", "cmp-lg", "comp-gas", "cond-mat", "cs",
    "dg-ga", "funct-an", "gr-qc", "hep-ex", "hep-lat", "hep-ph", "hep-th",
    "math", "math-ph", "mtrl-th", "nlin", "nucl-ex", "nucl-th", "patt-sol",
    "physics", "plasm-ph", "q-alg", "q-bio", "quant-ph", "solv-int",
    "supr-con", "eess", "econ", "q-fin", "stat"
]

# All subcategories with more than 2 capital letters (not SG, SI, SP, etc)
SUB_CATEGORIES = [
    'acc-ph', 'ao-ph', 'app-ph', 'atm-clus', 'atom-ph', 'bio-ph', 'chem-ph',
    'class-ph', 'comp-ph', 'data-an', 'dis-nn', 'ed-ph', 'flu-dyn', 'gen-ph',
    'geo-ph', 'hist-ph', 'ins-det', 'med-ph', 'mes-hall', 'mtrl-sci',
    'optics', 'other', 'plasm-ph', 'pop-ph', 'quant-gas', 'soc-ph', 'soft',
    'space-ph', 'stat-mech', 'str-el', 'supr-con'
]
# fmt: on

# Regexes adapted from
# https://github.com/mattbierbaum/arxiv-public-datasets/blob/master/arxiv_public_data/regex_arxiv.py
RE_CATEGORIES = r'(?:{})(?:(?:[.][A-Z]{{2}})|(?:{}))?'.format(
    r'|'.join(CATEGORIES), r'|'.join(SUB_CATEGORIES)
)
RE_DATE = r'(?:\d{2}[01]\d)'  # YYMM
RE_VERSION = r'(?:v[1-9]\d*)?'
RE_NUM_NEW = r'(?:\d{4,5})'
RE_NUM_OLD = r'(?:\d{3})'
# matches: 1612.00001 1203.0023v2
RE_ID_NEW = r'(?:{date}\.{number}{version})'.format(
    date=RE_DATE, number=RE_NUM_NEW, version=RE_VERSION
)
RX_ID_NEW = re.compile(
    r'(?P<date>{date})\.(?P<number>{number})(?P<version>{version})'.format(
        date=RE_DATE, number=RE_NUM_NEW, version=RE_VERSION
    )
)
# matches: hep-th/11030234 cs/0112345v2 cs.AI/0112345v2
RE_ID_OLD = r'(?:{cat}/{date}{number}{version})'.format(
    cat=RE_CATEGORIES, date=RE_DATE, number=RE_NUM_OLD, version=RE_VERSION
)
RX_ID_OLD = re.compile(
    r'(?P<cat>{cat})/(?P<date>{date})(?P<number>{number})(?P<version>{version})'.format(
        cat=RE_CATEGORIES, date=RE_DATE, number=RE_NUM_OLD, version=RE_VERSION
    )
)
RX_ID = re.compile(r'(?:%s|%s)' % (RE_ID_OLD, RE_ID_NEW))


class ArxivID:
    """An arXiv ID.

    This wraps around strings that are valid arXiv IDs in the format
    described in https://arxiv.org/help/arxiv_identifier. Both "old-style"
    IDs (submissions up to March 2007), e.g.::

        >>> id = ArxivID("math.GT/0309136")
        >>> id.date
        '0309'
        >>> id.year
        2003
        >>> id.month
        9
        >>> id.num
        136
        >>> id.number
        '136'
        >>> id.version  # None
        >>> id.category
        'math.GT'
        >>> id.archive
        'math'
        >>> id.subject
        'GT'

    and "new-style" IDs (submissions after March 2007), e.g.::

        >>> id = ArxivID("1501.00001v1")
        >>> id.date
        '1501'
        >>> id.year
        2015
        >>> id.month
        1
        >>> id.num
        1
        >>> id.number
        '00001'
        >>> id.version
        1
        >>> id.category  # None
        >>> id.archive  # None
        >>> id.subject  # None

    are supported. There is no guarantee that an instantiated ArxivID
    actually exists, only that it has the correct format.

    The IDs are converted to a canonical format, e.g. by padding the article
    number with the appropriate number of zeros (4 digits before 2015, 5
    digits starting from 2015)::

        >>> ArxivID("0706.00001v2")  # extra '0'
        ArxivID('0706.0001v2')

    An ArxivID may be used in a string context::

        >>> print("arXiv:%s" % ArxivID("1501.00001v1"))
        arXiv:1501.00001v1

    Two IDs compare as equal if they canonically refer to the same
    submission. For old-style IDs, the subject-identifier is not taken into
    account::

        >>> ArxivID("math.GT/0309136") == "math/0309136"
        True

    Also, an arXiv ID that does not contain an explicit version identifier
    compares as equal to all versions of the same submission.

        >>> ArxivID('1501.00001') == ArxivID('1501.00001v1')
        True
        >>> ArxivID('1501.00001') == ArxivID('1501.00001v2')
        True

    Args:
        string (str): String from which to extract the ID
        search (bool): If True, search for the first valid arXiv ID in
            `string`. If False `string` must be a valid arXiv ID by itself.

    Attributes:
        date (str): The year and month of the arXiv submission in the format
            "YYMM"
        year (int): The full 4-digit integer year of the arXiv submission
        month (int): The month of the arXiv submission
        num (int): The number of the arXiv submission, as an integer
        number (str): The number of the arXiv submission as a string with the
            canonical zero-padding (3 digits up to March 2007, 4 digits up to
            December 2014, 5 digits for anything newer)
        version (int or None): The version number of the submission. A value
            of None refers to the "latest" version.
        category (str or None): The full "category", e.g. "math.GT". None for
            new-style IDs (since March 2007).
        archive (str or None): The archive name, e.g. "math". May be
            identical to the `category` when the `subject` is redundant or
            optional. None for new-style IDs (since March 2007).
        subject (str or None): Optional subject identifier, e.g. "GT"

    Raises:
        ValueError: If `string` does not contain a valid arXiv ID.
    """

    def __init__(self, string, search=True):
        parse_mtd = 'match'
        if search:
            parse_mtd = 'search'
        match_new = getattr(RX_ID_NEW, parse_mtd)(str(string))
        match_old = getattr(RX_ID_OLD, parse_mtd)(str(string))
        if match_new:
            self._style = 'new'
            self.date = match_new.group('date')
            self.year = 2000 + int(self.date[0:2])
            self.month = int(self.date[2:4])
            self.num = int(match_new.group('number'))
            # arXiv switched from 4-digit to 5-digit article numbers with
            # the January 2015 issue (1501.xxxxx), cf.
            # https://arxiv.org/help/arxiv_identifier. Submissions from
            # 2007-2014 canonically use 4 digits.
            if self.year < 2015:
                self.number = "%04d" % self.num
            else:
                self.number = "%05d" % self.num
            try:
                # The 'version' group always matches (possibly the empty
                # string); strip the leading "v".
                self.version = int(match_new.group('version')[1:])
            except (TypeError, ValueError):
                self.version = None
            self.category = None
            self.archive = None
            self.subject = None
        elif match_old:
            self._style = 'old'
            self.date = match_old.group('date')
            # Old-style IDs span 1991-2007; a leading '0' in YY means 200x.
            if self.date[0] == '0':
                self.year = 2000 + int(self.date[0:2])
            else:
                self.year = 1900 + int(self.date[0:2])
            self.month = int(self.date[2:4])
            self.num = int(match_old.group('number'))
            self.number = "%03d" % self.num
            try:
                self.version = int(match_old.group('version')[1:])
            except (TypeError, ValueError):
                self.version = None
            self.category = match_old.group('cat')
            self.archive = self.category.split(".")[0]
            self.subject = None
            if "." in self.category:
                self.subject = self.category.split(".")[-1]
        else:
            raise ValueError("Invalid arXiv ID: %r" % string)

    def __str__(self):
        """Return the canonical string form of the ID."""
        v = ""
        if self.version is not None:
            v = "v%d" % self.version
        if self._style == 'new':
            return "%s.%s%s" % (self.date, self.number, v)
        else:
            return "%s/%s%s%s" % (self.category, self.date, self.number, v)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, str(self))

    def __eq__(self, other):
        """Check whether both IDs canonically refer to the same submission.

        Strings are coerced to :class:`ArxivID` first. The version is only
        compared when both sides specify one.
        """
        if not isinstance(other, self.__class__):
            other = self.__class__(other)
        if self.version is not None and other.version is not None:
            if self.version != other.version:
                return False
        attribs = ['date', 'num', 'archive']
        # we don't compare the subject, as that seems to be non-canonical
        return all([getattr(self, a) == getattr(other, a) for a in attribs])

    def __hash__(self):
        # Hash on the same attributes that __eq__ always compares. The
        # version cannot be part of the hash: an unversioned ID compares as
        # equal to every versioned ID of the same submission, and equal
        # objects must have equal hashes.
        return hash((self.date, self.num, self.archive))

    @classmethod
    def find_all(cls, string):
        """Find arXiv IDs in the given `string`.

        The IDs are returned as a list in the order that they appear in
        `string`, with duplicates removed.
        """
        if sys.version_info < (3, 7, 0):
            logger = logging.getLogger(__name__)
            logger.warning(
                "IDs may not be sorted properly. Run on Python >=3.7."
            )
        # Counter keys preserve insertion order (Python >= 3.7), giving us
        # ordered de-duplication of the raw string matches.
        return [
            cls(k) for k in collections.Counter(RX_ID.findall(string)).keys()
        ]
# Markdown template for a single arXiv submission. The placeholder names
# must match keys of the dict returned by `arxiv_fmt_replacements`. The
# leading newline only aligns the template in the source; `lstrip` removes
# it again.
MKD_FMT = r'''
{title_prefix}{title}
{authors_and_separated}
{arxiv_url}
{tags}
{abstract}
{links}
'''.lstrip()
def flatten(list_):
    """Flatten the given list.

    Nested lists and tuples are expanded recursively; all other elements
    are kept as-is, in their original order.
    """

    def _walk(items):
        for item in items:
            if isinstance(item, (list, tuple)):
                yield from _walk(item)
            else:
                yield item

    return list(_walk(list_))
def make_filename(string, allow_unicode=False, slugify=False):
    """Turn `string` into a "safe" filename.

    Slashes ("/") become hyphens ("-"). Unless `allow_unicode` is True, the
    text is transliterated to ASCII (accents are stripped, any remaining
    non-ASCII characters are dropped). Characters outside a conservative
    whitelist are removed and surrounding whitespace is stripped. With
    ``slugify=True`` the result is additionally lowercased, most punctuation
    is dropped, and runs of whitespace/hyphens collapse into single hyphens.
    """
    text = str(string)
    if allow_unicode:
        # Canonical composition, keeping the full Unicode range.
        text = unicodedata.normalize('NFKC', text)
    else:
        # Decompose accented characters, then drop everything non-ASCII.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    text = text.replace("/", "-")
    text = re.sub(r'[^\w\s()\[\],."\'?+-]', '', text).strip()
    if not slugify:
        return text
    text = re.sub(r'[()\[\],"\'?+]', '', text.lower())
    return re.sub(r'[-\s]+', '-', text)
def _sanitize_single_line(string): | |
string = string.replace("\n", " ") | |
string = re.sub(r'\s{2,}', ' ', string) | |
return string | |
def _sanitize_paragraphs(string):
    """Remove unnecessary line breaks.

    Single newlines that hard-wrap a paragraph are replaced by spaces, while
    newlines adjacent to other whitespace -- in particular the blank lines
    separating paragraphs -- are left untouched.
    """
    # Matches only "soft" line breaks: a lone newline strictly inside the
    # string with non-whitespace on both sides.
    rx = re.compile(
        r'''
        (?<!\A)  # not at the beginning of string
        (?<!\n|\s)  # not preceded by another newline or whitespace
        \n
        (?!\n|\s|\Z)  # not followed by another newline, whitespace, or
        # end of string
        ''',
        re.X,
    )
    # Splitting at the soft breaks and re-joining with spaces unwraps the
    # paragraphs without touching paragraph boundaries.
    string = " ".join(rx.split(string))
    return string
def _tags(query_result):
    """Return a list of tags based on the tags in the arXiv `query_result`."""
    # Split dotted category terms (e.g. "cond-mat.str-el") into their
    # components, always include "arxiv", and emit a sorted, de-duplicated
    # list of hashtags.
    terms = ['arxiv']
    for tag in query_result.tags:
        terms.append(tag.term.split("."))
    return ["#%s" % term for term in sorted(set(flatten(terms)))]
def _links(query_result): | |
"""Return a list of markdown-formatted links for the `query_result`. | |
This includes only titled links, which will usually be the PDF and the DOI | |
link to any published version of the article. | |
""" | |
return [ | |
"[%s](%s)" % (link.title, link.href) | |
for link in query_result.links | |
if 'title' in link | |
] | |
def arxiv_fmt_replacements(query_id, query_result, header_level=2):
    """Return replacements for formatting a string with arXiv query results.

    The result is a dictionary with replacement to be used in a string's
    `format` method.

    The resulting dict contains the following keys:

    * `title_prefix`: Markdown title prefix for a header of level
      `header_level`, e.g. "## " for ``header_level=2``.
    * `arxiv_comment`: The comment field of the arXiv submission; typically
      the number of pages.
    * `arxiv_url`: The URL of the abstract on arxiv.org
    * `doi`: The DOI of the paper if available, or None
    * `journal_reference`: The Journal reference of the paper if available,
      or None
    * `pdf_url`: The URL for the PDF file on arxiv.org
    * `published`: The date on which the arXiv version was originally
      published
    * `abstract`: The abstract text
    * `title`: The title of the submission
    * `updated`: The date on which the arXiv version was last updated
    * `authors`: List of author names
    * `authors_and_separated`: A string containing all authors separated by
      "and"
    * `authors_comma_separated`: A string containing all authors separated
      by commas
    * `tags`: A string containing a list of tags separated by spaces,
      including "arxiv" and tags based on the arXiv submission categories
    * `links`: A string containing markdown-formatted links to the PDF and
      the published version of the article, if available.
    * `id`: The canonical arXiv ID, as reported by the arXiv API
    * `query_id`: The arXiv ID as searched for (value of the `query_id`
      argument)
    """
    # Fields copied verbatim from the API response record; missing fields
    # default to None. NOTE(review): assumes `query_result` supports a
    # dict-like `.get`, as the feedparser-based records returned by the
    # `arxiv` package do -- confirm when upgrading that dependency.
    keys = [
        'arxiv_comment',
        'arxiv_url',
        'doi',
        'journal_reference',
        'pdf_url',
        'published',
        'updated',
        'authors',
    ]
    replacements = {k: query_result.get(k, None) for k in keys}
    replacements.update(
        dict(
            title_prefix="#" * header_level + " ",  # e.g. "## " for level 2
            title=_sanitize_single_line(query_result.title),
            # Protect a literal " and " inside a single author's name so it
            # cannot be confused with the separator between two authors.
            # (The braces survive: `str.format` substitutes placeholders in
            # the template, not in the replacement values.)
            authors_and_separated=" and ".join(
                [
                    name.replace(" and ", " {and} ")
                    for name in query_result.authors
                ]
            ),
            authors_comma_separated=", ".join(query_result.authors),
            abstract=_sanitize_paragraphs(query_result.summary),
            tags=" ".join(_tags(query_result)),
            links=" ".join(_links(query_result)),
            query_id=query_id,
            # Re-parse the ID from the API response to obtain its canonical
            # form (may differ from `query_id`, e.g. in its version).
            id=str(ArxivID(query_result.id)),
        )
    )
    return replacements
def _arxiv_query(id_list):
    """Wrapper around `arxiv.query` to deal with missing IDs.

    Returns a tuple ``(ids, responses)`` restricted to those entries of
    `id_list` for which the arXiv API returned a record, preserving the
    order of `id_list`.
    """
    responses = arxiv.query(id_list=[str(id) for id in id_list])
    if len(responses) == len(id_list):  # all IDs found
        for (id, response) in zip(id_list, responses):
            # I can't find anywhere that the arXiv API *guarantees" that the
            # results are returned in the same order as the query IDs, so we'll
            # double check
            assert ArxivID(id) == ArxivID(response.id)
        return id_list, responses
    else:  # not all IDs found
        # Some queried IDs have no response. Re-associate each query ID with
        # its response by cycling through the responses at most
        # len(responses) times; IDs with no matching response are dropped.
        response_cycle = itertools.cycle(responses)
        responses_filtered = []
        id_list_filtered = []
        for id in id_list:
            attempts = len(responses)
            while attempts > 0:
                response = next(response_cycle)
                if ArxivID(id) == ArxivID(response.id):
                    id_list_filtered.append(id)
                    responses_filtered.append(response)
                    break
                attempts -= 1
            else:
                # while/else: the loop exhausted without `break`, i.e. no
                # response matched this ID.
                logger = logging.getLogger(__name__)
                logger.error("Cannot find ID %s", id)
        return id_list_filtered, responses_filtered
def arxiv_to_markdown(
    *arxiv_ids,
    indent_level=0,
    block_spacing=2,
    header_level=2,
    write_to_files=None,
    slugify=True,
    append=True,
    obsidian=False,
):
    """Given a list of ArxivIDs, generate and return markdown summaries.

    Each ID will be rendered into a "block". Each block will start with the
    title of the paper, formatted as a markdown header with the given
    `header_level`. Multiple blocks that are written to the same output file
    or returned as a result are separated by `block_spacing` number of empty
    lines.

    If `write_to_files` is given, the block for each ID is written to a
    filename based on `write_to_files`. The filename is obtained by
    formatting `write_to_files` with replacements, e.g. "{id}" is replaced
    by the ID and "{title}" is replaced by the manuscript title, and
    "{authors_and_separated}" is replaced by a list of the author names.
    Other fields are those available in the arXiv API response record. The
    filename is then sanitized through :func:`make_filename`. If `slugify`
    is True, non-ASCII letters are dropped from the filename, spaces are
    replaced by hyphens, and the filename is converted to lowercase. A
    summary bullet list in markdown format linking to the written files is
    generated and returned as a multi-line string result. If `obsidian` is
    True, a special syntax appropriate for the Obsidian note-taking software
    will be used.

    If `write_to_files` is None, the blocks for multiple IDs will be
    concatenated (with `block_spacing`) and returned as a multi-line string
    result.

    In either case, the returned result will be indented with 4 spaces per
    `indent_level`.
    """
    logger = logging.getLogger(__name__)
    # Drop any IDs that the arXiv API does not know about (logged as errors
    # inside _arxiv_query).
    arxiv_ids, responses = _arxiv_query(id_list=arxiv_ids)
    prefix = ""
    if write_to_files is None:
        # Only directly returned markdown is indented; blocks written to
        # files stay flush-left (the *summary* is indented further below).
        prefix = " " * indent_level
    separator = "\n" * block_spacing
    blocks = []
    block_replacements = []
    for (query_id, response) in zip(arxiv_ids, responses):
        replacements = arxiv_fmt_replacements(
            query_id, response, header_level=header_level
        )
        block_replacements.append(replacements)
        blocks.append(indent(MKD_FMT.format(**replacements), prefix))
    outfiles = set()  # files already written to during this call
    if write_to_files is None:
        return separator.join(blocks)
    else:  # write blocks to file, generate and return summary
        logger.debug("write_to_files = %s", write_to_files)
        summary_items = []
        tuples = tuple(zip(arxiv_ids, responses, block_replacements, blocks))
        for (id, response, replacements, block) in tuples:
            outfile = Path(
                make_filename(
                    write_to_files.format(**replacements),
                    allow_unicode=(not slugify),
                    slugify=slugify,
                )
            )
            logger.debug("Writing block for %s to %s", id, outfile)
            if append and outfile.is_file():
                # Pre-existing file in append mode: treat it as "already
                # written to" so a separator is inserted before our block.
                outfiles.add(outfile)
            elif outfile not in outfiles:
                # Overwrite mode: remove a pre-existing file once, before
                # the first block targets it. NOTE: `missing_ok` requires
                # Python >= 3.8.
                outfile.unlink(missing_ok=True)
            with outfile.open(mode="a", encoding="utf8") as out_fh:
                if outfile in outfiles:
                    out_fh.write(separator)
                out_fh.write(block)
            outfiles.add(outfile)
            replacements['outfile'] = outfile
            replacements['outfile_stem'] = outfile.stem
            if obsidian:
                # Obsidian wiki-link: [[file-stem|displayed title]]
                summary_items.append(
                    "* [[{outfile_stem}|{title}]]".format(**replacements)
                )
            else:
                summary_items.append(
                    "* [{title}]({outfile})".format(**replacements)
                )
        prefix = " " * indent_level
        return indent("\n".join(summary_items), prefix) + "\n"
# Expected result of ``arxiv_to_markdown('2011.12310', '1902.11284')`` with
# default settings, used by the tests below. The two blocks are separated by
# ``block_spacing=2`` blank lines. NOTE(review): this is a snapshot of live
# arXiv API data and will break if those records are ever updated.
TEST_OUTPUT = r"""
## Prediction of Toric Code Topological Order from Rydberg Blockade
Ruben Verresen and Mikhail D. Lukin and Ashvin Vishwanath
http://arxiv.org/abs/2011.12310v1
#arxiv #atom-ph #cond-mat #physics #quant-gas #quant-ph #str-el
The physical realization of $\mathbb Z_2$ topological order as encountered in the paradigmatic toric code has proven to be an elusive goal. We show that this phase of matter can be created in a two-dimensional array of strongly interacting Rydberg atoms. Our proposal makes use of atoms localized on the sites of a ruby lattice, coupled via a Rydberg blockade mechanism. First, we show that the blockade model effectively realizes a monomer-dimer model on the kagome lattice with a single-site kinetic term, and we obtain its phase diagram using the numerical density matrix renormalization group method. We find a topological quantum liquid (TQL) as evidenced by multiple measures including (i) a continuous transition between two featureless phases, (ii) a topological entanglement entropy of $\ln 2$ as measured in various geometries, (iii) degenerate topological ground states and (iv) the expected modular matrix from ground state overlap. Next, we show that the TQL can persist upon including realistic, algebraically-decaying van der Waals interactions $V(r) \sim 1/r^6$. Moreover, we can directly access the topological loop operators of this model, which can be measured experimentally using a dynamic protocol, providing a "smoking gun" experimental signature of the TQL phase. Finally, we show how to trap an emergent anyon and realize different topological boundary conditions, and we discuss the implications for exploring fault-tolerant quantum memories.
[pdf](http://arxiv.org/pdf/2011.12310v1)


## Krotov: A Python implementation of Krotov's method for quantum optimal control
Michael H. Goerz and Daniel Basilewitsch and Fernando Gago-Encinas and Matthias G. Krauss and Karl P. Horn and Daniel M. Reich and Christiane P. Koch
http://arxiv.org/abs/1902.11284v6
#arxiv #quant-ph
We present a new open-source Python package, krotov, implementing the quantum optimal control method of that name. It allows to determine time-dependent external fields for a wide range of quantum control problems, including state-to-state transfer, quantum gate implementation and optimization towards an arbitrary perfect entangler. Krotov's method compares to other gradient-based optimization methods such as gradient-ascent and guarantees monotonic convergence for approximately time-continuous control fields. The user-friendly interface allows for combination with other Python packages, and thus high-level customization. The package is being developed at https://github.com/qucontrol/krotov
[doi](http://dx.doi.org/10.21468/SciPostPhys.7.6.080) [pdf](http://arxiv.org/pdf/1902.11284v6)
""".lstrip()
def test_arxiv_to_markdown():
    """Test of `arxiv_to_markdown` function.

    NOTE(review): queries the live arXiv API, so this test needs network
    access and depends on the queried records staying unchanged.
    """
    md = arxiv_to_markdown('2011.12310', '1902.11284')
    assert md[-1] == "\n"
    assert md == TEST_OUTPUT
def test_arxiv_id_regex():
    """Test the regexes for extracting arXiv identifiers."""
    # Local names chosen so as not to shadow the builtins `id` and `input`.
    for id_str in ['1612.00001', '1203.0023v2']:
        assert re.match(RE_ID_NEW, id_str)
        assert RX_ID_NEW.match(id_str)
    for id_str in ['hep-th/11030234', 'cs/0112345v2', 'cs.AI/0112345v2']:
        assert re.match(RE_ID_OLD, id_str)
        assert RX_ID_OLD.match(id_str)
    # IDs embedded in surrounding text (URLs, "arxiv:" prefixes) must be
    # found in order of appearance.
    text = (
        "http://arxiv.org/abs/1902.11284v6 2011.12310 arxiv:math.GT/0309136"
    )
    arxiv_ids = RX_ID.findall(text)
    assert arxiv_ids == ['1902.11284v6', '2011.12310', 'math.GT/0309136']
def test_arxiv_id_extraction():
    """Test that duplicate IDs are excluded in `ArxivID.find_all`."""
    # Local name chosen so as not to shadow the builtin `input`.
    text = "http://arxiv.org/abs/1902.11284v6,1203.0023v2,1902.11284v6 1203.0023v2 1612.00001 cs/0112345v2"
    assert ArxivID.find_all(text) == [
        ArxivID("1902.11284v6"),
        ArxivID("1203.0023v2"),
        ArxivID("1612.00001"),
        ArxivID("cs/0112345v2"),
    ]
def test_write_to_files(caplog):
    """Test the ``--write-to-files`` option of the command line interface.

    NOTE(review): queries the live arXiv API (network access required). The
    expected summary lines are indented by 8 spaces because of
    ``--indent-level 2``.
    """
    runner = CliRunner(mix_stderr=False)
    # Scenario 1: all blocks appended to a single fixed file; STDOUT is a
    # bullet list linking to that file.
    with runner.isolated_filesystem():
        result = runner.invoke(
            main,
            [
                '--debug',
                '--write-to-files',
                'abstracts.md',
                '--indent-level',
                '2',
            ],
            input='2011.12310v1 1902.11284v6',
        )
        assert result.exit_code == 0
        files = list(Path(".").glob("*.*"))
        assert Path("abstracts.md") in files
        abstracts_md = Path("abstracts.md").read_text()
        expected = """\
        * [Prediction of Toric Code Topological Order from Rydberg Blockade](abstracts.md)
        * [Krotov: A Python implementation of Krotov's method for quantum optimal control](abstracts.md)\n"""
        assert result.output == expected
        assert abstracts_md == TEST_OUTPUT
    # Scenario 2: one file per ID, named after the queried ID; an ID that
    # does not exist must be logged as an error, not abort the run.
    with runner.isolated_filesystem():
        result = runner.invoke(
            main,
            [
                '--debug',
                '--write-to-files',
                '{query_id}.md',
                '--indent-level',
                '2',
            ],
            input='2011.12310v1 1902.11284v6 cs/0112345v2 math.GT/0309136',
            # note: cs/0112345v2 does not exist
        )
        files = list(Path(".").glob("*.*"))
        assert result.exit_code == 0
        error_msgs = [
            x.message
            for x in caplog.get_records(when="call")
            if x.levelno == logging.ERROR
        ]
        assert result.exit_code == 0  # (redundant with the check above)
        assert len(error_msgs) == 1
        assert error_msgs[0] == "Cannot find ID cs/0112345v2"
        assert Path('2011.12310v1.md') in files
        assert Path('1902.11284v6.md') in files
        # the "/" of old-style IDs becomes "-" in the filename
        assert Path('math.GT-0309136.md') in files
        expected = """\
        * [Prediction of Toric Code Topological Order from Rydberg Blockade](2011.12310v1.md)
        * [Krotov: A Python implementation of Krotov's method for quantum optimal control](1902.11284v6.md)
        * [Regular points in affine Springer fibers](math.GT-0309136.md)\n"""
        assert result.output == expected
    # Scenario 3: like scenario 2, but the summary uses Obsidian wiki-links.
    with runner.isolated_filesystem():
        result = runner.invoke(
            main,
            [
                '--debug',
                '--write-to-files',
                '{query_id}.md',
                '--indent-level',
                '2',
                '--obsidian',
            ],
            input='2011.12310v1 1902.11284v6 cs/0112345v2 math.GT/0309136',
            # note: cs/0112345v2 does not exist
        )
        files = list(Path(".").glob("*.*"))
        assert result.exit_code == 0
        expected = """\
        * [[2011.12310v1|Prediction of Toric Code Topological Order from Rydberg Blockade]]
        * [[1902.11284v6|Krotov: A Python implementation of Krotov's method for quantum optimal control]]
        * [[math.GT-0309136|Regular points in affine Springer fibers]]\n"""
        assert result.output == expected
    # Scenario 4: filenames generated from response metadata (author list
    # and title).
    with runner.isolated_filesystem():
        result = runner.invoke(
            main,
            [
                '--debug',
                '--write-to-files',
                '{authors_comma_separated} - {title}.md',
                '--indent-level',
                '2',
            ],
            input='2011.12310v1 1902.11284v6 cs/0112345v2 math.GT/0309136',
            # note: cs/0112345v2 does not exist
        )
        files = list(Path(".").glob("*.*"))
        assert result.exit_code == 0
        expected = """\
        * [Prediction of Toric Code Topological Order from Rydberg Blockade](Ruben Verresen, Mikhail D. Lukin, Ashvin Vishwanath - Prediction of Toric Code Topological Order from Rydberg Blockade.md)
        * [Krotov: A Python implementation of Krotov's method for quantum optimal control](Michael H. Goerz, Daniel Basilewitsch, Fernando Gago-Encinas, Matthias G. Krauss, Karl P. Horn, Daniel M. Reich, Christiane P. Koch - Krotov A Python implementation of Krotov's method for quantum optimal control.md)
        * [Regular points in affine Springer fibers](Mark Goresky, Robert Kottwitz, Robert MacPherson - Regular points in affine Springer fibers.md)\n"""
        assert result.output == expected
def has_stdin_data(timeout=0.1):
    """Return True if data is detected on stdin with `timeout` (in seconds)."""
    try:
        readable, _, _ = select.select([sys.stdin], [], [], timeout)
    except OSError:
        # stdin cannot be select()ed (e.g. not a real file descriptor);
        # conservatively report that there is data.
        return True
    return bool(readable)
@click.command()
@click.help_option('--help', '-h')
@click.version_option(version=__version__)
@click.option('--debug', is_flag=True, help='enable debug logging')
@click.option(
    '--indent-level',
    type=click.IntRange(0, None),
    default=0,
    help=_sanitize_paragraphs(
        # typo fix: "indedation" -> "indentation"
        """The indentation level. The STDOUT output will be indented by four
        spaces per indentation level."""
    ),
    show_default=True,
)
@click.option(
    '--block-spacing',
    type=click.IntRange(0, None),
    default=2,
    help=_sanitize_paragraphs(
        """The number of blank lines between markdown blocks for different
        arXiv IDs."""
    ),
    show_default=True,
)
@click.option(
    '--header-level',
    type=click.IntRange(0, 5),
    default=2,
    help=_sanitize_paragraphs(
        """The level of the markdown heading to be used for the title of each
        block. That is, the number of '#' symbols preceding the title."""
    ),
    show_default=True,
)
@click.option(
    '--write-to-files',
    '-o',
    type=click.Path(dir_okay=False),
    help=_sanitize_paragraphs(
        """A pattern for filenames to which to append the markdown block for
        each arXiv ID. The pattern may contain any of the fields returned by
        the arxiv_fmt_replacements function (see source code). For example,
        "{query_id}.md" will write each block to a file named after its arXiv
        ID. If given in combination with --no-append, the output file is
        overwritten."""
    ),
)
@click.option(
    '--slugify/--no-slugify',
    default=False,
    help=_sanitize_paragraphs(
        """If given in combination with --write-to-files, indicates whether
        filenames should be "slugified" (replace spaces with hyphens, allow
        only select ASCII characters, convert to lowercase)."""
    ),
)
@click.option(
    '--append/--no-append',
    default=True,
    help=_sanitize_paragraphs(
        """If given in combination with --write-to-files, whether to append to
        the output file (default) or whether to overwrite it."""
    ),
)
@click.option(
    '--obsidian/--no-obsidian',
    default=False,
    help=_sanitize_paragraphs(
        """If given in combination with --write-to-files, whether to use
        Obsidian syntax for linking to the generated files or standard markdown
        syntax."""
    ),
)
@click.argument('ids', nargs=-1)
def main(
    debug,
    indent_level,
    block_spacing,
    header_level,
    write_to_files,
    slugify,
    append,
    obsidian,
    ids,
):
    """Create markdown summaries of given arXiv identifiers.

    The arXiv identifiers are extracted from the positional arguments as
    well as from any text piped into the program on STDIN. If "-" is given
    as the only positional argument, the program will wait for input from
    STDIN.

    Example arXiv IDs are 2011.12310v1, 1902.11284 or (prior to 2007)
    math.GT/0309136. The script will search for anything that has the
    correct format, and ignore the remaining input. Thus, you may e.g. pipe
    a list of arXiv URLs into the script as input.

    By default, the script will write to STDOUT a block of markdown for each
    arXiv ID, starting with the manuscript title as a markdown header (see
    --header-level), followed by author information, the abstract, and
    relevant links. Multiple blocks are separated by blank lines (see
    --block-spacing).

    If the `--write-to-files` option is given, the markdown blocks are
    instead written to output files, and a markdown-formatted bullet list
    with the manuscript titles and links to the written files is written to
    STDOUT.
    """
    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)
    if debug:
        logger.setLevel(logging.DEBUG)
        logger.debug("Enabled debug output")
    # `text` collects everything in which to search for arXiv IDs (renamed
    # from `input`, which shadowed the builtin).
    if has_stdin_data() or ids == ("-",):
        logger.debug("Reading from pipe")
        stdin_text = click.get_text_stream('stdin')
        text = stdin_text.read().replace("\n", " ")
    else:
        logger.debug("Not connected to pipe")
        text = ""
    text += " " + " ".join(ids)
    logger.debug("INPUT: %s", text)
    ids = ArxivID.find_all(text)
    logger.debug("FOUND IDS: %s", ids)
    markdown = arxiv_to_markdown(
        *ids,
        indent_level=indent_level,
        block_spacing=block_spacing,
        header_level=header_level,
        write_to_files=write_to_files,
        slugify=slugify,
        append=append,
        obsidian=obsidian,
    )
    # `markdown` already ends in a newline where appropriate.
    click.echo(markdown, nl=False)
if __name__ == "__main__":
    main()  # click parses sys.argv and supplies all parameters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment