frostming · March 31, 2018 03:31
diff --git a/__init__.py b/__init__.py
 # -*- coding: utf-8 -*-

 # Copyright (c) 2017 by Esteban Castro Borsani.
 # Released under MIT license

 from .elements import (
    Header,
    Quote,
    HRule,
    UListItem,
    UList,
    OListItem,
    OList,
    Code,
    LinkRefLabel,
    Paragraph,
    Literal,
    Link,
    LinkRef,
    DoubleEmphasis,
    Emphasis,
    CodeSpan,
    Image,
    ImageRef,
    AutoLink)
 from .elements import (
    default_elements,
    default_children)
 from .parser import (
    MarkDown,
    parse,
    render)
diff --git a/elements.py b/elements.py
 # -*- coding: utf-8 -*-

 # Copyright (c) 2017 by Esteban Castro Borsani.
 # Released under MIT license

 import re


 # Names of the rule groups. They are the keys of DEFAULT_CHILDREN and
 # the values of each element's ``children`` mapping.
 BLOCK_ELEMENTS = 'BLOCK_ELEMENTS'
 INLINE_ELEMENTS = 'INLINE_ELEMENTS'
 ALL_ELEMENTS = 'ALL_ELEMENTS'  # block rules followed by inline rules
 U_LIST_ELEMENTS = 'U_LIST_ELEMENTS'  # rules of UListItem only
 O_LIST_ELEMENTS = 'O_LIST_ELEMENTS'  # rules of OListItem only


 class Element:
    """
    Base class of every markdown element.

    ``name`` is the token the element is registered under,
    ``patterns`` holds the regex sources that recognise it and
    ``children`` maps a key of the parsed content to the name of
    the rule group used to parse that value further.
    """

    name = ''
    patterns = ()
    children = {}

    @staticmethod
    def parse(match, ctx):
        """Build the content dict out of a match; must be overridden."""
        raise NotImplementedError()

    @staticmethod
    def render(content, ctx):
        """Build the output text out of a content dict; must be overridden."""
        raise NotImplementedError()


 class Header(Element):
    """
    Header element: underlined with ``=`` or ``-``, or prefixed
    with one to six ``#``.
    """

    name = 'header'
    patterns = (
        r'^(?P<h1>[^\n]+)\n=+\n+',
        r'^(?P<h2>[^\n]+)\n\-+\n+',
        r'^(#{1,6})(?P<hx>[^\n]+)\n+')
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the title (``text``) and the ``level`` of the header."""
        by_name = match.groupdict()
        by_index = match.groups()

        if by_name['hx']:
            # Hash-prefixed header: surrounding spaces and any
            # closing hashes are dropped from the title, the level
            # is the number of leading hashes
            stripped_title = by_index[1].lstrip(' ').rstrip(' #')
            return {
                'text': stripped_title,
                'level': len(by_index[0])}

        # Underlined header: ``=`` gives level 1, ``-`` gives level 2
        level = 1 if by_name['h1'] else 2
        return {
            'text': by_index[0].strip(),
            'level': level}

    @staticmethod
    def render(content, ctx):
        """Return the ``<hN>`` tag for the header."""
        template = '<h%(level)s>%(text)s</h%(level)s>\n'
        return template % content


 class Quote(Element):
    """Block quote: one or more chunks of lines starting with ``>``."""

    name = 'quote'
    patterns = (r'^(?:>(?:[^\n]+\n)*\n+)+',)
    children = {'text': ALL_ELEMENTS}
    _quote_sub_pattern = re.compile(r'^> ?', flags=re.M)

    @staticmethod
    def parse(match, ctx):
        """Remove the ``>`` marker (and one space) opening each line."""
        quoted = match.group(0)
        return {'text': Quote._quote_sub_pattern.sub('', quoted)}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered children into a ``<blockquote>`` tag."""
        template = '<blockquote>\n%(text)s</blockquote>\n'
        return template % content


 class HRule(Element):
    """Horizontal rule: three or more ``-`` or ``*`` on their own line."""

    name = 'h_rule'
    patterns = (
        r'^(?:\- ?){3,}\n+',
        r'^(?:\* ?){3,}\n+')
    children = {}

    @staticmethod
    def parse(match, ctx):
        """A rule carries no content, so the result is always empty."""
        return dict()

    @staticmethod
    def render(content, ctx):
        """Return the ``<hr />`` tag."""
        tag = '<hr />\n'
        return tag


 class UListItem(Element):
    """A single item of an unordered list."""

    name = 'u_list_item'
    patterns = (r'^[*\-+] ([^*\-+\n]\n*[^\n]+\n*)*',)
    children = {'text': ALL_ELEMENTS}
    _list_sub_pattern = re.compile(r'^(?:[*\-+] {1,3}| {1,4})', flags=re.M)

    @staticmethod
    def parse(match, ctx):
        """
        Remove the bullet marker, or up to four spaces of
        indentation, from the start of every line of the item.
        """
        item_source = match.group(0)
        return {'text': UListItem._list_sub_pattern.sub('', item_source)}

    @staticmethod
    def render(content, ctx):
        """Return the ``<li>`` tag, without trailing new lines inside."""
        body = content['text'].rstrip('\n')
        return '<li>%s</li>\n' % body

    @staticmethod
    def _remove_paragraphs(children_list):
        """Replace every paragraph node by the nodes it contains."""
        flattened = []

        for node in children_list:
            node_token, node_content = node

            if node_token == Paragraph.name:
                flattened.extend(node_content['text'])
            else:
                flattened.append(node)

        return flattened

    @staticmethod
    def _add_paragraphs(children_list):
        """Wrap every raw text node into a paragraph node."""
        wrapped = []

        for node in children_list:
            node_token, _node_content = node

            if node_token == '_raw_text':
                wrapped.append((Paragraph.name, {'text': [node]}))
            else:
                wrapped.append(node)

        return wrapped

    @staticmethod
    def post_parse(node, parent_ctx, ctx):
        """
        Adapt the children of the item to the kind of list it is in.

        When the parent context flags a loose item, raw text gets
        wrapped into paragraphs; otherwise paragraphs are unwrapped.
        Returns the new node and an empty context for its children.
        """
        token, content = node

        if parent_ctx['has_loose_item']:
            transform = UListItem._add_paragraphs
        else:
            transform = UListItem._remove_paragraphs

        new_text = transform(content['text'])
        new_content = dict(content)
        new_content['text'] = new_text
        return (token, new_content), {}


 class UList(Element):
    """Unordered list: a run of items starting with ``*``, ``-`` or ``+``."""

    name = 'u_list'
    patterns = (r'^(?:[*\-+] (?:[^\n]+\n(?:\n {4})?)+\n+)+',)
    children = {'text': U_LIST_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the whole matched text; items are parsed as children."""
        # todo: check if has loose_item right here (empty line
        #       anywhere but the end) and remove post_parse
        return {'text': match.group(0)}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered items into a ``<ul>`` tag."""
        return '<ul>\n%(text)s</ul>\n' % content

    @staticmethod
    def _has_loose_item(u_list):
        """
        Tell whether the parsed list node contains a loose item.

        The list is loose when its last item has more than one child,
        or when any other item contains a paragraph. Raw text nodes
        (whose payload is a ``str`` instead of a children dict) hold
        no children and are skipped; an empty list is never loose.
        """
        _token, children = u_list
        items = children['text']

        if not items:
            return False

        *leading_items, (_last_token, last_content) = items

        # todo: check if any children is a block element (skip inlines)

        # The last node being raw text means there is nothing to inspect
        if not isinstance(last_content, dict):
            return False

        if len(last_content['text']) > 1:
            return True

        return any(
            child_token == Paragraph.name
            for _item_token, item_content in leading_items
            if isinstance(item_content, dict)
            for child_token, _child in item_content['text'])

    @staticmethod
    def post_parse(node, parent_ctx, ctx):
        """Leave the node as is and tell the items if the list is loose."""
        return node, {'has_loose_item': UList._has_loose_item(node)}


 class OListItem(Element):
    """A single item of an ordered list."""

    name = 'o_list_item'
    # todo: benchmark
    patterns = (r'^\d{1,9}\. [^\n]+\n(?:[^\d][^\n]+\n(?:\n {4})?)*\n*',)
    children = {'text': ALL_ELEMENTS}
    _list_sub_pattern = re.compile(
        r'^(?:\d{1,9}\. {1,3}| {1,4})', flags=re.M)

    @staticmethod
    def parse(match, ctx):
        """
        Remove the number marker, or up to four spaces of indentation,
        from the start of every line, then the trailing white space.
        """
        item_source = match.group(0)
        unprefixed = OListItem._list_sub_pattern.sub('', item_source)
        return {'text': unprefixed.rstrip()}

    @staticmethod
    def render(content, ctx):
        """Return the ``<li>`` tag, without trailing new lines inside."""
        body = content['text'].rstrip('\n')
        return '<li>%s</li>\n' % body


 class OList(Element):
    """Ordered list: a run of items starting with a number and a dot."""

    name = 'o_list'
    patterns = (r'^(?:\d{1,9}\. (?:[^\n]+\n(?:\n {4})?)+\n+)+',)
    children = {'text': O_LIST_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the whole matched text; items are parsed as children."""
        whole_list = match.group(0)
        return {'text': whole_list}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered items into an ``<ol>`` tag."""
        template = '<ol>\n%(text)s</ol>\n'
        return template % content


 class Code(Element):
    """Code block: indented by four spaces or fenced by three backticks."""

    name = 'code'
    patterns = (
        r'^(?: {4}[^\n]+\n+)+',
        r'^```\n+[^`]+```\n+')
    children = {}
    _code_sub_pattern = re.compile(r'^( {4}|```\n*)', flags=re.M)

    @staticmethod
    def parse(match, ctx):
        """Remove the four spaces prefix or the fences from the block."""
        source = match.group(0)
        return {'text': Code._code_sub_pattern.sub('', source)}

    @staticmethod
    def render(content, ctx):
        """Wrap the code into ``<pre><code>`` tags."""
        template = '<pre><code>%(text)s</code></pre>\n'
        return template % content


 class LinkRefLabel(Element):
    """
    Reference definition, ``[label]: url "title"``.

    It renders to nothing; parsing it records the url and the
    title into the context, under the lower-cased label.
    """

    name = 'link_ref_label'
    patterns = (
        r'^ {,3}\[([^\]]+)\]: +(?:<([^>]+)>|([^ \n]+) ?\n?) *(?:'
        r'"([^"]+)"|\'([^\']+)\'|\(([^\)]+)\))\n+',
        r'^ {,3}\[([^\]]+)\]: +(?:<([^>]+)>|([^\n]+)\n)\n*')
    children = {}

    @staticmethod
    def parse(match, ctx):
        """Store ``(url, title)`` into ``ctx`` and return no content."""
        groups = match.groups()
        title = ''

        # More than three groups means the title alternatives
        # (double quoted, single quoted, parenthesised) are present
        if len(groups) > 3:
            title = groups[3] or groups[4] or groups[5] or ''

        # The url is either the ``<...>`` form or the bare form
        url = groups[1] or groups[2] or ''
        reference = (url.strip(), title.strip())
        ctx[groups[0].lower()] = reference
        return {}

    @staticmethod
    def render(content, ctx):
        """A reference definition produces no output."""
        nothing = ''
        return nothing


 class Paragraph(Element):
    """Paragraph: consecutive non-empty lines ended by an empty line."""

    name = 'paragraph'
    patterns = (r'^(?:[^\n]+\n)*\n+',)
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the matched text without surrounding white space."""
        # todo: use INLINE_ELEMENTS + BR?
        # todo: use INLINE - Paragraph, so we dont strip here
        paragraph = match.group(0)
        return {'text': paragraph.strip()}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered children into a ``<p>`` tag."""
        # todo: replace line ending with 2 spaces or tabs by <br>
        template = '<p>%(text)s</p>\n'
        return template % content


 class Literal(Element):
    """Backslash escaped character, rendered as the character itself."""

    name = 'literal'
    patterns = (r'\\[`*_\{\}\[\]\(\)#+\-\.!\\]',)
    children = {}

    @staticmethod
    def parse(match, ctx):
        """Return the character that follows the backslash."""
        escaped = match.group(0)
        return {'text': escaped[1]}

    @staticmethod
    def render(content, ctx):
        """Return the character as is."""
        character = content['text']
        return character


 class Link(Element):
    """Inline link, ``[text](url)`` or ``[text](url "title")``."""

    name = 'link'
    patterns = (
        r'\[([^\]]+)\]\(([^ ]+) "([^"]+)"\)',
        r'\[([^\]]+)\]\(([^\)]+)\)')
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the ``text``, the ``link`` and, when given, the ``title``."""
        groups = match.groups()
        content = {
            'text': groups[0],
            'link': groups[1]}

        # The title is the third group, which only the first
        # pattern captures
        try:
            content['title'] = groups[2]
        except IndexError:
            pass

        return content

    @staticmethod
    def render(content, ctx):
        """
        Return the ``<a>`` tag for the link.

        The ``title`` attribute is emitted only when a non-empty title
        is given; references defined without a title pass an empty
        string, which would otherwise render as ``title=""``.
        """
        if content.get('title'):
            return (
                '<a href="%(link)s" '
                'title="%(title)s">%(text)s</a>' % content)

        return '<a href="%(link)s">%(text)s</a>' % content


 class LinkRef(Element):
    """Reference link, ``[text][ref]`` or ``[text][]``."""

    name = 'link_ref'
    patterns = (r'\[([^\]]+)\] ?\[([^\]]*)\]',)
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """
        Return the ``text`` and the lower-cased ``ref``; an empty
        reference falls back to the text itself.
        """
        groups = match.groups()
        label = groups[0]
        reference = groups[1] or label
        return {
            'text': label,
            'ref': reference.lower()}

    @staticmethod
    def render(content, ctx):
        """
        Render as a link when the reference is found in ``ctx``,
        as the bare text otherwise.
        """
        try:
            link, title = ctx[content['ref']]
        except KeyError:
            return '%(text)s' % content

        resolved = {
            'link': link,
            'title': title,
            'text': content['text']}
        return Link.render(resolved, ctx)


 class DoubleEmphasis(Element):
    """Strong emphasis, delimited by ``**`` or ``__``."""

    name = 'double_emphasis'
    patterns = (
        r'\*\*[^ ](?:[^ ]+ \*\* )*[^\*]*\*\*',
        r'__[^ ](?:[^ ]+ __ )*[^_]*__')
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the text found between the two-character delimiters."""
        delimited = match.group(0)
        return {'text': delimited[2:-2]}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered children into a ``<strong>`` tag."""
        template = '<strong>%(text)s</strong>'
        return template % content


 class Emphasis(Element):
    """Emphasis, delimited by ``*`` or ``_``."""

    name = 'emphasis'
    patterns = (
        r'\*[^ ](?:[^ ]+ \* )*[^\*]*\*',
        r'_[^ ](?:[^ ]+ _ )*[^_]*_')
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the text found between the one-character delimiters."""
        delimited = match.group(0)
        return {'text': delimited[1:-1]}

    @staticmethod
    def render(content, ctx):
        """Wrap the rendered children into an ``<em>`` tag."""
        template = '<em>%(text)s</em>'
        return template % content


 class CodeSpan(Element):
    """Inline code, delimited by one or two backticks."""

    name = 'code_span'
    patterns = (
        r'``([^`]+`)+`',
        r'`[^`]+`')
    children = {}
    _code_sub_pattern = re.compile(r'(?:^``? ?| ?`?`$)')

    @staticmethod
    def parse(match, ctx):
        """
        Remove the opening and closing backticks, along with one
        space next to each of them.
        """
        span = match.group(0)
        return {'text': CodeSpan._code_sub_pattern.sub('', span)}

    @staticmethod
    def render(content, ctx):
        """Wrap the code into a ``<code>`` tag."""
        template = '<code>%(text)s</code>'
        return template % content


 class Image(Element):
    """Inline image, ``![alt](src)`` or ``![alt](src "title")``."""

    name = 'image'
    patterns = (
        r'!\[([^\]]+)\]\(([^ ]+) "([^"]+)"\)',
        r'!\[([^\]]+)\]\(([^\)]+)\)')
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """Return the ``text``, the ``link`` and, when given, the ``title``."""
        groups = match.groups()
        content = {
            'text': groups[0],
            'link': groups[1]}

        # The title is the third group, which only the first
        # pattern captures
        try:
            content['title'] = groups[2]
        except IndexError:
            pass

        return content

    @staticmethod
    def render(content, ctx):
        """
        Return the ``<img />`` tag for the image.

        ``img`` is a void element, so the text goes into the ``alt``
        attribute (with double quotes escaped) instead of between an
        opening and a closing tag. The ``title`` attribute is emitted
        only when a non-empty title is given.
        """
        attrs = {
            'link': content['link'],
            'alt': content['text'].replace('"', '&quot;')}
        title = content.get('title')

        if title:
            attrs['title'] = title
            return (
                '<img src="%(link)s" alt="%(alt)s" '
                'title="%(title)s" />' % attrs)

        return '<img src="%(link)s" alt="%(alt)s" />' % attrs


 class ImageRef(Element):
    """Reference image, ``![text][ref]``."""

    name = 'image_ref'
    patterns = (r'!\[([^\]]+)\] ?\[([^\]]+)\]',)
    children = {'text': INLINE_ELEMENTS}

    @staticmethod
    def parse(match, ctx):
        """
        Return the ``text`` and the lower-cased ``ref``.

        Reference definitions are stored in the context under their
        lower-cased label, so the reference has to be lower-cased
        as well for the lookup to be case-insensitive.
        """
        return {
            'text': match.group(1),
            'ref': match.group(2).lower()}

    @staticmethod
    def render(content, ctx):
        """
        Render as an image when the reference is found in ``ctx``,
        as the bare text otherwise.
        """
        try:
            link, title = ctx[content['ref']]
        except KeyError:
            return '%(text)s' % content
        else:
            return Image.render(
                {'link': link,
                 'title': title,
                 'text': content['text']},
                ctx)


 class AutoLink(Element):
    """Automatic link: any text enclosed by ``<`` and ``>``."""

    name = 'auto_link'
    patterns = (r'<[^>]+>',)
    children = {}

    @staticmethod
    def parse(match, ctx):
        """Return the text found between the angle brackets."""
        enclosed = match.group(0)
        return {'link': enclosed[1:-1]}

    @staticmethod
    def render(content, ctx):
        """Render as a link whose text is the target itself."""
        target = content['link']
        as_link = {'link': target, 'text': target}
        return Link.render(as_link, ctx)


 # Registry of the built-in elements, keyed by their token name
 DEFAULT_ELEMENTS = dict(
    (element.name, element)
    for element in (
        # Block level elements
        Header,
        Quote,
        HRule,
        UListItem,
        UList,
        OListItem,
        OList,
        Code,
        LinkRefLabel,
        Paragraph,
        # Inline level elements
        Literal,
        Link,
        LinkRef,
        DoubleEmphasis,
        Emphasis,
        CodeSpan,
        Image,
        ImageRef,
        AutoLink))


 def default_elements():
    """Return a new dict holding the built-in elements by token name."""
    return dict(DEFAULT_ELEMENTS)


 def to_rules(elements):
    """
    Flatten elements into a tuple of ``(name, pattern)`` rules,
    one rule per pattern, keeping the order of both.
    """
    rules = []

    for element in elements:
        rules.extend(
            (element.name, pattern)
            for pattern in element.patterns)

    return tuple(rules)


 # Rules of every rule group, in matching priority order
 DEFAULT_CHILDREN = {}

 DEFAULT_CHILDREN[BLOCK_ELEMENTS] = to_rules((
    Header,
    Quote,
    HRule,
    UList,
    OList,
    Code,  # todo: move to top?
    LinkRefLabel,
    Paragraph))

 DEFAULT_CHILDREN[INLINE_ELEMENTS] = to_rules((
    Literal,
    Link,
    LinkRef,
    DoubleEmphasis,
    Emphasis,
    CodeSpan,
    Image,
    ImageRef,
    AutoLink))

 DEFAULT_CHILDREN[U_LIST_ELEMENTS] = to_rules((UListItem,))

 DEFAULT_CHILDREN[O_LIST_ELEMENTS] = to_rules((OListItem,))

 # Block rules come first, inline rules follow
 DEFAULT_CHILDREN[ALL_ELEMENTS] = (
    DEFAULT_CHILDREN[BLOCK_ELEMENTS] +
    DEFAULT_CHILDREN[INLINE_ELEMENTS])


 def default_children():
    """Return a new dict holding the built-in rules by rule group name."""
    return dict(DEFAULT_CHILDREN)
diff --git a/parser.py b/parser.py
 # -*- coding: utf-8 -*-

 # Copyright (c) 2017 by Esteban Castro Borsani.
 # Released under MIT license

 import re

 from . import scanner
 from . import elements as elms


 def _text_pre_process(text):
    """
    Normalise the text before parsing: line endings become ``\\n``,
    surrounding new lines are removed and exactly two are appended.
    """
    # todo: replace lines with only spaces and tabs by \n
    unix_text = text.replace('\r\n', '\n').replace('\r', '\n')
    trimmed = unix_text.strip('\n')
    return '%s\n\n' % trimmed


 # Scanners already built, keyed by the rules they were built from
 _scanners_cache = {}


 def _scanner_for(rules):
    """Return the scanner for ``rules``, building it on first use."""
    cached = _scanners_cache.get(rules)

    if cached is None:
        cached = scanner.Scanner(rules, flags=re.M)
        _scanners_cache[rules] = cached

    return cached


 # Token of the nodes holding text that no rule has matched
 _RAW_TEXT = '_raw_text'


 def _parse(txt, ctx, acc, rules, children_map, elements, level, limit):
    """
    Parse ``txt`` with ``rules`` and append the nodes found to ``acc``.

    A node is a ``(token, content)`` tuple. Text matched by no rule
    becomes a raw text node. Every content value listed in the
    ``children`` of the element is replaced by the list of nodes
    obtained from parsing it with the rules of the named group.
    The text is kept raw when there are no rules or when ``level``
    has reached ``limit``. Returns ``acc``.
    """
    if not rules or level == limit:
        acc.append((_RAW_TEXT, txt))
        return acc

    for token, match_or_hole in _scanner_for(rules).scan_with_holes(txt):
        if token is None:
            # A hole: text found between two matches
            acc.append((_RAW_TEXT, match_or_hole))
            continue

        element = elements[token]
        content = element.parse(match_or_hole, ctx)
        node_content = dict(content)

        for key, group_name in element.children.items():
            node_content[key] = _parse(
                content[key],
                ctx,
                [],
                children_map[group_name],
                children_map,
                elements,
                level + 1,
                limit)

        acc.append((token, node_content))

    return acc


 # Default number of nesting levels that get parsed
 _RECURSION_LIMIT = 10


 def parse(
        txt,
        ctx,
        children_map=elms.DEFAULT_CHILDREN,
        elements=elms.DEFAULT_ELEMENTS,
        limit=_RECURSION_LIMIT):
    """
    Parse the markdown ``txt`` into a list of ``(token, content)`` nodes.

    The text is pre-processed and then parsed with the rules of the
    ``ALL_ELEMENTS`` group. ``ctx`` is handed to the ``parse`` of every
    element. A ``limit`` lower than one is raised to one.
    """
    limit = max(limit, 1)
    normalised_txt = _text_pre_process(txt)
    return _parse(
        normalised_txt,
        ctx,
        [],
        children_map[elms.ALL_ELEMENTS],
        children_map,
        elements,
        0,
        limit)


 def _post_parse(ast, parent_ctx, ctx):
    """
    Run the ``post_parse`` hook of every node of ``ast``, in place.

    A hook receives the node, the context returned by the hook of
    its parent (``parent_ctx``) and the parsing context (``ctx``);
    it returns the node to store and the context for its children.

    Elements without a hook are left untouched, yet their children
    are still visited (with an empty parent context), so hooks of
    nested elements run no matter what element contains them.
    """
    for index, (token, children_or_text) in enumerate(ast):
        if token == _RAW_TEXT:
            continue

        element = elms.DEFAULT_ELEMENTS[token]
        hook = getattr(element, 'post_parse', None)

        if hook is None:
            # Nothing to run here, but nested elements may have hooks
            new_node = (token, children_or_text)
            curr_ctx = {}
        else:
            new_node, curr_ctx = hook(
                (token, children_or_text),
                parent_ctx,
                ctx)
            ast[index] = new_node

        for ck in element.children:
            _post_parse(new_node[1][ck], curr_ctx, ctx)


 def post_parse(ast, ctx):
    """
    Run the post-parse hooks over ``ast``, starting with an empty
    parent context. The list is modified in place and returned.
    """
    root_ctx = {}
    _post_parse(ast, root_ctx, ctx)
    return ast


 def _escape(text):
    """
    Escape ``&``, ``<`` and ``>`` for HTML output.

    ``&amp;`` is unescaped first, so an ampersand that is already
    escaped does not get escaped twice.
    """
    escaped = text.replace('&amp;', '&')

    # The ampersand goes first, since the entities contain one
    for character, entity in (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;')):
        escaped = escaped.replace(character, entity)

    return escaped


 def _render(ast, ctx):
    """
    Render a list of nodes into a single string.

    Raw text is escaped. For any other node, the children are
    rendered first, the remaining ``str`` values are escaped, and
    the result is handed to the ``render`` of the element.
    """
    chunks = []

    for token, children_or_text in ast:
        if token == _RAW_TEXT:
            chunks.append(_escape(children_or_text))
            continue

        element = elms.DEFAULT_ELEMENTS[token]
        render_input = dict(children_or_text)

        for key in element.children:
            render_input[key] = _render(children_or_text[key], ctx)

        for key, value in children_or_text.items():
            if key in element.children:
                continue

            if isinstance(value, str):
                render_input[key] = _escape(value)

        chunks.append(element.render(render_input, ctx))

    return ''.join(chunks)


 def render(ast_or_txt, ctx=None):
    """
    Render markdown into a string.

    ``ast_or_txt`` is either the markdown text, which gets parsed
    and post-parsed first, or an already parsed list of nodes.
    ``ctx`` defaults to a new empty dict.
    """
    ctx = {} if ctx is None else ctx

    if not isinstance(ast_or_txt, str):
        return _render(ast_or_txt, ctx)

    ast = post_parse(parse(ast_or_txt, ctx), ctx)
    return _render(ast, ctx)


 class MarkDown:
    """
    Renderer holding its own copy of the elements and of the rules,
    so elements can be added without touching the defaults.

    NOTE(review): ``post_parse`` and ``render`` look elements up in
    ``elms.DEFAULT_ELEMENTS``, so a token added here that is not a
    default one is presumably not found at those stages -- confirm.
    """

    def __init__(self):
        self._elements = elms.default_elements()
        self._children = elms.default_children()

    def new_element_type(self, element_type):
        """Register the rule group ``element_type``, empty if new."""
        self._children.setdefault(element_type, ())

    def add_element(self, element_type, element, index):
        """
        Register ``element`` and add its rules to ``element_type``.

        The ``(name, pattern)`` rules of the element, one per pattern,
        are inserted together at position ``index`` of the rules of
        the group (same position semantics as ``list.insert``).
        """
        self._elements[element.name] = element

        # The groups hold rules, not element classes
        rules = list(self._children[element_type])
        rules[index:index] = elms.to_rules((element,))
        self._children[element_type] = tuple(rules)

        # Same order as the default: block rules first, then inline
        self._children[elms.ALL_ELEMENTS] = (
            *self._children[elms.BLOCK_ELEMENTS],
            *self._children[elms.INLINE_ELEMENTS])

    def add_inline(self, element, index):
        """Add ``element`` to the inline rule group."""
        self.add_element(elms.INLINE_ELEMENTS, element, index)

    def add_block(self, element, index):
        """Add ``element`` to the block rule group."""
        self.add_element(elms.BLOCK_ELEMENTS, element, index)

    def render(self, text, context=None, nesting_limit=_RECURSION_LIMIT):
        """
        Render the markdown ``text`` with the elements of this instance.

        ``context`` defaults to a new empty dict; ``nesting_limit`` is
        the number of nesting levels that get parsed.
        """
        if context is None:
            context = {}

        return render(
            post_parse(
                parse(
                    txt=text,
                    ctx=context,
                    children_map=self._children,
                    elements=self._elements,
                    limit=nesting_limit),
                context),
            context)
diff --git a/scanner.py b/scanner.py
 # -*- coding: utf-8 -*-

 # Copyright (c) 2017 by Esteban Castro Borsani.
 #
 # Original code by Armin Ronacher.
 # Modifications under MIT licence.

 # Copyright (c) 2015 by Armin Ronacher.
 #
 # Some rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
 #       with the distribution.
 #
 #     * The names of the contributors may not be used to endorse or
 #       promote products derived from this software without specific
 #       prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


 from sre_parse import Pattern, SubPattern, parse
 from sre_compile import compile as sre_compile
 from sre_constants import BRANCH, SUBPATTERN


 __all__ = ['Scanner']


 class _ScanMatch:
    """
    View of a match of the compounded regex, restricted to one rule.

    Group indexes are shifted by ``start`` and group names are
    prefixed with the rule name, so groups are addressed as they
    are in the pattern of the rule. Any attribute not defined
    here is taken from the wrapped match.
    """

    def __init__(self, match, rule, start, end):
        self._match = match
        self._rule = rule
        self._start = start
        self._end = end

    def __repr__(self):
        all_groups = repr(self._match.groups())
        return '%s<%s>' % (__class__.__name__, all_groups)

    def __getattr__(self, name):
        return getattr(self._match, name)

    def __group_proc(self, method, group):
        """Call ``method`` of the wrapped match with the real group."""
        if group == 0:
            return method()

        if isinstance(group, str):
            prefixed_name = '%s_%s' % (self._rule, group)
            return method(prefixed_name)

        real_group = self._start + group

        if real_group > self._end:
            raise IndexError('no such group')

        return method(real_group)

    def group(self, *groups):
        fetch = self._match.group

        if not groups:
            return self.__group_proc(fetch, 0)

        if len(groups) == 1:
            # A falsy single group stands for the whole match
            return self.__group_proc(fetch, groups[0] or 0)

        return tuple(
            self.__group_proc(fetch, group)
            for group in groups)

    def groupdict(self, default=None):
        """Return the named groups of the rule, without the prefix."""
        prefix = '%s_' % self._rule
        skip = len(prefix)
        named = {}

        for key, value in self._match.groupdict(default).items():
            if key.startswith(prefix):
                named[key[skip:]] = value

        return named

    def span(self, group=0):
        return self.__group_proc(self._match.span, group)

    def groups(self):
        """Return the groups of the wrapped match from start to end."""
        every_group = self._match.groups()
        return every_group[self._start:self._end]

    def start(self, group=0):
        return self.__group_proc(self._match.start, group)

    def end(self, group=0):
        return self.__group_proc(self._match.end, group)

    def expand(self, template):
        raise RuntimeError('Unsupported on scan matches')


 class Scanner:
    """
    Scanner similar to ``re.Scanner``.

    It builds one compounded regex out of many ``(name, regex)``
    rules. Unlike ``re.Scanner``, it uses ``search`` to find the
    matches, so the unmatched parts of the string (the holes) can
    be retrieved as well.

    Group names are prefixed with ``name_of_rule_`` to avoid
    clashes between rules; group dicts can still be retrieved
    as usual, without the prefix.

    Group indexes are adjusted so they work as expected for the
    rule, instead of as per the compounded regex.

    Caveat: group-index back-references are relative to the
    compounded regex, so for all practical purposes they won't work.

    NOTE(review): this relies on internals of ``sre_parse`` and
    ``sre_compile`` (``Pattern``, ``opengroup``, the payload of
    ``SUBPATTERN``), which are presumably tied to the Python
    version the project targets -- confirm before upgrading.
    """

    def __init__(self, rules, flags=0):
        # rules: sequence of (name, regex) pairs
        # flags: regex flags used to parse every rule
        pattern = Pattern()
        pattern.flags = flags

        # Open one group per rule up front; these groups are the ones
        # wrapping each rule below (group numbers 1 to len(rules))
        for _ in range(len(rules)):
            pattern.opengroup()

        # From here on, named groups get prefixed with the rule name.
        # ``name`` is looked up when the lambda is called, that is,
        # while the loop below is parsing the rule of that name
        _og = pattern.opengroup
        pattern.opengroup = lambda n: _og(n and '%s_%s' % (name, n) or n)

        self.rules = []
        subpatterns = []
        subflags = set()

        for group, (name, regex) in enumerate(rules, 1):
            # Value of ``pattern.groups - 1`` before and after parsing
            # the rule; _ScanMatch receives both as its start and end
            last_group = pattern.groups - 1
            subpattern = parse(regex, flags, pattern)
            subpatterns.append(SubPattern(pattern, [
                (SUBPATTERN, (group, subpattern)),
            ]))
            subflags.add(subpattern.pattern.flags)
            self.rules.append((name, last_group, pattern.groups - 1))

        # All the rules become the alternatives of a single regex
        self._scanner = sre_compile(SubPattern(
            pattern, [(BRANCH, (None, subpatterns))])).scanner

        # More than one distinct flags value was seen while parsing
        if len(subflags) > 1:
            raise ValueError(
                'In-pattern flags are not supported')

    def _scan(self, string):
        # Yields (rule name, _ScanMatch) for every match found by
        # searching the string repeatedly, until search returns None
        sc = self._scanner(string)

        for match in iter(sc.search, None):
            # lastindex is used as the 1-based position of the rule
            rule, start, end = self.rules[match.lastindex - 1]
            yield rule, _ScanMatch(match, rule, start, end)

    def scan_with_holes(self, string):
        # Yields (rule name, match) for every match and (None, text)
        # for every non-empty part of the string found between
        # matches, before the first one or after the last one
        pos = 0

        for rule, match in self._scan(string):
            hole = string[pos:match.start()]

            if hole:
                yield None, hole

            yield rule, match
            pos = match.end()

        # Whatever is left after the last match
        hole = string[pos:]

        if hole:
            yield None, hole
	# -- coding: utf-8 --

	# Copyright (c) 2017 by Esteban Castro Borsani.
	# Released under MIT license

	from .elements import (
	Header,
	Quote,
	HRule,
	UListItem,
	UList,
	OListItem,
	OList,
	Code,
	LinkRefLabel,
	Paragraph,
	Literal,
	Link,
	LinkRef,
	DoubleEmphasis,
	Emphasis,
	CodeSpan,
	Image,
	ImageRef,
	AutoLink)
	from .elements import (
	default_elements,
	default_children)
	from .parser import (
	MarkDown,
	parse,
	render)
	# -- coding: utf-8 --

	# Copyright (c) 2017 by Esteban Castro Borsani.
	#
	# Original code by Armin Ronacher.
	# Modifications under MIT licence.

	# Copyright (c) 2015 by Armin Ronacher.
	#
	# Some rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are
	# met:
	#
	# * Redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer.
	#
	# * Redistributions in binary form must reproduce the above
	# copyright notice, this list of conditions and the following
	# disclaimer in the documentation and/or other materials provided
	# with the distribution.
	#
	# * The names of the contributors may not be used to endorse or
	# promote products derived from this software without specific
	# prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


	from sre_parse import Pattern, SubPattern, parse
	from sre_compile import compile as sre_compile
	from sre_constants import BRANCH, SUBPATTERN


	__all__ = ['Scanner']


	class _ScanMatch:
	    """
	    View of a match of the compounded regex, restricted to one rule.

	    Group indexes are shifted by ``start`` and group names are
	    prefixed with the rule name, so groups are addressed as they
	    are in the pattern of the rule. Any attribute not defined
	    here is taken from the wrapped match.
	    """

	    def __init__(self, match, rule, start, end):
	        self._match = match
	        self._start = start
	        self._end = end
	        self._rule = rule

	    def __repr__(self):
	        return '%s<%s>' % (
	            __class__.__name__,
	            repr(self._match.groups()))

	    def __getattr__(self, name):
	        return getattr(self._match, name)

	    def __group_proc(self, method, group):
	        """Call ``method`` of the wrapped match with the real group."""
	        if group == 0:
	            return method()

	        if isinstance(group, str):
	            return method('%s_%s' % (self._rule, group))

	        real_group = self._start + group

	        if real_group > self._end:
	            raise IndexError('no such group')

	        return method(real_group)

	    def group(self, *groups):
	        if len(groups) in (0, 1):
	            # No group, or a falsy one, stands for the whole match
	            return self.__group_proc(
	                self._match.group,
	                groups and groups[0] or 0)

	        return tuple(
	            self.__group_proc(self._match.group, group)
	            for group in groups)

	    def groupdict(self, default=None):
	        """Return the named groups of the rule, without the prefix."""
	        prefix = '%s_' % self._rule
	        len_prefix = len(prefix)
	        return {
	            key[len_prefix:]: value
	            for key, value in self._match.groupdict(default).items()
	            if key.startswith(prefix)}

	    def span(self, group=0):
	        return self.__group_proc(self._match.span, group)

	    def groups(self):
	        """Return the groups of the wrapped match from start to end."""
	        return self._match.groups()[self._start:self._end]

	    def start(self, group=0):
	        return self.__group_proc(self._match.start, group)

	    def end(self, group=0):
	        return self.__group_proc(self._match.end, group)

	    def expand(self, template):
	        raise RuntimeError('Unsupported on scan matches')


	class Scanner:
	    """
	    Scanner similar to ``re.Scanner``.

	    It builds one compounded regex out of many ``(name, regex)``
	    rules. Unlike ``re.Scanner``, it uses ``search`` to find the
	    matches, so the unmatched parts of the string (the holes) can
	    be retrieved as well.

	    Group names are prefixed with ``name_of_rule_`` to avoid
	    clashes between rules; group dicts can still be retrieved
	    as usual, without the prefix.

	    Group indexes are adjusted so they work as expected for the
	    rule, instead of as per the compounded regex.

	    Caveat: group-index back-references are relative to the
	    compounded regex, so for all practical purposes they won't work.

	    NOTE(review): this relies on internals of ``sre_parse`` and
	    ``sre_compile`` (``Pattern``, ``opengroup``, the payload of
	    ``SUBPATTERN``), which are presumably tied to the Python
	    version the project targets -- confirm before upgrading.
	    """

	    def __init__(self, rules, flags=0):
	        # rules: sequence of (name, regex) pairs
	        # flags: regex flags used to parse every rule
	        pattern = Pattern()
	        pattern.flags = flags

	        # Open one group per rule up front; these groups are the ones
	        # wrapping each rule below (group numbers 1 to len(rules))
	        for _ in range(len(rules)):
	            pattern.opengroup()

	        # From here on, named groups get prefixed with the rule name.
	        # ``name`` is looked up when the lambda is called, that is,
	        # while the loop below is parsing the rule of that name
	        _og = pattern.opengroup
	        pattern.opengroup = lambda n: _og(n and '%s_%s' % (name, n) or n)

	        self.rules = []
	        subpatterns = []
	        subflags = set()

	        for group, (name, regex) in enumerate(rules, 1):
	            # Value of ``pattern.groups - 1`` before and after parsing
	            # the rule; _ScanMatch receives both as its start and end
	            last_group = pattern.groups - 1
	            subpattern = parse(regex, flags, pattern)
	            subpatterns.append(SubPattern(pattern, [
	                (SUBPATTERN, (group, subpattern)),
	            ]))
	            subflags.add(subpattern.pattern.flags)
	            self.rules.append((name, last_group, pattern.groups - 1))

	        # All the rules become the alternatives of a single regex
	        self._scanner = sre_compile(SubPattern(
	            pattern, [(BRANCH, (None, subpatterns))])).scanner

	        # More than one distinct flags value was seen while parsing
	        if len(subflags) > 1:
	            raise ValueError(
	                'In-pattern flags are not supported')

	    def _scan(self, string):
	        # Yields (rule name, _ScanMatch) for every match found by
	        # searching the string repeatedly, until search returns None
	        sc = self._scanner(string)

	        for match in iter(sc.search, None):
	            # lastindex is used as the 1-based position of the rule
	            rule, start, end = self.rules[match.lastindex - 1]
	            yield rule, _ScanMatch(match, rule, start, end)

	    def scan_with_holes(self, string):
	        # Yields (rule name, match) for every match and (None, text)
	        # for every non-empty part of the string found between
	        # matches, before the first one or after the last one
	        pos = 0

	        for rule, match in self._scan(string):
	            hole = string[pos:match.start()]

	            if hole:
	                yield None, hole

	            yield rule, match
	            pos = match.end()

	        # Whatever is left after the last match
	        hole = string[pos:]

	        if hole:
	            yield None, hole