alextremblay · July 15, 2021 15:42
diff --git a/tokenizer.py b/tokenizer.py
 from __future__ import annotations
 from typing import Any, Callable, List, Literal, Tuple, Union, TypeVar, cast
 import regex
 from regex.regex import Match

 DEFAULT = '_SCANNER_DEFAULT_PATTERN'  # a sentinel value

 T = TypeVar('T')

 class TokenMeta(type):
    """Metaclass for dynamic attribute access on a class (not on its instances).

    On any class that uses this metaclass, referencing a missing attribute
    whose name starts with an upper-case letter creates a subclass named
    ``'<parent name>.<attribute>'``, stores it on the parent class under that
    attribute name and returns it.  Any other missing attribute raises
    AttributeError.

    See Token as an example.
    """

    def __getattr__(cls: "TokenMeta", name: str) -> "TokenMeta":
        """Create, register and return the subclass of ``cls`` called ``name``.

        Only called when normal attribute lookup on the class has failed.

        Raises:
            AttributeError: if ``name`` does not start with an upper-case
                letter (this includes the empty string).
        """
        # name[:1] rather than name[0]: an empty name must end in
        # AttributeError, not IndexError, so that hasattr() and
        # getattr(cls, name, default) keep working.
        if not name[:1].isupper():
            raise AttributeError(
                f'type object {cls.__name__!r} has no attribute {name!r}'
            )
        new_subclass = cast(
            TokenMeta,
            type(f'{cls.__name__}.{name}', (cls,), {}),
        )
        # Register this subclass as an attribute on its parent class: the next
        # reference is served by normal lookup and this method is not called.
        setattr(cls, name, new_subclass)
        return new_subclass

 class Token(str, metaclass=TokenMeta):
    """A ``str`` subclass used to tag strings with a category.

    Referencing any capital-case attribute of this class returns a subclass
    named after that attribute.

    Example:
        >>> token_string = Token('hello')
        >>> # Every token, whatever its category, is still a string.
        >>> isinstance(token_string, str)
        True
        >>> # Categories are created simply by naming them.
        >>> other_string = Token.Category('some string')
        >>> a_third_string = Token.OtherCategory('some other string')
        >>> # Each category is a subclass of Token (and therefore of str).
        >>> issubclass(Token.Category, Token) and issubclass(Token.OtherCategory, Token)
        True
        >>> isinstance(other_string, Token.Category)
        True
        >>> isinstance(a_third_string, Token.Category)
        False
        >>> # Categories nest to any depth.
        >>> yet_another_string = Token.Category.SubCategory('hello')
        >>> # Comparison is plain string comparison, whatever the category.
        >>> token_string == yet_another_string
        True
    """

    def __repr__(self):
        # Show the category (the class name) wrapped around the usual str repr.
        category = type(self).__name__
        text_repr = super().__repr__()
        return f'{category}({text_repr})'


 class Scanner:
    """Split a string into tokens using a list of ``(pattern, action)`` rules.

    All rule patterns are combined into a single compiled regex.  Calling the
    scanner yields, in order of appearance, the result of a rule's action for
    each fragment matched by that rule's pattern, and the result of the default
    action (``str`` unless a ``DEFAULT`` rule replaces it) for each run of
    text that no pattern matched.
    """

    def __init__(self, rules: List[Tuple[str, Callable]], flags=regex.VERSION1) -> None:
        """Register the rules and compile the combined pattern.

        Args:
            rules: ``(pattern, action)`` pairs.  A pair whose pattern equals
                ``DEFAULT`` does not add a pattern; its action becomes the
                default action.
            flags: flags handed to ``regex.compile``.
        """
        self.patterns = {}
        self.actions = {}
        self.default_action = str
        for index, (pattern, action) in enumerate(rules):
            if pattern == DEFAULT:
                self.default_action = action
                continue
            # The rule index keeps the generated group names unique.
            name = f'scanner_pattern{index}'
            self.patterns[name] = pattern
            self.actions[name] = action
        self.scanner = regex.compile(self.assemble_pattern(), flags)

    def assemble_pattern(self):
        r"""
        Construct a regex pattern from a set of sub-patterns, assigning each sub-pattern to a named capture group

        The named groups are joined as alternatives inside a single ``(?|...)`` group.

        Example:
            >>> self.patterns = {
            ...     "scanner_pattern0": r"\w+",
            ...     "scanner_pattern1": r"\d{3}"
            ... }
            >>> self.assemble_pattern()
            '(?|(?<scanner_pattern0>\\w+)|(?<scanner_pattern1>\\d{3}))'
        """
        alternates = '|'.join(
            f'(?<{name}>{pattern})' for name, pattern in self.patterns.items()
        )
        return f'(?|{alternates})'

    def get_pattern_name(self, match_object):
        """
        Identify which pattern from the rules list was matched by a given match object

        The match object's groupdict() contains an entry for each scanner pattern
        (ex 'scanner_pattern0', 'scanner_pattern1', etc). The first such entry whose
        value is not None is taken to be the pattern that matched, and its name is returned.

        Raises:
            RuntimeError: if no 'scanner_pattern*' group captured anything.
        """
        for name, value in match_object.groupdict().items():
            if name.startswith('scanner_pattern') and value is not None:
                return name
        # If we get to this point, something has gone seriously wrong
        raise RuntimeError("text fragment matched a pattern from the rules list, but was not captured by that pattern")

    def __call__(self, input_str, pass_in_option: Union[Literal['string'], Literal['match_object']] = 'string'):
        """Yield the tokens of ``input_str`` in order of appearance.

        The whole string is scanned in one pass (no re-slicing), so
        lookbehinds and anchors in the rule patterns see the real surrounding
        text.  Zero-width matches are skipped: they carry no text, and the
        characters around them are reported through the default action.

        Args:
            input_str: the text to tokenize.
            pass_in_option: ``'string'`` passes the matched text to the rule's
                action; any other value passes the match object itself.

        Yields:
            The rule action's result for every matched fragment and the
            default action's result for every run of unmatched text.
        """
        pass_text = pass_in_option == 'string'
        consumed = 0  # index just past the last fragment already yielded
        for m in self.scanner.finditer(input_str):
            start, end = m.span()
            if start == end:
                # A zero-width match used to stall the scan forever; ignore it
                # and let finditer move on.
                continue
            if start > consumed:
                # Unmatched text sits between the previous token and this match.
                yield self.default_action(input_str[consumed:start])
            action = self.actions[self.get_pattern_name(m)]
            yield action(m[0] if pass_text else m)
            consumed = end
        if consumed < len(input_str):
            # Trailing text that contains none of the patterns.
            yield self.default_action(input_str[consumed:])


 if __name__ == "__main__":

    # Basic example: split a grok-style expression into Grok references,
    # parenthesised groups and everything else.
    scanner = Scanner([
        # '%{' followed by the shortest run of characters up to the next '}'
        (r'%\{.*?\}', Token.Grok),
        # a parenthesised group whose '(' and ')' are not preceded by a
        # backslash; '(?R)' is a regex-module construct, presumably used here
        # to allow nested parentheses
        (r'(?<!\\)\((?:[^)(]*(?R)?)*+(?<!\\)\)', Token.Group),
        # action applied to text matched by neither pattern above
        (DEFAULT, Token.Default)
    ])

    testdata = r'%{WORD:action}test %\{WORD:action}test (%{ASA_TCP_UDP}|%{ASA_ICMP})  \(type \d, code \d\)  (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) some text' # noqa

    # Print the repr of every token so its category is visible.
    for token in scanner(testdata):
        print(repr(token))

    # Output:
    # Token.Grok('%{WORD:action}')
    # Token.Default('test %\\{WORD:action}test ')
    # Token.Group('(%{ASA_TCP_UDP}|%{ASA_ICMP})')
    # Token.Default('  \\(type \\d, code \\d\\)  ')
    # Token.Group('(?:(?:[A-Fa-f0-9]{4}\\.){2}[A-Fa-f0-9]{4})')
    # Token.Default(' some text')

    # A more advanced example: tokenizing a small Markdown document.
    import textwrap
    # Token subclass, so Markdown.Heading, Markdown.Bold, ... are created on
    # first reference.
    class Markdown(Token):
        pass

    # Sample document shared by this example and the next one.
    sample_text = textwrap.dedent("""
        # Heading level 1
        ## Heading level 2
        ## Heading level 2
        I just love **bold text**.
        I just love __bold text__.
        Italicized text is the *cat's meow*.
        Italicized text is the _cat's meow_.
        > Dorothy followed her through many of the beautiful rooms in her castle.
        > 
        > The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.
        - First item
        - Second item
        - Third item
        - Fourth item
        At the command prompt, type `nano`.
        ```
        <html>
        <head>
        </head>
        </html>
        ```
        My favorite search engine is [Duck Duck Go](https://duckduckgo.com).
        test **not a
        heading**
        """)

    # One rule per Markdown construct; the matched text is passed to the
    # category class as-is (markers included).
    mdscanner = Scanner([
        (r'[#]+ .*\n', Markdown.Heading),
        (r'[*]{2}[^*\n]+[*]{2}', Markdown.Bold),
        (r'[_]{2}[^_\n]+[_]{2}', Markdown.Bold),
        (r'[*]{1}[^*\n]+[*]{1}', Markdown.Italics),
        (r'[_]{1}[^_\n]+[_]{1}', Markdown.Italics),
        (r'([>] .*\n)+', Markdown.Blockquote),
        (r'([-] .*\n)+', Markdown.List),
        (r'[`]{1}[^`\n]+[`]{1}', Markdown.Code),
        (r'[`]{3}[^`]+[`]{3}', Markdown.Code),
        (r'\[[^\]]+\]\([^\)]+\)', Markdown.Link),
        (DEFAULT, Markdown.Text)
    ])

    for token in mdscanner(sample_text):
        print(repr(token))

    # Output:
    # Markdown.Text('\n')
    # Markdown.Heading('# Heading level 1\n')
    # Markdown.Heading('## Heading level 2\n')
    # Markdown.Heading('## Heading level 2\n')
    # Markdown.Text('I just love ')
    # Markdown.Bold('**bold text**')
    # Markdown.Text('.\nI just love ')
    # Markdown.Bold('__bold text__')
    # Markdown.Text('.\nItalicized text is the ')
    # Markdown.Italics("*cat's meow*")
    # Markdown.Text('.\nItalicized text is the ')
    # Markdown.Italics("_cat's meow_")
    # Markdown.Text('.\n')
    # Markdown.Blockquote('> Dorothy followed her through many of the beautiful rooms in her castle.\n> \n> The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.\n')
    # Markdown.List('- First item\n- Second item\n- Third item\n- Fourth item\n')
    # Markdown.Text('At the command prompt, type ')
    # Markdown.Code('`nano`')
    # Markdown.Text('.\n')
    # Markdown.Code('```\n<html>\n<head>\n</head>\n</html>\n```')
    # Markdown.Text('\nMy favorite search engine is ')
    # Markdown.Link('[Duck Duck Go](https://duckduckgo.com)')
    # Markdown.Text('.\ntest **not a\nheading**\n')

    # An even more advanced example
    class NewMarkdownBase:
        """Common base of the NewMarkdown token classes.

        An instance holds a printable ``value``.  Built from a regex match
        object, the value is whatever ``extract`` returns for the match's
        ``capturesdict()``; subclasses override ``extract`` to change how the
        value is derived.  Built from anything else, the argument itself is
        stored as the value.
        """
        matches: List[str]
        value: Any

        def __init__(self, obj) -> None:
            if not isinstance(obj, Match):
                # Manual instantiation: the caller supplies the value directly.
                self.value = obj
                return
            self.value = self.extract(obj.capturesdict())

        def extract(self, d) -> Any:
            """Return the first capture recorded for the ``text`` group."""
            text_captures = d['text']
            return text_captures[0]

        def __repr__(self):
            kind = type(self).__name__
            return f'NewMarkdown.{kind}({self.value!r})'

    class NewMarkdown:
        """Namespace holding one token class per Markdown construct.

        Each class carries the regex (``pattern``) or regexes (``patterns``)
        that recognise it; the ``text`` capture group marks the part kept as
        the token's value.
        """

        class Text(str):
            """Plain text, kept as a string."""

            def __repr__(self):
                return f'NewMarkdown.Text({str(self)!r})'

        class Heading(NewMarkdownBase):
            pattern = r'[#]+ (?<text>.*)\n'

        class Italics(NewMarkdownBase):
            patterns = [
                r'[*]{1}(?<text>[^*\n]+)[*]{1}',
                r'[_]{1}(?<text>[^_\n]+)[_]{1}',
            ]

        class Bold(NewMarkdownBase):
            patterns = [
                r'[*]{2}(?<text>[^*\n]+)[*]{2}',
                r'[_]{2}(?<text>[^_\n]+)[_]{2}',
            ]

        class Blockquote(NewMarkdownBase):
            pattern = r'([>] (?<text>.*)\n)+'

            def extract(self, d) -> Any:
                # Every captured line, joined into one string.
                quoted_lines = d['text']
                return '\n'.join(quoted_lines)

        class List(NewMarkdownBase):
            pattern = r'([-] (?<text>.*)\n)+'

            def extract(self, d) -> Any:
                # All captures of the group: one entry per list item.
                return d['text']

        class Code(NewMarkdownBase):
            patterns = [
                r'[`]{1}(?<text>[^`\n]+)[`]{1}',
                r'[`]{3}(?<text>[^`]+)[`]{3}',
            ]

        class Link(NewMarkdownBase):
            pattern = r'\[(?<text>[^\]]+)\]\((?<url>[^\)]+)\)'

            def extract(self, d) -> Any:
                # A link keeps both its label and its target.
                return {'text': d['text'][0], 'url': d['url'][0]}


    # Same constructs as the previous example, but each rule takes its regex
    # from the token class that will wrap the match.
    mdscanner2 = Scanner([
        (NewMarkdown.Heading.pattern, NewMarkdown.Heading),
        (NewMarkdown.Bold.patterns[0], NewMarkdown.Bold),
        (NewMarkdown.Bold.patterns[1], NewMarkdown.Bold),
        (NewMarkdown.Italics.patterns[0], NewMarkdown.Italics),
        (NewMarkdown.Italics.patterns[1], NewMarkdown.Italics),
        (NewMarkdown.Blockquote.pattern, NewMarkdown.Blockquote),
        (NewMarkdown.List.pattern, NewMarkdown.List),
        (NewMarkdown.Code.patterns[0], NewMarkdown.Code),
        (NewMarkdown.Code.patterns[1], NewMarkdown.Code),
        (NewMarkdown.Link.pattern, NewMarkdown.Link),
        (DEFAULT, NewMarkdown.Text)
    ])


    # 'match_object' makes the scanner hand the match object (not the matched
    # text) to each token class, which then reads its capture groups.
    for token in mdscanner2(sample_text, pass_in_option='match_object'):
        print(repr(token))

    # Output:
    # NewMarkdown.Text('\n')
    # NewMarkdown.Heading('Heading level 1')
    # NewMarkdown.Heading('Heading level 2')
    # NewMarkdown.Heading('Heading level 2')
    # NewMarkdown.Text('I just love ')
    # NewMarkdown.Bold('bold text')
    # NewMarkdown.Text('.\nI just love ')
    # NewMarkdown.Bold('bold text')
    # NewMarkdown.Text('.\nItalicized text is the ')
    # NewMarkdown.Italics("cat's meow")
    # NewMarkdown.Text('.\nItalicized text is the ')
    # NewMarkdown.Italics("cat's meow")
    # NewMarkdown.Text('.\n')
    # NewMarkdown.Blockquote('Dorothy followed her through many of the beautiful rooms in her castle.\n\nThe Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.')
    # NewMarkdown.List(['First item', 'Second item', 'Third item', 'Fourth item'])
    # NewMarkdown.Text('At the command prompt, type ')
    # NewMarkdown.Code('nano')
    # NewMarkdown.Text('.\n')
    # NewMarkdown.Code('\n<html>\n<head>\n</head>\n</html>\n')
    # NewMarkdown.Text('\nMy favorite search engine is ')
    # NewMarkdown.Link({'text': 'Duck Duck Go', 'url': 'https://duckduckgo.com'})
    # NewMarkdown.Text('.\ntest **not a\nheading**\n')
	from __future__ import annotations
	from typing import Any, Callable, List, Literal, Tuple, Union, TypeVar, cast
	import regex
	from regex.regex import Match

	DEFAULT = '_SCANNER_DEFAULT_PATTERN' # a sentinel value

	T = TypeVar('T')

	class TokenMeta(type):
	    """Metaclass for dynamic attribute access on a class (not on its instances).

	    On any class that uses this metaclass, referencing a missing attribute
	    whose name starts with an upper-case letter creates a subclass named
	    ``'<parent name>.<attribute>'``, stores it on the parent class under that
	    attribute name and returns it.  Any other missing attribute raises
	    AttributeError.

	    See Token as an example.
	    """

	    def __getattr__(cls: "TokenMeta", name: str) -> "TokenMeta":
	        """Create, register and return the subclass of ``cls`` called ``name``.

	        Only called when normal attribute lookup on the class has failed.

	        Raises:
	            AttributeError: if ``name`` does not start with an upper-case
	                letter (this includes the empty string).
	        """
	        # name[:1] rather than name[0]: an empty name must end in
	        # AttributeError, not IndexError, so that hasattr() and
	        # getattr(cls, name, default) keep working.
	        if not name[:1].isupper():
	            raise AttributeError(
	                f'type object {cls.__name__!r} has no attribute {name!r}'
	            )
	        new_subclass = cast(
	            TokenMeta,
	            type(f'{cls.__name__}.{name}', (cls,), {}),
	        )
	        # Register this subclass as an attribute on its parent class: the next
	        # reference is served by normal lookup and this method is not called.
	        setattr(cls, name, new_subclass)
	        return new_subclass

	class Token(str, metaclass=TokenMeta):
	    """subclass of string, used to categorize different types of strings.

	    Referencing any capital-case attribute of this class returns a subclass
	    named after that attribute.

	    Example:
	        >>> token_string = Token('hello')
	        >>> # all tokens and subclasses are, at the end of the day, strings.
	        >>> isinstance(token_string, str)
	        True
	        >>> # You can define any categories you want
	        >>> other_string = Token.Category('some string')
	        >>> a_third_string = Token.OtherCategory('some other string')
	        >>> # All categories are subclasses of Token (and by extension str)
	        >>> issubclass(Token.Category, Token) and issubclass(Token.OtherCategory, Token)
	        True
	        >>> isinstance(other_string, Token.Category)
	        True
	        >>> isinstance(a_third_string, Token.Category)
	        False
	        >>> # Categories can be arbitrarily nested
	        >>> yet_another_string = Token.Category.SubCategory('hello')
	        >>> # string equality still works between them all
	        >>> token_string == yet_another_string
	        True
	    """

	    def __repr__(self):
	        # Show the category (the class name) wrapped around the usual str repr.
	        return f'{self.__class__.__name__}({super().__repr__()})'


	class Scanner:
	    """Split a string into tokens using a list of ``(pattern, action)`` rules.

	    All rule patterns are combined into a single compiled regex.  Calling the
	    scanner yields, in order of appearance, the result of a rule's action for
	    each fragment matched by that rule's pattern, and the result of the default
	    action (``str`` unless a ``DEFAULT`` rule replaces it) for each run of
	    text that no pattern matched.
	    """

	    def __init__(self, rules: List[Tuple[str, Callable]], flags=regex.VERSION1) -> None:
	        """Register the rules and compile the combined pattern.

	        Args:
	            rules: ``(pattern, action)`` pairs.  A pair whose pattern equals
	                ``DEFAULT`` does not add a pattern; its action becomes the
	                default action.
	            flags: flags handed to ``regex.compile``.
	        """
	        self.patterns = {}
	        self.actions = {}
	        self.default_action = str
	        for index, (pattern, action) in enumerate(rules):
	            if pattern == DEFAULT:
	                self.default_action = action
	                continue
	            # The rule index keeps the generated group names unique.
	            name = f'scanner_pattern{index}'
	            self.patterns[name] = pattern
	            self.actions[name] = action
	        self.scanner = regex.compile(self.assemble_pattern(), flags)

	    def assemble_pattern(self):
	        r"""
	        Construct a regex pattern from a set of sub-patterns, assigning each sub-pattern to a named capture group

	        The named groups are joined as alternatives inside a single ``(?|...)`` group.

	        Example:
	            >>> self.patterns = {
	            ...     "scanner_pattern0": r"\w+",
	            ...     "scanner_pattern1": r"\d{3}"
	            ... }
	            >>> self.assemble_pattern()
	            '(?|(?<scanner_pattern0>\\w+)|(?<scanner_pattern1>\\d{3}))'
	        """
	        # The separator must be a bare '|': an escaped '\|' would match a
	        # literal pipe instead of separating the alternatives.
	        alternates = '|'.join(
	            f'(?<{name}>{pattern})' for name, pattern in self.patterns.items()
	        )
	        return f'(?|{alternates})'

	    def get_pattern_name(self, match_object):
	        """
	        Identify which pattern from the rules list was matched by a given match object

	        The match object's groupdict() contains an entry for each scanner pattern
	        (ex 'scanner_pattern0', 'scanner_pattern1', etc). The first such entry whose
	        value is not None is taken to be the pattern that matched, and its name is returned.

	        Raises:
	            RuntimeError: if no 'scanner_pattern*' group captured anything.
	        """
	        for name, value in match_object.groupdict().items():
	            if name.startswith('scanner_pattern') and value is not None:
	                return name
	        # If we get to this point, something has gone seriously wrong
	        raise RuntimeError("text fragment matched a pattern from the rules list, but was not captured by that pattern")

	    def __call__(self, input_str, pass_in_option: Union[Literal['string'], Literal['match_object']] = 'string'):
	        """Yield the tokens of ``input_str`` in order of appearance.

	        The whole string is scanned in one pass (no re-slicing), so
	        lookbehinds and anchors in the rule patterns see the real surrounding
	        text.  Zero-width matches are skipped: they carry no text, and the
	        characters around them are reported through the default action.

	        Args:
	            input_str: the text to tokenize.
	            pass_in_option: ``'string'`` passes the matched text to the rule's
	                action; any other value passes the match object itself.

	        Yields:
	            The rule action's result for every matched fragment and the
	            default action's result for every run of unmatched text.
	        """
	        pass_text = pass_in_option == 'string'
	        consumed = 0  # index just past the last fragment already yielded
	        for m in self.scanner.finditer(input_str):
	            start, end = m.span()
	            if start == end:
	                # A zero-width match would otherwise stall the scan forever;
	                # ignore it and let finditer move on.
	                continue
	            if start > consumed:
	                # Unmatched text sits between the previous token and this match.
	                yield self.default_action(input_str[consumed:start])
	            action = self.actions[self.get_pattern_name(m)]
	            yield action(m[0] if pass_text else m)
	            consumed = end
	        if consumed < len(input_str):
	            # Trailing text that contains none of the patterns.
	            yield self.default_action(input_str[consumed:])


	if __name__ == "__main__":

	    # Basic example: split a grok-style expression into Grok references,
	    # parenthesised groups and everything else.
	    scanner = Scanner([
	        (r'%\{.*?\}', Token.Grok),
	        (r'(?<!\\)\((?:[^)(]*(?R)?)*+(?<!\\)\)', Token.Group),
	        (DEFAULT, Token.Default)
	    ])

	    testdata = r'%{WORD:action}test %\{WORD:action}test (%{ASA_TCP_UDP}|%{ASA_ICMP})  \(type \d, code \d\)  (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) some text' # noqa

	    for token in scanner(testdata):
	        print(repr(token))

	    # Output:
	    # Token.Grok('%{WORD:action}')
	    # Token.Default('test %\\{WORD:action}test ')
	    # Token.Group('(%{ASA_TCP_UDP}|%{ASA_ICMP})')
	    # Token.Default('  \\(type \\d, code \\d\\)  ')
	    # Token.Group('(?:(?:[A-Fa-f0-9]{4}\\.){2}[A-Fa-f0-9]{4})')
	    # Token.Default(' some text')

	    # A more advanced example: tokenizing a small Markdown document.
	    import textwrap
	    # Token subclass, so Markdown.Heading, Markdown.Bold, ... are created on
	    # first reference.
	    class Markdown(Token):
	        pass

	    # Sample document shared by this example and the next one.  The bare
	    # '> ' line keeps its trailing space: the blockquote rule requires it.
	    sample_text = textwrap.dedent("""
	        # Heading level 1
	        ## Heading level 2
	        ## Heading level 2
	        I just love **bold text**.
	        I just love __bold text__.
	        Italicized text is the *cat's meow*.
	        Italicized text is the _cat's meow_.
	        > Dorothy followed her through many of the beautiful rooms in her castle.
	        > 
	        > The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.
	        - First item
	        - Second item
	        - Third item
	        - Fourth item
	        At the command prompt, type `nano`.
	        ```
	        <html>
	        <head>
	        </head>
	        </html>
	        ```
	        My favorite search engine is [Duck Duck Go](https://duckduckgo.com).
	        test **not a
	        heading**
	        """)

	    # One rule per Markdown construct; the matched text is passed to the
	    # category class as-is (markers included).
	    mdscanner = Scanner([
	        (r'[#]+ .*\n', Markdown.Heading),
	        (r'[*]{2}[^*\n]+[*]{2}', Markdown.Bold),
	        (r'[_]{2}[^_\n]+[_]{2}', Markdown.Bold),
	        (r'[*]{1}[^*\n]+[*]{1}', Markdown.Italics),
	        (r'[_]{1}[^_\n]+[_]{1}', Markdown.Italics),
	        (r'([>] .*\n)+', Markdown.Blockquote),
	        (r'([-] .*\n)+', Markdown.List),
	        (r'[`]{1}[^`\n]+[`]{1}', Markdown.Code),
	        (r'[`]{3}[^`]+[`]{3}', Markdown.Code),
	        (r'\[[^\]]+\]\([^\)]+\)', Markdown.Link),
	        (DEFAULT, Markdown.Text)
	    ])

	    for token in mdscanner(sample_text):
	        print(repr(token))

	    # Output:
	    # Markdown.Text('\n')
	    # Markdown.Heading('# Heading level 1\n')
	    # Markdown.Heading('## Heading level 2\n')
	    # Markdown.Heading('## Heading level 2\n')
	    # Markdown.Text('I just love ')
	    # Markdown.Bold('**bold text**')
	    # Markdown.Text('.\nI just love ')
	    # Markdown.Bold('__bold text__')
	    # Markdown.Text('.\nItalicized text is the ')
	    # Markdown.Italics("*cat's meow*")
	    # Markdown.Text('.\nItalicized text is the ')
	    # Markdown.Italics("_cat's meow_")
	    # Markdown.Text('.\n')
	    # Markdown.Blockquote('> Dorothy followed her through many of the beautiful rooms in her castle.\n> \n> The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.\n')
	    # Markdown.List('- First item\n- Second item\n- Third item\n- Fourth item\n')
	    # Markdown.Text('At the command prompt, type ')
	    # Markdown.Code('`nano`')
	    # Markdown.Text('.\n')
	    # Markdown.Code('```\n<html>\n<head>\n</head>\n</html>\n```')
	    # Markdown.Text('\nMy favorite search engine is ')
	    # Markdown.Link('[Duck Duck Go](https://duckduckgo.com)')
	    # Markdown.Text('.\ntest **not a\nheading**\n')

	    # An even more advanced example
	    class NewMarkdownBase:
	        """Base class for all types of NewMarkdown tokens

	        Sets up the default behaviour for extracting a printable value from a
	        regex match object.  ``extract`` can be overridden to modify the logic
	        for populating the value attribute from the match's captures.
	        """
	        matches: List[str]
	        value: Any

	        def __repr__(self):
	            return f'NewMarkdown.{self.__class__.__name__}({repr(self.value)})'

	        def extract(self, d) -> Any:
	            # Default: the first capture of the 'text' group.
	            return d['text'][0]

	        def __init__(self, obj) -> None:
	            if isinstance(obj, Match):
	                self.value = self.extract(obj.capturesdict())
	            else:
	                # class is being manually instantiated
	                self.value = obj

	    class NewMarkdown:
	        class Text(str):
	            def __repr__(self):
	                return f'NewMarkdown.Text({repr(str(self))})'

	        class Heading(NewMarkdownBase):
	            pattern = r'[#]+ (?<text>.*)\n'

	        class Italics(NewMarkdownBase):
	            patterns = [
	                r'[*]{1}(?<text>[^*\n]+)[*]{1}',
	                r'[_]{1}(?<text>[^_\n]+)[_]{1}'
	            ]

	        class Bold(NewMarkdownBase):
	            patterns = [
	                r'[*]{2}(?<text>[^*\n]+)[*]{2}',
	                r'[_]{2}(?<text>[^_\n]+)[_]{2}'
	            ]

	        class Blockquote(NewMarkdownBase):
	            pattern = r'([>] (?<text>.*)\n)+'

	            def extract(self, d) -> Any:
	                return '\n'.join(d['text'])

	        class List(NewMarkdownBase):
	            pattern = r'([-] (?<text>.*)\n)+'

	            def extract(self, d) -> Any:
	                return d['text']

	        class Code(NewMarkdownBase):
	            patterns = [
	                r'[`]{1}(?<text>[^`\n]+)[`]{1}',
	                r'[`]{3}(?<text>[^`]+)[`]{3}'
	            ]

	        class Link(NewMarkdownBase):
	            pattern = r'\[(?<text>[^\]]+)\]\((?<url>[^\)]+)\)'

	            def extract(self, d) -> Any:
	                text = d['text'][0]
	                url = d['url'][0]
	                return dict(text=text, url=url)

	    # Each rule takes its regex from the token class that will wrap the match.
	    mdscanner2 = Scanner([
	        (NewMarkdown.Heading.pattern, NewMarkdown.Heading),
	        (NewMarkdown.Bold.patterns[0], NewMarkdown.Bold),
	        (NewMarkdown.Bold.patterns[1], NewMarkdown.Bold),
	        (NewMarkdown.Italics.patterns[0], NewMarkdown.Italics),
	        (NewMarkdown.Italics.patterns[1], NewMarkdown.Italics),
	        (NewMarkdown.Blockquote.pattern, NewMarkdown.Blockquote),
	        (NewMarkdown.List.pattern, NewMarkdown.List),
	        (NewMarkdown.Code.patterns[0], NewMarkdown.Code),
	        (NewMarkdown.Code.patterns[1], NewMarkdown.Code),
	        (NewMarkdown.Link.pattern, NewMarkdown.Link),
	        (DEFAULT, NewMarkdown.Text)
	    ])

	    # 'match_object' makes the scanner hand the match object (not the matched
	    # text) to each token class, which then reads its capture groups.
	    for token in mdscanner2(sample_text, pass_in_option='match_object'):
	        print(repr(token))

	    # Output:
	    # NewMarkdown.Text('\n')
	    # NewMarkdown.Heading('Heading level 1')
	    # NewMarkdown.Heading('Heading level 2')
	    # NewMarkdown.Heading('Heading level 2')
	    # NewMarkdown.Text('I just love ')
	    # NewMarkdown.Bold('bold text')
	    # NewMarkdown.Text('.\nI just love ')
	    # NewMarkdown.Bold('bold text')
	    # NewMarkdown.Text('.\nItalicized text is the ')
	    # NewMarkdown.Italics("cat's meow")
	    # NewMarkdown.Text('.\nItalicized text is the ')
	    # NewMarkdown.Italics("cat's meow")
	    # NewMarkdown.Text('.\n')
	    # NewMarkdown.Blockquote('Dorothy followed her through many of the beautiful rooms in her castle.\n\nThe Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.')
	    # NewMarkdown.List(['First item', 'Second item', 'Third item', 'Fourth item'])
	    # NewMarkdown.Text('At the command prompt, type ')
	    # NewMarkdown.Code('nano')
	    # NewMarkdown.Text('.\n')
	    # NewMarkdown.Code('\n<html>\n<head>\n</head>\n</html>\n')
	    # NewMarkdown.Text('\nMy favorite search engine is ')
	    # NewMarkdown.Link({'text': 'Duck Duck Go', 'url': 'https://duckduckgo.com'})
	    # NewMarkdown.Text('.\ntest **not a\nheading**\n')