Last active
September 20, 2017 01:33
-
-
Save dagss/5008118 to your computer and use it in GitHub Desktop.
marked_yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
A PyYAML loader that annotates position in source code.

The loader is based on `SafeConstructor`, i.e., the behaviour of
`yaml.safe_load`, but in addition:

 - Every dict/list/unicode is replaced with dict_node/list_node/unicode_node,
   which subclasses dict/list/unicode to add the attributes `start_mark`
   and `end_mark`. (See the yaml.error module for the `Mark` class.)

 - Every string is always returned as unicode, no ASCII-fication is
   attempted.

 - Note that int/bool/... are returned unchanged for now
"""
from yaml.composer import Composer | |
from yaml.reader import Reader | |
from yaml.scanner import Scanner | |
from yaml.composer import Composer | |
from yaml.resolver import Resolver | |
from yaml.parser import Parser | |
from yaml.constructor import Constructor, BaseConstructor, SafeConstructor | |
def create_node_class(cls):
    """Create a subclass of ``cls`` whose instances remember source marks.

    The returned class is constructed as ``node_class(x, start_mark,
    end_mark)``: ``x`` is handed to ``cls`` to build the value, and the two
    marks are stored on the instance as ``start_mark`` and ``end_mark``.
    The class is named ``'<cls name>_node'``.

    :param cls: base type to extend (e.g. ``dict``, ``list`` or a string type)
    :returns: the new subclass of ``cls``
    """
    class node_class(cls):
        def __new__(node_cls, x, start_mark, end_mark):
            # Immutable bases (strings) receive their value here; the marks
            # must not be forwarded to the base constructor.
            return cls.__new__(node_cls, x)

        def __init__(self, x, start_mark, end_mark):
            # Bases that merely inherit object.__init__ (e.g. string types)
            # reject extra arguments on Python 3, so only forward ``x`` when
            # the base really has its own initializer (dict, list, ...).
            if cls.__init__ is not object.__init__:
                cls.__init__(self, x)
            self.start_mark = start_mark
            self.end_mark = end_mark

    node_class.__name__ = '%s_node' % cls.__name__
    return node_class
# The text type is spelled ``unicode`` throughout this module. That builtin
# does not exist on Python 3, where ``str`` is the unicode text type, so fall
# back to it instead of failing with NameError at import time.
try:
    unicode
except NameError:
    unicode = str

# Mark-carrying counterparts of the container/text types the loader produces.
dict_node = create_node_class(dict)
list_node = create_node_class(list)
unicode_node = create_node_class(unicode)
class NodeConstructor(SafeConstructor):
    """SafeConstructor variant returning mark-annotated maps, seqs and strings.

    The inherited map/seq constructors are generators: to support lazy
    loading they first yield an empty object and only fill it in when
    iterated further. That laziness is not reproduced here; each generator
    is run to completion and a copy of the finished object (wrapped in the
    matching ``*_node`` type) is returned, i.e. only "deep construction"
    is done.
    """

    def construct_yaml_str(self, node):
        # Take the raw scalar directly and wrap it with the node's marks.
        text = SafeConstructor.construct_scalar(self, node)
        assert isinstance(text, unicode)
        return unicode_node(text, node.start_mark, node.end_mark)

    def construct_yaml_seq(self, node):
        # Single-element unpacking exhausts the generator, so the yielded
        # list is fully populated before it is copied.
        [items] = SafeConstructor.construct_yaml_seq(self, node)
        return list_node(items, node.start_mark, node.end_mark)

    def construct_yaml_map(self, node):
        # Same approach as for sequences: drain the generator, then copy.
        [mapping] = SafeConstructor.construct_yaml_map(self, node)
        return dict_node(mapping, node.start_mark, node.end_mark)
# Register the mark-annotating constructors for the map, seq and str tags.
for _tag, _constructor in (
        (u'tag:yaml.org,2002:map', NodeConstructor.construct_yaml_map),
        (u'tag:yaml.org,2002:seq', NodeConstructor.construct_yaml_seq),
        (u'tag:yaml.org,2002:str', NodeConstructor.construct_yaml_str),
):
    NodeConstructor.add_constructor(_tag, _constructor)
# Do not leave the loop variables behind in the module namespace.
del _tag, _constructor
class MarkedLoader(Reader, Scanner, Parser, Composer, NodeConstructor, Resolver):
    """Loader combining the PyYAML pipeline stages with NodeConstructor."""

    def __init__(self, stream):
        """Initialize every pipeline component; ``stream`` is the YAML input."""
        # The reader is the only component that is given the stream.
        Reader.__init__(self, stream)
        # The remaining components are initialized without arguments, in the
        # same order as before.
        for component in (Scanner, Parser, Composer, SafeConstructor, Resolver):
            component.__init__(self)
def marked_load(stream):
    """Load a single YAML document from ``stream`` with position marks.

    :param stream: YAML input, passed straight to ``MarkedLoader``
    :returns: whatever ``MarkedLoader.get_single_data()`` produces for the
        document
    """
    loader = MarkedLoader(stream)
    try:
        return loader.get_single_data()
    finally:
        # Same cleanup as yaml.load(): release the loader's internal state
        # even when parsing or construction raises.
        loader.dispose()
def test_marked_yaml():
    """Check the loaded values, their types and their recorded positions."""
    def loc(obj):
        # (start line, start column, end line, end column) of a marked node.
        return (obj.start_mark.line, obj.start_mark.column,
                obj.end_mark.line, obj.end_mark.column)

    # note: test very sensitive to whitespace in string below. The column
    # numbers asserted further down require the top-level keys to start at
    # column 4 and the nested values at column 6, with no trailing newline.
    d = marked_load(
'''\
    a:
      [b, c, {d: e}]
    f:
      g: h''')

    assert d == {'a': ['b', 'c', {'d': 'e'}], 'f': {'g': 'h'}}
    assert loc(d['a'][2]['d']) == (1, 17, 1, 18)
    assert loc(d) == (0, 4, 3, 10)
    assert loc(d['a']) == (1, 6, 1, 20)

    assert isinstance(d['a'][2]['d'], unicode)
    assert isinstance(d, dict)
    assert isinstance(d['f'], dict)
    assert isinstance(d['a'], list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could I use this in another project? Are there any licensing requirements, and if so, would the Apache v2.0 license be OK?