Skip to content

Instantly share code, notes, and snippets.

@dagss
Last active September 20, 2017 01:33
Show Gist options
  • Save dagss/5008118 to your computer and use it in GitHub Desktop.
Save dagss/5008118 to your computer and use it in GitHub Desktop.
marked_yaml
"""
A PyYAML loader that annotates position in source code.
The loader is based on `SafeConstructor`, i.e., the behaviour of
`yaml.safe_load`, but in addition:
- Every dict/list/unicode is replaced with a dict_node/list_node/unicode_node,
  which subclass dict/list/unicode to add the attributes `start_mark`
  and `end_mark`. (See the yaml.error module for the `Mark` class.)
- Every string is always returned as unicode; no conversion to an ASCII
  byte string is attempted.
- Note that int/bool/... are returned unchanged (without marks) for now.
"""
from yaml.composer import Composer
from yaml.reader import Reader
from yaml.scanner import Scanner
from yaml.composer import Composer
from yaml.resolver import Resolver
from yaml.parser import Parser
from yaml.constructor import Constructor, BaseConstructor, SafeConstructor
def create_node_class(cls):
    """Create a subclass of `cls` whose instances carry source positions.

    The returned class is named ``<cls.__name__>_node`` and is constructed
    as ``node_class(x, start_mark, end_mark)``: `x` is handed to `cls` to
    build the value, and the two marks are stored on the instance as the
    attributes `start_mark` and `end_mark`.

    Works for mutable bases that are filled in by ``__init__`` (dict, list)
    as well as immutable bases whose value is fixed in ``__new__``
    (unicode/str, int, tuple, ...).
    """
    # Immutable builtins do not define their own __init__; they inherit
    # object.__init__, which rejects extra arguments once __init__ is
    # overridden in a subclass (TypeError on Python 3, DeprecationWarning on
    # Python 2.7). Their value is already set by __new__, so the base
    # initialiser is only invoked when the base class really has one.
    base_has_init = cls.__init__ is not object.__init__

    class node_class(cls):
        def __new__(klass, x, start_mark, end_mark):
            # The marks are consumed by __init__; only the value is passed on.
            return cls.__new__(klass, x)

        def __init__(self, x, start_mark, end_mark):
            if base_has_init:
                cls.__init__(self, x)
            self.start_mark = start_mark
            self.end_mark = end_mark

    node_class.__name__ = '%s_node' % cls.__name__
    return node_class
# Python 3 has no separate ``unicode`` type (``str`` is the text type there),
# so alias the name when it is missing. On Python 2 the builtin is found and
# left untouched, keeping the original behaviour.
try:
    unicode
except NameError:
    unicode = str

# Position-annotated counterparts of the builtin mapping, sequence and text
# types; each instance exposes `start_mark` and `end_mark`.
dict_node = create_node_class(dict)
list_node = create_node_class(list)
unicode_node = create_node_class(unicode)
class NodeConstructor(SafeConstructor):
    """SafeConstructor variant whose maps, sequences and strings keep marks.

    Each method below builds the plain Python value with the inherited
    SafeConstructor machinery and then wraps it in the matching ``*_node``
    class, attaching the `start_mark` / `end_mark` of the YAML node.
    """

    # The stock map/sequence constructors support lazy loading: they first
    # yield an empty container and only fill it in when iterated further.
    # That two-phase behaviour is not reproduced here. The iterator is
    # exhausted up front and a copy of the finished container is returned,
    # so only "deep construction" is available.

    def construct_yaml_map(self, node):
        """Return a `dict_node` for the mapping node `node`."""
        # Unpacking into a one-element target exhausts the iterator, so the
        # dict it yielded is complete before being copied.
        [mapping] = SafeConstructor.construct_yaml_map(self, node)
        return dict_node(mapping, node.start_mark, node.end_mark)

    def construct_yaml_seq(self, node):
        """Return a `list_node` for the sequence node `node`."""
        # Same exhaust-then-copy approach as for mappings.
        [sequence] = SafeConstructor.construct_yaml_seq(self, node)
        return list_node(sequence, node.start_mark, node.end_mark)

    def construct_yaml_str(self, node):
        """Return a `unicode_node` for the string scalar `node`."""
        # construct_scalar is called directly so the text is kept as-is.
        text = SafeConstructor.construct_scalar(self, node)
        assert isinstance(text, unicode)
        return unicode_node(text, node.start_mark, node.end_mark)
# Route the core YAML map, sequence and string tags through the
# mark-preserving constructors defined on NodeConstructor.
for _marked_tag, _marked_constructor in (
    (u'tag:yaml.org,2002:map', NodeConstructor.construct_yaml_map),
    (u'tag:yaml.org,2002:seq', NodeConstructor.construct_yaml_seq),
    (u'tag:yaml.org,2002:str', NodeConstructor.construct_yaml_str),
):
    NodeConstructor.add_constructor(_marked_tag, _marked_constructor)
# Do not leave the loop variables behind in the module namespace.
del _marked_tag, _marked_constructor
class MarkedLoader(Reader, Scanner, Parser, Composer, NodeConstructor, Resolver):
    """YAML loader assembled from the PyYAML pipeline stages plus NodeConstructor.

    Construction is delegated to `NodeConstructor`, so the loaded dicts,
    lists and strings carry `start_mark` / `end_mark` attributes.
    """

    def __init__(self, stream):
        """Initialise every pipeline component for reading from `stream`."""
        # Only the reader takes the input; it is set up first.
        Reader.__init__(self, stream)
        # The remaining components take no arguments and are initialised
        # explicitly, one by one, in the same order as before.
        # NodeConstructor defines no __init__ of its own, so SafeConstructor's
        # initialiser is the one that applies to it.
        for component in (Scanner, Parser, Composer, SafeConstructor, Resolver):
            component.__init__(self)
def marked_load(stream):
    """Load the single YAML document in `stream` with source marks attached.

    `stream` is passed unchanged to `MarkedLoader` (and from there to the
    PyYAML `Reader`); presumably anything `yaml.safe_load` accepts works
    here too -- verify against the PyYAML version in use.

    Returns whatever `MarkedLoader.get_single_data()` produces: dicts, lists
    and strings come back as `dict_node` / `list_node` / `unicode_node`
    instances exposing `start_mark` and `end_mark`.
    """
    loader = MarkedLoader(stream)
    try:
        return loader.get_single_data()
    finally:
        # Mirror yaml.load(): dispose of the loader once the document has
        # been built (or loading failed) so its internal parser state is
        # released promptly instead of lingering in reference cycles.
        loader.dispose()
def test_marked_yaml():
    """Check both the loaded values and the source marks attached to them."""

    def loc(obj):
        """Return (start line, start column, end line, end column) of `obj`."""
        start, end = obj.start_mark, obj.end_mark
        return (start.line, start.column, end.line, end.column)

    # The asserted marks depend on the exact indentation of the document
    # (4 spaces before the keys, 6 before the values). Each line is spelled
    # out as its own literal so that re-indenting or whitespace-stripping
    # this file cannot silently change the YAML under test.
    document = (
        '    a:\n'
        '      [b, c, {d: e}]\n'
        '    f:\n'
        '      g: h'
    )
    d = marked_load(document)

    # Values compare equal to their plain-Python counterparts.
    assert d == {'a': ['b', 'c', {'d': 'e'}], 'f': {'g': 'h'}}

    # Marks are 0-based (line, column) pairs; the end mark is exclusive.
    assert loc(d['a'][2]['d']) == (1, 17, 1, 18)
    assert loc(d) == (0, 4, 3, 10)
    assert loc(d['a']) == (1, 6, 1, 20)

    # The node classes remain instances of the builtin types they wrap.
    assert isinstance(d['a'][2]['d'], unicode)
    assert isinstance(d, dict)
    assert isinstance(d['f'], dict)
    assert isinstance(d['a'], list)
@zdw
Copy link

zdw commented Sep 20, 2017

Could I use this in another project? Are there any licensing requirements, and if so, would the Apache v2.0 license be OK?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment