Skip to content

Instantly share code, notes, and snippets.

@amcgregor
Last active June 28, 2021 16:12
Show Gist options
  • Save amcgregor/11d72ee69fd928547e08e68bca53e043 to your computer and use it in GitHub Desktop.
Save amcgregor/11d72ee69fd928547e08e68bca53e043 to your computer and use it in GitHub Desktop.
A declarative description of Operational Transform (OT) operations, developed in the context of Quill's Delta format for representing changes to rich text content.
"""An implementation of the Delta text markup format using MongoDB as back-end storage.
For details, please see:
- https://quilljs.com/guides/designing-the-delta-format/
- https://quilljs.com/docs/delta/
"""
from collections import deque
from weakref import proxy
from marrow.mongo import Document, Field
from marrow.mongo.field import String, Embed as Embed_, Array, Integer
from marrow.mongo.trait import Derived
class Operation(Derived):
    """Base class for a single Delta operation: an operation name, its value, and optional attributes.

    Subclasses declare which operation they represent by adapting `op` with a default value. Each subclass
    doing so is recorded in `OPS` (see `__attributed__`), which lets `from_json` dispatch on the operation key
    found in an incoming mapping.
    """

    OPS = {}  # Registry of operation name -> weak proxy of the class that first declared it.

    # NOTE: field declaration order is kept exactly as originally written.
    op = String(choices=OPS, default=None, positional=False, assign=True, repr=False)
    embed = String(default=None, positional=False, repr=False)  # TODO: choices
    value = Field()
    attributes = Embed_(Document)

    def __init__(self, *args, **kw):
        """Initialize an operation using a shortcut syntax.

        Positional arguments are handed to the parent initializer unchanged. Keyword arguments are collected
        and passed as the `attributes` of the operation; when none are given, `attributes` is passed as `None`.
        """
        super(Operation, self).__init__(*args, attributes=kw or None)

    @classmethod
    def from_json(cls, value):
        """Build an operation instance from its JSON (mapping) representation.

        The mapping must contain exactly one key other than `attributes`; that key names the operation and its
        value becomes the operation's `value`. If `cls` does not itself declare that operation, construction is
        delegated to the class registered for it in `OPS` and the delegate's result is returned.

        Raises `ValueError` if the mapping does not contain exactly one operation key, and `KeyError` if the
        operation is not registered.
        """
        payload = dict(value)  # Shallow copy; the caller's mapping is never mutated.

        # Tuple-unpacking enforces "exactly one operation key".
        op, = set(payload) - {'attributes'}

        # Compare declared operation names rather than using isinstance(): the registry holds weakref proxies,
        # not the classes themselves. The full payload (operation key included) is handed to the delegate.
        declared = getattr(cls.__fields__['op'], 'default', None)
        if declared != op:
            return cls.OPS[op].from_json(payload)

        instance = cls()
        instance.op = op
        instance.value = payload[op]

        if 'attributes' in payload:
            instance.attributes = Document.from_json(payload['attributes'])

        return instance

    @property
    def as_json(self):
        """Return the mapping form of this operation: `{op: value}`, plus `attributes` when non-empty.

        When `embed` is set, the value is wrapped as `{embed: value}`.
        """
        value = {self.embed: self.value} if self.embed else self.value
        result = {self.op: value}

        if self.attributes:
            result['attributes'] = self.attributes

        return result

    @property
    def length(self):  # Optimization for delete/retain cases.
        """Return the stored value itself, which for delete/retain is the count."""
        return self.value

    @classmethod
    def __attributed__(cls):
        """Post-construction hook: register `cls` in `OPS` if its `op` field declares a default name."""
        super(Operation, cls).__attributed__()

        # Automatically register subclasses as valid operations. The first class to declare a given name
        # wins (setdefault), so subclasses that inherit the name do not replace the original registration.
        field = cls.__fields__['op']
        name = getattr(field, 'default', None)

        if name:
            Operation.OPS.setdefault(name, proxy(cls))
class Insert(Operation):
    """A representation of the addition of a block of content to the stream.

    Define the text inserted as the first positional parameter, and optional attributes as keyword arguments.

        Insert("Google", link="https://www.google.com/")  # Insert a link.
        Insert("Gandalf", bold=True)  # Insert bolded text.
        Insert(" the ")  # Insert normal text.
        Insert("Grey", color='#ccc')  # Insert grey text.
        Insert("\\n")  # Insert a paragraph break.

    In the resulting JSON, only insert operations have an `insert` key defined. A string value represents inserting
    text. Any other type represents inserting an embed, please reference the `Embed` class below. In both cases an
    optional `attributes` key can be defined with an embedded object to describe additional formatting information.
    Formats can be changed by the retain operation, see `Retain` below.

        Insert("Gandalf", bold=True).as_json
        {"insert": "Gandalf", "attributes": {"bold": true}}
    """

    op = Operation.op.adapt(default='insert')

    @property
    def length(self):
        """Return the textual length of the content represented by this insert."""
        return len(self.value)

    @classmethod
    def from_json(cls, value):
        """Transform a JSON-encoded operation into an Operation instance.

        This specific version can differentiate between `Insert` and `Embed` constructs: a string value yields
        a plain insert, anything else is rebuilt as an `Embed`. For an `Embed` whose value is a single-entry
        mapping `{kind: content}` (the shape `as_json` emits), the entry is unpacked into `embed` and `value`.
        """
        instance = super(Insert, cls).from_json(value)

        # Nothing further to do for non-insert results, already-identified embeds, or plain text.
        if not isinstance(instance, Insert) or instance.embed or isinstance(instance.value, str):
            return instance

        # Non-textual content built as a plain Insert: rebuild it as an Embed.
        if not isinstance(instance, Embed):
            return Embed.from_json(value)

        # Already an Embed; unpack here instead of re-dispatching, which would never terminate.
        raw = instance.value
        if isinstance(raw, dict) and len(raw) == 1:
            (instance.embed, instance.value), = raw.items()

        return instance
class Embed(Insert):
    """An insert of non-textual content, identified by an embed kind plus a value.

    The kind is given as the first positional argument, the value as the second, and any attributes as
    keyword arguments.

        Embed('image', 'https://octodex.github.com/images/labtocat.png', alt="Lab Octocat")

        Embed('image', '/assets/img/icon.png', link='/').as_json
        {"insert": {"image": "/assets/img/icon.png"}, "attributes": {"link": "/"}}
    """

    # Re-declare the inherited field so the embed kind is positional and shown in the repr.
    embed = Operation.embed.adapt(positional=True, repr=True)

    # Plain class attribute replacing the inherited `length` property.
    length = 1  # The length of any embed is always one.
class Delete(Operation):
    """Remove a run of characters from the stream.

        Delete(10)  # Delete the next 10 characters.
    """

    # Declares 'delete' as this class's operation name.
    op = Operation.op.adapt(default='delete')

    # The count is stored as an integer.
    value = Integer()

    attributes = None  # Deletions have no additional attributes.
class Retain(Operation):
    """Keep a run of characters in place, optionally changing their attributes.

        # Unbold and italicize "Gandalf".
        Retain(7, bold=None, italic=True).as_json
        {"retain": 7, "attributes": {"bold": null, "italic": true}}

        # Now keep " the ", insert "White", and delete "Grey".
        Retain(5)
        Insert("White", color="#fff")
        Delete(4)
    """

    # Declares 'retain' as this class's operation name.
    op = Operation.op.adapt(default='retain')

    # The count is stored as an integer.
    value = Integer()
class Delta(Document):
    """An ordered list of operations, with chainable helpers for appending to it."""

    ops = Array(Operation, default=lambda: [], assign=True)

    def insert(self, value, **attributes):
        """Append an insert operation while maintaining the ability to chain calls.

        Please refer to the docstring of the `Insert` class for details.
        """
        self.ops.append(Insert(value, **attributes))
        return self

    def embed(self, kind, value, **attributes):
        """Append an embed insert operation while maintaining the ability to chain calls.

        Please refer to the docstring of the `Embed` class for details.
        """
        self.ops.append(Embed(kind, value, **attributes))
        return self

    def retain(self, count, **attributes):
        """Append a retain operation while maintaining the ability to chain calls.

        Please refer to the docstring of the `Retain` class for details.
        """
        self.ops.append(Retain(count, **attributes))
        return self

    def delete(self, count):
        """Append a delete operation while maintaining the ability to chain calls.

        Please refer to the docstring of the `Delete` class for details.
        """
        self.ops.append(Delete(count))
        return self  # Required for chaining, consistent with insert/embed/retain.

    def apply(self, delta):
        """Placeholder: not implemented yet; currently does nothing and returns `None`."""
        pass

    def diff(self, other):
        """Placeholder: not implemented yet; currently does nothing and returns `None`."""
        pass

    @property
    def fragments(self):
        """Iterate `(index, operation)` pairs, where index is the running total of preceding lengths."""
        index = 0

        for fragment in self.ops:
            yield index, fragment
            index += fragment.length

    @property
    def lines(self):
        """A read-only view of an at-rest document.

        For efficiency sake this repeatedly fills and drains a buffer to gather elements for a line, then yields the
        paragraph attributes and an iterator across that buffer prior to clearing it.

        Each item yielded is `(attributes, iterator)`, where the attributes are those of the newline insert ending
        the line and the iterator produces the line's `(offset, fragment)` pairs. Because the buffer is reused,
        consume each iterator before advancing to the next line.

        Only an insert whose value is exactly "\\n" ends a line. Fragments after the final such insert are not
        yielded.

        Raises `ValueError` on encountering any operation other than an insert.
        """
        buf = []

        for offset, fragment in self.fragments:
            if fragment.op != 'insert':
                raise ValueError("Not a root document, encountered: " + fragment.op)

            # Anything other than a bare newline (including embeds) is line content.
            if fragment.value != "\n":
                buf.append((offset, fragment))
                continue

            yield fragment.attributes, iter(buf)
            buf.clear()
if __name__ == '__main__':
    # Demonstration: build a small two-line document, then print it line by line honouring alignment.
    document = Delta().insert("Hello", bold=True).insert(" world!")
    document.insert("\n", align='right')
    document.insert("This is a demo of Delta storage.")
    document.insert("\n", align='left')
    document.embed('image', 'monkey.png', alt="Funny monkey picture.")

    for attrs, fragments in document.lines:
        line = "".join(str(fragment.value) for offset, fragment in fragments)

        # A newline inserted without keyword arguments carries no attributes at all.
        if attrs and 'align' in attrs:
            if attrs['align'] == 'right':
                line = line.rjust(80)

        print(line)

    print()

    # Construct a document with the text "Gandalf the Grey", Gandalf in bold, Grey in grey.
    document = Delta().insert("Gandalf", bold=True).insert(" the ").insert("Grey", color='#ccc')

    # Change the text and representative color.
    death = Delta().retain(12).delete(4).insert("White", color='#fff')
    document.apply(death)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment