I am developing an asset management system with the Python web framework Django. The goal of AssetStorm is to model structured documents like magazine articles in a verifiable structure without having to specify the structure itself in the code. AssetStorm achieves this goal with objects of the type AssetType
. They store the information how an asset may be structured as JSON with the IDs of other AssetType
as references. The JSON structure contains named keys and lists with only one ID which means that this AssetType
may be repeated as often as needed.
The data itself is stored as objects of type Asset
which also contain JSON. This JSON stores the actual primary keys of other assets. This allows any tree structure of assets with typed branches. The structure can be verified using the information from the corresponding AssetType
.
This software already works great. However I want to improve it by allowing versioned updates of the content. I want the software to be usable as a collaborative editor where two or more users can edit the same asset at the same time without locks. This can be achieved with atomic change objects similar to Ether Pad or Google Docs. In a first step I want to modify the file models.py
which looks like this at the moment:
# -*- coding: utf-8 -*-
from django.db import models
from django.contrib.postgres.fields import JSONField, ArrayField
import uuid
import re
class AssetType(models.Model):
    """Describes how assets of one kind are structured.

    Fields:
        type_name: unique name of the type.
        parent_type: optional reference to another ``AssetType``.
        schema: JSON description of the structure; may be unset.
        templates: JSON mapping of template keys to template strings.
    """

    type_name = models.CharField(unique=True, max_length=128)
    parent_type = models.ForeignKey('self', related_name="children", on_delete=models.SET_NULL,
                                    blank=True, null=True)
    schema = JSONField(blank=True, null=True)
    templates = JSONField(default=dict)

    def __str__(self):
        """Return a short label; "?" marks a missing schema, "!" a present one."""
        schema_str = "?" if self.schema is None else "!"
        # "%s" instead of "%d": pk is None until the row has been saved and
        # "%d" % None raises TypeError. For integer keys the output is identical.
        return "<AssetType %s: %s %s>" % (self.pk, self.type_name, schema_str)
class EnumType(models.Model):
    """An enumeration type: the list of text items that are valid for it."""

    items = ArrayField(base_field=models.TextField())
class Asset(models.Model):
    """A typed node of content whose structure follows the schema of ``t``.

    ``content_ids`` maps each schema key to the primary key (or list of
    primary keys) of the referenced object. The schema value decides what is
    referenced: ``1`` is a ``Text``, ``2`` a ``UriElement``, a dict containing
    the key ``"3"`` an ``Enum`` and anything else another ``Asset``.
    The ``*_reference_list`` fields record which objects were read while the
    content cache was built.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    t = models.ForeignKey(AssetType, on_delete=models.CASCADE, related_name="assets")
    content_ids = JSONField(blank=True, null=True)
    content_cache = JSONField(blank=True, null=True)
    text_reference_list = ArrayField(models.IntegerField(), default=list)
    uri_reference_list = ArrayField(models.IntegerField(), default=list)
    enum_reference_list = ArrayField(models.IntegerField(), default=list)
    asset_reference_list = ArrayField(
        models.UUIDField(default=uuid.uuid4, editable=False, blank=False, null=False), default=list)
    revision_chain = models.ForeignKey("self", on_delete=models.SET_NULL,
                                       related_name="new_version", blank=True, null=True)
    raw_content_cache = models.TextField(null=True, default=None)

    def clear_reference_lists(self):
        """Empty all four reference lists in memory (does not save)."""
        self.text_reference_list.clear()
        self.uri_reference_list.clear()
        self.enum_reference_list.clear()
        self.asset_reference_list.clear()

    def register_reference_to_text(self, text):
        """Remember that this asset uses ``text`` (no duplicates)."""
        if text.pk not in self.text_reference_list:
            self.text_reference_list.append(text.pk)

    def register_reference_to_uri(self, uri):
        """Remember that this asset uses ``uri`` (no duplicates)."""
        if uri.pk not in self.uri_reference_list:
            self.uri_reference_list.append(uri.pk)

    def register_reference_to_enum(self, enum):
        """Remember that this asset uses ``enum`` (no duplicates)."""
        if enum.pk not in self.enum_reference_list:
            self.enum_reference_list.append(enum.pk)

    def register_reference_to_sub_asset(self, sub_asset):
        """Remember that this asset uses ``sub_asset`` (no duplicates)."""
        if sub_asset.pk not in self.asset_reference_list:
            self.asset_reference_list.append(sub_asset.pk)

    def get_asset_content(self, content_type, content_id):
        """Load the object ``content_id`` refers to and return its content.

        ``content_type`` is the schema entry for the key. The loaded object is
        registered in the matching reference list as a side effect.
        """
        if content_type == 1:  # text
            text = Text.objects.get(pk=content_id)
            self.register_reference_to_text(text)
            return text.text
        if content_type == 2:  # uri-element
            uri_element = UriElement.objects.get(pk=content_id)
            self.register_reference_to_uri(uri_element)
            return uri_element.uri
        if isinstance(content_type, dict) and "3" in content_type:  # enum
            enum = Enum.objects.get(pk=content_id)
            self.register_reference_to_enum(enum)
            return enum.item
        sub_asset = Asset.objects.get(pk=uuid.UUID(content_id))
        self.register_reference_to_sub_asset(sub_asset)
        return sub_asset.content

    @property
    def content(self):
        """Return the resolved content tree, building and saving the cache if needed."""
        if self.content_cache is not None:
            return self.content_cache
        self.clear_reference_lists()
        self.content_cache = {
            'type': self.t.type_name,
            'id': str(self.pk)
        }
        schema = self.t.schema
        for k, ids in self.content_ids.items():
            if isinstance(schema[k], list):
                # A list schema holds exactly one entry: the type of every item.
                asset_content = [self.get_asset_content(schema[k][0], e) for e in ids]
            else:
                # Enums and all other scalar entries are resolved the same way.
                asset_content = self.get_asset_content(schema[k], ids)
            self.content_cache[k] = asset_content
        self.save()
        return self.content_cache

    def clear_cache(self):
        """Drop both caches of this asset and of every asset that references it."""
        for asset in Asset.objects.filter(asset_reference_list__contains=[self.pk]):
            asset.clear_cache()
        self.content_cache = None
        self.raw_content_cache = None
        self.clear_reference_lists()
        self.save()

    def render_template(self, template_key="raw"):
        """Render this asset with the template stored under ``template_key``.

        Returns an empty string when the type has no such template. ``{{key}}``
        is replaced by the content of that key and
        ``{{for(key)}}...{{endfor}}`` is repeated for every item of a list key.
        Only the "raw" rendering is cached (and saved) on the asset.
        """
        def get_key_content(type_id, pk):
            if type_id == 1:
                return Text.objects.get(pk=pk).text
            if type_id == 2:
                return UriElement.objects.get(pk=pk).uri
            if isinstance(type_id, dict) and "3" in type_id:
                return Enum.objects.get(pk=pk).item
            return Asset.objects.get(pk=pk).render_template(template_key=template_key)

        if template_key not in self.t.templates:
            return ""
        if template_key == "raw" and self.raw_content_cache is not None:
            return self.raw_content_cache
        consumable_template = self.t.templates[template_key]
        for key, key_type in self.t.schema.items():
            # Escape the key so that regex metacharacters in a schema key are
            # matched literally instead of changing the pattern.
            escaped_key = re.escape(key)
            key_list_regex = r"^(?P<start_part>[\s\S]*?){{for\(" + escaped_key + \
                             r"\)}}(?P<list_template>[\s\S]*?){{endfor}}(?P<end_part>[\s\S]*)"
            key_regex = r"^(?P<start_part>[\s\S]*?){{" + escaped_key + r"}}(?P<end_part>[\s\S]*)"
            list_matches = re.match(key_list_regex, consumable_template, re.MULTILINE)
            while list_matches and isinstance(key_type, list):
                list_content = ""
                for pk in self.content_ids[key]:
                    consumable_list_template = list_matches.group("list_template")
                    matches = re.match(key_regex, consumable_list_template, re.MULTILINE)
                    while matches:
                        consumable_list_template = matches.group("start_part") + get_key_content(
                            key_type[0], pk) + matches.group("end_part")
                        matches = re.match(key_regex, consumable_list_template, re.MULTILINE)
                    list_content += consumable_list_template
                consumable_template = list_matches.group("start_part") + \
                    list_content + \
                    list_matches.group("end_part")
                list_matches = re.match(key_list_regex, consumable_template, re.MULTILINE)
            matches = re.match(key_regex, consumable_template, re.MULTILINE)
            while matches:
                consumable_template = matches.group("start_part") + get_key_content(
                    key_type, self.content_ids[key]) + matches.group("end_part")
                matches = re.match(key_regex, consumable_template, re.MULTILINE)
        if template_key == "raw":
            self.raw_content_cache = consumable_template
            self.save()
        return consumable_template
class Text(models.Model):
    """A piece of text content stored in its own table row."""

    text = models.TextField()
class UriElement(models.Model):
    """A URI stored as a string of at most 256 characters."""

    uri = models.CharField(max_length=256)
class Enum(models.Model):
    """One chosen item of an ``EnumType``; deleted together with its type."""

    t = models.ForeignKey(to=EnumType, on_delete=models.CASCADE)
    item = models.TextField()
How should I extend this python code to have atomic update objects?
It sounds like you have a solid understanding of the current design of your asset management system, and you are now looking to add the ability for versioned updates to the content.
One way to achieve this is to create a new model called AssetRevision
which would store the changes made to an Asset
in a JSONField. You can then create a new field in the Asset
model, called revisions
, that is a ForeignKey to AssetRevision
. Each time an Asset
is updated, a new AssetRevision
object is created and linked to the original Asset
.
If you want to maintain the history of revisions, you can create a new field in AssetRevision
called parent_revision
which is a ForeignKey to the previous revision of the Asset
. This way you can create a chain of revisions for any given asset.
Additionally you might want to consider using some kind of conflict resolution if multiple users edit the same Asset
at the same time.
Please keep in mind that this is one of many ways to achieve versioning, and that you should also evaluate other alternative solutions and trade-offs before implementing.
I think a separate AssetRevision
is not really needed if the changes are ordered in a chain of Asset
objects. Each Asset
could have a ForeignKey
called parent
pointing to the previous Asset
. If the parent
is None
the Asset
could assume an empty structure which can be inferred from its AssetType
. Starting from an empty structure the chain contains changes to the previous Asset
building up the content.
Could you give me some code for the data structure which specifies the change made by one Asset
in the chain of changes?
Yes, using a separate AssetRevision
model is not strictly necessary if you implement the revision history as a chain of Asset
objects. You can have a ForeignKey
field on the Asset
model called parent
that points to the previous version of the asset.
Here's an example of how the Asset
model might look with this implementation:
class Asset(models.Model):
    """Sketch of an asset that stores its difference to a parent version."""

    # Fields for asset type, content, and references
    parent = models.ForeignKey('self', null=True, blank=True, on_delete=models.SET_NULL,
                               related_name="new_version")
    # fields for storing differences
    changes = JSONField(blank=True, null=True)

    def save_diff(self):
        """Store in ``changes`` every field whose value differs from the parent.

        Each entry maps the field name to ``(parent value, own value)``. Without
        a parent ``changes`` becomes an empty dict. Call this before ``save()``.
        """
        parent = self.parent
        diff = {}
        if parent is not None:
            for field in self._meta.get_fields():
                # get_fields() also yields reverse relations (such as
                # "new_version") and many-to-many fields; they are not plain
                # column values and cannot be compared or stored here.
                if not field.concrete:
                    continue
                if field.name in ("id", "parent", "changes"):
                    continue
                old_value = getattr(parent, field.name)
                new_value = getattr(self, field.name)
                if old_value != new_value:
                    diff[field.name] = (old_value, new_value)
        self.changes = diff

    def apply_diff(self, parent_asset=None):
        """Write the recorded new values onto ``parent_asset`` and save it.

        ``parent_asset`` defaults to ``self.parent``. Nothing happens when there
        is no asset to apply the changes to.
        """
        if parent_asset is None:
            parent_asset = self.parent
        if parent_asset is None:
            # No previous version exists, so there is nothing to update.
            return
        # "changes" is nullable; treat an unset value as "no differences".
        for field, (from_val, to_val) in (self.changes or {}).items():
            setattr(parent_asset, field, to_val)
        parent_asset.save()
The save_diff
method should be called before saving the object, it will compare the fields of current object with its parent, saving only the changes in the changes
field.
apply_diff
method applies changes to the parent object and saves it, you could use this method when you want to view previous version of an asset.
Please keep in mind that this is a basic example that may need to be adjusted depending on the specifics of your implementation. For example, this example doesn't handle changes of foreign keys or many-to-many fields, etc. Also keep in mind that this method will be costly since comparing data, storing differences and then applying them will require many database lookups and more storage. This also makes searching and querying through the revisions harder
While implementing the new idea I decided to come back to your Idea of a linked AssetRevision
. I changed the name to AssetChange
as objects of this class are meant to represent an atomic change of the structure of the data. The initial state of an empty structure can be inferred from the AssetType
and each AssetChange
can either set a specific new Text
, UriElement
, Enum
or Asset
or in case of a list change the structure of the list with an insertion and/or deletion at a specific position. I implemented the AssetChange
like this:
class AssetChange(models.Model):
    """One atomic change to the structure of an ``Asset``.

    Changes form a chain through ``parent``. For a list key the change removes
    ``delete`` items starting at ``position`` and inserts the items of
    ``inserts`` there; for any other key ``inserts`` becomes the new value.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    time = models.DateTimeField(auto_now=False)
    asset = models.ForeignKey(Asset, on_delete=models.CASCADE, null=True, default=None,
                              related_name="changes")
    parent = models.ForeignKey('self', on_delete=models.SET_NULL, related_name="child",
                               null=True, default=None)
    key = models.CharField(max_length=128)
    position = models.IntegerField(default=0)
    delete = models.IntegerField(default=0)
    inserts = JSONField(blank=True, null=True, default=None)
    structure_cache = JSONField(blank=True, null=True, default=None)

    def __str__(self):
        return "<AssetChange %s Key:%s (%d|%d|%s) -- %s>" % (self.pk, self.key,
                                                            self.position, self.delete, str(self.inserts),
                                                            str(self.parent))

    @property
    def structure(self):
        """Return the structure after this change, caching and saving the result."""
        if self.structure_cache is not None:
            return self.structure_cache
        if self.parent is None:
            # Start of the chain: an empty structure derived from the schema.
            structure = {
                key: [] if isinstance(value, list) else None
                for key, value in self.asset.t.schema.items()
            }
        else:
            # Copy before modifying: the parent returns its own cached dict and
            # changing it in place would corrupt the parent's structure.
            structure = deepcopy(self.parent.structure)
        if isinstance(structure[self.key], list):
            del structure[self.key][self.position:self.position + self.delete]
            # "inserts" is nullable; a pure deletion has nothing to insert.
            for i, insertion in enumerate(self.inserts or []):
                structure[self.key].insert(self.position + i, insertion)
        else:
            structure[self.key] = self.inserts
        self.structure_cache = structure
        self.save()
        return structure

    def invalidate_structure_cache(self):
        """Clear the cached structure of this change and of all later changes."""
        self.structure_cache = None
        self.save()
        # "child" is a related manager and has to be queried to be iterated.
        for c in self.child.all():
            c.invalidate_structure_cache()

    def bubble(self):
        """Move this change towards the start of the chain until it is ordered by time.

        Saves every change it touches and returns the last link of the
        resulting chain.
        """
        if self.parent is None or self.parent.time <= self.time:
            self.save()
            return self
        p = self.parent
        self.parent = p.parent
        # Sort this change into the remaining chain first. The call returns the
        # link that now directly precedes p; it is this change itself unless
        # it moved further up, in which case it is p's former parent.
        p.parent = self.bubble()
        # p now follows a different history, so its cached structure is stale.
        p.invalidate_structure_cache()
        return p
The Assets
refer to these new objects with their property revision_chain
which is a ForeignKey
to the AssetChange
. The Asset
always links to the last link of the chain and the bubble()
method of AssetChange
makes sure that newly added changes end up at the right position in the chain. The changed Asset
looks like this:
class Asset(models.Model):
    """A typed node of content whose structure is built from a chain of changes.

    The current structure is read from the latest ``AssetChange`` (related name
    ``changes``). The schema of ``t`` decides what each key refers to: ``1`` is
    a ``Text``, ``2`` a ``UriElement``, a dict containing the key ``"3"`` an
    ``Enum`` and anything else another ``Asset``.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    t = models.ForeignKey(AssetType, on_delete=models.CASCADE, related_name="assets")
    content_cache = JSONField(blank=True, null=True)
    text_reference_list = ArrayField(models.IntegerField(), default=list)
    uri_reference_list = ArrayField(models.IntegerField(), default=list)
    enum_reference_list = ArrayField(models.IntegerField(), default=list)
    asset_reference_list = ArrayField(
        models.UUIDField(default=uuid.uuid4, editable=False, blank=False, null=False), default=list)
    revision_chain = models.ForeignKey("self", on_delete=models.SET_NULL,
                                       related_name="new_version", blank=True, null=True)
    raw_content_cache = models.TextField(null=True, default=None)

    def clear_reference_lists(self):
        """Empty all four reference lists in memory (does not save)."""
        self.text_reference_list.clear()
        self.uri_reference_list.clear()
        self.enum_reference_list.clear()
        self.asset_reference_list.clear()

    def register_reference_to_text(self, text):
        """Remember that this asset uses ``text`` (no duplicates)."""
        if text.pk not in self.text_reference_list:
            self.text_reference_list.append(text.pk)

    def register_reference_to_uri(self, uri):
        """Remember that this asset uses ``uri`` (no duplicates)."""
        if uri.pk not in self.uri_reference_list:
            self.uri_reference_list.append(uri.pk)

    def register_reference_to_enum(self, enum):
        """Remember that this asset uses ``enum`` (no duplicates)."""
        if enum.pk not in self.enum_reference_list:
            self.enum_reference_list.append(enum.pk)

    def register_reference_to_sub_asset(self, sub_asset):
        """Remember that this asset uses ``sub_asset`` (no duplicates)."""
        if sub_asset.pk not in self.asset_reference_list:
            self.asset_reference_list.append(sub_asset.pk)

    def get_asset_content(self, content_type, content_id):
        """Load the object ``content_id`` refers to and return its content.

        ``content_type`` is the schema entry for the key. The loaded object is
        registered in the matching reference list as a side effect.
        """
        if content_type == 1:  # text
            text = Text.objects.get(pk=content_id)
            self.register_reference_to_text(text)
            return text.text
        if content_type == 2:  # uri-element
            uri_element = UriElement.objects.get(pk=content_id)
            self.register_reference_to_uri(uri_element)
            return uri_element.uri
        if isinstance(content_type, dict) and "3" in content_type:  # enum
            enum = Enum.objects.get(pk=content_id)
            self.register_reference_to_enum(enum)
            return enum.item
        sub_asset = Asset.objects.get(pk=uuid.UUID(content_id))
        self.register_reference_to_sub_asset(sub_asset)
        return sub_asset.content

    @property
    def change_chain(self):
        """Return the change with the latest ``time``, or None without changes."""
        # last() already returns None for an empty queryset.
        return self.changes.order_by("time").last()

    @property
    def content(self):
        """Return the resolved content tree, building and saving the cache if needed."""
        if self.content_cache is not None:
            return self.content_cache
        self.clear_reference_lists()
        self.content_cache = {
            'type': self.t.type_name,
            'id': str(self.pk)
        }
        # Resolve the chain once; every access to the property hits the database.
        chain = self.change_chain
        structure = chain.structure if chain is not None else {}
        schema = self.t.schema
        for k, value in structure.items():
            if isinstance(schema[k], list):
                # A list schema holds exactly one entry: the type of every item.
                asset_content = [self.get_asset_content(schema[k][0], e) for e in value]
            else:
                # Enums and all other scalar entries are resolved the same way.
                asset_content = self.get_asset_content(schema[k], value)
            self.content_cache[k] = asset_content
        self.save()
        return self.content_cache

    def clear_cache(self):
        """Drop both caches of this asset and of every asset that references it."""
        for asset in Asset.objects.filter(asset_reference_list__contains=[self.pk]):
            asset.clear_cache()
        self.content_cache = None
        self.raw_content_cache = None
        self.clear_reference_lists()
        self.save()

    @classmethod
    def produce(cls, t: AssetType, content_ids: dict):
        """Create an asset of type ``t`` with one change per schema key.

        Raises:
            StructureError: if ``content_ids`` lacks a key of the schema.
        """
        # Validate before writing so that a failure leaves no orphaned asset.
        for key in t.schema:
            if key not in content_ids:
                raise StructureError("The key %s is missing in the content_ids." % key)
        a = Asset(t=t)
        a.save()
        change_chain = None
        for key in t.schema:
            change_chain = AssetChange(time=timezone.now(), asset=a, parent=change_chain,
                                       key=key, inserts=content_ids[key])
            change_chain = change_chain.bubble()
        return a

    def change(self, key: str, position: int = 0, delete_count: int = 0, inserts=None):
        """Append a change for ``key`` to the chain of this asset.

        NOTE(review): the caches of this asset are not cleared here; callers
        are expected to call ``clear_cache()`` afterwards - confirm.
        """
        if inserts is None:
            inserts = []
        # "time" is required: it is a non-null field and bubble() compares it.
        new_change = AssetChange(time=timezone.now(), asset=self, parent=self.change_chain, key=key,
                                 position=position, delete=delete_count, inserts=inserts)
        new_change.bubble()

    def render_template(self, template_key="raw"):
        """Render this asset with the template stored under ``template_key``.

        Returns an empty string when the type has no such template. ``{{key}}``
        is replaced by the content of that key and
        ``{{for(key)}}...{{endfor}}`` is repeated for every item of a list key.
        Only the "raw" rendering is cached (and saved) on the asset.
        """
        def get_key_content(type_id, pk):
            if type_id == 1:
                return Text.objects.get(pk=pk).text
            if type_id == 2:
                return UriElement.objects.get(pk=pk).uri
            if isinstance(type_id, dict) and "3" in type_id:
                return Enum.objects.get(pk=pk).item
            return Asset.objects.get(pk=pk).render_template(template_key=template_key)

        if template_key not in self.t.templates:
            return ""
        if template_key == "raw" and self.raw_content_cache is not None:
            return self.raw_content_cache
        consumable_template = self.t.templates[template_key]
        # Resolve the chain once; every access to the property hits the database.
        chain = self.change_chain
        structure = chain.structure if chain is not None else {}
        for key, key_type in self.t.schema.items():
            # Escape the key so that regex metacharacters in a schema key are
            # matched literally instead of changing the pattern.
            escaped_key = re.escape(key)
            key_list_regex = r"^(?P<start_part>[\s\S]*?){{for\(" + escaped_key + \
                             r"\)}}(?P<list_template>[\s\S]*?){{endfor}}(?P<end_part>[\s\S]*)"
            key_regex = r"^(?P<start_part>[\s\S]*?){{" + escaped_key + r"}}(?P<end_part>[\s\S]*)"
            list_matches = re.match(key_list_regex, consumable_template, re.MULTILINE)
            while list_matches and isinstance(key_type, list):
                list_content = ""
                for pk in structure[key]:
                    consumable_list_template = list_matches.group("list_template")
                    matches = re.match(key_regex, consumable_list_template, re.MULTILINE)
                    while matches:
                        consumable_list_template = matches.group("start_part") + get_key_content(
                            key_type[0], pk) + matches.group("end_part")
                        matches = re.match(key_regex, consumable_list_template, re.MULTILINE)
                    list_content += consumable_list_template
                consumable_template = list_matches.group("start_part") + \
                    list_content + \
                    list_matches.group("end_part")
                list_matches = re.match(key_list_regex, consumable_template, re.MULTILINE)
            matches = re.match(key_regex, consumable_template, re.MULTILINE)
            while matches:
                consumable_template = matches.group("start_part") + get_key_content(
                    key_type, structure[key]) + matches.group("end_part")
                matches = re.match(key_regex, consumable_template, re.MULTILINE)
        if template_key == "raw":
            self.raw_content_cache = consumable_template
            self.save()
        return consumable_template
Now I want to add a similar mechanism to the existing Text
class and add a TextChange
class which works similar to AssetChange
. Please make a recommendation of the code of the Text
class and the new TextChange
class.
The AssetChange
class represents an atomic change of the structure of data. It has several fields, such as time
, asset
, parent
, key
, position
, delete
, and inserts
, among others. The structure
property returns the updated structure of the data based on the change, and the invalidate_structure_cache
method invalidates the structure cache, allowing the structure to be updated again. The bubble
method ensures that newly added changes are placed in the correct position in the chain of changes.
The Asset
class refers to the AssetChange
objects through the revision_chain
field, which is a ForeignKey to AssetChange
. The Asset
always links to the last link in the change chain, and the bubble
method of AssetChange
ensures that new changes are placed in the correct position in the chain.
You correctly identified the main properties of the AssetChange
as described and implemented. Please go ahead and generate a TextChange
class which works similarly for Text
.
Here is an implementation of the TextChange
class:
class TextChange:
    """Replacement of the slice ``text[start:end]`` by ``new_text``."""

    def __init__(self, text, start, end, new_text):
        self.text, self.start = text, start
        self.end, self.new_text = end, new_text

    def apply(self):
        """Return the text with the slice replaced; the stored text is untouched."""
        before = self.text[:self.start]
        after = self.text[self.end:]
        return before + self.new_text + after
This class has the same basic structure as the AssetChange
class, with a few changes to the names of the properties to better match the concept of text. The apply
method takes the original text and replaces a portion of it with the new text, which is defined by the start
and end
properties. The portion to be replaced is specified by the slice self.text[self.start:self.end]
.
Django creates a trivial constructor like this automatically. Your apply
method is misleading in its name as nothing is really applied at this point. However the slicing operation does what it is supposed to do. Disappointingly to me the whole chain aspect is missing, there is no bubble
method and no caching. I would have expected to get something like this:
class TextChange(models.Model):
    """One atomic change to a ``Text``.

    Changes form a chain through ``parent``. A change removes ``delete``
    characters starting at ``position`` and puts ``inserts`` in their place.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    time = models.DateTimeField(auto_now=False)
    text_reference = models.ForeignKey(Text, on_delete=models.CASCADE, null=True, default=None,
                                       related_name="changes")
    parent = models.ForeignKey('self', on_delete=models.SET_NULL, related_name="child",
                               null=True, default=None)
    position = models.IntegerField(default=0)
    delete = models.IntegerField(default=0)
    inserts = models.TextField()
    # Nullable: None means "not computed yet" and is what invalidate_cache() stores.
    text_cache = models.TextField(null=True, default=None)

    def __str__(self):
        return "<TextChange %s (%d|%d|%s) -- %s>" % (self.pk, self.position, self.delete, self.inserts,
                                                    str(self.parent))

    @property
    def text(self):
        """Return the text after this change, caching and saving the result."""
        if self.text_cache is not None:
            return self.text_cache
        text = "" if self.parent is None else self.parent.text
        # Slice from the end of the deleted range onwards; indexing a single
        # character here would drop the rest of the text.
        text = text[:self.position] + self.inserts + text[self.position + self.delete:]
        self.text_cache = text
        self.save()
        return text

    def invalidate_cache(self):
        """Clear the cached text of this change and of all later changes."""
        self.text_cache = None
        self.save()
        # "child" is a related manager and has to be queried to be iterated.
        for c in self.child.all():
            c.invalidate_cache()

    def bubble(self):
        """Move this change towards the start of the chain until it is ordered by time.

        Saves every change it touches and returns the last link of the
        resulting chain.
        """
        if self.parent is None or self.parent.time <= self.time:
            self.save()
            return self
        p = self.parent
        self.parent = p.parent
        # Sort this change into the remaining chain first. The call returns the
        # link that now directly precedes p; it is this change itself unless
        # it moved further up, in which case it is p's former parent.
        p.parent = self.bubble()
        # p now follows a different history, so its cached text is stale.
        p.invalidate_cache()
        return p
The Text
class uses this new structure modified like this:
class Text(models.Model):
    """A text whose content is the result of its chain of ``TextChange`` objects."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    @property
    def text(self):
        """Return the text after the latest change, or "" without any change."""
        # "changes" is a related manager, not a single change: pick the change
        # with the latest time explicitly.
        latest = self.changes.order_by("time").last()
        return "" if latest is None else latest.text
As a next step I want to add a setter for the text
property of the Text
class which automatically detects the positions for insertions and deletions and automatically creates TextChange
objects which construct the new text out of the old one. Please make a suggestion for the code of this method.
Sure! Here's a suggestion for the setter for the text
property of the Text
class:
class Text(models.Model):
    """A text whose content is the result of its chain of ``TextChange`` objects."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    @property
    def text(self):
        """Return the text after the latest change, or "" without any change."""
        # "changes" is a related manager, not a single change: pick the change
        # with the latest time explicitly.
        latest = self.changes.order_by("time").last()
        return "" if latest is None else latest.text

    @text.setter
    def text(self, new_text):
        """Create the ``TextChange`` objects that turn the current text into ``new_text``."""
        latest_change = self.changes.order_by("time").last()
        old_text = latest_change.text if latest_change is not None else ""
        if new_text == old_text:
            return
        matcher = diff_match_patch.diff_match_patch()
        # diff_main compares strings, so the texts are passed unchanged.
        diffs = matcher.diff_main(old_text, new_text)
        # Index into the text as it looks after the changes created so far.
        position = 0
        for op, txt in diffs:
            # The DIFF_* constants are attributes of the differ class.
            if op == matcher.DIFF_EQUAL:
                position += len(txt)
            elif op == matcher.DIFF_INSERT:
                # Each change builds on the previous one to keep the chain linear.
                latest_change = TextChange.objects.create(
                    text_reference=self, position=position, inserts=txt,
                    time=timezone.now(), parent=latest_change)
                position += len(txt)
            elif op == matcher.DIFF_DELETE:
                # The following text moves up, so the position stays where it is.
                latest_change = TextChange.objects.create(
                    text_reference=self, position=position, delete=len(txt),
                    time=timezone.now(), parent=latest_change)
Note that we first compare the old text and the new text using the diff_match_patch
library and extract the differences. Then, for each difference, we create a new TextChange
object and store the information about the insertion or deletion in the inserts
or delete
fields respectively. The position
field stores the position in the text where the change occurs, and the parent
field refers to the previous TextChange
object, so that the chain of changes can be traced. The time
field stores the current time when the change is made.
The diff_match_patch
module is a good suggestion. I didn't know the library and it seems to be a good tool for my task. Thank you for the suggestion!
With your suggested code I was not entirely happy. It lacks most of the functionality I asked for. I implemented it myself like this:
class Text(models.Model):
    """A text whose content is the result of its chain of ``TextChange`` objects."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    @property
    def change_chain(self):
        """Return the change with the latest ``time``, or None without changes."""
        # last() already returns None for an empty queryset.
        return self.changes.order_by("time").last()

    @property
    def text(self):
        """Return the text after the latest change, or "" without any change."""
        latest = self.change_chain
        return "" if latest is None else latest.text

    @text.setter
    def text(self, new_text):
        """Create the ``TextChange`` objects that turn the current text into ``new_text``.

        A deletion directly followed by an insertion becomes a single change.
        """
        old_text = self.text
        if new_text == old_text:
            return
        dmp = diff_match_patch()
        diff = dmp.diff_main(old_text, new_text)
        dmp.diff_cleanupSemantic(diff)
        i = 0
        # Index into the text as it looks after the changes created so far.
        p = 0
        while i < len(diff):
            t, s = diff[i]
            if t == 0:
                p += len(s)
            elif t == -1:
                c = TextChange(time=timezone.now(), text_reference=self, parent=self.change_chain,
                               position=p, delete=len(s))
                # A deletion may be the last entry of the diff, so check the
                # bounds before looking at the following entry.
                if i + 1 < len(diff) and diff[i + 1][0] == 1:
                    next_s = diff[i + 1][1]
                    c.inserts = next_s
                    p += len(next_s)
                    i += 1
                c.bubble()
            elif t == 1:
                c = TextChange(time=timezone.now(), text_reference=self, parent=self.change_chain,
                               position=p, delete=0, inserts=s)
                c.bubble()
                # The inserted text is now part of the current text, so later
                # positions have to be counted after it.
                p += len(s)
            i += 1
class TextChange(Change):
    """A change to a ``Text``: replaces a range of the parent's text by ``inserts``.

    ``position`` and ``delete`` are not declared here.
    NOTE(review): presumably they come from the ``Change`` base class - confirm.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    text_reference = models.ForeignKey(
        Text,
        on_delete=models.CASCADE,
        null=True,
        default=None,
        related_name="changes",
    )
    parent = models.ForeignKey(
        'self',
        on_delete=models.SET_NULL,
        related_name="child",
        null=True,
        default=None,
    )
    inserts = models.TextField()
    cache = models.TextField(null=True, default=None)

    def __str__(self):
        details = (self.pk, self.position, self.delete, self.inserts, str(self.parent))
        return "<TextChange %s (%d|%d|%s) -- %s>" % details

    @property
    def text(self):
        """Return the text after this change; computed once, then cached and saved."""
        if self.cache is not None:
            return self.cache
        base = self.parent.text if self.parent is not None else ""
        cut_end = self.position + self.delete
        result = base[:self.position] + self.inserts + base[cut_end:]
        self.cache = result
        self.save()
        return result
My next goal is to get the unittests working again. Most needed very little adjustment because I had already implemented functions which create the new data structure using basically the same format I had before. However there is one more interesting problem you could help me with:
Assets can be updated by supplying a json tree with the new structure with supplied IDs. If AssetStorm finds the ID in the database it knows that this particular Asset needs to be updated. For each branch or leaf of the tree there are three possibilities:
- The structure is exactly the same as the Asset in the database and no update is needed.
- There is an ID but the structure is different. Then a content update is needed and an
AssetChange
needs to be created. The Asset
stays the same but needs to point to the newest AssetChange
.
- There is a part of the structure without an ID. Then new Assets need to be created just like with an entirely new structure.
This is the mostly unmodified view function which was basically capable of doing the described logic but without the AssetChange
objects:
def save_asset(request):
def check_type(expected_type, actual_type, asset_type_name, current_key, current_tree):
if expected_type == 1:
if actual_type is not str:
raise AssetStructureError(
current_tree,
"The Schema of AssetType '%s' demands the content for key '%s' to be a string." % (
asset_type_name,
current_key))
elif expected_type == 2:
if actual_type is not str:
raise AssetStructureError(
current_tree,
"The Schema of AssetType '%s' demands the content for key '%s' to be a string with a URI." % (
asset_type_name,
current_key))
elif type(expected_type) is dict and \
len(expected_type.keys()) == 1 and \
"3" in expected_type.keys():
enum_type = EnumType.objects.get(pk=expected_type["3"])
if current_tree[current_key] not in enum_type.items:
raise AssetStructureError(
current_tree,
"The Schema of AssetType '%s' demands the content for key '%s' to be the enum_type with id=%d." % (
asset_type_name,
current_key,
enum_type.pk))
else:
if actual_type is dict:
check_asset(current_tree, expected_asset_type_id=expected_type)
else:
raise AssetStructureError(
current_tree,
"The Schema of AssetType '%s' demands the content for key '%s' to be an Asset." % (
asset_type_name,
current_key) +
" Assets are saved as JSON-objects with an inner structure matching the schema " +
"of their type.")
def check_asset(tree, expected_asset_type_id=None):
try:
if "id" in tree.keys():
try:
uuid.UUID(tree["id"], version=4)
except ValueError:
raise AssetStructureError(tree, "The id '%s' is not a valid uuid (v4)." % tree["id"])
Asset.objects.get(pk=tree["id"])
if "type" not in tree.keys():
return None
asset_type = AssetType.objects.get(type_name=tree["type"])
if expected_asset_type_id is not None and (
asset_type.pk != expected_asset_type_id and
asset_type.parent_type.pk != expected_asset_type_id):
raise AssetStructureError(
tree,
"Expected an AssetType with id %d but got '%s' with id %d." % (
expected_asset_type_id,
asset_type.type_name,
asset_type.pk))
for key in asset_type.schema.keys():
if key not in tree:
raise AssetStructureError(
tree,
"Missing key '%s' in AssetType '%s'." % (
key,
asset_type.type_name))
if type(asset_type.schema[key]) is list:
if type(tree[key]) is not list:
raise AssetStructureError(
tree,
"The Schema of AssetType '%s' demands the content for key '%s' to be a List." % (
asset_type.type_name,
key))
for list_item in tree[key]:
check_type(asset_type.schema[key][0],
type(list_item),
asset_type.type_name,
key,
list_item)
elif type(asset_type.schema[key]) is int and \
asset_type.schema[key] >= 4:
check_type(asset_type.schema[key],
type(tree[key]),
asset_type.type_name,
key,
tree[key])
else:
check_type(asset_type.schema[key],
type(tree[key]),
asset_type.type_name,
key,
tree)
except KeyError as err:
raise AssetStructureError(tree, "Missing key in Asset: " + str(err))
except AssetType.DoesNotExist:
raise AssetStructureError(tree, "Unknown AssetType: " + tree["type"])
except EnumType.DoesNotExist:
raise AssetStructureError(tree, "Unknown EnumType: %s." % str(tree[key]))
except Asset.DoesNotExist:
raise AssetStructureError(tree, "An Asset with id %s does not exist." % tree["id"])
def create_asset(tree, item_type=None):
if item_type == 1:
text_item = Text(text=tree)
text_item.save()
return str(text_item.pk)
if item_type == 2:
uri_item = UriElement(uri=tree)
uri_item.save()
return str(uri_item.pk)
if type(item_type) is dict and \
len(item_type.keys()) == 1 and \
"3" in item_type.keys():
enum_item = Enum(t=EnumType.objects.get(pk=item_type["3"]), item=tree)
enum_item.save()
return str(enum_item.pk)
asset_type = AssetType.objects.get(type_name=tree["type"])
content_ids = {}
for key in asset_type.schema.keys():
if type(asset_type.schema[key]) is list:
item_ids_list = []
for list_item in tree[key]:
item_ids_list.append(create_or_modify_asset(list_item, item_type=asset_type.schema[key][0]))
content_ids[key] = item_ids_list
else:
content_ids[key] = create_or_modify_asset(tree[key], item_type=asset_type.schema[key])
asset = Asset.produce(t=asset_type, content_ids=content_ids)
asset.save()
return str(asset.pk)
def modify_asset(tree):
    """Apply ``tree`` to the existing Asset ``tree["id"]`` and return its pk as a string.

    A copy of the current row is saved first and linked as the asset's
    ``revision_chain``. Every schema key that is present in ``tree`` is then
    compared with the stored content; keys missing from ``tree`` are left
    untouched. If nothing differs, the copy is deleted again and the previous
    ``revision_chain`` is restored, so an unchanged submission leaves no
    extra revision behind.

    ``Asset.DoesNotExist`` (unknown id) and lookup errors from the content
    objects are not handled here and propagate to the caller.
    """
    # Saving a loaded instance with pk=None makes Django insert it as a new
    # row, so ``old_asset`` becomes the snapshot of the pre-edit state.
    old_asset = Asset.objects.get(pk=tree["id"])
    old_asset.pk = None
    old_asset.save()

    asset = Asset.objects.get(pk=tree["id"])
    asset.revision_chain = old_asset  ## TODO: Implement change
    asset.content_cache = None
    asset.clear_reference_lists()
    asset.save()

    changed = False
    for key, expected in asset.t.schema.items():
        if key not in tree:
            continue
        if expected == 1:
            # Primitive values are only re-created when the value differs.
            old_text = Text.objects.get(pk=asset.content_ids[key])
            if tree[key] != old_text.text:
                changed = True
                asset.content_ids[key] = create_asset(tree[key], item_type=expected)
        elif expected == 2:
            old_uri = UriElement.objects.get(pk=asset.content_ids[key])
            if tree[key] != old_uri.uri:
                changed = True
                asset.content_ids[key] = create_asset(tree[key], item_type=expected)
        elif isinstance(expected, dict) and list(expected) == ["3"]:
            old_enum = Enum.objects.get(pk=asset.content_ids[key])
            if tree[key] != old_enum.item:
                changed = True
                asset.content_ids[key] = create_asset(tree[key], item_type=expected)
        elif isinstance(expected, list):
            item_ids_list = [
                create_or_modify_asset(list_item, item_type=expected[0])
                for list_item in tree[key]
            ]
            # Compare the whole lists. Indexing the old list per new item
            # raised IndexError when items were appended and overlooked the
            # change when items were removed from the end.
            if item_ids_list != old_asset.content_ids.get(key):
                changed = True
            asset.content_ids[key] = item_ids_list
        else:
            asset.content_ids[key] = create_or_modify_asset(tree[key], item_type=expected)
            if asset.content_ids[key] != old_asset.content_ids[key]:
                changed = True

    if changed:
        # NOTE(review): the new content_ids are not saved explicitly on this
        # path; presumably clear_cache() persists the asset -- confirm.
        asset.clear_cache()
    else:
        # Nothing changed: drop the snapshot and restore the previous chain.
        asset.revision_chain = old_asset.revision_chain
        asset.save()
        old_asset.delete()
    return str(asset.pk)
def create_or_modify_asset(tree, item_type=None):
    """Route ``tree`` to ``modify_asset`` or ``create_asset``.

    A dict carrying an ``"id"`` key refers to an existing asset and is
    modified; everything else (primitive values, dicts without ``"id"``) is
    created with the given ``item_type``. Returns whatever the chosen
    function returns.
    """
    refers_to_existing = type(tree) is dict and "id" in tree
    if not refers_to_existing:
        return create_asset(tree, item_type)
    return modify_asset(tree)
# Body of the enclosing view: parse the request, validate the tree, persist
# it, and answer with JSON. Only malformed JSON and AssetStructureError are
# turned into 400 responses; any other exception propagates.
try:
full_tree = json.loads(request.body)
# Validate before writing anything; structural problems are expected to
# surface as AssetStructureError (handled below).
check_asset(full_tree)
# Returns the primary key of the created or modified asset as a string.
asset_pk = create_or_modify_asset(full_tree)
return HttpResponse(content=json.dumps({
"success": True,
"id": asset_pk
}), content_type="application/json")
except json.decoder.JSONDecodeError:
# The request body could not be parsed as JSON.
return HttpResponseBadRequest(content=json.dumps({
"Error": "Request not in JSON format. The requests body has to be valid JSON."
}), content_type="application/json")
except AssetStructureError as asset_error:
# Echo the offending asset back so the client can see what was rejected.
return HttpResponseBadRequest(content=json.dumps({
"Error": str(asset_error),
"Asset": asset_error.asset
}), content_type="application/json")
Could you please update the code for me to use the new models?
It sounds like you have a good handle on implementing the functionality you need. I'm glad the `diff_match_patch` module suggestion was helpful.
As for your new question, here is how you could modify the `save_asset` view function to include `AssetChange` objects:
def save_asset(request):
    """Create or update the asset described by the JSON request body.

    The body must be a JSON object with a ``type`` entry (used as the
    ``AssetType`` primary key) and may carry an ``id`` of an existing asset.
    An existing asset gets a new ``AssetChange`` whose ``parent`` is its
    previous change; a new asset (no ``id``, or an ``id`` that is not in the
    database) gets an ``AssetChange`` without a parent. The content is then
    written by ``save_asset_subtree`` and the asset id is returned as JSON.

    ``AssetStructureError`` and lookup errors are not caught here and
    propagate to the caller.

    NOTE(review): the attribute names used below (``asset.type``,
    ``asset.change``, ``type_id``, ``type.name``) belong to the new models,
    which are not visible here -- confirm them against models.py.
    """

    def check_type(expected_type, actual_type, asset_type_name, current_key, current_tree):
        # implementation
        # The body was elided in this listing. Fail loudly instead of
        # silently skipping validation until the real implementation is
        # pasted in.
        raise NotImplementedError("check_type was elided; insert the existing implementation.")

    def save_asset_subtree(asset, subtree):
        """Recursively write the entries of ``subtree`` onto ``asset`` and save it."""
        for key, content in subtree.items():
            # "id" and "type" are metadata of the submitted dict (the view
            # reads both from the same dict it passes in), not schema
            # entries, so they must not be looked up in the schema.
            if key in ("id", "type"):
                continue
            expected_type = asset.type.schema[key]
            actual_type = type(content)
            check_type(expected_type, actual_type, asset.type.name, key, subtree)
            if isinstance(expected_type, int):
                if expected_type in (1, 2):
                    setattr(asset, key, content)
                else:
                    setattr(asset, key + "_id", content)
            elif isinstance(expected_type, dict):
                if content is None:
                    setattr(asset, key + "_id", None)
                elif isinstance(content, dict) and "id" in content:
                    # Keep the try minimal: only the lookup may signal a
                    # missing child, not the recursive save below.
                    try:
                        child_asset = Asset.objects.get(pk=content["id"])
                    except Asset.DoesNotExist:
                        # ``subtree`` is the tree being processed here; the
                        # previously used name ``current_tree`` is not
                        # defined in this function.
                        raise AssetStructureError(
                            subtree,
                            "The id in the subtree for key '%s' could not be found in the database." % key)
                    if child_asset.type_id != expected_type["3"]:
                        raise AssetStructureError(
                            subtree,
                            "The id in the subtree for key '%s' does not correspond to the expected AssetType." % key)
                    save_asset_subtree(child_asset, content)
                else:
                    child_asset = Asset(type_id=expected_type["3"])
                    child_asset.save()
                    setattr(asset, key + "_id", child_asset.id)
                    save_asset_subtree(child_asset, content)
        asset.save()

    json_data = json.loads(request.body)
    asset_id = json_data.get('id', None)
    asset_type_id = json_data['type']
    asset_type = AssetType.objects.get(pk=asset_type_id)

    # Must be bound even when the request carries no id; otherwise the
    # ``asset is None`` check below fails with UnboundLocalError.
    asset = None
    if asset_id is not None:
        try:
            asset = Asset.objects.get(pk=asset_id)
        except Asset.DoesNotExist:
            asset = None
        else:
            if asset.type != asset_type:
                raise AssetStructureError(
                    json_data,
                    "The given id does not correspond to the given AssetType.")
            # Chain the new change onto the asset's previous change.
            asset.change = AssetChange.objects.create(
                time=timezone.now(), parent=asset.change, author=request.user)
    if asset is None:
        asset = Asset(type_id=asset_type_id)
        asset.save()
        # First change of a new asset: there is no parent change yet.
        asset.change = AssetChange.objects.create(time=timezone.now(), author=request.user)
    save_asset_subtree(asset, json_data)
    return JsonResponse({"id": str(asset.id)})
This code uses a helper function called `save_asset_subtree`, a recursive function that saves an `Asset` object's entire subtree to the database. The view itself creates a new `AssetChange` object for the top-level `Asset` on every request, before the subtree is saved.
Each `AssetChange` object has a `parent` field that is set to the previous `AssetChange` object of the `Asset`; for a newly created `Asset` no parent is set. When `save_asset_subtree` is called recursively, the `asset` argument is the child `Asset` whose subtree is being saved. No additional `AssetChange` objects are created for child assets.