Last active
May 7, 2024 17:53
-
-
Save kcleong/b12a5aaaf3df5df0fee737457eeda5a0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Modified ZODB-5.8.0-py3.8.egg/ZODB/scripts/fsrefs.py to report broken OIDs. | |
This fixes the following error when running `./bin/zeopack`:
Traceback (most recent call last): | |
File "./bin/zeopack", line 48, in <module> | |
sys.exit(plone.recipe.zeoserver.pack.main(host, port, unix, days, username, password, realm, blob_dir, storage)) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/plone.recipe.zeoserver-2.0.3-py3.8.egg/plone/recipe/zeoserver/pack.py", line 65, in main | |
_main(*args, **kw) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/plone.recipe.zeoserver-2.0.3-py3.8.egg/plone/recipe/zeoserver/pack.py", line 46, in _main | |
cs.pack(wait=True, days=int(days)) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/ZEO-5.3.0-py3.8.egg/ZEO/ClientStorage.py", line 562, in pack | |
return self._call('pack', t, wait) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/ZEO-5.3.0-py3.8.egg/ZEO/asyncio/client.py", line 795, in call | |
return self.__call(self.call_threadsafe, method, args, **kw) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/ZEO-5.3.0-py3.8.egg/ZEO/asyncio/client.py", line 774, in call | |
return self.wait_for_result(result, timeout) | |
File "/opt/APPS/cms/prd/plone.buildout/eggs/ZEO-5.3.0-py3.8.egg/ZEO/asyncio/client.py", line 787, in wait_for_result | |
return future.result(timeout) | |
File "/opt/APPS/cms/prd/.pyenv/versions/3.8.16/lib/python3.8/concurrent/futures/_base.py", line 444, in result | |
return self.__get_result() | |
File "/opt/APPS/cms/prd/.pyenv/versions/3.8.16/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result | |
raise self._exception | |
ZEO.Exceptions.ServerException: ('builtins.UnicodeDecodeError', ('ascii', b'\x00\x00\x00\x00\x00\x05\xbcH', 6, 7, 'ordinal not in range(128)')) | |
---- | |
1. Run this script to display broken OIDs | |
./bin/zeo stop | |
./bin/zopepy find_broken_oids.py ./var/filestorage/Data.fs | |
2. Remove broken OIDs | |
Add broken OIDs to `delete_broken_oids.py` script in https://gist.github.com/kcleong/ab800d4cd9778f8c7a84772c7b0b9f4b | |
./bin/zeo start | |
./bin/instance0 run delete_broken_oids.py | |
3. Patch the ZODB packer so that broken objects are skipped and purged from the ZODB when zodbupdate packs it
Edit `ZODB/FileStorage/fspack.py` (e.g. `./eggs/ZODB-5.8.0-py3.8.egg/ZODB/FileStorage/fspack.py`) and change | |
``` | |
try: | |
pos = oid2curpos[oid] | |
except KeyError: | |
if oid == z64 and len(oid2curpos) == 0: | |
# special case, pack to before creation time | |
continue | |
raise KeyError(oid) | |
``` | |
to | |
``` | |
try: | |
pos = oid2curpos[oid] | |
except KeyError: | |
print(f"SKIPPING {oid}") | |
continue | |
``` | |
./bin/zeo stop | |
./bin/zodbupdate -v -d --pack -f var/filestorage/Data.fs | |
The ZODB should now be fixed and the corrupted objects purged. | |
---- | |
This should now work correctly: | |
./bin/zeo start | |
./bin/zeopack | |
""" | |
import traceback | |
from BTrees.QQBTree import QQBTree | |
from ZODB.FileStorage import FileStorage | |
from ZODB.POSException import POSKeyError | |
from ZODB.serialize import get_refs | |
from ZODB.TimeStamp import TimeStamp | |
from ZODB.utils import get_pickle_metadata | |
from ZODB.utils import load_current | |
from ZODB.utils import oid_repr | |
from ZODB.utils import p64 | |
from ZODB.utils import u64 | |
def main(path=None):
    """Scan a FileStorage and print the OID of every unparsable data record.

    Every object listed in the storage index is loaded; objects that load
    successfully are then passed to ``get_refs``.  For each object whose
    data makes ``get_refs`` raise, a line ``Broken OID: <n>`` is printed to
    stdout, where ``<n>`` is the OID as an integer (``u64`` form).

    Objects that cannot be loaded at all (``POSKeyError`` or any other
    error) are skipped and not reported as broken.

    Args:
        path: Path of the FileStorage file to scan.  When ``None`` the path
            is taken from the command line: exactly one positional argument
            is expected, and each ``-v`` option raises the verbosity.

    Raises:
        getopt.GetoptError: An unknown command-line option was given.
        ValueError: The command line did not hold exactly one path.
    """
    verbose = 0
    if path is None:
        import getopt
        import sys

        opts, args = getopt.getopt(sys.argv[1:], "v")
        for flag, _value in opts:
            if flag == "-v":
                verbose += 1
        # Exactly one positional argument is required; anything else makes
        # this unpacking fail with ValueError.
        path, = args

    fs = FileStorage(path, read_only=1)
    try:
        print("Generating a list of all broken OIDs")

        # OIDs present in the index that could not be loaded.  Two cases end
        # up here:
        #   * POSKeyError: this is what happens if undo is applied to the
        #     transaction creating the object (the oid is still in the
        #     index, but its current data record has a backpointer of 0).
        #   * any other load error.
        inactive = set()

        # Build a {pos -> oid} index, the reverse of the {oid -> pos}
        # fs._index, so objects can be visited in order of ascending file
        # position to optimize disk IO.
        pos2oid = QQBTree()  # pos -> u64(oid)
        for oid, pos in fs._index.iteritems():
            pos2oid[pos] = u64(oid)

        # Pass 1: load every object listed in the index and remember the
        # ones that are deleted or fail to load.
        for oid64 in pos2oid.itervalues():
            oid = p64(oid64)
            try:
                load_current(fs, oid)
            except POSKeyError:
                inactive.add(oid)
            except Exception:
                # KeyboardInterrupt / SystemExit are not Exception
                # subclasses, so they still abort the scan.
                if verbose:
                    traceback.print_exc()
                inactive.add(oid)

        # Pass 2: go through the loadable objects again and report those
        # whose data cannot be scanned for references.
        for oid64 in pos2oid.itervalues():
            oid = p64(oid64)
            if oid in inactive:
                continue
            data, _serial = load_current(fs, oid)
            try:
                get_refs(data)
            except Exception:
                print(f"Broken OID: {oid64}")
                if verbose:
                    # Goes to stderr, so the OID list on stdout is unchanged.
                    traceback.print_exc()
    finally:
        # Release the storage's file handle even if the scan is interrupted.
        fs.close()
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment