Last active
May 1, 2021 03:50
-
-
Save dunhamsteve/78ec0162dc959e0678d449bb2713f387 to your computer and use it in GitHub Desktop.
This script exports your Craft.app databases as json files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// Placed in the public domain | |
// Dump craft database as json | |
// This is intended as a sample of how to extract craft raw data for | |
// playing around with. It has no dependencies aside from node. | |
// The top section reads the Craft DB into memory (takes about 380ms for 50k blocks) | |
// To determine the realm file format I consulted the source code. This works with | |
// Realm 5, which Craft currently uses. It will need to be tweaked if Craft switches | |
// to Realm 6. | |
// The code at the end does a little cleanup (expanding nested json) and writes to realmData.js | |
// This can be replaced by whatever you want to do with the data. | |
// NB - you'll find the cached images, etc in Caches/com.luki.datacache | |
// the filenames are sha1(url). | |
let {readdirSync, readFileSync, writeFileSync} = require("fs"); | |
// Tiny assertion helper: throw an Error (message defaults to "Assert")
// whenever the condition is falsy.
let assert = function (cond, msg) {
  if (!cond) {
    throw Error(msg || "Assert");
  }
};
function read_database(rfile2, translateRefs = true) { | |
// Reads a "blob" node (raw bytes) at byte offset `ref` in `buf` and
// returns its payload as a Buffer slice.
function readBlob(ref) {
  // Refs are even byte offsets into the file; an odd value would be a
  // tagged integer, not a real ref.
  assert(!(ref & 1), `bad ref ${ref}`);
  // Element count: 24-bit big-endian in header bytes 5..7.
  let size = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
  let h = buf[ref + 4]; // width/type/flags byte of the 8-byte node header
  let width = 1 << (h & 7) >> 1; // element width decoded from the low 3 bits
  let wtype = (h & 24) >> 3;     // width-type field from bits 3..4
  // Blobs are expected to be wtype 2 with 1-byte elements; anything else
  // means we followed a ref to the wrong kind of node.
  assert(wtype === 2);
  assert(width === 1);
  // Payload starts immediately after the 8-byte node header.
  return buf.slice(ref+8,ref+8+size);
}
// Reads a leaf of "short" strings: fixed-size slots of `width` bytes each,
// packed directly into the node. Returns an array of decoded strings.
function readStringArray(ref) {
  assert(!(ref & 1), `bad ref ${ref}`);
  // Element count: 24-bit big-endian in header bytes 5..7.
  let size = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
  let h = buf[ref + 4];
  let width = 1 << (h & 7) >> 1; // slot size in bytes
  let wtype = (h & 24) >> 3;
  assert(wtype === 1);
  let rval = [];
  for (let i = 0; i < size; i++) {
    let s = ref + 8 + width * i;
    // The slot's last byte stores the padding count; the extra -1 drops one
    // more trailing byte (presumably a NUL terminator — matches the other
    // string readers, which also strip one byte).
    let e = s + width - buf[s + width - 1] - 1;
    rval.push(buf.toString("utf8", s, e));
  }
  return rval;
}
// Top 3 bits of a node's header byte 4; bit 2 (value 4) marks an inner
// B+tree node (see scanBPTree), the others select string/ref encodings.
const getFlags = (ref) => buf[ref + 4] >> 5;
// Reads an integer array node at byte offset `ref` in `buf` and returns its
// values as a plain JS array. `width` is the element size in BITS
// (0,1,2,4,8,16,32,64); sub-byte widths are bit-packed LSB-first within
// each byte.
function readArray(ref) {
  assert(!(ref & 1), `bad ref ${ref}`);
  // Element count: 24-bit big-endian in header bytes 5..7.
  let size = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
  let h = buf[ref + 4];
  let width = 1 << (h & 7) >> 1; // bits per element
  let wtype = (h & 24) >> 3;
  assert(wtype === 0);
  let rval = [];
  for (let i = 0; i < size; i++) {
    if (width === 0)
      rval.push(0); // zero-width array: every element is implicitly 0
    else if (width === 1)
      rval.push(1 & buf[ref + 8 + i / 8 | 0] >> i % 8);
    else if (width === 2)
      // Four 2-bit values per byte, so the shift cycles i % 4.
      // (Bug fix: this previously shifted by 2 * (i % 3), which misread
      // every 4th element as a copy of an earlier one.)
      rval.push(3 & buf[ref + 8 + i / 4 | 0] >> 2 * (i % 4));
    else if (width === 4)
      rval.push(15 & buf[ref + 8 + i / 2 | 0] >> 4 * (i % 2));
    else if (width === 8)
      rval.push(buf[ref + 8 + i]);
    else if (width === 16)
      rval.push(buf.readUInt16LE(ref + 8 + i * 2));
    else if (width === 32)
      rval.push(buf.readUInt32LE(ref + 8 + i * 4));
    else
      // 64-bit elements would lose precision as JS numbers; fail loudly.
      assert(false, `width ${width} ints not handled`);
  }
  return rval;
}
// One reader per supported column type. Each is invoked on a B+tree leaf
// node and appends that leaf's decoded values to the accumulator `acc`.
const readers = {
  // Plain integer leaf.
  int(acc, ref) {
    assert(getFlags(ref) == 0);
    acc.push(...readArray(ref));
  },
  // Booleans are stored as 0/1 integers.
  bool(acc, ref) {
    assert(getFlags(ref) == 0);
    for (let x of readArray(ref))
      acc.push(x == 1);
  },
  // Link lists: each slot is 0 (empty list) or a ref to an array of
  // destination row indices (translated to pk values later, see
  // read_database's translateRefs pass).
  linklist(acc, ref) {
    assert(getFlags(ref) == 2);
    readArray(ref).forEach((v) => acc.push(v == 0 ? [] : readArray(v)));
  },
  // Timestamps: the slot refs a nested B+tree; each leaf's first element is
  // skipped via slice(1) (presumably a header entry — TODO confirm against
  // the Realm 5 source mentioned in the file header).
  timestamp(acc, ref) {
    assert(getFlags(ref) == 2);
    scanBPTree(acc, readArray(ref)[0], (acc2, ref2) => readArray(ref2).slice(1).forEach((x) => acc2.push(x)));
  },
  // Strings use one of three encodings, selected by the node flags.
  string(acc, ref) {
    let flags = getFlags(ref);
    if (flags == 2) {
      // Medium strings: [offsets array, shared blob]; each offset marks the
      // end of a string including one trailing byte (dropped by e - 1).
      let arr = readArray(ref);
      let ends = readArray(arr[0]);
      let blob = readBlob(arr[1]);
      let s = 0;
      ends.forEach((e) => { acc.push(blob.toString('utf8', s, e - 1)); s = e });
    } else if (flags === 3) {
      // Big strings: one blob ref per slot; 0 means a null value. The last
      // byte of each blob is stripped (terminator).
      for (let r of readArray(ref)) {
        if (r) {
          let blob = readBlob(r)
          acc.push(blob.toString('utf8',0,blob.length-1))
        } else {
          acc.push(null)
        }
      }
    } else if (flags === 0) {
      // Short strings packed directly into the leaf.
      acc.push(...readStringArray(ref));
    }
  }
};
// Walks the B+tree rooted at `ref`, calling `reader(acc, leafRef)` on every
// leaf in order. Flag bit 2 (value 4) marks an inner node; an inner node's
// array drops its first and last entries before recursing (presumably
// offsets info and a total count — TODO confirm against Realm 5 source).
function scanBPTree(acc, ref, reader) {
  let flags = getFlags(ref);
  if (flags & 4) {
    readArray(ref).slice(1, -1).forEach((ref2) => scanBPTree(acc, ref2, reader));
  } else {
    reader(acc, ref);
  }
}
// Realm column-type code -> reader name. Only the types this DB actually
// uses are mapped; an unmapped code would make readers[ct] undefined below.
const colTypes = {0: "int", 1: "bool", 2: "string", 8: "timestamp", 13: "linklist"};
// Parses one table node: reads the column spec (types, names, attributes),
// decodes every column via the B+tree scan, then transposes columns into
// row objects. Returns {name, rows, refs} where refs maps each link-list
// column name to the index of its destination table.
function readTable(name, ref) {
  let arr = readArray(ref);
  let spec = readArray(arr[0]);  // [types, names, attrs, subspecs?]
  let types = readArray(spec[0]);
  let names = readStringArray(spec[1]);
  let attrs = readArray(spec[2]);
  // Subspecs are only present when the table has link columns; each entry is
  // a tagged value identifying the destination table (hence the >> 1 below —
  // the result is used as an index into the tables array by the caller).
  let subspecs = spec.length > 3 && readArray(spec[3]);
  let crefs = readArray(arr[1]); // per-column data refs (plus extras, see below)
  let cix = 0; // index into crefs
  let six = 0; // index into subspecs
  let cols = [];
  let refs = {};
  for (let i = 0; i < names.length; i++) {
    let cname = names[i];
    let ct = colTypes[types[i]];
    if (subspecs && ct == "linklist") {
      refs[cname] = subspecs[six++] >> 1;
    }
    let col = [];
    scanBPTree(col, crefs[cix], readers[ct]);
    cols.push(col);
    cix++;
    // Attribute bit 0 means the column carries an extra ref in crefs
    // (presumably a search index) which we skip over.
    if (attrs[i] & 1)
      cix++;
  }
  // Transpose column arrays into row objects keyed by column name.
  let rows = [];
  for (let i = 0; i < cols[0].length; i++) {
    let row = {};
    for (let j = 0; j < names.length; j++) {
      row[names[j]] = cols[j][i];
    }
    rows.push(row);
  }
  return {name, rows, refs};
}
// Reads the top-level group node: its first two entries ref the table-name
// array and the table array. Returns one parsed table per name.
function readTop(ref) {
  const top = readArray(ref);
  const names = readStringArray(top[0]);
  const trefs = readArray(top[1]);
  const tables = [];
  for (let i = 0; i < names.length; i++) {
    tables.push(readTable(names[i], trefs[i]));
  }
  return tables;
}
// Locates the current top array in the Realm file header. The header holds
// two alternating top pointers (8 bytes apart at offsets 0 and 8), the
// magic "T-DB" at offset 16, and a selector byte at offset 23 whose low bit
// says which pointer is live. Throws on a bad magic number.
function get_topref(buf2) {
  assert(buf2.readInt32LE(16) == 0x42442d54, "bad magic");
  const which = buf2.readUInt8(23) & 1;
  return buf2.readInt32LE(which * 8);
}
// ---- main body of read_database ----
let buf = readFileSync(rfile2); // whole DB in memory; all refs index into this buffer
let topref = get_topref(buf);
let tables = readTop(topref);
let pks = {}; // class name (sans "class_" prefix) -> primary-key column name
let db2 = {}; // class name -> array of row objects (the returned value)
for (let table of tables) {
  if (table.name == "pk") {
    // Realm's "pk" metatable maps each class to its primary-key property.
    for (let {pk_table, pk_property} of table.rows)
      pks[pk_table] = pk_property;
  } else if (table.name.startsWith("class_")) {
    db2[table.name.slice(6)] = table.rows;
  }
}
// Link-list columns hold destination row indices. When translateRefs is set,
// rewrite them in place into the destination rows' primary-key values
// (typically uuids) so the JSON output is self-describing.
if (translateRefs)
  for (let table of tables) {
    for (let k in table.refs) {
      let dest = tables[table.refs[k]]; // refs[k] indexes the tables array
      let dpk = pks[dest.name.slice(6)];
      console.log(table.name, k, "->", dest.name, dpk);
      for (let item of table.rows) {
        let value = item[k];
        if (value && value.length) {
          for (let i = 0; i < value.length; i++)
            value[i] = dest.rows[value[i]][dpk];
        }
      }
    }
  }
return db2;
}
// Table name -> list of string columns whose values are themselves
// serialized JSON; decodeNestedJson() parses these in place.
let jsonProps = {
  BlockDataModel: ['offSchemaProperties', 'rawProperties','style', 'pageStyleData'],
  FolderDataModel: ['properties'],
  DocumentShareDataModel: ['_sharedBlocks'],
  SnapshotDataModel:['_userIds'],
  SpaceConfigDataModel: ['value'],
}
// Parses the JSON-in-a-string columns listed in jsonProps, mutating `db`
// in place. Falsy values (missing/empty) become null; malformed JSON
// throws (surfaced to the caller). Tables absent from this particular
// database are skipped instead of crashing.
function decodeNestedJson(db) {
  function decode(value) {
    if (!value) return null;       // null/undefined/'' -> null
    if (value === "{}") return {}; // fast path for the common empty object
    return JSON.parse(value);
  }
  for (let tname in jsonProps) {
    let rows = db[tname];
    // Robustness fix: the original iterated db[tname] unconditionally and
    // threw a TypeError when a listed table was missing from the database.
    if (!rows) continue;
    for (let block of rows) {
      for (let k of jsonProps[tname]) {
        block[k] = decode(block[k]);
      }
      if (tname === 'FolderDataModel') {
        // more json inside of json
        let props = block.properties;
        if (props && props.icon) {
          props.icon = decode(props.icon);
        }
      }
      if (tname === 'DocumentShareDataModel') {
        block._sharedBlocks.forEach(sb => { sb.settings = decode(sb.settings); });
      }
    }
  }
}
// Craft stores one LukiMain_*.realm database per space inside the app's
// macOS sandbox container; dump each one to realm_<id>.json in the cwd.
let base = `${process.env.HOME}/Library/Containers/com.lukilabs.lukiapp/Data/Library/Application Support/com.lukilabs.lukiapp`;
for (let fn of readdirSync(base)) {
  if (fn.startsWith("LukiMain") && fn.endsWith(".realm")) {
    // Filename appears to be LukiMain_<...>||<realm id>.realm; take the last
    // "||"-separated segment as the id — TODO confirm for all filename shapes.
    let realm_id = fn.split('_')[1].split('||').pop()
    console.log('Read', realm_id,fn)
    let rfile = `${base}/${fn}`
    // pass in false here if you want some of the xrefs to be array indices instead of uuids.
    let db = read_database(rfile, true);
    // Some values are strings containing json, we decode them here for convenience.
    decodeNestedJson(db)
    let json = JSON.stringify(db, null, ' ')
    let outfn = `realm_${realm_id}.json`
    writeFileSync(outfn, json)
    console.log('wrote', json.length, 'bytes to', outfn)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment