Last active
July 9, 2021 13:18
-
-
Save jlongster/2412f3b80148778ad001f392b674b5b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// UPDATE: don't use this. when it re-partitions the list when time moves forward, it does not correctly keep hashes | |
// Use a real merkle tree instead: https://gist.github.com/jlongster/f431b6d75ef29c1a2ed000715aef9c8c | |
import Timestamp from './timestamp'; | |
// This is a compact data structure that keeps track of a list of | |
// hashes (representing messages) over a range of time in order to | |
// figure out what has changed between clients, kinda like a Merkle | |
// tree. It creates "buckets" that represent different time ranges, | |
// and divides time into smaller buckets the more recent they are. The | |
// total data structure is mostly a 17-length array. | |
// | |
// It requires knowledge of the latest "clock", which is determinstic | |
// given the current HLC state. Given this, it continuously | |
// re-partitions the buckets so they represent the time ranges | |
// relative to the clock. | |
// | |
// For example, a default hashlist would look like this: | |
// | |
// initHashlist('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF'); | |
// | |
// { at: 25700485, | |
// list: | |
// [ { hash: 0, timestamp: 25700485 }, // 2018-11-12T13:25:00.000Z | |
// { hash: 0, timestamp: 25700475 }, // 2018-11-12T13:15:00.000Z | |
// { hash: 0, timestamp: 25700465 }, // 2018-11-12T13:05:00.000Z | |
// { hash: 0, timestamp: 25700445 }, // 2018-11-12T12:45:00.000Z | |
// { hash: 0, timestamp: 25700405 }, // 2018-11-12T12:05:00.000Z | |
// { hash: 0, timestamp: 25700325 }, // 2018-11-12T10:45:00.000Z | |
// { hash: 0, timestamp: 25700165 }, // 2018-11-12T08:05:00.000Z | |
// { hash: 0, timestamp: 25699845 }, // 2018-11-12T02:45:00.000Z | |
// { hash: 0, timestamp: 25699205 }, // 2018-11-11T16:05:00.000Z | |
// { hash: 0, timestamp: 25697925 }, // 2018-11-10T18:45:00.000Z | |
// { hash: 0, timestamp: 25695365 }, // 2018-11-09T00:05:00.000Z | |
// { hash: 0, timestamp: 25690245 }, // 2018-11-05T10:45:00.000Z | |
// { hash: 0, timestamp: 25680005 }, // 2018-10-29T08:05:00.000Z | |
// { hash: 0, timestamp: 25659525 }, // 2018-10-15T02:45:00.000Z | |
// { hash: 0, timestamp: 25618565 }, // 2018-09-16T16:05:00.000Z | |
// { hash: 0, timestamp: 25536645 }, // 2018-07-21T18:45:00.000Z | |
// { hash: 0, timestamp: 25372805 } ] } // 2018-03-30T00:05:00.000Z | |
// | |
// Note how it creates larger buckets as time goes back. This allows | |
// you to track recent changes more closely, while still detecting | |
// anamolous cases where something changed far back. | |
export function timestampToISOString(timestamp) { | |
return new Date(timestamp * 60 * 1000).toISOString(); | |
} | |
function minutesFromTimestamp(timestamp) { | |
return (timestamp.millis() / 1000 / 60) | 0; | |
} | |
export function mapHashlist(hl, fn) { | |
return { clock: hl.clock, list: hl.list.map(fn) }; | |
} | |
// Create an empty hashlist based off of a time | |
export function initHashlist(latestTimestamp) { | |
const currentTime = Math.ceil(minutesFromTimestamp(latestTimestamp) / 5) * 5; | |
let list = []; | |
list.push({ | |
hash: 0, | |
timestamp: currentTime | |
}); | |
let x = 10; | |
for (let i = 0; i < 16; i++) { | |
list.push({ | |
hash: 0, | |
timestamp: currentTime - x | |
}); | |
x *= 2; | |
} | |
return { clock: latestTimestamp.toString(), list }; | |
} | |
// Re-partition the hashlist based off a new time. This will collapse | |
// time ranges as needed to bring it up to speed, while keeping the | |
// hashes in tact | |
export function updateHashlist(hashlist, latestTimestamp) { | |
const newList = initHashlist(latestTimestamp); | |
if (hashlist === null) { | |
return newList; | |
} else if (newList.clock === hashlist.clock) { | |
// No need to re-partition if they are based off the same time | |
return hashlist; | |
} else if (hashlist.clock > newList.clock) { | |
throw new Error( | |
'Cannot combine hashlist with one in the past (global clock must be running behind messages)' | |
); | |
} | |
return mapHashlist(newList, bucket2 => { | |
let bucket = bucket2; | |
hashlist.list.some(bucket1 => { | |
if (bucket1.timestamp <= bucket2.timestamp) { | |
bucket = { ...bucket, hash: bucket2.hash ^ bucket1.hash }; | |
return true; | |
} | |
}); | |
return bucket; | |
}); | |
} | |
// Find the most recent time that are guaranteed to have the same | |
// messages applied | |
export function diffHashlists(hl1, hl2) { | |
if (hl1.clock.toString() !== hl2.clock.toString()) { | |
throw new Error("Cannot compare hashlists, time isn't the same"); | |
} | |
if (hl1.list[0].hash === hl2.list[0].hash) { | |
// They are equal | |
return null; | |
} | |
for (let i = 0; i < hl1.list.length; i++) { | |
if (hl1.list[i].hash === hl2.list[i].hash) { | |
return timestampToISOString(hl1.list[i].timestamp); | |
} | |
} | |
throw new Error('No common history in hashlists'); | |
} | |
// Add a message a hashlist, given the latest time. This will update | |
// all the hashes in the appropriate buckets | |
export function addMessage(latestTimestamp, hashlist, message) { | |
if (message.timestamp.toString() > latestTimestamp.toString()) { | |
throw new Error('Message time is greater than latest given time'); | |
} | |
hashlist = updateHashlist(hashlist, latestTimestamp); | |
console.log('adding message', message.timestamp.toString()); | |
return mapHashlist(hashlist, bucket => { | |
if (bucket.timestamp > minutesFromTimestamp(message.timestamp)) { | |
return { | |
...bucket, | |
hash: bucket.hash ^ message.timestamp.hash() | |
}; | |
} | |
return bucket; | |
}); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { | |
initHashlist, | |
updateHashlist, | |
diffHashlists, | |
addMessage, | |
timestampToISOString | |
} from './hashlist'; | |
import Timestamp from './timestamp'; | |
import { format } from 'date-fns'; | |
function pretty(n) { | |
if (n < 60) { | |
console.log(`${n} (${n} min)`); | |
} else if (n < 24 * 60) { | |
console.log(`${n} (${n / 60} hours)`); | |
} else { | |
console.log(`${n} (${n / (24 * 60)} days)`); | |
} | |
} | |
function message(timestamp, hash) { | |
timestamp = Timestamp.parse(timestamp); | |
timestamp.hash = () => hash; | |
return { timestamp }; | |
} | |
function debug(hashlist) { | |
console.log({ | |
clock: hashlist.clock.toString(), | |
list: hashlist.list.map(hl => ({ | |
hash: hl.hash, | |
timestamp: timestampToISOString(hl.timestamp) | |
})) | |
}); | |
} | |
describe('hashlist', () => { | |
test('hashlist is created correctly', () => { | |
let hl = initHashlist( | |
Timestamp.parse('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
expect(hl.clock.toString()).toBe( | |
'2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF' | |
); | |
expect(hl.list.length).toBe(17); | |
}); | |
test('updating the current time works', () => { | |
let hl = initHashlist( | |
Timestamp.parse('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
// Set a few hashes | |
for (let i = 0; i < 14; i++) { | |
hl.list[i].hash = hl.list[i].hash ^ 1200; | |
} | |
for (let i = 0; i < 6; i++) { | |
hl.list[i].hash = hl.list[i].hash ^ 1000; | |
} | |
expect(hl.list.slice(0, 6).forEach(x => expect(x.hash).toBe(1880))); | |
expect(hl.list.slice(7, 14).forEach(x => expect(x.hash).toBe(1200))); | |
// The hashlist should be updated with new buckets since time has | |
// been moved forward | |
let updated = updateHashlist( | |
hl, | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
expect( | |
updated.list.slice(0, 10).forEach(x => { | |
expect(x.hash).toBe(1880); | |
expect( | |
timestampToISOString(x.timestamp) > '2018-11-12T11:00:00.000Z' | |
).toBeTruthy(); | |
}) | |
); | |
expect( | |
updated.list.slice(11, 13).forEach(x => { | |
expect(x.hash).toBe(1200); | |
expect( | |
timestampToISOString(x.timestamp) > '2018-10-29T07:00:00.000Z' | |
).toBeTruthy(); | |
}) | |
); | |
expect( | |
timestampToISOString(updated.list[14].timestamp) <= | |
'2018-10-29T07:00:00.000Z' | |
).toBeTruthy(); | |
}); | |
test('adding message works', () => { | |
let initHl = initHashlist( | |
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
// Adding a message that has the same timestamp as the latest time | |
// should work | |
let hl = addMessage( | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'), | |
initHl, | |
message('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF', 1000) | |
); | |
expect(hl.list[0].hash).toBe(1000); | |
expect(hl.list[1].hash).toBe(0); | |
// Adding a message that is in the future should throw | |
expect(() => { | |
addMessage( | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'), | |
initHl, | |
message('2018-11-14T13:21:41.122Z-0000-0123456789ABCDEF', 1000) | |
); | |
}).toThrow(); | |
// Adding a message in the last 5 minutes should be the latest | |
// bucket only | |
hl = addMessage( | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'), | |
initHl, | |
message('2018-11-14T13:19:40.122Z-0000-0123456789ABCDEF', 1000) | |
); | |
expect(hl.list[0].hash).toBe(1000); | |
expect(hl.list[1].hash).toBe(0); | |
// Adding a message in the last 10 minutes should be the latest | |
// two buckets only | |
hl = addMessage( | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'), | |
initHl, | |
message('2018-11-14T13:14:40.122Z-0000-0123456789ABCDEF', 1000) | |
); | |
expect(hl.list[0].hash).toBe(1000); | |
expect(hl.list[1].hash).toBe(1000); | |
expect(hl.list[2].hash).toBe(0); | |
// Adding an old message should update many buckets | |
hl = addMessage( | |
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'), | |
initHl, | |
message('2018-06-14T13:14:40.122Z-0000-0123456789ABCDEF', 1000) | |
); | |
for (let i = 0; i < 16; i++) { | |
expect(hl.list[i].hash).toBe(1000); | |
} | |
expect(hl.list[16].hash).toBe(0); | |
}); | |
test('diff returns the correct time difference', () => { | |
let hl1 = initHashlist( | |
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
let hl2 = initHashlist( | |
Timestamp.parse('2018-11-20T05:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
const messages = [ | |
// First client messages | |
message('2018-11-13T13:20:40.122Z-0000-0123456789ABCDEF', 1000), | |
message('2018-11-14T13:05:35.122Z-0000-0123456789ABCDEF', 1100), | |
message('2018-11-15T22:19:00.122Z-0000-0123456789ABCDEF', 1200), | |
// Second client messages | |
message('2018-11-20T13:19:40.122Z-0000-0123456789ABCDEF', 1300), | |
message('2018-11-25T13:19:40.122Z-0000-0123456789ABCDEF', 1400) | |
]; | |
// Add some messages to the first list. Note that we've moved the | |
// clock forward | |
let timestamp1 = Timestamp.parse( | |
'2018-11-16T13:21:40.122Z-0000-0123456789ABCDEF' | |
); | |
hl1 = addMessage(timestamp1, hl1, messages[0]); | |
hl1 = addMessage(timestamp1, hl1, messages[1]); | |
hl1 = addMessage(timestamp1, hl1, messages[2]); | |
expect(hl1.list[0].hash).toBe(788); | |
// Add some messages to the second list | |
let timestamp2 = Timestamp.parse( | |
'2018-11-25T13:21:40.122Z-0000-0123456789ABCDEF' | |
); | |
hl2 = addMessage(timestamp2, hl2, messages[3]); | |
hl2 = addMessage(timestamp2, hl2, messages[4]); | |
expect(hl2.list[0].hash).toBe(108); | |
// Move both hashlists forward to the same latest time | |
let lastTimestamp = timestamp2; | |
hl1 = updateHashlist(hl1, lastTimestamp); | |
hl2 = updateHashlist(hl2, lastTimestamp); | |
// This is correct - this is the most recent timestamp (which is | |
// before all 5 new messages) | |
expect(diffHashlists(hl1, hl2)).toBe('2018-11-11T08:05:00.000Z'); | |
// Add the messages to the other client and verify the hash | |
hl1 = addMessage(lastTimestamp, hl1, messages[3]); | |
hl1 = addMessage(lastTimestamp, hl1, messages[4]); | |
hl2 = addMessage(lastTimestamp, hl2, messages[0]); | |
hl2 = addMessage(lastTimestamp, hl2, messages[1]); | |
hl2 = addMessage(lastTimestamp, hl2, messages[2]); | |
expect(hl1.list[0].hash).toBe(hl2.list[0].hash); | |
}); | |
test('diff returns the correct time with same messages applied', () => { | |
let hl1 = initHashlist( | |
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
let hl2 = initHashlist( | |
Timestamp.parse('2018-11-20T05:21:40.122Z-0000-0123456789ABCDEF') | |
); | |
let messages = [ | |
message('2018-11-13T13:20:40.122Z-0000-0123456789ABCDEF', 1000), | |
message('2018-11-14T13:05:35.122Z-0000-0123456789ABCDEF', 1100), | |
message('2018-11-21T22:19:00.122Z-0000-0123456789ABCDEF', 1200), | |
message('2018-11-23T22:19:40.122Z-0000-0123456789ABCDEF', 1300), | |
message('2018-11-24T13:19:40.122Z-0000-0123456789ABCDEF', 1400) | |
]; | |
let timestamp1 = Timestamp.parse( | |
'2018-11-24T23:21:40.122Z-0000-0123456789ABCDEF' | |
); | |
hl1 = addMessage(timestamp1, hl1, messages[0]); | |
hl1 = addMessage(timestamp1, hl1, messages[1]); | |
hl1 = addMessage(timestamp1, hl1, messages[2]); | |
hl1 = addMessage(timestamp1, hl1, messages[3]); | |
let timestamp2 = Timestamp.parse( | |
'2018-11-25T13:21:40.122Z-0000-0123456789ABCDEF' | |
); | |
hl2 = addMessage(timestamp2, hl2, messages[0]); | |
hl2 = addMessage(timestamp2, hl2, messages[1]); | |
hl2 = addMessage(timestamp2, hl2, messages[2]); | |
hl2 = addMessage(timestamp2, hl2, messages[4]); | |
// Move both hashlists forward to the same latest time | |
let lastTimestamp = timestamp2; | |
hl1 = updateHashlist(hl1, lastTimestamp); | |
hl2 = updateHashlist(hl2, lastTimestamp); | |
expect(diffHashlists(hl1, hl2)).toBe('2018-11-23T18:45:00.000Z'); | |
}); | |
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { addMessage, updateHashlist } from './hashlist'; | |
import Timestamp from './timestamp'; | |
import expect from 'expect'; | |
// prettier-ignore | |
let messages = [ | |
{ timestamp: Timestamp.parse('2018-01-01T01:01:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-03-01T01:01:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-05-01T01:02:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-01T01:02:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T01:02:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:02:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:21:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:35:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:42:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:46:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:53:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:54:00.000Z-0000-0123456789ABCDEF') }, | |
{ timestamp: Timestamp.parse('2018-06-02T05:57:00.000Z-0000-0123456789ABCDEF') } | |
]; | |
let latestTime = Timestamp.parse( | |
'2018-06-02T05:57:00.000Z-0000-0123456789ABCDEF' | |
); | |
function makeHashlistAtTime(time) { | |
let hl = null; | |
for (let msg of messages) { | |
hl = addMessage(time, hl, msg); | |
} | |
return hl; | |
} | |
let baseMillis = latestTime.millis(); | |
let hl = makeHashlistAtTime(latestTime); | |
// Move forward in 55 seconds increments through 3 months worth of | |
// time and make sure that re-partitioning the hashlist results in the | |
// same thing as a new one created at that time | |
for (var i = 0; i < 3 * 30 * 24 * 60 * 60 * 1000; i += 55 * 1000) { | |
let newTime = new Timestamp(baseMillis + i, 0, '0123456789ABCDEF'); | |
console.log('Moving forward to ' + newTime); | |
let correct = makeHashlistAtTime(newTime); | |
let moved = updateHashlist(hl, newTime); | |
expect(correct).toEqual(moved); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment