Skip to content

Instantly share code, notes, and snippets.

@leiless
Created October 12, 2023 07:17
Show Gist options
  • Save leiless/6f17e0ee8eecd22a217c12c95c08a431 to your computer and use it in GitHub Desktop.
Save leiless/6f17e0ee8eecd22a217c12c95c08a431 to your computer and use it in GitHub Desktop.
LMDB INTEGER_KEY big key `mdb_cursor_get(MDB_SET_RANGE)` bug
use lmdb::{Cursor, Transaction};
/// Reproducer: range iteration over 128-bit keys in an LMDB database opened with
/// `INTEGER_KEY | DUP_SORT`.
///
/// Steps:
/// 1. Recreate the `lmdb-dir` directory and open an environment with a 100 MiB map.
/// 2. Insert two disjoint runs of consecutive `u128` keys (native-endian bytes), each in
///    its own write transaction. Every value is the key formatted as `{:#x}`.
/// 3. Open a read-only cursor positioned at `start_key2 + 0x100`, walk 101 entries and
///    assert that each key is exactly the previous key plus one.
///
/// `INTEGER_KEY` is kept on purpose: the LMDB documentation describes `MDB_INTEGERKEY`
/// keys as `unsigned int` or `size_t`, and this function exists to demonstrate what
/// happens when 16-byte keys are used with that flag anyway.
///
/// # Errors
///
/// Returns an error if the directory cannot be prepared, if any LMDB call fails, or if a
/// stored key/value cannot be decoded as a `u128` / UTF-8 string.
///
/// # Panics
///
/// Panics when one of the ordering assertions fails, i.e. when the cursor yields keys
/// that are not consecutive starting at the requested key.
fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
    // Each populated run covers `start..=start + KEYS_PER_RUN`.
    const KEYS_PER_RUN: u128 = 100000;

    let db_dir = "lmdb-dir";
    // Start from an empty directory; a directory that does not exist yet is fine.
    if let Err(err) = std::fs::remove_dir_all(db_dir) {
        if err.kind() != std::io::ErrorKind::NotFound {
            return Err(err.into());
        }
    }
    std::fs::create_dir_all(db_dir)?;

    let env = {
        let mut builder = lmdb::Environment::new();
        builder.set_map_size(100 * 1024 * 1024);
        // Propagate the failure instead of panicking: the caller already handles errors.
        builder.open(std::path::Path::new(db_dir))?
    };
    let db = env.create_db(
        None,
        lmdb::DatabaseFlags::INTEGER_KEY | lmdb::DatabaseFlags::DUP_SORT,
    )?;

    // Data population #1
    let mut txn = env.begin_rw_txn()?;
    let start_key1 = 0x0002_0000_0000_0074_0002_0000_0000_0076_u128;
    let end_key1 = start_key1 + KEYS_PER_RUN;
    for i in start_key1..=end_key1 {
        let key = i.to_ne_bytes();
        assert_eq!(key.len(), std::mem::size_of_val(&start_key1));
        let val = format!("{:#x}", i);
        txn.put(db, &key, &val, lmdb::WriteFlags::empty())?;
    }
    txn.commit()?;

    // Data population #2
    let mut txn = env.begin_rw_txn()?;
    let start_key2 = 0x0005_0000_0000_0005_0006_0000_0000_0043_u128;
    let end_key2 = start_key2 + KEYS_PER_RUN;
    // The second run lies strictly above the first one when compared as u128.
    assert!(start_key1 < start_key2);
    assert!(end_key1 < end_key2);
    for i in start_key2..=end_key2 {
        let key = i.to_ne_bytes();
        assert_eq!(key.len(), std::mem::size_of_val(&start_key2));
        let val = format!("{:#x}", i);
        txn.put(db, &key, &val, lmdb::WriteFlags::empty())?;
    }
    txn.commit()?;

    // Test MDB_SET_RANGE (>=)
    let txn = env.begin_ro_txn()?;
    let iter_init_key = start_key2 + 0x100;
    let mut cursor = txn.open_ro_cursor(db)?;
    let mut count = 0;
    let mut last_key = None;
    for it in cursor.iter_from(iter_init_key.to_ne_bytes()) {
        let (a, b) = it?;
        let key = u128::from_ne_bytes(a.try_into()?);
        let val = std::str::from_utf8(b)?;
        eprintln!("{:#x} {}", key, val);

        // Perform the sanity checks: the first key must be the requested one, and every
        // following key must be its predecessor plus one.
        match last_key {
            None => {
                assert_eq!(key, iter_init_key);
            }
            Some(last_key) => {
                assert_eq!(last_key + 1, key, "\nlast_key {:#x} vs. key {:#x}", last_key, key);
            }
        }
        last_key = Some(key);

        count += 1;
        if count > 100 {
            break;
        }
    }
    // The cursor must be released before the transaction it was opened on is aborted.
    drop(cursor);

    // Invariant check
    assert!(count > 100, "{}", count);
    let last_key = last_key.expect("the cursor yielded at least one key");
    assert_eq!(last_key + 1, iter_init_key + count);

    txn.abort();
    Ok(())
}
/// Entry point: runs the LMDB reproducer and prints `Done` to stderr when it succeeds.
///
/// Any error returned by the reproducer is converted into a boxed error and returned.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    match test_lmdb_dup_sort_update_in_place() {
        Ok(()) => {
            eprintln!("Done");
            Ok(())
        }
        Err(failure) => Err(failure.into()),
    }
}
@leiless
Copy link
Author

leiless commented Oct 12, 2023

Reproduce

$ cargo b                 
    Finished dev [unoptimized + debuginfo] target(s) in 0.08s

$ ./rust-test
0x50000000000050006000000000143 0x50000000000050006000000000143
0x50000000000050006000000000144 0x50000000000050006000000000144
0x20000000000740002000000000145 0x20000000000740002000000000145
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `25961484292674230378061700054122821`,
 right: `10384593717071795079936492919849285`: 
last_key 0x50000000000050006000000000144 vs. key 0x20000000000740002000000000145', src/main.rs:88:17
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

Root cause

LMDB's MDB_INTEGERKEY flag expects keys to be native-byte-order integers of type unsigned int or size_t (i.e. 32-bit or 64-bit), and all keys must be the same size.

numeric keys in native byte order: either unsigned int or size_t. The keys must all be of the same size.
http://www.lmdb.tech/doc/group__mdb__dbi__open.html#ga219c5c50d41902fca79c68d8836e7753

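In this use case, I use 128-bit integer keys. All of the 128-bit keys are inserted successfully, but mdb_cursor_get(MDB_SET_RANGE) misbehaves: as the output above shows, iteration jumps from a key in the second range back to a key in the first range, so the keys are not returned in 128-bit numerical order.
I guess all the cursor/get/put-related APIs are affected in the same way.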

Solution 1: INTEGER_KEY -> REVERSE_KEY + Little-endian keys

#define MDB_REVERSEKEY 0x02
use reverse string keys
http://www.lmdb.tech/doc/group__mdb__dbi__open.html#gaea2d873575b7875771ea5296b4c85beb

Keys are strings to be compared in reverse order, from the end of the strings to the beginning.
By default, Keys are treated as strings and compared from beginning to end.

diff --git a/rust-test/src/main.rs b/rust-test/src/main.rs
index 85b5885..afdc395 100644
--- a/rust-test/src/main.rs
+++ b/rust-test/src/main.rs
@@ -18,7 +18,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
 
     let db = env.create_db(
         None,
-        lmdb::DatabaseFlags::INTEGER_KEY | lmdb::DatabaseFlags::DUP_SORT,
+        lmdb::DatabaseFlags::REVERSE_KEY | lmdb::DatabaseFlags::DUP_SORT,
     )?;
 
 
@@ -30,7 +30,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
     let end_key1 = start_key1 + 100000;
 
     for i in start_key1..=end_key1 {
-        let key = i.to_ne_bytes();
+        let key = i.to_le_bytes();
         assert_eq!(key.len(), std::mem::size_of_val(&start_key1));
         let val = format!("{:#x}", i);
 
@@ -51,7 +51,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
     assert!(end_key1 < end_key2);
 
     for i in start_key2..=end_key2 {
-        let key = i.to_ne_bytes();
+        let key = i.to_le_bytes();
         assert_eq!(key.len(), std::mem::size_of_val(&start_key2));
         let val = format!("{:#x}", i);
 
@@ -72,9 +72,9 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
 
     let mut last_key = None;
 
-    for it in cursor.iter_from(iter_init_key.to_ne_bytes()) {
+    for it in cursor.iter_from(iter_init_key.to_le_bytes()) {
         let (a, b) = it?;
-        let key = u128::from_ne_bytes(a.try_into()?);
+        let key = u128::from_le_bytes(a.try_into()?);
 
         let val = std::str::from_utf8(b)?;
         eprintln!("{:#x} {}", key, val);

Solution 2: no INTEGER_KEY + Big-endian keys

diff --git a/rust-test/src/main.rs b/rust-test/src/main.rs
index 85b5885..6b8ead7 100644
--- a/rust-test/src/main.rs
+++ b/rust-test/src/main.rs
@@ -18,7 +18,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
 
     let db = env.create_db(
         None,
-        lmdb::DatabaseFlags::INTEGER_KEY | lmdb::DatabaseFlags::DUP_SORT,
+        lmdb::DatabaseFlags::DUP_SORT,
     )?;
 
 
@@ -30,7 +30,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
     let end_key1 = start_key1 + 100000;
 
     for i in start_key1..=end_key1 {
-        let key = i.to_ne_bytes();
+        let key = i.to_be_bytes();
         assert_eq!(key.len(), std::mem::size_of_val(&start_key1));
         let val = format!("{:#x}", i);
 
@@ -51,7 +51,7 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
     assert!(end_key1 < end_key2);
 
     for i in start_key2..=end_key2 {
-        let key = i.to_ne_bytes();
+        let key = i.to_be_bytes();
         assert_eq!(key.len(), std::mem::size_of_val(&start_key2));
         let val = format!("{:#x}", i);
 
@@ -72,9 +72,9 @@ fn test_lmdb_dup_sort_update_in_place() -> anyhow::Result<()> {
 
     let mut last_key = None;
 
-    for it in cursor.iter_from(iter_init_key.to_ne_bytes()) {
+    for it in cursor.iter_from(iter_init_key.to_be_bytes()) {
         let (a, b) = it?;
-        let key = u128::from_ne_bytes(a.try_into()?);
+        let key = u128::from_be_bytes(a.try_into()?);
 
         let val = std::str::from_utf8(b)?;
         eprintln!("{:#x} {}", key, val);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment