Created
December 13, 2016 13:58
-
-
Save dvdhrm/aaf61c8d559442590658d557010acf62 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 23b39ffc66db836c1d5f895d90188e982ebe512d Mon Sep 17 00:00:00 2001 | |
From: David Herrmann <[email protected]> | |
Date: Tue, 13 Dec 2016 14:49:14 +0100 | |
Subject: [PATCH] bpf-lpm: fixes and tests | |
Fixes 3 things: | |
- Initialize new_node to NULL. Otherwise, the error-path is wrong if | |
we jump there just to unlock the spinlock. | |
- longest_prefix_match() should check for both node->prefixlen and | |
key->prefixlen *AT THE SAME TIME*. Otherwise, it returns a wrong | |
value if: | |
prefixlen > node->prefixlen && | |
key->prefixlen < node->prefixlen | |
Because in that case the first condition is caught and | |
node->prefixlen is returned. But that is wrong, since we must never | |
return anything greater than key->prefixlen. So add the min() there. | |
Also merge both conditions, since the handler is now basically the | |
same. | |
- When inserting a new node that is an *ANCESTOR* of an existing node, | |
we must insert it as such. We must *NOT* insert an intermediate | |
node. | |
Example: We have 192.168.0.0/24 in the trie. Then we add | |
192.168.0.0/16. This must make the /16 node a parent of the | |
/24 node. If they end up as siblings, we will never find | |
the /16 node. | |
This requires a small change in extract_bit() to accept the data | |
pointer directly. | |
Additionally to the fixes, this adds a randomized test in | |
./samples/bpf/test_lpm.c. This runs a simple trivial-lpm implementation | |
against the kernel bpf-lpm map and compares their behavior. | |
Signed-off-by: David Herrmann <[email protected]> | |
--- | |
kernel/bpf/lpm_trie.c | 30 +++--- | |
samples/bpf/Makefile | 1 + | |
samples/bpf/test_lpm.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++ | |
3 files changed, 267 insertions(+), 12 deletions(-) | |
create mode 100644 samples/bpf/test_lpm.c | |
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c | |
index 1daaed5..8853703 100644 | |
--- a/kernel/bpf/lpm_trie.c | |
+++ b/kernel/bpf/lpm_trie.c | |
@@ -148,10 +148,9 @@ struct lpm_trie { | |
* returned. | |
*/ | |
-static inline int extract_bit(const struct bpf_lpm_trie_key *key, | |
- size_t index) | |
+static inline int extract_bit(const u8 *data, size_t index) | |
{ | |
- return !!(key->data[index / 8] & (1 << (7 - (index % 8)))); | |
+ return !!(data[index / 8] & (1 << (7 - (index % 8)))); | |
} | |
/** | |
@@ -175,11 +174,8 @@ static size_t longest_prefix_match(const struct lpm_trie *trie, | |
b = 8 - fls(node->data[i] ^ key->data[i]); | |
prefixlen += b; | |
- if (prefixlen >= node->prefixlen) | |
- return node->prefixlen; | |
- | |
- if (prefixlen >= key->prefixlen) | |
- return key->prefixlen; | |
+ if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen) | |
+ return min(node->prefixlen, key->prefixlen); | |
if (b < 8) | |
break; | |
@@ -231,7 +227,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) | |
* become more specific. Determine the next bit in the key and | |
* traverse down. | |
*/ | |
- next_bit = extract_bit(key, node->prefixlen); | |
+ next_bit = extract_bit(key->data, node->prefixlen); | |
node = rcu_dereference(node->child[next_bit]); | |
} | |
@@ -278,7 +274,7 @@ static size_t find_target_node(struct lpm_trie *trie, | |
(*node)->prefixlen == trie->max_prefixlen) | |
break; | |
- next_bit = extract_bit(key, (*node)->prefixlen); | |
+ next_bit = extract_bit(key->data, (*node)->prefixlen); | |
node = &(*node)->child[next_bit]; | |
} | |
@@ -292,7 +288,7 @@ static int trie_update_elem(struct bpf_map *map, | |
void *_key, void *value, u64 flags) | |
{ | |
struct lpm_trie *trie = container_of(map, struct lpm_trie, map); | |
- struct lpm_tree_node **node, *new_node, *im_node; | |
+ struct lpm_tree_node **node, *im_node, *new_node = NULL; | |
struct bpf_lpm_trie_key *key = _key; | |
size_t matchlen; | |
int ret = 0; | |
@@ -357,6 +353,16 @@ static int trie_update_elem(struct bpf_map *map, | |
goto out; | |
} | |
+ /* | |
+ * If the new node matches the prefix completely, it must be an | |
+ * inserted as an ancestor. Simply insert it between @node and @*node. | |
+ */ | |
+ if (matchlen == key->prefixlen) { | |
+ new_node->child[extract_bit((*node)->data, matchlen)] = *node; | |
+ rcu_assign_pointer(*node, new_node); | |
+ goto out; | |
+ } | |
+ | |
/* Create an intermediate node and place it inbetween */ | |
im_node = lpm_tree_node_alloc(trie->data_size); | |
if (!im_node) { | |
@@ -369,7 +375,7 @@ static int trie_update_elem(struct bpf_map *map, | |
memcpy(im_node->data, (*node)->data, trie->data_size); | |
/* Now determine which child to install in which slot */ | |
- if (extract_bit(key, matchlen)) { | |
+ if (extract_bit(key->data, matchlen)) { | |
im_node->child[0] = *node; | |
im_node->child[1] = new_node; | |
} else { | |
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile | |
index 8c49cc1..bacb6eb63 100644 | |
--- a/samples/bpf/Makefile | |
+++ b/samples/bpf/Makefile | |
@@ -28,6 +28,7 @@ hostprogs-y += test_current_task_under_cgroup | |
hostprogs-y += trace_event | |
hostprogs-y += sampleip | |
hostprogs-y += tc_l2_redirect | |
+hostprogs-y += test_lpm | |
test_verifier-objs := test_verifier.o libbpf.o | |
test_maps-objs := test_maps.o libbpf.o | |
diff --git a/samples/bpf/test_lpm.c b/samples/bpf/test_lpm.c | |
new file mode 100644 | |
index 0000000..c9bae74 | |
--- /dev/null | |
+++ b/samples/bpf/test_lpm.c | |
@@ -0,0 +1,248 @@ | |
+/* | |
+ * Randomized tests for eBPF longest-prefix-match maps | |
+ * | |
+ * This program runs randomized tests against the lpm-bpf-map. It implements a | |
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked | |
+ * lists. The implementation should be pretty straightforward. | |
+ * | |
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies | |
+ * the trie-based bpf-map implementation behaves the same way as tlpm. | |
+ */ | |
+ | |
+#include <assert.h> | |
+#include <errno.h> | |
+#include <inttypes.h> | |
+#include <linux/bpf.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <time.h> | |
+#include <unistd.h> | |
+#include "libbpf.h" | |
+ | |
+struct tlpm_node { | |
+ struct tlpm_node *next; | |
+ size_t n_bits; | |
+ uint8_t key[]; | |
+}; | |
+ | |
+static struct tlpm_node *tlpm_add(struct tlpm_node *list, | |
+ const uint8_t *key, | |
+ size_t n_bits) | |
+{ | |
+ struct tlpm_node *node; | |
+ size_t n; | |
+ | |
+ /* add new entry with @key/@n_bits to @list and return new head */ | |
+ | |
+ n = (n_bits + 7) / 8; | |
+ node = malloc(sizeof(*node) + n); | |
+ assert(node); | |
+ | |
+ node->next = list; | |
+ node->n_bits = n_bits; | |
+ memcpy(node->key, key, n); | |
+ | |
+ return node; | |
+} | |
+ | |
+static void tlpm_clear(struct tlpm_node *list) | |
+{ | |
+ struct tlpm_node *node; | |
+ | |
+ /* free all entries in @list */ | |
+ | |
+ while ((node = list)) { | |
+ list = list->next; | |
+ free(node); | |
+ } | |
+} | |
+ | |
+static struct tlpm_node *tlpm_match(struct tlpm_node *list, | |
+ const uint8_t *key, | |
+ size_t n_bits) | |
+{ | |
+ struct tlpm_node *best = NULL; | |
+ size_t i; | |
+ | |
+ /* | |
+ * Perform longest prefix-match on @key/@n_bits. That is, iterate all | |
+ * entries and match each prefix against @key. Remember the "best" | |
+ * entry we find (i.e., the longest prefix that matches) and return it | |
+ * to the caller when done. | |
+ */ | |
+ | |
+ for ( ; list; list = list->next) { | |
+ for (i = 0; i < n_bits && i < list->n_bits; ++i) { | |
+ if ((key[i / 8] & (1 << (7 - i % 8))) != | |
+ (list->key[i / 8] & (1 << (7 - i % 8)))) | |
+ break; | |
+ } | |
+ | |
+ if (i >= list->n_bits) { | |
+ if (!best || i > best->n_bits) | |
+ best = list; | |
+ } | |
+ } | |
+ | |
+ return best; | |
+} | |
+ | |
+static void test_lpm_basic(void) | |
+{ | |
+ struct tlpm_node *list = NULL, *t1, *t2; | |
+ | |
+ /* very basic, static tests to verify tlpm works as expected */ | |
+ | |
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); | |
+ | |
+ t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8); | |
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); | |
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); | |
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16)); | |
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8)); | |
+ assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8)); | |
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7)); | |
+ | |
+ t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16); | |
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); | |
+ assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); | |
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15)); | |
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16)); | |
+ | |
+ tlpm_clear(list); | |
+} | |
+ | |
+static void test_lpm_order(void) | |
+{ | |
+ struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL; | |
+ size_t i, j; | |
+ | |
+ /* | |
+ * Verify the tlpm implementation works correctly regardless of the | |
+ * order of entries. Insert a random set of entries into @l1, and copy | |
+ * the same data in reverse order into @l2. Then verify a lookup of | |
+ * random keys will yield the same result in both sets. | |
+ */ | |
+ | |
+ for (i = 0; i < (1 << 12); ++i) | |
+ l1 = tlpm_add(l1, (uint8_t[]){ | |
+ rand() % 0xff, | |
+ rand() % 0xff, | |
+ }, rand() % 16 + 1); | |
+ | |
+ for (t1 = l1; t1; t1 = t1->next) | |
+ l2 = tlpm_add(l2, t1->key, t1->n_bits); | |
+ | |
+ for (i = 0; i < (1 << 8); ++i) { | |
+ uint8_t key[] = { rand() % 0xff, rand() % 0xff }; | |
+ | |
+ t1 = tlpm_match(l1, key, 16); | |
+ t2 = tlpm_match(l2, key, 16); | |
+ | |
+ assert(!t1 == !t2); | |
+ if (t1) { | |
+ assert(t1->n_bits == t2->n_bits); | |
+ for (j = 0; j < t1->n_bits; ++j) | |
+ assert((t1->key[j / 8] & (1 << (7 - j % 8))) == | |
+ (t2->key[j / 8] & (1 << (7 - j % 8)))); | |
+ } | |
+ } | |
+ | |
+ tlpm_clear(l1); | |
+ tlpm_clear(l2); | |
+} | |
+ | |
+static void test_lpm_map(void) | |
+{ | |
+ size_t i, j, n_matches, n_nodes, n_lookups; | |
+ struct tlpm_node *t, *list = NULL; | |
+ struct bpf_lpm_trie_key *key; | |
+ uint8_t value[8] = {}; | |
+ int r, map; | |
+ | |
+ /* | |
+ * Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of | |
+ * prefixes and insert it into both tlpm and bpf-lpm. Then run some | |
+ * randomized lookups and verify both maps return the same result. | |
+ */ | |
+ | |
+ n_matches = 0; | |
+ n_nodes = 1 << 8; | |
+ n_lookups = 1 << 16; | |
+ | |
+ key = alloca(sizeof(*key) + 4); | |
+ memset(key, 0, sizeof(*key) + 4); | |
+ | |
+ map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, | |
+ sizeof(*key) + 4, | |
+ sizeof(value), | |
+ 4096, | |
+ 0); | |
+ assert(map >= 0); | |
+ | |
+ for (i = 0; i < n_nodes; ++i) { | |
+ value[0] = rand() & 0xff; | |
+ value[1] = rand() & 0xff; | |
+ value[2] = rand() & 0xff; | |
+ value[3] = rand() & 0xff; | |
+ value[4] = rand() % 33; | |
+ | |
+ list = tlpm_add(list, value, value[4]); | |
+ | |
+ key->prefixlen = value[4]; | |
+ memcpy(key->data, value, 4); | |
+ r = bpf_update_elem(map, key, value, 0); | |
+ assert(!r); | |
+ } | |
+ | |
+ for (i = 0; i < n_lookups; ++i) { | |
+ uint8_t data[] = { | |
+ rand() % 0xff, | |
+ rand() % 0xff, | |
+ rand() % 0xff, | |
+ rand() % 0xff | |
+ }; | |
+ | |
+ t = tlpm_match(list, data, 32); | |
+ | |
+ key->prefixlen = 32; | |
+ memcpy(key->data, data, 4); | |
+ r = bpf_lookup_elem(map, key, value); | |
+ assert(!r || errno == ENOENT); | |
+ assert(!t == !!r); | |
+ | |
+ if (t) { | |
+ ++n_matches; | |
+ assert(t->n_bits == value[4]); | |
+ for (j = 0; j < t->n_bits; ++j) | |
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) == | |
+ (value[j / 8] & (1 << (7 - j % 8)))); | |
+ } | |
+ } | |
+ | |
+ close(map); | |
+ tlpm_clear(list); | |
+ | |
+ /* | |
+ * With 255 random nodes in the map, we are pretty likely to match | |
+ * something on every lookup. For statistics, use this: | |
+ * | |
+ * printf(" nodes: %zu\n" | |
+ * "lookups: %zu\n" | |
+ * "matches: %zu\n", n_nodes, n_lookups, n_matches); | |
+ */ | |
+} | |
+ | |
+int main(void) | |
+{ | |
+ /* we want predictable, pseudo random tests */ | |
+ srand(0xf00ba1); | |
+ | |
+ test_lpm_basic(); | |
+ test_lpm_order(); | |
+ test_lpm_map(); | |
+ | |
+ printf("test_lpm: OK\n"); | |
+ return 0; | |
+} | |
-- | |
2.10.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment