Skip to content

Instantly share code, notes, and snippets.

@dvdhrm
Created December 13, 2016 13:58
Show Gist options
  • Save dvdhrm/aaf61c8d559442590658d557010acf62 to your computer and use it in GitHub Desktop.
Save dvdhrm/aaf61c8d559442590658d557010acf62 to your computer and use it in GitHub Desktop.
From 23b39ffc66db836c1d5f895d90188e982ebe512d Mon Sep 17 00:00:00 2001
From: David Herrmann <[email protected]>
Date: Tue, 13 Dec 2016 14:49:14 +0100
Subject: [PATCH] bpf-lpm: fixes and tests
Fixes 3 things:
- Initialize new_node to NULL. Otherwise, the error-path is wrong if
we jump there just to unlock the spinlock.
- longest_prefix_match() should check for both node->prefixlen and
key->prefixlen *AT THE SAME TIME*. Otherwise, it returns a wrong
value if:
prefixlen > node->prefixlen &&
key->prefixlen < node->prefixlen
Because in that case the first condition is caught and
node->prefixlen is returned. But that is wrong, since we must never
return anything greater than key->prefixlen. So add the min() there.
Also merge both conditions, since the handler is now basically the
same.
- When inserting a new node that is an *ANCESTOR* of an existing node,
we must insert it as such. We must *NOT* insert an intermediate
node.
Example: We have 192.168.0.0/24 in the trie. Then we add
192.168.0.0/16. This must make the /16 node a parent of the
/24 node. If they end up as siblings, we will never find
the /16 node.
This requires a small change in extract_bit() to accept the data
pointer directly.
Additionally to the fixes, this adds a randomized test in
./samples/bpf/test_lpm.c. This runs a simple trivial-lpm implementation
against the kernel bpf-lpm map and compares their behavior.
Signed-off-by: David Herrmann <[email protected]>
---
kernel/bpf/lpm_trie.c | 30 +++---
samples/bpf/Makefile | 1 +
samples/bpf/test_lpm.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 267 insertions(+), 12 deletions(-)
create mode 100644 samples/bpf/test_lpm.c
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1daaed5..8853703 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -148,10 +148,9 @@ struct lpm_trie {
* returned.
*/
-static inline int extract_bit(const struct bpf_lpm_trie_key *key,
- size_t index)
+static inline int extract_bit(const u8 *data, size_t index)
{
- return !!(key->data[index / 8] & (1 << (7 - (index % 8))));
+ return !!(data[index / 8] & (1 << (7 - (index % 8))));
}
/**
@@ -175,11 +174,8 @@ static size_t longest_prefix_match(const struct lpm_trie *trie,
b = 8 - fls(node->data[i] ^ key->data[i]);
prefixlen += b;
- if (prefixlen >= node->prefixlen)
- return node->prefixlen;
-
- if (prefixlen >= key->prefixlen)
- return key->prefixlen;
+ if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
+ return min(node->prefixlen, key->prefixlen);
if (b < 8)
break;
@@ -231,7 +227,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
* become more specific. Determine the next bit in the key and
* traverse down.
*/
- next_bit = extract_bit(key, node->prefixlen);
+ next_bit = extract_bit(key->data, node->prefixlen);
node = rcu_dereference(node->child[next_bit]);
}
@@ -278,7 +274,7 @@ static size_t find_target_node(struct lpm_trie *trie,
(*node)->prefixlen == trie->max_prefixlen)
break;
- next_bit = extract_bit(key, (*node)->prefixlen);
+ next_bit = extract_bit(key->data, (*node)->prefixlen);
node = &(*node)->child[next_bit];
}
@@ -292,7 +288,7 @@ static int trie_update_elem(struct bpf_map *map,
void *_key, void *value, u64 flags)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
- struct lpm_tree_node **node, *new_node, *im_node;
+ struct lpm_tree_node **node, *im_node, *new_node = NULL;
struct bpf_lpm_trie_key *key = _key;
size_t matchlen;
int ret = 0;
@@ -357,6 +353,16 @@ static int trie_update_elem(struct bpf_map *map,
goto out;
}
+ /*
+ * If the new node matches the prefix completely, it must be an
+ * inserted as an ancestor. Simply insert it between @node and @*node.
+ */
+ if (matchlen == key->prefixlen) {
+ new_node->child[extract_bit((*node)->data, matchlen)] = *node;
+ rcu_assign_pointer(*node, new_node);
+ goto out;
+ }
+
/* Create an intermediate node and place it inbetween */
im_node = lpm_tree_node_alloc(trie->data_size);
if (!im_node) {
@@ -369,7 +375,7 @@ static int trie_update_elem(struct bpf_map *map,
memcpy(im_node->data, (*node)->data, trie->data_size);
/* Now determine which child to install in which slot */
- if (extract_bit(key, matchlen)) {
+ if (extract_bit(key->data, matchlen)) {
im_node->child[0] = *node;
im_node->child[1] = new_node;
} else {
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 8c49cc1..bacb6eb63 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -28,6 +28,7 @@ hostprogs-y += test_current_task_under_cgroup
hostprogs-y += trace_event
hostprogs-y += sampleip
hostprogs-y += tc_l2_redirect
+hostprogs-y += test_lpm
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
diff --git a/samples/bpf/test_lpm.c b/samples/bpf/test_lpm.c
new file mode 100644
index 0000000..c9bae74
--- /dev/null
+++ b/samples/bpf/test_lpm.c
@@ -0,0 +1,248 @@
+/*
+ * Randomized tests for eBPF longest-prefix-match maps
+ *
+ * This program runs randomized tests against the lpm-bpf-map. It implements a
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
+ * lists. The implementation should be pretty straightforward.
+ *
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
+ * the trie-based bpf-map implementation behaves the same way as tlpm.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include "libbpf.h"
+
+struct tlpm_node {
+ struct tlpm_node *next;
+ size_t n_bits;
+ uint8_t key[];
+};
+
+static struct tlpm_node *tlpm_add(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *node;
+ size_t n;
+
+ /* add new entry with @key/@n_bits to @list and return new head */
+
+ n = (n_bits + 7) / 8;
+ node = malloc(sizeof(*node) + n);
+ assert(node);
+
+ node->next = list;
+ node->n_bits = n_bits;
+ memcpy(node->key, key, n);
+
+ return node;
+}
+
+static void tlpm_clear(struct tlpm_node *list)
+{
+ struct tlpm_node *node;
+
+ /* free all entries in @list */
+
+ while ((node = list)) {
+ list = list->next;
+ free(node);
+ }
+}
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = NULL;
+ size_t i;
+
+ /*
+ * Perform longest prefix-match on @key/@n_bits. That is, iterate all
+ * entries and match each prefix against @key. Remember the "best"
+ * entry we find (i.e., the longest prefix that matches) and return it
+ * to the caller when done.
+ */
+
+ for ( ; list; list = list->next) {
+ for (i = 0; i < n_bits && i < list->n_bits; ++i) {
+ if ((key[i / 8] & (1 << (7 - i % 8))) !=
+ (list->key[i / 8] & (1 << (7 - i % 8))))
+ break;
+ }
+
+ if (i >= list->n_bits) {
+ if (!best || i > best->n_bits)
+ best = list;
+ }
+ }
+
+ return best;
+}
+
+static void test_lpm_basic(void)
+{
+ struct tlpm_node *list = NULL, *t1, *t2;
+
+ /* very basic, static tests to verify tlpm works as expected */
+
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+ t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
+
+ t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+
+ tlpm_clear(list);
+}
+
+static void test_lpm_order(void)
+{
+ struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
+ size_t i, j;
+
+ /*
+ * Verify the tlpm implementation works correctly regardless of the
+ * order of entries. Insert a random set of entries into @l1, and copy
+ * the same data in reverse order into @l2. Then verify a lookup of
+ * random keys will yield the same result in both sets.
+ */
+
+ for (i = 0; i < (1 << 12); ++i)
+ l1 = tlpm_add(l1, (uint8_t[]){
+ rand() % 0xff,
+ rand() % 0xff,
+ }, rand() % 16 + 1);
+
+ for (t1 = l1; t1; t1 = t1->next)
+ l2 = tlpm_add(l2, t1->key, t1->n_bits);
+
+ for (i = 0; i < (1 << 8); ++i) {
+ uint8_t key[] = { rand() % 0xff, rand() % 0xff };
+
+ t1 = tlpm_match(l1, key, 16);
+ t2 = tlpm_match(l2, key, 16);
+
+ assert(!t1 == !t2);
+ if (t1) {
+ assert(t1->n_bits == t2->n_bits);
+ for (j = 0; j < t1->n_bits; ++j)
+ assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
+ (t2->key[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ tlpm_clear(l1);
+ tlpm_clear(l2);
+}
+
+static void test_lpm_map(void)
+{
+ size_t i, j, n_matches, n_nodes, n_lookups;
+ struct tlpm_node *t, *list = NULL;
+ struct bpf_lpm_trie_key *key;
+ uint8_t value[8] = {};
+ int r, map;
+
+ /*
+ * Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
+ * prefixes and insert it into both tlpm and bpf-lpm. Then run some
+ * randomized lookups and verify both maps return the same result.
+ */
+
+ n_matches = 0;
+ n_nodes = 1 << 8;
+ n_lookups = 1 << 16;
+
+ key = alloca(sizeof(*key) + 4);
+ memset(key, 0, sizeof(*key) + 4);
+
+ map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ sizeof(*key) + 4,
+ sizeof(value),
+ 4096,
+ 0);
+ assert(map >= 0);
+
+ for (i = 0; i < n_nodes; ++i) {
+ value[0] = rand() & 0xff;
+ value[1] = rand() & 0xff;
+ value[2] = rand() & 0xff;
+ value[3] = rand() & 0xff;
+ value[4] = rand() % 33;
+
+ list = tlpm_add(list, value, value[4]);
+
+ key->prefixlen = value[4];
+ memcpy(key->data, value, 4);
+ r = bpf_update_elem(map, key, value, 0);
+ assert(!r);
+ }
+
+ for (i = 0; i < n_lookups; ++i) {
+ uint8_t data[] = {
+ rand() % 0xff,
+ rand() % 0xff,
+ rand() % 0xff,
+ rand() % 0xff
+ };
+
+ t = tlpm_match(list, data, 32);
+
+ key->prefixlen = 32;
+ memcpy(key->data, data, 4);
+ r = bpf_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches;
+ assert(t->n_bits == value[4]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ close(map);
+ tlpm_clear(list);
+
+ /*
+ * With 255 random nodes in the map, we are pretty likely to match
+ * something on every lookup. For statistics, use this:
+ *
+ * printf(" nodes: %zu\n"
+ * "lookups: %zu\n"
+ * "matches: %zu\n", n_nodes, n_lookups, n_matches);
+ */
+}
+
+int main(void)
+{
+ /* we want predictable, pseudo random tests */
+ srand(0xf00ba1);
+
+ test_lpm_basic();
+ test_lpm_order();
+ test_lpm_map();
+
+ printf("test_lpm: OK\n");
+ return 0;
+}
--
2.10.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment