lib: clean up nexthop hashing mess

We were hashing 4 bytes of the address.  Even for IPv6 addresses.

Oops.

The reason this was done was to try to make it faster, but made a
complex maze out of everything.  Time for a refactor.

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
David Lamparter 2025-01-22 11:23:31 +01:00
parent 4a0e1419a6
commit 001fcfa1dd
2 changed files with 80 additions and 111 deletions

View file

@ -772,68 +772,30 @@ unsigned int nexthop_level(const struct nexthop *nexthop)
return rv; return rv;
} }
/* Only hash word-sized things, let cmp do the rest. */ uint32_t nexthop_hash(const struct nexthop *nexthop)
uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
{ {
uint32_t key = 0x45afe398; uint32_t key = 0x45afe398;
int i;
key = jhash_3words(nexthop->type, nexthop->vrf_id, /* type, vrf, ifindex, ip addresses - see nexthop.h */
nexthop->nh_label_type, key); key = _nexthop_hash_bytes(nexthop, key);
key = jhash_1word(nexthop->flags & NEXTHOP_FLAGS_HASHED, key);
if (nexthop->nh_label) { if (nexthop->nh_label) {
int labels = nexthop->nh_label->num_labels; const struct mpls_label_stack *ls = nexthop->nh_label;
i = 0; /* num_labels itself isn't useful to hash, if the number of
* labels is different, the hash value will change just due to
while (labels >= 3) { * that already.
key = jhash_3words(nexthop->nh_label->label[i], */
nexthop->nh_label->label[i + 1], key = jhash(ls->label, sizeof(ls->label[0]) * ls->num_labels, key);
nexthop->nh_label->label[i + 2],
key);
labels -= 3;
i += 3;
}
if (labels >= 2) {
key = jhash_2words(nexthop->nh_label->label[i],
nexthop->nh_label->label[i + 1],
key);
labels -= 2;
i += 2;
}
if (labels >= 1)
key = jhash_1word(nexthop->nh_label->label[i], key);
} }
key = jhash_2words(nexthop->ifindex,
CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK),
key);
/* Include backup nexthops, if present */ /* Include backup nexthops, if present */
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
int backups = nexthop->backup_num; int backups = nexthop->backup_num;
i = 0; key = jhash(nexthop->backup_idx, sizeof(nexthop->backup_idx[0]) * backups, key);
while (backups >= 3) {
key = jhash_3words(nexthop->backup_idx[i],
nexthop->backup_idx[i + 1],
nexthop->backup_idx[i + 2], key);
backups -= 3;
i += 3;
}
while (backups >= 2) {
key = jhash_2words(nexthop->backup_idx[i],
nexthop->backup_idx[i + 1], key);
backups -= 2;
i += 2;
}
if (backups >= 1)
key = jhash_1word(nexthop->backup_idx[i], key);
} }
if (nexthop->nh_srv6) { if (nexthop->nh_srv6) {
@ -868,31 +830,6 @@ uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
return key; return key;
} }
#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */
/* For a more granular hash */
uint32_t nexthop_hash(const struct nexthop *nexthop)
{
uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {};
/* Get all the quick stuff */
uint32_t key = nexthop_hash_quick(nexthop);
assert(((sizeof(nexthop->gate) + sizeof(nexthop->src)
+ sizeof(nexthop->rmap_src))
/ 3)
== (GATE_SIZE * sizeof(uint32_t)));
memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE);
memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE);
memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src,
GATE_SIZE);
key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key);
return key;
}
void nexthop_copy_no_recurse(struct nexthop *copy, void nexthop_copy_no_recurse(struct nexthop *copy,
const struct nexthop *nexthop, const struct nexthop *nexthop,
struct nexthop *rparent) struct nexthop *rparent)

View file

@ -8,6 +8,7 @@
#ifndef _LIB_NEXTHOP_H #ifndef _LIB_NEXTHOP_H
#define _LIB_NEXTHOP_H #define _LIB_NEXTHOP_H
#include "jhash.h"
#include "prefix.h" #include "prefix.h"
#include "mpls.h" #include "mpls.h"
#include "vxlan.h" #include "vxlan.h"
@ -56,15 +57,48 @@ struct nexthop {
struct nexthop *next; struct nexthop *next;
struct nexthop *prev; struct nexthop *prev;
/*
* What vrf is this nexthop associated with? /* begin of hashed data - all fields from here onwards are given to
* jhash() as one consecutive chunk. DO NOT create "padding holes".
* DO NOT insert pointers that need to be deep-hashed.
*
* static_assert() below needs to be updated when fields are added
*/ */
char _hash_begin[0];
/* see above */
enum nexthop_types_t type;
/* What vrf is this nexthop associated with? */
vrf_id_t vrf_id; vrf_id_t vrf_id;
/* Interface index. */ /* Interface index. */
ifindex_t ifindex; ifindex_t ifindex;
enum nexthop_types_t type; /* Type of label(s), if any */
enum lsp_types_t nh_label_type;
/* padding: keep 16 byte alignment here */
/* Nexthop address
* make sure all 16 bytes for IPv6 are zeroed when putting in an IPv4
* address since the entire thing is hashed as-is
*/
union {
union g_addr gate;
enum blackhole_type bh_type;
};
union g_addr src;
union g_addr rmap_src; /* Src is set via routemap */
/* end of hashed data - remaining fields in this struct are not
* directly fed into jhash(). Most of them are actually part of the
* hash but have special rules or handling attached.
*/
char _hash_end[0];
/* Weight of the nexthop ( for unequal cost ECMP ) */
uint8_t weight;
uint16_t flags; uint16_t flags;
#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */ #define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */
@ -82,18 +116,15 @@ struct nexthop {
#define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */ #define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */
#define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */ #define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */
/* which flags are part of nexthop_hash(). Should probably be split
* off into a separate field...
*/
#define NEXTHOP_FLAGS_HASHED NEXTHOP_FLAG_ONLINK
#define NEXTHOP_IS_ACTIVE(flags) \ #define NEXTHOP_IS_ACTIVE(flags) \
(CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \ (CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \
&& !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE)) && !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE))
/* Nexthop address */
union {
union g_addr gate;
enum blackhole_type bh_type;
};
union g_addr src;
union g_addr rmap_src; /* Src is set via routemap */
/* Nexthops obtained by recursive resolution. /* Nexthops obtained by recursive resolution.
* *
* If the nexthop struct needs to be resolved recursively, * If the nexthop struct needs to be resolved recursively,
@ -104,15 +135,9 @@ struct nexthop {
/* Recursive parent */ /* Recursive parent */
struct nexthop *rparent; struct nexthop *rparent;
/* Type of label(s), if any */
enum lsp_types_t nh_label_type;
/* Label(s) associated with this nexthop. */ /* Label(s) associated with this nexthop. */
struct mpls_label_stack *nh_label; struct mpls_label_stack *nh_label;
/* Weight of the nexthop ( for unequal cost ECMP ) */
uint8_t weight;
/* Count and index of corresponding backup nexthop(s) in a backup list; /* Count and index of corresponding backup nexthop(s) in a backup list;
* only meaningful if the HAS_BACKUP flag is set. * only meaningful if the HAS_BACKUP flag is set.
*/ */
@ -138,6 +163,29 @@ struct nexthop {
struct nexthop_srv6 *nh_srv6; struct nexthop_srv6 *nh_srv6;
}; };
/* all hashed fields (including padding, if it is necessary to add) need to
* be listed in the static_assert below
*/
#define S(field) sizeof(((struct nexthop *)NULL)->field)
static_assert(
offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin) ==
S(type) + S(vrf_id) + S(ifindex) + S(nh_label_type) + S(gate) + S(src) + S(rmap_src),
"struct nexthop contains padding, this can break things. insert _pad fields at appropriate places");
#undef S
/* this is here to show exactly what is meant by the comments above about
* the hashing
*/
static inline uint32_t _nexthop_hash_bytes(const struct nexthop *nh, uint32_t seed)
{
return jhash(&nh->_hash_begin,
offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin),
seed);
}
/* Utility to append one nexthop to another. */ /* Utility to append one nexthop to another. */
#define NEXTHOP_APPEND(to, new) \ #define NEXTHOP_APPEND(to, new) \
do { \ do { \
@ -183,27 +231,11 @@ struct nexthop *nexthop_from_blackhole(enum blackhole_type bh_type,
/* /*
* Hash a nexthop. Suitable for use with hash tables. * Hash a nexthop. Suitable for use with hash tables.
* *
* This function uses the following values when computing the hash: * Please double check the code on what is included in the hash, there was
* - vrf_id * documentation here but it got outdated and the only thing worse than no
* - ifindex * doc is incorrect doc.
* - type
* - gate
*
* nexthop
* The nexthop to hash
*
* Returns:
* 32-bit hash of nexthop
*/ */
uint32_t nexthop_hash(const struct nexthop *nexthop); uint32_t nexthop_hash(const struct nexthop *nexthop);
/*
* Hash a nexthop only on word-sized attributes:
* - vrf_id
* - ifindex
* - type
* - (some) flags
*/
uint32_t nexthop_hash_quick(const struct nexthop *nexthop);
extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2); extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2);
extern bool nexthop_same_no_labels(const struct nexthop *nh1, extern bool nexthop_same_no_labels(const struct nexthop *nh1,