frr/lib/hash.c
Quentin Young 6f6f00107e
lib, vtysh: hashtable statistics
Adds the ability to name hash tables, and a new cli command that will
show various summary statistics for named hash tables.

Statistics computed are
  - load factor
  - full load factor (see comments)
  - stddev of full load factor

Standard deviation is computed by storing the sum of squares of bucket
lengths. This is somewhat susceptible to overflow. On platforms where a
double is 32 bits, placing 65535 or more elements into a hash table
opens up the potential for overflow, depending on how they are arranged
in buckets (which depends on the hash function). For example, placing
65535 elements into one hash bucket would cause ssq overflow, but
distributing 40000000 elements evenly among 400000 buckets (100 elements
per bucket) would not.

These cases are extremely degenerate, so the vague possibility of
overflow in an informational command is deemed an acceptable tradeoff
for constant time calculation of variance without locks or compromising
efficiency of actual table operations.

Signed-off-by: Quentin Young <qlyoung@cumulusnetworks.com>
2017-07-01 19:18:35 -04:00

488 lines
13 KiB
C

/* Hash routine.
* Copyright (C) 1998 Kunihiro Ishiguro
*
* This file is part of GNU Zebra.
*
* GNU Zebra is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2, or (at your
* option) any later version.
*
* GNU Zebra is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <zebra.h>
#include <math.h>
#include "hash.h"
#include "memory.h"
#include "linklist.h"
#include "termtable.h"
#include "vty.h"
#include "command.h"
#include "libfrr.h"
/* Memory types used in this file: MTYPE_HASH for the table header and its
 * name string, MTYPE_HASH_BACKET for individual buckets, MTYPE_HASH_INDEX
 * for the bucket index array. */
DEFINE_MTYPE( LIB, HASH, "Hash")
DEFINE_MTYPE( LIB, HASH_BACKET, "Hash Bucket")
DEFINE_MTYPE_STATIC(LIB, HASH_INDEX, "Hash Index")
/* Protects _hashes: taken while adding to, removing from, and walking the
 * list. */
pthread_mutex_t _hashes_mtx = PTHREAD_MUTEX_INITIALIZER;
/* List of tables registered by hash_create_size() and not yet unregistered
 * by hash_free().  NULL until first needed, and again once it empties. */
static struct list *_hashes;
/* Build an empty hash table with SIZE index slots.
 *
 * SIZE is asserted to satisfy (size & (size - 1)) == 0, i.e. to have at most
 * one bit set.  NOTE(review): that test also lets 0 through -- confirm that
 * no caller relies on it.
 *
 * HASH_KEY and HASH_CMP are stored in the table for hashing and comparing
 * entries.  NAME may be NULL; otherwise the table keeps its own copy.
 *
 * Before returning, the table is appended to the file-wide list of tables
 * (created here if it does not exist yet) while holding _hashes_mtx. */
struct hash *
hash_create_size (unsigned int size, unsigned int (*hash_key) (void *),
                  int (*hash_cmp) (const void *, const void *),
                  const char *name)
{
  struct hash *table;

  assert ((size & (size - 1)) == 0);

  table = XCALLOC (MTYPE_HASH, sizeof (struct hash));
  table->index = XCALLOC (MTYPE_HASH_INDEX,
                          sizeof (struct hash_backet *) * size);
  table->size = size;
  table->no_expand = 0;
  table->hash_key = hash_key;
  table->hash_cmp = hash_cmp;
  table->count = 0;

  if (name)
    table->name = XSTRDUP (MTYPE_HASH, name);
  else
    table->name = NULL;

  /* Every index slot starts out without a chain. */
  table->stats.empty = table->size;

  pthread_mutex_lock (&_hashes_mtx);
  if (_hashes == NULL)
    _hashes = list_new ();
  listnode_add (_hashes, table);
  pthread_mutex_unlock (&_hashes_mtx);

  return table;
}
/* Convenience constructor: identical to hash_create_size() with the table
 * starting out at HASH_INITIAL_SIZE index slots. */
struct hash *
hash_create (unsigned int (*hash_key) (void *),
             int (*hash_cmp) (const void *, const void *),
             const char *name)
{
  struct hash *table;

  table = hash_create_size (HASH_INITIAL_SIZE, hash_key, hash_cmp, name);
  return table;
}
/* Identity allocator for hash_get().  Pass this as alloc_func when the
 * value to store has already been allocated: the argument is handed back
 * unchanged, so the table interns the caller's own pointer. */
void *
hash_alloc_intern (void *arg)
{
  void *interned = arg;

  return interned;
}
/* Replace one chain's contribution to the running sum of squared chain
 * lengths: subtract OLD squared, add NEW squared.
 *
 * HZ is a pointer to the table (evaluated twice); OLD and NEW are the chain
 * length before and after the change (each evaluated once).  The squares
 * are computed in uint64_t arithmetic, so they are exact whatever the
 * precision of the platform's floating-point types.
 *
 * Expands to a single statement with no trailing semicolon, so it is safe
 * as the body of an unbraced if/else; the caller supplies the semicolon. */
#define hash_update_ssq(hz, old, new)                   \
  do {                                                  \
    uint64_t ssq_old_ = (uint64_t) (old);               \
    uint64_t ssq_new_ = (uint64_t) (new);               \
    (hz)->stats.ssq -= ssq_old_ * ssq_old_;             \
    (hz)->stats.ssq += ssq_new_ * ssq_new_;             \
  } while (0)
/* Expand hash if the chain length exceeds the threshold.
 *
 * Doubles the number of index slots and pushes every bucket onto the head
 * of its chain in the new index.  A chain's length is kept in the `len'
 * field of its head bucket only, so the previous head's `len' is zeroed
 * whenever a new head is pushed in front of it.
 *
 * The `empty' and `ssq' statistics are rebuilt from scratch for the new
 * layout.
 *
 * Afterwards hash->no_expand is set, disabling further expansion, if any
 * chain still has HASH_THRESHOLD or more entries, or if the number of
 * chains longer than HASH_THRESHOLD/2 exceeds half the entry count. */
static void
hash_expand (struct hash *hash)
{
  unsigned int i, new_size, losers;
  struct hash_backet *hb, *hbnext, **new_index;

  new_size = hash->size * 2;
  new_index = XCALLOC (MTYPE_HASH_INDEX,
                       sizeof (struct hash_backet *) * new_size);
  if (new_index == NULL)
    return;

  /* The statistics describe the new index, which starts out with no
   * chains.  ssq has to be cleared along with empty: the loop below adds
   * each new chain's squared length on top of whatever is there, so
   * leaving the old layout's sum in place would count every entry twice. */
  hash->stats.empty = new_size;
  hash->stats.ssq = 0;

  for (i = 0; i < hash->size; i++)
    for (hb = hash->index[i]; hb; hb = hbnext)
      {
        unsigned int h = hb->key & (new_size - 1);

        /* Remember the rest of the old chain before relinking hb. */
        hbnext = hb->next;
        hb->next = new_index[h];

        int oldlen = hb->next ? hb->next->len : 0;
        int newlen = oldlen + 1;

        if (newlen == 1)
          hash->stats.empty--;
        else
          hb->next->len = 0;

        hb->len = newlen;
        hash_update_ssq(hash, oldlen, newlen);

        new_index[h] = hb;
      }

  /* Switch to new table */
  XFREE (MTYPE_HASH_INDEX, hash->index);
  hash->size = new_size;
  hash->index = new_index;

  /* Ideally, new index should have chains half as long as the original.
   * If expansion didn't help, then not worth expanding again,
   * the problem is the hash function. */
  losers = 0;
  for (i = 0; i < hash->size; i++)
    {
      unsigned int len = hash->index[i] ? hash->index[i]->len : 0;

      if (len > HASH_THRESHOLD / 2)
        ++losers;
      if (len >= HASH_THRESHOLD)
        hash->no_expand = 1;
    }

  if (losers > hash->count / 2)
    hash->no_expand = 1;
}
/* Find DATA in HASH, optionally inserting it.
 *
 * The chain selected by hash_key(data) is searched for an entry whose
 * stored key matches and for which hash_cmp() returns non-zero; that
 * entry's data pointer is returned when found.
 *
 * On a miss, NULL is returned if ALLOC_FUNC is NULL.  Otherwise
 * alloc_func(data) supplies the value to store; a NULL result is passed
 * back to the caller without touching the table.  A non-NULL result is
 * placed in a new bucket at the head of the chain and returned.  When the
 * chain walked during the search held more than HASH_THRESHOLD entries and
 * expansion has not been disabled, the table is expanded before the
 * insert. */
void *
hash_get (struct hash *hash, void *data, void * (*alloc_func) (void *))
{
  unsigned int hashval = (*hash->hash_key) (data);
  unsigned int slot = hashval & (hash->size - 1);
  unsigned int chain_len = 0;
  struct hash_backet *node;
  void *stored;
  int prev_len;
  int cur_len;

  for (node = hash->index[slot]; node != NULL; node = node->next)
    {
      if (node->key == hashval && (*hash->hash_cmp) (node->data, data))
        return node->data;
      chain_len++;
    }

  /* Pure lookup: nothing more to do on a miss. */
  if (alloc_func == NULL)
    return NULL;

  stored = (*alloc_func) (data);
  if (stored == NULL)
    return NULL;

  if (chain_len > HASH_THRESHOLD && !hash->no_expand)
    {
      hash_expand (hash);
      /* The index grew, so the slot has to be recomputed. */
      slot = hashval & (hash->size - 1);
    }

  node = XCALLOC (MTYPE_HASH_BACKET, sizeof (struct hash_backet));
  node->data = stored;
  node->key = hashval;
  node->next = hash->index[slot];
  hash->index[slot] = node;
  hash->count++;

  /* The chain length lives on the head bucket only: take it over from the
   * previous head, if there was one. */
  prev_len = node->next ? node->next->len : 0;
  cur_len = prev_len + 1;
  if (cur_len == 1)
    hash->stats.empty--;
  else
    node->next->len = 0;
  node->len = cur_len;
  hash_update_ssq(hash, prev_len, cur_len);

  return node->data;
}
/* Search HASH for DATA without inserting: hash_get() with no allocator.
 * Returns the stored data pointer, or NULL when there is no match. */
void *
hash_lookup (struct hash *hash, void *data)
{
  void *found = hash_get (hash, data, NULL);

  return found;
}
/* Bernstein-style string hash, simple and fast for the common case: for
 * each character up to the terminating NUL, multiply the accumulator by 33
 * and XOR in the character.  An empty string hashes to 0. */
unsigned int
string_hash_make (const char *str)
{
  unsigned int acc = 0;
  const char *p;

  for (p = str; *p != '\0'; p++)
    acc = (acc * 33) ^ (unsigned int) *p;

  return acc;
}
/* Remove the entry matching DATA from HASH.
 *
 * The matching bucket is unlinked and freed, the chain length stored on
 * the chain head and the `empty' / `ssq' statistics are updated, and the
 * entry count is decremented.  Returns the data pointer that was stored in
 * the bucket (the data itself is not freed), or NULL when no entry
 * matches. */
void *
hash_release (struct hash *hash, void *data)
{
  unsigned int hashval = (*hash->hash_key) (data);
  unsigned int slot = hashval & (hash->size - 1);
  struct hash_backet **link;
  struct hash_backet *node;

  /* `link' always points at the pointer that refers to `node': the index
   * slot for the first bucket, the predecessor's `next' afterwards. */
  for (link = &hash->index[slot]; (node = *link) != NULL; link = &node->next)
    {
      int prev_len;
      int cur_len;
      void *payload;

      if (node->key != hashval || !(*hash->hash_cmp) (node->data, data))
        continue;

      /* Read the chain length off the head before unlinking anything. */
      prev_len = hash->index[slot]->len;
      cur_len = prev_len - 1;

      *link = node->next;

      if (hash->index[slot] != NULL)
        hash->index[slot]->len = cur_len;
      else
        hash->stats.empty++;

      hash_update_ssq(hash, prev_len, cur_len);

      payload = node->data;
      XFREE (MTYPE_HASH_BACKET, node);
      hash->count--;
      return payload;
    }

  return NULL;
}
/* Call FUNC(bucket, ARG) once for every bucket in HASH, slot by slot. */
void
hash_iterate (struct hash *hash,
              void (*func) (struct hash_backet *, void *), void *arg)
{
  unsigned int slot;

  for (slot = 0; slot < hash->size; slot++)
    {
      struct hash_backet *cur = hash->index[slot];

      while (cur != NULL)
        {
          /* Fetch the successor first, in case (*func) decides to delete
           * the current bucket by calling hash_release. */
          struct hash_backet *following = cur->next;

          (*func) (cur, arg);
          cur = following;
        }
    }
}
/* Abortable iterator: call FUNC(bucket, ARG) for every bucket in HASH,
 * slot by slot, and stop the whole walk as soon as FUNC returns
 * HASHWALK_ABORT. */
void
hash_walk (struct hash *hash,
           int (*func) (struct hash_backet *, void *), void *arg)
{
  unsigned int slot;

  for (slot = 0; slot < hash->size; slot++)
    {
      struct hash_backet *cur = hash->index[slot];

      while (cur != NULL)
        {
          /* Fetch the successor first, in case (*func) decides to delete
           * the current bucket by calling hash_release. */
          struct hash_backet *following = cur->next;

          if ((*func) (cur, arg) == HASHWALK_ABORT)
            return;
          cur = following;
        }
    }
}
/* Empty HASH without freeing the table itself.
 *
 * Every bucket is freed; when FREE_FUNC is non-NULL it is first called on
 * the bucket's data pointer.  The entry count is decremented per bucket,
 * all index slots are cleared, and the statistics are reset to those of an
 * empty table. */
void
hash_clean (struct hash *hash, void (*free_func) (void *))
{
  unsigned int slot;

  for (slot = 0; slot < hash->size; slot++)
    {
      struct hash_backet *cur = hash->index[slot];

      while (cur != NULL)
        {
          struct hash_backet *following = cur->next;

          if (free_func != NULL)
            (*free_func) (cur->data);
          XFREE (MTYPE_HASH_BACKET, cur);
          hash->count--;

          cur = following;
        }

      hash->index[slot] = NULL;
    }

  hash->stats.ssq = 0;
  hash->stats.empty = hash->size;
}
/* Release the table itself: its name, its index array and the header.
 *
 * Buckets are not touched here, so call hash_clean() first if the table
 * may still hold entries.  The table is also removed from the file-wide
 * list of tables under _hashes_mtx; the list is deleted once it becomes
 * empty. */
void
hash_free (struct hash *hash)
{
  pthread_mutex_lock (&_hashes_mtx);
  if (_hashes != NULL)
    {
      listnode_delete (_hashes, hash);
      if (_hashes->count == 0)
        {
          list_delete (_hashes);
          _hashes = NULL;
        }
    }
  pthread_mutex_unlock (&_hashes_mtx);

  if (hash->name != NULL)
    XFREE (MTYPE_HASH, hash->name);

  XFREE (MTYPE_HASH_INDEX, hash->index);
  XFREE (MTYPE_HASH, hash);
}
/* CLI commands ------------------------------------------------------------ */

/* "show hashtable [statistics]": print a header with the number of
 * allocated and named tables, followed by one table row of summary
 * statistics per named hash table.  Unnamed tables are counted but not
 * listed.  Always returns CMD_SUCCESS. */
DEFUN(show_hash_stats,
      show_hash_stats_cmd,
      "show hashtable [statistics]",
      SHOW_STR
      "Statistics about hash tables\n"
      "Statistics about hash tables\n")
{
  struct hash *h;
  struct listnode *ln;
  int allocated = 0;            /* number of tables on the global list */
  struct ttable *tt = ttable_new (&ttable_styles[TTSTYLE_BLANK]);

  ttable_add_row (tt, "Hash table|Buckets|Entries|Empty|LF|FLF|SD");
  tt->style.cell.lpad = 2;
  tt->style.cell.rpad = 1;
  tt->style.corner = '+';
  ttable_restyle (tt);
  ttable_rowseps (tt, 0, BOTTOM, true, '-');

  /* Summary statistics calculated are:
   *
   * - Load factor: This is the number of elements in the table divided by the
   *   number of buckets. Since this hash table implementation uses chaining,
   *   this value can be greater than 1. This number provides information on
   *   how 'full' the table is, but does not provide information on how evenly
   *   distributed the elements are. Notably, a load factor >= 1 does not imply
   *   that every bucket has an element; with a pathological hash function, all
   *   elements could be in a single bucket.
   *
   * - Full load factor: this is the number of elements in the table divided by
   *   the number of buckets that have some elements in them.
   *
   * - Std. Dev.: This is the standard deviation from the full load factor. If
   *   the FLF is the mean of number of elements per bucket, the standard
   *   deviation measures how much any particular bucket is likely to deviate
   *   from the mean. As a rule of thumb this number should be less than 2, and
   *   ideally <= 1 for optimal performance. A number larger than 3 generally
   *   indicates a poor hash function.
   */
  long double lf;    // load factor
  long double flf;   // full load factor
  long double fvar;  // full variance
  long double fstdv; // full stddev
  long double x2;    // h->count ^ 2
  long double ldc;   // (long double) h->count
  long double full;  // h->size - h->stats.empty
  long double ssq;   // ssq casted to long double

  pthread_mutex_lock (&_hashes_mtx);
  /* hash_free() deletes the list when the last table goes away, so it may
   * legitimately be NULL here.  Everything read from the list, including
   * its element count, is read while the lock is held. */
  if (_hashes)
    {
      allocated = (int) _hashes->count;

      for (ALL_LIST_ELEMENTS_RO (_hashes, ln, h))
        {
          if (!h->name)
            continue;

          ssq = (long double) h->stats.ssq;
          ldc = (long double) h->count;
          x2 = ldc * ldc;
          full = h->size - h->stats.empty;

          lf = h->count / (double) h->size;
          flf = full ? h->count / (double) (full) : 0;

          /* Variance over the non-empty buckets: E[len^2] - E[len]^2. */
          fvar = full ? (1.0 / full) * (ssq - x2 / full) : 0;
          /* Clamp rounding noise (and tiny negatives) to zero before
           * taking the square root. */
          fvar = (fvar < .0001) ? 0 : fvar;
          fstdv = sqrtl (fvar);

          ttable_add_row (tt, "%s|%d|%ld|%.0f%%|%.2Lf|%.2Lf|%.2Lf", h->name,
                          h->size, h->count,
                          (h->stats.empty / (double) h->size)*100, lf, flf,
                          fstdv);
        }
    }
  pthread_mutex_unlock (&_hashes_mtx);

  /* display header */
  char header[] = "Showing hash table statistics for ";
  /* sizeof(header) counts the NUL, so this holds one dash per character of
   * header + protocol name, plus the terminator. */
  char underln[sizeof(header) + strlen(frr_protonameinst)];
  memset (underln, '-', sizeof(underln));
  underln[sizeof(underln) - 1] = '\0';
  vty_out (vty, "%s%s%s", header, frr_protonameinst, VTY_NEWLINE);
  vty_out (vty, "%s%s", underln, VTY_NEWLINE);

  vty_out (vty, "# allocated: %d%s", allocated, VTY_NEWLINE);
  vty_out (vty, "# named:     %d%s%s", tt->nrows - 1, VTY_NEWLINE,
           VTY_NEWLINE);

  if (tt->nrows > 1)
    {
      ttable_colseps (tt, 0, RIGHT, true, '|');
      char *table = ttable_dump (tt, VTY_NEWLINE);
      vty_out (vty, "%s%s", table, VTY_NEWLINE);
      XFREE (MTYPE_TMP, table);
    }
  else
    vty_out (vty, "No named hash tables to display.%s", VTY_NEWLINE);

  ttable_del (tt);

  return CMD_SUCCESS;
}
void
hash_cmd_init ()
{
_hashes = list_new();
install_element (ENABLE_NODE, &show_hash_stats_cmd);
}