frr/lib/hash.c

/* Hash routine.
 * Copyright (C) 1998 Kunihiro Ishiguro
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2, or (at your
 * option) any later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <zebra.h>
#include <math.h>

#include "hash.h"
#include "memory.h"
#include "linklist.h"
#include "termtable.h"
#include "vty.h"
#include "command.h"
#include "libfrr.h"

DEFINE_MTYPE(       LIB, HASH,        "Hash")
DEFINE_MTYPE(       LIB, HASH_BACKET, "Hash Bucket")
DEFINE_MTYPE_STATIC(LIB, HASH_INDEX,  "Hash Index")

pthread_mutex_t _hashes_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *_hashes;

/* Allocate a new hash.  */
struct hash *
hash_create_size (unsigned int size, unsigned int (*hash_key) (void *),
                  int (*hash_cmp) (const void *, const void *),
                  const char *name)
{
  struct hash *hash;

  assert ((size & (size-1)) == 0);
  hash = XCALLOC (MTYPE_HASH, sizeof (struct hash));
  hash->index = XCALLOC (MTYPE_HASH_INDEX,
			 sizeof (struct hash_backet *) * size);
  hash->size = size;
  hash->no_expand = 0;
  hash->hash_key = hash_key;
  hash->hash_cmp = hash_cmp;
  hash->count = 0;
  hash->name = name ? XSTRDUP(MTYPE_HASH, name) : NULL;
  hash->stats.empty = hash->size;

  pthread_mutex_lock (&_hashes_mtx);
  {
    if (!_hashes)
      _hashes = list_new();

    listnode_add (_hashes, hash);
  }
  pthread_mutex_unlock (&_hashes_mtx);

  return hash;
}

/* Allocate a new hash with default hash size.  */
struct hash *
hash_create (unsigned int (*hash_key) (void *),
             int (*hash_cmp) (const void *, const void *),
             const char *name)
{
  return hash_create_size (HASH_INITIAL_SIZE, hash_key, hash_cmp, name);
}

/* Utility function for hash_get().  When this function is specified
   as alloc_func, return arugment as it is.  This function is used for
   intern already allocated value.  */
void *
hash_alloc_intern (void *arg)
{
  return arg;
}

#define hash_update_ssq(hz, old, new) \
  do { \
    long double res; \
    res = powl(old, 2.0); \
    hz->stats.ssq -= (uint64_t) res;\
    res = powl(new, 2.0); \
    hz->stats.ssq += (uint64_t) res; \
  } while (0); \

/* Expand hash if the chain length exceeds the threshold. */
static void hash_expand (struct hash *hash)
{
  unsigned int i, new_size, losers;
  struct hash_backet *hb, *hbnext, **new_index;

  new_size = hash->size * 2;
  new_index = XCALLOC(MTYPE_HASH_INDEX, sizeof(struct hash_backet *) * new_size);
  if (new_index == NULL)
    return;

  hash->stats.empty = new_size;

  for (i = 0; i < hash->size; i++)
    for (hb = hash->index[i]; hb; hb = hbnext)
      {
	unsigned int h = hb->key & (new_size - 1);

	hbnext = hb->next;
	hb->next = new_index[h];

	int oldlen = hb->next ? hb->next->len : 0;
	int newlen = oldlen + 1;

	if (newlen == 1)
	  hash->stats.empty--;
	else
	  hb->next->len = 0;

	hb->len = newlen;

	hash_update_ssq(hash, oldlen, newlen);

	new_index[h] = hb;
      }

  /* Switch to new table */
  XFREE(MTYPE_HASH_INDEX, hash->index);
  hash->size = new_size;
  hash->index = new_index;

  /* Ideally, new index should have chains half as long as the original.
   * If expansion didn't help, then not worth expanding again,
   * the problem is the hash function. */
  losers = 0;
  for (i = 0; i < hash->size; i++)
    {
      unsigned int len = hash->index[i] ? hash->index[i]->len : 0;

      if (len > HASH_THRESHOLD/2)
        ++losers;
      if (len >= HASH_THRESHOLD)
        hash->no_expand = 1;
    }

  if (losers > hash->count / 2)
    hash->no_expand = 1;
}

/* Lookup and return hash backet in hash.  If there is no
   corresponding hash backet and alloc_func is specified, create new
   hash backet.  */
void *
hash_get (struct hash *hash, void *data, void * (*alloc_func) (void *))
{
  unsigned int key;
  unsigned int index;
  void *newdata;
  unsigned int len;
  struct hash_backet *backet;

  key = (*hash->hash_key) (data);
  index = key & (hash->size - 1);
  len = 0;

  for (backet = hash->index[index]; backet != NULL; backet = backet->next)
    {
      if (backet->key == key && (*hash->hash_cmp) (backet->data, data))
	return backet->data;
      ++len;
    }

  if (alloc_func)
    {
      newdata = (*alloc_func) (data);
      if (newdata == NULL)
	return NULL;

      if (len > HASH_THRESHOLD && !hash->no_expand)
	{
	  hash_expand (hash);
	  index = key & (hash->size - 1);
	}

      backet = XCALLOC (MTYPE_HASH_BACKET, sizeof (struct hash_backet));
      backet->data = newdata;
      backet->key = key;
      backet->next = hash->index[index];
      hash->index[index] = backet;
      hash->count++;

      int oldlen = backet->next ? backet->next->len : 0;
      int newlen = oldlen + 1;

      if (newlen == 1)
        hash->stats.empty--;
      else
        backet->next->len = 0;

      backet->len = newlen;

      hash_update_ssq(hash, oldlen, newlen);

      return backet->data;
    }
  return NULL;
}

/* Hash lookup.  */
void *
hash_lookup (struct hash *hash, void *data)
{
  return hash_get (hash, data, NULL);
}

/* Simple Bernstein hash which is simple and fast for common case */
unsigned int string_hash_make (const char *str)
{
  unsigned int hash = 0;

  while (*str)
    hash = (hash * 33) ^ (unsigned int) *str++;

  return hash;
}

/* This function release registered value from specified hash.  When
   release is successfully finished, return the data pointer in the
   hash backet.  */
void *
hash_release (struct hash *hash, void *data)
{
  void *ret;
  unsigned int key;
  unsigned int index;
  struct hash_backet *backet;
  struct hash_backet *pp;

  key = (*hash->hash_key) (data);
  index = key & (hash->size - 1);

  for (backet = pp = hash->index[index]; backet; backet = backet->next)
    {
      if (backet->key == key && (*hash->hash_cmp) (backet->data, data))
	{
	  int oldlen = hash->index[index]->len;
	  int newlen = oldlen - 1;

	  if (backet == pp)
	    hash->index[index] = backet->next;
	  else
	    pp->next = backet->next;

	  if (hash->index[index])
	    hash->index[index]->len = newlen;
	  else
            hash->stats.empty++;

	  hash_update_ssq(hash, oldlen, newlen);

	  ret = backet->data;
	  XFREE (MTYPE_HASH_BACKET, backet);
	  hash->count--;
	  return ret;
	}
      pp = backet;
    }
  return NULL;
}

/* Iterator function for hash.  */
void
hash_iterate (struct hash *hash,
	      void (*func) (struct hash_backet *, void *), void *arg)
{
  unsigned int i;
  struct hash_backet *hb;
  struct hash_backet *hbnext;

  for (i = 0; i < hash->size; i++)
    for (hb = hash->index[i]; hb; hb = hbnext)
      {
	/* get pointer to next hash backet here, in case (*func)
	 * decides to delete hb by calling hash_release
	 */
	hbnext = hb->next;
	(*func) (hb, arg);
      }
}

/* Iterator function for hash.  */
void
hash_walk (struct hash *hash,
	   int (*func) (struct hash_backet *, void *), void *arg)
{
  unsigned int i;
  struct hash_backet *hb;
  struct hash_backet *hbnext;
  int ret = HASHWALK_CONTINUE;

  for (i = 0; i < hash->size; i++)
    {
      for (hb = hash->index[i]; hb; hb = hbnext)
	{
	  /* get pointer to next hash backet here, in case (*func)
	   * decides to delete hb by calling hash_release
	   */
	  hbnext = hb->next;
	  ret = (*func) (hb, arg);
	  if (ret == HASHWALK_ABORT)
	    return;
	}
    }
}

/* Clean up hash.  */
void
hash_clean (struct hash *hash, void (*free_func) (void *))
{
  unsigned int i;
  struct hash_backet *hb;
  struct hash_backet *next;

  for (i = 0; i < hash->size; i++)
    {
      for (hb = hash->index[i]; hb; hb = next)
	{
	  next = hb->next;

	  if (free_func)
	    (*free_func) (hb->data);

	  XFREE (MTYPE_HASH_BACKET, hb);
	  hash->count--;
	}
      hash->index[i] = NULL;
    }

  hash->stats.ssq = 0;
  hash->stats.empty = hash->size;
}

/* Free hash memory.  You may call hash_clean before call this
   function.  */
void
hash_free (struct hash *hash)
{
  pthread_mutex_lock (&_hashes_mtx);
  {
    if (_hashes)
      {
        listnode_delete (_hashes, hash);
        if (_hashes->count == 0)
          {
            list_delete (_hashes);
            _hashes = NULL;
          }
      }
  }
  pthread_mutex_unlock (&_hashes_mtx);

  if (hash->name)
    XFREE (MTYPE_HASH, hash->name);

  XFREE (MTYPE_HASH_INDEX, hash->index);
  XFREE (MTYPE_HASH, hash);
}


/* CLI commands ------------------------------------------------------------ */

DEFUN(show_hash_stats,
      show_hash_stats_cmd,
      "show hashtable [statistics]",
      SHOW_STR
      "Statistics about hash tables\n"
      "Statistics about hash tables\n")
{
  struct hash *h;
  struct listnode *ln;
  struct ttable *tt = ttable_new (&ttable_styles[TTSTYLE_BLANK]);

  ttable_add_row (tt, "Hash table|Buckets|Entries|Empty|LF|FLF|SD");
  tt->style.cell.lpad = 2;
  tt->style.cell.rpad = 1;
  tt->style.corner = '+';
  ttable_restyle (tt);
  ttable_rowseps (tt, 0, BOTTOM, true, '-');

  /* Summary statistics calculated are:
   *
   * - Load factor: This is the number of elements in the table divided by the
   *   number of buckets. Since this hash table implementation uses chaining,
   *   this value can be greater than 1. This number provides information on
   *   how 'full' the table is, but does not provide information on how evenly
   *   distributed the elements are. Notably, a load factor >= 1 does not imply
   *   that every bucket has an element; with a pathological hash function, all
   *   elements could be in a single bucket.
   *
   * - Full load factor: this is the number of elements in the table divided by
   *   the number of buckets that have some elements in them.
   *
   * - Std. Dev.: This is the standard deviation from the full load factor. If
   *   the FLF is the mean of number of elements per bucket, the standard
   *   deviation measures how much any particular bucket is likely to deviate
   *   from the mean. As a rule of thumb this number should be less than 2, and
   *   ideally <= 1 for optimal performance. A number larger than 3 generally
   *   indicates a poor hash function.
   */

  long double lf;     // load factor
  long double flf;    // full load factor
  long double var;    // overall variance
  long double fvar;   // full variance
  long double stdv;   // overall stddev
  long double fstdv;  // full stddev

  long double x2;     // h->count ^ 2
  long double ldc;    // (long double) h->count
  long double full;   // h->size - h->stats.empty
  long double ssq;    // ssq casted to long double

  pthread_mutex_lock (&_hashes_mtx);
  for (ALL_LIST_ELEMENTS_RO (_hashes, ln, h))
    {
      if (!h->name)
        continue;

      ssq   = (long double) h->stats.ssq;
      x2    = pow(h->count, 2.0);
      ldc   = (long double) h->count;
      full  = h->size - h->stats.empty;
      lf    = h->count / (double) h->size;
      flf   = full ? h->count / (double) (full) : 0;
      var   = ldc ? (1.0 / ldc) * (h->stats.ssq - x2 / ldc) : 0;
      fvar  = full ? (1.0 / full) * (h->stats.ssq - x2 / full) : 0;
      var   = (var < .0001) ? 0 : var;
      fvar  = (fvar < .0001) ? 0 : fvar;
      stdv  = sqrtl(var);
      fstdv = sqrtl(fvar);

      ttable_add_row (tt, "%s|%d|%ld|%.0f%%|%.2Lf|%.2Lf|%.2Lf", h->name,
                      h->size, h->count,
                      (h->stats.empty / (double) h->size)*100, lf, flf, fstdv);
    }
  pthread_mutex_unlock (&_hashes_mtx);

  /* display header */
  char header[] = "Showing hash table statistics for ";
  char underln[sizeof(header) + strlen(frr_protonameinst)];
  memset (underln, '-', sizeof(underln));
  underln[sizeof(underln) - 1] = '\0';
  vty_out (vty, "%s%s%s", header, frr_protonameinst, VTY_NEWLINE);
  vty_out (vty, "%s%s", underln, VTY_NEWLINE);

  vty_out (vty, "# allocated: %d%s", _hashes->count, VTY_NEWLINE);
  vty_out (vty, "# named:     %d%s%s", tt->nrows - 1, VTY_NEWLINE,
           VTY_NEWLINE);

  if (tt->nrows > 1)
    {
      ttable_colseps (tt, 0, RIGHT, true, '|');
      char *table = ttable_dump (tt, VTY_NEWLINE);
      vty_out (vty, "%s%s", table, VTY_NEWLINE);
      XFREE (MTYPE_TMP, table);
    }
  else
    vty_out (vty, "No named hash tables to display.%s", VTY_NEWLINE);

  ttable_del (tt);

  return CMD_SUCCESS;
}

void
hash_cmd_init ()
{
  _hashes = list_new();
  install_element (ENABLE_NODE, &show_hash_stats_cmd);
}