Pages: [1]   Go Down
Author Topic: Fast hashing library  (Read 304 times)
0 Members and 1 Guest are viewing this topic.
Offline Offline
Newbie
*
Karma: 0
Posts: 2
View Profile
 Bigger Bigger  Smaller Smaller  Reset Reset

Hi everyone.
I've made port of a 32-bit hashing function that works quite fast. That is, speed is 4x higher than the usual CRC32.
Port is available here.
https://github.com/mrbio/Arduino-rokkit-hash

And I'd like to receive some feedback.
Logged

Sweden
Offline Offline
Sr. Member
****
Karma: 6
Posts: 375
View Profile
 Bigger Bigger  Smaller Smaller  Reset Reset

@biocheshire
Did you do your benchmark against AVR standard CRC funtions or just quoting the original post?

What was the performance of this implementation?
And how did you tweak it for the AVR processor?

The original implementation and benchmark was at 1.6 GHz i386 architecture, And "Data is time in seconds taken to hash a random buffer of 256 bytes 5 million times" http://www.azillionmonkeys.com/qed/hash.html

How long time would this take on an Arduino ;-)?

I believe the incremental version is more useful for a micro-controller with only 2-4 Kbyte of memory. And there are some tweaks to be done to improve performance on an 8-bit architecture.

Here is an interesting link: http://perso.uclouvain.be/fstandae/source_codes/hash_atmel/


Cheers!
« Last Edit: March 18, 2014, 02:01:05 pm by kowalski » Logged

Global Moderator
Netherlands
Offline Offline
Shannon Member
*****
Karma: 170
Posts: 12462
In theory there is no difference between theory and practice, however in practice there are many...
View Profile
 Bigger Bigger  Smaller Smaller  Reset Reset

Thanks for sharing (a challenge!)

Some performance optimizations (for Arduino 8-bitter), on the internal loop,
there will be some more gain possible in optimizing the remainders.

Please check

NOTE: the original version of rokkithash is copyrighted LGPL 2.1 by Paul Hsieh
Code:
//
//    FILE: rokkitHash.ino
//  AUTHOR: Rob Tillaart
// VERSION: 0.1.00
// PURPOSE: optimizing rokkitHash
//    DATE: 2014-03-18
//     URL:
//
// original version of rokkithash is copyrighted LGPL 2.1 by Paul Hsieh
//

volatile uint32_t x;
char data[] = "the quick brown fox jumps over the lazy dog.";

void setup()
{
  Serial.begin(115200);
  Serial.println("Start ");

  int le = strlen(data);

  uint32_t start = micros();
  for (int i=0; i<1000; i++)
  {
    x = rokkit(data, le);
  }
  uint32_t stop  = micros();
  Serial.print("pre:\t");
  Serial.println(stop - start);
  Serial.println(x);

  start = micros();
  for (int i=0; i<1000; i++)
  {
    x = rokkitNew(data, le);
  }
  stop  = micros();
  Serial.print("post:\t");
  Serial.println(stop - start);
  Serial.println(x);
}

void loop()
{
}

uint32_t rokkitNew(const char * data, uint16_t len)
{
  union
  {
    uint32_t h;
    uint16_t b[2];
    uint8_t a[4];
  }
  hash, tmp;
  uint8_t rem;
  uint16_t *p = (uint16_t*) data;

  if (data == 0) return 0;
  hash.h = len;
  rem = len & 3;
  len >>= 2;

  /* Main loop */
  while (len > 0) {
    // hash  += *((uint16_t*)data);
    hash.h  += *p;
    p++;

    // tmp    = (*((uint16_t*)(data+2)) << 11) ^ hash;
    // hash   = (hash << 16) ^ tmp;
    // ==>
    // hash   = (hash << 16) ^ (*((uint16_t*)(data+2)) << 11) ^ hash;
    // ==>
    // hash   ^= (hash << 16) ^ (*((uint16_t*)(data+2)) << 11);
    tmp.h = hash.h;
    tmp.b[1] = tmp.b[0];  //  (hash << 16)   === clean lower 16 bits!
    tmp.b[0] = (*p) << 11;  //  (*((uint16_t*)(data+2)) << 11)  === 16 bit operation!!

    p++;
    // hash.h ^= tmp.h;
    hash.a[0] ^= tmp.a[0];
    hash.a[1] ^= tmp.a[1];
    hash.a[2] ^= tmp.a[2];
    hash.a[3] ^= tmp.a[3];

    // hash  += hash >> 11;
    tmp.a[0] = hash.a[1];  // shift 8
    tmp.a[1] = hash.a[2];
    tmp.a[2] = hash.a[3];
    tmp.a[3] = 0;
    hash.h += tmp.h >> 3;

    // len--;
    --len;
  }


  data = (char*) p;
  /* Handle end cases */
  switch (rem) {
  case 3:
    hash.h += *((uint16_t*)data);
    hash.h ^= hash.h << 16;
    hash.h ^= ((signed char)data[2]) << 18;
    hash.h += hash.h >> 11;
    break;
  case 2:
    hash.h += *((uint16_t*)data);
    hash.h ^= hash.h << 11;
    hash.h += hash.h >> 17;
    break;
  case 1:
    hash.h += (signed char)*data;
    hash.h ^= hash.h << 10;
    hash.h += hash.h >> 1;
  }

  /* Force "avalanching" of final 127 bits */
  hash.h ^= hash.h << 3;
  hash.h += hash.h >> 5;
  hash.h ^= hash.h << 4;
  hash.h += hash.h >> 17;
  hash.h ^= hash.h << 25;
  hash.h += hash.h >> 6;

  return hash.h;
}



uint32_t rokkit(const char * data, int len)
{
  uint32_t hash, tmp;
  int rem;

  if (len <= 0 || data == 0) return 0;
  hash = len;
  rem = len & 3;
  len >>= 2;

  /* Main loop */
  while (len > 0) {
    hash  += *((uint16_t*)data);
    tmp    = (*((uint16_t*)(data+2)) << 11) ^ hash;
    hash   = (hash << 16) ^ tmp;
    data  += 2*2;
    hash  += hash >> 11;
    len--;
  }

  /* Handle end cases */
  switch (rem) {
  case 3:
    hash += *((uint16_t*)data);
    hash ^= hash << 16;
    hash ^= ((signed char)data[2]) << 18;
    hash += hash >> 11;
    break;
  case 2:
    hash += *((uint16_t*)data);
    hash ^= hash << 11;
    hash += hash >> 17;
    break;
  case 1:
    hash += (signed char)*data;
    hash ^= hash << 10;
    hash += hash >> 1;
  }

  /* Force "avalanching" of final 127 bits */
  hash ^= hash << 3;
  hash += hash >> 5;
  hash ^= hash << 4;
  hash += hash >> 17;
  hash ^= hash << 25;
  hash += hash >> 6;

  return hash;
}

output
Code:
Start
pre: 121136
669202426
post: 76956
669202426
~35%  faster
« Last Edit: March 18, 2014, 04:28:11 pm by robtillaart » Logged

Rob Tillaart

Nederlandse sectie - http://arduino.cc/forum/index.php/board,77.0.html -
(Please do not PM for private consultancy)

Global Moderator
Netherlands
Offline Offline
Shannon Member
*****
Karma: 170
Posts: 12462
In theory there is no difference between theory and practice, however in practice there are many...
View Profile
 Bigger Bigger  Smaller Smaller  Reset Reset

+ two tricks  in the end...

Code:
uint32_t rokkitNew2(const char * data, uint16_t len)
{
  union
  {
    uint32_t h;
    uint16_t b[2];
    uint8_t a[4];
  }
  hash, tmp;
  uint8_t rem;
  uint16_t *p = (uint16_t*) data;

  if (data == 0) return 0;
  hash.h = len;
  rem = len & 3;
  len >>= 2;

  /* Main loop */
  while (len > 0) {
    // hash  += *((uint16_t*)data);
    hash.h  += *p;
    p++;

    // tmp    = (*((uint16_t*)(data+2)) << 11) ^ hash;
    // hash   = (hash << 16) ^ tmp;
    // ==>
    // hash   = (hash << 16) ^ (*((uint16_t*)(data+2)) << 11) ^ hash;
    // ==>
    // hash   ^= (hash << 16) ^ (*((uint16_t*)(data+2)) << 11);
    tmp.h = hash.h;
    tmp.b[1] = tmp.b[0];  // shift 16
    tmp.a[1] = ((*p) & 0xFF) << 3;
    tmp.a[0] = 0;
    // tmp.b[0] = (*p) << 11;  // 16 bit operation!!

    p++;
    // hash.h ^= tmp.h;
    // hash.a[0] ^= tmp.a[0];  // not needed as tmp.a[0] == 0
    hash.a[1] ^= tmp.a[1];
    hash.a[2] ^= tmp.a[2];
    hash.a[3] ^= tmp.a[3];

    // hash  += hash >> 11;
    tmp.a[0] = hash.a[1];  // shift 8
    tmp.a[1] = hash.a[2];
    tmp.a[2] = hash.a[3];
    tmp.a[3] = 0;
    hash.h += tmp.h >> 3;

    // len--;
    --len;
  }


  data = (char*) p;
  /* Handle end cases */
  switch (rem) {
  case 3:
    hash.h += *((uint16_t*)data);
    hash.h ^= hash.h << 16;  // todo
    hash.h ^= ((signed char)data[2]) << 18;
    hash.h += hash.h >> 11;  // todo
    break;
  case 2:
    hash.h += *((uint16_t*)data);
    hash.h ^= hash.h << 11;  // todo
    hash.h += hash.h >> 17;  // todo
    break;
  case 1:
    hash.h += (signed char)*data;
    hash.h ^= hash.h << 10;  // todo
    hash.h += hash.h >> 1;
  }

  /* Force "avalanching" of final 127 bits */
  hash.h ^= hash.h << 3;
  hash.h += hash.h >> 5;
  hash.h ^= hash.h << 4;

  // hash.h += hash.h >> 17;  // todo
  tmp.b[0] = hash.b[1] >> 1;
  tmp.b[1] = 0;
  hash.h += tmp.h;

  // hash.h ^= hash.h << 25;
//  tmp.a[3] = hash.a[0] << 1; // shift 25!!
//  tmp.a[2] = 0;
//  tmp.a[1] = 0;
//  tmp.a[0] = 0;
  hash.a[3] ^= hash.a[0] << 1; // shift 25!!;

  hash.h += hash.h >> 6;

  return hash.h;
}

pre:   121136
669202426
post:   58976
669202426

==>50% off

note: Arduino 1.5.4 + UNO
Logged

Rob Tillaart

Nederlandse sectie - http://arduino.cc/forum/index.php/board,77.0.html -
(Please do not PM for private consultancy)

Offline Offline
Newbie
*
Karma: 0
Posts: 2
View Profile
 Bigger Bigger  Smaller Smaller  Reset Reset

@kowalski
I did comparsion between rokkit and this particular crc32 implementation - http://excamera.com/sphinx/article-crc.html
So it showed ~4x gain.
No tweaking was done, just changed some pointer operation

@robtillaart
thanks for your contribution!
I will check the results
« Last Edit: March 19, 2014, 09:07:19 am by biocheshire » Logged

Pages: [1]   Go Up
Jump to: