blob: c245ba3c78bdda083082dcff8813e3e2745a3dbc [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmaessen@google.com (Jan Maessen)
#ifndef NET_INSTAWEB_UTIL_PUBLIC_CYCLIC_HASH_H_
#define NET_INSTAWEB_UTIL_PUBLIC_CYCLIC_HASH_H_
#include "base/basictypes.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
// Rolling hash for char buffers based on a polynomial lookup table.
// See http://en.wikipedia.org/wiki/Rolling_hash
// Per character hash values. Exported for use in NextRollingHash.
extern const uint64 kRollingHashCharTable[256];
// Compute the rolling hash of buf[start : start + n - 1]
uint64 RollingHash(const char* buf, size_t start, size_t n);
// Given the rolling hash prev of buf[start - 1 : start + n - 2], efficiently
// compute the hash of buf[start : start + n - 1]. Note that this indexes
// buf[start - 1], so we can't just use a StringPiece here. We eschew
// StringPiece in any case, because of efficiency.
//
// Note that to get efficient operation here for fixed n (eg when we're doing
// something like Rabin-Karp string matching), we must inline the computation of
// shift amounts and then hoist them as loop invariants. That is why this
// function (intended for use in an inner loop) is inlined.
inline uint64 NextRollingHash(
const char* buf, size_t start, size_t n, uint64 prev) {
// In a reasonable loop, the following two tests should be eliminated based on
// contextual information, if our compiler is optimizing enough.
CHECK_LT(static_cast<size_t>(0), start);
uint64 start_hash =
kRollingHashCharTable[static_cast<uint8>(buf[start - 1])];
uint64 end_hash =
kRollingHashCharTable[static_cast<uint8>(buf[start - 1 + n])];
uint64 prev_rot1 = (prev << 1) | (prev >> 63); // rotate left 1
uint64 start_hash_rotn;
// Corner case: shift by >= 64 bits is not defined in C. gcc had better
// constant-fold this to a rotate! (It appears to.) We inline in large part
// to ensure the truthiness of this fact.
size_t shift = n % 64;
if (shift == 0) {
start_hash_rotn = start_hash;
} else {
// rotate left by shift (equiv to rotating left n times).
start_hash_rotn = (start_hash << shift) | (start_hash >> (64 - shift));
}
return (start_hash_rotn ^ prev_rot1 ^ end_hash);
}
} // net_instaweb
#endif // NET_INSTAWEB_UTIL_PUBLIC_ROLLING_HASH_H_