blob: 46387501b02cae2c1c8cec327334432c17c0059e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef PAGESPEED_KERNEL_BASE_ROLLING_HASH_H_
#define PAGESPEED_KERNEL_BASE_ROLLING_HASH_H_
#include <cstddef>
#include "base/logging.h"
#include "pagespeed/kernel/base/basictypes.h"
namespace net_instaweb {
// Rolling hash for char buffers based on a polynomial lookup table.
// See http://en.wikipedia.org/wiki/Rolling_hash
// Per character hash values. Exported for use in NextRollingHash.
extern const uint64 kRollingHashCharTable[256];
// Compute the rolling hash of buf[start : start + n - 1]
uint64 RollingHash(const char* buf, size_t start, size_t n);
// Given the rolling hash prev of buf[start - 1 : start + n - 2], efficiently
// compute the hash of buf[start : start + n - 1]. Note that this indexes
// buf[start - 1], so we can't just use a StringPiece here. We eschew
// StringPiece in any case, because of efficiency.
//
// Note that to get efficient operation here for fixed n (eg when we're doing
// something like Rabin-Karp string matching), we must inline the computation of
// shift amounts and then hoist them as loop invariants. That is why this
// function (intended for use in an inner loop) is inlined.
inline uint64 NextRollingHash(
const char* buf, size_t start, size_t n, uint64 prev) {
// In a reasonable loop, the following two tests should be eliminated based on
// contextual information, if our compiler is optimizing enough.
CHECK_LT(static_cast<size_t>(0), start);
uint64 start_hash =
kRollingHashCharTable[static_cast<uint8>(buf[start - 1])];
uint64 end_hash =
kRollingHashCharTable[static_cast<uint8>(buf[start - 1 + n])];
uint64 prev_rot1 = (prev << 1) | (prev >> 63); // rotate left 1
uint64 start_hash_rotn;
// Corner case: shift by >= 64 bits is not defined in C. gcc had better
// constant-fold this to a rotate! (It appears to.) We inline in large part
// to ensure the truthiness of this fact.
size_t shift = n % 64;
if (shift == 0) {
start_hash_rotn = start_hash;
} else {
// rotate left by shift (equiv to rotating left n times).
start_hash_rotn = (start_hash << shift) | (start_hash >> (64 - shift));
}
return (start_hash_rotn ^ prev_rot1 ^ end_hash);
}
} // namespace net_instaweb
#endif // PAGESPEED_KERNEL_BASE_ROLLING_HASH_H_