blob: 4d71e452f0e103360d1b5d8fdca9d2bffcef7da4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include "hash-store.h"
#include "standard-hash-table.h"
#include "tuple-types.h"
#include "util/cpu-info.h"
#include "util/debug-util.h"
#include "util/pretty-printer.h"
#include "util/stopwatch.h"
using namespace impala;
namespace impala {
// Tests performance of hash aggregation with HashStore, which grows to contain multiple
// hash tables as needed. Allows exploring how buffer size and size of the keyspace
// affect performance.
class GrowingTest : public testing::Test {
public:
static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
template <int buffer_size>
inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
int num_tables) {
ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
HashStore<buffer_size> hs;
StopWatch watch;
watch.Start();
hs.Aggregate(tuples, num_probe_tuples);
watch.Stop();
SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
free(tuples);
return watch.ElapsedTime();
}
// Confirm that hs appears to be the correct result of aggregating num_probe_tuples,
// with a keyspace of size max_id, with a fanout into num_tables tables.
template <int buffer_size>
static void SanityCheck(const HashStore<buffer_size> & hs,
uint64_t num_probe_tuples, int max_id, int num_tables) {
uint64_t total_count = 0;
for (int i = 0; i < hs.num_tables_; ++i) {
StandardHashTable& ht = hs.tables_[i];
for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); it.Next()) {
total_count += it.GetRow()->count;
}
}
ASSERT_EQ(num_probe_tuples, total_count);
ASSERT_EQ(num_tables, hs.num_tables_)
<< "You do not have the number of tables that you hoped.\n"
<< "This could mean that there weren't enough probe tuples to fill the keyspace, "
<< "skewed hashing lead a hash table that we expected not to overflow to overflow, "
<< "or a genuine bug.";
if (buffer_size > 0) {
ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
}
}
template <int buffer_size>
inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int num_tables) {
uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, build_tuples, num_tables);
int64_t delta;
if (prev == 0) {
// just print 0 for the delta the first time.
delta = 0;
}
else {
delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
}
LOG(ERROR) << build_tuples << "\t"
<< PrettyPrinter::Print(time, TUnit::CPU_TICKS)
<< "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
return time;
}
// Run a test aggregation with buffers of size buffer_size bytes.
// Try the full range of working set size (and thus fanout) that we can do with
// single-level fanout.
template <int buffer_size>
inline static void Test() {
LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " tuples ("
<< PrettyPrinter::Print(buffer_size, TUnit::BYTES)
<< "):";
LOG(ERROR) << "#BuildTuples\tTime\tdTime";
uint64_t prev = 0;
// only go up to 2^12 hashtables because after that, there are at least as many
// buffers as L2 cache lines. We won't let this happen; we'll do multi-level fanout.
// Note that we should probably allow 2 cache lines per buffer (since a write might
// span a line break), plus we need some memory aside from the buffers, so we'll
// actually stop at 2^11, or maybe 2^12. And we might want to keep them in L1,
// in which case we'll stop at 2^7 or so.
for (int num_tables = 1; num_tables <= 1<<12; num_tables *= 2) {
// how many total build tuples we'll use to fill num_tables tables
// Needs to be comfortably less than the total capacity of num_tables tables
// (the below expression without the constant factor multiplied by it)
// because the hashes will not be spread perfectly.
// But should be more than 1/2 of that expression because otherwise if the hashes
// spread out perfectly, it will fit in num_tables / 2 and not give us the fanout
// we expect. (A test will fail in this case so that we know.)
int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
prev = NextTestCase<buffer_size>(build_tuples, prev, num_tables);
}
}
};
}
int main(int argc, char** argv) {
  // Logging must be up before anything emits LOG() output.
  google::InitGoogleLogging(argv[0]);
  // Let gtest consume its own command-line flags from argc/argv.
  ::testing::InitGoogleTest(&argc, argv);
  CpuInfo::Init();
  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
  int status = RUN_ALL_TESTS();
  return status;
}
TEST(GrowingTest, All) {
  // First measure with buffering disabled entirely.
  GrowingTest::Test<0>();
  // Then measure with one of the best-performing buffer sizes: slightly more than a
  // power of 2 cache lines (64 bytes each) so that consecutive buffers are offset.
  static const int kGoodBufferBytes = (1 << 13) + 3 * 64;
  GrowingTest::Test<kGoodBufferBytes>();
}