blob: 2e658fd108ececbe35277a9451c6c285aae6d975 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "arrow/util/bpacking.h"
#include "arrow/util/bpacking_default.h"
#include "arrow/util/cpu_info.h"
#include "arrow/util/dispatch.h"
#include "arrow/util/logging.h"
#if defined(ARROW_HAVE_RUNTIME_AVX2)
#include "arrow/util/bpacking_avx2.h"
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
#include "arrow/util/bpacking_avx512.h"
#endif
#if defined(ARROW_HAVE_NEON)
#include "arrow/util/bpacking_neon.h"
#endif
namespace arrow {
namespace internal {
namespace {
int unpack32_default(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
batch_size = batch_size / 32 * 32;
int num_loops = batch_size / 32;
switch (num_bits) {
case 0:
for (int i = 0; i < num_loops; ++i) in = nullunpacker32(in, out + i * 32);
break;
case 1:
for (int i = 0; i < num_loops; ++i) in = unpack1_32(in, out + i * 32);
break;
case 2:
for (int i = 0; i < num_loops; ++i) in = unpack2_32(in, out + i * 32);
break;
case 3:
for (int i = 0; i < num_loops; ++i) in = unpack3_32(in, out + i * 32);
break;
case 4:
for (int i = 0; i < num_loops; ++i) in = unpack4_32(in, out + i * 32);
break;
case 5:
for (int i = 0; i < num_loops; ++i) in = unpack5_32(in, out + i * 32);
break;
case 6:
for (int i = 0; i < num_loops; ++i) in = unpack6_32(in, out + i * 32);
break;
case 7:
for (int i = 0; i < num_loops; ++i) in = unpack7_32(in, out + i * 32);
break;
case 8:
for (int i = 0; i < num_loops; ++i) in = unpack8_32(in, out + i * 32);
break;
case 9:
for (int i = 0; i < num_loops; ++i) in = unpack9_32(in, out + i * 32);
break;
case 10:
for (int i = 0; i < num_loops; ++i) in = unpack10_32(in, out + i * 32);
break;
case 11:
for (int i = 0; i < num_loops; ++i) in = unpack11_32(in, out + i * 32);
break;
case 12:
for (int i = 0; i < num_loops; ++i) in = unpack12_32(in, out + i * 32);
break;
case 13:
for (int i = 0; i < num_loops; ++i) in = unpack13_32(in, out + i * 32);
break;
case 14:
for (int i = 0; i < num_loops; ++i) in = unpack14_32(in, out + i * 32);
break;
case 15:
for (int i = 0; i < num_loops; ++i) in = unpack15_32(in, out + i * 32);
break;
case 16:
for (int i = 0; i < num_loops; ++i) in = unpack16_32(in, out + i * 32);
break;
case 17:
for (int i = 0; i < num_loops; ++i) in = unpack17_32(in, out + i * 32);
break;
case 18:
for (int i = 0; i < num_loops; ++i) in = unpack18_32(in, out + i * 32);
break;
case 19:
for (int i = 0; i < num_loops; ++i) in = unpack19_32(in, out + i * 32);
break;
case 20:
for (int i = 0; i < num_loops; ++i) in = unpack20_32(in, out + i * 32);
break;
case 21:
for (int i = 0; i < num_loops; ++i) in = unpack21_32(in, out + i * 32);
break;
case 22:
for (int i = 0; i < num_loops; ++i) in = unpack22_32(in, out + i * 32);
break;
case 23:
for (int i = 0; i < num_loops; ++i) in = unpack23_32(in, out + i * 32);
break;
case 24:
for (int i = 0; i < num_loops; ++i) in = unpack24_32(in, out + i * 32);
break;
case 25:
for (int i = 0; i < num_loops; ++i) in = unpack25_32(in, out + i * 32);
break;
case 26:
for (int i = 0; i < num_loops; ++i) in = unpack26_32(in, out + i * 32);
break;
case 27:
for (int i = 0; i < num_loops; ++i) in = unpack27_32(in, out + i * 32);
break;
case 28:
for (int i = 0; i < num_loops; ++i) in = unpack28_32(in, out + i * 32);
break;
case 29:
for (int i = 0; i < num_loops; ++i) in = unpack29_32(in, out + i * 32);
break;
case 30:
for (int i = 0; i < num_loops; ++i) in = unpack30_32(in, out + i * 32);
break;
case 31:
for (int i = 0; i < num_loops; ++i) in = unpack31_32(in, out + i * 32);
break;
case 32:
for (int i = 0; i < num_loops; ++i) in = unpack32_32(in, out + i * 32);
break;
default:
DCHECK(false) << "Unsupported num_bits";
}
return batch_size;
}
struct Unpack32DynamicFunction {
using FunctionType = decltype(&unpack32_default);
static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
return {
{ DispatchLevel::NONE, unpack32_default }
#if defined(ARROW_HAVE_RUNTIME_AVX2)
, { DispatchLevel::AVX2, unpack32_avx2 }
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
, { DispatchLevel::AVX512, unpack32_avx512 }
#endif
};
}
};
} // namespace
int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
#if defined(ARROW_HAVE_NEON)
return unpack32_neon(in, out, batch_size, num_bits);
#else
static DynamicDispatch<Unpack32DynamicFunction> dispatch;
return dispatch.func(in, out, batch_size, num_bits);
#endif
}
} // namespace internal
} // namespace arrow