blob: 24784cc957cf726ed82d51ebfcb559a0d0d8db55 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stack>
#include <string>
#include "util/bitmap_intersect.h"
namespace doris {
// Compute the intersection union difference set of two or more bitmaps
// Usage: orthogonal_bitmap_parse_calculate(bitmap_column, filter_column, input_string)
// Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)'), meaning find the intersection union difference set of user_id in all A/B/C/D 4 bitmaps
// Operation symbol:
// the operator '|' stands for union, the operator '&' stands for intersection, the operator '-' indicates the difference set, the operator '^' stands for xor
class BitmapExprCalculation : public BitmapIntersect<std::string> {
public:
BitmapExprCalculation() = default;
explicit BitmapExprCalculation(const char* src) { deserialize(src); }
void bitmap_calculation_init(std::string& input_str) {
_polish = reverse_polish(input_str);
std::string bitmap_key;
for (int i = 0; i < _polish.length(); i++) {
char c = _polish.at(i);
if (c != '&' && c != '|' && c != '^' && c != '-' && c != ' ' && c != '\\') {
bitmap_key += c;
} else if (i != 0 && _polish.at(i - 1) == '\\') {
bitmap_key += c;
} else if (c == '\\') {
continue;
} else {
if (bitmap_key.length() > 0) {
add_key(bitmap_key);
bitmap_key.clear();
}
}
}
if (bitmap_key.length() > 0) {
add_key(bitmap_key);
bitmap_key.clear();
}
}
BitmapValue bitmap_calculate() {
std::stack<BitmapValue> values;
std::string bitmap_key;
for (int i = 0; i < _polish.length(); i++) {
char c = _polish.at(i);
if (c == ' ') {
if (bitmap_key.length() > 0) {
values.push(_bitmaps[bitmap_key]);
bitmap_key.clear();
}
} else if (c != '&' && c != '|' && c != '^' && c != '-' && c != '\\') {
bitmap_key += c;
} else if (i != 0 && _polish.at(i - 1) == '\\') {
bitmap_key += c;
} else if (c == '\\') {
continue;
} else {
if (bitmap_key.length() > 0) {
values.push(_bitmaps[bitmap_key]);
bitmap_key.clear();
}
if (values.size() >= 2) {
BitmapValue op_a = values.top();
values.pop();
BitmapValue op_b = values.top();
values.pop();
BitmapValue cal_result;
bitmap_calculate(op_a, op_b, c, cal_result);
values.push(cal_result);
}
}
}
BitmapValue result;
if (bitmap_key.length() > 0) {
result |= _bitmaps[bitmap_key];
} else if (!values.empty()) {
result |= values.top();
}
return result;
}
// calculate the bitmap value by expr bitmap calculate
int64_t bitmap_calculate_count() {
if (_bitmaps.empty()) {
return 0;
}
return bitmap_calculate().cardinality();
}
private:
constexpr int priority(char c) {
switch (c) {
case '&':
return 1;
case '|':
return 1;
case '^':
return 1;
case '-':
return 1;
default:
return 0;
}
}
template <class T>
std::string print_stack(std::stack<T>& stack) {
std::string result;
while (!stack.empty()) {
result = stack.top() + result;
stack.pop();
}
return result;
}
std::string reverse_polish(const std::string& input_str) {
std::stack<char> polish;
std::stack<char> op_stack;
bool last_is_char = false;
for (int i = 0; i < input_str.length(); i++) {
char cur_char = input_str.at(i);
if (cur_char != '&' && cur_char != '|' && cur_char != '^' && cur_char != '-' &&
cur_char != '(' && cur_char != ')' && cur_char != ' ' && cur_char != '\t') {
if (!last_is_char) {
polish.push(' ');
}
polish.push(cur_char);
last_is_char = true;
continue;
} else if (i != 0 && input_str.at(i - 1) == '\\') {
polish.push(cur_char);
last_is_char = true;
continue;
} else if (cur_char == ' ' || cur_char == '\t') {
last_is_char = false;
continue;
} else if (cur_char == '(') {
op_stack.push(cur_char);
} else if (!op_stack.empty() && cur_char == ')') {
while (!op_stack.empty() && op_stack.top() != '(') {
polish.push(op_stack.top());
op_stack.pop();
}
op_stack.pop();
} else {
if (!op_stack.empty() && op_stack.top() == '(') {
op_stack.push(cur_char);
} else {
if (!op_stack.empty() && priority(cur_char) > priority(op_stack.top())) {
op_stack.push(cur_char);
} else {
while (!op_stack.empty()) {
if (op_stack.top() == '(') {
break;
}
if (priority(cur_char) <= priority(op_stack.top())) {
polish.push(op_stack.top());
op_stack.pop();
} else {
break;
}
}
op_stack.push(cur_char);
}
}
}
last_is_char = false;
}
while (!op_stack.empty()) {
polish.push(op_stack.top());
op_stack.pop();
}
return print_stack(polish);
}
void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op, BitmapValue& result) {
result |= op_b;
switch (op) {
case '&':
result &= op_a;
break;
case '|':
result |= op_a;
break;
case '-':
result -= op_a;
break;
case '^':
result ^= op_a;
break;
}
}
std::string _polish;
};
} // namespace doris