blob: 55346497489a1230001772dc0fe8f6cf16862ef5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "redactor.h"
#include "redactor.detail.h"
#include <cstdlib> // rand
#include <cstdio> // file stuff
#include <pthread.h>
#include <unistd.h> // cpu info
#include "redactor-test-utils.h"
#include "testutil/gtest-util.h"
namespace impala {
using std::string;
// Defined outside this file (hence 'extern'). The tests below read it after loading a
// rules file in order to inspect the parsed rules directly.
extern std::vector<Rule>* g_rules;
// Thread entry point used by the MultiThreaded test. Runs 100 rounds of: fill a buffer
// via RandomlyFillString(), copy it into a string, Redact() it, then verify that the
// length is unchanged, that no digit survived and that every character lies in the
// range ' '..'~'. Problems are reported with ADD_FAILURE() and the function stops at
// the first one.
//
// 'unused' is ignored; it only exists to match the pthread start-routine signature.
// Always returns NULL.
void* MultiThreadWorkload(void* unused) {
  unsigned int rand_seed = RandSeed();
  const int buffer_size = 10000 + rand_r(&rand_seed) % 1000;
  // A runtime-sized 'char buffer[buffer_size]' is a compiler extension, not standard
  // C++, so the scratch space lives in a vector instead.
  std::vector<char> buffer(buffer_size);
  // The checks below expect buffer_size - 1 characters followed by a terminator.
  const size_t expected_length = static_cast<size_t>(buffer_size) - 1;
  string message;
  for (int i = 0; i < 100; ++i) {
    RandomlyFillString(&buffer[0], buffer_size);
    message = &buffer[0];
    Redact(&message);
    if (message.length() != expected_length) {
      ADD_FAILURE() << "Message length changed; new size is " << message.length();
      return NULL;
    }
    for (size_t c = 0; c < expected_length; ++c) {
      const char ch = message[c];
      if ('0' <= ch && ch <= '9') {
        ADD_FAILURE() << "Number " << ch << " should be replaced with #";
        return NULL;
      }
      if (ch < ' ' || '~' < ch) {
        ADD_FAILURE() << "Unexpected char " << ch;
        return NULL;
      }
    }
    // Read the terminator through c_str(): indexing a non-const string at
    // position length() is only well-defined since C++11.
    if (message.c_str()[expected_length] != '\0') {
      ADD_FAILURE() << "Missing string terminator";
      return NULL;
    }
  }
  return NULL;
}
// Loads a single rule that has no "trigger" field. The parsed rule is expected to have
// an empty trigger, and every "foo" in a message is expected to become "bar".
TEST(RedactorTest, NoTrigger) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"foo\", \"replace\": \"bar\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);

  // Inspect the one parsed rule field by field.
  ASSERT_EQ(1, g_rules->size());
  ASSERT_EQ("", g_rules->front().trigger);
  ASSERT_EQ("foo", g_rules->front().search_pattern.pattern());
  ASSERT_EQ("bar", g_rules->front().replacement);

  // Messages without "foo" stay as they are; all occurrences are replaced, including
  // those spread over several lines.
  ASSERT_UNREDACTED("baz");
  ASSERT_REDACTED_EQ("foo", "bar");
  ASSERT_REDACTED_EQ("foo bar foo baz", "bar bar bar baz");
  ASSERT_REDACTED_EQ("foo\nbar\nfoo baz", "bar\nbar\nbar baz");
}
// Loads a single rule with trigger "baz". A message without "baz" is expected to be
// left alone; a message containing it is expected to have its "foo"s replaced.
TEST(RedactorTest, Trigger) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);

  ASSERT_EQ(1, g_rules->size());
  ASSERT_EQ("baz", g_rules->front().trigger);

  ASSERT_UNREDACTED("foo");
  ASSERT_REDACTED_EQ("foo bar foo baz", "bar bar bar baz");
}
// Combines an untriggered digit rule with a rule triggered by "baz". Digits are
// expected to be collapsed to '#' in every message, while "foo" is only expected to be
// rewritten when the message also contains "baz".
TEST(RedactorTest, MultiTrigger) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"\\\\d+\", \"replace\": \"#\"},"
      " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_EQ(2, g_rules->size());

  ASSERT_REDACTED_EQ("foo33", "foo#");
  ASSERT_REDACTED_EQ("foo foo baz!3", "bar bar baz!#");
}
// Exercises the "caseSensitive" rule property. The same temporary rules file is
// rewritten for each scenario and reloaded before the expectations are checked.
TEST(RedactorTest, CaseSensitivityProperty) {
  // Scenario 1: case-insensitive search pattern, no trigger.
  TempRulesFile temp_rules(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"(C|d)+\", \"replace\": \"_\", \"caseSensitive\": false}"
      " ]"
      "}");
  string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_UNREDACTED("123");
  ASSERT_REDACTED_EQ("abcD Cd c D d C", "ab_ _ _ _ _ _");

  // Scenario 2: case-insensitive rule whose trigger and search differ only in case.
  temp_rules.OverwriteContents(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {"
      " \"trigger\": \"BaZ\","
      " \"caseSensitive\": false,"
      " \"search\": \"bAz\","
      " \"replace\": \"bar\""
      " }"
      " ]"
      "}");
  load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_REDACTED_EQ("bAz bar", "bar bar");
  ASSERT_REDACTED_EQ("BAz bar", "bar bar");

  // Scenario 3: as above, with an upper-case replacement.
  temp_rules.OverwriteContents(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {"
      " \"trigger\": \"FOO\","
      " \"caseSensitive\": false,"
      " \"search\": \"foO\","
      " \"replace\": \"BAR\""
      " }"
      " ]"
      "}");
  load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_REDACTED_EQ("fOO bar", "BAR bar");

  // Scenario 4: explicitly case-sensitive search pattern, no trigger.
  temp_rules.OverwriteContents(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"(Xy)+\", \"replace\": \"$\", \"caseSensitive\": true}"
      " ]"
      "}");
  load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_UNREDACTED("xY");
  ASSERT_REDACTED_EQ("Xy", "$");

  // Scenario 5: explicitly case-sensitive rule with a trigger. Trigger and search must
  // both appear with exactly the given case for the replacement to be expected.
  temp_rules.OverwriteContents(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {"
      " \"trigger\": \"Sensitive\","
      " \"caseSensitive\": true,"
      " \"search\": \"SsS\","
      " \"replace\": \"sss\""
      " }"
      " ]"
      "}");
  load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_UNREDACTED("SsS");
  ASSERT_UNREDACTED("sensitive SsS");
  ASSERT_UNREDACTED("Sensitive sss");
  ASSERT_REDACTED_EQ("Sensitive SsS", "Sensitive sss");

  // Scenario 6: no "caseSensitive" field at all. The expectations below are those of
  // a case-sensitive rule.
  temp_rules.OverwriteContents(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {"
      " \"trigger\": \"QQQ\","
      " \"search\": \"qQq\","
      " \"replace\": \"QqQ\""
      " }"
      " ]"
      "}");
  load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_UNREDACTED("qQq");
  ASSERT_UNREDACTED("QQQ");
  ASSERT_UNREDACTED("QQq qQq");
  ASSERT_REDACTED_EQ("QQQ qQq", "QQQ QqQ");
}
// Two rules that share the trigger "baz". Without the trigger neither rule is expected
// to change the message; with it, both replacements are expected.
TEST(RedactorTest, SingleTriggerMultiRule) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"trigger\": \"baz\", \"search\": \"\\\\d+\", \"replace\": \"#\"},"
      " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_EQ(2, g_rules->size());

  ASSERT_UNREDACTED("foo33");
  ASSERT_REDACTED_EQ("foo foo baz!3", "bar bar baz!#");
}
// Loads ten rules whose outputs feed into one another. The expected results below are
// consistent with the rules being applied one after another in file order, each one
// seeing the output of the previous ones.
TEST(RedactorTest, RuleOrder) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"trigger\": \"barC\", \"search\": \".*\", \"replace\": \"Z\"},"
      " {\"search\": \"1\", \"replace\": \"2\"},"
      " {\"search\": \"1\", \"replace\": \"3\"},"
      " {\"trigger\": \"foo\", \"search\": \"2\", \"replace\": \"A\"},"
      " {\"trigger\": \"bar\", \"search\": \"2\", \"replace\": \"1\"},"
      " {\"search\": \"1\", \"replace\": \"4\"},"
      " {\"search\": \"1\", \"replace\": \"5\"},"
      " {\"trigger\": \"foo\", \"search\": \"A\", \"replace\": \"C\"},"
      " {\"trigger\": \"bar\", \"search\": \"5\", \"replace\": \"1\"},"
      " {\"trigger\": \"barC\", \"search\": \".*\", \"replace\": \"D\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);
  ASSERT_EQ(10, g_rules->size());

  ASSERT_UNREDACTED("foo");
  // The first "1" rule wins; the later "1" rules find nothing left to replace.
  ASSERT_REDACTED_EQ("1", "2");
  ASSERT_REDACTED_EQ("foo1", "fooC");
  ASSERT_REDACTED_EQ("bar1", "bar4");
  ASSERT_REDACTED_EQ("bar5", "bar1");
  // Presumably "barC" only comes into existence after the earlier rules have rewritten
  // the message, so only the last "barC" rule is expected to fire.
  ASSERT_REDACTED_EQ("foobar1", "D");
}
// Checks redaction of an empty message and of a large (10000-byte buffer) message
// using a rule that replaces every digit with '#'. The large message is expected to
// keep its length and to contain no digits afterwards.
TEST(RedactorTest, InputSize) {
  TempRulesFile rules_file(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"[0-9]\", \"replace\": \"#\"}"
      " ]"
      "}");
  string error = SetRedactionRulesFromFile(rules_file.name());
  ASSERT_EQ("", error);
  ASSERT_UNREDACTED("");

  // Declared const so that 'buffer' is an ordinary fixed-size array rather than a
  // variable-length array (a compiler extension in C++).
  const int buffer_size = 10000;
  char buffer[buffer_size];
  RandomlyFillString(buffer, buffer_size);
  string message(buffer);
  Redact(&message);
  // Compare like with like: length() is unsigned.
  ASSERT_EQ(static_cast<size_t>(buffer_size - 1), message.length());
  // Inspect exactly the characters of the message; indexing at position length() on a
  // non-const string is only well-defined since C++11.
  for (size_t i = 0; i < message.length(); ++i) {
    ASSERT_TRUE(message[i] < '0' || '9' < message[i])
        << "Number " << message[i] << " should be replaced with #";
  }
}
// Uses replacements that shrink or grow the message: upper-case letters are removed,
// a run of digits collapses to one '#' when "reduce" is present, and each digit
// expands to "####" when "add" is present.
TEST(RedactorTest, ChangeInputSize) {
  const char* const rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"[A-Z]\", \"replace\": \"\"},"
      " {\"trigger\": \"reduce\", \"search\": \"[0-9]+\", \"replace\": \"#\"},"
      " {\"trigger\": \"add\", \"search\": \"[0-9]\", \"replace\": \"####\"}"
      " ]"
      "}";
  TempRulesFile temp_rules(rules_json);
  const string load_error = SetRedactionRulesFromFile(temp_rules.name());
  ASSERT_EQ("", load_error);

  ASSERT_REDACTED_EQ("AAAAAAA", "");
  ASSERT_REDACTED_EQ("reduce1234", "reduce#");
  ASSERT_REDACTED_EQ("add1234", "add################");
}
// Runs MultiThreadWorkload() concurrently on twice as many threads as there are online
// processors, using rules that replace the digits 0-7 unconditionally and 8 / 9 when
// they trigger themselves. The workload reports its own failures; this test only
// starts the threads and waits for all of them.
TEST(RedactorTest, MultiThreaded) {
  TempRulesFile rules_file(
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"0\", \"replace\": \"#\"},"
      " {\"search\": \"1\", \"replace\": \"#\"},"
      " {\"search\": \"2\", \"replace\": \"#\"},"
      " {\"search\": \"3\", \"replace\": \"#\"},"
      " {\"search\": \"4\", \"replace\": \"#\"},"
      " {\"search\": \"5\", \"replace\": \"#\"},"
      " {\"search\": \"6\", \"replace\": \"#\"},"
      " {\"search\": \"7\", \"replace\": \"#\"},"
      " {\"trigger\": \"8\", \"search\": \"8\", \"replace\": \"#\"},"
      " {\"trigger\": \"9\", \"search\": \"9\", \"replace\": \"#\"}"
      " ]"
      "}");
  string error = SetRedactionRulesFromFile(rules_file.name());
  ASSERT_EQ("", error);

  // sysconf() returns -1 on error; fall back to a single processor so the worker count
  // can never become zero or negative.
  long processor_count = sysconf(_SC_NPROCESSORS_ONLN);
  if (processor_count < 1) processor_count = 1;
  const size_t worker_count = 2 * static_cast<size_t>(processor_count);
  // A vector instead of a runtime-sized array, which is not standard C++.
  std::vector<pthread_t> worker_ids(worker_count);

  // Non-fatal checks are used from here on: a fatal assertion would return from the
  // test while already-started workers are still running and never joined.
  size_t started = 0;
  for (; started < worker_count; ++started) {
    int status = pthread_create(&worker_ids[started], NULL, MultiThreadWorkload, NULL);
    EXPECT_EQ(0, status) << "pthread_create failed for worker " << started;
    if (status != 0) break;
  }
  // Join every thread that was actually started, even if one join reports an error.
  for (size_t i = 0; i < started; ++i) {
    int status = pthread_join(worker_ids[i], NULL);
    EXPECT_EQ(0, status) << "pthread_join failed for worker " << i;
  }
}
}