blob: 93732732a542823cd334c01872ed74be5806ea53 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "test_commons.h"
/**
 * Cross-checks fmemcmp against the C library memcmp: for every ordered pair
 * of sample strings, both functions must agree on the sign of the result
 * when comparing the common prefix.
 */
TEST(Primitives, fmemcmp) {
  // Sample pool: the empty string plus, for each step, a plain decimal
  // number and a zero-padded one (the size limit of 10 keeps at most 9
  // characters of the padded form).
  std::vector<std::string> samples;
  samples.push_back("");
  char text[14];
  for (uint32_t n = 0; n < 5000; n += 7) {
    snprintf(text, 14, "%d", n * 31);
    samples.push_back(text);
    snprintf(text, 10, "%010d", n);
    samples.push_back(text);
  }
  const size_t total = samples.size();
  for (size_t a = 0; a < total; a++) {
    std::string & left = samples[a];
    for (size_t b = 0; b < total; b++) {
      std::string & right = samples[b];
      size_t common = std::min(left.length(), right.length());
      int expected = memcmp(left.c_str(), right.c_str(), common);
      int actual = fmemcmp(left.c_str(), right.c_str(), common);
      // Reduce both results to -1 / 0 / +1; only the sign is contractual.
      int expectedSign = (expected > 0) - (expected < 0);
      int actualSign = (actual > 0) - (actual < 0);
      if (expectedSign != actualSign) {
        ASSERT_TRUE(false);
      }
    }
  }
}
/**
 * Benchmark kernel for the C library memcmp.
 *
 * Fills a 2048-byte buffer with the repeating byte pattern 0..255, shuffles
 * it, and then repeatedly compares short ranges (5 to 19 bytes) taken from
 * the lower part of the buffer against fixed positions in the upper part.
 *
 * @param iterations number of passes over the eight comparisons; defaults to
 *                   the count the benchmark has always used
 * @return the sum of all memcmp results, wrapped to 32 bits; it carries no
 *         meaning beyond keeping the calls observable
 */
static int test_memcmp(uint32_t iterations = 100000000) {
  uint8_t buff[2048];
  for (uint32_t i = 0; i < 2048; i++) {
    buff[i] = i & 0xff;
  }
  // NOTE(review): std::random_shuffle is deprecated since C++14 and removed
  // in C++17; kept because the file targets an older standard.
  std::random_shuffle(buff, buff + 2048);
  // Accumulate in an unsigned type: hundreds of millions of results can
  // exceed INT_MAX, and signed overflow is undefined behaviour whereas
  // unsigned arithmetic simply wraps.
  unsigned int sum = 0;
  for (uint32_t i = 0; i < iterations; i++) {
    int offset = i % 1000;
    sum += static_cast<unsigned int>(memcmp(buff, buff + 1024, 5));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1124, 9));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1224, 10));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1324, 15));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1424, 16));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1524, 17));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1624, 18));
    sum += static_cast<unsigned int>(memcmp(buff + offset, buff + 1724, 19));
  }
  return static_cast<int>(sum);
}
/**
 * Benchmark kernel for fmemcmp; mirrors the memcmp kernel so the two
 * timings are comparable.
 *
 * Fills a 2048-byte buffer with the repeating byte pattern 0..255, shuffles
 * it, and then repeatedly compares short ranges (5 to 19 bytes) taken from
 * the lower part of the buffer against fixed positions in the upper part.
 *
 * @param iterations number of passes over the eight comparisons; defaults to
 *                   the count the benchmark has always used
 * @return the sum of all fmemcmp results, wrapped to 32 bits; it carries no
 *         meaning beyond keeping the calls observable
 */
static int test_fmemcmp(uint32_t iterations = 100000000) {
  char buff[2048];
  for (uint32_t i = 0; i < 2048; i++) {
    buff[i] = i & 0xff;
  }
  // NOTE(review): std::random_shuffle is deprecated since C++14 and removed
  // in C++17; kept because the file targets an older standard.
  std::random_shuffle(buff, buff + 2048);
  // Accumulate in an unsigned type: hundreds of millions of results can
  // exceed INT_MAX, and signed overflow is undefined behaviour whereas
  // unsigned arithmetic simply wraps.
  unsigned int sum = 0;
  for (uint32_t i = 0; i < iterations; i++) {
    int offset = i % 1000;
    sum += static_cast<unsigned int>(fmemcmp(buff, buff + 1024, 5));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1124, 9));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1224, 10));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1324, 15));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1424, 16));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1524, 17));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1624, 18));
    sum += static_cast<unsigned int>(fmemcmp(buff + offset, buff + 1724, 19));
  }
  return static_cast<int>(sum);
}
/**
 * Times the memcmp kernel and the fmemcmp kernel back to back and logs the
 * elapsed interval of each.
 */
TEST(Perf, fmemcmp) {
  Timer stopwatch;
  int libcSum = test_memcmp();
  LOG("%s", stopwatch.getInterval(" memcmp ").c_str());

  stopwatch.reset();
  int fastSum = test_fmemcmp();
  LOG("%s", stopwatch.getInterval(" fmemcmp ").c_str());

  // Store the combined result somewhere observable so the compiler cannot
  // drop the benchmark calls as dead code.
  int combined = libcSum + fastSum;
  TestConfig.setInt("tempvalue", combined);
}
/**
 * memcpy benchmark kernel: copies len bytes back and forth between the two
 * buffers.
 *
 * Note the direction of the first copy: despite the parameter names, each
 * round first copies from dest into src and then from src back into dest.
 *
 * @param src  first buffer, at least len bytes
 * @param dest second buffer, at least len bytes
 * @param len  number of bytes moved by each copy
 * @param time number of round trips to perform
 */
static void test_memcpy_perf_len(char * src, char * dest, size_t len, size_t time) {
  size_t remaining = time;
  while (remaining > 0) {
    memcpy(src, dest, len);
    memcpy(dest, src, len);
    --remaining;
  }
}
/**
 * simple_memcpy benchmark kernel: performs the same back-and-forth copy as
 * the memcpy kernel, but through simple_memcpy.
 *
 * The argument order of each call is kept exactly as the benchmark has
 * always issued it: (src, dest, len) first, then (dest, src, len).
 *
 * @param src  first buffer, at least len bytes
 * @param dest second buffer, at least len bytes
 * @param len  number of bytes moved by each copy
 * @param time number of round trips to perform
 */
static void test_simple_memcpy_perf_len(char * src, char * dest, size_t len, size_t time) {
  size_t remaining = time;
  while (remaining > 0) {
    simple_memcpy(src, dest, len);
    simple_memcpy(dest, src, len);
    --remaining;
  }
}
/**
 * Compares memcpy and simple_memcpy on small copies: for every odd length
 * from 1 to 255 bytes, runs one million round trips through each kernel and
 * logs the elapsed interval, labelled with the length.
 */
TEST(Perf, simple_memcpy_small) {
  char * src = new char[10240];
  char * dest = new char[10240];
  char buff[32];
  for (size_t len = 1; len < 256; len = len + 2) {
    LOG("------------------------------");
    // %lu requires an unsigned long argument; size_t is a different type on
    // some platforms (e.g. 32-bit targets), so convert explicitly.
    snprintf(buff, 32, " memcpy %luB\t", static_cast<unsigned long>(len));
    Timer t;
    test_memcpy_perf_len(src, dest, len, 1000000);
    LOG("%s", t.getInterval(buff).c_str());
    snprintf(buff, 32, "simple_memcpy %luB\t", static_cast<unsigned long>(len));
    t.reset();
    test_simple_memcpy_perf_len(src, dest, len, 1000000);
    LOG("%s", t.getInterval(buff).c_str());
  }
  delete[] src;
  delete[] dest;
}
/**
 * Brute-force byte search with a manually unrolled main loop.
 *
 * The main loop examines three bytes per pass while more than three bytes
 * remain; a plain byte-by-byte loop handles the rest.
 *
 * @param p   start of the buffer to search
 * @param ch  byte value to look for
 * @param len number of bytes in the buffer
 * @return pointer to the first occurrence of ch, or NULL if there is none
 */
inline char * memchrbrf4(char * p, char ch, size_t len) {
  const ssize_t total = (ssize_t)len;
  ssize_t pos = 0;
  // Unrolled part: three candidates per iteration.
  while (pos < total - 3) {
    char * chunk = p + pos;
    if (chunk[0] == ch) {
      return chunk;
    }
    if (chunk[1] == ch) {
      return chunk + 1;
    }
    if (chunk[2] == ch) {
      return chunk + 2;
    }
    pos += 3;
  }
  // Tail: whatever the unrolled loop left over.
  while (pos < total) {
    if (p[pos] == ch) {
      return p + pos;
    }
    ++pos;
  }
  return NULL;
}
/**
 * Brute-force byte search with a main loop unrolled by two.
 *
 * The main loop examines the buffer in pairs; if the length is odd, the
 * final byte is checked separately. The whole buffer is covered (the loop
 * bound used to be len / 2 with a stride of 2, which silently skipped
 * roughly the second half of the buffer).
 *
 * @param p   start of the buffer to search
 * @param ch  byte value to look for
 * @param len number of bytes in the buffer
 * @return pointer to the first occurrence of ch, or NULL if there is none
 */
inline char * memchrbrf2(char * p, char ch, size_t len) {
  // Largest even count not exceeding len: the part that can be read in pairs.
  const size_t pairedEnd = len - (len % 2);
  size_t i = 0;
  for (; i < pairedEnd; i += 2) {
    if (p[i] == ch) {
      return p + i;
    }
    if (p[i + 1] == ch) {
      return p + i + 1;
    }
  }
  // Odd length: one byte is left after the last full pair.
  if (i < len && p[i] == ch) {
    return p + i;
  }
  return NULL;
}
/**
 * Byte search implemented with x86-64 inline assembly that scans the buffer
 * in 16-byte steps using the SSE4.2 pcmpestri instruction.
 *
 * Original author's warning: not safe on Mac OS X (segmentation fault);
 * should be safe on Linux without mmap.
 * NOTE(review): every step loads a full 16 bytes with movdqu regardless of
 * how many bytes remain, so the read can extend past s + len; the caller in
 * this file allocates 16 spare bytes after its buffer, presumably for this
 * reason - confirm before reusing elsewhere.
 *
 * @param s   start of the buffer to search
 * @param c   byte value to look for, passed as an int
 * @param len number of bytes in the buffer
 * @return index of the match relative to s, or -1 when the assembly reports
 *         no match; when __X64 is not defined the assembly is compiled out
 *         and the function always returns 0
 */
inline int memchr_sse(const char *s, int c, int len) {
// Register binding used by the asm below (see the constraint lists):
// len : edx; c: esi; s:rdi
int index = 0;
#ifdef __X64
__asm__ __volatile__(
//"and $0xff, %%esi;" //clear upper bytes
// xmm1 <- the byte being searched for
"movd %%esi, %%xmm1;"
// eax = 1: presumably the explicit length of the xmm1 operand (one byte)
"mov $1, %%eax;"
// pre-add 16 because the loop subtracts 16 before every compare
"add $16, %%edx;"
// r8 keeps the original start so the offset can be computed at the end
"mov %%rdi ,%%r8;"
"1:"
// load the next 16 bytes (unaligned) and advance pointer / remaining length
"movdqu (%%rdi), %%xmm2;"
"sub $16, %%edx;"
"addq $16, %%rdi;"
// the .byte sequence is the hand-encoded form of this instruction:
//"pcmpestri $0x0, %%xmm2,%%xmm1;"
".byte 0x66 ,0x0f ,0x3a ,0x61 ,0xca ,0x00;"
//"lea 16(%%rdi), %%rdi;"
"ja 1b;"//Res2==0:no match and zflag==0: s is not end
"jc 3f;"//Res2==1: match and s is not end
"mov $0xffffffff, %%eax;"//no match
"jmp 0f;"
"3:"
// match: result = (bytes advanced - 16) + index reported in ecx
"sub %%r8, %%rdi;"
"lea -16(%%edi,%%ecx),%%eax;"
"0:"
// "mov %%eax, %0;"
// outputs: eax -> index; rdi/esi/edx are listed as outputs because the asm
// modifies them
:"=a"(index),"=D"(s),"=S"(c),"=d"(len)
:"D"(s),"S"(c),"d"(len)
// NOTE(review): xmm1 and xmm2 are written above but are not in the clobber
// list - confirm whether they should be added.
:"rcx","r8","memory"
);
#endif
return index;
}
/**
 * Line-counting benchmark for four byte-search implementations: memchr,
 * memchrbrf2, memchrbrf4 and memchr_sse.
 *
 * A 100 MiB buffer of 'a' is sprinkled with newlines at random positions
 * (size / lineLength of them, where lineLength comes from the
 * "memchr.line.length" setting, default 100). Each implementation then walks
 * the buffer from newline to newline, and the throughput is logged together
 * with the number of newlines it found.
 */
TEST(Perf, memchr) {
  Random rng;
  int32_t size = 100 * 1024 * 1024;
  int32_t lineLength = TestConfig.getInt("memchr.line.length", 100);
  // 16 extra bytes beyond the searched range are allocated (left unfilled).
  char * buff = new char[size + 16];
  memset(buff, 'a', size);
  int32_t newlines = size / lineLength;
  for (int n = 0; n < newlines; n++) {
    buff[rng.next_int32(size)] = '\n';
  }
  char * const end = buff + size;

  // --- C library memchr ---
  Timer timer;
  char * cursor = buff;
  int found = 0;
  while (cursor != end) {
    cursor = (char*)memchr(cursor, '\n', end - cursor);
    if (cursor == NULL) {
      break;
    }
    // step past the newline and count it
    cursor++;
    found++;
  }
  LOG("%s", timer.getSpeedM2("memchr bytes/lines", size, found).c_str());

  // --- memchrbrf2 ---
  timer.reset();
  cursor = buff;
  found = 0;
  while (cursor != end) {
    cursor = (char*)memchrbrf2(cursor, '\n', end - cursor);
    if (cursor == NULL) {
      break;
    }
    cursor++;
    found++;
  }
  LOG("%s", timer.getSpeedM2("memchrbrf2 bytes/lines", size, found).c_str());

  // --- memchrbrf4 ---
  timer.reset();
  cursor = buff;
  found = 0;
  while (cursor != end) {
    cursor = (char*)memchrbrf4(cursor, '\n', end - cursor);
    if (cursor == NULL) {
      break;
    }
    cursor++;
    found++;
  }
  LOG("%s", timer.getSpeedM2("memchrbrf4 bytes/lines", size, found).c_str());

  // --- memchr_sse: returns an index instead of a pointer, -1 for no match ---
  timer.reset();
  cursor = buff;
  found = 0;
  while (cursor != end) {
    int hit = memchr_sse(cursor, '\n', end - cursor);
    if (hit == -1) {
      break;
    }
    cursor = cursor + hit;
    cursor++;
    found++;
  }
  LOG("%s", timer.getSpeedM2("memchr_sse bytes/lines", size, found).c_str());

  delete[] buff;
}
/**
 * Bulk-copy benchmark: copies a block of "input.size" bytes (default 64 KiB)
 * over and over until "input.mb" mebibytes (default 320) have been moved,
 * first with memcpy and then with simple_memcpy, logging the throughput of
 * each.
 */
TEST(Perf, memcpy_batch) {
  int32_t chunk = TestConfig.getInt("input.size", 64 * 1024);
  size_t totalBytes = TestConfig.getInt("input.mb", 320) * 1024 * 1024UL;
  char * src = new char[chunk];
  char * dest = new char[chunk];
  memset(src, 0, chunk);
  memset(dest, 0, chunk);

  Timer stopwatch;
  size_t copied = 0;
  while (copied < totalBytes) {
    memcpy(dest, src, chunk);
    copied += chunk;
  }
  LOG("%s", stopwatch.getSpeedM("memcpy", totalBytes).c_str());

  stopwatch.reset();
  copied = 0;
  while (copied < totalBytes) {
    simple_memcpy(dest, src, chunk);
    copied += chunk;
  }
  LOG("%s", stopwatch.getSpeedM("simple_memcpy", totalBytes).c_str());

  delete[] src;
  delete[] dest;
}