Add benchmark support
diff --git a/src/main/cpp/benchmark/CMakeLists.txt b/src/main/cpp/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..9ca8b8a
--- /dev/null
+++ b/src/main/cpp/benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_executable(producer_benchmark ProducerBenchmark.cpp)
+target_link_libraries(producer_benchmark ${ROCKETMQ_CLIENT_CORE} benchmark)
+# example_benchmark links only the benchmark library, not ${ROCKETMQ_CLIENT_CORE}.
+add_executable(example_benchmark ExampleBenchmark.cpp)
+target_link_libraries(example_benchmark benchmark)
\ No newline at end of file
diff --git a/src/main/cpp/benchmark/ExampleBenchmark.cpp b/src/main/cpp/benchmark/ExampleBenchmark.cpp
new file mode 100644
index 0000000..383628c
--- /dev/null
+++ b/src/main/cpp/benchmark/ExampleBenchmark.cpp
@@ -0,0 +1,35 @@
+#include <benchmark/benchmark.h>
+#include <atomic>
+
+// Performs n atomic increments with relaxed memory ordering and returns the total
+// (the long counter is narrowed to int on return; n <= 0 performs no increments).
+int simpleLoopRelax(int n) {
+    std::atomic_long counter(0);
+    for (int remaining = n; remaining > 0; --remaining) counter.fetch_add(1, std::memory_order_relaxed);
+    return counter.load(std::memory_order_relaxed);
+}
+
+// Performs n atomic increments with sequentially consistent ordering and returns the
+// total (the long counter is narrowed to int on return; n <= 0 performs no increments).
+int simpleLoopSeq(int n) {
+    std::atomic_long counter(0);
+    for (int remaining = n; remaining > 0; --remaining) counter.fetch_add(1, std::memory_order_seq_cst);
+    return counter.load(std::memory_order_seq_cst);
+}
+
+// Times simpleLoopRelax(); benchmark argument 0 is the number of increments per iteration.
+static void BM_simpleLoopRelaxed(benchmark::State &state) {
+    const int count = static_cast<int>(state.range(0));  // loop-invariant: read once, untimed
+    for (auto _ : state) benchmark::DoNotOptimize(simpleLoopRelax(count));
+}
+
+// Times simpleLoopSeq(); benchmark argument 0 is the number of increments per iteration.
+static void BM_simpleLoopSeq(benchmark::State &state) {
+    const int count = static_cast<int>(state.range(0));  // loop-invariant: read once, untimed
+    for (auto _ : state) benchmark::DoNotOptimize(simpleLoopSeq(count));
+}
+
+BENCHMARK(BM_simpleLoopRelaxed)->RangeMultiplier(2)->Range(1, 1<<10)->UseRealTime()->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_simpleLoopSeq)->RangeMultiplier(2)->Range(1, 1<<10)->UseRealTime()->Unit(benchmark::kNanosecond);
+// Both sweeps above: argument 1..1024 in powers of two, wall-clock time, reported in ns.
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/src/main/cpp/benchmark/ProducerBenchmark.cpp b/src/main/cpp/benchmark/ProducerBenchmark.cpp
new file mode 100644
index 0000000..ce73293
--- /dev/null
+++ b/src/main/cpp/benchmark/ProducerBenchmark.cpp
@@ -0,0 +1,40 @@
+#include <benchmark/benchmark.h>
+#include "rocketmq.h"
+#include "rocketmq-client-cpp-full.h"
+#include <cstring>
+
+// Benchmarks send_message() for payloads of state.range(0) bytes.
+// Everything outside the `for (auto _ : state)` loop is untimed by the framework, so no
+// PauseTiming()/ResumeTiming() is needed (they are only valid while the loop is running).
+static void BM_ProducerSendMessage(benchmark::State &state) {
+    graal_isolatethread_t *thread_ = nullptr;
+    if (graal_create_isolate(nullptr, nullptr, &thread_) != 0) {  // non-zero means failure
+        state.SkipWithError("graal_create_isolate failed");
+        return;
+    }
+
+    factory_property property = {
+            .group_id = "GID_opensource_unit_test",
+            .access_key = "AK",
+            .access_secret = "SK",
+            .name_srv_addr = "47.100.33.127:9876"};
+    int instance_id = create_producer(thread_, &property);
+
+    const auto body_size = state.range(0);
+    char *data = new char[body_size]();  // zero-filled so the payload is deterministic
+    message m{};  // value-initialised: fields not assigned below are not left indeterminate
+    m.topic = "t_opensource_unit_test";
+    m.tags = "tagA";
+    m.body = data;
+    m.body_size = body_size;
+    m.key = "abc";
+    send_result sr{};
+    for (auto _ : state) send_message(thread_, instance_id, &m, &sr);  // timed region
+    delete[] data;
+    destroy_instance(thread_, instance_id);
+    graal_detach_thread(thread_);
+}
+
+BENCHMARK(BM_ProducerSendMessage)->RangeMultiplier(2)->Range(1<<6, 1<<10)->UseRealTime()->Unit(benchmark::kMillisecond);
+// Registration above: payload sizes 64..1024 bytes in powers of two, wall-clock time in ms.
+BENCHMARK_MAIN();