Optimize hot spots in Stream Manager (#3322)

* Replace std::unordered_map with tsl::hopscotch_map in tuple-cache.h

* Replaced std::hash with a faster xxhash implementation to speed up topologies that use the fields-grouping strategy

* Enabled the O3 optimization level by default to guarantee all smart pointers are inlined

* The xxhash migration has been rolled back since its performance effect is negligible

* RETRY_ATTEMPTS bumped to 50 since Travis is slow
diff --git a/WORKSPACE b/WORKSPACE
index 07ebcab..872c975 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -925,6 +925,14 @@
     build_file = "@//:third_party/cppcheck/cppcheck.BUILD",
     sha256 = "b3de7fbdc1a23d7341b55f7f88877e106a76847bd5a07fa721c07310b625318b",
 )
+
+http_archive(
+    name = "com_github_hopscotch_hashmap",
+    build_file = "@//:third_party/hopscotch-hashmap/hopscotch.BUILD",
+    sha256 = "73e301925e1418c5ed930ef37ebdcab2c395a6d1bdaf5a012034bb75307d33f1",
+    strip_prefix = "hopscotch-map-2.2.1",
+    urls = ["https://github.com/Tessil/hopscotch-map/archive/v2.2.1.tar.gz"],
+)
 # end 3rdparty C++ dependencies
 
 # for helm
diff --git a/heron/stmgr/src/cpp/BUILD b/heron/stmgr/src/cpp/BUILD
index ebd14f8..c056023 100644
--- a/heron/stmgr/src/cpp/BUILD
+++ b/heron/stmgr/src/cpp/BUILD
@@ -61,6 +61,7 @@
         "//heron/proto:proto-cxx",
         "//heron/common/src/cpp/network:network-cxx",
         "//heron/common/src/cpp/config:config-cxx",
+        "@com_github_hopscotch_hashmap//:hopscotch-hashmap",
     ],
     linkstatic = 1,
 )
diff --git a/heron/stmgr/src/cpp/util/tuple-cache.h b/heron/stmgr/src/cpp/util/tuple-cache.h
index a0e23eb..d25ae9c 100644
--- a/heron/stmgr/src/cpp/util/tuple-cache.h
+++ b/heron/stmgr/src/cpp/util/tuple-cache.h
@@ -20,6 +20,7 @@
 #ifndef SRC_CPP_SVCS_STMGR_SRC_UTIL_TUPLE_CACHE_H_
 #define SRC_CPP_SVCS_STMGR_SRC_UTIL_TUPLE_CACHE_H_
 
+#include <tsl/hopscotch_map.h>
 #include <deque>
 #include <vector>
 #include <map>
@@ -114,7 +115,7 @@
   TupleList* get(sp_int32 _task_id);
 
   // map from task_id to the TupleList
-  std::unordered_map<sp_int32, TupleList*> cache_;
+  tsl::hopscotch_map<sp_int32, TupleList*> cache_;
   std::shared_ptr<EventLoop> eventLoop_;
   std::function<void(sp_int32, proto::system::HeronTupleSet2*)> tuple_drainer_;
   std::function<void(sp_int32, proto::ckptmgr::DownstreamStatefulCheckpoint*)>
diff --git a/integration_test/src/python/topology_test_runner/main.py b/integration_test/src/python/topology_test_runner/main.py
index 741bade..08fefea 100644
--- a/integration_test/src/python/topology_test_runner/main.py
+++ b/integration_test/src/python/topology_test_runner/main.py
@@ -37,7 +37,7 @@
 DEFAULT_TEST_CONF_FILE = "integration_test/src/python/topology_test_runner/resources/test.json"
 
 #seconds
-RETRY_ATTEMPTS = 25
+RETRY_ATTEMPTS = 50
 RETRY_INTERVAL = 10
 WAIT_FOR_DEACTIVATION = 5
 
diff --git a/third_party/hopscotch-hashmap/hopscotch.BUILD b/third_party/hopscotch-hashmap/hopscotch.BUILD
new file mode 100644
index 0000000..8ef7618
--- /dev/null
+++ b/third_party/hopscotch-hashmap/hopscotch.BUILD
@@ -0,0 +1,12 @@
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "hopscotch-hashmap",
+    hdrs = glob([
+        "include/tsl/*.h",
+    ]),
+    includes = ["include"],
+    linkstatic = 1,
+)
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 2346ca4..0e35097 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -34,6 +34,7 @@
 build:centos --linkopt -lrt
 build:centos --spawn_strategy=standalone
 build:centos --workspace_status_command scripts/release/status.sh
+build:centos --copt=-O3
 
 # For debian
 # To use it: bazel build --config=debian
@@ -47,6 +48,7 @@
 build:debian --linkopt -lrt
 build:debian --spawn_strategy=standalone
 build:debian --workspace_status_command scripts/release/status.sh
+build:debian --copt=-O3
 
 # For Mac
 # To use it: bazel build --config=darwin
@@ -57,6 +59,7 @@
 build:darwin --ignore_unsupported_sandboxing
 build:darwin --spawn_strategy=standalone
 build:darwin --workspace_status_command scripts/release/status.sh
+build:darwin --copt=-O3
 
 # For Ubuntu
 # To use it: bazel build --config=ubuntu
@@ -70,7 +73,7 @@
 build:ubuntu --linkopt -lrt
 build:ubuntu --spawn_strategy=standalone
 build:ubuntu --workspace_status_command scripts/release/status.sh
-
+build:ubuntu --copt=-O3
 
 ### Disabled checkstyle
 
@@ -83,6 +86,7 @@
 build:centos_nostyle --linkopt -lrt
 build:centos_nostyle --spawn_strategy=standalone
 build:centos_nostyle --workspace_status_command scripts/release/status.sh
+build:centos_nostyle --copt=-O3
 
 # For debian
 # To use it: bazel build --config=debian_nostyle
@@ -93,6 +97,7 @@
 build:debian_nostyle --linkopt -lrt
 build:debian_nostyle --spawn_strategy=standalone
 build:debian_nostyle --workspace_status_command scripts/release/status.sh
+build:debian_nostyle --copt=-O3
 
 # For Mac
 # To use it: bazel build --config=darwin_nostyle
@@ -100,6 +105,7 @@
 build:darwin_nostyle --ignore_unsupported_sandboxing
 build:darwin_nostyle --spawn_strategy=standalone
 build:darwin_nostyle --workspace_status_command scripts/release/status.sh
+build:darwin_nostyle --copt=-O3
 
 # For Ubuntu
 # To use it: bazel build --config=ubuntu_nostyle
@@ -110,3 +116,4 @@
 build:ubuntu_nostyle --linkopt -lrt
 build:ubuntu_nostyle --spawn_strategy=standalone
 build:ubuntu_nostyle --workspace_status_command scripts/release/status.sh
+build:ubuntu_nostyle --copt=-O3
diff --git a/tools/travis/toolchain/CROSSTOOL b/tools/travis/toolchain/CROSSTOOL
index ca0aea2..e70b188 100644
--- a/tools/travis/toolchain/CROSSTOOL
+++ b/tools/travis/toolchain/CROSSTOOL
@@ -134,7 +134,7 @@
     # Conservative choice for -O
     # -O3 can increase binary size and even slow down the resulting binaries.
     # Profile first and / or use FDO if you need better performance than this.
-    compiler_flag: "-O2"
+    compiler_flag: "-O3"
 
     # Disable assertions
     compiler_flag: "-DNDEBUG"