Use xxHash for couch_file document and attachment summary checksums

Use the 128-bit variant of xxHash since it has the same 16-byte output size as
MD5, is non-cryptographic, and is quite a bit faster [1].
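Both digests are 16 bytes, so an xxHash checksum fits into the same on-disk
slot an MD5 checksum occupies; a quick remsh sanity check (a sketch, assuming
the `exxhash` NIF is on the code path):
```
% Both checksum flavors produce 16 byte (128 bit) binaries.
B = crypto:strong_rand_bytes(4096),
16 = byte_size(exxhash:xxhash128(B)),
16 = byte_size(couch_hash:md5_hash(B)).
```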

Writing xxHash checksums is disabled by default. This lets us make an
intermediate release that can be safely downgraded to, since it can read both
xxHash and MD5 checksums. In a future release the default will flip to `true`,
but it will still be possible to safely downgrade to the intermediate
releases, which are aware of xxHash checksums and won't interpret them as
corrupt data.

If downgrade is not a concern, using xxHash checksums can yield a noticeable
speed improvement when reading larger documents (128KB+ or so).
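In that case the setting can be flipped at runtime from a remsh; a minimal
sketch using the same calls the new tests use (the value is cached in a
persistent term, so the cache is cleared after changing it):
```
% Enable writing xxHash checksums on this node only (not persisted to the ini
% file); clear the cached persistent term so the new value is picked up.
config:set("couchdb", "write_xxhash_checksums", "true", _Persist = false),
couch_file:reset_checksum_persistent_term_config().
```
Setting it in the ini file and restarting the node has the same effect, since
the cached value is otherwise only read once per node lifetime.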

A stats counter, `[couchdb, legacy_checksums]`, indicates whether any MD5
checksums are still found during normal cluster operation after writing xxHash
checksums has been enabled.
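The counter can be sampled from a remsh (the same call the new tests use); a
non-zero value after enabling xxHash checksums means legacy MD5 checksums are
still being read:
```
% Number of legacy (MD5) checksums verified since the node started.
couch_stats:sample([couchdb, legacy_checksums]).
```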

To avoid duplicating the verification logic, header and block verification are
combined into one function (`verify_checksum/5`) with a flag indicating
whether the data is a header. This preserves the previous behavior: for
headers we don't emit emergency logs when the check fails, since we may simply
be reading left-over uncommitted data at the end of the file.

During test coverage checks it turned out we never actually tested block-level
corruption before, so a test was added for that as well, covering both the
xxHash and legacy cases.

[1] Comparison of hashing a 4KB binary `B` one million times; each result is
the average time per call in microseconds (the first run measures the bare
loop overhead).
```
(node1@127.0.0.1)20> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> do_nothing_overhead end, lists:seq(1, 1000000)) end), (T/1000000.0).
0.167425
(node1@127.0.0.1)21> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> exxhash:xxhash128(B) end, lists:seq(1, 1000000)) end), (T/1000000).
0.770687
(node1@127.0.0.1)22> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> crypto:hash(md5, B) end, lists:seq(1, 1000000)) end), (T/1000000).
6.205445
```
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index a7de550..4f2c44d 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -95,6 +95,11 @@
 ; Sets the log level for informational compaction related entries.
 ;compaction_log_level = info
 
+; Enable writing xxHash checksums in .couch files. The current
+; default is false. When the value is false, both xxHash and legacy
+; checksums can still be read and verified.
+;write_xxhash_checksums = false
+
 [purge]
 ; Allowed maximum number of documents in one purge request
 ;max_document_id_number = 100
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index f87d8d3..6c0d4da 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -326,6 +326,10 @@
     {type, counter},
     {desc, <<"number of OS process prompt errors">>}
 ]}.
+{[couchdb, legacy_checksums], [
+    {type, counter},
+    {desc, <<"number of legacy checksums found in couch_file instances">>}
+]}.
 {[pread, exceed_eof], [
     {type, counter},
     {desc, <<"number of the attempts to read beyond end of db file">>}
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 74d4df2..379925f 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -23,6 +23,8 @@
 -define(IS_OLD_STATE(S), is_pid(S#file.db_monitor)).
 -define(PREFIX_SIZE, 5).
 -define(DEFAULT_READ_COUNT, 1024).
+-define(WRITE_XXHASH_CHECKSUMS_KEY, {?MODULE, write_xxhash_checksums}).
+-define(WRITE_XXHASH_CHECKSUMS_DEFAULT, false).
 
 -type block_id() :: non_neg_integer().
 -type location() :: non_neg_integer().
@@ -55,6 +57,9 @@
 %% helper functions
 -export([process_info/1]).
 
+% test helper functions
+-export([reset_checksum_persistent_term_config/0]).
+
 %%----------------------------------------------------------------------
 %% Args:   Valid Options are [create] and [create,overwrite].
 %%  Files are opened in read/write mode.
@@ -142,8 +147,8 @@
     [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin].
 
 assemble_file_chunk_and_checksum(Bin) ->
-    Md5 = couch_hash:md5_hash(Bin),
-    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Md5, Bin].
+    Checksum = generate_checksum(Bin),
+    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin].
 
 %%----------------------------------------------------------------------
 %% Purpose: Reads a term from a file that was written with append_term
@@ -169,8 +174,8 @@
 
 pread_iolist(Fd, Pos) ->
     case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
-        {ok, IoList, Md5} ->
-            {ok, verify_md5(Fd, Pos, IoList, Md5)};
+        {ok, IoList, Checksum} ->
+            {ok, verify_checksum(Fd, Pos, IoList, Checksum, false)};
         Error ->
             Error
     end.
@@ -191,13 +196,13 @@
 
 pread_iolists(Fd, PosList) ->
     case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
-        {ok, DataMd5s} ->
+        {ok, DataAndChecksums} ->
             Data = lists:zipwith(
-                fun(Pos, {IoList, Md5}) ->
-                    verify_md5(Fd, Pos, IoList, Md5)
+                fun(Pos, {IoList, Checksum}) ->
+                    verify_checksum(Fd, Pos, IoList, Checksum, false)
                 end,
                 PosList,
-                DataMd5s
+                DataAndChecksums
             ),
             {ok, Data};
         Error ->
@@ -400,9 +405,9 @@
 
 write_header(Fd, Data) ->
     Bin = ?term_to_bin(Data),
-    Md5 = couch_hash:md5_hash(Bin),
+    Checksum = generate_checksum(Bin),
     % now we assemble the final header binary and write to disk
-    FinalBin = <<Md5/binary, Bin/binary>>,
+    FinalBin = <<Checksum/binary, Bin/binary>>,
     ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)).
 
 init_status_error(ReturnPid, Ref, Error) ->
@@ -504,11 +509,11 @@
     update_read_timestamp(),
     {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4),
     case iolist_to_binary(LenIolist) of
-        % an MD5-prefixed term
+        % a checksum-prefixed term
         <<1:1/integer, Len:31/integer>> ->
-            {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
-            {Md5, IoList} = extract_md5(Md5AndIoList),
-            {reply, {ok, IoList, Md5}, File};
+            {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
+            {Checksum, IoList} = extract_checksum(ChecksumAndIoList),
+            {reply, {ok, IoList, Checksum}, File};
         <<0:1/integer, Len:31/integer>> ->
             {Iolist, _} = read_raw_iolist_int(File, NextPos, Len),
             {reply, {ok, Iolist, <<>>}, File}
@@ -520,7 +525,7 @@
     LocNums2 = lists:map(
         fun({LenIoList, NextPos}) ->
             case iolist_to_binary(LenIoList) of
-                % an MD5-prefixed term
+                % a checksum-prefixed term
                 <<1:1/integer, Len:31/integer>> ->
                     {NextPos, Len + 16};
                 <<0:1/integer, Len:31/integer>> ->
@@ -534,8 +539,8 @@
         fun({LenIoList, _}, {IoList, _}) ->
             case iolist_to_binary(LenIoList) of
                 <<1:1/integer, _:31/integer>> ->
-                    {Md5, IoList} = extract_md5(IoList),
-                    {IoList, Md5};
+                    {Checksum, IoList} = extract_checksum(IoList),
+                    {IoList, Checksum};
                 <<0:1/integer, _:31/integer>> ->
                     {IoList, <<>>}
             end
@@ -686,10 +691,9 @@
                 {ok, Missing} = file:pread(Fd, ReadStart, ReadLen),
                 <<RestBlock/binary, Missing/binary>>
         end,
-    <<Md5Sig:16/binary, HeaderBin/binary>> =
+    <<Checksum:16/binary, HeaderBin/binary>> =
         iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)),
-    Md5Sig = couch_hash:md5_hash(HeaderBin),
-    {ok, HeaderBin}.
+    {ok, verify_checksum(Fd, Pos, HeaderBin, Checksum, true)}.
 
 %% Read multiple block locations using a single file:pread/2.
 -spec find_header(file:fd(), block_id(), non_neg_integer()) ->
@@ -791,10 +795,10 @@
             {Pos, TotalBytes}
     end.
 
--spec extract_md5(iolist()) -> {binary(), iolist()}.
-extract_md5(FullIoList) ->
-    {Md5List, IoList} = split_iolist(FullIoList, 16, []),
-    {iolist_to_binary(Md5List), IoList}.
+-spec extract_checksum(iolist()) -> {binary(), iolist()}.
+extract_checksum(FullIoList) ->
+    {ChecksumList, IoList} = split_iolist(FullIoList, 16, []),
+    {iolist_to_binary(ChecksumList), IoList}.
 
 calculate_total_read_len(0, FinalLen) ->
     calculate_total_read_len(1, FinalLen) + 1;
@@ -864,15 +868,47 @@
     {monitored_by, PidsAndRefs} = process_info(self(), monitored_by),
     lists:filter(fun is_pid/1, PidsAndRefs).
 
-verify_md5(_Fd, _Pos, IoList, <<>>) ->
+verify_checksum(_Fd, _Pos, IoList, <<>>, _IsHeader) ->
     IoList;
-verify_md5(Fd, Pos, IoList, Md5) ->
-    case couch_hash:md5_hash(IoList) of
-        Md5 -> IoList;
-        _ -> report_md5_error(Fd, Pos)
+verify_checksum(Fd, Pos, IoList, Checksum, IsHeader) ->
+    % If writing xxhash checksums is enabled, check those first, then check
+    % legacy ones. If any legacy ones are found, bump the legacy metric. If
+    % generating xxhash checksums is disabled, assume most checksums would be
+    % legacy, so check that first, and then, in a likely case of release
+    % downgrade, check xxhash ones.
+    case generate_xxhash_checksums() of
+        true ->
+            case exxhash:xxhash128(iolist_to_binary(IoList)) of
+                Checksum ->
+                    IoList;
+                <<_/binary>> ->
+                    case couch_hash:md5_hash(IoList) of
+                        Checksum ->
+                            legacy_checksums_stats_update(),
+                            IoList;
+                        _ ->
+                            report_checksum_error(Fd, Pos, IsHeader)
+                    end
+            end;
+        false ->
+            case couch_hash:md5_hash(IoList) of
+                Checksum ->
+                    IoList;
+                _ ->
+                    case exxhash:xxhash128(iolist_to_binary(IoList)) of
+                        Checksum ->
+                            IoList;
+                        <<_/binary>> ->
+                            report_checksum_error(Fd, Pos, IsHeader)
+                    end
+            end
     end.
 
-report_md5_error(Fd, Pos) ->
+report_checksum_error(_Fd, _Pos, _IsHeader = true) ->
+    % When loading a header we expect to find junk data that might not have
+    % been committed at the end of the file, so we don't emit an emergency log for it.
+    exit({file_corruption, <<"file corruption">>});
+report_checksum_error(Fd, Pos, _IsHeader = false) ->
     couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
     exit({file_corruption, <<"file corruption">>}).
 
@@ -918,6 +954,37 @@
     {ok, Eof} = file:position(File#file.fd, eof),
     File#file{eof = Eof}.
 
+-spec generate_checksum(binary()) -> <<_:128>>.
+generate_checksum(Bin) when is_binary(Bin) ->
+    case generate_xxhash_checksums() of
+        true -> <<_:128>> = exxhash:xxhash128(Bin);
+        false -> <<_:128>> = couch_hash:md5_hash(Bin)
+    end.
+
+legacy_checksums_stats_update() ->
+    % Bump stats only if we're writing new checksums.
+    case generate_xxhash_checksums() of
+        true -> couch_stats:increment_counter([couchdb, legacy_checksums]);
+        false -> ok
+    end.
+
+reset_checksum_persistent_term_config() ->
+    persistent_term:erase(?WRITE_XXHASH_CHECKSUMS_KEY).
+
+generate_xxhash_checksums() ->
+    % Caching the config value here as we'd need to call this per file chunk
+    % and also from various processes (not just couch_file pids). Node must be
+    % restarted for the new value to take effect.
+    case persistent_term:get(?WRITE_XXHASH_CHECKSUMS_KEY, not_cached) of
+        not_cached ->
+            Default = ?WRITE_XXHASH_CHECKSUMS_DEFAULT,
+            Val = config:get_boolean("couchdb", "write_xxhash_checksums", Default),
+            persistent_term:put(?WRITE_XXHASH_CHECKSUMS_KEY, Val),
+            Val;
+        Val when is_boolean(Val) ->
+            Val
+    end.
+
 -ifdef(TEST).
 -include_lib("couch/include/couch_eunit.hrl").
 
diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl
index 8105e69..3bc1951 100644
--- a/src/couch/test/eunit/couch_file_tests.erl
+++ b/src/couch/test/eunit/couch_file_tests.erl
@@ -580,3 +580,182 @@
         {'$gen_call', From, sync} ->
             gen:reply(From, {error, eio})
     end.
+
+checksum_test_() ->
+    {
+        foreach,
+        fun setup_checksum/0,
+        fun teardown_checksum/1,
+        [
+            ?TDEF_FE(t_write_read_xxhash_checksums),
+            ?TDEF_FE(t_downgrade_xxhash_checksums),
+            ?TDEF_FE(t_read_legacy_checksums_after_upgrade),
+            ?TDEF_FE(t_can_detect_block_corruption_with_xxhash),
+            ?TDEF_FE(t_can_detect_block_corruption_with_legacy_checksum)
+        ]
+    }.
+
+setup_checksum() ->
+    Path = ?tempfile(),
+    Ctx = test_util:start_couch(),
+    config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false),
+    {Ctx, Path}.
+
+teardown_checksum({Ctx, Path}) ->
+    file:delete(Path),
+    meck:unload(),
+    test_util:stop_couch(Ctx),
+    couch_file:reset_checksum_persistent_term_config().
+
+t_write_read_xxhash_checksums({_Ctx, Path}) ->
+    enable_xxhash(),
+
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+    ?assertEqual(0, legacy_stats()),
+    couch_file:close(Fd1).
+
+t_downgrade_xxhash_checksums({_Ctx, Path}) ->
+    % We're in the future and writing xxhash checksums by default
+    enable_xxhash(),
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    % The future was broken, we travel back, but still know how to
+    % interpret future checksums without crashing
+    disable_xxhash(),
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+
+    % We'll write some legacy checksums to the file and then ensure
+    % we can read both legacy and the new ones
+    OtherBin = <<"otherbin">>,
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk),
+    couch_file:close(Fd1),
+
+    {ok, Fd2} = couch_file:open(Path, []),
+    {ok, Header2} = couch_file:read_header(Fd2),
+    ?assertEqual(Header, Header2),
+    {ok, Bin2} = couch_file:pread_binary(Fd2, Pos),
+    {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos),
+    ?assertEqual(Bin, Bin2),
+    ?assertEqual(OtherBin, OtherBin1),
+    couch_file:close(Fd2).
+
+t_read_legacy_checksums_after_upgrade({_Ctx, Path}) ->
+    % We're in the past and writing legacy checksums by default
+    disable_xxhash(),
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    % We upgrade and xxhash checksums are now the default, but we can
+    % still read legacy checksums.
+    enable_xxhash(),
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+    % one header, one chunk
+    ?assertEqual(2, legacy_stats()),
+
+    % We'll write some new checksums to the file and then ensure
+    % we can read both legacy and the new ones
+    OtherBin = <<"otherbin">>,
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk),
+    couch_file:close(Fd1),
+
+    couch_stats:decrement_counter([couchdb, legacy_checksums], legacy_stats()),
+    {ok, Fd2} = couch_file:open(Path, []),
+    {ok, Header2} = couch_file:read_header(Fd2),
+    ?assertEqual(Header, Header2),
+    {ok, Bin2} = couch_file:pread_binary(Fd2, Pos),
+    {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos),
+    ?assertEqual(Bin, Bin2),
+    ?assertEqual(OtherBin, OtherBin1),
+    % one header, legacy chunk, not counting new chunk
+    ?assertEqual(2, legacy_stats()),
+    couch_file:close(Fd2).
+
+t_can_detect_block_corruption_with_xxhash({_Ctx, Path}) ->
+    enable_xxhash(),
+
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Bin = crypto:strong_rand_bytes(100000),
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    ok = couch_file:write_header(Fd, header),
+    couch_file:close(Fd),
+
+    {ok, SneakyFd} = file:open(Path, [binary, read, write, raw]),
+    ok = file:pwrite(SneakyFd, Pos + 100, <<"oops!">>),
+    file:close(SneakyFd),
+
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header} = couch_file:read_header(Fd1),
+    ?assertEqual(header, Header),
+    ?assertExit({file_corruption, <<"file corruption">>}, couch_file:pread_binary(Fd1, Pos)),
+    catch couch_file:close(Fd1).
+
+t_can_detect_block_corruption_with_legacy_checksum({_Ctx, Path}) ->
+    disable_xxhash(),
+
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Bin = crypto:strong_rand_bytes(100000),
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    ok = couch_file:write_header(Fd, header),
+    couch_file:close(Fd),
+
+    {ok, SneakyFd} = file:open(Path, [write, binary, read, raw]),
+    ok = file:pwrite(SneakyFd, Pos + 100, <<"oops!">>),
+    file:close(SneakyFd),
+
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header} = couch_file:read_header(Fd1),
+    ?assertEqual(header, Header),
+    ?assertExit({file_corruption, <<"file corruption">>}, couch_file:pread_binary(Fd1, Pos)),
+    catch couch_file:close(Fd1).
+
+enable_xxhash() ->
+    couch_file:reset_checksum_persistent_term_config(),
+    reset_legacy_checksum_stats(),
+    config:set("couchdb", "write_xxhash_checksums", "true", _Persist = false).
+
+disable_xxhash() ->
+    couch_file:reset_checksum_persistent_term_config(),
+    reset_legacy_checksum_stats(),
+    config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false).
+
+legacy_stats() ->
+    couch_stats:sample([couchdb, legacy_checksums]).
+
+reset_legacy_checksum_stats() ->
+    Counter = couch_stats:sample([couchdb, legacy_checksums]),
+    couch_stats:decrement_counter([couchdb, legacy_checksums], Counter).
diff --git a/src/docs/src/config/couchdb.rst b/src/docs/src/config/couchdb.rst
index b481c10..ce0409d 100644
--- a/src/docs/src/config/couchdb.rst
+++ b/src/docs/src/config/couchdb.rst
@@ -230,3 +230,20 @@
 
             [couchdb]
             view_index_dir = /var/lib/couchdb
+
+    .. config:option:: write_xxhash_checksums :: Enable writing xxHash checksums
+
+        .. versionadded:: 3.4
+
+        The default value in version 3.4 is ``false``. The legacy checksum
+        algorithm will be used for writing couch_file blocks. During reads,
+        both xxHash and the legacy checksum algorithm will be used to verify
+        data integrity. In a future version of CouchDB the default value will
+        become ``true``. However, it would still be possible to safely
+        downgrade to version 3.4, which would be able to verify both xxHash and
+        legacy checksums. If CouchDB version downgrade is not a concern,
+        enabling xxHash checksums can result in a measurable document read
+        performance improvement, especially for larger documents::
+
+            [couchdb]
+            write_xxhash_checksums = false