/*
 * xdelta.c:  xdelta generator.
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#include <assert.h>
#include <apr_general.h> /* for APR_INLINE */
#include <apr_hash.h>
#include "svn_hash.h"
#include "svn_delta.h"
#include "private/svn_string_private.h"
#include "delta.h"

/* This is pseudo-adler32.  It is adler32 without the prime modulus.
   The idea is borrowed from monotone, and is a translation of the C++
   code.  Graydon Hoare, the author of the original code, gave his
   explicit permission to use it under these terms at 8:02pm on
   Friday, February 11th, 2005.  */
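
/* Layout sketch (derived from the code below, for orientation): for a
   block X[0..N-1] with N = MATCH_BLOCKSIZE, the checksum packs two
   16-bit quantities into one 32-bit word:

     s1 = X[0] + X[1] + ... + X[N-1]               (low 16 bits)
     s2 = N*X[0] + (N-1)*X[1] + ... + 1*X[N-1]     (high 16 bits)

   i.e. checksum == s2 * 0x10000 + s1. */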

/* Size of the blocks we compute checksums for.  This was chosen out of
   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.
   However, later optimizations assume it to be 256 or less.
 */
#define MATCH_BLOCKSIZE 64

/* Size of the checksum presence FLAGS array in BLOCKS_T.  With standard
   MATCH_BLOCKSIZE and SVN_DELTA_WINDOW_SIZE, 32k entries is about 20x
   the number of checksums that actually occur, i.e. we expect a >95%
   probability that a non-matching checksum is already rejected by the
   check against the FLAGS array.

   Must be a power of 2.
 */
#define FLAGS_COUNT (32 * 1024)

/* "no" / "invalid" / "unused" value for positions within the delta
 * windows.
 */
#define NO_POSITION ((apr_uint32_t)-1)

/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
 * This function may (and will) only be called for characters that are
 * MATCH_BLOCKSIZE positions apart.
 *
 * Please note that the lower 16 bits cannot overflow in either direction.
 * Therefore, we don't need to split the value into separate values for
 * sum(char) and sum(sum(char)).
 */
static APR_INLINE apr_uint32_t
adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
{
  adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));

  adler32 -= (unsigned char)c_out;
  adler32 += (unsigned char)c_in;

  return adler32 + adler32 * 0x10000;
}
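
/* Why the update above works (a sketch, not part of the original
   comments): with s1' = s1 - c_out + c_in and
   s2' = s2 - MATCH_BLOCKSIZE * c_out + s1', the three additions and
   subtractions leave (s2 - MATCH_BLOCKSIZE * c_out) in the high half
   and s1' in the low half; "adler32 + adler32 * 0x10000" then adds s1'
   into the high half, giving exactly s2' * 0x10000 + s1'. */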

/* Calculate a pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
   at DATA.  Return the checksum value.  */
static APR_INLINE apr_uint32_t
init_adler32(const char *data)
{
  const unsigned char *input = (const unsigned char *)data;
  const unsigned char *last = input + MATCH_BLOCKSIZE;

  apr_uint32_t s1 = 0;
  apr_uint32_t s2 = 0;

  /* Unrolled by 8; this relies on MATCH_BLOCKSIZE being a multiple of 8. */
  for (; input < last; input += 8)
    {
      s1 += input[0]; s2 += s1;
      s1 += input[1]; s2 += s1;
      s1 += input[2]; s2 += s1;
      s1 += input[3]; s2 += s1;
      s1 += input[4]; s2 += s1;
      s1 += input[5]; s2 += s1;
      s1 += input[6]; s2 += s1;
      s1 += input[7]; s2 += s1;
    }

  return s2 * 0x10000 + s1;
}
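
/* Consistency sketch (derived from the two functions above, not part of
   the original comments): for any buffer B with at least
   MATCH_BLOCKSIZE + 1 bytes,

     init_adler32(B + 1)
       == adler32_replace(init_adler32(B), B[0], B[MATCH_BLOCKSIZE])

   i.e. the rolling update slides the block checksum forward one byte. */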

/* Information for a block of the delta source.  The length of the
   block is the smaller of MATCH_BLOCKSIZE and the difference between
   the size of the source data and the position of this block. */
struct block
{
  apr_uint32_t adlersum;

  /* Even on 64-bit systems, store only 32-bit offsets in our hash table
     (our delta window size is much, much smaller than 4GB).
     That reduces the hash table size by 50%, from 32KB to 16KB,
     and makes it easier to fit into the CPU's L1 cache. */
  apr_uint32_t pos;    /* NO_POSITION -> block is not used */
};

/* A hash table, using open addressing, of the blocks of the source. */
struct blocks
{
  /* The largest valid index of slots.
     This value has an upper bound proportionate to the text delta
     window size, so unless we dramatically increase the window size,
     it's safe to make this a 32-bit value.  In any case, it has to be
     the same width as the block position index, (struct block).pos. */
  apr_uint32_t max;

  /* Source buffer that the positions in SLOTS refer to. */
  const char* data;

  /* Bit array indicating whether there may be a matching slot for a given
     adler32 checksum.  Since FLAGS has many more entries than SLOTS, this
     will indicate most cases of non-matching checksums with a "0" bit, i.e.
     as "known not to have a match".
     The mapping of adler32 checksum bits is [0..2][16..27] (LSB -> MSB),
     i.e. address the byte by the multiplicative part of adler32 and address
     the bits in that byte by the additive part of adler32. */
  char flags[FLAGS_COUNT / 8];

  /* The vector of blocks.  A pos value of NO_POSITION represents an unused
     slot. */
  struct block *slots;
};

/* Return a hash value calculated from the adler32 SUM, suitable for use
   with our hash table. */
static apr_uint32_t hash_func(apr_uint32_t sum)
{
  /* Since the adler32 checksum has a bad distribution for the 11th to 16th
     bits when used for our small block size, we add some bits from the
     other half of the checksum. */
  return sum ^ (sum >> 12);
}

/* Return the offset in BLOCKS.FLAGS for the adler32 SUM. */
static apr_uint32_t hash_flags(apr_uint32_t sum)
{
  /* The upper half of SUM has a wider value range than the lower 16 bits.
     Also, we want to use a different folding than HASH_FUNC to minimize
     correlation between different hash levels. */
  return (sum >> 16) & ((FLAGS_COUNT / 8) - 1);
}
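
/* Worked example (illustrative, values made up): for sum = 0xABCD1234,
   hash_flags() selects byte (0xABCD & 0xFFF) = 0xBCD of FLAGS, and
   add_block() / the prefilter in compute_delta() test bit
   (0x34 & 7) = 4 within that byte. */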

/* Insert a block with the checksum ADLERSUM at position POS in the source
   data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
   actually the same content. */
static void
add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
{
  apr_uint32_t h = hash_func(adlersum) & blocks->max;

  /* This will terminate, since we know that we will not fill the table. */
  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
    if (blocks->slots[h].adlersum == adlersum)
      if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
                 MATCH_BLOCKSIZE) == 0)
        return;

  blocks->slots[h].adlersum = adlersum;
  blocks->slots[h].pos = pos;
  blocks->flags[hash_flags(adlersum)] |= 1 << (adlersum & 7);
}

/* Find a block in BLOCKS with the checksum ADLERSUM and matching the
   content at DATA, returning its position in the source data.  If there
   is no such block, return NO_POSITION. */
static apr_uint32_t
find_block(const struct blocks *blocks,
           apr_uint32_t adlersum,
           const char* data)
{
  apr_uint32_t h = hash_func(adlersum) & blocks->max;

  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
    if (blocks->slots[h].adlersum == adlersum)
      if (memcmp(blocks->data + blocks->slots[h].pos, data,
                 MATCH_BLOCKSIZE) == 0)
        return blocks->slots[h].pos;

  return NO_POSITION;
}

/* Initialize the matches table from DATA of size DATALEN.  This goes
   through every block of MATCH_BLOCKSIZE bytes in the source and
   checksums it, inserting the result into the BLOCKS table. */
static void
init_blocks_table(const char *data,
                  apr_size_t datalen,
                  struct blocks *blocks,
                  apr_pool_t *pool)
{
  apr_size_t nblocks;
  apr_size_t wnslots = 1;
  apr_uint32_t nslots;
  apr_uint32_t i;

  /* Be pessimistic about the block count. */
  nblocks = datalen / MATCH_BLOCKSIZE + 1;

  /* Find nearest larger power of two. */
  while (wnslots <= nblocks)
    wnslots *= 2;

  /* Double the number of slots to avoid a too-high load factor. */
  wnslots *= 2;
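
  /* Sizing example (illustrative, not from the original comments): for a
     100000-byte window, nblocks = 100000/64 + 1 = 1563; the next power of
     two is 2048, and doubling yields 4096 slots.  The load factor thus
     stays below about 40%, keeping open-addressing probe chains short. */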

  /* Narrow the number of slots to 32 bits, which is the size of the
     block position index in the hash table.
     Sanity check: On 64-bit platforms, apr_size_t is likely to be
     larger than apr_uint32_t.  Make sure that the number of slots
     actually fits into blocks->max.  It's safe to use a hard assert
     here, because the largest possible value for nslots is
     proportional to the text delta window size and is therefore much
     smaller than the range of an apr_uint32_t.  If we ever happen to
     increase the window size too much, this assertion will get
     triggered by the test suite. */
  nslots = (apr_uint32_t) wnslots;
  SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);

  blocks->max = nslots - 1;
  blocks->data = data;
  blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
  for (i = 0; i < nslots; ++i)
    {
      /* Avoid using an indeterminate value in the lookup. */
      blocks->slots[i].adlersum = 0;
      blocks->slots[i].pos = NO_POSITION;
    }

  /* No checksum entries in SLOTS, yet => reset all checksum flags. */
  memset(blocks->flags, 0, sizeof(blocks->flags));

  /* If there is an odd block at the end of the buffer, we will
     not use that shorter block for deltification (only indirectly
     as an extension of some previous block). */
  for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
    add_block(blocks, init_adler32(data + i), i);
}

/* Try to find a match for the target data B in BLOCKS, and then
   extend the match as long as data in A and B at the match position
   continues to match.  We set the position in A we ended up in (in
   case we extended it backwards) in *APOSP and update the corresponding
   position within B given in *BPOSP.  PENDING_INSERT_START sets the
   lower limit for *BPOSP.
   Return the number of matching bytes starting at *APOSP.  Return 0 if
   no match has been found.
 */
static apr_size_t
find_match(const struct blocks *blocks,
           const apr_uint32_t rolling,
           const char *a,
           apr_size_t asize,
           const char *b,
           apr_size_t bsize,
           apr_size_t *bposp,
           apr_size_t *aposp,
           apr_size_t pending_insert_start)
{
  apr_size_t apos, bpos = *bposp;
  apr_size_t delta, max_delta;

  apos = find_block(blocks, rolling, b + bpos);

  /* See if we have a match. */
  if (apos == NO_POSITION)
    return 0;

  /* Extend the match forward as far as possible. */
  max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
            ? asize - apos - MATCH_BLOCKSIZE
            : bsize - bpos - MATCH_BLOCKSIZE;
  delta = svn_cstring__match_length(a + apos + MATCH_BLOCKSIZE,
                                    b + bpos + MATCH_BLOCKSIZE,
                                    max_delta);

  /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
     content has been sampled only every MATCH_BLOCKSIZE positions). */
  while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
    {
      --apos;
      --bpos;
      ++delta;
    }

  *aposp = apos;
  *bposp = bpos;

  return MATCH_BLOCKSIZE + delta;
}
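
/* Example (illustrative, positions made up): if A[1000..1300) equals
   B[2000..2300) and the rolling checksum hits the sampled block
   A[1024..1088), the forward extension reaches A[1300) and the backward
   extension (at most MATCH_BLOCKSIZE-1 steps, and never past
   PENDING_INSERT_START) walks back to A[1000), so all 300 matching
   bytes are reported. */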

/* Utility for compute_delta() that compares the range B[START,BSIZE) with
 * the range of similar size before A[ASIZE].  Create corresponding copy and
 * insert operations.
 *
 * BUILD_BATON and POOL will be passed through from compute_delta().
 */
static void
store_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
                    const char *a,
                    apr_size_t asize,
                    const char *b,
                    apr_size_t bsize,
                    apr_size_t start,
                    apr_pool_t *pool)
{
  apr_size_t end_match;
  apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
  if (max_len == 0)
    return;

  end_match = svn_cstring__reverse_match_length(a + asize, b + bsize,
                                                max_len);
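
  /* Note (an assumption mirroring the heuristic in compute_delta below):
     a trailing match of 4 bytes or less would cost more to encode as a
     copy op than to emit as literal new data, so it is dropped. */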
  if (end_match <= 4)
    end_match = 0;

  if (bsize - start > end_match)
    svn_txdelta__insert_op(build_baton, svn_txdelta_new,
                           start, bsize - start - end_match, b + start, pool);
  if (end_match)
    svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                           asize - end_match, end_match, NULL, pool);
}

/* Compute a delta from A to B using xdelta.

   The basic xdelta algorithm is as follows:

   1. Go through the source data, checksumming every MATCH_BLOCKSIZE
      block of bytes using adler32, and inserting the checksum into a
      match table with the position of the match.
   2. Go through the target byte by byte, seeing if that byte starts a
      match that we have in the match table.
      2a. If so, try to extend the match as far as possible both
          forwards and backwards, and then insert a source copy
          operation into the delta ops builder for the match.
      2b. If not, insert the byte as new data using an insert delta op.

   Our implementation doesn't immediately insert "insert" operations;
   it waits until we have another copy, or we are done.  The reasoning
   is twofold:

   1. Otherwise, we would just be building a ton of 1-byte insert
      operations.
   2. So that we can extend a source match backwards into a pending
      insert operation, and possibly remove the need for the insert
      entirely.  This can happen due to stream alignment.
*/
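
/* Illustrative example (not from the original comments): suppose B equals
   A except for a short run of new bytes spliced into the middle.  The
   loop below typically emits

     svn_txdelta_source   copy of the leading part of A
     svn_txdelta_new      the spliced-in bytes (the pending insert)
     svn_txdelta_source   copy of the trailing part of A

   because the rolling checksum re-synchronizes with A's sampled blocks
   shortly after the insertion point. */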
static void
compute_delta(svn_txdelta__ops_baton_t *build_baton,
              const char *a,
              apr_size_t asize,
              const char *b,
              apr_size_t bsize,
              apr_pool_t *pool)
{
  struct blocks blocks;
  apr_uint32_t rolling;
  apr_size_t lo = 0, pending_insert_start = 0, upper;

  /* Optimization: directly compare window starts.  If more than 4
   * bytes match, we can immediately create a matching window.
   * Shorter sequences result in a net data increase. */
  lo = svn_cstring__match_length(a, b, asize > bsize ? bsize : asize);
  if ((lo > 4) || (lo == bsize))
    {
      svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                             0, lo, NULL, pool);
      pending_insert_start = lo;
    }
  else
    lo = 0;

  /* If the remaining target or the source is smaller than the match
     blocksize, just insert the entire remaining target. */
  if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
    {
      store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
      return;
    }

  upper = bsize - MATCH_BLOCKSIZE; /* this is now known to be >= LO */

  /* Initialize the matches table. */
  init_blocks_table(a, asize, &blocks, pool);

  /* Initialize our rolling checksum. */
  rolling = init_adler32(b + lo);
  while (lo < upper)
    {
      apr_size_t matchlen;
      apr_size_t apos;

      /* Quickly skip positions whose respective ROLLING checksums
         definitely do not match any SLOT in BLOCKS. */
      while (!(blocks.flags[hash_flags(rolling)] & (1 << (rolling & 7)))
             && lo < upper)
        {
          rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
          lo++;
        }

      /* LO is still <= UPPER, i.e. the following lookup is legal:
         Closely check whether we've got a match for the current location.
         Due to the above pre-filter, chances are that we find one. */
      matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
                            &lo, &apos, pending_insert_start);

      /* If we didn't find a real match, insert the byte at the target
         position into the pending insert. */
      if (matchlen == 0)
        {
          /* Move the block one position forward.  Short blocks at the end
             of the buffer cannot be used as the beginning of a new match. */
          if (lo + MATCH_BLOCKSIZE < bsize)
            rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
          lo++;
        }
      else
        {
          /* Store the sequence of B that is between the matches. */
          if (lo - pending_insert_start > 0)
            svn_txdelta__insert_op(build_baton, svn_txdelta_new,
                                   0, lo - pending_insert_start,
                                   b + pending_insert_start, pool);
          else
            {
              /* The match borders on the previous op.  Maybe we found a
               * match that is better than / overlapping the previous one. */
              apr_size_t len = svn_cstring__reverse_match_length
                                  (a + apos, b + lo, apos < lo ? apos : lo);
              if (len > 0)
                {
                  len = svn_txdelta__remove_copy(build_baton, len);
                  apos -= len;
                  matchlen += len;
                  lo -= len;
                }
            }

          /* Reset the pending insert start to immediately after the
             match. */
          lo += matchlen;
          pending_insert_start = lo;
          svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                                 apos, matchlen, NULL, pool);

          /* Calculate the Adler32 sum for the first block behind the match.
           * Ignore short buffers at the end of B.
           */
          if (lo + MATCH_BLOCKSIZE <= bsize)
            rolling = init_adler32(b + lo);
        }
    }

  /* If we still have an insert pending at the end, throw it in. */
  store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start,
                      pool);
}

void
svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
                    const char *data,
                    apr_size_t source_len,
                    apr_size_t target_len,
                    apr_pool_t *pool)
{
  /* We should never be asked to compute something when the source_len is 0;
     we just use a single insert op there (and rely on zlib for
     compression). */
  assert(source_len != 0);
  compute_delta(build_baton, data, source_len,
                data + source_len, target_len,
                pool);
}
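
/* Call-site sketch (hypothetical, derived only from the signature above):
   the source and target texts are passed in one contiguous buffer with
   the source first, e.g.

     char *buf = apr_palloc(pool, source_len + target_len);
     memcpy(buf, source, source_len);
     memcpy(buf + source_len, target, target_len);
     svn_txdelta__xdelta(build_baton, buf, source_len, target_len, pool);
*/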