subversion/libsvn_fs_x/reps.c - subversion - Git at Google

 /* reps.c --- FSX representation container
  *
  * ====================================================================
  *    Licensed to the Apache Software Foundation (ASF) under one
  *    or more contributor license agreements.  See the NOTICE file
  *    distributed with this work for additional information
  *    regarding copyright ownership.  The ASF licenses this file
  *    to you under the Apache License, Version 2.0 (the
  *    "License"); you may not use this file except in compliance
  *    with the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  *    Unless required by applicable law or agreed to in writing,
  *    software distributed under the License is distributed on an
  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  *    KIND, either express or implied.  See the License for the
  *    specific language governing permissions and limitations
  *    under the License.
  * ====================================================================
  */

 #include "reps.h"

 #include "svn_sorts.h"
 #include "private/svn_string_private.h"
 #include "private/svn_packed_data.h"
 #include "private/svn_temp_serializer.h"

 #include "svn_private_config.h"

 #include "cached_data.h"

 /* Length of the text chunks we hash and match.  The algorithm will find
  * most matches with a length of 2 * MATCH_BLOCKSIZE and only specific
  * ones that are shorter than MATCH_BLOCKSIZE.
  *
  * This should be a power of two and must be a multiple of 8 because
  * hash_key() consumes its input in groups of 8 bytes.
  * Good choices are 32, 64 and 128.
  */
 #define MATCH_BLOCKSIZE 64

 /* Limit the total text body within a container to 16MB.  Larger values
  * of up to 2GB are possible but become increasingly impractical as the
  * container has to be loaded in its entirety before any of it can be read.
  *
  * svn_fs_x__reps_add() refuses to add contents that would push the text
  * corpus beyond this limit.
  */
 #define MAX_TEXT_BODY 0x1000000

 /* Limit the size of the instructions stream.  This should not exceed the
  * text body size limit.  svn_fs_x__reps_add() checks a pessimistic
  * estimate of the resulting instruction count against this limit. */
 #define MAX_INSTRUCTIONS (MAX_TEXT_BODY / 8)

 /* Value of unused hash buckets, i.e. the marker stored in hash_t.offsets
  * for buckets that do not reference any text.  All bits are set, which is
  * far beyond any valid offset into a corpus limited by MAX_TEXT_BODY. */
 #define NO_OFFSET ((apr_uint32_t)(-1))

 /* Byte strings are described by a series of copy instructions that each
  * do one of the following
  *
  * - copy a given number of bytes from the text corpus starting at a
  *   given offset
  * - reference another instruction and specify how many instructions of
  *   that sequence shall be executed (i.e. a sub-sequence)
  * - copy a number of bytes from the base representation buffer starting
  *   at a given offset
  */

 /* The contents of a fulltext / representation is defined by its first
  * instruction and the number of instructions to execute.
  *
  * This is the builder-side description; when writing the container, only
  * FIRST_INSTRUCTION gets serialized for each representation.
  */
 typedef struct rep_t
 {
   /* index of the first instruction of this representation within the
    * builder's instructions array */
   apr_uint32_t first_instruction;

   /* number of consecutive instructions, beginning with FIRST_INSTRUCTION,
    * that belong to this representation */
   apr_uint32_t instruction_count;
 } rep_t;

 /* A single instruction.  The instruction type is being encoded in OFFSET.
  *
  * Arrays of these structs are written to the container element by element
  * and are also handed to the temp serializer as a flat block of memory.
  */
 typedef struct instruction_t
 {
   /* Instruction type and offset.
    * - offset < 0
    *   reference to instruction sub-sequence starting with
    *   container->instructions[-offset].
    * - 0 <= offset < container->base_text_len
    *   reference to the base text corpus;
    *   start copy at offset
    * - offset >= container->base_text_len
    *   reference to the text corpus;
    *   start copy at offset-container->base_text_len
    *
    * The builder code in this file only ever emits non-negative offsets.
    */
   apr_int32_t offset;

   /* Number of bytes to copy / instructions to execute
    */
   apr_uint32_t count;
 } instruction_t;

 /* Describe a base fulltext, i.e. a representation that lives outside the
  * container and has been registered through svn_fs_x__reps_add_base().
  */
 typedef struct base_t
 {
   /* Revision */
   svn_revnum_t revision;

   /* Item within that revision */
   apr_uint64_t item_index;

   /* Priority with which to use this base over others */
   int priority;

   /* Index into builder->reps that identifies the copy
    * instructions for this base. */
   apr_uint32_t rep;
 } base_t;

 /* Yet another hash data structure.  This one tries to be more cache
  * friendly by putting the first byte of each hashed sequence in a
  * common array.  This array will often fit into L1 or L2 at least and
  * give a 99% accurate test for a match without giving false negatives.
  *
  * There is no collision chaining: every bucket holds at most one entry
  * and PREFIXES, OFFSETS and LAST_MATCHES are parallel arrays indexed by
  * the bucket number returned from hash_to_index().
  */
 typedef struct hash_t
 {
   /* for used entries i, prefixes[i] == text[offsets[i]]; 0 otherwise.
    * This allows for a quick check without resolving the double
    * indirection. */
   char *prefixes;

   /* for used entries i, offsets[i] is start offset in the text corpus;
    * NO_OFFSET otherwise.
    */
   apr_uint32_t *offsets;

   /* to be used later for optimizations.  Right now, this array is only
    * zero-initialized and carried over when the hash grows. */
   apr_uint32_t *last_matches;

   /* number of buckets in this hash, i.e. elements in each array above.
    * Must be 1 << (8 * sizeof(hash_key_t) - shift) */
   apr_size_t size;

   /* number of buckets actually in use. Must be <= size. */
   apr_size_t used;

   /* number of bits to shift right to map a hash_key_t to a bucket index */
   apr_size_t shift;

   /* pool to use when growing the hash */
   apr_pool_t *pool;
 } hash_t;

 /* Hash key type. 32 bits for pseudo-Adler32 hash sums.
  * As produced by hash_key(), the lower 16 bits hold the plain byte sum
  * and the upper 16 bits hold the (truncated) sum of the running sums.
  */
 typedef apr_uint32_t hash_key_t;

 /* Constructor data structure.  It collects the text corpus plus the
  * instructions needed to reconstruct every fulltext added so far and is
  * the input to svn_fs_x__write_reps_container().
  */
 struct svn_fs_x__reps_builder_t
 {
   /* file system to read base representations from */
   svn_fs_t *fs;

   /* text corpus */
   svn_stringbuf_t *text;

   /* text block hash; maps checksums of MATCH_BLOCKSIZE byte blocks to
    * their offsets within TEXT */
   hash_t hash;

   /* array of base_t objects describing all bases defined so far */
   apr_array_header_t *bases;

   /* array of rep_t objects describing all fulltexts (including bases)
    * added so far */
   apr_array_header_t *reps;

   /* array of instruction_t objects describing all instructions */
   apr_array_header_t *instructions;

   /* number of bytes in the text corpus that belongs to bases */
   apr_size_t base_text_len;
 };

 /* R/o container.  Instances get created by
  * svn_fs_x__read_reps_container() and are passed through the temp
  * serializer as a flat struct with four separately serialized arrays
  * (TEXT, BASES, FIRST_INSTRUCTIONS and INSTRUCTIONS).
  */
 struct svn_fs_x__reps_t
 {
   /* text corpus */
   const char *text;

   /* length of the text corpus in bytes */
   apr_size_t text_len;

   /* bases used */
   const base_t *bases;

   /* number of bases used */
   apr_size_t base_count;

   /* fulltext i can be reconstructed by executing instructions
    * first_instructions[i] .. first_instructions[i+1]-1
    * (this array has one extra element at the end)
    */
   const apr_uint32_t *first_instructions;

   /* number of fulltexts (no bases) */
   apr_size_t rep_count;

   /* instructions */
   const instruction_t *instructions;

   /* total number of instructions */
   apr_size_t instruction_count;

   /* offsets >= 0 but smaller than this are considered base references */
   apr_size_t base_text_len;
 };

 /* describe a section in the extractor's result string that is not filled
  * yet (but already exists).  get_text() creates one of these for every
  * instruction that refers to a base representation; the respective range
  * in the result string is zero-filled until then.
  */
 typedef struct missing_t
 {
   /* start offset within the result string */
   apr_uint32_t start;

   /* number of bytes to write */
   apr_uint32_t count;

   /* index into extractor->bases selecting the base representation to
    * copy from (get_text() currently always sets this to 0) */
   apr_uint32_t base;

   /* copy source offset within that base representation */
   apr_uint32_t offset;
 } missing_t;

 /* Fulltext extractor data structure.  svn_fs_x__reps_get() creates it
  * and svn_fs_x__extractor_drive() returns (parts of) its RESULT.
  */
 struct svn_fs_x__rep_extractor_t
 {
   /* filesystem to read the bases from */
   svn_fs_t *fs;

   /* fulltext being constructed */
   svn_stringbuf_t *result;

   /* bases (base_t) yet to process (not used ATM) */
   apr_array_header_t *bases;

   /* missing sections (missing_t) in result->data that need to be filled,
    * yet.  Remains NULL until the first such section gets recorded. */
   apr_array_header_t *missing;

   /* pool to use for allocating the above arrays */
   apr_pool_t *pool;
 };

 /* Roll the pseudo-ADLER32 checksum ADLER32 of a MATCH_BLOCKSIZE byte
  * window forward by one byte:  C_OUT is the byte that leaves the window
  * at its start and C_IN is the byte that enters it at its end.
  * Return the checksum of the shifted window.
  */
 static hash_key_t
 hash_key_replace(hash_key_t adler32, const char c_out, const char c_in)
 {
   const unsigned char leaving = (unsigned char)c_out;
   const unsigned char entering = (unsigned char)c_in;

   /* The first byte of the window has been added to the upper half
    * (the sum of running sums) once per window position. */
   adler32 -= MATCH_BLOCKSIZE * 0x10000u * leaving;

   /* Bring the plain byte sum in the lower half up to date. */
   adler32 = adler32 - leaving + entering;

   /* The new byte sum is the latest running sum; add it to the upper
    * half.  Bits shifted beyond 32 bits simply drop out. */
   return adler32 * 0x10000 + adler32;
 }

 /* Calculate a pseudo-adler32 checksum over the MATCH_BLOCKSIZE bytes
  * starting at DATA and return it.  The lower 16 bits of the result hold
  * the sum of all bytes, the upper 16 bits the (truncated) sum of all
  * intermediate byte sums.
  */
 static hash_key_t
 hash_key(const char *data)
 {
   const unsigned char *bytes = (const unsigned char *)data;
   const unsigned char *const stop = bytes + MATCH_BLOCKSIZE;

   hash_key_t byte_sum = 0;
   hash_key_t running_sum = 0;

   /* Consume the input in groups of 8 bytes. */
   while (bytes < stop)
     {
       int i;
       for (i = 0; i < 8; ++i)
         {
           byte_sum += bytes[i];
           running_sum += byte_sum;
         }

       bytes += 8;
     }

   return (running_sum * 0x10000) + byte_sum;
 }

 /* Return the index of the bucket in HASH that the checksum ADLER32 maps
  * to.  The key gets multiplied by a fixed odd constant and only the
  * topmost bits of the 32 bit product, as selected by HASH->SHIFT, form
  * the index.
  */
 static apr_size_t
 hash_to_index(hash_t *hash, hash_key_t adler32)
 {
   const hash_key_t scrambled = adler32 * 0xd1f3da69;

   return scrambled >> hash->shift;
 }

 /* Allocate the three bucket arrays of HASH with SIZE elements each in
  * RESULT_POOL and mark all buckets as unused.  Also set HASH->SIZE and
  * HASH->POOL; the USED and SHIFT members are left to the caller.
  */
 static void
 allocate_hash_members(hash_t *hash,
                       apr_size_t size,
                       apr_pool_t *result_pool)
 {
   apr_uint32_t *bucket;
   apr_uint32_t *buckets_end;

   hash->pool = result_pool;
   hash->size = size;

   /* prefixes and last matches start out as all-zero */
   hash->prefixes = apr_pcalloc(result_pool, size);
   hash->last_matches = apr_pcalloc(result_pool,
                                    size * sizeof(*hash->last_matches));

   /* offsets need an explicit "unused" marker */
   hash->offsets = apr_palloc(result_pool, size * sizeof(*hash->offsets));

   buckets_end = hash->offsets + size;
   for (bucket = hash->offsets; bucket != buckets_end; ++bucket)
     *bucket = NO_OFFSET;
 }

 /* Set up HASH as an empty hash with 2**TWOPOWER buckets, all of them
  * allocated in RESULT_POOL.
  */
 static void
 init_hash(hash_t *hash,
           apr_size_t twoPower,
           apr_pool_t *result_pool)
 {
   const apr_size_t key_bits = sizeof(hash_key_t) * 8;
   const apr_size_t bucket_count = 1 << twoPower;

   /* keep only the topmost TWOPOWER bits of a key as bucket index */
   hash->shift = key_bits - twoPower;
   hash->used = 0;

   allocate_hash_members(hash, bucket_count, result_pool);
 }

 /* Replace the bucket arrays of HASH with larger ones:  the new bucket
  * count is at least twice the current one and at least MIN_SIZE.  All
  * existing entries get re-hashed, with their keys being recalculated
  * from the text corpus TEXT.  Entries that collide in the new hash
  * overwrite each other.
  */
 static void
 grow_hash(hash_t *hash,
           svn_stringbuf_t *text,
           apr_size_t min_size)
 {
   hash_t grown;
   apr_size_t bucket;

   /* keep doubling until we have enough buckets; every doubling adds
    * one more bit to the bucket index */
   apr_size_t target_size = hash->size * 2;
   apr_size_t target_shift = hash->shift - 1;
   for (; target_size < min_size; target_size *= 2)
     --target_shift;

   /* set up the empty, larger hash */
   allocate_hash_members(&grown, target_size, hash->pool);
   grown.shift = target_shift;
   grown.used = 0;

   /* move all used buckets over to their new position */
   for (bucket = 0; bucket < hash->size; ++bucket)
     {
       const apr_uint32_t offset = hash->offsets[bucket];
       hash_key_t key;
       size_t idx;

       if (offset == NO_OFFSET)
         continue;

       key = hash_key(text->data + offset);
       idx = hash_to_index(&grown, key);

       if (grown.offsets[idx] == NO_OFFSET)
         ++grown.used;

       grown.prefixes[idx] = hash->prefixes[bucket];
       grown.offsets[idx] = offset;
       grown.last_matches[idx] = hash->last_matches[bucket];
     }

   *hash = grown;
 }

 /* Return a new, empty container builder allocated in RESULT_POOL.
  * FS is the filesystem that base representations will be read from.
  */
 svn_fs_x__reps_builder_t *
 svn_fs_x__reps_builder_create(svn_fs_t *fs,
                               apr_pool_t *result_pool)
 {
   /* the block hash starts with 2**4 buckets and grows on demand */
   enum { INITIAL_HASH_BITS = 4 };

   svn_fs_x__reps_builder_t *builder
     = apr_pcalloc(result_pool, sizeof(*builder));

   builder->fs = fs;
   builder->text = svn_stringbuf_create_empty(result_pool);
   init_hash(&builder->hash, INITIAL_HASH_BITS, result_pool);

   builder->bases = apr_array_make(result_pool, 0, sizeof(base_t));
   builder->reps = apr_array_make(result_pool, 0, sizeof(rep_t));
   builder->instructions
     = apr_array_make(result_pool, 0, sizeof(instruction_t));

   return builder;
 }

 /* Read the fulltext of REP from BUILDER's filesystem, add it to BUILDER
  * like any other fulltext and register it as a base with the given
  * PRIORITY.  All text newly added to the corpus by this call is
  * accounted for as base text.  Use SCRATCH_POOL for temporaries.
  */
 svn_error_t *
 svn_fs_x__reps_add_base(svn_fs_x__reps_builder_t *builder,
                         svn_fs_x__representation_t *rep,
                         int priority,
                         apr_pool_t *scratch_pool)
 {
   const apr_size_t corpus_len_before = builder->text->len;
   svn_stream_t *fulltext_stream;
   svn_string_t *fulltext;
   apr_size_t added_rep;
   base_t *new_base;

   /* fetch the full contents of the base and add them to the container */
   SVN_ERR(svn_fs_x__get_contents(&fulltext_stream, builder->fs, rep, FALSE,
                                  scratch_pool));
   SVN_ERR(svn_string_from_stream2(&fulltext, fulltext_stream,
                                   SVN__STREAM_CHUNK_SIZE, scratch_pool));
   SVN_ERR(svn_fs_x__reps_add(&added_rep, builder, fulltext));

   /* describe the new base */
   new_base = &APR_ARRAY_PUSH(builder->bases, base_t);
   new_base->revision = svn_fs_x__get_revnum(rep->id.change_set);
   new_base->item_index = rep->id.number;
   new_base->priority = priority;
   new_base->rep = (apr_uint32_t)added_rep;

   /* whatever got appended to the corpus belongs to this base */
   builder->base_text_len += builder->text->len - corpus_len_before;

   return SVN_NO_ERROR;
 }

 /* Append the LEN bytes at DATA to the text corpus of BUILDER and add a
  * copy instruction that covers exactly that new section.  Register all
  * complete MATCH_BLOCKSIZE blocks of the new section in the block hash.
  * Do nothing if LEN is 0.
  */
 static void
 add_new_text(svn_fs_x__reps_builder_t *builder,
              const char *data,
              apr_size_t len)
 {
   hash_t *hash = &builder->hash;
   instruction_t instruction;
   apr_size_t block_start;
   apr_size_t wanted_buckets;

   if (len == 0)
     return;

   /* the new instruction refers to what will be the end of the corpus */
   instruction.offset = (apr_int32_t)builder->text->len;
   instruction.count = (apr_uint32_t)len;
   APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction;

   /* extend the corpus */
   svn_stringbuf_appendbytes(builder->text, data, len);

   /* Make room in the hash before adding to it.  This keeps the fill
    * level, and with it the chance of collisions, low. */
   wanted_buckets = hash->used + len / MATCH_BLOCKSIZE;
   if (wanted_buckets * 3 >= hash->size * 2)
     grow_hash(hash, builder->text, 2 * wanted_buckets);

   /* register every complete block of the new section */
   block_start = instruction.offset;
   while (block_start + MATCH_BLOCKSIZE <= builder->text->len)
     {
       const hash_key_t key = hash_key(builder->text->data + block_start);
       const size_t idx = hash_to_index(hash, key);
       const apr_uint32_t occupant = hash->offsets[idx];

       if (occupant == NO_OFFSET)
         ++hash->used;

       /* Entries that refer to the section just added stay as they are.
        * Keeping the earliest block makes early matches more likely. */
       if (occupant == NO_OFFSET || occupant < instruction.offset)
         {
           hash->offsets[idx] = (apr_uint32_t)block_start;
           hash->prefixes[idx] = builder->text->data[block_start];
         }

       block_start += MATCH_BLOCKSIZE;
     }
 }

 /* Add the fulltext CONTENTS to BUILDER and return the index of the new
  * representation in *REP_IDX.
  *
  * CONTENTS gets scanned for MATCH_BLOCKSIZE byte blocks that already
  * exist in BUILDER's text corpus.  Every match is extended as far as
  * possible in both directions and becomes a copy instruction referring
  * to existing text; everything in between gets appended to the corpus.
  *
  * Return SVN_ERR_FS_CONTAINER_SIZE if adding CONTENTS could exceed the
  * text body or instruction count limits of the container.
  */
 svn_error_t *
 svn_fs_x__reps_add(apr_size_t *rep_idx,
                    svn_fs_x__reps_builder_t *builder,
                    const svn_string_t *contents)
 {
   rep_t rep;
   const char *current = contents->data;
   const char *processed = current;
   const char *end = current + contents->len;

   /* Positions at or behind this one cannot start a block that is
    * followed by at least one more byte.  For inputs that are too short
    * to contain any such position, use the start of the buffer instead
    * of calculating a pointer in front of it (which would be undefined
    * behavior); the loops below get skipped either way. */
   const char *last_to_test = contents->len > MATCH_BLOCKSIZE + 1
                            ? end - MATCH_BLOCKSIZE - 1
                            : current;

   if (builder->text->len + contents->len > MAX_TEXT_BODY)
     return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL,
                       _("Text body exceeds star delta container capacity"));

   /* pessimistic estimate: 2 instructions per block of input */
   if (  builder->instructions->nelts + 2 * contents->len / MATCH_BLOCKSIZE
       > MAX_INSTRUCTIONS)
     return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL,
               _("Instruction count exceeds star delta container capacity"));

   rep.first_instruction = (apr_uint32_t)builder->instructions->nelts;
   while (current < last_to_test)
     {
       hash_key_t key = hash_key(current);
       size_t offset;
       size_t idx;

       /* search for the next matching sequence;
        * KEY is the rolling checksum of the block starting at CURRENT */

       for (; current < last_to_test; ++current)
         {
           idx = hash_to_index(&builder->hash, key);

           /* cheap pre-check on the first byte before comparing blocks */
           if (builder->hash.prefixes[idx] == current[0])
             {
               offset = builder->hash.offsets[idx];
               if (   (offset != NO_OFFSET)
                   && (memcmp(&builder->text->data[offset], current,
                              MATCH_BLOCKSIZE) == 0))
                 break;
             }
           key = hash_key_replace(key, current[0], current[MATCH_BLOCKSIZE]);
         }

       /* found it?  (OFFSET is valid only if we left the loop early) */

       if (current < last_to_test)
         {
           instruction_t instruction;

           /* extend the match, backwards up to the last byte already
            * processed and forwards up to the end of either text */

           size_t prefix_match
             = svn_cstring__reverse_match_length(current,
                                                 builder->text->data + offset,
                                                 MIN(offset, current - processed));
           size_t postfix_match
             = svn_cstring__match_length(current + MATCH_BLOCKSIZE,
                            builder->text->data + offset + MATCH_BLOCKSIZE,
                            MIN(builder->text->len - offset - MATCH_BLOCKSIZE,
                                end - current - MATCH_BLOCKSIZE));

           /* non-matched section */

           size_t new_copy = (current - processed) - prefix_match;
           if (new_copy)
             add_new_text(builder, processed, new_copy);

           /* add instruction for matching section */

           instruction.offset = (apr_int32_t)(offset - prefix_match);
           instruction.count = (apr_uint32_t)(prefix_match + postfix_match +
                                              MATCH_BLOCKSIZE);
           APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction;

           processed = current + MATCH_BLOCKSIZE + postfix_match;
           current = processed;
         }
     }

   /* whatever remains at the end did not match anything */
   add_new_text(builder, processed, end - processed);
   rep.instruction_count = (apr_uint32_t)builder->instructions->nelts
                         - rep.first_instruction;
   APR_ARRAY_PUSH(builder->reps, rep_t) = rep;

   *rep_idx = (apr_size_t)(builder->reps->nelts - 1);
   return SVN_NO_ERROR;
 }

 /* Return a rough estimate of the size, in bytes, that the container
  * described by BUILDER will have once it has been written.
  */
 apr_size_t
 svn_fs_x__reps_estimate_size(const svn_fs_x__reps_builder_t *builder)
 {
   /* text that does not stem from a base representation */
   const apr_size_t own_text = builder->text->len - builder->base_text_len;

   /* 100 bytes static overhead */
   apr_size_t estimate = 100;

   /* text exclusive to us @ 50% compression rate */
   estimate += own_text / 2;

   /* 2 bytes per instruction */
   estimate += builder->instructions->nelts * 2;

   /* 2 bytes per representation */
   estimate += builder->reps->nelts * 2;

   /* 8 bytes per base representation */
   estimate += builder->bases->nelts * 8;

   /* 1:8 inefficiency in using the base representations */
   estimate += builder->base_text_len / 8;

   return estimate;
 }

 /* Execute COUNT instructions starting at INSTRUCTION_IDX in CONTAINER
  * and fill the parts of EXTRACTOR->RESULT that we can from this container.
  * Record the remainder in EXTRACTOR->MISSING.
  *
  * This function will recurse for instructions that reference other
  * instruction sequences. COUNT refers to the top-level instructions only.
  */
 static void
 get_text(svn_fs_x__rep_extractor_t *extractor,
          const svn_fs_x__reps_t *container,
          apr_size_t instruction_idx,
          apr_size_t count)
 {
   const instruction_t *instruction
     = container->instructions + instruction_idx;
   const instruction_t *const end = instruction + count;

   for (; instruction < end; instruction++)
     if (instruction->offset < 0)
       {
         /* instruction sub-sequence */
         get_text(extractor, container, -instruction->offset,
                  instruction->count);
       }
     else if (instruction->offset >= container->base_text_len)
       {
         /* Direct copy instruction.  Translate the offset into the text
          * corpus first instead of offsetting the corpus pointer:  a
          * pointer to "text - base_text_len" would lie outside the
          * corpus buffer whenever there is base text. */
         svn_stringbuf_appendbytes(extractor->result,
                                   container->text
                                     + (instruction->offset
                                        - container->base_text_len),
                                   instruction->count);
       }
     else
       {
         /* a section that we need to fill from some external base rep.
          * Reserve zero-filled space for it and remember what to copy. */
         missing_t missing;
         missing.base = 0;
         missing.start = (apr_uint32_t)extractor->result->len;
         missing.count = instruction->count;
         missing.offset = instruction->offset;
         svn_stringbuf_appendfill(extractor->result, 0, instruction->count);

         /* the array gets created upon first use only */
         if (extractor->missing == NULL)
           extractor->missing = apr_array_make(extractor->pool, 1,
                                               sizeof(missing));

         APR_ARRAY_PUSH(extractor->missing, missing_t) = missing;
       }
 }

 /* Create an extractor for the fulltext number IDX in CONTAINER, allocate
  * it in RESULT_POOL and return it in *EXTRACTOR.  All parts of the
  * fulltext that are available from CONTAINER itself get filled in right
  * away.  FS is stored in the extractor for reading base representations.
  */
 svn_error_t *
 svn_fs_x__reps_get(svn_fs_x__rep_extractor_t **extractor,
                    svn_fs_t *fs,
                    const svn_fs_x__reps_t *container,
                    apr_size_t idx,
                    apr_pool_t *result_pool)
 {
   /* the instructions of fulltext IDX end where those of IDX+1 begin */
   const apr_uint32_t *range = container->first_instructions + idx;
   const apr_uint32_t begin = range[0];
   const apr_uint32_t instruction_total = range[1] - begin;

   /* set up an extractor with an empty result */
   svn_fs_x__rep_extractor_t *new_extractor
     = apr_pcalloc(result_pool, sizeof(*new_extractor));

   new_extractor->fs = fs;
   new_extractor->result = svn_stringbuf_create_empty(result_pool);
   new_extractor->pool = result_pool;

   /* construct everything but the sections stemming from base
    * representations */
   get_text(new_extractor, container, begin, instruction_total);

   *extractor = new_extractor;
   return SVN_NO_ERROR;
 }

 /* Return (a section of) the fulltext held by EXTRACTOR in *CONTENTS,
  * allocated in RESULT_POOL.  If SIZE is 0, return the whole fulltext.
  * Otherwise, return up to SIZE bytes beginning at START_OFFSET, with the
  * range being clipped to the end of the fulltext.
  *
  * Fulltexts with sections that need to be read from base representations
  * are not supported and trigger an assertion.  SCRATCH_POOL is unused.
  */
 svn_error_t *
 svn_fs_x__extractor_drive(svn_stringbuf_t **contents,
                           svn_fs_x__rep_extractor_t *extractor,
                           apr_size_t start_offset,
                           apr_size_t size,
                           apr_pool_t *result_pool,
                           apr_pool_t *scratch_pool)
 {
   const svn_stringbuf_t *fulltext = extractor->result;
   apr_size_t remaining;

   /* we don't support base reps right now */
   SVN_ERR_ASSERT(extractor->missing == NULL);

   /* no size given -> hand out everything */
   if (size == 0)
     {
       *contents = svn_stringbuf_dup(fulltext, result_pool);
       return SVN_NO_ERROR;
     }

   /* limit the requested range to what we actually have */
   if (start_offset > fulltext->len)
     start_offset = fulltext->len;

   remaining = fulltext->len - start_offset;
   if (size > remaining)
     size = remaining;

   *contents = svn_stringbuf_ncreate(fulltext->data + start_offset, size,
                                     result_pool);

   return SVN_NO_ERROR;
 }

 /* Serialize the contents of BUILDER as packed data and write it to
  * STREAM.  Use SCRATCH_POOL for the temporary packed data structures.
  *
  * The order in which the top-level streams and their sub-streams get
  * created here defines the data layout and must match the order in which
  * svn_fs_x__read_reps_container() fetches them.
  */
 svn_error_t *
 svn_fs_x__write_reps_container(svn_stream_t *stream,
                                const svn_fs_x__reps_builder_t *builder,
                                apr_pool_t *scratch_pool)
 {
   int i;
   svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);

   /* one top-level stream for each array */
   svn_packed__int_stream_t *bases_stream
     = svn_packed__create_int_stream(root, FALSE, FALSE);
   svn_packed__int_stream_t *reps_stream
     = svn_packed__create_int_stream(root, TRUE, FALSE);
   svn_packed__int_stream_t *instructions_stream
     = svn_packed__create_int_stream(root, FALSE, FALSE);

   /* for misc stuff */
   svn_packed__int_stream_t *misc_stream
     = svn_packed__create_int_stream(root, FALSE, FALSE);

   /* TEXT will be just a single string */
   svn_packed__byte_stream_t *text_stream
     = svn_packed__create_bytes_stream(root);

   /* structure the struct streams such we can extract much of the redundancy
    */
   /* one sub-stream per base_t member: revision, item_index, priority, rep */
   svn_packed__create_int_substream(bases_stream, TRUE, TRUE);
   svn_packed__create_int_substream(bases_stream, TRUE, FALSE);
   svn_packed__create_int_substream(bases_stream, TRUE, FALSE);
   svn_packed__create_int_substream(bases_stream, TRUE, FALSE);

   /* one sub-stream per instruction_t member: offset, count */
   svn_packed__create_int_substream(instructions_stream, TRUE, TRUE);
   svn_packed__create_int_substream(instructions_stream, FALSE, FALSE);

   /* text */
   svn_packed__add_bytes(text_stream, builder->text->data, builder->text->len);

   /* serialize bases */
   for (i = 0; i < builder->bases->nelts; ++i)
     {
       const base_t *base = &APR_ARRAY_IDX(builder->bases, i, base_t);
       svn_packed__add_int(bases_stream, base->revision);
       svn_packed__add_uint(bases_stream, base->item_index);
       svn_packed__add_uint(bases_stream, base->priority);
       svn_packed__add_uint(bases_stream, base->rep);
     }

   /* serialize reps; only the first instruction index gets stored */
   for (i = 0; i < builder->reps->nelts; ++i)
     {
       const rep_t *rep = &APR_ARRAY_IDX(builder->reps, i, rep_t);
       svn_packed__add_uint(reps_stream, rep->first_instruction);
     }

   /* terminate the list with the total instruction count, i.e. the end of
    * the instruction range of the last rep */
   svn_packed__add_uint(reps_stream, builder->instructions->nelts);

   /* serialize instructions */
   for (i = 0; i < builder->instructions->nelts; ++i)
     {
       const instruction_t *instruction
         = &APR_ARRAY_IDX(builder->instructions, i, instruction_t);
       svn_packed__add_int(instructions_stream, instruction->offset);
       svn_packed__add_uint(instructions_stream, instruction->count);
     }

   /* other elements */
   /* NOTE(review): the reader takes its base_text_len from this value.
    * Writing 0 instead of builder->base_text_len makes the reader treat
    * all non-negative offsets as direct references into the text corpus;
    * presumably intentional as long as base reps are unsupported -
    * confirm. */
   svn_packed__add_uint(misc_stream, 0);

   /* write to stream */
   SVN_ERR(svn_packed__data_write(stream, root, scratch_pool));

   return SVN_NO_ERROR;
 }

 /* Read a packed representation container from STREAM and return it as
  * a read-only container object in *CONTAINER.  The container and all its
  * arrays are allocated in RESULT_POOL; SCRATCH_POOL is handed to the
  * packed data reader for temporary data.
  *
  * The streams are fetched in the same order in which
  * svn_fs_x__write_reps_container() creates them.
  */
 svn_error_t *
 svn_fs_x__read_reps_container(svn_fs_x__reps_t **container,
                               svn_stream_t *stream,
                               apr_pool_t *result_pool,
                               apr_pool_t *scratch_pool)
 {
   apr_size_t i;

   /* non-const aliases for filling the arrays of the r/o container */
   base_t *bases;
   apr_uint32_t *first_instructions;
   instruction_t *instructions;

   svn_fs_x__reps_t *reps = apr_pcalloc(result_pool, sizeof(*reps));

   svn_packed__data_root_t *root;
   svn_packed__int_stream_t *bases_stream;
   svn_packed__int_stream_t *reps_stream;
   svn_packed__int_stream_t *instructions_stream;
   svn_packed__int_stream_t *misc_stream;
   svn_packed__byte_stream_t *text_stream;

   /* read from disk */
   SVN_ERR(svn_packed__data_read(&root, stream, result_pool, scratch_pool));

   bases_stream = svn_packed__first_int_stream(root);
   reps_stream = svn_packed__next_int_stream(bases_stream);
   instructions_stream = svn_packed__next_int_stream(reps_stream);
   misc_stream = svn_packed__next_int_stream(instructions_stream);
   text_stream = svn_packed__first_byte_stream(root);

   /* text; keep a private copy of the corpus in RESULT_POOL */
   reps->text = svn_packed__get_bytes(text_stream, &reps->text_len);
   reps->text = apr_pmemdup(result_pool, reps->text, reps->text_len);

   /* de-serialize  bases; the element count is taken from the first
    * sub-stream (one entry per base) */
   reps->base_count
     = svn_packed__int_count(svn_packed__first_int_substream(bases_stream));
   bases = apr_palloc(result_pool, reps->base_count * sizeof(*bases));
   reps->bases = bases;

   for (i = 0; i < reps->base_count; ++i)
     {
       base_t *base = bases + i;
       base->revision = (svn_revnum_t)svn_packed__get_int(bases_stream);
       base->item_index = svn_packed__get_uint(bases_stream);
       base->priority = (int)svn_packed__get_uint(bases_stream);
       base->rep = (apr_uint32_t)svn_packed__get_uint(bases_stream);
     }

   /* de-serialize instructions; the element count is taken from the
    * first sub-stream (one entry per instruction) */
   reps->instruction_count
     = svn_packed__int_count
          (svn_packed__first_int_substream(instructions_stream));
   instructions
     = apr_palloc(result_pool,
                  reps->instruction_count * sizeof(*instructions));
   reps->instructions = instructions;

   for (i = 0; i < reps->instruction_count; ++i)
     {
       instruction_t *instruction = instructions + i;
       instruction->offset
         = (apr_int32_t)svn_packed__get_int(instructions_stream);
       instruction->count
         = (apr_uint32_t)svn_packed__get_uint(instructions_stream);
     }

   /* de-serialize reps */
   /* NOTE(review): the writer appends one extra value (the total
    * instruction count) to the reps stream.  If svn_packed__int_count()
    * includes that value, REP_COUNT is one larger than the number of
    * fulltexts that have been added - confirm. */
   reps->rep_count = svn_packed__int_count(reps_stream);
   first_instructions
     = apr_palloc(result_pool,
                  (reps->rep_count + 1) * sizeof(*first_instructions));
   reps->first_instructions = first_instructions;

   for (i = 0; i < reps->rep_count; ++i)
     first_instructions[i]
       = (apr_uint32_t)svn_packed__get_uint(reps_stream);

   /* the extra element at the end terminates the last instruction range */
   first_instructions[reps->rep_count] = (apr_uint32_t)reps->instruction_count;

   /* other elements */
   reps->base_text_len = (apr_size_t)svn_packed__get_uint(misc_stream);

   /* return result */
   *container = reps;

   return SVN_NO_ERROR;
 }

 /* Serialize the svn_fs_x__reps_t container IN, including its text corpus
  * and all of its arrays, into a single buffer allocated in POOL.  Return
  * the buffer in *DATA and its length in *DATA_LEN.
  */
 svn_error_t *
 svn_fs_x__serialize_reps_container(void **data,
                                    apr_size_t *data_len,
                                    void *in,
                                    apr_pool_t *pool)
 {
   svn_fs_x__reps_t *reps = in;
   svn_stringbuf_t *serialized;

   /* FIRST_INSTRUCTIONS has one element more than there are reps.  That
    * last element terminates the instruction range of the last rep and
    * gets read by svn_fs_x__reps_get(), so it must be serialized, too. */
   apr_size_t first_instructions_size
     = (reps->rep_count + 1) * sizeof(*reps->first_instructions);

   /* make a guesstimate on the size of the serialized data.  Erring on the
    * low side will cause the serializer to re-alloc its buffer. */
   apr_size_t size
     = reps->text_len
     + reps->base_count * sizeof(*reps->bases)
     + first_instructions_size
     + reps->instruction_count * sizeof(*reps->instructions)
     + 100;

   /* serialize array header and all its elements */
   svn_temp_serializer__context_t *context
     = svn_temp_serializer__init(reps, sizeof(*reps), size, pool);

   /* serialize sub-structures */
   svn_temp_serializer__add_leaf(context, (const void **)&reps->text,
                                 reps->text_len);
   svn_temp_serializer__add_leaf(context, (const void **)&reps->bases,
                                 reps->base_count * sizeof(*reps->bases));
   svn_temp_serializer__add_leaf(context,
                                 (const void **)&reps->first_instructions,
                                 first_instructions_size);
   svn_temp_serializer__add_leaf(context, (const void **)&reps->instructions,
                                 reps->instruction_count *
                                     sizeof(*reps->instructions));

   /* return the serialized result */
   serialized = svn_temp_serializer__get(context);

   *data = serialized->data;
   *data_len = serialized->len;

   return SVN_NO_ERROR;
 }

 /* Turn the serialized container in the buffer DATA back into a usable
  * svn_fs_x__reps_t by resolving its array pointers in place and return
  * it in *OUT.  DATA_LEN and RESULT_POOL are not used.
  */
 svn_error_t *
 svn_fs_x__deserialize_reps_container(void **out,
                                      void *data,
                                      apr_size_t data_len,
                                      apr_pool_t *result_pool)
 {
   svn_fs_x__reps_t *container = data;

   /* fix up the pointers to all four sub-structures */
   svn_temp_deserializer__resolve(container, (void **)&container->text);
   svn_temp_deserializer__resolve(container, (void **)&container->bases);
   svn_temp_deserializer__resolve(container,
                                  (void **)&container->first_instructions);
   svn_temp_deserializer__resolve(container,
                                  (void **)&container->instructions);

   *out = container;

   return SVN_NO_ERROR;
 }

 /* Create an extractor for a single fulltext directly from the
  * serialized container DATA without modifying that buffer.  BATON is a
  * svn_fs_x__reps_baton_t selecting the filesystem and the index of the
  * fulltext.  Return the extractor, allocated in POOL, in *OUT.
  * DATA_LEN is not used.
  */
 svn_error_t *
 svn_fs_x__reps_get_func(void **out,
                         const void *data,
                         apr_size_t data_len,
                         void *baton,
                         apr_pool_t *pool)
 {
   const svn_fs_x__reps_t *cached = data;
   svn_fs_x__reps_baton_t *reps_baton = baton;
   svn_fs_x__rep_extractor_t **extractor_p
     = (svn_fs_x__rep_extractor_t **)out;

   /* Work on a shallow copy of the container struct and let its pointers
    * refer to the arrays within the serialized buffer. */
   svn_fs_x__reps_t *reps = apr_pmemdup(pool, cached, sizeof(*reps));

   reps->text
     = svn_temp_deserializer__ptr(cached, (const void **)&cached->text);
   reps->bases
     = svn_temp_deserializer__ptr(cached, (const void **)&cached->bases);
   reps->first_instructions
     = svn_temp_deserializer__ptr(cached,
                                  (const void **)&cached->first_instructions);
   reps->instructions
     = svn_temp_deserializer__ptr(cached,
                                  (const void **)&cached->instructions);

   /* construct the extractor for the requested fulltext */
   SVN_ERR(svn_fs_x__reps_get(extractor_p, reps_baton->fs, reps,
                              reps_baton->idx, pool));

   return SVN_NO_ERROR;
 }