blob: d6c09cf7637881f6adaaa62f46ba5a22c178e7a7 [file] [log] [blame]
/* cached_data.c --- cached (read) access to FSX data
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*/
#include "cached_data.h"
#include <assert.h>
#include "svn_hash.h"
#include "svn_ctype.h"
#include "svn_sorts.h"
#include "private/svn_io_private.h"
#include "private/svn_sorts_private.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"
#include "private/svn_temp_serializer.h"
#include "fs_x.h"
#include "low_level.h"
#include "util.h"
#include "pack.h"
#include "temp_serializer.h"
#include "index.h"
#include "changes.h"
#include "noderevs.h"
#include "reps.h"
#include "../libsvn_fs/fs-loader.h"
#include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
#include "svn_private_config.h"
/* Forward declaration; see the implementation for the full docstring.
 *
 * As used in this part of the file: FS, ID and REVISION_FILE identify the
 * item to read, BATON is passed as NULL, and RESULT may be NULL when the
 * caller only wants the caches populated and does not need the parsed
 * item back. */
static svn_error_t *
block_read(void **result,
svn_fs_t *fs,
const svn_fs_x__id_t *id,
svn_fs_x__revision_file_t *revision_file,
void *baton,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
/* Define this to enable access logging via dbg__log_access
#define SVN_FS_X__LOG_ACCESS
*/
/* When SVN_FS_X__LOG_ACCESS has been defined, write a line to the console
 * showing where ID is located in FS and, if ITEM is not NULL, use it to
 * show details on the item's contents.  ITEM_TYPE selects how ITEM gets
 * interpreted (noderev, representation header or changes list).
 * Use SCRATCH_POOL for temporary allocations.
 *
 * Without that macro, this function does nothing and returns SVN_NO_ERROR.
 */
static svn_error_t *
dbg__log_access(svn_fs_t *fs,
                const svn_fs_x__id_t *id,
                void *item,
                apr_uint32_t item_type,
                apr_pool_t *scratch_pool)
{
  /* no-op if this macro is not defined */
#ifdef SVN_FS_X__LOG_ACCESS
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_fs_x__revision_file_t *rev_file;
  apr_off_t offset = -1;
  apr_off_t end_offset = 0;
  apr_uint32_t sub_item = 0;
  svn_fs_x__p2l_entry_t *entry = NULL;
  static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
                                "node ", "chgs ", "rep ", "c:", "n:", "r:"};
  const apr_size_t type_count = sizeof(types) / sizeof(types[0]);
  const char *description = "";

  /* Never index TYPES with a value it has no entry for. */
  const char *type = item_type < type_count ? types[item_type] : types[0];
  const char *pack = "";
  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);

  /* The index lookups below take a revision file object, just like all
     other callers in this file. */
  SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, scratch_pool));

  /* determine rev / pack file offset */
  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, id,
                                scratch_pool));

  /* constructing the pack file description */
  if (revision < ffd->min_unpacked_rev)
    pack = apr_psprintf(scratch_pool, "%4ld|",
                        revision / ffd->max_files_per_dir);

  /* construct description if possible */
  if (item_type == SVN_FS_X__ITEM_TYPE_NODEREV && item != NULL)
    {
      svn_fs_x__noderev_t *node = item;
      const char *data_rep
        = node->data_rep
        ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
                       svn_fs_x__get_revnum(node->data_rep->id.change_set),
                       node->data_rep->id.number)
        : "";
      const char *prop_rep
        = node->prop_rep
        ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
                       svn_fs_x__get_revnum(node->prop_rep->id.change_set),
                       node->prop_rep->id.number)
        : "";
      description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
                                 node->created_path,
                                 node->predecessor_count,
                                 data_rep,
                                 prop_rep);
    }
  else if (item_type == SVN_FS_X__ITEM_TYPE_ANY_REP)
    {
      /* A NULL header means the caller logs a window access rather than
         a rep header access. */
      svn_fs_x__rep_header_t *header = item;
      if (header == NULL)
        description = " (txdelta window)";
      else if (header->type == svn_fs_x__rep_self_delta)
        description = " DELTA";
      else
        description = apr_psprintf(scratch_pool,
                                   " DELTA against %ld/%" APR_UINT64_T_FMT,
                                   header->base_revision,
                                   header->base_item_index);
    }
  else if (item_type == SVN_FS_X__ITEM_TYPE_CHANGES && item != NULL)
    {
      apr_array_header_t *changes = item;
      switch (changes->nelts)
        {
          case 0:  description = " no change";
                   break;
          case 1:  description = " 1 change";
                   break;
          default: description = apr_psprintf(scratch_pool, " %d changes",
                                              changes->nelts);
        }
    }

  /* reverse index lookup: get item description in ENTRY */
  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset,
                                     scratch_pool, scratch_pool));
  if (entry)
    {
      /* more details */
      end_offset = offset + entry->size;
      if (entry->type < type_count)
        type = types[entry->type];

      /* merge the sub-item number with the container type */
      if (   entry->type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT
          || entry->type == SVN_FS_X__ITEM_TYPE_NODEREVS_CONT
          || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT)
        type = apr_psprintf(scratch_pool, "%s%-3d", type, sub_item);
    }

  /* line output */
  printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
         pack, (long)(offset / ffd->block_size),
         (long)(offset % ffd->block_size),
         (long)(end_offset / ffd->block_size),
         (long)(end_offset % ffd->block_size),
         type, revision, id->number, description);
#endif

  return SVN_NO_ERROR;
}
/* Locate the item addressed by ID in filesystem FS: make sure its revision
   exists, set up the matching rev / pack file object, look up the item's
   offset and position the file there.  The file object is returned in
   *FILE.
   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
static svn_error_t *
open_and_seek_revision(svn_fs_x__revision_file_t **file,
                       svn_fs_t *fs,
                       const svn_fs_x__id_t *id,
                       apr_pool_t *result_pool,
                       apr_pool_t *scratch_pool)
{
  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
  svn_fs_x__revision_file_t *opened;
  apr_uint32_t sub_item_idx = 0;
  apr_off_t item_pos = -1;

  SVN_ERR(svn_fs_x__ensure_revision_exists(revision, fs, scratch_pool));
  SVN_ERR(svn_fs_x__rev_file_init(&opened, fs, revision, result_pool));

  /* Only the offset is needed here; the sub-item index is discarded. */
  SVN_ERR(svn_fs_x__item_offset(&item_pos, &sub_item_idx, fs, opened, id,
                                scratch_pool));
  SVN_ERR(svn_fs_x__rev_file_seek(opened, NULL, item_pos));

  *file = opened;

  return SVN_NO_ERROR;
}
/* Open the proto-rev file of the transaction that representation REP in
   filesystem FS belongs to, seek to REP's position within it and return
   the file in *FILE.
   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
static svn_error_t *
open_and_seek_transaction(svn_fs_x__revision_file_t **file,
                          svn_fs_t *fs,
                          svn_fs_x__representation_t *rep,
                          apr_pool_t *result_pool,
                          apr_pool_t *scratch_pool)
{
  apr_int64_t txn = svn_fs_x__get_txn_id(rep->id.change_set);
  apr_uint32_t sub_item_idx = 0;
  apr_off_t rep_pos;

  SVN_ERR(svn_fs_x__rev_file_open_proto_rev(file, fs, txn, result_pool,
                                            scratch_pool));
  SVN_ERR(svn_fs_x__item_offset(&rep_pos, &sub_item_idx, fs, *file,
                                &rep->id, scratch_pool));

  return svn_error_trace(svn_fs_x__rev_file_seek(*file, NULL, rep_pos));
}
/* Open the file that holds representation REP in filesystem FS and seek
   to REP's location.  Committed reps are served from the rev / pack file,
   everything else from the transaction's proto-rev file.  Return the file
   in *FILE_P.
   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
static svn_error_t *
open_and_seek_representation(svn_fs_x__revision_file_t **file_p,
                             svn_fs_t *fs,
                             svn_fs_x__representation_t *rep,
                             apr_pool_t *result_pool,
                             apr_pool_t *scratch_pool)
{
  /* in-txn data first ... */
  if (!svn_fs_x__is_revision(rep->id.change_set))
    return open_and_seek_transaction(file_p, fs, rep, result_pool,
                                     scratch_pool);

  /* ... everything else lives in a revision */
  return open_and_seek_revision(file_p, fs, &rep->id, result_pool,
                                scratch_pool);
}
/* Return an SVN_ERR_FS_ID_NOT_FOUND error stating that ID refers to a
   node that does not exist in filesystem FS.  The textual form of ID is
   allocated in FS->POOL. */
static svn_error_t *
err_dangling_id(svn_fs_t *fs,
                const svn_fs_x__id_t *id)
{
  svn_string_t *unparsed_id = svn_fs_x__id_unparse(id, fs->pool);
  const char *id_text = unparsed_id->data;

  return svn_error_createf(
           SVN_ERR_FS_ID_NOT_FOUND, NULL,
           _("Reference to non-existent node '%s' in filesystem '%s'"),
           id_text, fs->path);
}
/* Get the node-revision for the node ID in FS.
Set *NODEREV_P to the new node-revision structure, allocated in
RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
Noderevs of transactions are parsed from their own file. Noderevs in
rev / pack files are looked up in the noderevs container cache (packed
revisions only) and the noderev cache first; only if both miss, the data
is read via block_read().
See svn_fs_x__get_node_revision, which wraps this and adds another
error. */
static svn_error_t *
get_node_revision_body(svn_fs_x__noderev_t **noderev_p,
svn_fs_t *fs,
const svn_fs_x__id_t *id,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
svn_error_t *err;
svn_boolean_t is_cached = FALSE;
svn_fs_x__data_t *ffd = fs->fsap_data;
if (svn_fs_x__is_txn(id->change_set))
{
apr_file_t *file;
svn_stream_t *stream;
/* This is a transaction node-rev. Its storage logic is very
different from that of rev / pack files. */
err = svn_io_file_open(&file,
svn_fs_x__path_txn_node_rev(fs, id,
scratch_pool,
scratch_pool),
APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
scratch_pool);
/* A missing file means that ID does not exist; report that with a
dedicated error instead of the raw I/O error. */
if (err && APR_STATUS_IS_ENOENT(err->apr_err))
{
svn_error_clear(err);
return svn_error_trace(err_dangling_id(fs, id));
}
else if (err)
{
return svn_error_trace(err);
}
/* Be sure to close the file ASAP. */
/* NOTE(review): presumably svn_fs_x__read_noderev closes STREAM and,
with it, FILE -- confirm against its implementation. */
stream = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream,
result_pool, scratch_pool));
}
else
{
svn_fs_x__revision_file_t *revision_file;
/* noderevs in rev / pack files can be cached */
svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
svn_fs_x__pair_cache_key_t key;
SVN_ERR(svn_fs_x__rev_file_init(&revision_file, fs, revision,
scratch_pool));
/* First, try a noderevs container cache lookup. */
if ( svn_fs_x__is_packed_rev(fs, revision)
&& ffd->noderevs_container_cache)
{
apr_off_t offset;
apr_uint32_t sub_item;
SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file,
id, scratch_pool));
/* Containers are keyed by the pack's base revision and the
container's offset; SUB_ITEM selects the noderev within it. */
key.revision = svn_fs_x__packed_base_rev(fs, revision);
key.second = offset;
SVN_ERR(svn_cache__get_partial((void **)noderev_p, &is_cached,
ffd->noderevs_container_cache, &key,
svn_fs_x__noderevs_get_func,
&sub_item, result_pool));
if (is_cached)
return SVN_NO_ERROR;
}
/* Individual noderevs are keyed by revision and item number. */
key.revision = revision;
key.second = id->number;
/* Not found or not applicable. Try a noderev cache lookup.
* If that succeeds, we are done here. */
SVN_ERR(svn_cache__get((void **) noderev_p,
&is_cached,
ffd->node_revision_cache,
&key,
result_pool));
if (is_cached)
return SVN_NO_ERROR;
/* block-read will parse the whole block and will also return
the one noderev that we need right now. */
SVN_ERR(block_read((void **)noderev_p, fs,
id,
revision_file,
NULL,
result_pool,
scratch_pool));
/* Only this path closes REVISION_FILE explicitly; the cache-hit
returns above leave it to SCRATCH_POOL, in which it was created. */
SVN_ERR(svn_fs_x__close_revision_file(revision_file));
}
return SVN_NO_ERROR;
}
/* Read the node-revision ID from filesystem FS and return it in
   *NODEREV_P, allocated in RESULT_POOL.  Use SCRATCH_POOL for temporary
   allocations.

   This wraps get_node_revision_body(): an SVN_ERR_FS_CORRUPT error gets
   wrapped into another SVN_ERR_FS_CORRUPT error that names the offending
   node-revision; all other errors are passed through.  *NODEREV_P is only
   meaningful when no error is returned. */
svn_error_t *
svn_fs_x__get_node_revision(svn_fs_x__noderev_t **noderev_p,
                            svn_fs_t *fs,
                            const svn_fs_x__id_t *id,
                            apr_pool_t *result_pool,
                            apr_pool_t *scratch_pool)
{
  svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
                                            result_pool, scratch_pool);

  /* Bail out on any error before touching *NODEREV_P: it may not have
     been set, and a failing access log call must not drop ERR. */
  if (err)
    {
      if (err->apr_err == SVN_ERR_FS_CORRUPT)
        {
          svn_string_t *id_string = svn_fs_x__id_unparse(id, scratch_pool);
          return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                                   "Corrupt node-revision '%s'",
                                   id_string->data);
        }

      return svn_error_trace(err);
    }

  SVN_ERR(dbg__log_access(fs, id, *noderev_p,
                          SVN_FS_X__ITEM_TYPE_NODEREV, scratch_pool));

  return SVN_NO_ERROR;
}
/* Set *COUNT to the mergeinfo count of the node-revision ID in FS.
Unless access logging is enabled, noderevs of packed revisions are first
looked up in the noderevs container cache, which avoids constructing
the full noderev. In all other cases, the noderev is read through
svn_fs_x__get_node_revision() and its MERGEINFO_COUNT is returned.
Use SCRATCH_POOL for temporary allocations. */
svn_error_t *
svn_fs_x__get_mergeinfo_count(apr_int64_t *count,
svn_fs_t *fs,
const svn_fs_x__id_t *id,
apr_pool_t *scratch_pool)
{
svn_fs_x__noderev_t *noderev;
/* If we want a full access log, we need to provide full data and
cannot take shortcuts here. */
#if !defined(SVN_FS_X__LOG_ACCESS)
/* First, try a noderevs container cache lookup. */
if (! svn_fs_x__is_txn(id->change_set))
{
/* noderevs in rev / pack files can be cached */
svn_fs_x__data_t *ffd = fs->fsap_data;
svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
svn_fs_x__revision_file_t *rev_file;
SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision,
scratch_pool));
if ( svn_fs_x__is_packed_rev(fs, revision)
&& ffd->noderevs_container_cache)
{
svn_fs_x__pair_cache_key_t key;
apr_off_t offset;
apr_uint32_t sub_item;
svn_boolean_t is_cached;
SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file,
id, scratch_pool));
/* Same key scheme as for full noderev lookups in the container
cache: pack base revision plus container offset. */
key.revision = svn_fs_x__packed_base_rev(fs, revision);
key.second = offset;
/* NOTE(review): COUNT is an apr_int64_t* passed as void**;
presumably svn_fs_x__mergeinfo_count_get_func stores the count
directly through that pointer instead of returning a pointer
to it -- confirm against the getter's implementation. */
SVN_ERR(svn_cache__get_partial((void **)count, &is_cached,
ffd->noderevs_container_cache, &key,
svn_fs_x__mergeinfo_count_get_func,
&sub_item, scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
}
}
#endif
/* fallback to the naive implementation handling all edge cases */
SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, id, scratch_pool,
scratch_pool));
*count = noderev->mergeinfo_count;
return SVN_NO_ERROR;
}
/* Describes a lazily opened rev / pack file. Instances will be shared
between multiple instances of rep_state_t. */
typedef struct shared_file_t
{
/* The revision file object. NULL while it has not been created, yet;
see auto_open_shared_file(). */
svn_fs_x__revision_file_t *rfile;
/* file system to open the file in */
svn_fs_t *fs;
/* a revision contained in RFILE. Since this file may be shared,
that value may be different from the revision of the rep addressed
by the REP_STATE_T using it. */
svn_revnum_t revision;
/* pool to use when creating RFILE. This guarantees that the file
remains open / valid beyond the respective local context that required
the file to be opened eventually. */
apr_pool_t *pool;
} shared_file_t;
/* Represents where in the current svndiff data block each
representation is. */
typedef struct rep_state_t
{
/* shared lazy-open rev/pack file structure */
shared_file_t *sfile;
/* The txdelta window cache to use or NULL. */
svn_cache__t *window_cache;
/* Caches un-deltified windows. May be NULL. */
svn_cache__t *combined_cache;
/* ID addressing the representation */
svn_fs_x__id_t rep_id;
/* length of the header at the start of the rep.
0 iff this rep is stored in a container
(i.e. does not have a header) */
apr_size_t header_size;
apr_off_t start; /* The starting offset for the raw
svndiff data minus header.
-1 if the offset is yet unknown. */
/* sub-item index in case the rep is containered */
apr_uint32_t sub_item;
apr_off_t current;/* The current offset relative to START. */
apr_off_t size; /* The on-disk size of the representation. */
int ver; /* If a delta, what svndiff version?
-1 for unknown delta version. */
int chunk_index; /* number of the window to read */
} rep_state_t;
/* Make sure FILE->RFILE is available: if it has not been set up yet,
   initialize it for FILE->REVISION in FILE->FS, allocating in
   FILE->POOL. */
static svn_error_t*
auto_open_shared_file(shared_file_t *file)
{
  /* nothing to do if we already have a file object */
  if (file->rfile != NULL)
    return SVN_NO_ERROR;

  return svn_error_trace(svn_fs_x__rev_file_init(&file->rfile, file->fs,
                                                 file->revision,
                                                 file->pool));
}
/* If RS->START is still unknown (-1), look up the location of the rep
   RS->REP_ID in RS->SFILE->RFILE, remember its sub-item index in
   RS->SUB_ITEM and set RS->START to the first byte behind the rep header.
   Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t*
auto_set_start_offset(rep_state_t *rs,
                      apr_pool_t *scratch_pool)
{
  shared_file_t *shared;

  /* already known? */
  if (rs->start != -1)
    return SVN_NO_ERROR;

  shared = rs->sfile;
  SVN_ERR(svn_fs_x__item_offset(&rs->start, &rs->sub_item,
                                shared->fs, shared->rfile,
                                &rs->rep_id, scratch_pool));

  /* the raw data begins right behind the header */
  rs->start += rs->header_size;

  return SVN_NO_ERROR;
}
/* If the svndiff version of RS is still unknown (RS->VER == -1), read the
   4-byte svndiff marker at RS->START from RS->SFILE->RFILE, store the
   version byte in RS->VER and position RS at the first window, i.e. right
   behind the marker.  Return SVN_ERR_FS_CORRUPT if the marker does not
   start with "SVN".  Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t*
auto_read_diff_version(rep_state_t *rs,
                       apr_pool_t *scratch_pool)
{
  char marker[4];
  svn_fs_x__revision_file_t *rev_file;

  /* version already known? */
  if (rs->ver != -1)
    return SVN_NO_ERROR;

  rev_file = rs->sfile->rfile;
  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, rs->start));
  SVN_ERR(svn_fs_x__rev_file_read(rev_file, marker, sizeof(marker)));

  /* ### Layering violation */
  if (marker[0] != 'S' || marker[1] != 'V' || marker[2] != 'N')
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Malformed svndiff data in representation"));

  rs->ver = marker[3];

  /* the first window follows the marker immediately */
  rs->chunk_index = 0;
  rs->current = 4;

  return SVN_NO_ERROR;
}
/* Create a rep_state_t for representation REP in FS and return it in
*REP_STATE together with REP's header in *REP_HEADER, both allocated
in RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
If SHARED_FILE is not NULL and *SHARED_FILE refers to the same pack file
as REP, that file object is re-used; otherwise a new shared_file_t is
created and, if SHARED_FILE is not NULL, stored in *SHARED_FILE.
For committed reps, the header is taken from the rep header cache when
available. For reps found to be stored in a reps container, a synthetic
header of type svn_fs_x__rep_container is returned and the function
exits early.
See create_rep_state, which wraps this and adds another error. */
static svn_error_t *
create_rep_state_body(rep_state_t **rep_state,
svn_fs_x__rep_header_t **rep_header,
shared_file_t **shared_file,
svn_fs_x__representation_t *rep,
svn_fs_t *fs,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
svn_fs_x__data_t *ffd = fs->fsap_data;
rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
svn_fs_x__rep_header_t *rh;
svn_boolean_t is_cached = FALSE;
svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
apr_uint64_t estimated_window_storage;
/* If the hint is
* - given,
* - refers to a valid revision,
* - refers to a packed revision,
* - as does the rep we want to read, and
* - refers to the same pack file as the rep
* we can re-use the same, already open file object
*/
svn_boolean_t reuse_shared_file
= shared_file && *shared_file && (*shared_file)->rfile
&& SVN_IS_VALID_REVNUM((*shared_file)->revision)
&& (*shared_file)->revision < ffd->min_unpacked_rev
&& revision < ffd->min_unpacked_rev
&& ( ((*shared_file)->revision / ffd->max_files_per_dir)
== (revision / ffd->max_files_per_dir));
/* key for the rep header cache lookup / update below */
svn_fs_x__representation_cache_key_t key = { 0 };
key.revision = revision;
key.is_packed = revision < ffd->min_unpacked_rev;
key.item_index = rep->id.number;
/* continue constructing RS */
rs->size = rep->size;
rs->rep_id = rep->id;
rs->ver = -1;
rs->start = -1;
/* Very long files stored as self-delta will produce a huge number of
delta windows. Don't cache them lest we don't thrash the cache.
Since we don't know the depth of the delta chain, let's assume, the
whole contents get rewritten 3 times (hence the factor 4: the
contents plus 3 rewrites).
*/
estimated_window_storage
= 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size)
+ SVN_DELTA_WINDOW_SIZE);
/* clamp to what fits into the apr_size_t casts below */
estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
rs->window_cache = ffd->txdelta_window_cache
&& svn_cache__is_cachable(ffd->txdelta_window_cache,
(apr_size_t)estimated_window_storage)
? ffd->txdelta_window_cache
: NULL;
rs->combined_cache = ffd->combined_window_cache
&& svn_cache__is_cachable(ffd->combined_window_cache,
(apr_size_t)estimated_window_storage)
? ffd->combined_window_cache
: NULL;
/* cache lookup, i.e. skip reading the rep header if possible */
if (SVN_IS_VALID_REVNUM(revision))
SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
ffd->rep_header_cache, &key, result_pool));
/* initialize the (shared) FILE member in RS */
if (reuse_shared_file)
{
rs->sfile = *shared_file;
}
else
{
shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
file->revision = revision;
file->pool = result_pool;
file->fs = fs;
rs->sfile = file;
/* remember the current file, if suggested by the caller */
if (shared_file)
*shared_file = file;
}
/* read rep header, if necessary */
if (!is_cached)
{
svn_stream_t *stream;
/* we will need the on-disk location for non-txn reps */
apr_off_t offset;
/* Stays TRUE for sub-items other than 0, i.e. those are treated as
containered without consulting the index. */
svn_boolean_t in_container = TRUE;
/* ensure file is open and navigate to the start of rep header */
if (reuse_shared_file)
{
/* ... we can re-use the same, already open file object.
* This implies that we don't read from a txn.
*/
/* (RS->SFILE has already been set to *SHARED_FILE above.) */
rs->sfile = *shared_file;
SVN_ERR(auto_open_shared_file(rs->sfile));
}
else
{
/* otherwise, create a new file object. May or may not be
* an in-txn file.
*/
SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
result_pool, scratch_pool));
}
if (SVN_IS_VALID_REVNUM(revision))
{
apr_uint32_t sub_item;
SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs,
rs->sfile->rfile, &rep->id,
scratch_pool));
/* is rep stored in some star-deltified container? */
if (sub_item == 0)
{
svn_fs_x__p2l_entry_t *entry;
SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rs->sfile->rfile,
revision, offset,
scratch_pool, scratch_pool));
/* NOTE(review): ENTRY is dereferenced without a NULL check,
whereas svn_fs_x__check_rep tests for ENTRY == NULL after the
same lookup -- confirm that NULL cannot occur here. */
in_container = entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT;
}
if (in_container)
{
/* construct a container rep header */
*rep_header = apr_pcalloc(result_pool, sizeof(**rep_header));
(*rep_header)->type = svn_fs_x__rep_container;
/* exit to caller */
*rep_state = rs;
return SVN_NO_ERROR;
}
SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
}
/* parse the header; the file position behind it is where the raw
rep data starts */
SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
SVN_ERR(svn_fs_x__read_rep_header(&rh, stream,
result_pool, scratch_pool));
SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
/* populate the cache if appropriate */
if (SVN_IS_VALID_REVNUM(revision))
{
SVN_ERR(block_read(NULL, fs, &rs->rep_id, rs->sfile->rfile, NULL,
result_pool, scratch_pool));
SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
scratch_pool));
}
}
/* finalize */
SVN_ERR(dbg__log_access(fs, &rs->rep_id, rh, SVN_FS_X__ITEM_TYPE_ANY_REP,
scratch_pool));
rs->header_size = rh->header_size;
*rep_state = rs;
*rep_header = rh;
rs->chunk_index = 0;
/* skip "SVNx" diff marker */
rs->current = 4;
return SVN_NO_ERROR;
}
/* Read the rep header for REP in filesystem FS and create a rep_state
   for reading the representation.  Return the rep_state in *REP_STATE
   and the rep header in *REP_HEADER, both allocated in RESULT_POOL.
   Use SCRATCH_POOL for temporary allocations.

   When reading multiple reps, i.e. a skip delta chain, you may provide
   non-NULL SHARED_FILE.  (If SHARED_FILE is not NULL, in the first
   call it should be a pointer to NULL.)  The function will use this
   variable to store the previous call results and tries to re-use it.
   This may result in significant savings in I/O for packed files and
   number of open file handles.

   SVN_ERR_FS_CORRUPT errors get wrapped into another SVN_ERR_FS_CORRUPT
   error that names the representation.
 */
static svn_error_t *
create_rep_state(rep_state_t **rep_state,
                 svn_fs_x__rep_header_t **rep_header,
                 shared_file_t **shared_file,
                 svn_fs_x__representation_t *rep,
                 svn_fs_t *fs,
                 apr_pool_t *result_pool,
                 apr_pool_t *scratch_pool)
{
  const char *rep_text;
  svn_error_t *err = create_rep_state_body(rep_state, rep_header,
                                           shared_file, rep, fs,
                                           result_pool, scratch_pool);

  /* Success and all non-corruption errors are passed through as is. */
  /* ### Call representation_string() ? */
  if (err == NULL || err->apr_err != SVN_ERR_FS_CORRUPT)
    return svn_error_trace(err);

  /* ### This always returns "-1" for transaction reps, because
     ### this particular bit of code doesn't know if the rep is
     ### stored in the protorev or in the mutable area (for props
     ### or dir contents).  It is pretty rare for FSX to *read*
     ### from the protorev file, though, so this is probably OK.
     ### And anyone going to debug corruption errors is probably
     ### going to jump straight to this comment anyway! */
  if (rep)
    rep_text = svn_fs_x__unparse_representation(rep, TRUE, scratch_pool,
                                                scratch_pool)->data;
  else
    rep_text = "(null)";

  return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                           "Corrupt representation '%s'",
                           rep_text);
}
/* Verify that REP in filesystem FS addresses an actual representation:
   look up the item REP->ID refers to and check that the index describes
   it as a file / dir representation, a file / dir property
   representation or a reps container.  Return SVN_ERR_FS_CORRUPT
   otherwise.  Use SCRATCH_POOL for temporary allocations. */
svn_error_t *
svn_fs_x__check_rep(svn_fs_x__representation_t *rep,
                    svn_fs_t *fs,
                    apr_pool_t *scratch_pool)
{
  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
  svn_fs_x__revision_file_t *rev_file;
  svn_fs_x__p2l_entry_t *entry;
  apr_uint32_t sub_item;
  apr_off_t offset;
  svn_boolean_t is_rep;

  SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, scratch_pool));

  /* Does REP->ID refer to an actual item?  Which one is it? */
  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, &rep->id,
                                scratch_pool));

  /* What is the type of that item? */
  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset,
                                     scratch_pool, scratch_pool));

  /* Only these item types may contain a representation. */
  is_rep = entry != NULL
           && (   entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP
               || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP
               || entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS
               || entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS
               || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT);
  if (is_rep)
    return SVN_NO_ERROR;

  return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                           _("No representation found at offset %s "
                             "for item %s in revision %ld"),
                           apr_off_t_toa(scratch_pool, offset),
                           apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
                                        rep->id.number),
                           revision);
}
/* Follow the delta chain starting at representation REP in FS.
Set *CHAIN_LENGTH to the number of representations visited and
*SHARD_COUNT to the number of times the walk entered a different shard,
counting the shard of REP as the first one.
Do any allocations in SCRATCH_POOL. */
svn_error_t *
svn_fs_x__rep_chain_length(int *chain_length,
int *shard_count,
svn_fs_x__representation_t *rep,
svn_fs_t *fs,
apr_pool_t *scratch_pool)
{
svn_fs_x__data_t *ffd = fs->fsap_data;
svn_revnum_t shard_size = ffd->max_files_per_dir;
svn_boolean_t is_delta = FALSE;
int count = 0;
int shards = 1;
svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
svn_revnum_t last_shard = revision / shard_size;
/* Note that this iteration pool will be used in a non-standard way.
* To reuse open file handles between iterations (e.g. while within the
* same pack file), we only clear this pool once in a while instead of
* at the start of each iteration. */
apr_pool_t *iterpool = svn_pool_create(scratch_pool);
/* Check whether the length of the deltification chain is acceptable.
* Otherwise, shared reps may form a non-skipping delta chain in
* extreme cases. */
svn_fs_x__representation_t base_rep = *rep;
/* re-use open files between iterations */
shared_file_t *file_hint = NULL;
svn_fs_x__rep_header_t *header;
/* follow the delta chain towards its end, i.e. until we find a rep that
* is not a delta against another rep or that has no base change set. */
do
{
rep_state_t *rep_state;
revision = svn_fs_x__get_revnum(base_rep.id.change_set);
/* count every transition from one shard to another */
if (revision / shard_size != last_shard)
{
last_shard = revision / shard_size;
++shards;
}
SVN_ERR(create_rep_state_body(&rep_state,
&header,
&file_hint,
&base_rep,
fs,
iterpool,
iterpool));
/* continue with the delta base named in the header */
base_rep.id.change_set
= svn_fs_x__change_set_by_rev(header->base_revision);
base_rep.id.number = header->base_item_index;
base_rep.size = header->base_length;
is_delta = header->type == svn_fs_x__rep_delta;
/* Clear it the ITERPOOL once in a while. Doing it too frequently
* renders the FILE_HINT ineffective. Doing too infrequently, may
* leave us with too many open file handles.
*
* Note that this is mostly about efficiency, with larger values
* being more efficient, and any non-zero value is legal here. When
* reading deltified contents, we may keep 10s of rev files open at
* the same time and the system has to cope with that. Thus, the
* limit of 16 chosen below is in the same ballpark.
*/
++count;
if (count % 16 == 0)
{
/* FILE_HINT was allocated in ITERPOOL, so drop it first. */
file_hint = NULL;
svn_pool_clear(iterpool);
}
}
while (is_delta && base_rep.id.change_set);
*chain_length = count;
*shard_count = shards;
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
/* Baton holding the state needed to read the contents of a single
representation. */
typedef struct rep_read_baton_t
{
/* The FS from which we're reading. */
svn_fs_t *fs;
/* Representation to read. */
svn_fs_x__representation_t rep;
/* If not NULL, this is the base for the first delta window in rs_list */
svn_stringbuf_t *base_window;
/* The state of all prior delta representations. */
apr_array_header_t *rs_list;
/* The plaintext state, if there is a plaintext. */
rep_state_t *src_state;
/* The index of the current delta chunk, if we are reading a delta. */
int chunk_index;
/* The buffer where we store undeltified data. */
char *buf;
/* NOTE(review): presumably the read position within BUF and the number
of valid bytes in BUF, respectively -- confirm against the readers. */
apr_size_t buf_pos;
apr_size_t buf_len;
/* A checksum context for summing the data read in order to verify it.
Note: we don't need to use the sha1 checksum because we're only doing
data verification, for which md5 is perfectly safe. */
svn_checksum_ctx_t *md5_checksum_ctx;
svn_boolean_t checksum_finalized;
/* The stored checksum of the representation we are reading, its
length, and the amount we've read so far. Some of this
information is redundant with rs_list and src_state, but it's
convenient for the checksumming code to have it here. */
unsigned char md5_digest[APR_MD5_DIGESTSIZE];
svn_filesize_t len;
svn_filesize_t off;
/* The key for the fulltext cache for this rep, if there is a
fulltext cache. */
svn_fs_x__pair_cache_key_t fulltext_cache_key;
/* The text we've been reading, if we're going to cache it. */
svn_stringbuf_t *current_fulltext;
/* If not NULL, attempt to read the data from this cache.
Once that lookup fails, reset it to NULL. */
svn_cache__t *fulltext_cache;
/* Bytes delivered from the FULLTEXT_CACHE so far. If the next
lookup fails, we need to skip that much data from the reconstructed
window stream before we continue normal operation. */
svn_filesize_t fulltext_delivered;
/* Used for temporary allocations during the read. */
apr_pool_t *scratch_pool;
/* Pool used to store file handles and other data that is persistent
for the entire stream read. */
apr_pool_t *filehandle_pool;
} rep_read_baton_t;
/* Fill *KEY such that it addresses the txdelta window that RS currently
   points to: the revision and item number of RS->REP_ID plus
   RS->CHUNK_INDEX.  For convenience, return KEY. */
static svn_fs_x__window_cache_key_t *
get_window_key(svn_fs_x__window_cache_key_t *key,
               rep_state_t *rs)
{
  const svn_fs_x__id_t *rep_id = &rs->rep_id;
  svn_revnum_t rev = svn_fs_x__get_revnum(rep_id->change_set);

  /* the key stores the revision in 32 bits only */
  assert(rev <= APR_UINT32_MAX);

  key->chunk_index = rs->chunk_index;
  key->item_index = rep_id->number;
  key->revision = (apr_uint32_t)rev;

  return key;
}
/* Read the WINDOW_P number CHUNK_INDEX for the representation given in
* rep state RS from the current FSX session's cache. This will be a
* no-op and IS_CACHED will be set to FALSE if no cache has been given.
* If a cache is available IS_CACHED will inform the caller about the
* success of the lookup. Allocations (of the window in particular) will
* be made from POOL.
*
* If the information could be found, put RS to CHUNK_INDEX.
*/
/* Return data type for get_cached_window_sizes_func: the sizes of a single
* cached txdelta window.
*/
typedef struct window_sizes_t
{
/* length of the txdelta window in its on-disk format, i.e. the distance
between the window's start and end offsets */
svn_filesize_t packed_len;
/* expanded (and combined) window length, taken from the window's
target view length */
svn_filesize_t target_len;
} window_sizes_t;
/* Implements svn_cache__partial_getter_func_t.  DATA is a serialized
 * svn_fs_x__txdelta_cached_window_t; extract its on-disk length and the
 * target view length of the window it contains and return them as a
 * window_sizes_t*, allocated in POOL, in *OUT.  DATA_LEN and BATON are
 * not used.
 */
static svn_error_t *
get_cached_window_sizes_func(void **out,
                             const void *data,
                             apr_size_t data_len,
                             void *baton,
                             apr_pool_t *pool)
{
  const svn_fs_x__txdelta_cached_window_t *cached = data;
  window_sizes_t *sizes = apr_palloc(pool, sizeof(*sizes));

  /* The window pointer within the serialized data must be resolved
     before it can be followed. */
  const svn_txdelta_window_t *delta_window
    = svn_temp_deserializer__ptr(cached, (const void **)&cached->window);

  sizes->target_len = delta_window->tview_len;
  sizes->packed_len = cached->end_offset - cached->start_offset;

  *out = sizes;

  return SVN_NO_ERROR;
}
/* Look up the txdelta window that rep state RS currently points to
 * (i.e. window number RS->CHUNK_INDEX) in RS->WINDOW_CACHE and return its
 * packed and expanded sizes in *SIZES, allocated in POOL.  *IS_CACHED
 * tells the caller whether the lookup was successful; *SIZES is only
 * meaningful if it was.
 */
static svn_error_t *
get_cached_window_sizes(window_sizes_t **sizes,
                        rep_state_t *rs,
                        svn_boolean_t *is_cached,
                        apr_pool_t *pool)
{
  svn_fs_x__window_cache_key_t window_key = { 0 };

  get_window_key(&window_key, rs);

  /* Have the cache extract just the size info from the cached window. */
  return svn_error_trace(svn_cache__get_partial((void **)sizes,
                                                is_cached,
                                                rs->window_cache,
                                                &window_key,
                                                get_cached_window_sizes_func,
                                                NULL,
                                                pool));
}
/* Look up txdelta window number CHUNK_INDEX of the representation given
 * in rep state RS in RS->WINDOW_CACHE.  *IS_CACHED tells the caller
 * whether the lookup was successful.  If it was, return the window in
 * *WINDOW_P, allocated in RESULT_POOL, and update RS as if the window had
 * just been read: RS->CURRENT is set to the window's end offset and
 * RS->CHUNK_INDEX to CHUNK_INDEX.  SCRATCH_POOL is not used.
 */
static svn_error_t *
get_cached_window(svn_txdelta_window_t **window_p,
                  rep_state_t *rs,
                  int chunk_index,
                  svn_boolean_t *is_cached,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  svn_fs_x__txdelta_cached_window_t *hit;
  svn_fs_x__window_cache_key_t window_key = { 0 };

  /* address the rep in RS but the window requested by the caller */
  get_window_key(&window_key, rs);
  window_key.chunk_index = chunk_index;

  /* ask the cache for the desired txdelta window */
  SVN_ERR(svn_cache__get((void **) &hit,
                         is_cached,
                         rs->window_cache,
                         &window_key,
                         result_pool));

  /* nothing else to do on a cache miss */
  if (!*is_cached)
    return SVN_NO_ERROR;

  /* found it. Pass it back to the caller. */
  *window_p = hit->window;

  /* manipulate the RS as if we just read the data */
  rs->chunk_index = chunk_index;
  rs->current = hit->end_offset;

  return SVN_NO_ERROR;
}
/* Add WINDOW, read for rep state RS, to RS->WINDOW_CACHE.  START_OFFSET
 * is the position within the pack / rev file where the window begins;
 * it is stored relative to RS->START alongside the window's end offset
 * RS->CURRENT.  The cache entry is keyed by the rep and window that RS
 * currently addresses.
 * Temporary allocations will be made from SCRATCH_POOL. */
static svn_error_t *
set_cached_window(svn_txdelta_window_t *window,
                  rep_state_t *rs,
                  apr_off_t start_offset,
                  apr_pool_t *scratch_pool)
{
  svn_fs_x__window_cache_key_t window_key = {0};
  svn_fs_x__txdelta_cached_window_t to_cache;

  /* store the window and the first offset _past_ it */
  to_cache.end_offset = rs->current;
  to_cache.start_offset = start_offset - rs->start;
  to_cache.window = window;

  get_window_key(&window_key, rs);

  return svn_error_trace(svn_cache__set(rs->window_cache,
                                        &window_key,
                                        &to_cache,
                                        scratch_pool));
}
/* Look up the combined (un-deltified) window that rep state RS currently
 * points to in RS->COMBINED_CACHE.  *IS_CACHED tells the caller whether
 * the lookup was successful; if it was, the window contents are returned
 * in *WINDOW_P, allocated in POOL.
 */
static svn_error_t *
get_cached_combined_window(svn_stringbuf_t **window_p,
                           rep_state_t *rs,
                           svn_boolean_t *is_cached,
                           apr_pool_t *pool)
{
  svn_fs_x__window_cache_key_t window_key = { 0 };

  /* same key scheme as for the txdelta window cache */
  get_window_key(&window_key, rs);

  return svn_cache__get((void **)window_p,
                        is_cached,
                        rs->combined_cache,
                        &window_key,
                        pool);
}
/* Put the combined (reconstructed) WINDOW for the rep state RS into
 * RS->COMBINED_CACHE.  This will be a no-op if no cache has been given.
 * Temporary allocations will be made from SCRATCH_POOL. */
static svn_error_t *
set_cached_combined_window(svn_stringbuf_t *window,
                           rep_state_t *rs,
                           apr_pool_t *scratch_pool)
{
  svn_fs_x__window_cache_key_t key_storage = { 0 };

  /* Key the entry by the state of RS because that is what will be
     known when we look it up again. */
  const void *store_key = get_window_key(&key_storage, rs);

  return svn_cache__set(rs->combined_cache, store_key, window,
                        scratch_pool);
}
/* Build an array of rep_state_t pointers in *LIST describing the delta
chain that starts at representation FIRST_REP in filesystem FS.
The chain is followed through the base references found in each rep
header and the walk ends at the first of these conditions:
- A combined (reconstructed) window for the current rep is found in
the cache. *WINDOW_P then holds that window and *SRC_STATE is set to
a pseudo rep state spanning it; the rep is not added to *LIST.
- The current rep is a container item. *SRC_STATE is set to its rep
state; the rep is not added to *LIST.
- The current rep is self-compressed. It becomes the last element of
*LIST and *SRC_STATE is set to NULL.
*LIST and the rep states are allocated in RESULT_POOL; SCRATCH_POOL is
used for temporary allocations.
*/
static svn_error_t *
build_rep_list(apr_array_header_t **list,
svn_stringbuf_t **window_p,
rep_state_t **src_state,
svn_fs_t *fs,
svn_fs_x__representation_t *first_rep,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
/* REP is a local working copy; its id and size get redirected to the
delta base on every iteration. */
svn_fs_x__representation_t rep;
rep_state_t *rs = NULL;
svn_fs_x__rep_header_t *rep_header;
svn_boolean_t is_cached = FALSE;
shared_file_t *shared_file = NULL;
apr_pool_t *iterpool = svn_pool_create(scratch_pool);
*list = apr_array_make(result_pool, 1, sizeof(rep_state_t *));
rep = *first_rep;
/* for the top-level rep, we need the rep_args */
SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs,
result_pool, iterpool));
while (1)
{
svn_pool_clear(iterpool);
/* fetch state, if that has not been done already
(RS is reset to NULL at the end of each iteration) */
if (!rs)
SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
&rep, fs, result_pool, iterpool));
/* for txn reps and containered reps, there won't be a cached
* combined window */
if (svn_fs_x__is_revision(rep.id.change_set)
&& rep_header->type != svn_fs_x__rep_container
&& rs->combined_cache)
SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached,
result_pool));
if (is_cached)
{
/* We already have a reconstructed window in our cache.
Write a pseudo rep_state with the full length. */
rs->start = 0;
rs->current = 0;
rs->size = (*window_p)->len;
*src_state = rs;
break;
}
if (rep_header->type == svn_fs_x__rep_container)
{
/* This is a container item, so just return the current rep_state. */
*src_state = rs;
break;
}
/* Push this rep onto the list. If it's self-compressed, we're done. */
APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
if (rep_header->type == svn_fs_x__rep_self_delta)
{
*src_state = NULL;
break;
}
/* Continue with the delta base named in the rep header. */
rep.id.change_set
= svn_fs_x__change_set_by_rev(rep_header->base_revision);
rep.id.number = rep_header->base_item_index;
rep.size = rep_header->base_length;
rs = NULL;
}
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
/* Create a rep_read_baton_t for reading representation REP in filesystem
   FS and return it in *RB_P.  FULLTEXT_CACHE_KEY is stored in the baton
   as the rep's key in the fulltext cache.  The new baton has no window
   state, no buffered data and no fulltext cache assigned; the expected
   MD5 digest and total length are taken from REP.

   Allocate the result in RESULT_POOL.  This includes the pools within
   *RB_P.
 */
static svn_error_t *
rep_read_get_baton(rep_read_baton_t **rb_p,
                   svn_fs_t *fs,
                   svn_fs_x__representation_t *rep,
                   svn_fs_x__pair_cache_key_t fulltext_cache_key,
                   apr_pool_t *result_pool)
{
  rep_read_baton_t *reader = apr_pcalloc(result_pool, sizeof(*reader));

  /* What we are going to read. */
  reader->fs = fs;
  reader->rep = *rep;
  reader->len = rep->expanded_size;
  reader->fulltext_cache_key = fulltext_cache_key;

  /* Nothing has been read or buffered so far. */
  reader->off = 0;
  reader->chunk_index = 0;
  reader->buf = NULL;
  reader->base_window = NULL;

  /* Checksumming state plus the digest we expect to see at the end. */
  reader->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5,
                                                     result_pool);
  reader->checksum_finalized = FALSE;
  memcpy(reader->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));

  /* Clearable sub-pools.  They must stay valid for as long as the baton
     lives, so they cannot come from some scratch pool.  The caller of
     this function has no control over how those subpools will be used. */
  reader->scratch_pool = svn_pool_create(result_pool);
  reader->filehandle_pool = svn_pool_create(result_pool);

  /* Fulltext caching is off until the caller enables it. */
  reader->fulltext_cache = NULL;
  reader->fulltext_delivered = 0;
  reader->current_fulltext = NULL;

  *rb_p = reader;
  return SVN_NO_ERROR;
}
/* Read txdelta window number THIS_CHUNK of the representation described
by RS and return it in *NWIN, allocated in RESULT_POOL.
RS->CHUNK_INDEX must not be greater than THIS_CHUNK; windows between
RS->CHUNK_INDEX and THIS_CHUNK are skipped without being returned.
If RS is still at chunk 0, refers to revision (non-txn) data and has a
window cache, the cache is consulted first (directly and once more after
a block read) and a window that had to be read from the file is stored
in the cache afterwards.
On return, RS->CURRENT is the offset (relative to RS->START) just behind
the window. RS->CHUNK_INDEX is set to THIS_CHUNK on a cache hit or after
skipping, but it is not advanced past the returned window; callers do
that themselves.
Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
rep_state_t *rs, apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
svn_boolean_t is_cached;
apr_off_t start_offset;
apr_off_t end_offset;
apr_pool_t *iterpool;
svn_stream_t *stream;
svn_fs_x__revision_file_t *file;
/* Evaluated once, before RS gets modified below. */
svn_boolean_t cacheable = rs->chunk_index == 0
&& svn_fs_x__is_revision(rs->rep_id.change_set)
&& rs->window_cache;
SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
SVN_ERR(dbg__log_access(rs->sfile->fs, &rs->rep_id, NULL,
SVN_FS_X__ITEM_TYPE_ANY_REP, scratch_pool));
/* Read the next window. But first, try to find it in the cache. */
if (cacheable)
{
SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
result_pool, scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
}
/* someone has to actually read the data from file. Open it */
SVN_ERR(auto_open_shared_file(rs->sfile));
file = rs->sfile->rfile;
/* invoke the 'block-read' feature for non-txn data.
However, don't do that if we are in the middle of some representation,
because the block is unlikely to contain other data. */
if (cacheable)
{
SVN_ERR(block_read(NULL, rs->sfile->fs, &rs->rep_id, file, NULL,
result_pool, scratch_pool));
/* reading the whole block probably also provided us with the
desired txdelta window */
SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
result_pool, scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
}
/* data is still not cached -> we need to read it.
Make sure we have all the necessary info. */
SVN_ERR(auto_set_start_offset(rs, scratch_pool));
SVN_ERR(auto_read_diff_version(rs, scratch_pool));
/* RS->FILE may be shared between RS instances -> make sure we point
* to the right data. */
start_offset = rs->start + rs->current;
SVN_ERR(svn_fs_x__rev_file_seek(file, NULL, start_offset));
/* Skip windows to reach the current chunk if we aren't there yet. */
iterpool = svn_pool_create(scratch_pool);
while (rs->chunk_index < this_chunk)
{
apr_file_t *apr_file;
svn_pool_clear(iterpool);
SVN_ERR(svn_fs_x__rev_file_get(&apr_file, file));
SVN_ERR(svn_txdelta_skip_svndiff_window(apr_file, rs->ver, iterpool));
rs->chunk_index++;
/* START_OFFSET now is where the next window begins. */
SVN_ERR(svn_io_file_get_offset(&start_offset, apr_file, iterpool));
rs->current = start_offset - rs->start;
/* There must be at least one more window left to read. */
if (rs->current >= rs->size)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("Reading one svndiff window read "
"beyond the end of the "
"representation"));
}
svn_pool_destroy(iterpool);
/* Actually read the next window. */
SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
SVN_ERR(svn_txdelta_read_svndiff_window(nwin, stream, rs->ver,
result_pool));
SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, file));
rs->current = end_offset - rs->start;
if (rs->current > rs->size)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("Reading one svndiff window read beyond "
"the end of the representation"));
/* the window has not been cached before, thus cache it now
* (if caching is used for them at all) */
if (cacheable)
SVN_ERR(set_cached_window(*nwin, rs, start_offset, scratch_pool));
return SVN_NO_ERROR;
}
/* Read SIZE bytes of the containered representation RS, starting at
   RS->CURRENT, and return them in *NWIN, allocated in RESULT_POOL.
   The extractor for the rep is taken from the reps container cache if
   possible and read from disk via block_read() otherwise.  On success,
   RS->CURRENT is advanced by SIZE.
   Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
read_container_window(svn_stringbuf_t **nwin,
                      rep_state_t *rs,
                      apr_size_t size,
                      apr_pool_t *result_pool,
                      apr_pool_t *scratch_pool)
{
  svn_fs_x__rep_extractor_t *extractor = NULL;
  svn_fs_t *fs = rs->sfile->fs;
  svn_fs_x__data_t *ffd = fs->fsap_data;

  /* Zero-initialize the key like every other cache key in this file
     does, so that no indeterminate bytes end up in the key handed to
     the cache. */
  svn_fs_x__pair_cache_key_t key = { 0 };
  svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
  svn_boolean_t is_cached = FALSE;
  svn_fs_x__reps_baton_t baton;

  /* RS->START and RS->SUB_ITEM are only read after this call. */
  SVN_ERR(auto_set_start_offset(rs, scratch_pool));

  /* The container is keyed by its base revision and its file offset. */
  key.revision = svn_fs_x__packed_base_rev(fs, revision);
  key.second = rs->start;

  /* already in cache? */
  baton.fs = fs;
  baton.idx = rs->sub_item;

  SVN_ERR(svn_cache__get_partial((void**)&extractor, &is_cached,
                                 ffd->reps_container_cache, &key,
                                 svn_fs_x__reps_get_func, &baton,
                                 result_pool));

  /* read from disk, if necessary */
  if (extractor == NULL)
    {
      SVN_ERR(auto_open_shared_file(rs->sfile));
      SVN_ERR(block_read((void **)&extractor, fs, &rs->rep_id,
                         rs->sfile->rfile, NULL,
                         result_pool, scratch_pool));
    }

  SVN_ERR(svn_fs_x__extractor_drive(nwin, extractor, rs->current, size,
                                    result_pool, scratch_pool));

  /* Update RS. */
  rs->current += (apr_off_t)size;

  return SVN_NO_ERROR;
}
/* Get the undeltified window that is a result of combining all deltas
from the current desired representation identified in *RB with its
base representation. Store the window in *RESULT.
The window read is chunk RB->CHUNK_INDEX of every rep in RB->RS_LIST;
the chunk index of each rep state that took part is incremented, while
RB->CHUNK_INDEX itself is left for the caller to advance.
*RESULT is allocated in a sub-pool of RB->SCRATCH_POOL. */
static svn_error_t *
get_combined_window(svn_stringbuf_t **result,
rep_read_baton_t *rb)
{
apr_pool_t *pool, *new_pool, *window_pool;
int i;
apr_array_header_t *windows;
/* BUF starts out as the cached base window, if there is one. */
svn_stringbuf_t *source, *buf = rb->base_window;
rep_state_t *rs;
apr_pool_t *iterpool;
/* Read all windows that we need to combine. This is fine because
the size of each window is relatively small (100kB) and skip-
delta limits the number of deltas in a chain to well under 100.
Stop early if one of them does not depend on its predecessors. */
window_pool = svn_pool_create(rb->scratch_pool);
windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
iterpool = svn_pool_create(rb->scratch_pool);
for (i = 0; i < rb->rs_list->nelts; ++i)
{
svn_txdelta_window_t *window;
svn_pool_clear(iterpool);
rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
iterpool));
APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
if (window->src_ops == 0)
{
/* Keep I at "number of windows read", as after a complete loop. */
++i;
break;
}
}
/* Combine in the windows from the other delta reps.
Walk from the deepest window read back to the top-level rep. */
pool = svn_pool_create(rb->scratch_pool);
for (--i; i >= 0; --i)
{
svn_txdelta_window_t *window;
svn_pool_clear(iterpool);
rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
/* Maybe, we've got a start representation in a container. If we do,
read as much data from it as the needed for the txdelta window's
source view.
Note that BUF / SOURCE may only be NULL in the first iteration. */
source = buf;
if (source == NULL && rb->src_state != NULL)
SVN_ERR(read_container_window(&source, rb->src_state,
window->sview_len, pool, iterpool));
/* Combine this window with the current one. */
new_pool = svn_pool_create(rb->scratch_pool);
buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
buf->len = window->tview_len;
svn_txdelta_apply_instructions(window, source ? source->data : NULL,
buf->data, &buf->len);
if (buf->len != window->tview_len)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("svndiff window length is "
"corrupt"));
/* Cache windows only if the whole rep content could be read as a
single chunk. Only then will no other chunk need a deeper RS
list than the cached chunk. */
if ( (rb->chunk_index == 0) && (rs->current == rs->size)
&& svn_fs_x__is_revision(rs->rep_id.change_set)
&& rs->combined_cache)
SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
rs->chunk_index++;
/* Cycle pools so that we only need to hold three windows at a time. */
svn_pool_destroy(pool);
pool = new_pool;
}
svn_pool_destroy(iterpool);
svn_pool_destroy(window_pool);
*result = buf;
return SVN_NO_ERROR;
}
/* Return TRUE if a fulltext of SIZE bytes may be put into the fulltext
 * cache of FFD and FALSE otherwise.  Sizes that are not below
 * APR_SIZE_MAX are rejected without asking the cache.
 */
static svn_boolean_t
fulltext_size_is_cachable(svn_fs_x__data_t *ffd,
                          svn_filesize_t size)
{
  /* The cache API takes an apr_size_t; bail out before narrowing. */
  if (!(size < APR_SIZE_MAX))
    return FALSE;

  return svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size)
         ? TRUE
         : FALSE;
}
/* Close method used on streams returned by read_representation().
*/
static svn_error_t *
rep_read_contents_close(void *baton)
{
rep_read_baton_t *rb = baton;
svn_pool_destroy(rb->scratch_pool);
svn_pool_destroy(rb->filehandle_pool);
return SVN_NO_ERROR;
}
/* Initialize the representation read state RS for the given REP_HEADER
 * and p2l index ENTRY in filesystem FS.  RS will read through REV_FILE,
 * wrapped in a newly allocated shared file structure.  ENTRY must be a
 * single-item, non-container representation.
 * Allocate all sub-structures of RS in RESULT_POOL.
 */
static svn_error_t *
init_rep_state(rep_state_t *rs,
               svn_fs_x__rep_header_t *rep_header,
               svn_fs_t *fs,
               svn_fs_x__revision_file_t *rev_file,
               svn_fs_x__p2l_entry_t* entry,
               apr_pool_t *result_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  shared_file_t *sfile = apr_pcalloc(result_pool, sizeof(*sfile));

  /* this function does not apply to representation containers */
  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
  SVN_ERR_ASSERT(entry->item_count == 1);

  /* File access info. */
  sfile->fs = fs;
  sfile->rfile = rev_file;
  sfile->pool = result_pool;
  sfile->revision = svn_fs_x__get_revnum(entry->items[0].change_set);

  /* Identity and caches. */
  rs->sfile = sfile;
  rs->rep_id = entry->items[0];
  rs->window_cache = ffd->txdelta_window_cache;
  rs->combined_cache = ffd->combined_window_cache;

  /* Position within the file: the rep data starts behind the header. */
  rs->header_size = rep_header->header_size;
  rs->start = entry->offset + rs->header_size;

  /* Start reading behind the 4 byte "SVNx" diff marker.  The 7 bytes
     excluded from the size are presumably the rep's trailing end
     marker -- TODO confirm. */
  rs->current = 4;
  rs->size = entry->size - rep_header->header_size - 7;

  rs->ver = 1;
  rs->chunk_index = 0;

  return SVN_NO_ERROR;
}
/* Walk through all windows in the representation addressed by RS in FS
 * (excluding the delta bases), starting at RS->CURRENT, and put those
 * whose sizes are not found in the cache into the window cache.  If
 * MAX_OFFSET is not -1, stop before any window that starts at or beyond
 * that file offset.  As a side effect, return the total sum of all
 * expanded window sizes seen in *FULLTEXT_LEN.  RS->CURRENT and
 * RS->CHUNK_INDEX are advanced past every window processed.
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
cache_windows(svn_filesize_t *fulltext_len,
              svn_fs_t *fs,
              rep_state_t *rs,
              apr_off_t max_offset,
              apr_pool_t *scratch_pool)
{
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);

  *fulltext_len = 0;

  /* One iteration per window; the chunk index moves on after each. */
  for (; rs->current < rs->size; rs->chunk_index++)
    {
      svn_boolean_t sizes_known = FALSE;
      window_sizes_t *sizes;

      svn_pool_clear(iterpool);

      /* Respect the upper limit given by the caller. */
      if (max_offset != -1 && rs->start + rs->current >= max_offset)
        break;

      /* efficiently skip windows that are still being cached instead
       * of fully decoding them */
      SVN_ERR(get_cached_window_sizes(&sizes, rs, &sizes_known, iterpool));
      if (sizes_known)
        {
          *fulltext_len += sizes->target_len;
          rs->current += sizes->packed_len;
        }
      else
        {
          svn_fs_x__revision_file_t *file = rs->sfile->rfile;
          apr_off_t window_start = rs->start + rs->current;
          apr_off_t window_end;
          apr_off_t block_start;
          svn_stream_t *stream;
          svn_txdelta_window_t *window;

          /* navigate to & read the current window */
          SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
          SVN_ERR(svn_fs_x__rev_file_seek(file, &block_start,
                                          window_start));
          SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream,
                                                  rs->ver, iterpool));

          /* aggregate expanded window size */
          *fulltext_len += window->tview_len;

          /* determine on-disk window size */
          SVN_ERR(svn_fs_x__rev_file_offset(&window_end, file));
          rs->current = window_end - rs->start;
          if (rs->current > rs->size)
            return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                    _("Reading one svndiff window read beyond "
                                      "the end of the representation"));

          /* the window was not in the cache, so cache it now
           * (if caching is used for them at all) */
          SVN_ERR(set_cached_window(window, rs, window_start, iterpool));
        }
    }

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
/* Return the representation header identified by KEY in *REP_HEADER.
 * Try FS's rep header cache first.  On a cache miss, read the header
 * from the current position of the revision file FILE and put it into
 * the cache (if caching has been enabled for rep headers).
 * Use POOL for allocations.
 */
static svn_error_t *
read_rep_header(svn_fs_x__rep_header_t **rep_header,
                svn_fs_t *fs,
                svn_fs_x__revision_file_t *file,
                svn_fs_x__representation_cache_key_t *key,
                apr_pool_t *pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_boolean_t hit = FALSE;
  svn_stream_t *source;

  SVN_ERR(svn_cache__get((void**)rep_header, &hit,
                         ffd->rep_header_cache, key, pool));
  if (!hit)
    {
      /* Not cached: parse it from the file and remember the result. */
      SVN_ERR(svn_fs_x__rev_file_stream(&source, file));
      SVN_ERR(svn_fs_x__read_rep_header(rep_header, source, pool, pool));
      SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
                             pool));
    }

  return SVN_NO_ERROR;
}
/* Determine the on-disk size (*PACKED_LEN) and the sum of the expanded
 * window sizes (*EXPANDED_LEN) of the single, non-container
 * representation described by the p2l index ENTRY.  REV_FILE must be
 * positioned such that the rep header can be read from it if it is not
 * cached.  Windows walked along the way get cached by cache_windows().
 * Use SCRATCH_POOL for all allocations.
 */
svn_error_t *
svn_fs_x__get_representation_length(svn_filesize_t *packed_len,
                                    svn_filesize_t *expanded_len,
                                    svn_fs_t *fs,
                                    svn_fs_x__revision_file_t *rev_file,
                                    svn_fs_x__p2l_entry_t* entry,
                                    apr_pool_t *scratch_pool)
{
  svn_fs_x__representation_cache_key_t header_key = { 0 };
  rep_state_t state = { 0 };
  svn_fs_x__rep_header_t *header;

  /* this function does not apply to representation containers */
  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
  SVN_ERR_ASSERT(entry->item_count == 1);

  /* get / read the representation header */
  header_key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  header_key.is_packed = svn_fs_x__is_packed_rev(fs, header_key.revision);
  header_key.item_index = entry->items[0].number;
  SVN_ERR(read_rep_header(&header, fs, rev_file, &header_key,
                          scratch_pool));

  /* prepare representation reader state structure */
  SVN_ERR(init_rep_state(&state, header, fs, rev_file, entry,
                         scratch_pool));

  /* The packed size is known right away; the expanded size requires a
     walk over all windows. */
  *packed_len = state.size;
  SVN_ERR(cache_windows(expanded_len, fs, &state, -1, scratch_pool));

  return SVN_NO_ERROR;
}
/* Return the next *LEN bytes of the rep from our plain / delta windows
and store them in *BUF. On return, *LEN is the number of bytes actually
copied, which may be less than requested when the end of the data has
been reached.
Two modes exist: if RB->RS_LIST is empty, the data comes directly from
RB->BASE_WINDOW (a cached combined window or a containered text);
otherwise, combined windows are reconstructed chunk by chunk and
buffered in RB->BUF. */
static svn_error_t *
get_contents_from_windows(rep_read_baton_t *rb,
char *buf,
apr_size_t *len)
{
apr_size_t copy_len, remaining = *len;
char *cur = buf;
rep_state_t *rs;
/* Special case for when there are no delta reps, only a
containered text. */
if (rb->rs_list->nelts == 0 && rb->buf == NULL)
{
copy_len = remaining;
rs = rb->src_state;
/* reps in containers don't have a header */
if (rs->header_size == 0 && rb->base_window == NULL)
{
/* RS->SIZE is unreliable here because it is based upon
* the delta rep size _before_ putting the data into a
* a container. */
SVN_ERR(read_container_window(&rb->base_window, rs, rb->len,
rb->scratch_pool, rb->scratch_pool));
/* Undo the position update made by read_container_window();
RS->CURRENT is used as the read offset into BASE_WINDOW below. */
rs->current -= rb->base_window->len;
}
if (rb->base_window != NULL)
{
/* We got the desired rep directly from the cache.
This is where we need the pseudo rep_state created
by build_rep_list(). */
apr_size_t offset = (apr_size_t)rs->current;
/* Clip the copy range to what is left in the base window. */
if (offset >= rb->base_window->len)
copy_len = 0ul;
else if (copy_len > rb->base_window->len - offset)
copy_len = rb->base_window->len - offset;
memcpy (cur, rb->base_window->data + offset, copy_len);
}
rs->current += copy_len;
*len = copy_len;
return SVN_NO_ERROR;
}
while (remaining > 0)
{
/* If we have buffered data from a previous chunk, use that. */
if (rb->buf)
{
/* Determine how much to copy from the buffer. */
copy_len = rb->buf_len - rb->buf_pos;
if (copy_len > remaining)
copy_len = remaining;
/* Actually copy the data. */
memcpy(cur, rb->buf + rb->buf_pos, copy_len);
rb->buf_pos += copy_len;
cur += copy_len;
remaining -= copy_len;
/* If the buffer is all used up, clear it and empty the
local pool. Note that this also clears everything else,
including sub-pools, that lives in RB->SCRATCH_POOL. */
if (rb->buf_pos == rb->buf_len)
{
svn_pool_clear(rb->scratch_pool);
rb->buf = NULL;
}
}
else
{
svn_stringbuf_t *sbuf = NULL;
/* The top-level rep tells us whether any data is left. */
rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
if (rs->current == rs->size)
break;
/* Get more buffered data by evaluating a chunk. */
SVN_ERR(get_combined_window(&sbuf, rb));
rb->chunk_index++;
rb->buf_len = sbuf->len;
rb->buf = sbuf->data;
rb->buf_pos = 0;
}
}
*len = cur - buf;
return SVN_NO_ERROR;
}
/* Baton type for get_fulltext_partial. Describes the byte range of a
cached fulltext that shall be copied and receives the number of bytes
that actually got copied. */
typedef struct fulltext_baton_t
{
/* Target buffer to write to; of at least LEN bytes. */
char *buffer;
/* Offset within the respective fulltext at which we shall start to
copy data into BUFFER. */
apr_size_t start;
/* Number of bytes to copy. The actual amount may be less in case
the fulltext is short(er). */
apr_size_t len;
/* Number of bytes actually copied into BUFFER. Set by
get_fulltext_partial. */
apr_size_t read;
} fulltext_baton_t;
/* Implement svn_cache__partial_getter_func_t for fulltext caches.
 * DATA / DATA_LEN is the cached fulltext including its terminating NUL.
 * Copy the range requested by the fulltext_baton_t* BATON, clipped to
 * the actual fulltext length, into the buffer provided by that baton
 * and record the number of bytes copied in the baton.
 * OUT and RESULT_POOL are not used.
 */
static svn_error_t *
get_fulltext_partial(void **out,
                     const void *data,
                     apr_size_t data_len,
                     void *baton,
                     apr_pool_t *result_pool)
{
  fulltext_baton_t *request = baton;

  /* We cached the fulltext with an NUL appended to it. */
  apr_size_t text_len = data_len - 1;
  apr_size_t from = request->start;
  apr_size_t count;

  /* Clip the start position to the end of the fulltext ... */
  if (!(from < text_len))
    from = text_len;

  /* ... and the length to what is available from there on. */
  count = text_len - from;
  if (!(count < request->len))
    count = request->len;

  /* Copy the data to the output buffer and be done. */
  request->read = count;
  memcpy(request->buffer, (const char *)data + from, count);

  return SVN_NO_ERROR;
}
/* Look up the fulltext identified by BATON->FULLTEXT_CACHE_KEY in
 * BATON->FULLTEXT_CACHE.  If that succeeds, set *CACHED to TRUE, copy
 * up to the next *LEN bytes (continuing at BATON->FULLTEXT_DELIVERED)
 * into BUFFER, set *LEN to the number of bytes copied and advance
 * BATON->FULLTEXT_DELIVERED accordingly.  On a cache miss, *LEN and
 * BATON are left unchanged.
 */
static svn_error_t *
get_contents_from_fulltext(svn_boolean_t *cached,
                           rep_read_baton_t *baton,
                           char *buffer,
                           apr_size_t *len)
{
  void *unused;
  fulltext_baton_t request;

  /* The read position must be representable as an in-memory offset. */
  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
                 == baton->fulltext_delivered);

  request.buffer = buffer;
  request.start = (apr_size_t)baton->fulltext_delivered;
  request.len = *len;
  request.read = 0;

  SVN_ERR(svn_cache__get_partial(&unused, cached, baton->fulltext_cache,
                                 &baton->fulltext_cache_key,
                                 get_fulltext_partial, &request,
                                 baton->scratch_pool));
  if (!*cached)
    return SVN_NO_ERROR;

  /* Report what has been copied and remember where to continue. */
  baton->fulltext_delivered += request.read;
  *len = request.read;

  return SVN_NO_ERROR;
}
/* Determine the optimal size of a string buf that shall receive a
 * (full-) text of NEEDED bytes.  The result is never smaller than
 * NEEDED.
 *
 * The critical point is that those buffers may be very large and
 * can cause memory fragmentation.  We apply simple heuristics to
 * make fragmentation less likely.
 */
static apr_size_t
optimimal_allocation_size(apr_size_t needed)
{
  /* For all allocations, assume some overhead that is shared between
   * OS memory management, APR memory management and svn_stringbuf_t. */
  const apr_size_t overhead = 0x400;
  apr_size_t candidate;

  /* Leave NEEDED alone in two cases:
   * - If an allocation size is safe for other ephemeral buffers, it
   *   should be safe for ours.
   * - Paranoia edge case: skip our heuristics if they could create
   *   arithmetic overflow.  This test must work for
   *   NEEDED = APR_SIZE_MAX as well! */
  if (needed <= SVN__STREAM_CHUNK_SIZE
      || needed >= APR_SIZE_MAX / 2 - overhead)
    return needed;

  /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
   * Since we know NEEDED to be larger than that, use it as the
   * starting point.
   *
   * Heuristics: Allocate a power-of-two number of bytes that fit
   *             NEEDED plus some OVERHEAD.  The APR allocator
   *             will round it up to the next full page size.
   */
  for (candidate = SVN__STREAM_CHUNK_SIZE;
       candidate - overhead < needed;
       candidate *= 2)
    ;

  /* This is above or equal to NEEDED. */
  return candidate - overhead;
}
/* After a fulltext cache lookup failure, we will continue to read from
 * combined delta or plain windows.  However, we must first make that data
 * stream in BATON catch up to the position LEN already delivered from the
 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
 * want to cache it at the end.
 *
 * If BATON has a valid fulltext cache key, BATON->CURRENT_FULLTEXT is
 * allocated and filled with the first LEN bytes; otherwise, LEN bytes are
 * simply read and discarded.  In both cases the MD5 context and BATON->OFF
 * are updated for the data skipped.
 */
static svn_error_t *
skip_contents(rep_read_baton_t *baton,
              svn_filesize_t len)
{
  svn_error_t *err = SVN_NO_ERROR;

  /* Do we want to cache the reconstructed fulltext? */
  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
    {
      char *buffer;
      svn_filesize_t to_alloc = MAX(len, baton->len);

      /* This should only be happening if BATON->LEN and LEN are
       * cacheable, implying they fit into memory. */
      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);

      /* Allocate the fulltext buffer. */
      baton->current_fulltext = svn_stringbuf_create_ensure(
                        optimimal_allocation_size((apr_size_t)to_alloc),
                        baton->filehandle_pool);

      /* Read LEN bytes from the window stream and store the data
       * in the fulltext buffer (will be filled by further reads later). */
      baton->current_fulltext->len = (apr_size_t)len;
      baton->current_fulltext->data[(apr_size_t)len] = 0;

      buffer = baton->current_fulltext->data;
      while (len > 0 && !err)
        {
          apr_size_t to_read = (apr_size_t)len;
          err = get_contents_from_windows(baton, buffer, &to_read);
          len -= to_read;
          buffer += to_read;
        }

      /* Make the MD5 calculation catch up with the data delivered
       * (we did not run MD5 on the data that we took from the cache). */
      if (!err)
        {
          SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
                                      baton->current_fulltext->data,
                                      baton->current_fulltext->len));
          baton->off += baton->current_fulltext->len;
        }
    }
  else if (len > 0)
    {
      /* Simply drain LEN bytes from the window stream.
       *
       * The drain buffer must not live in (a sub-pool of)
       * BATON->SCRATCH_POOL: get_contents_from_windows() clears that
       * pool whenever a window buffer has been used up, which would
       * free our buffer in mid-loop and make the final pool destruction
       * below operate on an already destroyed pool. */
      apr_pool_t *subpool = svn_pool_create(baton->filehandle_pool);
      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);

      while (len > 0 && !err)
        {
          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
                            ? SVN__STREAM_CHUNK_SIZE
                            : (apr_size_t)len;

          err = get_contents_from_windows(baton, buffer, &to_read);
          len -= to_read;

          /* Make the MD5 calculation catch up with the data delivered
           * (we did not run MD5 on the data that we took from the cache).
           * Record failures in ERR instead of returning right away such
           * that SUBPOOL gets cleaned up in any case. */
          if (!err)
            {
              err = svn_checksum_update(baton->md5_checksum_ctx,
                                        buffer, to_read);
              if (!err)
                baton->off += to_read;
            }
        }

      svn_pool_destroy(subpool);
    }

  return svn_error_trace(err);
}
/* BATON is of type `rep_read_baton_t'; read the next *LEN bytes of the
representation and store them in *BUF. Sum as we read and verify
the MD5 sum at the end.
Data is served from the fulltext cache for as long as lookups succeed.
After the first miss, the window stream is set up, made to catch up
with what has already been delivered, and used from then on. If the
fulltext is being collected for caching, it is put into the fulltext
cache once the last byte has been read. On return, *LEN is the number
of bytes actually stored in *BUF. */
static svn_error_t *
rep_read_contents(void *baton,
char *buf,
apr_size_t *len)
{
rep_read_baton_t *rb = baton;
/* Get data from the fulltext cache for as long as we can. */
if (rb->fulltext_cache)
{
svn_boolean_t cached;
SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
if (cached)
return SVN_NO_ERROR;
/* Cache miss. From now on, we will never read from the fulltext
* cache for this representation anymore. */
rb->fulltext_cache = NULL;
}
/* No fulltext cache to help us. We must read from the window stream. */
if (!rb->rs_list)
{
/* Window stream not initialized, yet. Do it now. */
SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
&rb->src_state, rb->fs, &rb->rep,
rb->filehandle_pool, rb->scratch_pool));
/* In case we did read from the fulltext cache before, make the
* window stream catch up. Also, initialize the fulltext buffer
* if we want to cache the fulltext at the end. */
SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
}
/* Get the next block of data.
* Keep in mind that the representation might be empty and leave us
* already positioned at the end of the rep. */
if (rb->off == rb->len)
*len = 0;
else
SVN_ERR(get_contents_from_windows(rb, buf, len));
/* Collect the data for the fulltext cache, if requested. */
if (rb->current_fulltext)
svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
/* Perform checksumming. We want to check the checksum as soon as
the last byte of data is read, in case the caller never performs
a short read, but we don't want to finalize the MD5 context
twice. */
if (!rb->checksum_finalized)
{
SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
rb->off += *len;
if (rb->off == rb->len)
{
svn_checksum_t *md5_checksum;
svn_checksum_t expected;
expected.kind = svn_checksum_md5;
expected.digest = rb->md5_digest;
rb->checksum_finalized = TRUE;
SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
rb->scratch_pool));
if (!svn_checksum_match(md5_checksum, &expected))
return svn_error_create(SVN_ERR_FS_CORRUPT,
svn_checksum_mismatch_err(&expected, md5_checksum,
rb->scratch_pool,
_("Checksum mismatch while reading representation")),
NULL);
}
}
/* All data read and the checksum did not fail above: hand the
collected fulltext over to the cache. */
if (rb->off == rb->len && rb->current_fulltext)
{
svn_fs_x__data_t *ffd = rb->fs->fsap_data;
SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
rb->current_fulltext, rb->scratch_pool));
rb->current_fulltext = NULL;
}
return SVN_NO_ERROR;
}
/* Return in *CONTENTS_P a read stream over the contents of
 * representation REP in filesystem FS.  A NULL REP yields an empty
 * stream.  If CACHE_FULLTEXT is set, REP belongs to a revision and its
 * expanded size is cacheable, the stream will try the fulltext cache
 * and populate it; otherwise the fulltext cache is bypassed completely.
 * The stream and its baton are allocated in RESULT_POOL.
 */
svn_error_t *
svn_fs_x__get_contents(svn_stream_t **contents_p,
                       svn_fs_t *fs,
                       svn_fs_x__representation_t *rep,
                       svn_boolean_t cache_fulltext,
                       apr_pool_t *result_pool)
{
  svn_fs_x__pair_cache_key_t cache_key = { 0 };
  svn_fs_x__data_t *ffd;
  rep_read_baton_t *reader;
  svn_revnum_t rev;
  svn_stream_t *stream;

  /* No representation means no content. */
  if (rep == NULL)
    {
      *contents_p = svn_stream_empty(result_pool);
      return SVN_NO_ERROR;
    }

  ffd = fs->fsap_data;
  rev = svn_fs_x__get_revnum(rep->id.change_set);
  cache_key.revision = rev;
  cache_key.second = rep->id.number;

  /* Initialize the reader baton.  Some members may be added lazily
   * while reading from the stream. */
  SVN_ERR(rep_read_get_baton(&reader, fs, rep, cache_key, result_pool));

  /* Make the stream attempt fulltext cache lookups if the fulltext
   * is cacheable.  If it is not, then also don't try to buffer and
   * cache it. */
  if (cache_fulltext
      && SVN_IS_VALID_REVNUM(rev)
      && fulltext_size_is_cachable(ffd, rep->expanded_size))
    reader->fulltext_cache = ffd->fulltext_cache;
  else
    /* This will also prevent the reconstructed fulltext from being
       put into the cache. */
    reader->fulltext_cache_key.revision = SVN_INVALID_REVNUM;

  stream = svn_stream_create(reader, result_pool);
  svn_stream_set_read2(stream, NULL /* only full read support */,
                       rep_read_contents);
  svn_stream_set_close(stream, rep_read_contents_close);
  *contents_p = stream;

  return SVN_NO_ERROR;
}
/* Return in *CONTENTS_P a read stream over the contents of
 * representation REP whose rep header starts at OFFSET in FILE.  FILE
 * need not be a complete revision file; only REP itself is read from
 * it, while its delta base (if any) is looked up in FS in the usual
 * way.  No caches are used for REP itself.
 * All allocations are made in POOL.
 */
svn_error_t *
svn_fs_x__get_contents_from_file(svn_stream_t **contents_p,
                                 svn_fs_t *fs,
                                 svn_fs_x__representation_t *rep,
                                 apr_file_t *file,
                                 apr_off_t offset,
                                 apr_pool_t *pool)
{
  svn_fs_x__pair_cache_key_t no_cache_key = { SVN_INVALID_REVNUM, 0 };
  rep_state_t *state = apr_pcalloc(pool, sizeof(*state));
  rep_read_baton_t *reader;
  svn_fs_x__rep_header_t *header;
  svn_stream_t *header_stream;
  svn_stream_t *result;
  shared_file_t *shared;

  /* Initialize the reader baton.  Some members may be added lazily
   * while reading from the stream. */
  SVN_ERR(rep_read_get_baton(&reader, fs, rep, no_cache_key, pool));

  /* Continue constructing the rep state.  Leave caches as NULL. */
  state->size = rep->size;
  state->rep_id = rep->id;
  state->ver = -1;
  state->start = -1;

  /* Provide just enough file access info to allow for a basic read from
   * FILE but leave all index / footer info with empty values b/c FILE
   * probably is not a complete revision file. */
  shared = apr_pcalloc(pool, sizeof(*shared));
  state->sfile = shared;
  shared->revision = SVN_INVALID_REVNUM;
  shared->pool = pool;
  shared->fs = fs;
  SVN_ERR(svn_fs_x__rev_file_wrap_temp(&shared->rfile, fs, file, pool));

  /* Read the rep header.  The rep data starts right behind it. */
  SVN_ERR(svn_fs_x__rev_file_seek(shared->rfile, NULL, offset));
  SVN_ERR(svn_fs_x__rev_file_stream(&header_stream, shared->rfile));
  SVN_ERR(svn_fs_x__read_rep_header(&header, header_stream, pool, pool));
  SVN_ERR(svn_fs_x__rev_file_offset(&state->start, shared->rfile));
  state->header_size = header->header_size;

  /* Log the access. */
  SVN_ERR(dbg__log_access(fs, &rep->id, header,
                          SVN_FS_X__ITEM_TYPE_ANY_REP, pool));

  /* Build the representation list (delta chain). */
  if (header->type != svn_fs_x__rep_self_delta)
    {
      svn_fs_x__representation_t base_rep = { 0 };

      /* skip "SVNx" diff marker */
      state->current = 4;

      /* REP's base rep is inside a proper revision.
       * It can be reconstructed in the usual way. */
      base_rep.id.change_set
        = svn_fs_x__change_set_by_rev(header->base_revision);
      base_rep.id.number = header->base_item_index;
      base_rep.size = header->base_length;
      SVN_ERR(build_rep_list(&reader->rs_list, &reader->base_window,
                             &reader->src_state, reader->fs, &base_rep,
                             reader->filehandle_pool,
                             reader->scratch_pool));

      /* Insert the access to REP as the first element of the delta
         chain. */
      SVN_ERR(svn_sort__array_insert2(reader->rs_list, &state, 0));
    }
  else
    {
      /* REP does not depend on any other rep: it is the whole chain. */
      reader->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
      APR_ARRAY_PUSH(reader->rs_list, rep_state_t *) = state;
      reader->src_state = NULL;
    }

  /* Now, the baton is complete and we can assemble the stream around
     it. */
  result = svn_stream_create(reader, pool);
  svn_stream_set_read2(result, NULL /* only full read support */,
                       rep_read_contents);
  svn_stream_set_close(result, rep_read_contents_close);
  *contents_p = result;

  return SVN_NO_ERROR;
}
/* Baton for cache_access_wrapper. Wraps the original parameters of
* svn_fs_x__try_process_file_contents().
*/
typedef struct cache_access_wrapper_baton_t
{
/* The content processor callback to invoke with the cached data. */
svn_fs_process_contents_func_t func;
/* The baton to pass through to FUNC. */
void* baton;
} cache_access_wrapper_baton_t;
/* Wrapper to translate between svn_fs_process_contents_func_t and
 * svn_cache__partial_getter_func_t.  BATON is a
 * cache_access_wrapper_baton_t; its processor function gets called with
 * the cached DATA minus the terminating NUL and with POOL.  On success,
 * *OUT is set to a non-NULL value.
 */
static svn_error_t *
cache_access_wrapper(void **out,
                     const void *data,
                     apr_size_t data_len,
                     void *baton,
                     apr_pool_t *pool)
{
  cache_access_wrapper_baton_t *wrapped = baton;
  const unsigned char *contents = data;

  /* cache adds terminating 0 */
  apr_size_t contents_len = data_len - 1;

  SVN_ERR(wrapped->func(contents, contents_len, wrapped->baton, pool));

  /* non-NULL value to signal the calling cache that all went well */
  *out = baton;

  return SVN_NO_ERROR;
}
/* If the fulltext of NODEREV's data representation can be looked up in
 * the fulltext cache of FS, hand it to PROCESSOR together with BATON and
 * let the cache lookup set *SUCCESS.  In all other cases, set *SUCCESS
 * to FALSE without calling PROCESSOR.  Use SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__try_process_file_contents(svn_boolean_t *success,
                                    svn_fs_t *fs,
                                    svn_fs_x__noderev_t *noderev,
                                    svn_fs_process_contents_func_t processor,
                                    void* baton,
                                    apr_pool_t *scratch_pool)
{
  svn_fs_x__representation_t *rep = noderev->data_rep;
  svn_fs_x__data_t *ffd;
  svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
  cache_access_wrapper_baton_t wrapper_baton;
  void *dummy = NULL;

  /* Without a data rep, there is nothing to look up. */
  if (rep == NULL)
    {
      *success = FALSE;
      return SVN_NO_ERROR;
    }

  ffd = fs->fsap_data;
  fulltext_cache_key.revision = svn_fs_x__get_revnum(rep->id.change_set);
  fulltext_cache_key.second = rep->id.number;

  /* The key must refer to a valid revision and the fulltext size must
   * be one that the cache would accept. */
  if (   !SVN_IS_VALID_REVNUM(fulltext_cache_key.revision)
      || !fulltext_size_is_cachable(ffd, rep->expanded_size))
    {
      *success = FALSE;
      return SVN_NO_ERROR;
    }

  /* Let the cache call PROCESSOR on the cached data, if there is any. */
  wrapper_baton.func = processor;
  wrapper_baton.baton = baton;

  return svn_cache__get_partial(&dummy, success,
                                ffd->fulltext_cache,
                                &fulltext_cache_key,
                                cache_access_wrapper,
                                &wrapper_baton,
                                scratch_pool);
}
/* Baton used when reading delta windows. */
typedef struct delta_read_baton_t
{
  /* State of the representation whose delta windows are being read. */
  struct rep_state_t *rs;

  /* MD5 digest that delta_read_md5_digest() hands out. */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
} delta_read_baton_t;
/* This implements the svn_txdelta_next_window_fn_t interface.
 *
 * BATON is a delta_read_baton_t.  Set *WINDOW to the next delta window
 * of the representation, allocated in POOL, or to NULL once the
 * representation's current position has reached its size.
 */
static svn_error_t *
delta_read_next_window(svn_txdelta_window_t **window,
                       void *baton,
                       apr_pool_t *pool)
{
  delta_read_baton_t *drb = baton;
  struct rep_state_t *state = drb->rs;
  apr_pool_t *scratch_pool = svn_pool_create(pool);

  *window = NULL;

  /* Only read while there is still data left in the representation. */
  if (state->current < state->size)
    {
      SVN_ERR(read_delta_window(window, state->chunk_index, state, pool,
                                scratch_pool));
      state->chunk_index++;
    }

  svn_pool_destroy(scratch_pool);

  return SVN_NO_ERROR;
}
/* This implements the svn_txdelta_md5_digest_fn_t interface. */
static const unsigned char *
delta_read_md5_digest(void *baton)
{
delta_read_baton_t *drb = baton;
return drb->md5_digest;
}
/* Return a txdelta stream that reads its windows from the on-disk
 * representation REP_STATE and reports the MD5 digest of TARGET's data
 * representation.  Allocate the result in RESULT_POOL.
 */
static svn_txdelta_stream_t *
get_storaged_delta_stream(rep_state_t *rep_state,
                          svn_fs_x__noderev_t *target,
                          apr_pool_t *result_pool)
{
  delta_read_baton_t *read_baton;

  /* Set up the baton shared by the window reader and the digest getter. */
  read_baton = apr_pcalloc(result_pool, sizeof(*read_baton));
  read_baton->rs = rep_state;
  memcpy(read_baton->md5_digest,
         target->data_rep->md5_digest,
         sizeof(read_baton->md5_digest));

  return svn_txdelta_stream_create(read_baton,
                                   delta_read_next_window,
                                   delta_read_md5_digest,
                                   result_pool);
}
/* Set *STREAM_P to a txdelta stream that produces the contents of TARGET
 * from those of SOURCE, both being node revisions in FS.  SOURCE may be
 * NULL, in which case the delta is built against an empty stream.
 *
 * If TARGET is stored as a delta against SOURCE's data representation,
 * return that stored delta directly.  Otherwise, read both fulltexts and
 * construct a new delta from them.  Allocate the result in RESULT_POOL
 * and use SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
                                svn_fs_t *fs,
                                svn_fs_x__noderev_t *source,
                                svn_fs_x__noderev_t *target,
                                apr_pool_t *result_pool,
                                apr_pool_t *scratch_pool)
{
  svn_stream_t *source_stream, *target_stream;
  rep_state_t *rep_state;
  svn_fs_x__rep_header_t *rep_header;

  /* Try a shortcut: if the target is stored as a delta against the source,
     then just use that delta.  However, prefer using the fulltext cache
     whenever that is available. */
  if (target->data_rep && source)
    {
      /* Read target's base rep if any. */
      SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
                               target->data_rep, fs, result_pool,
                               scratch_pool));

      /* Try a shortcut: if the target is stored as a delta against the source,
         then just use that delta. */
      if (source && source->data_rep && target->data_rep)
        {
          /* If that matches source, then use this delta as is.
             Note that we want an actual delta here.  E.g. a self-delta would
             not be good enough. */
          if (rep_header->type == svn_fs_x__rep_delta
              && rep_header->base_revision
                 == svn_fs_x__get_revnum(source->data_rep->id.change_set)
              && rep_header->base_item_index == source->data_rep->id.number)
            {
              *stream_p = get_storaged_delta_stream(rep_state, target,
                                                    result_pool);
              return SVN_NO_ERROR;
            }
        }
      /* NOTE(review): the enclosing condition already requires SOURCE to
         be non-NULL, so this branch cannot be entered as the code stands.
         Either the branch is dead or the outer condition is narrower
         than intended -- to be confirmed. */
      else if (!source)
        {
          /* We want a self-delta. There is a fair chance that TARGET got
             added in this revision and is already stored in the requested
             format. */
          if (rep_header->type == svn_fs_x__rep_self_delta)
            {
              *stream_p = get_storaged_delta_stream(rep_state, target,
                                                    result_pool);
              return SVN_NO_ERROR;
            }
        }

      /* Don't keep file handles open for longer than necessary. */
      if (rep_state->sfile->rfile)
        {
          SVN_ERR(svn_fs_x__close_revision_file(rep_state->sfile->rfile));
          rep_state->sfile->rfile = NULL;
        }
    }

  /* Read both fulltexts and construct a delta. */
  if (source)
    SVN_ERR(svn_fs_x__get_contents(&source_stream, fs, source->data_rep,
                                   TRUE, result_pool));
  else
    source_stream = svn_stream_empty(result_pool);

  SVN_ERR(svn_fs_x__get_contents(&target_stream, fs, target->data_rep,
                                 TRUE, result_pool));

  /* Because source and target stream will already verify their content,
   * there is no need to do this once more.  In particular if the stream
   * content is being fetched from cache. */
  svn_txdelta2(stream_p, source_stream, target_stream, FALSE, result_pool);

  return SVN_NO_ERROR;
}
/* Return TRUE if the svn_fs_x__dirent_t* elements of ENTRIES are in
   non-descending order of their names and FALSE otherwise.  Arrays with
   fewer than two elements are always sorted. */
static svn_boolean_t
sorted(apr_array_header_t *entries)
{
  const svn_fs_x__dirent_t * const *dirents = (const void *)entries->elts;
  int idx;

  /* Compare every element with its predecessor. */
  for (idx = 1; idx < entries->nelts; ++idx)
    {
      if (strcmp(dirents[idx - 1]->name, dirents[idx]->name) > 0)
        return FALSE;
    }

  return TRUE;
}
/* Sort callback: A and B each point to an svn_fs_x__dirent_t pointer.
   Order the two dirents by name, using strcmp() semantics. */
static int
compare_dirents(const void *a,
                const void *b)
{
  const svn_fs_x__dirent_t * const *lhs_p = a;
  const svn_fs_x__dirent_t * const *rhs_p = b;

  return strcmp((*lhs_p)->name, (*rhs_p)->name);
}
/* Lookup callback: A points to an svn_fs_x__dirent_t pointer and B is a
   plain C string.  Compare the dirent's name with that string, using
   strcmp() semantics. */
static int
compare_dirent_name(const void *a,
                    const void *b)
{
  const svn_fs_x__dirent_t * const *element = a;
  const char *wanted_name = b;

  return strcmp((*element)->name, wanted_name);
}
/* Into *ENTRIES_P, parse all directory entries from the serialized form in
 * DATA.  If INCREMENTAL is TRUE, treat the entries as a sequence of
 * insertions / updates and deletions and return the resulting entries
 * sorted by name.  Otherwise, DATA must contain exactly as many entries
 * as its leading count says.  ID is provided for nicer error messages.
 *
 * Return an SVN_ERR_FS_CORRUPT error if DATA is truncated, i.e. if the
 * entry count or any part of an entry is missing or incomplete.
 *
 * The contents of DATA will be shared with the items in *ENTRIES_P, i.e.
 * it must not be modified afterwards and must remain valid as long as
 * *ENTRIES_P is valid.  Allocate the result in RESULT_POOL and use
 * SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
parse_dir_entries(apr_array_header_t **entries_p,
                  const svn_stringbuf_t *data,
                  svn_boolean_t incremental,
                  const svn_fs_x__id_t *id,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  const apr_byte_t *p = (const apr_byte_t *)data->data;
  const apr_byte_t *end = p + data->len;
  apr_uint64_t count;
  apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
  apr_array_header_t *entries;

  /* Construct the resulting container.
   *
   * The integer decoders are expected to return NULL when [P, END) does
   * not hold a complete number (NOTE(review): per their documented
   * contract; confirm against svn_subr_private.h).  In that case, COUNT
   * is undefined and must not be used. */
  p = svn__decode_uint(&count, p, end);
  if (p == NULL)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Directory length mismatch in '%s'"),
                             svn_fs_x__id_unparse(id, scratch_pool)->data);

  if (count > INT_MAX)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Directory for '%s' is too large"),
                             svn_fs_x__id_unparse(id, scratch_pool)->data);

  entries = apr_array_make(result_pool, (int)count,
                           sizeof(svn_fs_x__dirent_t *));

  while (p < end)
    {
      apr_size_t len;
      svn_fs_x__dirent_t *dirent;
      const apr_byte_t *name_end;

      /* The name must be terminated within DATA and at least the kind
       * byte must follow it.  Searching with an explicit bound keeps P
       * from ever moving past END on truncated data. */
      name_end = memchr(p, 0, (apr_size_t)(end - p));
      if (name_end == NULL || name_end + 1 == end)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                            _("Directory entry missing kind in '%s'"),
                            svn_fs_x__id_unparse(id, scratch_pool)->data);

      dirent = apr_pcalloc(result_pool, sizeof(*dirent));

      /* The part of the serialized entry that is not the name will be
       * about 6 bytes or less.  Since APR allocates with an 8 byte
       * alignment (4 bytes loss on average per string), simply using
       * the name string in DATA already gives us near-optimal memory
       * usage. */
      dirent->name = (const char *)p;
      len = (apr_size_t)(name_end - p);
      p = name_end + 1;

      dirent->kind = (svn_node_kind_t)*(p++);
      if (p == end)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                            _("Directory entry missing change set in '%s'"),
                            svn_fs_x__id_unparse(id, scratch_pool)->data);

      /* A NULL result means the change set number was cut short. */
      p = svn__decode_int(&dirent->id.change_set, p, end);
      if (p == NULL)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                            _("Directory entry missing change set in '%s'"),
                            svn_fs_x__id_unparse(id, scratch_pool)->data);

      if (p == end)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                            _("Directory entry missing item number in '%s'"),
                            svn_fs_x__id_unparse(id, scratch_pool)->data);

      /* A NULL result means the item number was cut short. */
      p = svn__decode_uint(&dirent->id.number, p, end);
      if (p == NULL)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                            _("Directory entry missing item number in '%s'"),
                            svn_fs_x__id_unparse(id, scratch_pool)->data);

      /* In incremental mode, update the hash; otherwise, write to the
       * final array. */
      if (incremental)
        {
          /* Insertion / update or a deletion? */
          if (svn_fs_x__id_used(&dirent->id))
            apr_hash_set(hash, dirent->name, len, dirent);
          else
            apr_hash_set(hash, dirent->name, len, NULL);
        }
      else
        {
          APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
        }
    }

  if (incremental)
    {
      /* Convert container into a sorted array. */
      apr_hash_index_t *hi;
      for (hi = apr_hash_first(scratch_pool, hash); hi; hi = apr_hash_next(hi))
        APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi);

      if (!sorted(entries))
        svn_sort__array(entries, compare_dirents);
    }
  else
    {
      /* Check that we read the expected amount of entries. */
      if ((apr_uint64_t)entries->nelts != count)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                                 _("Directory length mismatch in '%s'"),
                                 svn_fs_x__id_unparse(id, scratch_pool)->data);
    }

  *entries_p = entries;

  return SVN_NO_ERROR;
}
/* For directory NODEREV in FS, set *FILESIZE to the current size of the
 * file holding its in-txn children list.  If NODEREV has no data
 * representation or that representation belongs to a revision, set
 * *FILESIZE to SVN_INVALID_FILESIZE instead.  Use SCRATCH_POOL for
 * temporaries.
 */
static svn_error_t *
get_txn_dir_info(svn_filesize_t *filesize,
                 svn_fs_t *fs,
                 svn_fs_x__noderev_t *noderev,
                 apr_pool_t *scratch_pool)
{
  const svn_io_dirent2_t *stat_info;
  const char *children_path;

  /* No representation or committed data: there is no file to stat. */
  if (   !noderev->data_rep
      || svn_fs_x__is_revision(noderev->data_rep->id.change_set))
    {
      *filesize = SVN_INVALID_FILESIZE;
      return SVN_NO_ERROR;
    }

  children_path = svn_fs_x__path_txn_node_children(fs, &noderev->noderev_id,
                                                   scratch_pool,
                                                   scratch_pool);
  SVN_ERR(svn_io_stat_dirent2(&stat_info, children_path, FALSE, FALSE,
                              scratch_pool, scratch_pool));
  *filesize = stat_info->filesize;

  return SVN_NO_ERROR;
}
/* Read the entries of directory NODEREV in FS into DIR->ENTRIES.  If they
   are read from the in-txn children file, set DIR->TXN_FILESIZE to the
   size of that file; otherwise, set it to SVN_INVALID_FILESIZE.
   Allocate the entries in RESULT_POOL and use SCRATCH_POOL for
   temporaries. */
static svn_error_t *
get_dir_contents(svn_fs_x__dir_data_t *dir,
                 svn_fs_t *fs,
                 svn_fs_x__noderev_t *noderev,
                 apr_pool_t *result_pool,
                 apr_pool_t *scratch_pool)
{
  const svn_fs_x__id_t *id = &noderev->noderev_id;
  svn_fs_x__representation_t *rep = noderev->data_rep;
  svn_stream_t *contents;
  svn_stringbuf_t *text;
  svn_boolean_t incremental;
  apr_size_t len;

  /* Initialize the result. */
  dir->txn_filesize = SVN_INVALID_FILESIZE;

  /* Empty representation == empty directory.  Nothing to read. */
  if (!rep)
    {
      dir->entries = apr_array_make(result_pool, 0,
                                    sizeof(svn_fs_x__dirent_t *));
      return SVN_NO_ERROR;
    }

  /* Data that is not part of a revision is kept in a children file
     that needs to be parsed incrementally. */
  incremental = !svn_fs_x__is_revision(rep->id.change_set);
  if (incremental)
    {
      apr_file_t *file;
      const char *filename
        = svn_fs_x__path_txn_node_children(fs, id, scratch_pool,
                                           scratch_pool);

      /* The representation is mutable.  Read the old directory
         contents from the mutable children file, followed by the
         changes we've made in this transaction. */
      SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
                               APR_OS_DEFAULT, scratch_pool));

      /* The file size doubles as length hint for the read below. */
      SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
      len = (apr_size_t)dir->txn_filesize;

      contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
    }
  else
    {
      /* The representation is immutable.  Read it normally. */
      len = rep->expanded_size;
      SVN_ERR(svn_fs_x__get_contents(&contents, fs, rep, FALSE,
                                     scratch_pool));
    }

  /* Read the whole stream contents into a single buffer, using LEN as
   * the size hint.
   *
   * The names in DIR->ENTRIES will point directly into TEXT instead of
   * being copies.  Hence, TEXT must live in RESULT_POOL. */
  SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, result_pool));
  SVN_ERR(svn_stream_close(contents));

  /* De-serialize the directory entries. */
  SVN_ERR(parse_dir_entries(&dir->entries, text, incremental, id,
                            result_pool, scratch_pool));

  return SVN_NO_ERROR;
}
/* Return the cache object in FS that is responsible for storing the
 * directory NODEREV and fill the pre-allocated *KEY with the key to use
 * for NODEREV in that cache.
 */
static svn_cache__t *
locate_dir_cache(svn_fs_t *fs,
                 svn_fs_x__id_t *key,
                 svn_fs_x__noderev_t *noderev)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  const svn_fs_x__id_t *key_source;

  if (!noderev->data_rep)
    {
      /* no data rep -> empty directory.
         Use a key that does definitely not clash with non-NULL reps. */
      key->change_set = SVN_FS_X__INVALID_CHANGE_SET;
      key->number = SVN_FS_X__ITEM_INDEX_UNUSED;

      return ffd->dir_cache;
    }

  /* Data in txns must be addressed by noderev ID since the representation
     has not been created, yet.  Committed data can use the simple
     rev,item pair of its representation. */
  key_source = svn_fs_x__is_txn(noderev->noderev_id.change_set)
             ? &noderev->noderev_id
             : &noderev->data_rep->id;
  *key = *key_source;

  return ffd->dir_cache;
}
/* Set *ENTRIES_P to the array of directory entries of NODEREV in FS.
 * Use the cached entries if their recorded txn file size still matches
 * the current one; otherwise, read the directory and try to cache it.
 * Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
svn_error_t *
svn_fs_x__rep_contents_dir(apr_array_header_t **entries_p,
                           svn_fs_t *fs,
                           svn_fs_x__noderev_t *noderev,
                           apr_pool_t *result_pool,
                           apr_pool_t *scratch_pool)
{
  svn_fs_x__id_t key;
  svn_fs_x__dir_data_t *cached;
  svn_fs_x__dir_data_t *fresh;
  svn_boolean_t found;

  /* find the cache we may use */
  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);

  SVN_ERR(svn_cache__get((void **)&cached, &found, cache, &key,
                         result_pool));
  if (found)
    {
      /* Verify that the cached dir info is not stale
       * (no-op for committed data). */
      svn_filesize_t current_size;
      SVN_ERR(get_txn_dir_info(&current_size, fs, noderev, scratch_pool));

      if (current_size == cached->txn_filesize)
        {
          /* Still valid.  Done. */
          *entries_p = cached->entries;
          return SVN_NO_ERROR;
        }
    }

  /* Cache miss or stale data: read in the directory contents. */
  fresh = apr_pcalloc(scratch_pool, sizeof(*fresh));
  SVN_ERR(get_dir_contents(fresh, fs, noderev, result_pool, scratch_pool));
  *entries_p = fresh->entries;

  /* Update the cache, if we are to use one.
   *
   * Don't even attempt to serialize very large directories; it would cause
   * an unnecessary memory allocation peak.  100 bytes/entry is about right.
   */
  if (svn_cache__is_cachable(cache, 100 * fresh->entries->nelts))
    {
      SVN_ERR(svn_cache__set(cache, &key, fresh, scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* Look up NAME in the directory ENTRIES array via
 * svn_sort__array_lookup(), passing HINT through to it.  Return the
 * matching entry or NULL if there is none.
 */
svn_fs_x__dirent_t *
svn_fs_x__find_dir_entry(apr_array_header_t *entries,
                         const char *name,
                         int *hint)
{
  svn_fs_x__dirent_t **slot;

  slot = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
  if (slot == NULL)
    return NULL;

  return *slot;
}
/* Set *DIRENT to the entry called NAME in directory NODEREV in FS, or to
 * NULL if the directory read from disk has no such entry.  *HINT is
 * passed to the cache lookup and gets updated if that lookup found the
 * directory.  If the cache has no up-to-date data, read the directory
 * from disk and try to cache it.  Allocate *DIRENT in RESULT_POOL and
 * use SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent,
                                 svn_fs_t *fs,
                                 svn_fs_x__noderev_t *noderev,
                                 const char *name,
                                 apr_size_t *hint,
                                 apr_pool_t *result_pool,
                                 apr_pool_t *scratch_pool)
{
  svn_boolean_t found = FALSE;
  svn_fs_x__ede_baton_t baton;
  svn_filesize_t filesize;
  svn_fs_x__dir_data_t dir;
  svn_fs_x__dirent_t *match;
  svn_fs_x__dirent_t *copy;

  /* find the cache we may use */
  svn_fs_x__id_t key;
  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);

  SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));

  /* Cache lookup. */
  baton.hint = *hint;
  baton.name = name;
  baton.txn_filesize = filesize;

  SVN_ERR(svn_cache__get_partial((void **)dirent,
                                 &found,
                                 cache,
                                 &key,
                                 svn_fs_x__extract_dir_entry,
                                 &baton,
                                 result_pool));

  if (found)
    {
      /* Remember the new clue only if we found something at that spot. */
      *hint = baton.hint;

      /* Cached data that is still current is all we need. */
      if (!baton.out_of_date)
        return SVN_NO_ERROR;
    }

  /* Not in cache or stale: read in the directory contents from disk. */
  SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
                           scratch_pool));

  /* Update the cache, if we are to use one.
   *
   * Don't even attempt to serialize very large directories; it would
   * cause an unnecessary memory allocation peak.  150 bytes / entry is
   * about right. */
  if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
    {
      SVN_ERR(svn_cache__set(cache, &key, &dir, scratch_pool));
    }

  /* Find the desired entry and return a copy in RESULT_POOL, if found. */
  copy = NULL;
  match = svn_fs_x__find_dir_entry(dir.entries, name, NULL);
  if (match)
    {
      copy = apr_pmemdup(result_pool, match, sizeof(*copy));
      copy->name = apr_pstrdup(result_pool, match->name);
    }

  *dirent = copy;

  return SVN_NO_ERROR;
}
/* Set *PROPLIST to the properties of NODEREV in FS, allocated in
 * RESULT_POOL.  Nodes without property representation get an empty
 * hash.  Properties that are not part of a revision are read from the
 * node's props file; committed ones are looked up in the properties
 * cache first and get cached after reading them.  Use SCRATCH_POOL for
 * temporaries.
 */
svn_error_t *
svn_fs_x__get_proplist(apr_hash_t **proplist,
                       svn_fs_t *fs,
                       svn_fs_x__noderev_t *noderev,
                       apr_pool_t *result_pool,
                       apr_pool_t *scratch_pool)
{
  const svn_fs_x__id_t *noderev_id = &noderev->noderev_id;
  svn_fs_x__representation_t *rep = noderev->prop_rep;

  if (!rep)
    {
      /* return an empty prop list if the node doesn't have any props */
      *proplist = apr_hash_make(result_pool);
      return SVN_NO_ERROR;
    }

  if (!svn_fs_x__is_revision(rep->id.change_set))
    {
      /* Properties not belonging to a revision live in a separate file. */
      svn_stringbuf_t *file_content;
      svn_string_t *as_string;
      const char *filename = svn_fs_x__path_txn_node_props(fs, noderev_id,
                                                           scratch_pool,
                                                           scratch_pool);

      SVN_ERR(svn_stringbuf_from_file2(&file_content, filename,
                                       result_pool));
      as_string = svn_stringbuf__morph_into_string(file_content);

      SVN_ERR_W(svn_fs_x__parse_properties(proplist, as_string, result_pool),
                apr_psprintf(scratch_pool,
                    "malformed property list for node-revision '%s' in '%s'",
                    svn_fs_x__id_unparse(noderev_id, scratch_pool)->data,
                    filename));
    }
  else
    {
      svn_fs_x__data_t *ffd = fs->fsap_data;
      svn_fs_x__pair_cache_key_t key = { 0 };
      svn_stream_t *stream;
      svn_string_t *content;
      svn_boolean_t is_cached;

      /* Committed properties may already be in the cache. */
      key.revision = svn_fs_x__get_revnum(rep->id.change_set);
      key.second = rep->id.number;

      SVN_ERR(svn_cache__get((void **) proplist, &is_cached,
                             ffd->properties_cache, &key, result_pool));
      if (is_cached)
        return SVN_NO_ERROR;

      /* Cache miss.  Read and parse the representation. */
      SVN_ERR(svn_fs_x__get_contents(&stream, fs, rep, FALSE, scratch_pool));
      SVN_ERR(svn_string_from_stream2(&content, stream, rep->expanded_size,
                                      result_pool));

      SVN_ERR_W(svn_fs_x__parse_properties(proplist, content, result_pool),
                apr_psprintf(scratch_pool,
                    "malformed property list for node-revision '%s'",
                    svn_fs_x__id_unparse(noderev_id, scratch_pool)->data));

      /* Remember the result for future lookups. */
      SVN_ERR(svn_cache__set(ffd->properties_cache, &key, *proplist,
                             scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* Set *CONTEXT to a new, zero-initialized changes context for revision
 * REV in FS after checking that REV exists and initializing the
 * context's revision file.  Allocate the context in RESULT_POOL and use
 * SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__create_changes_context(svn_fs_x__changes_context_t **context,
                                 svn_fs_t *fs,
                                 svn_revnum_t rev,
                                 apr_pool_t *result_pool,
                                 apr_pool_t *scratch_pool)
{
  svn_fs_x__changes_context_t *new_context;

  new_context = apr_pcalloc(result_pool, sizeof(*new_context));
  new_context->fs = fs;
  new_context->revision = rev;

  /* Fail early for revisions that don't exist. */
  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
  SVN_ERR(svn_fs_x__rev_file_init(&new_context->revision_file, fs, rev,
                                  result_pool));

  *context = new_context;

  return SVN_NO_ERROR;
}
/* Set *CHANGES to the next block of changed paths list entries for
 * CONTEXT, starting at entry CONTEXT->NEXT, and advance CONTEXT->NEXT by
 * the number of entries returned.  Try the changes caches first and fall
 * back to block_read() on a cache miss.  Allocate the result in
 * RESULT_POOL and use SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__get_changes(apr_array_header_t **changes,
                      svn_fs_x__changes_context_t *context,
                      apr_pool_t *result_pool,
                      apr_pool_t *scratch_pool)
{
  svn_fs_t *fs = context->fs;
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_fs_x__pair_cache_key_t key;
  svn_boolean_t found;
  svn_fs_x__id_t id;

  /* The ID of the changed paths list of the context's revision. */
  id.change_set = svn_fs_x__change_set_by_rev(context->revision);
  id.number = SVN_FS_X__ITEM_INDEX_CHANGES;

  /* try cache lookup first */
  if (!svn_fs_x__is_packed_rev(fs, context->revision))
    {
      /* Non-packed revisions cache individual blocks of changes. */
      svn_fs_x__changes_list_t *cached_list;

      key.revision = context->revision;
      key.second = context->next;

      SVN_ERR(svn_cache__get((void **)&cached_list, &found,
                             ffd->changes_cache, &key, result_pool));
      if (found)
        {
          apr_array_header_t *block;

          /* Where to look next - if there is more data. */
          context->eol = cached_list->eol;
          context->next_offset = cached_list->end_offset;

          /* Return the block as a "proper" APR array. */
          block = apr_array_make(result_pool, 0, sizeof(void *));
          block->elts = (char *)cached_list->changes;
          block->nelts = cached_list->count;
          block->nalloc = cached_list->count;

          *changes = block;
        }
    }
  else
    {
      /* Packed revisions keep their changes in containers that are
         cached under the pack's base revision and the item offset. */
      apr_off_t offset;
      svn_fs_x__changes_get_list_baton_t baton;

      baton.start = (int)context->next;
      baton.eol = &context->eol;

      SVN_ERR(svn_fs_x__item_offset(&offset, &baton.sub_item, fs,
                                    context->revision_file,
                                    &id, scratch_pool));

      key.revision = svn_fs_x__packed_base_rev(fs, context->revision);
      key.second = offset;

      SVN_ERR(svn_cache__get_partial((void **)changes, &found,
                                     ffd->changes_container_cache, &key,
                                     svn_fs_x__changes_get_list_func,
                                     &baton, result_pool));
    }

  if (!found)
    {
      /* 'block-read' will also provide us with the desired data */
      SVN_ERR(block_read((void **)changes, fs, &id,
                         context->revision_file, context,
                         result_pool, scratch_pool));
    }

  /* The next call shall continue behind the entries returned now. */
  context->next += (*changes)->nelts;

  SVN_ERR(dbg__log_access(fs, &id, *changes,
                          SVN_FS_X__ITEM_TYPE_CHANGES, scratch_pool));

  return SVN_NO_ERROR;
}
/* Fetch the representation data (header, txdelta / plain windows)
 * described by ENTRY in FS and hand it to the caches, with KEY providing
 * the revision and item index for the header lookup.  Read the data from
 * REV_FILE.  MAX_OFFSET is passed on to cache_windows() to limit which
 * windows get read.  Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
block_read_contents(svn_fs_t *fs,
                    svn_fs_x__revision_file_t *rev_file,
                    svn_fs_x__p2l_entry_t* entry,
                    svn_fs_x__pair_cache_key_t *key,
                    apr_off_t max_offset,
                    apr_pool_t *scratch_pool)
{
  svn_fs_x__representation_cache_key_t rep_key = { 0 };
  rep_state_t state = { 0 };
  svn_fs_x__rep_header_t *header;
  svn_filesize_t ignored_len;

  /* Translate KEY into the key type used for representation headers. */
  rep_key.revision = (apr_int32_t)key->revision;
  rep_key.is_packed = svn_fs_x__is_packed_rev(fs, rep_key.revision);
  rep_key.item_index = key->second;

  /* Header first, then the windows following it. */
  SVN_ERR(read_rep_header(&header, fs, rev_file, &rep_key, scratch_pool));
  SVN_ERR(init_rep_state(&state, header, fs, rev_file, entry,
                         scratch_pool));
  SVN_ERR(cache_windows(&ignored_len, fs, &state, max_offset,
                        scratch_pool));

  return SVN_NO_ERROR;
}
/* Read ENTRY->SIZE bytes from the current position of REV_FILE and return
 * a stream over them in *STREAM.  Also, verify the data against the
 * low-level checksum stored in ENTRY and return a checksum mismatch error
 * if they differ.  Allocate the result in RESULT_POOL.
 */
static svn_error_t *
read_item(svn_stream_t **stream,
          svn_fs_t *fs,
          svn_fs_x__revision_file_t *rev_file,
          svn_fs_x__p2l_entry_t* entry,
          apr_pool_t *result_pool)
{
  svn_stringbuf_t *buffer;
  apr_uint32_t actual_digest;

  /* Read item into a string buffer of exactly the right length. */
  buffer = svn_stringbuf_create_ensure(entry->size, result_pool);
  buffer->len = entry->size;
  buffer->data[buffer->len] = 0;
  SVN_ERR(svn_fs_x__rev_file_read(rev_file, buffer->data, buffer->len));

  /* Return (construct, calculate) stream and checksum. */
  *stream = svn_stream_from_stringbuf(buffer, result_pool);
  actual_digest = svn__fnv1a_32x4(buffer->data, buffer->len);

  /* Checksums will match most of the time. */
  if (entry->fnv1_checksum != actual_digest)
    {
      svn_checksum_t *expected, *actual;
      apr_uint32_t expected_raw, actual_raw;

      /* Construct proper checksum objects from their digests to allow
       * for nice error messages. */
      expected_raw = htonl(entry->fnv1_checksum);
      expected = svn_checksum__from_digest_fnv1a_32x4(
                        (const unsigned char *)&expected_raw, result_pool);

      actual_raw = htonl(actual_digest);
      actual = svn_checksum__from_digest_fnv1a_32x4(
                        (const unsigned char *)&actual_raw, result_pool);

      /* Construct the full error message with all the info we have. */
      return svn_checksum_mismatch_err(expected, actual, result_pool,
                     _("Low-level checksum mismatch while reading\n"
                       "%s bytes of meta data at offset %s "),
                     apr_off_t_toa(result_pool, entry->size),
                     apr_off_t_toa(result_pool, entry->offset));
    }

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read a block of the
 * changed paths list addressed by ENTRY in FS and return it in *CHANGES.
 * Cache the result.  Read the data from REV_FILE.
 *
 * If MUST_READ is set, read the block that starts at CONTEXT->NEXT_OFFSET
 * within the list and update CONTEXT->NEXT_OFFSET and CONTEXT->EOL
 * afterwards.  Otherwise, read the first block of the list and leave
 * CONTEXT alone.  Allocate *CHANGES in RESULT_POOL and allocate
 * temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_changes(apr_array_header_t **changes,
                   svn_fs_t *fs,
                   svn_fs_x__revision_file_t *rev_file,
                   svn_fs_x__p2l_entry_t* entry,
                   svn_fs_x__changes_context_t *context,
                   svn_boolean_t must_read,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_stream_t *stream;
  svn_fs_x__pair_cache_key_t key;
  svn_fs_x__changes_list_t changes_list;

  /* If we don't have to return any data, just read and cache the first
     block.  This means we won't cache the remaining blocks from longer
     lists right away but only if they are actually needed. */
  apr_size_t next = must_read ? context->next : 0;
  apr_size_t next_offset = must_read ? context->next_offset : 0;

  /* we don't support containers, yet */
  SVN_ERR_ASSERT(entry->item_count == 1);

  /* The item to read / write. */
  key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  key.second = next;

  /* already in cache? */
  if (!must_read)
    {
      svn_boolean_t is_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
                                 scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* Verify the whole list only once.  We don't use the STREAM any further. */
  if (!must_read || next == 0)
    SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));

  /* Seek to the block to read within the changes list. */
  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL,
                                  entry->offset + next_offset));
  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));

  /* read changes from revision file */
  SVN_ERR(svn_fs_x__read_changes(changes, stream, SVN_FS_X__CHANGES_BLOCK_SIZE,
                                 result_pool, scratch_pool));

  /* Describe the block just read.  Offsets are relative to the start of
     the item, i.e. to ENTRY->OFFSET. */
  SVN_ERR(svn_fs_x__rev_file_offset(&changes_list.end_offset, rev_file));
  changes_list.end_offset -= entry->offset;
  changes_list.start_offset = next_offset;
  changes_list.count = (*changes)->nelts;
  changes_list.changes = (svn_fs_x__change_t **)(*changes)->elts;

  /* The list ends here if the block is not full or if the read position
     has reached the end of the item. */
  changes_list.eol =    (changes_list.count < SVN_FS_X__CHANGES_BLOCK_SIZE)
                     || (changes_list.end_offset + 1 >= entry->size);

  /* cache for future reference */
  SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
                         scratch_pool));

  /* Tell the caller where to continue reading and whether there is
   * more data to come. */
  if (must_read)
    {
      context->next_offset = changes_list.end_offset;
      context->eol = changes_list.eol;
    }

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read the changed paths
 * list container addressed by ENTRY in FS and cache it.  If MUST_READ is
 * set, also return the changes list identified by SUB_ITEM in *CHANGES,
 * with CONTEXT being passed on to the list extraction.  Read the data
 * from REV_FILE.
 *
 * Allocate *CHANGES in RESULT_POOL and everything else in SCRATCH_POOL.
 */
static svn_error_t *
block_read_changes_container(apr_array_header_t **changes,
                             svn_fs_t *fs,
                             svn_fs_x__revision_file_t *rev_file,
                             svn_fs_x__p2l_entry_t* entry,
                             apr_uint32_t sub_item,
                             svn_fs_x__changes_context_t *context,
                             svn_boolean_t must_read,
                             apr_pool_t *result_pool,
                             apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_cache__t *cache = ffd->changes_container_cache;
  svn_fs_x__pair_cache_key_t key;
  svn_fs_x__changes_t *parsed;
  svn_stream_t *item_stream;
  svn_revnum_t revision;

  /* Containers are keyed by the pack's base revision and their offset. */
  revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  key.revision = svn_fs_x__packed_base_rev(fs, revision);
  key.second = entry->offset;

  /* Nothing to do if the data is not needed now and already cached. */
  if (!must_read)
    {
      svn_boolean_t already_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&already_cached, cache, &key,
                                 scratch_pool));
      if (already_cached)
        return SVN_NO_ERROR;
    }

  /* Read and parse the container from the revision file. */
  SVN_ERR(read_item(&item_stream, fs, rev_file, entry, scratch_pool));
  SVN_ERR(svn_fs_x__read_changes_container(&parsed, item_stream,
                                           scratch_pool, scratch_pool));

  /* extract requested data */
  if (must_read)
    {
      SVN_ERR(svn_fs_x__changes_get_list(changes, parsed, sub_item,
                                         context, result_pool));
    }

  SVN_ERR(svn_cache__set(cache, &key, parsed, scratch_pool));

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read the node revision
 * addressed by ENTRY in FS, return it in *NODEREV_P and cache it under
 * KEY.  Read the data from REV_FILE.  Allocate *NODEREV_P in RESULT_POOL
 * and allocate temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_noderev(svn_fs_x__noderev_t **noderev_p,
                   svn_fs_t *fs,
                   svn_fs_x__revision_file_t *rev_file,
                   svn_fs_x__p2l_entry_t* entry,
                   svn_fs_x__pair_cache_key_t *key,
                   svn_boolean_t must_read,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_cache__t *cache = ffd->node_revision_cache;
  svn_stream_t *item_stream;

  /* we don't support containers, yet */
  SVN_ERR_ASSERT(entry->item_count == 1);

  /* Nothing to do if the data is not needed now and already cached. */
  if (!must_read)
    {
      svn_boolean_t already_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&already_cached, cache, key,
                                 scratch_pool));
      if (already_cached)
        return SVN_NO_ERROR;
    }

  /* Read and parse the node revision from the revision file. */
  SVN_ERR(read_item(&item_stream, fs, rev_file, entry, scratch_pool));
  SVN_ERR(svn_fs_x__read_noderev(noderev_p, item_stream, result_pool,
                                 scratch_pool));

  SVN_ERR(svn_cache__set(cache, key, *noderev_p, scratch_pool));

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read the node revision
 * container addressed by ENTRY in FS and cache it.  If MUST_READ is set,
 * also return the item identified by SUB_ITEM in *NODEREV_P.  Read the
 * data from REV_FILE.  Allocate *NODEREV_P in RESULT_POOL and allocate
 * temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_noderevs_container(svn_fs_x__noderev_t **noderev_p,
                              svn_fs_t *fs,
                              svn_fs_x__revision_file_t *rev_file,
                              svn_fs_x__p2l_entry_t* entry,
                              apr_uint32_t sub_item,
                              svn_boolean_t must_read,
                              apr_pool_t *result_pool,
                              apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_cache__t *cache = ffd->noderevs_container_cache;
  svn_fs_x__pair_cache_key_t key;
  svn_fs_x__noderevs_t *parsed;
  svn_stream_t *item_stream;
  svn_revnum_t revision;

  /* Containers are keyed by the pack's base revision and their offset. */
  revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  key.revision = svn_fs_x__packed_base_rev(fs, revision);
  key.second = entry->offset;

  /* Nothing to do if the data is not needed now and already cached. */
  if (!must_read)
    {
      svn_boolean_t already_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&already_cached, cache, &key,
                                 scratch_pool));
      if (already_cached)
        return SVN_NO_ERROR;
    }

  /* Read and parse the container from the revision file. */
  SVN_ERR(read_item(&item_stream, fs, rev_file, entry, scratch_pool));
  SVN_ERR(svn_fs_x__read_noderevs_container(&parsed, item_stream,
                                            scratch_pool, scratch_pool));

  /* extract requested data */
  if (must_read)
    {
      SVN_ERR(svn_fs_x__noderevs_get(noderev_p, parsed, sub_item,
                                     result_pool));
    }

  SVN_ERR(svn_cache__set(cache, &key, parsed, scratch_pool));

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read the representation
 * container addressed by ENTRY in FS and cache it.  If MUST_READ is set,
 * also return an extractor object for the item identified by SUB_ITEM in
 * *EXTRACTOR.  Read the data from REV_FILE.  Allocate *EXTRACTOR and the
 * container in RESULT_POOL and all temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_reps_container(svn_fs_x__rep_extractor_t **extractor,
                          svn_fs_t *fs,
                          svn_fs_x__revision_file_t *rev_file,
                          svn_fs_x__p2l_entry_t* entry,
                          apr_uint32_t sub_item,
                          svn_boolean_t must_read,
                          apr_pool_t *result_pool,
                          apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_cache__t *cache = ffd->reps_container_cache;
  svn_fs_x__pair_cache_key_t key;
  svn_fs_x__reps_t *parsed;
  svn_stream_t *item_stream;
  svn_revnum_t revision;

  /* Containers are keyed by the pack's base revision and their offset. */
  revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  key.revision = svn_fs_x__packed_base_rev(fs, revision);
  key.second = entry->offset;

  /* Nothing to do if the data is not needed now and already cached. */
  if (!must_read)
    {
      svn_boolean_t already_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&already_cached, cache, &key,
                                 scratch_pool));
      if (already_cached)
        return SVN_NO_ERROR;
    }

  /* Read and parse the representations container from the revision
     file. */
  SVN_ERR(read_item(&item_stream, fs, rev_file, entry, scratch_pool));
  SVN_ERR(svn_fs_x__read_reps_container(&parsed, item_stream, result_pool,
                                        scratch_pool));

  /* extract requested data */
  if (must_read)
    {
      SVN_ERR(svn_fs_x__reps_get(extractor, fs, parsed, sub_item,
                                 result_pool));
    }

  SVN_ERR(svn_cache__set(cache, &key, parsed, scratch_pool));

  return SVN_NO_ERROR;
}
/* Read the whole (e.g. 64kB) block containing the item identified by ID in
* FS and put all data into cache. If necessary and depending on heuristics,
* neighboring blocks may also get read. The data is being read from
* already open REVISION_FILE, which must be the correct rev / pack file
* w.r.t. ID->CHANGE_SET.
*
* For noderevs and changed path lists, the item fetched can be allocated
* in RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
* The BATON is passed along to the extractor sub-functions and will be
* used only when constructing the *RESULT. SCRATCH_POOL will be used for
* all temporary allocations.
*/
static svn_error_t *
block_read(void **result,
svn_fs_t *fs,
const svn_fs_x__id_t *id,
svn_fs_x__revision_file_t *revision_file,
void *baton,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
svn_fs_x__data_t *ffd = fs->fsap_data;
apr_off_t offset, wanted_offset = 0;
apr_off_t block_start = 0;
apr_uint32_t wanted_sub_item = 0;
svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
apr_array_header_t *entries;
int run_count = 0;
int i;
apr_pool_t *iterpool = svn_pool_create(scratch_pool);
/* don't try this on transaction protorev files */
SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
/* index lookup: find the OFFSET of the item we *must* read plus (in the
* "do-while" block) the list of items in the same block. */
SVN_ERR(svn_fs_x__item_offset(&wanted_offset, &wanted_sub_item, fs,
revision_file, id, iterpool));
offset = wanted_offset;
do
{
/* fetch list of items in the block surrounding OFFSET */
SVN_ERR(svn_fs_x__rev_file_seek(revision_file, &block_start, offset));
SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, fs, revision_file,
revision, block_start,
ffd->block_size, scratch_pool,
scratch_pool));
/* read all items from the block */
for (i = 0; i < entries->nelts; ++i)
{
svn_boolean_t is_result, is_wanted;
apr_pool_t *pool;
svn_fs_x__p2l_entry_t* entry
= &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
/* skip empty sections */
if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
continue;
/* the item / container we were looking for? */
is_wanted = entry->offset == wanted_offset
&& entry->item_count >= wanted_sub_item
&& svn_fs_x__id_eq(entry->items + wanted_sub_item, id);
is_result = result && is_wanted;
/* select the pool that we want the item to be allocated in */
pool = is_result ? result_pool : iterpool;
/* handle all items that start within this block and are relatively
* small (i.e. < block size). Always read the item we need to return.
*/
if (is_result || ( entry->offset >= block_start
&& entry->size < ffd->block_size))
{
void *item = NULL;
svn_fs_x__pair_cache_key_t key = { 0 };
key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
key.second = entry->items[0].number;
SVN_ERR(svn_fs_x__rev_file_seek(revision_file, NULL,
entry->offset));
switch (entry->type)
{
case SVN_FS_X__ITEM_TYPE_FILE_REP:
case SVN_FS_X__ITEM_TYPE_DIR_REP:
case SVN_FS_X__ITEM_TYPE_FILE_PROPS:
case SVN_FS_X__ITEM_TYPE_DIR_PROPS:
SVN_ERR(block_read_contents(fs, revision_file,
entry, &key,
is_wanted
? -1
: block_start + ffd->block_size,
iterpool));
break;
case SVN_FS_X__ITEM_TYPE_NODEREV:
SVN_ERR(block_read_noderev((svn_fs_x__noderev_t **)&item,
fs, revision_file,
entry, &key, is_result,
pool, iterpool));
break;
case SVN_FS_X__ITEM_TYPE_CHANGES:
SVN_ERR(block_read_changes((apr_array_header_t **)&item,
fs, revision_file,
entry, baton, is_result,
pool, iterpool));
break;
case SVN_FS_X__ITEM_TYPE_CHANGES_CONT:
SVN_ERR(block_read_changes_container
((apr_array_header_t **)&item,
fs, revision_file,
entry, wanted_sub_item,
baton, is_result,
pool, iterpool));
break;
case SVN_FS_X__ITEM_TYPE_NODEREVS_CONT:
SVN_ERR(block_read_noderevs_container
((svn_fs_x__noderev_t **)&item,
fs, revision_file,
entry, wanted_sub_item,
is_result, pool, iterpool));
break;
case SVN_FS_X__ITEM_TYPE_REPS_CONT:
SVN_ERR(block_read_reps_container
((svn_fs_x__rep_extractor_t **)&item,
fs, revision_file,
entry, wanted_sub_item,
is_result, pool, iterpool));
break;
default:
break;
}
if (is_result)
*result = item;
/* if we crossed a block boundary, read the remainder of
* the last block as well */
offset = entry->offset + entry->size;
if (offset - block_start > ffd->block_size)
++run_count;
svn_pool_clear(iterpool);
}
}
}
while(run_count++ == 1); /* can only be true once and only if a block
* boundary got crossed */
/* if the caller requested a result, we must have provided one by now */
assert(!result || *result);
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}