blob: 99ec8d6af1e57431b47561197dfb1e4367a07e82 [file] [log] [blame]
/* cached_data.c --- cached (read) access to FSFS data
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*/
#include "cached_data.h"
#include <assert.h>
#include "svn_hash.h"
#include "svn_ctype.h"
#include "svn_sorts.h"
#include "private/svn_delta_private.h"
#include "private/svn_io_private.h"
#include "private/svn_sorts_private.h"
#include "private/svn_subr_private.h"
#include "private/svn_temp_serializer.h"
#include "fs_fs.h"
#include "id.h"
#include "index.h"
#include "low_level.h"
#include "pack.h"
#include "util.h"
#include "temp_serializer.h"
#include "../libsvn_fs/fs-loader.h"
#include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
#include "svn_private_config.h"
/* Forward declaration; see the implementation further down in this file
 * for the docstring.  It is declared here because functions defined above
 * the implementation (e.g. get_node_revision_body and
 * create_rep_state_body) already call it. */
static svn_error_t *
block_read(void **result,
svn_fs_t *fs,
svn_revnum_t revision,
apr_uint64_t item_index,
svn_fs_fs__revision_file_t *revision_file,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
/* Define this to enable access logging via dbg_log_access
#define SVN_FS_FS__LOG_ACCESS
*/
/* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
 * show details on its contents if not NULL. To support format 6 and
 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
 * Use SCRATCH_POOL for temporary allocations.
 *
 * For pre-format7 repos, the display will be restricted.
 *
 * When SVN_FS_FS__LOG_ACCESS is not defined, this function does nothing
 * and simply returns SVN_NO_ERROR.
 */
static svn_error_t *
dbg_log_access(svn_fs_t *fs,
svn_revnum_t revision,
apr_uint64_t item_index,
void *item,
apr_uint32_t item_type,
apr_pool_t *scratch_pool)
{
/* no-op if this macro is not defined */
#ifdef SVN_FS_FS__LOG_ACCESS
fs_fs_data_t *ffd = fs->fsap_data;
apr_off_t end_offset = 0;
svn_fs_fs__p2l_entry_t *entry = NULL;
/* Short display names, indexed by item type.
 * NOTE(review): ITEM_TYPE and ENTRY->TYPE are used as indexes below
 * without a range check; values beyond the 8 entries of this table
 * would read out of bounds. */
static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
"node ", "chgs ", "rep "};
const char *description = "";
const char *type = types[item_type];
const char *pack = "";
apr_off_t offset;
svn_fs_fs__revision_file_t *rev_file;
/* open a private file handle; it gets closed again before returning */
SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
scratch_pool, scratch_pool));
/* determine rev / pack file offset */
SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
item_index, scratch_pool));
/* constructing the pack file description */
if (revision < ffd->min_unpacked_rev)
pack = apr_psprintf(scratch_pool, "%4ld|",
revision / ffd->max_files_per_dir);
/* construct description if possible */
if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
{
/* ITEM is a noderev: show path, predecessor count and rep locations */
node_revision_t *node = item;
const char *data_rep
= node->data_rep
? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
node->data_rep->revision,
node->data_rep->item_index)
: "";
const char *prop_rep
= node->prop_rep
? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
node->prop_rep->revision,
node->prop_rep->item_index)
: "";
description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
node->created_path,
node->predecessor_count,
data_rep,
prop_rep);
}
else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
{
/* ITEM is a rep header; a NULL header is reported as a txdelta window */
svn_fs_fs__rep_header_t *header = item;
if (header == NULL)
description = " (txdelta window)";
else if (header->type == svn_fs_fs__rep_plain)
description = " PLAIN";
else if (header->type == svn_fs_fs__rep_self_delta)
description = " DELTA";
else
description = apr_psprintf(scratch_pool,
" DELTA against %ld/%" APR_UINT64_T_FMT,
header->base_revision,
header->base_item_index);
}
else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
{
/* ITEM is an array of changes: show only the number of elements */
apr_array_header_t *changes = item;
switch (changes->nelts)
{
case 0: description = " no change";
break;
case 1: description = " 1 change";
break;
default: description = apr_psprintf(scratch_pool, " %d changes",
changes->nelts);
}
}
/* some info is only available in format7 repos */
if (svn_fs_fs__use_log_addressing(fs))
{
/* reverse index lookup: get item description in ENTRY */
SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
offset, scratch_pool,
scratch_pool));
if (entry)
{
/* more details: the index entry overrides the caller-provided type */
end_offset = offset + entry->size;
type = types[entry->type];
}
/* line output: start and end are shown as block number : offset in block */
printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
pack, (long)(offset / ffd->block_size),
(long)(offset % ffd->block_size),
(long)(end_offset / ffd->block_size),
(long)(end_offset % ffd->block_size),
type, revision, item_index, description);
}
else
{
/* reduced logging for format 6 and earlier */
printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
" %s\n",
pack, (apr_uint64_t)(offset), type, revision, item_index,
description);
}
/* We don't know when SCRATCH_POOL will be cleared, so close the rev file
explicitly. */
SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
#endif
return SVN_NO_ERROR;
}
/* Seek FILE to OFFSET through svn_io_file_aligned_seek(), using the block
   size stored in the FSFS-specific data of FS as the alignment.
   BUFFER_START and POOL are handed through to that function unchanged. */
static svn_error_t *
aligned_seek(svn_fs_t *fs,
             apr_file_t *file,
             apr_off_t *buffer_start,
             apr_off_t offset,
             apr_pool_t *pool)
{
  fs_fs_data_t *fs_data = fs->fsap_data;
  svn_error_t *seek_err = svn_io_file_aligned_seek(file,
                                                   fs_data->block_size,
                                                   buffer_start,
                                                   offset,
                                                   pool);

  return svn_error_trace(seek_err);
}
/* Open the rev / pack file that contains revision REV of filesystem FS,
   look up where item number ITEM is located in it and seek to that
   position.  On success, return the opened file in *FILE.  An error is
   returned if REV does not exist in FS.  POOL is used for the file
   object as well as for temporary allocations. */
static svn_error_t *
open_and_seek_revision(svn_fs_fs__revision_file_t **file,
                       svn_fs_t *fs,
                       svn_revnum_t rev,
                       apr_uint64_t item,
                       apr_pool_t *pool)
{
  svn_fs_fs__revision_file_t *opened;
  apr_off_t item_offset = -1;

  /* Refuse to touch revisions that are not there. */
  SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));

  /* Open the file first; the offset lookup needs it. */
  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&opened, fs, rev, pool, pool));
  SVN_ERR(svn_fs_fs__item_offset(&item_offset, fs, opened, rev, NULL,
                                 item, pool));

  /* Position the file and only then hand it to the caller. */
  SVN_ERR(aligned_seek(fs, opened->file, NULL, item_offset, pool));
  *file = opened;

  return SVN_NO_ERROR;
}
/* Open the proto-rev file of the transaction that representation REP
   belongs to in filesystem FS and seek to the position of REP within
   that file.  The opened file is returned in *FILE.  POOL is used for
   the file object and for temporary allocations. */
static svn_error_t *
open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
                          svn_fs_t *fs,
                          representation_t *rep,
                          apr_pool_t *pool)
{
  apr_off_t rep_offset;

  SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id,
                                         pool, pool));

  /* In-transaction lookup: no rev file and no revision number apply. */
  SVN_ERR(svn_fs_fs__item_offset(&rep_offset, fs, NULL,
                                 SVN_INVALID_REVNUM, &rep->txn_id,
                                 rep->item_index, pool));

  SVN_ERR(aligned_seek(fs, (*file)->file, NULL, rep_offset, pool));

  return SVN_NO_ERROR;
}
/* Open the file that holds representation REP of filesystem FS and seek
   to the location of REP.  Reps with a transaction ID in use are read
   from the transaction's proto-rev file, all others from the rev / pack
   file of REP->REVISION.  Store the file in *FILE_P.  Perform any
   allocations in POOL. */
static svn_error_t *
open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
                             svn_fs_t *fs,
                             representation_t *rep,
                             apr_pool_t *pool)
{
  /* Uncommitted data lives in the transaction. */
  if (svn_fs_fs__id_txn_used(&rep->txn_id))
    return open_and_seek_transaction(file_p, fs, rep, pool);

  /* Committed data lives in a rev or pack file. */
  return open_and_seek_revision(file_p, fs, rep->revision,
                                rep->item_index, pool);
}
/* Return an SVN_ERR_FS_ID_NOT_FOUND error reporting that ID refers to a
   node that does not exist in filesystem FS.

   The textual form of ID is only needed while the message is being
   formatted (svn_error_createf() stores the formatted message with the
   error itself).  It is therefore created in a short-lived subpool of
   FS->POOL instead of FS->POOL directly, so that repeated errors do not
   keep growing the filesystem's long-lived pool. */
static svn_error_t *
err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
{
  apr_pool_t *tmp_pool = svn_pool_create(fs->pool);
  svn_string_t *id_str = svn_fs_fs__id_unparse(id, tmp_pool);
  svn_error_t *err = svn_error_createf
    (SVN_ERR_FS_ID_NOT_FOUND, NULL,
     _("Reference to non-existent node '%s' in filesystem '%s'"),
     id_str->data, fs->path);

  /* The message has been copied into the error; release the temporaries. */
  svn_pool_destroy(tmp_pool);

  return err;
}
/* Tell whether block-read should be used for FS: return TRUE only if FS
   uses logical addressing and the block-read option is set in its
   FSFS-specific data; return FALSE otherwise. */
static svn_boolean_t
use_block_read(svn_fs_t *fs)
{
  fs_fs_data_t *fs_data;

  /* Without logical addressing, the option is not even looked at. */
  if (!svn_fs_fs__use_log_addressing(fs))
    return FALSE;

  fs_data = fs->fsap_data;
  return fs_data->use_block_read ? TRUE : FALSE;
}
svn_error_t *
svn_fs_fs__fixup_expanded_size(svn_fs_t *fs,
                               representation_t *rep,
                               apr_pool_t *scratch_pool)
{
  svn_checksum_t actual;
  svn_checksum_t *md5_of_empty;
  svn_fs_fs__revision_file_t *rev_file;
  svn_fs_fs__rep_header_t *header;

  /* Nothing to fix if there is no rep, if EXPANDED_SIZE has a value
   * already or if the on-disk SIZE is 0.
   *
   * A SIZE of 0 can only occur with PLAIN reps because every DELTA rep
   * starts with the SVN\1 magic prefix. */
  if (rep == NULL)
    return SVN_NO_ERROR;
  if (rep->expanded_size != 0)
    return SVN_NO_ERROR;
  if (rep->size == 0)
    return SVN_NO_ERROR;

  /* Callers must only pass committed data. */
  assert(!svn_fs_fs__id_txn_used(&rep->txn_id));

  /* EXPANDED_SIZE is 0 from here on.  Should the MD5 differ from the MD5
   * of empty contents, that 0 is wrong and the true value is SIZE.
   *
   * svn_checksum_match() treats an all-zero digest as matching, so a
   * mismatch gets reported only if an MD5 has actually been recorded. */
  md5_of_empty = svn_checksum_empty_checksum(svn_checksum_md5,
                                             scratch_pool);
  actual.kind = svn_checksum_md5;
  actual.digest = rep->md5_digest;
  if (!svn_checksum_match(md5_of_empty, &actual))
    {
      rep->expanded_size = rep->size;
      return SVN_NO_ERROR;
    }

  /* Entries coming from rep-cache.db carry no MD5 (all zero).
   * Apply the same test to the SHA1, if there is one. */
  if (rep->has_sha1)
    {
      svn_checksum_t *sha1_of_empty
        = svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool);

      actual.kind = svn_checksum_sha1;
      actual.digest = rep->sha1_digest;
      if (!svn_checksum_match(sha1_of_empty, &actual))
        {
          rep->expanded_size = rep->size;
          return SVN_NO_ERROR;
        }
    }

  /* What remains is either
   * (1) a non-empty PLAIN rep whose MD5 collides with the empty MD5, or
   * (2) a DELTA rep that produces zero-length output.
   *
   * A DELTA rep with zero-length output is always written as an empty
   * sequence of txdelta windows, i.e. as the 4 bytes "SVN\1".  No other
   * DELTA rep has that size and a 4 byte PLAIN rep would have a
   * different MD5.  So a SIZE of 4 identifies the empty DELTA rep and
   * EXPANDED_SIZE == 0 is correct as it stands.
   *
   * DELTA reps containing a window with 0 length output are technically
   * legal as well.  They take more space on disk and are covered below,
   * together with the just as improbable MD5 collision. */
  if (rep->size == 4)
    return SVN_NO_ERROR;

  /* Still undecided between PLAIN and DELTA.  This case is so rare that
   * we can afford to go to disk and read the representation header. */
  SVN_ERR(open_and_seek_revision(&rev_file, fs, rep->revision,
                                 rep->item_index, scratch_pool));
  SVN_ERR(svn_fs_fs__read_rep_header(&header, rev_file->stream,
                                     scratch_pool, scratch_pool));
  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));

  /* EXPANDED_SIZE needs the correction for PLAIN reps only. */
  if (header->type == svn_fs_fs__rep_plain)
    rep->expanded_size = rep->size;

  return SVN_NO_ERROR;
}
/* Repair known defects in NODEREV, a committed node revision read from
 * FS: reset the is-fresh-txn-root flag and make sure that the data and
 * property reps carry a correct EXPANDED_SIZE.
 * Temporaries are allocated in SCRATCH_POOL.
 */
static svn_error_t *
fixup_node_revision(svn_fs_t *fs,
                    node_revision_t *noderev,
                    apr_pool_t *scratch_pool)
{
  representation_t *reps[2];
  int i;

  /* Workaround issue #4031: is-fresh-txn-root in revision files. */
  noderev->is_fresh_txn_root = FALSE;

  /* Fix EXPANDED_SIZE of the data rep first, then of the prop rep. */
  reps[0] = noderev->data_rep;
  reps[1] = noderev->prop_rep;
  for (i = 0; i < 2; ++i)
    SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, reps[i], scratch_pool));

  return SVN_NO_ERROR;
}
/* Read the node-revision identified by ID from filesystem FS and return
   it in *NODEREV_P, allocated in RESULT_POOL.  Node revisions that are
   part of a transaction are parsed from their own file; committed ones
   are served from the node revision cache if possible and are otherwise
   read from the rev / pack file.  SCRATCH_POOL is used for temporaries.

   svn_fs_fs__get_node_revision wraps this function and adds another
   error. */
static svn_error_t *
get_node_revision_body(node_revision_t **noderev_p,
                       svn_fs_t *fs,
                       const svn_fs_id_t *id,
                       apr_pool_t *result_pool,
                       apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_fs_fs__revision_file_t *rev_file;
  const svn_fs_fs__id_part_t *location;
  pair_cache_key_t cache_key = { 0 };
  svn_boolean_t found = FALSE;

  if (svn_fs_fs__id_is_txn(id))
    {
      /* Transaction node-revs are stored quite differently from those
         in rev / pack files: each one has a file of its own. */
      apr_file_t *txn_file;
      const char *path = svn_fs_fs__path_txn_node_rev(fs, id, scratch_pool);
      svn_error_t *open_err = svn_io_file_open(&txn_file, path,
                                               APR_READ | APR_BUFFERED,
                                               APR_OS_DEFAULT,
                                               scratch_pool);

      if (open_err)
        {
          /* Anything but a missing file is passed on as is. */
          if (!APR_STATUS_IS_ENOENT(open_err->apr_err))
            return svn_error_trace(open_err);

          /* A missing file means that ID points nowhere. */
          svn_error_clear(open_err);
          return svn_error_trace(err_dangling_id(fs, id));
        }

      SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
                                      svn_stream_from_aprfile2(txn_file,
                                                               FALSE,
                                                               scratch_pool),
                                      result_pool, scratch_pool));

      return SVN_NO_ERROR;
    }

  /* Committed noderevs can be cached; they are keyed by revision and
     item number. */
  location = svn_fs_fs__id_rev_item(id);
  cache_key.revision = location->revision;
  cache_key.second = location->number;

  /* If there is a noderev cache, a hit there is all we need. */
  if (ffd->node_revision_cache)
    {
      SVN_ERR(svn_cache__get((void **) noderev_p, &found,
                             ffd->node_revision_cache, &cache_key,
                             result_pool));
      if (found)
        return SVN_NO_ERROR;
    }

  /* Cache miss or no cache: go to disk. */
  SVN_ERR(open_and_seek_revision(&rev_file, fs, location->revision,
                                 location->number, scratch_pool));

  if (!use_block_read(fs))
    {
      /* Without block-read, do the reading, parsing and caching here. */
      SVN_ERR(svn_fs_fs__read_noderev(noderev_p, rev_file->stream,
                                      result_pool, scratch_pool));
      SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));

      /* Put the noderev into the cache, provided caching is enabled. */
      if (ffd->node_revision_cache)
        SVN_ERR(svn_cache__set(ffd->node_revision_cache, &cache_key,
                               *noderev_p, scratch_pool));
    }
  else
    {
      /* block-read will parse the whole block and will also return
         the one noderev that we need right now. */
      SVN_ERR(block_read((void **)noderev_p, fs, location->revision,
                         location->number, rev_file, result_pool,
                         scratch_pool));
    }

  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));

  return SVN_NO_ERROR;
}
/* Read the node-revision ID from FS into *NODEREV_P, allocated in
   RESULT_POOL, using SCRATCH_POOL for temporaries.

   This wraps get_node_revision_body(): an SVN_ERR_FS_CORRUPT error from
   it gets wrapped into another SVN_ERR_FS_CORRUPT error naming the
   node-revision; any other error is returned as is.  The access is
   logged through dbg_log_access() only after a successful read, because
   *NODEREV_P is not guaranteed to have been set on failure and because
   a failing log call must not cause the original error to be dropped. */
svn_error_t *
svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
                             svn_fs_t *fs,
                             const svn_fs_id_t *id,
                             apr_pool_t *result_pool,
                             apr_pool_t *scratch_pool)
{
  const svn_fs_fs__id_part_t *rev_item;
  svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
                                            result_pool, scratch_pool);

  if (err)
    {
      if (err->apr_err == SVN_ERR_FS_CORRUPT)
        {
          svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
          return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                                   "Corrupt node-revision '%s'",
                                   id_string->data);
        }

      /* Do not log: there is no valid noderev to describe. */
      return svn_error_trace(err);
    }

  rev_item = svn_fs_fs__id_rev_item(id);
  SVN_ERR(dbg_log_access(fs,
                         rev_item->revision,
                         rev_item->number,
                         *noderev_p,
                         SVN_FS_FS__ITEM_TYPE_NODEREV,
                         scratch_pool));

  return SVN_NO_ERROR;
}
/* Seek the revision file REV_FILE of filesystem FS to OFFSET, parse the
   node revision found there and return a copy of its ID in *ID_P.  REV
   is the revision that the ID is expected to belong to; this is only
   checked by an assertion.  The result as well as all temporaries are
   allocated in POOL. */
static svn_error_t *
get_fs_id_at_offset(svn_fs_id_t **id_p,
                    svn_fs_fs__revision_file_t *rev_file,
                    svn_fs_t *fs,
                    svn_revnum_t rev,
                    apr_off_t offset,
                    apr_pool_t *pool)
{
  node_revision_t *parsed;
  svn_fs_id_t *id_copy;

  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
  SVN_ERR(svn_fs_fs__read_noderev(&parsed, rev_file->stream, pool, pool));

  /* The ID inside the noderev is const; hand out a modifiable copy. */
  id_copy = svn_fs_fs__id_copy(parsed->id, pool);
  *id_p = id_copy;

  /* Sanity check: the ID must belong to REV.  (Checking the offset
   * instead would be harder because the rev_offset is not known in
   * this function.) */
  assert(svn_fs_fs__id_rev(id_copy) == rev);

  return SVN_NO_ERROR;
}
/* Find the trailer of revision REV in the open revision file REV_FILE of
   filesystem FS.  The trailer specifies the offset of the root node-id
   and the offset of the changed path information.  Return the former in
   *ROOT_OFFSET and the latter in *CHANGES_OFFSET, both as absolute
   offsets within REV_FILE.  Either pointer may be NULL, in which case
   the respective value is not returned.
   Temporaries are allocated in POOL. */
static svn_error_t *
get_root_changes_offset(apr_off_t *root_offset,
                        apr_off_t *changes_offset,
                        svn_fs_fs__revision_file_t *rev_file,
                        svn_fs_t *fs,
                        svn_revnum_t rev,
                        apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  char tail[64];
  apr_size_t tail_len;
  apr_off_t tail_start;
  apr_off_t tail_end;
  apr_off_t base_offset;
  apr_seek_where_t whence;
  svn_stringbuf_t *trailer;

  /* Figure out where the revision ends.

     In a pack file, the end of the revision is not recorded but the
     start of the following revision is, so that is what we use.

     If the following revision belongs to another file, the revision
     ends where the pack file ends and we can seek to the end of the
     file, exactly as for non-packed revisions. */
  if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
    {
      whence = APR_SET;
      SVN_ERR(svn_fs_fs__get_packed_offset(&tail_end, fs, rev + 1, pool));
    }
  else
    {
      tail_end = 0;
      whence = APR_END;
    }

  /* Where the revision starts within the pack file; 0 if not packed. */
  base_offset = 0;
  if (rev_file->is_packed)
    SVN_ERR(svn_fs_fs__get_packed_offset(&base_offset, fs, rev, pool));

  /* The final line with the two offsets is assumed to never exceed
     64 characters. */
  SVN_ERR(svn_io_file_seek(rev_file->file, whence, &tail_end, pool));
  if (tail_end < sizeof(tail))
    {
      /* Fewer bytes than the buffer holds: take everything. */
      tail_start = 0;
      tail_len = (apr_size_t)tail_end;
    }
  else
    {
      tail_start = tail_end - sizeof(tail);
      tail_len = sizeof(tail);
    }

  /* Fetch that last chunk; the last line is somewhere in it. */
  SVN_ERR(aligned_seek(fs, rev_file->file, NULL, tail_start, pool));
  SVN_ERR(svn_io_file_read_full2(rev_file->file, tail, tail_len,
                                 NULL, NULL, pool));

  /* Extract the offsets from the last line. */
  trailer = svn_stringbuf_ncreate(tail, tail_len, pool);
  SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset, changes_offset,
                                            trailer, rev));

  /* Turn revision-relative offsets into file offsets. */
  if (changes_offset)
    *changes_offset += base_offset;
  if (root_offset)
    *root_offset += base_offset;

  return SVN_NO_ERROR;
}
/* Return the ID of the root node of revision REV in FS in *ROOT_ID_P,
   allocated in RESULT_POOL.  An error is returned if REV does not
   exist.  With logical addressing, the ID is constructed directly;
   otherwise it is taken from the root ID cache or, upon a cache miss,
   read from the rev / pack file and added to that cache.
   SCRATCH_POOL is used for temporaries. */
svn_error_t *
svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
                        svn_fs_t *fs,
                        svn_revnum_t rev,
                        apr_pool_t *result_pool,
                        apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_fs_fs__revision_file_t *rev_file;
  svn_fs_id_t *found_id = NULL;
  apr_off_t offset_of_root;
  svn_boolean_t cache_hit;

  SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));

  /* No file access needed with logical addressing. */
  if (svn_fs_fs__use_log_addressing(fs))
    {
      *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
      return SVN_NO_ERROR;
    }

  /* Try the cache first. */
  SVN_ERR(svn_cache__get((void **) root_id_p, &cache_hit,
                         ffd->rev_root_id_cache, &rev, result_pool));
  if (cache_hit)
    return SVN_NO_ERROR;

  /* Read the ID from the revision: the trailer tells us where the root
     noderev is and the noderev contains the ID. */
  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev,
                                           scratch_pool, scratch_pool));
  SVN_ERR(get_root_changes_offset(&offset_of_root, NULL, rev_file, fs,
                                  rev, scratch_pool));
  SVN_ERR(get_fs_id_at_offset(&found_id, rev_file, fs, rev,
                              offset_of_root, result_pool));
  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));

  /* Remember the ID for the next caller. */
  SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, found_id,
                         scratch_pool));

  *root_id_p = found_id;

  return SVN_NO_ERROR;
}
/* Describes a lazily opened rev / pack file. Instances will be shared
between multiple instances of rep_state_t. The file gets opened on
demand by auto_open_shared_file(). */
typedef struct shared_file_t
{
/* The opened file. NULL while the file has not been opened, yet. */
svn_fs_fs__revision_file_t *rfile;
/* file system to open the file in */
svn_fs_t *fs;
/* a revision contained in RFILE. Since this file may be shared,
that value may be different from REP_STATE_T->REVISION. */
svn_revnum_t revision;
/* pool to use when opening RFILE. This guarantees that the file
remains open / valid beyond the respective local context that required
the file to be opened eventually. */
apr_pool_t *pool;
} shared_file_t;
/* Represents where in the current svndiff data block each
representation is. */
typedef struct rep_state_t
{
/* shared lazy-open rev/pack file structure */
shared_file_t *sfile;
/* Caches raw (unparsed) windows. May be NULL.
(Set from FFD->RAW_WINDOW_CACHE by create_rep_state_body.) */
svn_cache__t *raw_window_cache;
/* The txdelta window cache to use or NULL.
(Set from FFD->TXDELTA_WINDOW_CACHE by create_rep_state_body.) */
svn_cache__t *window_cache;
/* Caches un-deltified windows. May be NULL.
(Set from FFD->COMBINED_WINDOW_CACHE by create_rep_state_body.) */
svn_cache__t *combined_cache;
/* revision containing the representation */
svn_revnum_t revision;
/* representation's item index in REVISION */
apr_uint64_t item_index;
/* length of the header at the start of the rep.
0 iff this rep is stored in a container
(i.e. does not have a header) */
apr_size_t header_size;
apr_off_t start; /* The starting offset for the raw
svndiff/plaintext data minus header.
-1 if the offset is yet unknown. */
apr_off_t current;/* The current offset relative to START. */
apr_off_t size; /* The on-disk size of the representation. */
int ver; /* If a delta, what svndiff version?
-1 for unknown delta version. */
int chunk_index; /* number of the window to read */
} rep_state_t;
/* Return the current position of the file underlying RS in *OFFSET.
   This merely calls svn_io_file_get_offset() on RS->SFILE->RFILE->FILE,
   which must have been opened already.  POOL is passed through. */
static svn_error_t *
get_file_offset(apr_off_t *offset,
                rep_state_t *rs,
                apr_pool_t *pool)
{
  apr_file_t *open_file = rs->sfile->rfile->file;
  svn_error_t *err = svn_io_file_get_offset(offset, open_file, pool);

  return svn_error_trace(err);
}
/* Seek the file underlying RS to OFFSET through
   svn_io_file_aligned_seek(), aligned to the block size of the
   filesystem that RS->SFILE refers to.  The file must have been opened
   already.  BUFFER_START and POOL are passed through unchanged. */
static svn_error_t *
rs_aligned_seek(rep_state_t *rs,
                apr_off_t *buffer_start,
                apr_off_t offset,
                apr_pool_t *pool)
{
  shared_file_t *shared = rs->sfile;
  fs_fs_data_t *fs_data = shared->fs->fsap_data;
  svn_error_t *err = svn_io_file_aligned_seek(shared->rfile->file,
                                              fs_data->block_size,
                                              buffer_start, offset, pool);

  return svn_error_trace(err);
}
/* Make sure that FILE->RFILE is open: if it is still NULL, open the
   rev / pack file containing FILE->REVISION in FILE->FS, allocating in
   FILE->POOL.  Do nothing if the file has been opened before. */
static svn_error_t*
auto_open_shared_file(shared_file_t *file)
{
  /* Already open?  Then we are done. */
  if (file->rfile != NULL)
    return SVN_NO_ERROR;

  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
                                           file->revision, file->pool,
                                           file->pool));

  return SVN_NO_ERROR;
}
/* If RS->START is still unknown (-1), set it to the offset at which the
   raw representation data begins: the item's offset as reported by the
   index lookup plus RS->HEADER_SIZE.  Do nothing if RS->START has been
   set before.  Use POOL for temporary allocations. */
static svn_error_t*
auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
{
  shared_file_t *shared;

  /* Known already?  Then there is nothing to do. */
  if (rs->start != -1)
    return SVN_NO_ERROR;

  shared = rs->sfile;
  SVN_ERR(svn_fs_fs__item_offset(&rs->start, shared->fs, shared->rfile,
                                 rs->revision, NULL, rs->item_index,
                                 pool));

  /* The data follows the rep header. */
  rs->start += rs->header_size;

  return SVN_NO_ERROR;
}
/* If the svndiff version in RS->VER is still unknown (-1), read the
   4 byte svndiff marker found at RS->START in the already open file of
   RS.  Return an SVN_ERR_FS_CORRUPT error unless it begins with "SVN".
   Otherwise, store its 4th byte in RS->VER, reset RS->CHUNK_INDEX to 0
   and set RS->CURRENT to 4, i.e. just behind the marker.
   Do nothing if RS->VER has been set before.
   Use POOL for temporary allocations.
 */
static svn_error_t*
auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
{
  char magic[4];

  /* Version known already?  Then there is nothing to do. */
  if (rs->ver != -1)
    return SVN_NO_ERROR;

  SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
  SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, magic,
                                 sizeof(magic), NULL, NULL, pool));

  /* ### Layering violation */
  if (magic[0] != 'S' || magic[1] != 'V' || magic[2] != 'N')
    return svn_error_create
      (SVN_ERR_FS_CORRUPT, NULL,
       _("Malformed svndiff data in representation"));

  rs->ver = magic[3];
  rs->chunk_index = 0;
  rs->current = 4;

  return SVN_NO_ERROR;
}
/* Create a rep state for reading representation REP in filesystem FS and
return it in *REP_STATE together with the rep header in *REP_HEADER.
SHARED_FILE, if not NULL, is used to re-use an already opened pack file
and is updated to the file used by this call. The results are allocated
in RESULT_POOL; SCRATCH_POOL is used for temporaries.
See create_rep_state, which wraps this and adds another error. */
static svn_error_t *
create_rep_state_body(rep_state_t **rep_state,
svn_fs_fs__rep_header_t **rep_header,
shared_file_t **shared_file,
representation_t *rep,
svn_fs_t *fs,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
fs_fs_data_t *ffd = fs->fsap_data;
rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
/* RH is set either by the rep header cache lookup or by reading the
header from the file further down. */
svn_fs_fs__rep_header_t *rh;
svn_boolean_t is_cached = FALSE;
apr_uint64_t estimated_window_storage;
/* If the hint is
* - given,
* - refers to a valid revision,
* - refers to a packed revision,
* - as does the rep we want to read, and
* - refers to the same pack file as the rep
* we can re-use the same, already open file object
*/
svn_boolean_t reuse_shared_file
= shared_file && *shared_file && (*shared_file)->rfile
&& SVN_IS_VALID_REVNUM((*shared_file)->revision)
&& (*shared_file)->revision < ffd->min_unpacked_rev
&& rep->revision < ffd->min_unpacked_rev
&& ( ((*shared_file)->revision / ffd->max_files_per_dir)
== (rep->revision / ffd->max_files_per_dir));
/* key for the rep header cache */
pair_cache_key_t key;
key.revision = rep->revision;
key.second = rep->item_index;
/* continue constructing RS */
rs->size = rep->size;
rs->revision = rep->revision;
rs->item_index = rep->item_index;
rs->raw_window_cache = ffd->raw_window_cache;
rs->ver = -1;
rs->start = -1;
/* Very long files stored as self-delta will produce a huge number of
delta windows. Don't cache them lest we thrash the cache.
Since we don't know the depth of the delta chain, let's assume, the
whole contents get rewritten 3 times, i.e. we need room for 4 times
the contents (hence the factor 4 below).
*/
estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE);
estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
/* Use a window cache only if it exists and if it would accept an item
of the estimated size. */
rs->window_cache = ffd->txdelta_window_cache
&& svn_cache__is_cachable(ffd->txdelta_window_cache,
(apr_size_t)estimated_window_storage)
? ffd->txdelta_window_cache
: NULL;
rs->combined_cache = ffd->combined_window_cache
&& svn_cache__is_cachable(ffd->combined_window_cache,
(apr_size_t)estimated_window_storage)
? ffd->combined_window_cache
: NULL;
/* cache lookup, i.e. skip reading the rep header if possible
(the cache is not consulted for reps within a txn) */
if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
ffd->rep_header_cache, &key, result_pool));
/* initialize the (shared) FILE member in RS */
if (reuse_shared_file)
{
rs->sfile = *shared_file;
}
else
{
/* new, not yet opened file object */
shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
file->revision = rep->revision;
file->pool = result_pool;
file->fs = fs;
rs->sfile = file;
/* remember the current file, if suggested by the caller */
if (shared_file)
*shared_file = file;
}
/* read rep header, if necessary */
if (!is_cached)
{
/* ensure file is open and navigate to the start of rep header */
if (reuse_shared_file)
{
apr_off_t offset;
/* ... we can re-use the same, already open file object.
* This implies that we don't read from a txn.
*/
/* (RS->SFILE has been set to the same value above already.) */
rs->sfile = *shared_file;
SVN_ERR(auto_open_shared_file(rs->sfile));
SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
rep->revision, NULL, rep->item_index,
scratch_pool));
SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
}
else
{
/* otherwise, create a new file object. May or may not be
* an in-txn file.
*/
SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
result_pool));
}
SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
result_pool, scratch_pool));
/* the file position is now just behind the header, i.e. where the
rep data starts */
SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
/* populate the cache if appropriate */
if (! svn_fs_fs__id_txn_used(&rep->txn_id))
{
if (use_block_read(fs))
SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
rs->sfile->rfile, result_pool, scratch_pool));
else
if (ffd->rep_header_cache)
SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
scratch_pool));
}
}
/* finalize */
SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
rs->header_size = rh->header_size;
*rep_state = rs;
*rep_header = rh;
if (rh->type == svn_fs_fs__rep_plain)
/* This is a plaintext, so just return the current rep_state. */
return SVN_NO_ERROR;
/* skip "SVNx" diff marker */
rs->current = 4;
return SVN_NO_ERROR;
}
/* Create a rep_state for reading the representation REP of filesystem FS
   and read its rep header.  Return the rep_state in *REP_STATE and the
   rep header in *REP_HEADER, both allocated in RESULT_POOL.
   SCRATCH_POOL is used for temporaries.

   When a sequence of reps gets read, e.g. a skip delta chain, you may
   pass a non-NULL SHARED_FILE.  It must point to NULL in the first call.
   The function stores the file it used there and tries to re-use it in
   later calls, which may save considerable I/O for packed files and
   reduce the number of open file handles.

   An SVN_ERR_FS_CORRUPT error gets wrapped into another error of the
   same code that names the representation.
 */
static svn_error_t *
create_rep_state(rep_state_t **rep_state,
                 svn_fs_fs__rep_header_t **rep_header,
                 shared_file_t **shared_file,
                 representation_t *rep,
                 svn_fs_t *fs,
                 apr_pool_t *result_pool,
                 apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd;
  const char *rep_str;
  svn_error_t *err = create_rep_state_body(rep_state, rep_header,
                                           shared_file, rep, fs,
                                           result_pool, scratch_pool);

  /* Success and all non-corruption errors are passed through.
     ### Call representation_string() ? */
  if (err == SVN_NO_ERROR || err->apr_err != SVN_ERR_FS_CORRUPT)
    return svn_error_trace(err);

  ffd = fs->fsap_data;

  /* ### This always returns "-1" for transaction reps, because
     ### this particular bit of code doesn't know if the rep is
     ### stored in the protorev or in the mutable area (for props
     ### or dir contents).  It is pretty rare for FSFS to *read*
     ### from the protorev file, though, so this is probably OK.
     ### And anyone going to debug corruption errors is probably
     ### going to jump straight to this comment anyway! */
  if (rep)
    rep_str = svn_fs_fs__unparse_representation(rep, ffd->format, TRUE,
                                                scratch_pool,
                                                scratch_pool)->data;
  else
    rep_str = "(null)";

  return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                           "Corrupt representation '%s'",
                           rep_str);
}
/* Check that representation REP can be found in filesystem FS.

   With logical addressing, look up the index entry at the offset of REP
   and return an SVN_ERR_FS_CORRUPT error if there is none or if its
   type is outside the range SVN_FS_FS__ITEM_TYPE_FILE_REP to
   SVN_FS_FS__ITEM_TYPE_DIR_PROPS.  Otherwise, create a rep state for
   REP, which reads the rep header.

   HINT, if not NULL, carries the file object used by this call over to
   the next one: a revision file with logical addressing, a shared file
   otherwise.  SCRATCH_POOL is used for all allocations. */
svn_error_t *
svn_fs_fs__check_rep(representation_t *rep,
                     svn_fs_t *fs,
                     void **hint,
                     apr_pool_t *scratch_pool)
{
  svn_fs_fs__revision_file_t *rev_file = NULL;
  svn_fs_fs__p2l_entry_t *entry;
  svn_revnum_t start_rev;
  apr_off_t offset;
  svn_boolean_t is_rep;

  if (!svn_fs_fs__use_log_addressing(fs))
    {
      rep_state_t *rs;
      svn_fs_fs__rep_header_t *rep_header;

      /* ### Should this be using read_rep_line() directly? */
      SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
                               rep, fs, scratch_pool, scratch_pool));

      return SVN_NO_ERROR;
    }

  /* Use the revision file passed in through *HINT if there is one and
   * if it is the rev / pack file that we need.  Open the file otherwise. */
  start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
  if (hint)
    rev_file = *(svn_fs_fs__revision_file_t **)hint;

  if (rev_file == NULL || rev_file->start_revision != start_rev)
    SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
                                             scratch_pool, scratch_pool));

  if (hint)
    *hint = rev_file;

  /* This will auto-retry if there was a background pack. */
  SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
                                 NULL, rep->item_index, scratch_pool));

  /* This may fail if there is a background pack operation (can't auto-
     retry because the item offset lookup has to be redone as well). */
  SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
                                      rep->revision, offset,
                                      scratch_pool, scratch_pool));

  /* There must be an entry and it must be one of the rep types. */
  is_rep = entry != NULL
           && entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
           && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS;
  if (!is_rep)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("No representation found at offset %s "
                               "for item %s in revision %ld"),
                             apr_off_t_toa(scratch_pool, offset),
                             apr_psprintf(scratch_pool,
                                          "%" APR_UINT64_T_FMT,
                                          rep->item_index),
                             rep->revision);

  return SVN_NO_ERROR;
}
/* Follow the delta chain that starts at representation REP in FS for as
   long as the rep headers are of type svn_fs_fs__rep_delta and the base
   revision is not 0.  Return the number of reps visited in
   *CHAIN_LENGTH and the number of times the chain moves on to a
   different shard, plus 1, in *SHARD_COUNT.
   SCRATCH_POOL is used for temporaries. */
svn_error_t *
svn_fs_fs__rep_chain_length(int *chain_length,
                            int *shard_count,
                            representation_t *rep,
                            svn_fs_t *fs,
                            apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  /* Without sharding, every revision counts as a shard of its own. */
  svn_revnum_t revs_per_shard = ffd->max_files_per_dir
                              ? ffd->max_files_per_dir
                              : 1;

  /* FILE_POOL keeps open files across iterations, ITERPOOL gets cleared
   * in every iteration. */
  apr_pool_t *file_pool = svn_pool_create(scratch_pool);
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);

  svn_revnum_t current_shard = rep->revision / revs_per_shard;
  int visited = 0;
  int shard_total = 1;

  /* The rep that we currently look at.  It starts out as REP and
   * becomes the respective delta base as we walk along the chain. */
  representation_t cursor = *rep;

  /* re-use open files between iterations */
  shared_file_t *open_file = NULL;

  svn_fs_fs__rep_header_t *header;
  rep_state_t *state;

  /* Walk the delta chain until its end. */
  for (;;)
    {
      svn_pool_clear(iterpool);

      /* Did we cross over into a different shard? */
      if (cursor.revision / revs_per_shard != current_shard)
        {
          current_shard = cursor.revision / revs_per_shard;
          ++shard_total;
        }

      SVN_ERR(create_rep_state_body(&state, &header, &open_file, &cursor,
                                    fs, file_pool, iterpool));

      /* Proceed to the delta base, which is always committed data. */
      cursor.revision = header->base_revision;
      cursor.item_index = header->base_item_index;
      cursor.size = header->base_length;
      svn_fs_fs__id_txn_reset(&cursor.txn_id);

      /* Clear FILE_POOL once in a while.  Doing it too frequently
       * renders OPEN_FILE ineffective.  Doing it too infrequently may
       * leave us with too many open file handles.
       *
       * Note that this is mostly about efficiency, with larger values
       * being more efficient, and any non-zero value is legal here.
       * When reading deltified contents, we may keep 10s of rev files
       * open at the same time and the system has to cope with that.
       * Thus, the limit of 16 chosen below is in the same ballpark.
       */
      ++visited;
      if (visited % 16 == 0)
        {
          open_file = NULL;
          svn_pool_clear(file_pool);
        }

      /* Stop at a non-delta rep or at a base in revision 0. */
      if (header->type != svn_fs_fs__rep_delta || !cursor.revision)
        break;
    }

  *chain_length = visited;
  *shard_count = shard_total;

  svn_pool_destroy(file_pool);
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
/* Baton of the read stream created by svn_fs_fs__get_contents(). It holds
all state that rep_read_contents() needs to deliver the contents of
one representation. */
struct rep_read_baton
{
/* The FS from which we're reading. */
svn_fs_t *fs;
/* Representation to read. */
representation_t rep;
/* If not NULL, this is the base for the first delta window in rs_list */
svn_stringbuf_t *base_window;
/* The state of all prior delta representations. NULL until
rep_read_contents() has to fall back to the window stream. */
apr_array_header_t *rs_list;
/* The plaintext state, if there is a plaintext. */
rep_state_t *src_state;
/* The index of the current delta chunk, if we are reading a delta. */
int chunk_index;
/* The buffer where we store undeltified data: BUF points to BUF_LEN
bytes of which the first BUF_POS have already been delivered.
BUF is NULL if there is no buffered data. */
char *buf;
apr_size_t buf_pos;
apr_size_t buf_len;
/* A checksum context for summing the data read in order to verify it.
Note: we don't need to use the sha1 checksum because we're only doing
data verification, for which md5 is perfectly safe. */
svn_checksum_ctx_t *md5_checksum_ctx;
/* TRUE once MD5_CHECKSUM_CTX has been finalized; prevents finalizing
it a second time. */
svn_boolean_t checksum_finalized;
/* The stored checksum of the representation we are reading, its
length, and the amount we've read so far. Some of this
information is redundant with rs_list and src_state, but it's
convenient for the checksumming code to have it here. */
unsigned char md5_digest[APR_MD5_DIGESTSIZE];
svn_filesize_t len;
svn_filesize_t off;
/* The key for the fulltext cache for this rep, if there is a
fulltext cache. Its revision is SVN_INVALID_REVNUM if the
fulltext shall not be cached. */
pair_cache_key_t fulltext_cache_key;
/* The text we've been reading, if we're going to cache it. */
svn_stringbuf_t *current_fulltext;
/* If not NULL, attempt to read the data from this cache.
Once that lookup fails, reset it to NULL. */
svn_cache__t *fulltext_cache;
/* Bytes delivered from the FULLTEXT_CACHE so far. If the next
lookup fails, we need to skip that much data from the reconstructed
window stream before we continue normal operation. */
svn_filesize_t fulltext_delivered;
/* Used for temporary allocations during the read. Note that
get_contents_from_windows() clears this pool whenever a buffered
window has been consumed completely. */
apr_pool_t *pool;
/* Pool used to store file handles and other data that is persistent
for the entire stream read. */
apr_pool_t *filehandle_pool;
};
/* Fill *KEY with the window cache key made up of the revision, item index
   and chunk index currently stored in RS.  Return KEY so that the call
   can be used directly as a function argument. */
static window_cache_key_t *
get_window_key(window_cache_key_t *key, rep_state_t *rs)
{
  /* The key holds the revision as an unsigned 32 bit value only. */
  assert(rs->revision <= APR_UINT32_MAX);

  key->chunk_index = rs->chunk_index;
  key->item_index = rs->item_index;
  key->revision = (apr_uint32_t)rs->revision;

  return key;
}
/* Implements svn_cache__partial_getter_func_t for raw txdelta windows.
 * DATA is the svn_fs_fs__raw_cached_window_t as found in the cache.
 * Parse the svndiff data it refers to and return the result in *OUT as
 * a svn_fs_fs__txdelta_cached_window_t allocated in RESULT_POOL.
 * DATA_LEN and BATON are not used.
 */
static svn_error_t *
parse_raw_window(void **out,
                 const void *data,
                 apr_size_t data_len,
                 void *baton,
                 apr_pool_t *result_pool)
{
  const svn_fs_fs__raw_cached_window_t *raw = data;
  svn_fs_fs__txdelta_cached_window_t *parsed
    = apr_pcalloc(result_pool, sizeof(*parsed));
  svn_string_t svndiff;
  svn_stream_t *svndiff_stream;

  /* Describe the unparsed window as a string.  Its data pointer has to
     be resolved through svn_temp_deserializer__ptr(). */
  svndiff.len = raw->window.len;
  svndiff.data = svn_temp_deserializer__ptr(raw,
                             (const void * const *)&raw->window.data);

  /* Parse the window from a stream reading that string.
     NOTE(review): the svndiff version is hard-coded to 1 here; confirm
     that no raw window using a different version can end up in the
     cache. */
  svndiff_stream = svn_stream_from_string(&svndiff, result_pool);
  SVN_ERR(svn_txdelta_read_svndiff_window(&parsed->window, svndiff_stream,
                                          1, result_pool));

  /* The end offset is taken over as is. */
  parsed->end_offset = raw->end_offset;

  *out = parsed;
  return SVN_NO_ERROR;
}
/* Look up txdelta window number CHUNK_INDEX of the representation
 * described by RS in the window caches referenced by RS and set
 * *IS_CACHED accordingly.  If RS has no window cache, do nothing but set
 * *IS_CACHED to FALSE.
 *
 * The cache of parsed windows is tried first.  Upon a miss, and if RS
 * has a raw window cache, the window is looked up there, parsed and
 * stored in the cache of parsed windows.
 *
 * Upon a hit, return the window in *WINDOW_P, allocated in RESULT_POOL,
 * and update RS as if the window had just been read: RS->CURRENT becomes
 * the cached end offset and RS->CHUNK_INDEX becomes CHUNK_INDEX.
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
get_cached_window(svn_txdelta_window_t **window_p,
                  rep_state_t *rs,
                  int chunk_index,
                  svn_boolean_t *is_cached,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  svn_fs_fs__txdelta_cached_window_t *entry;
  window_cache_key_t key = { 0 };

  /* Without a window cache, there is nothing to look up. */
  if (! rs->window_cache)
    {
      *is_cached = FALSE;
      return SVN_NO_ERROR;
    }

  /* Address the requested chunk instead of the one RS points at. */
  get_window_key(&key, rs);
  key.chunk_index = chunk_index;

  /* First choice: the readily parsed window. */
  SVN_ERR(svn_cache__get((void **) &entry, is_cached, rs->window_cache,
                         &key, result_pool));

  /* Second choice: the raw window data.  Parse it and remember the
     parsed window for the next lookup. */
  if (!*is_cached && rs->raw_window_cache)
    {
      SVN_ERR(svn_cache__get_partial((void **) &entry, is_cached,
                                     rs->raw_window_cache, &key,
                                     parse_raw_window, NULL, result_pool));
      if (*is_cached)
        SVN_ERR(svn_cache__set(rs->window_cache, &key, entry,
                               scratch_pool));
    }

  if (!*is_cached)
    return SVN_NO_ERROR;

  /* Hand out the window and pretend we had read it from the file. */
  *window_p = entry->window;
  rs->current = entry->end_offset;
  rs->chunk_index = chunk_index;

  return SVN_NO_ERROR;
}
/* Store WINDOW, together with RS->CURRENT as its end offset, in the
 * window cache referenced by RS.  The cache key is built from RS's
 * revision, item index and chunk index.  This is a no-op if RS has no
 * window cache.  Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
set_cached_window(svn_txdelta_window_t *window,
                  rep_state_t *rs,
                  apr_pool_t *scratch_pool)
{
  svn_fs_fs__txdelta_cached_window_t entry;
  window_cache_key_t key = { 0 };

  if (! rs->window_cache)
    return SVN_NO_ERROR;

  /* RS->CURRENT is the first offset _past_ the window just read. */
  entry.end_offset = rs->current;
  entry.window = window;

  get_window_key(&key, rs);
  SVN_ERR(svn_cache__set(rs->window_cache, &key, &entry, scratch_pool));

  return SVN_NO_ERROR;
}
/* Look up the combined window for the rep state RS in the combined window
 * cache referenced by RS.  Set *IS_CACHED to tell the caller whether it
 * was found and, if so, return it in *WINDOW_P, allocated in POOL.
 * If RS has no combined window cache, only set *IS_CACHED to FALSE.
 */
static svn_error_t *
get_cached_combined_window(svn_stringbuf_t **window_p,
                           rep_state_t *rs,
                           svn_boolean_t *is_cached,
                           apr_pool_t *pool)
{
  window_cache_key_t key = { 0 };

  /* Combined window caching has not been enabled. */
  if (! rs->combined_cache)
    {
      *is_cached = FALSE;
      return SVN_NO_ERROR;
    }

  /* Let the cache report hit or miss. */
  get_window_key(&key, rs);
  return svn_cache__get((void **)window_p, is_cached, rs->combined_cache,
                        &key, pool);
}
/* Store the combined WINDOW for the rep state RS in the combined window
 * cache referenced by RS, keyed by RS's revision, item index and chunk
 * index.  This is a no-op if RS has no combined window cache.
 * Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
set_cached_combined_window(svn_stringbuf_t *window,
                           rep_state_t *rs,
                           apr_pool_t *scratch_pool)
{
  window_cache_key_t key = { 0 };

  if (! rs->combined_cache)
    return SVN_NO_ERROR;

  get_window_key(&key, rs);
  return svn_cache__set(rs->combined_cache, &key, window, scratch_pool);
}
/* Build an array of rep_state structures in *LIST giving the delta
reps from FIRST_REP to a plain-text or self-compressed rep. Set
*SRC_STATE to the plain-text rep we find at the end of the chain,
or to NULL if the final delta representation is self-compressed.
The representation to start from is designated by filesystem FS
and representation FIRST_REP.
Also, set *WINDOW_P to the base window content for *LIST, if it
could be found in cache. In that case, the chain is not followed
any further and *SRC_STATE is a pseudo rep state that spans the
cached content.
*LIST is allocated in POOL; POOL is also handed to create_rep_state()
together with an iteration sub-pool of POOL. */
static svn_error_t *
build_rep_list(apr_array_header_t **list,
svn_stringbuf_t **window_p,
rep_state_t **src_state,
svn_fs_t *fs,
representation_t *first_rep,
apr_pool_t *pool)
{
representation_t rep;
rep_state_t *rs = NULL;
svn_fs_fs__rep_header_t *rep_header;
svn_boolean_t is_cached = FALSE;
shared_file_t *shared_file = NULL;
apr_pool_t *iterpool = svn_pool_create(pool);
*list = apr_array_make(pool, 1, sizeof(rep_state_t *));
/* Work on a copy because REP gets redirected to the respective delta
base at the end of each iteration. */
rep = *first_rep;
/* for the top-level rep, we need the rep_args */
SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
iterpool));
while (1)
{
svn_pool_clear(iterpool);
/* fetch state, if that has not been done already */
if (!rs)
SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
&rep, fs, pool, iterpool));
/* for txn reps, there won't be a cached combined window */
if (!svn_fs_fs__id_txn_used(&rep.txn_id))
SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
if (is_cached)
{
/* We already have a reconstructed window in our cache.
Write a pseudo rep_state with the full length. */
rs->start = 0;
rs->current = 0;
rs->size = (*window_p)->len;
*src_state = rs;
break;
}
if (rep_header->type == svn_fs_fs__rep_plain)
{
/* This is a plaintext, so just return the current rep_state. */
*src_state = rs;
break;
}
/* Push this rep onto the list. If it's self-compressed, we're done. */
APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
if (rep_header->type == svn_fs_fs__rep_self_delta)
{
*src_state = NULL;
break;
}
/* Continue with the delta base named by the rep header. The txn ID
gets reset for it. */
rep.revision = rep_header->base_revision;
rep.item_index = rep_header->base_item_index;
rep.size = rep_header->base_length;
svn_fs_fs__id_txn_reset(&rep.txn_id);
/* Make the next iteration create a new rep state for the base. */
rs = NULL;
}
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
/* Create a rep_read_baton for reading representation REP in filesystem
   FS and return it in *RB_P.  FULLTEXT_CACHE_KEY is stored in the baton
   as is; no fulltext cache gets assigned here.  The baton, its checksum
   context and its two sub-pools are allocated in POOL.  The window
   stream state (rs_list, src_state) is left unset. */
static svn_error_t *
rep_read_get_baton(struct rep_read_baton **rb_p,
                   svn_fs_t *fs,
                   representation_t *rep,
                   pair_cache_key_t fulltext_cache_key,
                   apr_pool_t *pool)
{
  struct rep_read_baton *rb = apr_pcalloc(pool, sizeof(*rb));

  /* What to read. */
  rb->fs = fs;
  rb->rep = *rep;

  /* No window data has been fetched, yet. */
  rb->base_window = NULL;
  rb->chunk_index = 0;
  rb->buf = NULL;

  /* Checksum verification state and the expected result. */
  rb->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
  rb->checksum_finalized = FALSE;
  memcpy(rb->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));

  /* Total fulltext length and read progress. */
  rb->len = rep->expanded_size;
  rb->off = 0;

  rb->fulltext_cache_key = fulltext_cache_key;

  /* Separate pools for temporary data and for data that must live as
     long as the stream. */
  rb->pool = svn_pool_create(pool);
  rb->filehandle_pool = svn_pool_create(pool);

  /* Fulltext cache access is off until the caller enables it. */
  rb->fulltext_cache = NULL;
  rb->fulltext_delivered = 0;
  rb->current_fulltext = NULL;

  /* Hand out the new baton. */
  *rb_p = rb;

  return SVN_NO_ERROR;
}
/* Skip forwards to THIS_CHUNK in rep state RS and then read the next delta
window into *NWIN, allocated in RESULT_POOL. The window caches are
consulted before any data is read from the file. Note that
RS->CHUNK_INDEX will be THIS_CHUNK rather than THIS_CHUNK + 1 when this
function returns. RS->CHUNK_INDEX must not be larger than THIS_CHUNK.
Use SCRATCH_POOL for temporary allocations.
Return SVN_ERR_FS_CORRUPT if a window extends beyond the end of the
representation. */
static svn_error_t *
read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
rep_state_t *rs, apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
svn_boolean_t is_cached;
apr_off_t start_offset;
apr_off_t end_offset;
apr_pool_t *iterpool;
/* We can only skip forward. */
SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
/* Read the next window. But first, try to find it in the cache. */
SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
result_pool, scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
/* someone has to actually read the data from file. Open it */
SVN_ERR(auto_open_shared_file(rs->sfile));
/* invoke the 'block-read' feature for non-txn data.
However, don't do that if we are in the middle of some representation,
because the block is unlikely to contain other data. */
if ( rs->chunk_index == 0
&& SVN_IS_VALID_REVNUM(rs->revision)
&& use_block_read(rs->sfile->fs)
&& rs->raw_window_cache)
{
SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
rs->sfile->rfile, result_pool, scratch_pool));
/* reading the whole block probably also provided us with the
desired txdelta window */
SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
result_pool, scratch_pool));
if (is_cached)
return SVN_NO_ERROR;
}
/* data is still not cached -> we need to read it.
Make sure we have all the necessary info. */
SVN_ERR(auto_set_start_offset(rs, scratch_pool));
SVN_ERR(auto_read_diff_version(rs, scratch_pool));
/* RS->FILE may be shared between RS instances -> make sure we point
* to the right data. */
start_offset = rs->start + rs->current;
SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
/* Skip windows to reach the current chunk if we aren't there yet. */
iterpool = svn_pool_create(scratch_pool);
while (rs->chunk_index < this_chunk)
{
svn_pool_clear(iterpool);
SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
rs->ver, iterpool));
rs->chunk_index++;
/* Keep RS->CURRENT in sync with the file position, relative to the
start of the representation. */
SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
rs->current = start_offset - rs->start;
/* There must be data left for the window that we still want to read. */
if (rs->current >= rs->size)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("Reading one svndiff window read "
"beyond the end of the "
"representation"));
}
svn_pool_destroy(iterpool);
/* Actually read the next window. */
SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
rs->ver, result_pool));
SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
rs->current = end_offset - rs->start;
/* The window may end exactly at the end of the representation but not
behind it. */
if (rs->current > rs->size)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("Reading one svndiff window read beyond "
"the end of the representation"));
/* the window has not been cached before, thus cache it now
* (if caching is used for them at all) */
if (SVN_IS_VALID_REVNUM(rs->revision))
SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
return SVN_NO_ERROR;
}
/* Read SIZE bytes from the representation RS, starting at RS's current
   position, and return them in *NWIN, allocated in RESULT_POOL.  The
   returned buffer is NUL-terminated and its length is SIZE.  Advance
   RS->CURRENT by SIZE.  Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
                  apr_size_t size, apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  apr_off_t offset;

  /* RS->FILE may be shared between RS instances -> make sure we point
   * to the right data. */
  SVN_ERR(auto_open_shared_file(rs->sfile));
  SVN_ERR(auto_set_start_offset(rs, scratch_pool));

  offset = rs->start + rs->current;
  SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));

  /* Read the plain data.  Only the buffer itself needs to survive this
     call, so the read function gets the SCRATCH_POOL. */
  *nwin = svn_stringbuf_create_ensure(size, result_pool);
  SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
                                 NULL, NULL, scratch_pool));

  /* Make the string buffer describe what we just put into it.  A freshly
     created buffer reports a length of 0. */
  (*nwin)->data[size] = 0;
  (*nwin)->len = size;

  /* Update RS. */
  rs->current += (apr_off_t)size;

  return SVN_NO_ERROR;
}
/* Get the undeltified window that is a result of combining all deltas
from the current desired representation identified in *RB with its
base representation. Store the window in *RESULT.
The window read is number RB->CHUNK_INDEX of each rep state in
RB->RS_LIST. *RESULT is allocated in a sub-pool of RB->POOL.
Return SVN_ERR_FS_CORRUPT if applying a window does not produce the
target length announced by that window. */
static svn_error_t *
get_combined_window(svn_stringbuf_t **result,
struct rep_read_baton *rb)
{
apr_pool_t *pool, *new_pool, *window_pool;
int i;
apr_array_header_t *windows;
svn_stringbuf_t *source, *buf = rb->base_window;
rep_state_t *rs;
apr_pool_t *iterpool;
/* Read all windows that we need to combine. This is fine because
the size of each window is relatively small (100kB) and skip-
delta limits the number of deltas in a chain to well under 100.
Stop early if one of them does not depend on its predecessors. */
window_pool = svn_pool_create(rb->pool);
windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
iterpool = svn_pool_create(rb->pool);
for (i = 0; i < rb->rs_list->nelts; ++i)
{
svn_txdelta_window_t *window;
svn_pool_clear(iterpool);
rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
iterpool));
APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
/* A window without source ops does not need any base data. Increment
I nonetheless, such that it is the number of windows read -- just
as if the loop had ended normally. */
if (window->src_ops == 0)
{
++i;
break;
}
}
/* Combine in the windows from the other delta reps. Walk the list
backwards, i.e. start with the window closest to the base. */
pool = svn_pool_create(rb->pool);
for (--i; i >= 0; --i)
{
svn_txdelta_window_t *window;
svn_pool_clear(iterpool);
rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
/* Maybe, we've got a PLAIN start representation. If we do, read
as much data from it as the needed for the txdelta window's source
view.
Note that BUF / SOURCE may only be NULL in the first iteration.
Also note that we may have short-cut reading the delta chain --
in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
source = buf;
if (source == NULL && rb->src_state != NULL && window->src_ops)
SVN_ERR(read_plain_window(&source, rb->src_state, window->sview_len,
pool, iterpool));
/* Combine this window with the current one. */
new_pool = svn_pool_create(rb->pool);
buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
buf->len = window->tview_len;
svn_txdelta_apply_instructions(window, source ? source->data : NULL,
buf->data, &buf->len);
if (buf->len != window->tview_len)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("svndiff window length is "
"corrupt"));
/* Cache windows only if the whole rep content could be read as a
single chunk. Only then will no other chunk need a deeper RS
list than the cached chunk. */
if ( (rb->chunk_index == 0) && (rs->current == rs->size)
&& SVN_IS_VALID_REVNUM(rs->revision))
SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
rs->chunk_index++;
/* Cycle pools so that we only need to hold three windows at a time.
POOL held the source of this iteration, which is not needed anymore;
NEW_POOL holds the result, which is the source of the next one. */
svn_pool_destroy(pool);
pool = new_pool;
}
svn_pool_destroy(iterpool);
svn_pool_destroy(window_pool);
/* POOL, which holds BUF, is not destroyed here because the caller
still needs the data. */
*result = buf;
return SVN_NO_ERROR;
}
/* Return TRUE if a fulltext of SIZE bytes may be put into the fulltext
 * cache of FFD and FALSE otherwise.  Sizes of APR_SIZE_MAX or more are
 * never cacheable; for all others, the cache gets asked.
 */
static svn_boolean_t
fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
{
  /* The size must be representable as apr_size_t before we can ask. */
  if (size >= APR_SIZE_MAX)
    return FALSE;

  return svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size) != 0;
}
/* Close method used on streams returned by read_representation().
*/
static svn_error_t *
rep_read_contents_close(void *baton)
{
struct rep_read_baton *rb = baton;
svn_pool_destroy(rb->pool);
svn_pool_destroy(rb->filehandle_pool);
return SVN_NO_ERROR;
}
/* Return the next *LEN bytes of the rep from our plain / delta windows
and store them in *BUF. Upon return, *LEN is the number of bytes
actually stored, which is smaller than requested if the end of the
representation has been reached. RB->RS_LIST must have been set up. */
static svn_error_t *
get_contents_from_windows(struct rep_read_baton *rb,
char *buf,
apr_size_t *len)
{
apr_size_t copy_len, remaining = *len;
char *cur = buf;
rep_state_t *rs;
/* Special case for when there are no delta reps, only a plain
text. */
if (rb->rs_list->nelts == 0)
{
copy_len = remaining;
rs = rb->src_state;
if (rb->base_window != NULL)
{
/* We got the desired rep directly from the cache.
This is where we need the pseudo rep_state created
by build_rep_list(). */
apr_size_t offset = (apr_size_t)rs->current;
/* Clip the copy range to the data left in the cached window. */
if (offset >= rb->base_window->len)
copy_len = 0ul;
else if (copy_len > rb->base_window->len - offset)
copy_len = rb->base_window->len - offset;
memcpy (cur, rb->base_window->data + offset, copy_len);
}
else
{
apr_off_t offset;
/* Don't read beyond the end of the representation. */
if (((apr_off_t) copy_len) > rs->size - rs->current)
copy_len = (apr_size_t) (rs->size - rs->current);
SVN_ERR(auto_open_shared_file(rs->sfile));
SVN_ERR(auto_set_start_offset(rs, rb->pool));
offset = rs->start + rs->current;
SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
copy_len, NULL, NULL, rb->pool));
}
rs->current += copy_len;
*len = copy_len;
return SVN_NO_ERROR;
}
/* Delta case: alternate between delivering buffered window data and
reconstructing the next window until the request has been served
or the end of the representation has been reached. */
while (remaining > 0)
{
/* If we have buffered data from a previous chunk, use that. */
if (rb->buf)
{
/* Determine how much to copy from the buffer. */
copy_len = rb->buf_len - rb->buf_pos;
if (copy_len > remaining)
copy_len = remaining;
/* Actually copy the data. */
memcpy(cur, rb->buf + rb->buf_pos, copy_len);
rb->buf_pos += copy_len;
cur += copy_len;
remaining -= copy_len;
/* If the buffer is all used up, clear it and empty the
local pool. Note that this also destroys all sub-pools
of RB->POOL. */
if (rb->buf_pos == rb->buf_len)
{
svn_pool_clear(rb->pool);
rb->buf = NULL;
}
}
else
{
svn_stringbuf_t *sbuf = NULL;
/* The first rep state in the list tells us whether there is
another window to read. */
rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
if (rs->current == rs->size)
break;
/* Get more buffered data by evaluating a chunk. */
SVN_ERR(get_combined_window(&sbuf, rb));
rb->chunk_index++;
rb->buf_len = sbuf->len;
rb->buf = sbuf->data;
rb->buf_pos = 0;
}
}
/* Report the number of bytes actually delivered. */
*len = cur - buf;
return SVN_NO_ERROR;
}
/* Baton type for get_fulltext_partial. It describes the range of the
cached fulltext to copy and receives the number of bytes copied. */
typedef struct fulltext_baton_t
{
/* Target buffer to write to; of at least LEN bytes. */
char *buffer;
/* Offset within the respective fulltext at which we shall start to
copy data into BUFFER. Offsets beyond the end of the fulltext
result in no data being copied. */
apr_size_t start;
/* Number of bytes to copy. The actual amount may be less in case
the fulltext is short(er). */
apr_size_t len;
/* Number of bytes actually copied into BUFFER. */
apr_size_t read;
} fulltext_baton_t;
/* Implements svn_cache__partial_getter_func_t for fulltext caches.
 * DATA is the cached fulltext of DATA_LEN bytes, the last of which is
 * treated as a terminating NUL that is not part of the fulltext.  Copy
 * the range given by the fulltext_baton_t* BATON, clipped to the
 * fulltext length, into the buffer of that baton and store the number
 * of bytes copied in the baton.  OUT and RESULT_POOL are not used.
 */
static svn_error_t *
get_fulltext_partial(void **out,
                     const void *data,
                     apr_size_t data_len,
                     void *baton,
                     apr_pool_t *result_pool)
{
  fulltext_baton_t *request = baton;
  const char *fulltext = data;

  /* Don't count the NUL appended to the cached fulltext. */
  const apr_size_t fulltext_len = data_len - 1;

  /* Start at the requested offset but not behind the end of the text. */
  apr_size_t start = request->start;
  apr_size_t available;

  if (start > fulltext_len)
    start = fulltext_len;

  /* Deliver whatever is smaller: the request or the remaining text. */
  available = fulltext_len - start;
  request->read = (available < request->len) ? available : request->len;

  memcpy(request->buffer, fulltext + start, request->read);

  return SVN_NO_ERROR;
}
/* Look up the fulltext addressed by BATON->FULLTEXT_CACHE_KEY in
 * BATON->FULLTEXT_CACHE and set *CACHED to tell whether it was found.
 * If so, copy up to *LEN bytes, starting at the position already
 * delivered from the cache, into BUFFER, set *LEN to the number of bytes
 * copied and advance BATON->FULLTEXT_DELIVERED accordingly.
 * Upon a cache miss, *LEN and BATON remain unchanged.
 */
static svn_error_t *
get_contents_from_fulltext(svn_boolean_t *cached,
                           struct rep_read_baton *baton,
                           char *buffer,
                           apr_size_t *len)
{
  void *unused;
  fulltext_baton_t request;

  /* The read position must be usable as an in-memory offset. */
  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
                 == baton->fulltext_delivered);

  /* Describe what we want to get from the cached fulltext. */
  request.read = 0;
  request.len = *len;
  request.start = (apr_size_t)baton->fulltext_delivered;
  request.buffer = buffer;

  SVN_ERR(svn_cache__get_partial(&unused, cached, baton->fulltext_cache,
                                 &baton->fulltext_cache_key,
                                 get_fulltext_partial, &request,
                                 baton->pool));

  /* Nothing has been delivered upon a cache miss. */
  if (!*cached)
    return SVN_NO_ERROR;

  *len = request.read;
  baton->fulltext_delivered += request.read;

  return SVN_NO_ERROR;
}
/* Return the size to request for a string buffer that shall hold a
 * (full-)text of NEEDED bytes.  The result is never smaller than NEEDED.
 *
 * Such buffers may be very large and can cause memory fragmentation.
 * A simple heuristic is applied to make fragmentation less likely:
 * requests larger than SVN__STREAM_CHUNK_SIZE are sized such that
 * request plus an assumed overhead is SVN__STREAM_CHUNK_SIZE times
 * a power of two.
 */
static apr_size_t
optimimal_allocation_size(apr_size_t needed)
{
  /* Assumed overhead per allocation, shared between OS memory management,
   * APR memory management and svn_stringbuf_t. */
  const apr_size_t overhead = 0x400;
  apr_size_t candidate;

  /* Sizes that are fine for other ephemeral buffers are fine for ours.
   * Also, skip the heuristic for sizes where the doubling below might
   * overflow; this check covers NEEDED == APR_SIZE_MAX as well. */
  if (needed <= SVN__STREAM_CHUNK_SIZE
      || needed >= APR_SIZE_MAX / 2 - overhead)
    return needed;

  /* Starting with SVN__STREAM_CHUNK_SIZE, which is known to be smaller
   * than NEEDED, keep doubling until NEEDED plus OVERHEAD fits in. */
  for (candidate = SVN__STREAM_CHUNK_SIZE;
       candidate - overhead < needed;
       candidate *= 2)
    ;

  /* This is above or equal to NEEDED. */
  return candidate - overhead;
}
/* After a fulltext cache lookup failure, we will continue to read from
 * combined delta or plain windows.  However, we must first make that data
 * stream in BATON catch up to the position LEN already delivered from the
 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
 * want to cache it at the end.
 *
 * Return SVN_ERR_FS_CORRUPT if the window stream ends before LEN bytes
 * could be read from it.
 */
static svn_error_t *
skip_contents(struct rep_read_baton *baton,
              svn_filesize_t len)
{
  svn_error_t *err = SVN_NO_ERROR;

  /* Do we want to cache the reconstructed fulltext? */
  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
    {
      char *buffer;
      svn_filesize_t to_alloc = MAX(len, baton->len);

      /* This should only be happening if BATON->LEN and LEN are
       * cacheable, implying they fit into memory. */
      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);

      /* Allocate the fulltext buffer. */
      baton->current_fulltext = svn_stringbuf_create_ensure(
                        optimimal_allocation_size((apr_size_t)to_alloc),
                        baton->filehandle_pool);

      /* Read LEN bytes from the window stream and store the data
       * in the fulltext buffer (will be filled by further reads later). */
      baton->current_fulltext->len = (apr_size_t)len;
      baton->current_fulltext->data[(apr_size_t)len] = 0;

      buffer = baton->current_fulltext->data;
      while (len > 0 && !err)
        {
          apr_size_t to_read = (apr_size_t)len;
          err = get_contents_from_windows(baton, buffer, &to_read);

          /* No data and no error means that the stream has ended.
           * Bail out instead of retrying forever. */
          if (!err && to_read == 0)
            err = svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                   _("Unexpected end of representation "
                                     "while skipping contents"));

          len -= to_read;
          buffer += to_read;
        }
    }
  else if (len > 0)
    {
      /* Simply drain LEN bytes from the window stream.
       *
       * The drain buffer must not live in BATON->POOL or a sub-pool of
       * it because get_contents_from_windows() clears BATON->POOL
       * whenever a window has been consumed completely. */
      apr_pool_t *subpool = svn_pool_create(baton->filehandle_pool);
      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);

      while (len > 0 && !err)
        {
          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
                             ? SVN__STREAM_CHUNK_SIZE
                             : (apr_size_t)len;

          err = get_contents_from_windows(baton, buffer, &to_read);

          /* As above: end of stream without having skipped everything. */
          if (!err && to_read == 0)
            err = svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                   _("Unexpected end of representation "
                                     "while skipping contents"));

          len -= to_read;
        }

      svn_pool_destroy(subpool);
    }

  return svn_error_trace(err);
}
/* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
representation and store them in *BUF. Upon return, *LEN is the number
of bytes actually stored.
Data is served from the fulltext cache for as long as the lookups
succeed. After that, it is read from the window stream. In that case,
sum as we read and verify the MD5 sum at the end. Also, put the
fulltext into the fulltext cache once it is complete, if it has been
collected in BATON->CURRENT_FULLTEXT. */
static svn_error_t *
rep_read_contents(void *baton,
char *buf,
apr_size_t *len)
{
struct rep_read_baton *rb = baton;
/* Get data from the fulltext cache for as long as we can. */
if (rb->fulltext_cache)
{
svn_boolean_t cached;
SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
if (cached)
return SVN_NO_ERROR;
/* Cache miss. From now on, we will never read from the fulltext
* cache for this representation anymore. */
rb->fulltext_cache = NULL;
}
/* No fulltext cache to help us. We must read from the window stream. */
if (!rb->rs_list)
{
/* Window stream not initialized, yet. Do it now. */
rb->len = rb->rep.expanded_size;
SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
&rb->src_state, rb->fs, &rb->rep,
rb->filehandle_pool));
/* In case we did read from the fulltext cache before, make the
* window stream catch up. Also, initialize the fulltext buffer
* if we want to cache the fulltext at the end. */
SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
}
/* Get the next block of data. */
SVN_ERR(get_contents_from_windows(rb, buf, len));
/* Collect the data if we want to cache the fulltext later. */
if (rb->current_fulltext)
svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
/* Perform checksumming. We want to check the checksum as soon as
the last byte of data is read, in case the caller never performs
a short read, but we don't want to finalize the MD5 context
twice. */
if (!rb->checksum_finalized)
{
SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
rb->off += *len;
if (rb->off == rb->len)
{
svn_checksum_t *md5_checksum;
svn_checksum_t expected;
expected.kind = svn_checksum_md5;
expected.digest = rb->md5_digest;
rb->checksum_finalized = TRUE;
SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
rb->pool));
if (!svn_checksum_match(md5_checksum, &expected))
return svn_error_create(SVN_ERR_FS_CORRUPT,
svn_checksum_mismatch_err(&expected, md5_checksum,
rb->pool,
_("Checksum mismatch while reading representation")),
NULL);
}
}
/* Once all data has been read, cache the fulltext that we collected.
Reset the buffer pointer such that this happens only once. */
if (rb->off == rb->len && rb->current_fulltext)
{
fs_fs_data_t *ffd = rb->fs->fsap_data;
SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
rb->current_fulltext, rb->pool));
rb->current_fulltext = NULL;
}
return SVN_NO_ERROR;
}
/* Return in *CONTENTS_P a read stream, allocated in POOL, that delivers
   the contents of representation REP in filesystem FS.  If REP is NULL,
   return an empty stream.

   The stream looks its data up in the fulltext cache of FS, and buffers
   the reconstructed fulltext for caching, only if that cache exists,
   CACHE_FULLTEXT is set, REP has a valid revision and its expanded size
   is cacheable. */
svn_error_t *
svn_fs_fs__get_contents(svn_stream_t **contents_p,
                        svn_fs_t *fs,
                        representation_t *rep,
                        svn_boolean_t cache_fulltext,
                        apr_pool_t *pool)
{
  fs_fs_data_t *ffd;
  struct rep_read_baton *rb;
  pair_cache_key_t fulltext_cache_key = { 0 };
  svn_boolean_t use_fulltext_cache;

  /* No representation, no contents. */
  if (! rep)
    {
      *contents_p = svn_stream_empty(pool);
      return SVN_NO_ERROR;
    }

  ffd = fs->fsap_data;

  /* Initialize the reader baton.  Some members may be added lazily
   * while reading from the stream. */
  fulltext_cache_key.revision = rep->revision;
  fulltext_cache_key.second = rep->item_index;
  SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));

  /* Make the stream attempt fulltext cache lookups if the fulltext
   * is cacheable.  If it is not, then also don't try to buffer and
   * cache it. */
  use_fulltext_cache = ffd->fulltext_cache
                       && cache_fulltext
                       && SVN_IS_VALID_REVNUM(rep->revision)
                       && fulltext_size_is_cachable(ffd, rep->expanded_size);
  if (use_fulltext_cache)
    {
      rb->fulltext_cache = ffd->fulltext_cache;
    }
  else
    {
      /* An invalid revision in the key also prevents the reconstructed
         fulltext from being put into the cache. */
      rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
    }

  /* Wrap the baton into a stream. */
  *contents_p = svn_stream_create(rb, pool);
  svn_stream_set_read2(*contents_p, NULL /* only full read support */,
                       rep_read_contents);
  svn_stream_set_close(*contents_p, rep_read_contents_close);

  return SVN_NO_ERROR;
}
/* Baton for cache_access_wrapper. Wraps the original parameters of
* svn_fs_fs__try_process_file_contents().
*/
typedef struct cache_access_wrapper_baton_t
{
/* The content processor function to call with the cached data. */
svn_fs_process_contents_func_t func;
/* The baton to pass to FUNC. */
void* baton;
} cache_access_wrapper_baton_t;
/* Implements svn_cache__partial_getter_func_t as a wrapper around a
 * svn_fs_process_contents_func_t.  BATON is a
 * cache_access_wrapper_baton_t*.  Call the function stored in it with
 * the cached DATA, with DATA_LEN reduced by one for the terminating
 * NUL, the baton stored in BATON and POOL.  Upon success, set *OUT to
 * a non-NULL value.
 */
static svn_error_t *
cache_access_wrapper(void **out,
                     const void *data,
                     apr_size_t data_len,
                     void *baton,
                     apr_pool_t *pool)
{
  cache_access_wrapper_baton_t *wrapper_baton = baton;
  const unsigned char *contents = data;

  /* The cache adds a terminating 0 that the processor shall not see. */
  const apr_size_t contents_len = data_len - 1;

  SVN_ERR(wrapper_baton->func(contents, contents_len,
                              wrapper_baton->baton, pool));

  /* non-NULL value to signal the calling cache that all went well */
  *out = baton;

  return SVN_NO_ERROR;
}
/* Attempt to call PROCESSOR with BATON on the fulltext of NODEREV's data
   representation as found in the fulltext cache of FS.  The attempt is
   only made if NODEREV has a data representation with a valid revision,
   FS has a fulltext cache and the expanded size is cacheable.  In that
   case, *SUCCESS is set by the cache lookup; otherwise, it is set to
   FALSE.  Use POOL for allocations. */
svn_error_t *
svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
                                     svn_fs_t *fs,
                                     node_revision_t *noderev,
                                     svn_fs_process_contents_func_t processor,
                                     void* baton,
                                     apr_pool_t *pool)
{
  representation_t *rep = noderev->data_rep;
  fs_fs_data_t *ffd;
  pair_cache_key_t fulltext_cache_key = { 0 };
  cache_access_wrapper_baton_t wrapper_baton;
  void *dummy = NULL;

  /* Without a representation, there is nothing to process. */
  if (! rep)
    {
      *success = FALSE;
      return SVN_NO_ERROR;
    }

  /* Give up if the fulltext cannot be in the cache at all. */
  ffd = fs->fsap_data;
  if (! ffd->fulltext_cache
      || ! SVN_IS_VALID_REVNUM(rep->revision)
      || ! fulltext_size_is_cachable(ffd, rep->expanded_size))
    {
      *success = FALSE;
      return SVN_NO_ERROR;
    }

  /* Let the cache call the processor on the cached data, if any. */
  fulltext_cache_key.revision = rep->revision;
  fulltext_cache_key.second = rep->item_index;

  wrapper_baton.func = processor;
  wrapper_baton.baton = baton;

  return svn_cache__get_partial(&dummy, success, ffd->fulltext_cache,
                                &fulltext_cache_key, cache_access_wrapper,
                                &wrapper_baton, pool);
}
/* Baton used when reading delta windows. */
struct delta_read_baton
{
/* The state of the representation to read the windows from. */
rep_state_t *rs;
/* The MD5 digest handed out by delta_read_md5_digest(). */
unsigned char md5_digest[APR_MD5_DIGESTSIZE];
};
/* This implements the svn_txdelta_next_window_fn_t interface.
   BATON is a struct delta_read_baton*.  Return the next window of its
   rep state in *WINDOW, allocated in POOL, and move on to the next
   chunk.  Set *WINDOW to NULL if the end of the representation has
   already been reached. */
static svn_error_t *
delta_read_next_window(svn_txdelta_window_t **window, void *baton,
                       apr_pool_t *pool)
{
  struct delta_read_baton *drb = baton;
  rep_state_t *rs = drb->rs;
  apr_pool_t *scratch_pool = svn_pool_create(pool);

  /* Assume that there is no more data until proven otherwise. */
  *window = NULL;

  if (rs->current < rs->size)
    {
      SVN_ERR(read_delta_window(window, rs->chunk_index, rs, pool,
                                scratch_pool));
      rs->chunk_index++;
    }

  svn_pool_destroy(scratch_pool);

  return SVN_NO_ERROR;
}
/* This implements the svn_txdelta_md5_digest_fn_t interface.
   BATON is a struct delta_read_baton*; return the digest stored in it. */
static const unsigned char *
delta_read_md5_digest(void *baton)
{
  return ((struct delta_read_baton *)baton)->md5_digest;
}
/* Return a txdelta stream that delivers the delta windows of the on-disk
 * representation REP_STATE and reports the MD5 digest of TARGET's data
 * representation.  Allocate the result in POOL.
 */
static svn_txdelta_stream_t *
get_storaged_delta_stream(rep_state_t *rep_state,
                          node_revision_t *target,
                          apr_pool_t *pool)
{
  struct delta_read_baton *reader = apr_pcalloc(pool, sizeof(*reader));

  /* Set up the baton shared by both stream callbacks. */
  memcpy(reader->md5_digest, target->data_rep->md5_digest,
         sizeof(reader->md5_digest));
  reader->rs = rep_state;

  return svn_txdelta_stream_create(reader, delta_read_next_window,
                                   delta_read_md5_digest, pool);
}
/* Set *STREAM_P to a txdelta stream that turns the contents of SOURCE
 * into those of TARGET, both being node revisions in FS.  SOURCE may be
 * NULL, in which case the delta is computed against an empty stream.
 *
 * If the on-disk representation of TARGET already is the requested delta,
 * that stored delta is returned directly; otherwise both fulltexts are
 * read and a delta is constructed from them.  Allocate in POOL.
 */
svn_error_t *
svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
                                 svn_fs_t *fs,
                                 node_revision_t *source,
                                 node_revision_t *target,
                                 apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_fs_fs__rep_header_t *rep_header;
  rep_state_t *rep_state;
  svn_stream_t *source_stream, *target_stream;

  /* Shortcut: the stored representation of TARGET may already be the
     delta we are looking for.  However, when there is a fulltext cache
     and no SOURCE, we rather go through the fulltext path below. */
  if (target->data_rep && (source || ! ffd->fulltext_cache))
    {
      int use_stored_delta = 0;

      /* Read target's rep header, i.e. learn about its base rep if any. */
      SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
                               target->data_rep, fs, pool, pool));

      if (source && source->data_rep && target->data_rep)
        {
          /* Only an actual delta against exactly SOURCE will do.
             E.g. a self-delta would not be good enough. */
          use_stored_delta
            =    rep_header->type == svn_fs_fs__rep_delta
              && rep_header->base_revision == source->data_rep->revision
              && rep_header->base_item_index
                   == source->data_rep->item_index;
        }
      else if (!source)
        {
          /* A self-delta is wanted.  There is a fair chance that TARGET
             got added in this revision and is stored in just that way. */
          use_stored_delta = rep_header->type == svn_fs_fs__rep_self_delta;
        }

      if (use_stored_delta)
        {
          *stream_p = get_storaged_delta_stream(rep_state, target, pool);
          return SVN_NO_ERROR;
        }

      /* No luck.  Don't keep file handles open longer than necessary. */
      if (rep_state->sfile->rfile)
        {
          SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
          rep_state->sfile->rfile = NULL;
        }
    }

  /* General case: fetch both fulltexts and compute the delta. */
  if (!source)
    source_stream = svn_stream_empty(pool);
  else
    SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
                                    TRUE, pool));

  SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
                                  TRUE, pool));

  /* Source and target stream already verify their content, so there is
   * no need to checksum once more - in particular not when the content
   * is being served from cache. */
  svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);

  return SVN_NO_ERROR;
}
/* Return TRUE if the svn_fs_dirent_t* elements of ENTRIES are in
   non-descending strcmp() order of their names, and FALSE otherwise.
   Arrays with fewer than two elements are always sorted. */
static svn_boolean_t
sorted(apr_array_header_t *entries)
{
  const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
  int idx;

  /* Compare every element with its predecessor. */
  for (idx = 1; idx < entries->nelts; ++idx)
    {
      if (strcmp(dirents[idx - 1]->name, dirents[idx]->name) > 0)
        return FALSE;
    }

  return TRUE;
}
/* Sort callback: A and B each point to a svn_fs_dirent_t pointer.
   Return the strcmp() ordering of the names of these two dirents. */
static int
compare_dirents(const void *a, const void *b)
{
  const svn_fs_dirent_t * const *lhs_p = a;
  const svn_fs_dirent_t * const *rhs_p = b;

  return strcmp((*lhs_p)->name, (*rhs_p)->name);
}
/* Lookup callback: A points to a svn_fs_dirent_t pointer while B is a
   plain C string.  Return the strcmp() ordering of the dirent's name
   relative to that string. */
static int
compare_dirent_name(const void *a, const void *b)
{
  const svn_fs_dirent_t * const *dirent_p = a;
  const char *wanted_name = b;

  return strcmp((*dirent_p)->name, wanted_name);
}
/* Into *ENTRIES_P, read all directory entries from the key-value text in
 * STREAM and return them as an array of svn_fs_dirent_t * that is sorted
 * by entry name.
 *
 * If INCREMENTAL is FALSE, reading stops at the hash terminator.  If it is
 * TRUE, the terminator is skipped and the incremental changes (additions,
 * replacements and deletions) that follow it are applied until the end of
 * STREAM is reached.
 *
 * ID is only used for nicer error messages.  The array and its dirents
 * are allocated in RESULT_POOL; SCRATCH_POOL is used for temporaries.
 * Malformed entries result in an SVN_ERR_FS_CORRUPT error.
 */
static svn_error_t *
read_dir_entries(apr_array_header_t **entries_p,
                 svn_stream_t *stream,
                 svn_boolean_t incremental,
                 const svn_fs_id_t *id,
                 apr_pool_t *result_pool,
                 apr_pool_t *scratch_pool)
{
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
  apr_hash_t *hash = NULL;
  const char *terminator = SVN_HASH_TERMINATOR;
  apr_array_header_t *entries = NULL;

  if (incremental)
    hash = svn_hash__make(scratch_pool);
  else
    entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));

  /* Read until the terminator (non-incremental) or the end of STREAM
     (incremental mode).  In the latter mode, we use a temporary HASH
     to make updating and removing entries cheaper. */
  while (1)
    {
      svn_hash__entry_t entry;
      svn_fs_dirent_t *dirent;
      char *str;

      svn_pool_clear(iterpool);
      SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator,
                                     incremental, iterpool),
                apr_psprintf(iterpool,
                             _("Directory representation corrupt in '%s'"),
                             svn_fs_fs__id_unparse(id, scratch_pool)->data));

      /* End of directory? */
      if (entry.key == NULL)
        {
          /* In incremental mode, we skip the terminator and read the
             increments following it until the end of the stream. */
          if (incremental && terminator)
            {
              terminator = NULL;

              /* There is no entry to process in this iteration.  Do not
                 fall through with a NULL key into the code below. */
              continue;
            }

          break;
        }

      /* Deleted entry? */
      if (entry.val == NULL)
        {
          /* We must be in incremental mode */
          assert(hash);
          apr_hash_set(hash, entry.key, entry.keylen, NULL);
          continue;
        }

      /* Add a new directory entry. */
      dirent = apr_pcalloc(result_pool, sizeof(*dirent));
      dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);

      /* First token of the value: the node kind. */
      str = svn_cstring_tokenize(" ", &entry.val);
      if (str == NULL)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                      _("Directory entry corrupt in '%s'"),
                      svn_fs_fs__id_unparse(id, scratch_pool)->data);

      if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
        {
          dirent->kind = svn_node_file;
        }
      else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
        {
          dirent->kind = svn_node_dir;
        }
      else
        {
          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                      _("Directory entry corrupt in '%s'"),
                      svn_fs_fs__id_unparse(id, scratch_pool)->data);
        }

      /* Second token of the value: the node revision ID. */
      str = svn_cstring_tokenize(" ", &entry.val);
      if (str == NULL)
        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                      _("Directory entry corrupt in '%s'"),
                      svn_fs_fs__id_unparse(id, scratch_pool)->data);

      SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));

      /* In incremental mode, update the hash; otherwise, write to the
       * final array.  Be sure to use hash keys that survive this iteration.
       */
      if (incremental)
        apr_hash_set(hash, dirent->name, entry.keylen, dirent);
      else
        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
    }

  /* Convert container to a sorted array. */
  if (incremental)
    {
      apr_hash_index_t *hi;

      entries = apr_array_make(result_pool, apr_hash_count(hash),
                               sizeof(svn_fs_dirent_t *));
      for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
    }

  if (!sorted(entries))
    svn_sort__array(entries, compare_dirents);

  svn_pool_destroy(iterpool);

  *entries_p = entries;
  return SVN_NO_ERROR;
}
/* For directory NODEREV in FS, return in *FILESIZE the current size of
 * the file holding its in-txn representation.  If the directory's data
 * representation is missing or is committed data, set *FILESIZE to
 * SVN_INVALID_FILESIZE instead.  Use SCRATCH_POOL for temporaries.
 */
static svn_error_t *
get_txn_dir_info(svn_filesize_t *filesize,
                 svn_fs_t *fs,
                 node_revision_t *noderev,
                 apr_pool_t *scratch_pool)
{
  const svn_io_dirent2_t *dirent;
  const char *filename;

  /* Anything but a mutable, in-txn representation has no such file. */
  if (   !noderev->data_rep
      || !svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
    {
      *filesize = SVN_INVALID_FILESIZE;
      return SVN_NO_ERROR;
    }

  /* Ask the file system for the size of the txn's children file. */
  filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
                                               scratch_pool);
  SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
                              scratch_pool, scratch_pool));
  *filesize = dirent->filesize;

  return SVN_NO_ERROR;
}
/* Fetch the contents of the directory NODEREV in FS into DIR.
 *
 * DIR->ENTRIES becomes an array of svn_fs_dirent_t * as produced by
 * read_dir_entries(), or an empty array if NODEREV has no data
 * representation.  DIR->TXN_FILESIZE is set to the size of the txn's
 * children file if the representation is mutable and to
 * SVN_INVALID_FILESIZE otherwise.
 *
 * Allocate the entries in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
static svn_error_t *
get_dir_contents(svn_fs_fs__dir_data_t *dir,
                 svn_fs_t *fs,
                 node_revision_t *noderev,
                 apr_pool_t *result_pool,
                 apr_pool_t *scratch_pool)
{
  svn_stream_t *contents;

  /* Unless we find a mutable representation, there is no txn file. */
  dir->txn_filesize = SVN_INVALID_FILESIZE;

  /* No representation at all -> empty directory and we are done. */
  if (!noderev->data_rep)
    {
      dir->entries = apr_array_make(result_pool, 0,
                                    sizeof(svn_fs_dirent_t *));
      return SVN_NO_ERROR;
    }

  if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
    {
      /* The representation is mutable.  The children file holds the old
         directory contents followed by the changes made in this
         transaction; read it in incremental mode. */
      apr_file_t *file;
      const char *filename
        = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool);

      SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
                               APR_OS_DEFAULT, scratch_pool));

      /* Remember the current size of the txn children file. */
      SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));

      contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
      SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id,
                               result_pool, scratch_pool));
      SVN_ERR(svn_stream_close(contents));
    }
  else
    {
      /* The representation is immutable.  Undeltify the content into a
       * buffer before parsing it.  Otherwise, we could only parse it
       * byte-by-byte.
       */
      svn_stringbuf_t *text;
      apr_size_t len = noderev->data_rep->expanded_size;

      SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
                                      FALSE, scratch_pool));
      SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
      SVN_ERR(svn_stream_close(contents));

      /* De-serialize the buffered hash. */
      contents = svn_stream_from_stringbuf(text, scratch_pool);
      SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id,
                               result_pool, scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* Return the cache object in FS that is responsible for storing the
 * directory NODEREV and set *KEY to the key to use with that cache.
 * The returned cache is whatever FS has configured and may be NULL.
 *
 * PAIR_KEY must point to some key struct, which does not need to be
 * initialized.  *KEY may point to it upon return, which saves us a
 * dynamic allocation.  Use POOL for any key that must be allocated.
 */
static svn_cache__t *
locate_dir_cache(svn_fs_t *fs,
                 const void **key,
                 pair_cache_key_t *pair_key,
                 node_revision_t *noderev,
                 apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  /* No data rep -> empty directory.
     A NULL key causes a cache miss. */
  if (noderev->data_rep == NULL)
    {
      *key = NULL;
      return ffd->dir_cache;
    }

  /* Committed data can use simple rev,item pairs. */
  if (!svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
    {
      pair_key->revision = noderev->data_rep->revision;
      pair_key->second = noderev->data_rep->item_index;
      *key = pair_key;

      return ffd->dir_cache;
    }

  /* Data in txns requires the expensive fs_id-based addressing mode. */
  *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
  return ffd->txn_dir_cache;
}
/* Set *ENTRIES_P to the array of directory entries of NODEREV in FS.
 *
 * The directory cache is consulted first.  A cached directory is only
 * used if its recorded txn file size still equals the current one.
 * Otherwise, the contents are read anew and, unless the directory is
 * deemed too large, put into the cache.
 *
 * Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
svn_error_t *
svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
                            svn_fs_t *fs,
                            node_revision_t *noderev,
                            apr_pool_t *result_pool,
                            apr_pool_t *scratch_pool)
{
  svn_fs_fs__dir_data_t *dir;
  const void *key;
  pair_cache_key_t pair_key = { 0 };

  /* Which cache - if any - is responsible for this directory? */
  svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
                                         scratch_pool);

  if (cache)
    {
      svn_boolean_t found;

      SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key,
                             result_pool));
      if (found)
        {
          /* Make sure the cached dir info is not stale
           * (this is a no-op for committed data). */
          svn_filesize_t filesize;

          SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
          if (dir->txn_filesize == filesize)
            {
              /* Cached data is still valid.  Done. */
              *entries_p = dir->entries;
              return SVN_NO_ERROR;
            }
        }
    }

  /* Cache miss or stale data: read the directory contents. */
  dir = apr_pcalloc(scratch_pool, sizeof(*dir));
  SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
  *entries_p = dir->entries;

  /* Update the cache, if we are to use one.
   *
   * Don't even attempt to serialize very large directories; it would cause
   * an unnecessary memory allocation peak.  150 bytes/entry is about right.
   */
  if (cache)
    {
      if (svn_cache__is_cachable(cache, 150 * dir->entries->nelts))
        SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* Look up the entry called NAME in ENTRIES, an array of svn_fs_dirent_t *
 * ordered as compare_dirent_name() expects.  HINT is passed through to
 * the array lookup.  Return the matching entry or NULL if there is none.
 */
svn_fs_dirent_t *
svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
                          const char *name,
                          int *hint)
{
  svn_fs_dirent_t **slot
    = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);

  if (slot)
    return *slot;

  return NULL;
}
/* Set *DIRENT to the entry called NAME in the directory NODEREV in FS,
 * or to NULL if the freshly read directory has no such entry.
 *
 * The entry is extracted directly from the directory cache where
 * possible.  If the directory is not cached or the cached data is
 * reported as out of date, the whole directory is read (and cached
 * unless deemed too large) and a copy of the entry is returned.
 *
 * Allocate *DIRENT in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
svn_error_t *
svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
                                  svn_fs_t *fs,
                                  node_revision_t *noderev,
                                  const char *name,
                                  apr_pool_t *result_pool,
                                  apr_pool_t *scratch_pool)
{
  extract_dir_entry_baton_t baton;
  svn_boolean_t found = FALSE;
  svn_fs_fs__dir_data_t dir;
  svn_fs_dirent_t *match;
  svn_fs_dirent_t *copy;

  /* find the cache we may use */
  pair_cache_key_t pair_key = { 0 };
  const void *key;
  svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
                                         scratch_pool);

  if (cache)
    {
      svn_filesize_t filesize;

      SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));

      /* Partial cache lookup: extract only the entry we want. */
      baton.name = name;
      baton.txn_filesize = filesize;

      SVN_ERR(svn_cache__get_partial((void **)dirent,
                                     &found,
                                     cache,
                                     key,
                                     svn_fs_fs__extract_dir_entry,
                                     &baton,
                                     result_pool));
    }

  /* An up-to-date cache hit already provided the result. */
  if (found && ! baton.out_of_date)
    return SVN_NO_ERROR;

  /* Otherwise, fetch the directory contents from disk. */
  SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
                           scratch_pool));

  /* Update the cache, if we are to use one.
   *
   * Don't even attempt to serialize very large directories; it would
   * cause an unnecessary memory allocation peak.  150 bytes / entry is
   * about right. */
  if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
    SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool));

  /* Find the desired entry.  The directory lives in SCRATCH_POOL, so
     return a copy allocated in RESULT_POOL. */
  match = svn_fs_fs__find_dir_entry(dir.entries, name, NULL);
  if (match == NULL)
    {
      *dirent = NULL;
      return SVN_NO_ERROR;
    }

  copy = apr_palloc(result_pool, sizeof(*copy));
  copy->name = apr_pstrdup(result_pool, match->name);
  copy->id = svn_fs_fs__id_copy(match->id, result_pool);
  copy->kind = match->kind;
  *dirent = copy;

  return SVN_NO_ERROR;
}
/* Set *PROPLIST_P to the property list of NODEREV in FS.
 *
 * Node revisions without a property representation yield an empty hash.
 * Mutable (in-txn) properties are parsed from the txn's node props file.
 * Committed properties are served from the properties cache where
 * possible and get parsed from the representation (and cached) otherwise.
 * Allocate everything in POOL.
 */
svn_error_t *
svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
                        svn_fs_t *fs,
                        node_revision_t *noderev,
                        apr_pool_t *pool)
{
  apr_hash_t *proplist;
  svn_stream_t *stream;
  svn_error_t *err;

  /* return an empty prop list if the node doesn't have any props */
  if (!noderev->prop_rep)
    {
      *proplist_p = apr_hash_make(pool);
      return SVN_NO_ERROR;
    }

  if (svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
    {
      /* Mutable properties: parse the txn's props file. */
      const char *filename
        = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);

      proplist = apr_hash_make(pool);
      SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));

      err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
      if (err)
        {
          /* Still close the stream, then tell the user what is broken. */
          err = svn_error_compose_create(err, svn_stream_close(stream));
          return svn_error_quick_wrapf(err,
                   _("malformed property list for node-revision '%s' in '%s'"),
                   svn_fs_fs__id_unparse(noderev->id, pool)->data, filename);
        }

      SVN_ERR(svn_stream_close(stream));
    }
  else
    {
      /* Committed properties: use the cache, if there is one. */
      fs_fs_data_t *ffd = fs->fsap_data;
      representation_t *rep = noderev->prop_rep;
      svn_boolean_t use_cache
        = ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision);
      pair_cache_key_t key = { 0 };

      key.revision = rep->revision;
      key.second = rep->item_index;

      if (use_cache)
        {
          svn_boolean_t is_cached;

          SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
                                 ffd->properties_cache, &key, pool));
          if (is_cached)
            return SVN_NO_ERROR;
        }

      /* Not cached.  Parse the contents of the representation. */
      proplist = apr_hash_make(pool);
      SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
                                      pool));

      err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
      if (err)
        {
          /* Still close the stream, then tell the user what is broken. */
          err = svn_error_compose_create(err, svn_stream_close(stream));
          return svn_error_quick_wrapf(err,
                   _("malformed property list for node-revision '%s'"),
                   svn_fs_fs__id_unparse(noderev->id, pool)->data);
        }

      SVN_ERR(svn_stream_close(stream));

      if (use_cache)
        SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
    }

  *proplist_p = proplist;
  return SVN_NO_ERROR;
}
/* Create a new, zero-initialized changes context for revision REV in FS
 * and return it in *CONTEXT.  The context is allocated in RESULT_POOL,
 * which is also recorded as the pool to open the revision file in.
 */
svn_error_t *
svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context,
                                  svn_fs_t *fs,
                                  svn_revnum_t rev,
                                  apr_pool_t *result_pool)
{
  svn_fs_fs__changes_context_t *new_context;

  new_context = apr_pcalloc(result_pool, sizeof(*new_context));
  new_context->rev_file_pool = result_pool;
  new_context->revision = rev;
  new_context->fs = fs;

  *context = new_context;
  return SVN_NO_ERROR;
}
/* Return in *CHANGES the next block of changed path entries for the
 * revision described by CONTEXT and advance CONTEXT accordingly.
 *
 * The block is looked up in the changes cache under the key
 * (CONTEXT->REVISION, CONTEXT->NEXT).  Upon a cache miss, the revision
 * file gets opened (if it is not open already), block-read may prefetch
 * the data into the cache and, if that did not provide it either, up to
 * SVN_FS_FS__CHANGES_BLOCK_SIZE entries are parsed from the file and
 * put into the cache.
 *
 * Once the last block has been returned, CONTEXT->EOL is set and the
 * revision file gets closed.
 *
 * Allocate *CHANGES in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
svn_error_t *
svn_fs_fs__get_changes(apr_array_header_t **changes,
                       svn_fs_fs__changes_context_t *context,
                       apr_pool_t *result_pool,
                       apr_pool_t *scratch_pool)
{
  apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES;
  svn_boolean_t found = FALSE;
  fs_fs_data_t *ffd = context->fs->fsap_data;
  svn_fs_fs__changes_list_t *changes_list;

  /* Zero-initialize the key like all other cache keys in this file, so
     that it never contains indeterminate (e.g. padding) bytes. */
  pair_cache_key_t key = { 0 };
  key.revision = context->revision;
  key.second = context->next;

  /* try cache lookup first */
  if (ffd->changes_cache)
    SVN_ERR(svn_cache__get((void **)&changes_list, &found,
                           ffd->changes_cache, &key, result_pool));

  if (!found)
    {
      /* read changes from revision file */
      if (!context->revision_file)
        {
          SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision,
                                                    context->fs,
                                                    scratch_pool));
          SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file,
                                                   context->fs,
                                                   context->revision,
                                                   context->rev_file_pool,
                                                   scratch_pool));
        }

      if (use_block_read(context->fs))
        {
          /* 'block-read' will probably populate the cache with the data
           * that we want.  However, we won't want to force it to process
           * very large change lists as part of this prefetching mechanism.
           * Those would be better handled by the iterative code below. */
          SVN_ERR(block_read(NULL, context->fs,
                             context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES,
                             context->revision_file, scratch_pool,
                             scratch_pool));

          /* This may succeed now ... provided that there is a cache to
           * ask.  All other accesses to the changes cache in this
           * function are guarded in the same way. */
          if (ffd->changes_cache)
            SVN_ERR(svn_cache__get((void **)&changes_list, &found,
                                   ffd->changes_cache, &key, result_pool));
        }

      /* If we still have no data, read it here. */
      if (!found)
        {
          apr_off_t changes_offset;

          /* Addressing is very different for old formats
           * (needs to read the revision trailer). */
          if (svn_fs_fs__use_log_addressing(context->fs))
            {
              SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs,
                                             context->revision_file,
                                             context->revision, NULL,
                                             SVN_FS_FS__ITEM_INDEX_CHANGES,
                                             scratch_pool));
            }
          else
            {
              SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
                                              context->revision_file,
                                              context->fs, context->revision,
                                              scratch_pool));

              /* This variable will be used for debug logging only. */
              item_index = changes_offset;
            }

          /* Actual reading and parsing are the same, though. */
          SVN_ERR(aligned_seek(context->fs, context->revision_file->file,
                               NULL, changes_offset + context->next_offset,
                               scratch_pool));

          SVN_ERR(svn_fs_fs__read_changes(changes,
                                          context->revision_file->stream,
                                          SVN_FS_FS__CHANGES_BLOCK_SIZE,
                                          result_pool, scratch_pool));

          /* Construct the info object for the entries block we just read.
           * Offsets are stored relative to the start of the changes list. */
          changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list));
          SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset,
                                         context->revision_file->file,
                                         scratch_pool));
          changes_list->end_offset -= changes_offset;
          changes_list->start_offset = context->next_offset;
          changes_list->count = (*changes)->nelts;
          changes_list->changes = (change_t **)(*changes)->elts;

          /* A block that is not completely filled must be the last one. */
          changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE;

          /* cache for future reference */
          if (ffd->changes_cache)
            SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list,
                                   scratch_pool));
        }
    }

  if (found)
    {
      /* Return the block as a "proper" APR array. */
      (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
      (*changes)->elts = (char *)changes_list->changes;
      (*changes)->nelts = changes_list->count;
      (*changes)->nalloc = changes_list->count;
    }

  /* Where to look next - if there is more data. */
  context->next += (*changes)->nelts;
  context->next_offset = changes_list->end_offset;
  context->eol = changes_list->eol;

  /* Close the revision file after we read all data. */
  if (context->eol && context->revision_file)
    {
      SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file));
      context->revision_file = NULL;
    }

  SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes,
                         SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));

  return SVN_NO_ERROR;
}
/* Initialize the representation read state RS for the given REP_HEADER
 * and p2l index ENTRY in FS.  FILE, which may be NULL, becomes the
 * revision file of the shared file object that gets created for RS.
 * ENTRY must describe a plain representation item (file / dir contents
 * or properties), not a container; otherwise an assertion error is
 * returned.  Use RESULT_POOL for allocations.
 */
static svn_error_t *
init_rep_state(rep_state_t *rs,
               svn_fs_fs__rep_header_t *rep_header,
               svn_fs_t *fs,
               svn_fs_fs__revision_file_t *file,
               svn_fs_fs__p2l_entry_t* entry,
               apr_pool_t *result_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  shared_file_t *sfile = apr_pcalloc(result_pool, sizeof(*sfile));

  /* this function does not apply to representation containers */
  SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
                 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);

  /* Describe the file that we read the data from. */
  sfile->fs = fs;
  sfile->rfile = file;
  sfile->pool = result_pool;
  sfile->revision = entry->item.revision;

  /* Identity of the representation. */
  rs->sfile = sfile;
  rs->revision = entry->item.revision;
  rs->item_index = entry->item.number;

  /* Location of the representation data within the file.
     NOTE(review): the constants 4 and 7 presumably account for the
     svndiff header and the rep trailer, respectively - confirm. */
  rs->header_size = rep_header->header_size;
  rs->start = entry->offset + rs->header_size;
  if (rep_header->type == svn_fs_fs__rep_plain)
    rs->current = 0;
  else
    rs->current = 4;
  rs->size = entry->size - rep_header->header_size - 7;
  rs->ver = 1;
  rs->chunk_index = 0;

  /* The caches to use, as configured for FS. */
  rs->raw_window_cache = ffd->raw_window_cache;
  rs->window_cache = ffd->txdelta_window_cache;
  rs->combined_cache = ffd->combined_window_cache;

  return SVN_NO_ERROR;
}
/* Implement svn_cache__partial_getter_func_t for txdelta windows.
 * DATA is the cached svn_fs_fs__txdelta_cached_window_t.  Instead of the
 * whole window data, only its END_OFFSET member is returned: it is
 * written to the apr_off_t that OUT actually points to.
 * DATA_LEN, BATON and RESULT_POOL are not used.
 */
static svn_error_t *
get_txdelta_window_end(void **out,
                       const void *data,
                       apr_size_t data_len,
                       void *baton,
                       apr_pool_t *result_pool)
{
  const svn_fs_fs__txdelta_cached_window_t *cached = data;
  apr_off_t *end_offset_p = (apr_off_t*)out;

  *end_offset_p = cached->end_offset;

  return SVN_NO_ERROR;
}
/* Implement svn_cache__partial_getter_func_t for raw windows.
 * DATA is the cached svn_fs_fs__raw_cached_window_t.  Instead of the
 * whole window data, only its END_OFFSET member is returned: it is
 * written to the apr_off_t that OUT actually points to.
 * DATA_LEN, BATON and RESULT_POOL are not used.
 */
static svn_error_t *
get_raw_window_end(void **out,
                   const void *data,
                   apr_size_t data_len,
                   void *baton,
                   apr_pool_t *result_pool)
{
  const svn_fs_fs__raw_cached_window_t *cached = data;
  apr_off_t *end_offset_p = (apr_off_t*)out;

  *end_offset_p = cached->end_offset;

  return SVN_NO_ERROR;
}
/* Walk through all windows in the representation addressed by RS in FS
 * (excluding the delta bases) and put those not already cached into the
 * raw window cache.  Windows found in either the raw or the parsed window
 * cache are merely skipped.  If MAX_OFFSET is not -1, don't read windows
 * that start at or beyond that offset.  Return an SVN_ERR_FS_CORRUPT
 * error if a window ends beyond the end of the representation.
 * Use POOL for temporary allocations.
 *
 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
 * be non-NULL.
 */
static svn_error_t *
cache_windows(svn_fs_t *fs,
              rep_state_t *rs,
              apr_off_t max_offset,
              apr_pool_t *pool)
{
  apr_pool_t *iterpool = svn_pool_create(pool);

  while (rs->current < rs->size)
    {
      window_cache_key_t key = { 0 };
      svn_boolean_t found = FALSE;
      apr_off_t end_offset;

      svn_pool_clear(iterpool);

      /* Stop as soon as the next window starts outside the range
         that the caller is interested in. */
      if (max_offset != -1 && rs->start + rs->current >= max_offset)
        break;

      /* We don't need to read the data again if it is already in cache.
       * It might be cached as either raw or parsed window.
       */
      SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
                                     rs->raw_window_cache,
                                     get_window_key(&key, rs),
                                     get_raw_window_end, NULL,
                                     iterpool));
      if (! found)
        SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
                                       rs->window_cache, &key,
                                       get_txdelta_window_end, NULL,
                                       iterpool));

      if (! found)
        {
          /* Read the raw window from disk and cache it. */
          svn_fs_fs__raw_cached_window_t raw_window;
          apr_off_t window_start = rs->start + rs->current;
          apr_size_t raw_len;
          char *raw_data;

          /* Navigate to the current window and determine its length. */
          SVN_ERR(rs_aligned_seek(rs, NULL, window_start, iterpool));
          SVN_ERR(svn_txdelta__read_raw_window_len(&raw_len,
                                                   rs->sfile->rfile->stream,
                                                   iterpool));

          /* Go back to the window start and read the window as a whole,
             adding a terminating NUL. */
          raw_data = apr_palloc(iterpool, raw_len + 1);
          SVN_ERR(rs_aligned_seek(rs, NULL, window_start, iterpool));
          SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, raw_data,
                                         raw_len, NULL, NULL, iterpool));
          raw_data[raw_len] = 0;

          /* update relative offset in representation */
          rs->current += raw_len;

          /* Construct the cachable raw window object ... */
          raw_window.end_offset = rs->current;
          raw_window.window.len = raw_len;
          raw_window.window.data = raw_data;

          /* ... and cache it. */
          SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &raw_window,
                                 iterpool));
        }
      else
        {
          /* Already cached.  Simply skip the window. */
          rs->current = end_offset;
        }

      if (rs->current > rs->size)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Reading one svndiff window read beyond "
                                  "the end of the representation"));

      rs->chunk_index++;
    }

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
/* Read all txdelta / plain windows following REP_HEADER in FS as described
 * by ENTRY and put them into the respective caches.  Read the data from
 * the already open REV_FILE.  If MAX_OFFSET is not -1, don't read delta
 * windows that start at or beyond that offset.
 *
 * Plain representations are read as a whole and stored in the combined
 * window cache; delta representations get their windows cached one by one.
 * If the caches required for the representation type are not enabled,
 * this is a no-op.
 *
 * Use RESULT_POOL for the read state and the plaintext buffer and
 * SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
block_read_windows(svn_fs_fs__rep_header_t *rep_header,
                   svn_fs_t *fs,
                   svn_fs_fs__revision_file_t *rev_file,
                   svn_fs_fs__p2l_entry_t* entry,
                   apr_off_t max_offset,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  const int is_plain = (rep_header->type == svn_fs_fs__rep_plain);
  window_cache_key_t key = { 0 };
  rep_state_t rs = { 0 };

  /* Nothing to do if the caches that we would fill do not exist. */
  if (is_plain)
    {
      if (!ffd->combined_window_cache)
        return SVN_NO_ERROR;
    }
  else if (!ffd->txdelta_window_cache || !ffd->raw_window_cache)
    {
      return SVN_NO_ERROR;
    }

  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
                         result_pool));

  if (is_plain)
    {
      svn_stringbuf_t *plaintext;
      svn_boolean_t is_cached;

      /* RS->FILE may be shared between RS instances -> make sure we point
       * to the right data. */
      apr_off_t offset = rs.start + rs.current;

      /* already in cache? */
      SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
                                 get_window_key(&key, &rs),
                                 scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;

      /* for larger reps, the header may have crossed a block boundary.
       * make sure we still read blocks properly aligned, i.e. don't use
       * plain seek here. */
      SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));

      /* Read the whole plaintext and NUL-terminate it. */
      plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
      SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
                                     rs.size, &plaintext->len, NULL,
                                     result_pool));
      plaintext->data[plaintext->len] = 0;
      rs.current += rs.size;

      SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
    }
  else
    {
      SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* Try to get the representation header identified by KEY from FS's cache.
 * If it has not been cached, read it from the current position in STREAM
 * and put it into the cache (if caching has been enabled for rep headers).
 * Return the result in *REP_HEADER, allocated in RESULT_POOL.
 * Use SCRATCH_POOL for temporaries.
 */
static svn_error_t *
read_rep_header(svn_fs_fs__rep_header_t **rep_header,
                svn_fs_t *fs,
                svn_stream_t *stream,
                pair_cache_key_t *key,
                apr_pool_t *result_pool,
                apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_boolean_t is_cached = FALSE;

  /* Without a cache, all we can do is to parse the header. */
  if (!ffd->rep_header_cache)
    {
      SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
                                         scratch_pool));
      return SVN_NO_ERROR;
    }

  /* A cache hit spares us the parsing. */
  SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
                         ffd->rep_header_cache, key,
                         result_pool));
  if (is_cached)
    return SVN_NO_ERROR;

  /* Cache miss: parse the header and remember it for next time. */
  SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
                                     scratch_pool));
  SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
                         scratch_pool));

  return SVN_NO_ERROR;
}
/* Fetch the representation data (header, txdelta / plain windows)
 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
 * The header is read from the current position of REV_FILE's stream
 * unless it is already cached.  If MAX_OFFSET is not -1, don't read
 * windows that start at or beyond that offset.
 * Use SCRATCH_POOL for all allocations.
 */
static svn_error_t *
block_read_contents(svn_fs_t *fs,
                    svn_fs_fs__revision_file_t *rev_file,
                    svn_fs_fs__p2l_entry_t* entry,
                    apr_off_t max_offset,
                    apr_pool_t *scratch_pool)
{
  svn_fs_fs__rep_header_t *rep_header;
  pair_cache_key_t header_key = { 0 };

  /* Rep headers are cached by (revision, item number). */
  header_key.second = entry->item.number;
  header_key.revision = (apr_int32_t)entry->item.revision;

  /* First the header, then the windows following it. */
  SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
                          scratch_pool, scratch_pool));
  SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
                             scratch_pool, scratch_pool));

  return SVN_NO_ERROR;
}
/* For the given REV_FILE in FS, in *STREAM return a stream covering the
 * item specified by ENTRY.  ENTRY->SIZE bytes are read from the current
 * position of REV_FILE's file; no seek is performed here.  Also, verify
 * the item's content against the low-level checksum stored in ENTRY and
 * return a checksum mismatch error if they differ.
 * Allocate the result in POOL.
 */
static svn_error_t *
read_item(svn_stream_t **stream,
          svn_fs_t *fs,
          svn_fs_fs__revision_file_t *rev_file,
          svn_fs_fs__p2l_entry_t* entry,
          apr_pool_t *pool)
{
  svn_checksum_t *expected, *actual;
  apr_uint32_t plain_digest;
  apr_uint32_t digest;

  /* Read the item into a NUL-terminated string buffer. */
  svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
  text->len = entry->size;
  text->data[text->len] = 0;
  SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
                                 NULL, NULL, pool));

  /* Wrap the buffer into a stream and calculate the checksum. */
  *stream = svn_stream_from_stringbuf(text, pool);
  digest = svn__fnv1a_32x4(text->data, text->len);

  /* Checksums will match most of the time. */
  if (digest == entry->fnv1_checksum)
    return SVN_NO_ERROR;

  /* Mismatch.  Construct proper checksum objects from the digests
   * (in network byte order) to allow for nice error messages. */
  plain_digest = htonl(entry->fnv1_checksum);
  expected = svn_checksum__from_digest_fnv1a_32x4(
                (const unsigned char *)&plain_digest, pool);
  plain_digest = htonl(digest);
  actual = svn_checksum__from_digest_fnv1a_32x4(
                (const unsigned char *)&plain_digest, pool);

  /* Construct the full error message with all the info we have. */
  return svn_checksum_mismatch_err(expected, actual, pool,
                 _("Low-level checksum mismatch while reading\n"
                   "%s bytes of meta data at offset %s "
                   "for item %s in revision %ld"),
                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
                 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset),
                 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
                 entry->item.revision);
}
/* If not already cached, read the changed paths list addressed by ENTRY in
 * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE
 * entries and caching is enabled.  Read the data from the current position
 * of REV_FILE.  If there is no changes cache, this is a no-op.
 * Allocate temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_changes(svn_fs_t *fs,
                   svn_fs_fs__revision_file_t *rev_file,
                   svn_fs_fs__p2l_entry_t *entry,
                   apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_stream_t *stream;
  apr_array_header_t *changes;
  svn_boolean_t is_cached;

  /* Zero-initialize the key like all other cache keys in this file, so
     that it never contains indeterminate (e.g. padding) bytes. */
  pair_cache_key_t key = { 0 };

  /* Without a cache, there is no place to put the data. */
  if (!ffd->changes_cache)
    return SVN_NO_ERROR;

  /* The first block of a changes list is cached under index 0. */
  key.revision = entry->item.revision;
  key.second = 0;

  /* already in cache? */
  SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
                             scratch_pool));
  if (is_cached)
    return SVN_NO_ERROR;

  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));

  /* Read changes from revision file.  But read just past the first block to
     enable us to determine whether the first block already hit the EOL.

     Note: A 100 entries block is already > 10kB on disk.  With a 4kB default
           disk block size, this function won't even be called for larger
           changed paths lists. */
  SVN_ERR(svn_fs_fs__read_changes(&changes, stream,
                                  SVN_FS_FS__CHANGES_BLOCK_SIZE + 1,
                                  scratch_pool, scratch_pool));

  /* We can only cache small lists that don't need to be split up.
     For longer lists, we miss the file offset info for the respective
     blocks, so those are left to be read and cached by their reader. */
  if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE)
    {
      svn_fs_fs__changes_list_t changes_list;

      /* Construct the info object for the entries block we just read.
         It covers the whole item, i.e. it is the first and last block. */
      changes_list.end_offset = entry->size;
      changes_list.start_offset = 0;
      changes_list.count = changes->nelts;
      changes_list.changes = (change_t **)changes->elts;
      changes_list.eol = TRUE;

      SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
                             scratch_pool));
    }

  return SVN_NO_ERROR;
}
/* If not already cached or if MUST_READ is set, read the node revision
 * addressed by ENTRY in FS and return it in *NODEREV_P.  Cache the
 * result if caching is enabled.  Read the data from the current position
 * of REV_FILE.
 *
 * If MUST_READ is not set, *NODEREV_P remains untouched when there is
 * no node revision cache or the node revision is already cached.
 *
 * Allocate *NODEREV_P in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read_noderev(node_revision_t **noderev_p,
                   svn_fs_t *fs,
                   svn_fs_fs__revision_file_t *rev_file,
                   svn_fs_fs__p2l_entry_t *entry,
                   svn_boolean_t must_read,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  pair_cache_key_t key = { 0 };
  svn_stream_t *stream;

  key.revision = entry->item.revision;
  key.second = entry->item.number;

  /* If nobody insists on the data, reading is only worthwhile when we
     can add something new to the cache. */
  if (!must_read)
    {
      svn_boolean_t is_cached;

      if (!ffd->node_revision_cache)
        return SVN_NO_ERROR;

      /* already in cache? */
      SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
                                 &key, scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* Fetch and verify the raw item, then parse the node revision. */
  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
  SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
                                  result_pool, scratch_pool));
  SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));

  /* Remember the result, if we can. */
  if (ffd->node_revision_cache)
    SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
                           scratch_pool));

  return SVN_NO_ERROR;
}
/* Read the whole (e.g. 64kB) block of REVISION_FILE that contains item
 * ITEM_INDEX of REVISION in FS and hand the items found in it to the
 * respective block_read_* function for caching.  Depending on heuristics,
 * the next block may get read as well.  REVISION_FILE must be the already
 * opened rev / pack file that matches REVISION.
 *
 * If RESULT is NULL, this function does nothing.  Otherwise, the item
 * requested is returned in *RESULT, allocated in RESULT_POOL.  Only node
 * revisions produce a result object here; for any other item type the
 * final assertion would trip, so callers should pass a non-NULL RESULT
 * for node revisions only.  Allocate temporaries in SCRATCH_POOL.
 */
static svn_error_t *
block_read(void **result,
           svn_fs_t *fs,
           svn_revnum_t revision,
           apr_uint64_t item_index,
           svn_fs_fs__revision_file_t *revision_file,
           apr_pool_t *result_pool,
           apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  apr_off_t wanted_offset = 0;
  apr_off_t block_start = 0;
  apr_off_t cursor;
  apr_array_header_t *block_items;
  apr_pool_t *iterpool;
  int run_count = 0;

  /* Block read is an optional feature.  A caller that does not ask for
   * anything specific does not force us to read anything. */
  if (result == NULL)
    return SVN_NO_ERROR;

  iterpool = svn_pool_create(scratch_pool);

  /* Transaction protorev files are not supported here. */
  SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));

  /* Index lookup: locate the item that we *must* read.  The list of its
   * neighbors in the same block gets fetched inside the loop below. */
  SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
                                 revision, NULL, item_index, iterpool));
  cursor = wanted_offset;

  /* Heuristics:
   *
   * Read this block.  If the last item crosses the block boundary, read
   * the next block but stop there.  Because cross-boundary items cause
   * blocks to be read twice, this heuristics will limit this effect to
   * approx. 50% of blocks, probably less, while providing a sensible
   * amount of read-ahead.
   */
  for (;;)
    {
      int idx;

      /* Fetch the list of items in the block surrounding CURSOR. */
      block_start = cursor - (cursor % ffd->block_size);
      SVN_ERR(svn_fs_fs__p2l_index_lookup(&block_items, fs, revision_file,
                                          revision, block_start,
                                          ffd->block_size, scratch_pool,
                                          scratch_pool));

      SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, cursor,
                           iterpool));

      /* Process all items of that block. */
      for (idx = 0; idx < block_items->nelts; ++idx)
        {
          svn_fs_fs__p2l_entry_t *entry;
          svn_boolean_t is_wanted;
          svn_boolean_t is_result;
          void *item = NULL;

          svn_pool_clear(iterpool);

          /* Empty sections carry no data. */
          entry = &APR_ARRAY_IDX(block_items, idx, svn_fs_fs__p2l_entry_t);
          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
            continue;

          /* Is this the item / container the caller asked for? */
          is_wanted = entry->offset == wanted_offset
                      && entry->item.revision == revision
                      && entry->item.number == item_index;
          is_result = result != NULL && is_wanted;

          /* The item to return must always be read.  Everything else is
           * only worth reading if it starts within this block and is
           * relatively small (i.e. < block size). */
          if (!is_result
              && (entry->offset < block_start
                  || entry->size >= ffd->block_size))
            continue;

          SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
                                   &entry->offset, iterpool));

          switch (entry->type)
            {
              case SVN_FS_FS__ITEM_TYPE_FILE_REP:
              case SVN_FS_FS__ITEM_TYPE_DIR_REP:
              case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
              case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
                /* No limit (-1) for the wanted item; all others are
                 * limited to the end of the current block. */
                SVN_ERR(block_read_contents(fs, revision_file, entry,
                                            is_wanted
                                              ? -1
                                              : block_start + ffd->block_size,
                                            iterpool));
                break;

              case SVN_FS_FS__ITEM_TYPE_NODEREV:
                /* The item to return must survive the iteration, i.e.
                 * live in RESULT_POOL; everything else is temporary. */
                if (is_result || ffd->node_revision_cache)
                  {
                    SVN_ERR(block_read_noderev((node_revision_t **)&item,
                                               fs, revision_file,
                                               entry, is_result,
                                               is_result ? result_pool
                                                         : iterpool,
                                               iterpool));
                  }
                break;

              case SVN_FS_FS__ITEM_TYPE_CHANGES:
                SVN_ERR(block_read_changes(fs, revision_file,
                                           entry, iterpool));
                break;

              default:
                break;
            }

          if (is_result)
            *result = item;

          /* If this item crossed the block boundary, remember to read
           * the remainder of it, i.e. the next block, as well. */
          cursor = entry->offset + entry->size;
          if (cursor - block_start > ffd->block_size)
            ++run_count;
        }

      /* Go for another round at most once, and only if exactly one item
       * of this block crossed the block boundary. */
      if (run_count++ != 1)
        break;
    }

  /* If the caller requested a result, we must have provided one by now. */
  assert(result == NULL || *result != NULL);
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}