| /* stats.c -- implements the svn_fs_fs__get_stats private API. |
| * |
| * ==================================================================== |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * ==================================================================== |
| */ |
| |
| #include "svn_dirent_uri.h" |
| #include "svn_fs.h" |
| #include "svn_pools.h" |
| #include "svn_sorts.h" |
| |
| #include "private/svn_cache.h" |
| #include "private/svn_sorts_private.h" |
| #include "private/svn_string_private.h" |
| |
| #include "index.h" |
| #include "pack.h" |
| #include "rev_file.h" |
| #include "util.h" |
| #include "fs_fs.h" |
| #include "cached_data.h" |
| #include "low_level.h" |
| #include "revprops.h" |
| |
| #include "../libsvn_fs/fs-loader.h" |
| |
| #include "svn_private_config.h" |
| |
| /* We group representations into 2x2 different kinds plus one default: |
| * [dir / file] x [text / prop]. The assignment is done by the first node |
| * that references the respective representation. |
| */ |
| typedef enum rep_kind_t |
| { |
| /* The representation is not used _directly_, i.e. not referenced by any |
| * noderev. However, some other representation may use it as delta base. |
| * Null value (first enumerator, i.e. 0). Should not occur in real-world |
| * repositories. */ |
| unused_rep, |
| |
| /* a property list representation belonging to a directory node */ |
| dir_property_rep, |
| |
| /* a property list representation belonging to a file node */ |
| file_property_rep, |
| |
| /* a directory (text / data) representation */ |
| dir_rep, |
| |
| /* a file (text / data) representation */ |
| file_rep |
| } rep_kind_t; |
| |
| /* A representation fragment, i.e. the statistics we keep for a single |
| * representation found in a rev / pack file. |
| */ |
| typedef struct rep_stats_t |
| { |
| /* offset in the revision file (phys. addressing) / |
| * item index within REVISION (log. addressing) */ |
| apr_uint64_t item_index; |
| |
| /* on-disk item length in bytes */ |
| apr_uint64_t size; |
| |
| /* item length in bytes after de-deltification */ |
| apr_uint64_t expanded_size; |
| |
| /* revision that contains this representation |
| * (may be referenced by other revisions, though) */ |
| svn_revnum_t revision; |
| |
| /* number of nodes that reference this representation */ |
| apr_uint32_t ref_count; |
| |
| /* length of the PLAIN / DELTA line in the source file in bytes */ |
| apr_uint16_t header_size; |
| |
| /* classification of the representation. values of rep_kind_t, |
| * stored as a char */ |
| char kind; |
| |
| /* length of the delta chain, including this representation, |
| * saturated to 255 - if need be. 0 as long as it has not been |
| * determined, yet. */ |
| apr_byte_t chain_length; |
| } rep_stats_t; |
| |
| /* Represents a link in the rep delta chain. REVISION + ITEM_INDEX points |
| * to BASE_REVISION + BASE_ITEM_INDEX. We collect this info while scanning |
| * a f7 repo in a single pass and resolve it afterwards. */ |
| typedef struct rep_ref_t |
| { |
| /* Revision that contains this representation. */ |
| svn_revnum_t revision; |
| |
| /* Item index of this rep within REVISION. */ |
| apr_uint64_t item_index; |
| |
| /* Revision of the representation we deltified against. |
| * SVN_INVALID_REVNUM (-1) if this representation is either PLAIN or |
| * a self-delta. */ |
| svn_revnum_t base_revision; |
| |
| /* Item index of that rep within BASE_REVISION. |
| * Only meaningful if BASE_REVISION is valid. */ |
| apr_uint64_t base_item_index; |
| |
| /* Length of the PLAIN / DELTA line in the source file in bytes. |
| * We use this to update the info in the rep stats after scanning the |
| * whole file. */ |
| apr_uint16_t header_size; |
| |
| } rep_ref_t; |
| |
| /* Represents a single revision. |
| * There will be only one instance per revision. */ |
| typedef struct revision_info_t |
| { |
| /* number of this revision */ |
| svn_revnum_t revision; |
| |
| /* pack file offset (manifest value), 0 for non-packed files */ |
| apr_off_t offset; |
| |
| /* length of the changes list in bytes */ |
| apr_uint64_t changes_len; |
| |
| /* number of entries in the changed paths list of this revision */ |
| apr_uint64_t change_count; |
| |
| /* first offset behind the revision data in the pack file (file length |
| * for non-packed revs) */ |
| apr_off_t end; |
| |
| /* number of directory noderevs in this revision */ |
| apr_uint64_t dir_noderev_count; |
| |
| /* number of file noderevs in this revision */ |
| apr_uint64_t file_noderev_count; |
| |
| /* total size of directory noderevs (i.e. the structs - not the rep) */ |
| apr_uint64_t dir_noderev_size; |
| |
| /* total size of file noderevs (i.e. the structs - not the rep) */ |
| apr_uint64_t file_noderev_size; |
| |
| /* all rep_stats_t * of this revision, kept sorted by their ITEM_INDEX |
| * such that they can be found by binary search */ |
| apr_array_header_t *representations; |
| |
| /* Temporary rev / pack file access object, used in phys. addressing |
| * mode only. NULL when done reading this revision. */ |
| svn_fs_fs__revision_file_t *rev_file; |
| } revision_info_t; |
| |
| /* Root data structure containing all information about a given repository. |
| * We use it as a wrapper around svn_fs_t and pass it around where we would |
| * otherwise just use a svn_fs_t. |
| */ |
| typedef struct query_t |
| { |
| /* FS API object */ |
| svn_fs_t *fs; |
| |
| /* The HEAD revision. */ |
| svn_revnum_t head; |
| |
| /* Number of revs per shard; 0 for non-sharded repos. */ |
| int shard_size; |
| |
| /* First non-packed revision. */ |
| svn_revnum_t min_unpacked_rev; |
| |
| /* all revisions as revision_info_t *, indexed by revision number */ |
| apr_array_header_t *revisions; |
| |
| /* empty representation. |
| * Used as a dummy base for DELTA reps without base. */ |
| rep_stats_t *null_base; |
| |
| /* collected statistics */ |
| svn_fs_fs__stats_t *stats; |
| |
| /* Progress notification callback to call after each shard. May be NULL. */ |
| svn_fs_progress_notify_func_t progress_func; |
| |
| /* Baton for PROGRESS_FUNC. */ |
| void *progress_baton; |
| |
| /* Cancellation support callback to call once in a while. May be NULL. */ |
| svn_cancel_func_t cancel_func; |
| |
| /* Baton for CANCEL_FUNC. */ |
| void *cancel_baton; |
| } query_t; |
| |
| /* Set up the LARGEST_CHANGES member of STATS such that it can hold COUNT |
|  * entries.  Every entry starts out with size 0, an invalid revision and |
|  * an empty path buffer.  All memory is taken from RESULT_POOL. |
|  */ |
| static void |
| initialize_largest_changes(svn_fs_fs__stats_t *stats, |
|                            apr_size_t count, |
|                            apr_pool_t *result_pool) |
| { |
|   apr_size_t idx; |
|   svn_fs_fs__largest_changes_t *largest |
|     = apr_pcalloc(result_pool, sizeof(*largest)); |
| |
|   largest->count = count; |
|   largest->min_size = 1; |
|   largest->changes = apr_palloc(result_pool, |
|                                 count * sizeof(*largest->changes)); |
|   stats->largest_changes = largest; |
| |
|   /* First pass: allocate *all* entry structs back-to-back, before any of |
|    * the path buffers gets allocated.  Keeping the entries close together |
|    * is meant to improve cache locality. */ |
|   for (idx = 0; idx < count; ++idx) |
|     largest->changes[idx] = apr_palloc(result_pool, |
|                                        sizeof(*largest->changes[idx])); |
| |
|   /* Second pass: fill in the entries and give each one a path buffer. */ |
|   for (idx = 0; idx < count; ++idx) |
|     { |
|       svn_fs_fs__large_change_info_t *entry = largest->changes[idx]; |
| |
|       entry->size = 0; |
|       entry->revision = SVN_INVALID_REVNUM; |
|       entry->path = svn_stringbuf_create_ensure(1024, result_pool); |
|     } |
| } |
| |
| /* Add an entry for SIZE to HISTOGRAM. |
|  * |
|  * The bucket (line) index is the smallest SHIFT with 2^SHIFT > SIZE, i.e. |
|  * the number of significant bits in SIZE.  Sizes <= 0 go into bucket 0. |
|  * Both the selected bucket and the histogram totals get updated. |
|  */ |
| static void |
| add_to_histogram(svn_fs_fs__histogram_t *histogram, |
|                  apr_int64_t size) |
| { |
|   apr_int64_t shift = 0; |
| |
|   /* Never probe with a shift of 63 or more: shifting a signed 64 bit 1 |
|    * that far is undefined behaviour and, for SIZE >= 2^62, could keep the |
|    * loop from ever terminating.  Such sizes end up in bucket 63. |
|    * NOTE(review): assumes HISTOGRAM->LINES has at least 64 entries - |
|    * confirm against the struct definition. */ |
|   while (shift < 63 && ((apr_int64_t)(1) << shift) <= size) |
|     shift++; |
| |
|   histogram->total.count++; |
|   histogram->total.sum += size; |
|   histogram->lines[(apr_size_t)shift].count++; |
|   histogram->lines[(apr_size_t)shift].sum += size; |
| } |
| |
| /* Feed one representation into the data aggregators of STATS.  KIND is the |
|  * classification of the representation, REP_SIZE its on-disk size and |
|  * EXPANDED_SIZE the expanded node size; PATH and REVISION identify the |
|  * node using it.  PLAIN_ADDED tells whether the sizes shall also be added |
|  * to the "added" histograms. |
|  */ |
| static void |
| add_change(svn_fs_fs__stats_t *stats, |
|            apr_uint64_t rep_size, |
|            apr_uint64_t expanded_size, |
|            svn_revnum_t revision, |
|            const char *path, |
|            rep_kind_t kind, |
|            svn_boolean_t plain_added) |
| { |
|   svn_fs_fs__largest_changes_t *largest = stats->largest_changes; |
|   svn_fs_fs__extension_info_t *ext_info; |
|   const char *last_segment; |
|   const char *ext = NULL; |
| |
|   /* Keep track of the largest reps.  The list is sorted by descending |
|    * size; its last entry is the smallest one and gets recycled. */ |
|   if (rep_size >= largest->min_size) |
|     { |
|       apr_size_t pos = largest->count - 1; |
|       svn_fs_fs__large_change_info_t *entry = largest->changes[pos]; |
| |
|       entry->size = rep_size; |
|       entry->revision = revision; |
|       svn_stringbuf_set(entry->path, path); |
| |
|       /* Shift smaller entries towards the end until we reach the slot |
|        * for the new one.  Linear, but the list is short and new entries |
|        * tend to belong close to its end. */ |
|       while (pos > 0 && largest->changes[pos - 1]->size < rep_size) |
|         { |
|           largest->changes[pos] = largest->changes[pos - 1]; |
|           --pos; |
|         } |
| |
|       largest->changes[pos] = entry; |
|       largest->min_size = largest->changes[largest->count - 1]->size; |
|     } |
| |
|   /* Histograms covering all kinds of reps. */ |
|   add_to_histogram(&stats->rep_size_histogram, rep_size); |
|   add_to_histogram(&stats->node_size_histogram, expanded_size); |
| |
|   if (plain_added) |
|     { |
|       add_to_histogram(&stats->added_rep_size_histogram, rep_size); |
|       add_to_histogram(&stats->added_node_size_histogram, expanded_size); |
|     } |
| |
|   /* Histograms for the specific kind of rep. */ |
|   switch (kind) |
|     { |
|       case file_rep: |
|         add_to_histogram(&stats->file_rep_histogram, rep_size); |
|         add_to_histogram(&stats->file_histogram, expanded_size); |
|         break; |
|       case dir_rep: |
|         add_to_histogram(&stats->dir_rep_histogram, rep_size); |
|         add_to_histogram(&stats->dir_histogram, expanded_size); |
|         break; |
|       case file_property_rep: |
|         add_to_histogram(&stats->file_prop_rep_histogram, rep_size); |
|         add_to_histogram(&stats->file_prop_histogram, expanded_size); |
|         break; |
|       case dir_property_rep: |
|         add_to_histogram(&stats->dir_prop_rep_histogram, rep_size); |
|         add_to_histogram(&stats->dir_prop_histogram, expanded_size); |
|         break; |
|       case unused_rep: |
|         add_to_histogram(&stats->unused_rep_histogram, rep_size); |
|         break; |
|     } |
| |
|   /* Per-extension statistics are kept for file contents only. */ |
|   if (kind != file_rep) |
|     return; |
| |
|   /* The extension is everything from the last '.' in the last path |
|    * segment onwards.  Names without a '.', or whose only '.' is their |
|    * first character, count as having no extension. */ |
|   last_segment = strrchr(path, '/'); |
|   if (last_segment) |
|     ext = strrchr(last_segment, '.'); |
| |
|   if (!ext || ext == last_segment + 1) |
|     ext = "(none)"; |
| |
|   /* Fetch the entry for this extension; create it upon first use. */ |
|   ext_info = apr_hash_get(stats->by_extension, ext, APR_HASH_KEY_STRING); |
|   if (!ext_info) |
|     { |
|       apr_pool_t *hash_pool = apr_hash_pool_get(stats->by_extension); |
| |
|       ext_info = apr_pcalloc(hash_pool, sizeof(*ext_info)); |
|       ext_info->extension = apr_pstrdup(hash_pool, ext); |
| |
|       apr_hash_set(stats->by_extension, ext_info->extension, |
|                    APR_HASH_KEY_STRING, ext_info); |
|     } |
| |
|   /* Update the histograms of that extension. */ |
|   add_to_histogram(&ext_info->node_histogram, expanded_size); |
|   add_to_histogram(&ext_info->rep_histogram, rep_size); |
| } |
| |
| /* Binary search comparator.  DATA points to a rep_stats_t * array element, |
|  * KEY points to the apr_uint64_t item index we are looking for.  Returns |
|  * -1, 0 or 1 if the element's ITEM_INDEX is less than, equal to or |
|  * greater than *KEY, respectively. |
|  */ |
| static int |
| compare_representation_item_index(const void *data, const void *key) |
| { |
|   const rep_stats_t *rep = *(const rep_stats_t *const *)data; |
|   const apr_uint64_t wanted = *(const apr_uint64_t *)key; |
| |
|   if (rep->item_index == wanted) |
|     return 0; |
| |
|   return rep->item_index < wanted ? -1 : 1; |
| } |
| |
| /* Look up the rep_stats_t for item ITEM_INDEX of REVISION in QUERY. |
|  * |
|  * REVISION_INFO may be NULL.  If it is not and *REVISION_INFO already is |
|  * the info for REVISION, we use it directly; otherwise we fetch the info |
|  * from QUERY and, if REVISION_INFO is given, return it in *REVISION_INFO. |
|  * |
|  * If the representation is known, return it and set *IDX to its position |
|  * in the revision's representations list.  If it is not, return NULL and |
|  * set *IDX to the position at which it would have to be inserted.  If |
|  * there is no info for REVISION at all, return NULL and set *IDX to -1. |
|  */ |
| static rep_stats_t * |
| find_representation(int *idx, |
|                     query_t *query, |
|                     revision_info_t **revision_info, |
|                     svn_revnum_t revision, |
|                     apr_uint64_t item_index) |
| { |
|   revision_info_t *info = NULL; |
|   rep_stats_t *candidate; |
| |
|   *idx = -1; |
| |
|   /* Step 1: get hold of the revision, using the caller's hint if there |
|    * is a matching one. */ |
|   if (revision_info) |
|     info = *revision_info; |
| |
|   if (!info || info->revision != revision) |
|     { |
|       info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*); |
|       if (revision_info) |
|         *revision_info = info; |
|     } |
| |
|   /* Without the revision, there is nothing to search in. */ |
|   if (!info) |
|     return NULL; |
| |
|   /* Step 2: binary search for the first rep with an index >= ITEM_INDEX. */ |
|   *idx = svn_sort__bsearch_lower_bound(info->representations, |
|                                        &item_index, |
|                                        compare_representation_item_index); |
| |
|   /* All known reps have smaller indexes -> not parsed, yet. */ |
|   if (*idx >= info->representations->nelts) |
|     return NULL; |
| |
|   /* It is a hit only if the indexes match exactly. */ |
|   candidate = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *); |
| |
|   return candidate->item_index == item_index ? candidate : NULL; |
| } |
| |
| /* Find / auto-construct the representation stats for REP in QUERY and |
|  * return it in *REPRESENTATION. |
|  * |
|  * REVISION_INFO is a hint: if it does not describe REP->REVISION, the |
|  * matching revision info is looked up in QUERY instead.  A newly created |
|  * stats object gets inserted into that revision's representations list. |
|  * |
|  * In phys. addressing mode, the rep header is read from the revision's |
|  * REV_FILE (which must be open) to determine the header size and the |
|  * delta chain length.  A DELTA rep whose base is unknown to QUERY results |
|  * in an assertion failure being returned. |
|  * |
|  * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for |
|  * temporary allocations. |
|  */ |
| static svn_error_t * |
| parse_representation(rep_stats_t **representation, |
|                      query_t *query, |
|                      representation_t *rep, |
|                      revision_info_t *revision_info, |
|                      apr_pool_t *result_pool, |
|                      apr_pool_t *scratch_pool) |
| { |
|   rep_stats_t *result; |
|   int idx; |
| |
|   /* look it up; this also sets IDX to the insertion position and makes |
|    * REVISION_INFO refer to REP->REVISION */ |
|   result = find_representation(&idx, query, &revision_info, rep->revision, |
|                                rep->item_index); |
|   if (!result) |
|     { |
|       /* not parsed, yet (probably a rep in the same revision). |
|        * Create a new rep object and determine its base rep as well. |
|        */ |
|       result = apr_pcalloc(result_pool, sizeof(*result)); |
|       result->revision = rep->revision; |
|       result->expanded_size = rep->expanded_size; |
|       result->item_index = rep->item_index; |
|       result->size = rep->size; |
| |
|       /* In phys. addressing mode, follow link to the actual representation. |
|        * In log. addressing mode, we will find it already as part of our |
|        * linear walk through the whole file. */ |
|       if (!svn_fs_fs__use_log_addressing(query->fs)) |
|         { |
|           svn_fs_fs__rep_header_t *header; |
|           apr_off_t offset = revision_info->offset |
|                            + (apr_off_t)rep->item_index; |
| |
|           SVN_ERR_ASSERT(revision_info->rev_file); |
|           SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, |
|                                    &offset, scratch_pool)); |
|           SVN_ERR(svn_fs_fs__read_rep_header(&header, |
|                                              revision_info->rev_file->stream, |
|                                              scratch_pool, scratch_pool)); |
| |
|           result->header_size = header->header_size; |
| |
|           /* Determine length of the delta chain. */ |
|           if (header->type == svn_fs_fs__rep_delta) |
|             { |
|               int base_idx; |
|               rep_stats_t *base_rep |
|                 = find_representation(&base_idx, query, NULL, |
|                                       header->base_revision, |
|                                       header->base_item_index); |
| |
|               /* find_representation() returns NULL for unknown reps. |
|                * Report a dangling base link as an error instead of |
|                * dereferencing a NULL pointer. */ |
|               SVN_ERR_ASSERT(base_rep); |
| |
|               /* Saturate at 255, the maximum an apr_byte_t can hold. */ |
|               result->chain_length = 1 + MIN(base_rep->chain_length, |
|                                              (apr_byte_t)0xfe); |
|             } |
|           else |
|             { |
|               result->chain_length = 1; |
|             } |
|         } |
| |
|       SVN_ERR(svn_sort__array_insert2(revision_info->representations, |
|                                       &result, idx)); |
|     } |
| |
|   *representation = result; |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| |
| /* forward declaration; needed because read_noderev and parse_dir call |
| * each other recursively */ |
| static svn_error_t * |
| read_noderev(query_t *query, |
| svn_stringbuf_t *noderev_str, |
| revision_info_t *revision_info, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool); |
| |
| /* Read the textual noderev that starts at OFFSET (relative to the start |
|  * of the revision) in REVISION_INFO's rev / pack file and return it in |
|  * *NODEREV, allocated in RESULT_POOL.  REVISION_INFO->REV_FILE must be |
|  * open.  Use SCRATCH_POOL for temporary allocations. |
|  * |
|  * The caller uses the length of the returned text as the on-disk size of |
|  * the noderev.  Only called in phys. addressing mode. |
|  */ |
| static svn_error_t * |
| read_phsy_noderev(svn_stringbuf_t **noderev, |
|                   query_t *query, |
|                   apr_off_t offset, |
|                   revision_info_t *revision_info, |
|                   apr_pool_t *result_pool, |
|                   apr_pool_t *scratch_pool) |
| { |
|   svn_stringbuf_t *text = svn_stringbuf_create_empty(result_pool); |
|   apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
| |
|   /* We can only read from an open file. */ |
|   SVN_ERR_ASSERT(revision_info->rev_file); |
| |
|   /* Translate the revision-relative offset into a file offset and |
|    * position the file there. */ |
|   offset += revision_info->offset; |
|   SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, |
|                            &offset, scratch_pool)); |
| |
|   /* Collect lines up to and including the terminating empty line (or |
|    * until we hit the end of the file). */ |
|   for (;;) |
|     { |
|       svn_stringbuf_t *line; |
|       svn_boolean_t eof; |
| |
|       svn_pool_clear(iterpool); |
| |
|       SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line, |
|                                   "\n", &eof, iterpool)); |
|       svn_stringbuf_appendstr(text, line); |
|       svn_stringbuf_appendbyte(text, '\n'); |
| |
|       if (line->len == 0 || eof) |
|         break; |
|     } |
| |
|   svn_pool_destroy(iterpool); |
| |
|   /* Hand out the result. */ |
|   *noderev = text; |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Read the contents of the directory NODEREV and process all of its |
|  * entries that were created in REVISION_INFO's revision:  read their |
|  * noderevs and hand them to read_noderev(), which stores the results in |
|  * QUERY and REVISION_INFO.  Entries from other revisions are skipped. |
|  * |
|  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
|  * temporaries. |
|  */ |
| static svn_error_t * |
| parse_dir(query_t *query, |
|           node_revision_t *noderev, |
|           revision_info_t *revision_info, |
|           apr_pool_t *result_pool, |
|           apr_pool_t *scratch_pool) |
| { |
|   apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
|   apr_array_header_t *entries; |
|   int n; |
| |
|   SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev, |
|                                       scratch_pool, scratch_pool)); |
| |
|   for (n = 0; n < entries->nelts; ++n) |
|     { |
|       svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, n, svn_fs_dirent_t *); |
|       svn_stringbuf_t *child_str; |
| |
|       /* Only nodes that belong to this very revision are of interest. */ |
|       if (svn_fs_fs__id_rev(dirent->id) != revision_info->revision) |
|         continue; |
| |
|       svn_pool_clear(iterpool); |
| |
|       /* Fetch the noderev text and process it (recursively). */ |
|       SVN_ERR(read_phsy_noderev(&child_str, query, |
|                                 svn_fs_fs__id_item(dirent->id), |
|                                 revision_info, iterpool, iterpool)); |
|       SVN_ERR(read_noderev(query, child_str, revision_info, |
|                            result_pool, iterpool)); |
|     } |
| |
|   svn_pool_destroy(iterpool); |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Parse the noderev text NODEREV_STR and record what we find in QUERY and |
|  * REVISION_INFO:  the data and property representations, the change |
|  * statistics for reps seen for the first time and the noderev counters. |
|  * In phys. addressing mode, directories whose data rep is seen for the |
|  * first time get processed recursively. |
|  * |
|  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
|  * temporaries. |
|  */ |
| static svn_error_t * |
| read_noderev(query_t *query, |
|              svn_stringbuf_t *noderev_str, |
|              revision_info_t *revision_info, |
|              apr_pool_t *result_pool, |
|              apr_pool_t *scratch_pool) |
| { |
|   node_revision_t *noderev; |
|   rep_stats_t *text_rep = NULL; |
|   rep_stats_t *prop_rep = NULL; |
|   svn_boolean_t is_dir; |
|   svn_boolean_t plain_added; |
|   svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool); |
| |
|   /* Parse the text and fix up the expanded sizes of both reps. */ |
|   SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool, |
|                                   scratch_pool)); |
|   SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->data_rep, |
|                                          scratch_pool)); |
|   SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->prop_rep, |
|                                          scratch_pool)); |
| |
|   is_dir = noderev->kind == svn_node_dir; |
|   plain_added = !noderev->predecessor_id; |
| |
|   /* The first node to reference a rep determines the rep's kind. */ |
|   if (noderev->data_rep) |
|     { |
|       SVN_ERR(parse_representation(&text_rep, query, |
|                                    noderev->data_rep, revision_info, |
|                                    result_pool, scratch_pool)); |
| |
|       text_rep->ref_count++; |
|       if (text_rep->ref_count == 1) |
|         text_rep->kind = is_dir ? dir_rep : file_rep; |
|     } |
| |
|   if (noderev->prop_rep) |
|     { |
|       SVN_ERR(parse_representation(&prop_rep, query, |
|                                    noderev->prop_rep, revision_info, |
|                                    result_pool, scratch_pool)); |
| |
|       prop_rep->ref_count++; |
|       if (prop_rep->ref_count == 1) |
|         prop_rep->kind = is_dir ? dir_property_rep : file_property_rep; |
|     } |
| |
|   /* Reps that we just saw for the first time count as changes. */ |
|   if (text_rep && text_rep->ref_count == 1) |
|     add_change(query->stats, text_rep->size, text_rep->expanded_size, |
|                text_rep->revision, noderev->created_path, text_rep->kind, |
|                plain_added); |
| |
|   if (prop_rep && prop_rep->ref_count == 1) |
|     add_change(query->stats, prop_rep->size, prop_rep->expanded_size, |
|                prop_rep->revision, noderev->created_path, prop_rep->kind, |
|                plain_added); |
| |
|   /* Descend into directories that have not been processed, yet. |
|    * Not needed in log. addressing mode. */ |
|   if (is_dir && text_rep && text_rep->ref_count == 1 |
|       && !svn_fs_fs__use_log_addressing(query->fs)) |
|     { |
|       SVN_ERR(parse_dir(query, noderev, revision_info, result_pool, |
|                         scratch_pool)); |
|     } |
| |
|   /* Finally, account for the noderev itself. */ |
|   if (is_dir) |
|     { |
|       revision_info->dir_noderev_count++; |
|       revision_info->dir_noderev_size += noderev_str->len; |
|     } |
|   else |
|     { |
|       revision_info->file_noderev_count++; |
|       revision_info->file_noderev_size += noderev_str->len; |
|     } |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* For the revision given as REVISION_INFO within QUERY, determine the number |
|  * of entries in its changed paths list and store that info in |
|  * REVISION_INFO->CHANGE_COUNT.  The list is fetched block by block and the |
|  * block sizes get summed up. |
|  * Use SCRATCH_POOL for temporary allocations. |
|  */ |
| static svn_error_t * |
| get_phys_change_count(query_t *query, |
|                       revision_info_t *revision_info, |
|                       apr_pool_t *scratch_pool) |
| { |
|   apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
|   svn_fs_fs__changes_context_t *context; |
| |
|   /* Set up the iteration over the changes list of this revision. */ |
|   SVN_ERR(svn_fs_fs__create_changes_context(&context, query->fs, |
|                                             revision_info->revision, |
|                                             scratch_pool)); |
| |
|   revision_info->change_count = 0; |
|   while (!context->eol) |
|     { |
|       apr_array_header_t *changes; |
| |
|       svn_pool_clear(iterpool); |
|       SVN_ERR(svn_fs_fs__get_changes(&changes, context, iterpool, iterpool)); |
| |
|       /* Accumulate: each call delivers only one block of the list, so |
|        * plain assignment would leave us with the size of the last block |
|        * instead of the total. */ |
|       revision_info->change_count += changes->nelts; |
|     } |
| |
|   svn_pool_destroy(iterpool); |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Read the revision described by INFO in phys. addressing mode and store |
|  * what we find in QUERY and INFO:  parse the revision trailer, determine |
|  * the number of changed paths and the length of the changes list and then |
|  * read the root noderev plus everything read_noderev() reaches from it. |
|  * |
|  * The caller must have set INFO->REVISION, INFO->OFFSET, INFO->END and |
|  * INFO->REV_FILE (an open rev / pack file). |
|  * |
|  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
|  * temporaries. |
|  */ |
| static svn_error_t * |
| read_phys_revision(query_t *query, |
|                    revision_info_t *info, |
|                    apr_pool_t *result_pool, |
|                    apr_pool_t *scratch_pool) |
| { |
|   char buf[64]; |
|   apr_off_t root_node_offset; |
|   apr_off_t changes_offset; |
|   svn_stringbuf_t *trailer; |
|   svn_stringbuf_t *noderev_str; |
| |
|   /* Read the last 64 bytes of the revision (if long enough). |
|    * |
|    * The subtraction must be done in the signed apr_off_t domain.  With a |
|    * plain sizeof(), it may be evaluated as unsigned and wrap around for |
|    * revisions ending before offset 64, which would defeat the MAX() |
|    * clamping and produce a bogus START and LEN. */ |
|   apr_off_t start = MAX(info->offset, info->end - (apr_off_t)sizeof(buf)); |
|   apr_size_t len = (apr_size_t)(info->end - start); |
|   SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start, |
|                            scratch_pool)); |
|   SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL, |
|                                  scratch_pool)); |
|   trailer = svn_stringbuf_ncreate(buf, len, scratch_pool); |
| |
|   /* Parse that trailer. */ |
|   SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset, |
|                                             &changes_offset, trailer, |
|                                             info->revision)); |
|   SVN_ERR(get_phys_change_count(query, info, scratch_pool)); |
| |
|   /* Calculate the length of the changes list:  it spans from |
|    * CHANGES_OFFSET to the start of the trailer.  Re-create the trailer |
|    * from the parsed values to learn its exact length. */ |
|   trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset, |
|                                                 changes_offset, |
|                                                 scratch_pool); |
|   info->changes_len = info->end - info->offset - changes_offset |
|                     - trailer->len; |
| |
|   /* Recursively read nodes added in this rev. */ |
|   SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info, |
|                             scratch_pool, scratch_pool)); |
|   SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool)); |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the pack file starting at revision BASE in physical |
|  * addressing mode and store it in QUERY.  QUERY->SHARD_SIZE revisions get |
|  * processed; the progress callback, if any, is invoked once at the end. |
|  * |
|  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
|  * temporaries. |
|  */ |
| static svn_error_t * |
| read_phys_pack_file(query_t *query, |
|                     svn_revnum_t base, |
|                     apr_pool_t *result_pool, |
|                     apr_pool_t *scratch_pool) |
| { |
|   apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
|   svn_fs_fs__revision_file_t *rev_file; |
|   svn_filesize_t file_size = 0; |
|   int i; |
| |
|   SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, |
|                                            scratch_pool, scratch_pool)); |
|   SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); |
| |
|   /* Walk through all revisions stored in the pack file. */ |
|   for (i = 0; i < query->shard_size; ++i) |
|     { |
|       const svn_revnum_t revision = base + i; |
|       const svn_boolean_t is_last = (i + 1 == query->shard_size); |
|       revision_info_t *info; |
| |
|       /* Give the user a chance to bail out. */ |
|       if (query->cancel_func) |
|         SVN_ERR(query->cancel_func(query->cancel_baton)); |
| |
|       /* Set up the info object for this revision. */ |
|       info = apr_pcalloc(result_pool, sizeof(*info)); |
|       info->representations = apr_array_make(result_pool, 4, |
|                                              sizeof(rep_stats_t*)); |
|       info->rev_file = rev_file; |
|       info->revision = revision; |
| |
|       /* The revision data starts at its manifest offset and ends where |
|        * the next revision begins - or at the end of the file. */ |
|       SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, |
|                                            revision, iterpool)); |
|       if (is_last) |
|         { |
|           info->end = file_size; |
|         } |
|       else |
|         { |
|           SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs, |
|                                                revision + 1, iterpool)); |
|         } |
| |
|       SVN_ERR(read_phys_revision(query, info, result_pool, iterpool)); |
| |
|       /* Replace the list by a copy of it in RESULT_POOL. */ |
|       info->representations = apr_array_copy(result_pool, |
|                                              info->representations); |
| |
|       /* This revision no longer needs file access. */ |
|       info->rev_file = NULL; |
| |
|       /* Add the revision to our container. */ |
|       APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; |
| |
|       /* Release temporaries. */ |
|       svn_pool_clear(iterpool); |
|     } |
| |
|   /* We are done with this pack file. */ |
|   SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); |
| |
|   /* Report that one more pack file has been processed. */ |
|   if (query->progress_func) |
|     query->progress_func(base, query->progress_baton, scratch_pool); |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the (non-packed) file for REVISION in physical |
|  * addressing mode and store its contents in QUERY.  The progress callback, |
|  * if any, gets invoked for every revision that is a multiple of the shard |
|  * size - or of 1000 in non-sharded repositories. |
|  * |
|  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
|  * temporaries. |
|  */ |
| static svn_error_t * |
| read_phys_revision_file(query_t *query, |
|                         svn_revnum_t revision, |
|                         apr_pool_t *result_pool, |
|                         apr_pool_t *scratch_pool) |
| { |
|   revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); |
|   svn_fs_fs__revision_file_t *rev_file; |
|   svn_filesize_t file_size = 0; |
| |
|   /* Give the user a chance to bail out. */ |
|   if (query->cancel_func) |
|     SVN_ERR(query->cancel_func(query->cancel_baton)); |
| |
|   /* Open the revision file and determine its size. */ |
|   SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision, |
|                                            scratch_pool, scratch_pool)); |
|   SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); |
| |
|   /* Set up the revision info.  The revision spans the whole file. */ |
|   info->representations = apr_array_make(result_pool, 4, |
|                                          sizeof(rep_stats_t*)); |
|   info->revision = revision; |
|   info->rev_file = rev_file; |
|   info->offset = 0; |
|   info->end = file_size; |
| |
|   SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool)); |
| |
|   /* This revision no longer needs file access. */ |
|   SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); |
|   info->rev_file = NULL; |
| |
|   /* Add the revision to our container. */ |
|   APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; |
| |
|   /* Show progress once per shard, or every 1000 revs if not sharded. */ |
|   if (query->progress_func) |
|     { |
|       const int interval = query->shard_size ? query->shard_size : 1000; |
| |
|       if (revision % interval == 0) |
|         query->progress_func(revision, query->progress_baton, scratch_pool); |
|     } |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Return the number of changed paths encoded in the unparsed changes list |
|  * CHANGES, which is LEN chars long.  Every change takes up two lines, so |
|  * the result is half the number of newlines found.  Only used in log. |
|  * addressing mode. |
|  */ |
| static apr_uint64_t |
| get_log_change_count(const char *changes, |
|                      apr_size_t len) |
| { |
|   apr_size_t newlines = 0; |
|   apr_size_t pos; |
| |
|   /* count the line terminators */ |
|   for (pos = 0; pos < len; ++pos) |
|     { |
|       if (changes[pos] == '\n') |
|         newlines++; |
|     } |
| |
|   /* each change occupies two lines */ |
|   return newlines / 2; |
| } |
| |
| /* Read the ENTRY->SIZE bytes of the item that ENTRY describes from |
|  * REV_FILE, starting at ENTRY->OFFSET.  Return them as a NUL-terminated |
|  * string buffer in *CONTENTS, allocated in RESULT_POOL. |
|  * Use SCRATCH_POOL for temporary allocations. |
|  */ |
| static svn_error_t * |
| read_item(svn_stringbuf_t **contents, |
|           svn_fs_fs__revision_file_t *rev_file, |
|           svn_fs_fs__p2l_entry_t *entry, |
|           apr_pool_t *result_pool, |
|           apr_pool_t *scratch_pool) |
| { |
|   /* Provide a buffer of exactly the item's size and terminate it. */ |
|   svn_stringbuf_t *buffer = svn_stringbuf_create_ensure(entry->size, |
|                                                         result_pool); |
|   buffer->len = entry->size; |
|   buffer->data[buffer->len] = 0; |
| |
|   /* Position the file at the item and fill the buffer. */ |
|   SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size, |
|                                    NULL, entry->offset, scratch_pool)); |
|   SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer->data, buffer->len, |
|                                  NULL, NULL, scratch_pool)); |
| |
|   *contents = buffer; |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Sort predicate ordering the two rep_ref_t ** LHS and RHS by the revision |
|  * of the representation they stand for.  Returns -1, 0 or 1. |
|  */ |
| static int |
| compare_representation_refs(const void *lhs, const void *rhs) |
| { |
|   const rep_ref_t *left = *(const rep_ref_t *const *)lhs; |
|   const rep_ref_t *right = *(const rep_ref_t *const *)rhs; |
| |
|   if (left->revision == right->revision) |
|     return 0; |
| |
|   return left->revision < right->revision ? -1 : 1; |
| } |
| |
| /* Given all the representations found in a single rev / pack file as |
|  * rep_ref_t * in REP_REFS, update the header sizes and delta chain lengths |
|  * of the respective rep_stats_t in QUERY.  REP_REFS gets sorted in the |
|  * process; it and its contents can be discarded afterwards. |
|  */ |
| static svn_error_t * |
| resolve_representation_refs(query_t *query, |
|                             apr_array_header_t *rep_refs) |
| { |
|   int n; |
| |
|   /* Process the refs in revision order.  Delta chains can only point to |
|    * previous revs, so each base has been updated before it is needed. */ |
|   svn_sort__array(rep_refs, compare_representation_refs); |
| |
|   /* Determine the CHAIN_LENGTH values one ref at a time. */ |
|   for (n = 0; n < rep_refs->nelts; ++n) |
|     { |
|       int idx; |
|       rep_stats_t *base; |
|       rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, n, rep_ref_t *); |
|       rep_stats_t *rep = find_representation(&idx, query, NULL, |
|                                              ref->revision, ref->item_index); |
| |
|       /* The rep must be known and must not have been resolved, yet. */ |
|       SVN_ERR_ASSERT(rep); |
|       SVN_ERR_ASSERT(!rep->chain_length); |
| |
|       /* Take over the HEADER_SIZE found during the scan. */ |
|       rep->header_size = ref->header_size; |
| |
|       /* Reps without a base start a new chain. */ |
|       if (ref->base_revision == SVN_INVALID_REVNUM) |
|         { |
|           rep->chain_length = 1; |
|           continue; |
|         } |
| |
|       /* Otherwise, the chain is one element longer than that of the |
|        * base - which must exist and must already be resolved. */ |
|       base = find_representation(&idx, query, NULL, ref->base_revision, |
|                                  ref->base_item_index); |
|       SVN_ERR_ASSERT(base); |
|       SVN_ERR_ASSERT(base->chain_length); |
| |
|       rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe); |
|     } |
| |
|   return SVN_NO_ERROR; |
| } |
| |
| /* Process the logically addressed revision contents of revisions BASE to |
| * BASE + COUNT - 1 in QUERY. |
| * |
| * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
| * temporaries. |
| */ |
| static svn_error_t * |
| read_log_rev_or_packfile(query_t *query, |
| svn_revnum_t base, |
| int count, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| fs_fs_data_t *ffd = query->fs->fsap_data; |
| apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
| apr_off_t max_offset; |
| apr_off_t offset = 0; |
| int i; |
| svn_fs_fs__revision_file_t *rev_file; |
| |
| /* We collect the delta chain links as we scan the file. Afterwards, |
| * we determine the lengths of those delta chains and throw this |
| * temporary container away. */ |
| apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64, |
| sizeof(rep_ref_t *)); |
| |
| /* we will process every revision in the rev / pack file */ |
| for (i = 0; i < count; ++i) |
| { |
| /* create the revision info for the current rev */ |
| revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); |
| info->representations = apr_array_make(result_pool, 4, |
| sizeof(rep_stats_t*)); |
| info->revision = base + i; |
| |
| APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; |
| } |
| |
| /* open the pack / rev file that is covered by the p2l index */ |
| SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, |
| scratch_pool, iterpool)); |
| SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file, |
| base, scratch_pool)); |
| |
| /* record the whole pack size in the first rev so the total sum will |
| still be correct */ |
| APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset; |
| |
| /* for all offsets in the file, get the P2L index entries and process |
| the interesting items (change lists, noderevs) */ |
| for (offset = 0; offset < max_offset; ) |
| { |
| apr_array_header_t *entries; |
| |
| svn_pool_clear(iterpool); |
| |
| /* cancellation support */ |
| if (query->cancel_func) |
| SVN_ERR(query->cancel_func(query->cancel_baton)); |
| |
| /* get all entries for the current block */ |
| SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base, |
| offset, ffd->p2l_page_size, |
| iterpool, iterpool)); |
| |
| /* process all entries (and later continue with the next block) */ |
| for (i = 0; i < entries->nelts; ++i) |
| { |
| svn_stringbuf_t *item; |
| revision_info_t *info; |
| svn_fs_fs__p2l_entry_t *entry |
| = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); |
| |
| /* skip bits we previously processed */ |
| if (i == 0 && entry->offset < offset) |
| continue; |
| |
| /* skip zero-sized entries */ |
| if (entry->size == 0) |
| continue; |
| |
| /* read and process interesting items */ |
| info = APR_ARRAY_IDX(query->revisions, entry->item.revision, |
| revision_info_t*); |
| |
| if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) |
| { |
| SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); |
| SVN_ERR(read_noderev(query, item, info, result_pool, iterpool)); |
| } |
| else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) |
| { |
| SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); |
| info->change_count |
| = get_log_change_count(item->data + 0, item->len); |
| info->changes_len += entry->size; |
| } |
| else if ( (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP) |
| || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP) |
| || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS) |
| || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)) |
| { |
| /* Collect the delta chain link. */ |
| svn_fs_fs__rep_header_t *header; |
| rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref)); |
| |
| SVN_ERR(svn_io_file_aligned_seek(rev_file->file, |
| rev_file->block_size, |
| NULL, entry->offset, |
| iterpool)); |
| SVN_ERR(svn_fs_fs__read_rep_header(&header, |
| rev_file->stream, |
| iterpool, iterpool)); |
| |
| ref->header_size = header->header_size; |
| ref->revision = entry->item.revision; |
| ref->item_index = entry->item.number; |
| |
| if (header->type == svn_fs_fs__rep_delta) |
| { |
| ref->base_item_index = header->base_item_index; |
| ref->base_revision = header->base_revision; |
| } |
| else |
| { |
| ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED; |
| ref->base_revision = SVN_INVALID_REVNUM; |
| } |
| |
| APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref; |
| } |
| |
| /* advance offset */ |
| offset += entry->size; |
| } |
| } |
| |
| /* Resolve the delta chain links. */ |
| SVN_ERR(resolve_representation_refs(query, rep_refs)); |
| |
| /* clean up and close file handles */ |
| svn_pool_destroy(iterpool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the pack file staring at revision BASE logical |
| * addressing mode and store it in QUERY. |
| * |
| * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
| * temporaries. |
| */ |
| static svn_error_t * |
| read_log_pack_file(query_t *query, |
| svn_revnum_t base, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size, |
| result_pool, scratch_pool)); |
| |
| /* one more pack file processed */ |
| if (query->progress_func) |
| query->progress_func(base, query->progress_baton, scratch_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the file for REVISION in logical addressing mode |
| * and store its contents in QUERY. |
| * |
| * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
| * temporaries. |
| */ |
| static svn_error_t * |
| read_log_revision_file(query_t *query, |
| svn_revnum_t revision, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| SVN_ERR(read_log_rev_or_packfile(query, revision, 1, |
| result_pool, scratch_pool)); |
| |
| /* show progress every 1000 revs or so */ |
| if (query->progress_func) |
| { |
| if (query->shard_size && (revision % query->shard_size == 0)) |
| query->progress_func(revision, query->progress_baton, scratch_pool); |
| if (!query->shard_size && (revision % 1000 == 0)) |
| query->progress_func(revision, query->progress_baton, scratch_pool); |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the repository and collect the stats info in QUERY. |
| * |
| * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for |
| * temporaries. |
| */ |
| static svn_error_t * |
| read_revisions(query_t *query, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| apr_pool_t *iterpool = svn_pool_create(scratch_pool); |
| svn_revnum_t revision; |
| |
| /* read all packed revs */ |
| for ( revision = 0 |
| ; revision < query->min_unpacked_rev |
| ; revision += query->shard_size) |
| { |
| svn_pool_clear(iterpool); |
| |
| if (svn_fs_fs__use_log_addressing(query->fs)) |
| SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool)); |
| else |
| SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool)); |
| } |
| |
| /* read non-packed revs */ |
| for ( ; revision <= query->head; ++revision) |
| { |
| svn_pool_clear(iterpool); |
| |
| if (svn_fs_fs__use_log_addressing(query->fs)) |
| SVN_ERR(read_log_revision_file(query, revision, result_pool, |
| iterpool)); |
| else |
| SVN_ERR(read_phys_revision_file(query, revision, result_pool, |
| iterpool)); |
| } |
| |
| svn_pool_destroy(iterpool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Accumulate stats of REP in STATS. |
| */ |
| static void |
| add_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats, |
| rep_stats_t *rep) |
| { |
| stats->count++; |
| |
| stats->packed_size += rep->size; |
| stats->expanded_size += rep->expanded_size; |
| stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; |
| } |
| |
| /* Accumulate stats of REP in STATS. |
| */ |
| static void |
| add_rep_stats(svn_fs_fs__representation_stats_t *stats, |
| rep_stats_t *rep) |
| { |
| add_rep_pack_stats(&stats->total, rep); |
| if (rep->ref_count == 1) |
| add_rep_pack_stats(&stats->uniques, rep); |
| else |
| add_rep_pack_stats(&stats->shared, rep); |
| |
| stats->references += rep->ref_count; |
| stats->expanded_size += rep->ref_count * rep->expanded_size; |
| stats->chain_len += rep->chain_length; |
| } |
| |
| /* Aggregate the info the in revision_info_t * array REVISIONS into the |
| * respectve fields of STATS. |
| */ |
| static void |
| aggregate_stats(const apr_array_header_t *revisions, |
| svn_fs_fs__stats_t *stats) |
| { |
| int i, k; |
| |
| /* aggregate info from all revisions */ |
| stats->revision_count = revisions->nelts; |
| for (i = 0; i < revisions->nelts; ++i) |
| { |
| revision_info_t *revision = APR_ARRAY_IDX(revisions, i, |
| revision_info_t *); |
| |
| /* data gathered on a revision level */ |
| stats->change_count += revision->change_count; |
| stats->change_len += revision->changes_len; |
| stats->total_size += revision->end - revision->offset; |
| |
| stats->dir_node_stats.count += revision->dir_noderev_count; |
| stats->dir_node_stats.size += revision->dir_noderev_size; |
| stats->file_node_stats.count += revision->file_noderev_count; |
| stats->file_node_stats.size += revision->file_noderev_size; |
| stats->total_node_stats.count += revision->dir_noderev_count |
| + revision->file_noderev_count; |
| stats->total_node_stats.size += revision->dir_noderev_size |
| + revision->file_noderev_size; |
| |
| /* process representations */ |
| for (k = 0; k < revision->representations->nelts; ++k) |
| { |
| rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k, |
| rep_stats_t *); |
| |
| /* accumulate in the right bucket */ |
| switch(rep->kind) |
| { |
| case file_rep: |
| add_rep_stats(&stats->file_rep_stats, rep); |
| break; |
| case dir_rep: |
| add_rep_stats(&stats->dir_rep_stats, rep); |
| break; |
| case file_property_rep: |
| add_rep_stats(&stats->file_prop_rep_stats, rep); |
| break; |
| case dir_property_rep: |
| add_rep_stats(&stats->dir_prop_rep_stats, rep); |
| break; |
| default: |
| break; |
| } |
| |
| add_rep_stats(&stats->total_rep_stats, rep); |
| } |
| } |
| } |
| |
| /* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL. |
| */ |
| static svn_fs_fs__stats_t * |
| create_stats(apr_pool_t *result_pool) |
| { |
| svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats)); |
| |
| initialize_largest_changes(stats, 64, result_pool); |
| stats->by_extension = apr_hash_make(result_pool); |
| |
| return stats; |
| } |
| |
| /* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and |
| * collecting results in STATS. Store the optional PROCESS_FUNC and |
| * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too. |
| * Use SCRATCH_POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| create_query(query_t **query, |
| svn_fs_t *fs, |
| svn_fs_fs__stats_t *stats, |
| svn_fs_progress_notify_func_t progress_func, |
| void *progress_baton, |
| svn_cancel_func_t cancel_func, |
| void *cancel_baton, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| *query = apr_pcalloc(result_pool, sizeof(**query)); |
| |
| /* Read repository dimensions. */ |
| (*query)->shard_size = svn_fs_fs__shard_size(fs); |
| SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool)); |
| SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs, |
| scratch_pool)); |
| |
| /* create data containers and caches |
| * Note: this assumes that int is at least 32-bits and that we only support |
| * 32-bit wide revision numbers (actually 31-bits due to the signedness |
| * of both the nelts field of the array and our revision numbers). This |
| * means this code will fail on platforms where int is less than 32-bits |
| * and the repository has more revisions than int can hold. */ |
| (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1, |
| sizeof(revision_info_t *)); |
| (*query)->null_base = apr_pcalloc(result_pool, |
| sizeof(*(*query)->null_base)); |
| |
| /* Store other parameters */ |
| (*query)->fs = fs; |
| (*query)->stats = stats; |
| (*query)->progress_func = progress_func; |
| (*query)->progress_baton = progress_baton; |
| (*query)->cancel_func = cancel_func; |
| (*query)->cancel_baton = cancel_baton; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| svn_error_t * |
| svn_fs_fs__get_stats(svn_fs_fs__stats_t **stats, |
| svn_fs_t *fs, |
| svn_fs_progress_notify_func_t progress_func, |
| void *progress_baton, |
| svn_cancel_func_t cancel_func, |
| void *cancel_baton, |
| apr_pool_t *result_pool, |
| apr_pool_t *scratch_pool) |
| { |
| query_t *query; |
| |
| *stats = create_stats(result_pool); |
| SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton, |
| cancel_func, cancel_baton, scratch_pool, |
| scratch_pool)); |
| SVN_ERR(read_revisions(query, scratch_pool, scratch_pool)); |
| aggregate_stats(query->revisions, *stats); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Baton for rev_size_index_entry_cb. */ |
| struct rev_size_baton_t { |
| svn_revnum_t revision; |
| apr_off_t rev_size; |
| }; |
| |
| /* Implements svn_fs_fs__dump_index_func_t, summing object sizes for |
| * revision BATON->revision into BATON->rev_size. |
| */ |
| static svn_error_t * |
| rev_size_index_entry_cb(const svn_fs_fs__p2l_entry_t *entry, |
| void *baton, |
| apr_pool_t *scratch_pool) |
| { |
| struct rev_size_baton_t *b = baton; |
| |
| if (entry->item.revision == b->revision) |
| b->rev_size += entry->size; |
| return SVN_NO_ERROR; |
| } |
| |
| svn_error_t * |
| svn_fs_fs__revision_size(apr_off_t *rev_size, |
| svn_fs_t *fs, |
| svn_revnum_t revision, |
| apr_pool_t *scratch_pool) |
| { |
| /* Get the size of the revision (excluding rev-props) */ |
| if (svn_fs_fs__use_log_addressing(fs)) |
| { |
| /* This works for a packed or a non-packed revision. |
| We could provide an optimized case for a non-packed revision |
| using svn_fs_fs__p2l_get_max_offset(). */ |
| struct rev_size_baton_t b = { 0, 0 }; |
| |
| b.revision = revision; |
| SVN_ERR(svn_fs_fs__dump_index(fs, revision, |
| rev_size_index_entry_cb, &b, |
| NULL, NULL, scratch_pool)); |
| *rev_size = b.rev_size; |
| } |
| else |
| { |
| svn_fs_fs__revision_file_t *rev_file; |
| svn_revnum_t min_unpacked_rev; |
| |
| SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, |
| scratch_pool, scratch_pool)); |
| SVN_ERR(svn_fs_fs__min_unpacked_rev(&min_unpacked_rev, fs, |
| scratch_pool)); |
| if (revision < min_unpacked_rev) |
| { |
| int shard_size = svn_fs_fs__shard_size(fs); |
| apr_off_t start_offset, end_offset; |
| |
| SVN_ERR(svn_fs_fs__get_packed_offset(&start_offset, fs, revision, |
| scratch_pool)); |
| if (((revision + 1) % shard_size) == 0) |
| { |
| svn_filesize_t file_size; |
| |
| SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); |
| end_offset = (apr_off_t)file_size; |
| } |
| else |
| { |
| SVN_ERR(svn_fs_fs__get_packed_offset(&end_offset, fs, |
| revision + 1, scratch_pool)); |
| } |
| *rev_size = (end_offset - start_offset); |
| } |
| else |
| { |
| svn_filesize_t file_size; |
| |
| SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); |
| *rev_size = (apr_off_t)file_size; |
| } |
| |
| SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); |
| } |
| |
| /* Add the size of the rev-props */ |
| { |
| apr_off_t size; |
| |
| SVN_ERR(svn_fs_fs__get_revision_props_size(&size, fs, revision, scratch_pool)); |
| *rev_size += size; |
| } |
| |
| return SVN_NO_ERROR; |
| } |