| /* fsfs-stats.c -- gather size statistics on FSFS repositories |
| * |
| * ==================================================================== |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * ==================================================================== |
| */ |
| |
| |
| #include <assert.h> |
| |
| #include <apr.h> |
| #include <apr_general.h> |
| #include <apr_file_io.h> |
| #include <apr_poll.h> |
| |
| #include "svn_pools.h" |
| #include "svn_diff.h" |
| #include "svn_io.h" |
| #include "svn_utf.h" |
| #include "svn_dirent_uri.h" |
| #include "svn_sorts.h" |
| #include "svn_delta.h" |
| #include "svn_hash.h" |
| #include "svn_cache_config.h" |
| |
| #include "private/svn_string_private.h" |
| #include "private/svn_subr_private.h" |
| #include "private/svn_dep_compat.h" |
| #include "private/svn_cache.h" |
| |
| #ifndef _ /* message strings in this file are wrapped in _(); provide a fallback */ |
| #define _(x) x /* expands to its argument unchanged */ |
| #endif |
| |
| #define ERROR_TAG "fsfs-stats: " /* NOTE(review): presumably an output prefix; unused in the visible part of the file */ |
| |
| /* We group representations into 2x2 different kinds plus one default: |
| * [dir / file] x [text / prop]. The assignment is done by the first node |
| * that references the respective representation. |
| */ |
| typedef enum rep_kind_t |
| { |
| /* The representation is _directly_ unused, i.e. not referenced by any |
| * noderev. However, some other representation may use it as delta base. |
| * This is the null (0) value. Should not occur in real-world repositories. */ |
| unused_rep, |
| |
| /* a representation holding the properties of a directory */ |
| dir_property_rep, |
| |
| /* a representation holding the properties of a file */ |
| file_property_rep, |
| |
| /* a representation holding directory contents */ |
| dir_rep, |
| |
| /* a representation holding file contents */ |
| file_rep |
| } rep_kind_t; |
| |
| /* A representation fragment. |
| */ |
| typedef struct representation_t |
| { |
| /* offset of the rep header, relative to the start of its revision's data */ |
| apr_size_t offset; |
| |
| /* on-disk item length in bytes, not counting the header line */ |
| apr_size_t size; |
| |
| /* item length after de-deltification (SIZE if the parsed value was 0) */ |
| apr_size_t expanded_size; |
| |
| /* deltification base, or NULL if there is none */ |
| struct representation_t *delta_base; |
| |
| /* revision that contains this representation |
| * (may be referenced by other revisions, though) */ |
| svn_revnum_t revision; |
| |
| /* number of nodes that reference this representation */ |
| apr_uint32_t ref_count; |
| |
| /* length of the PLAIN / DELTA line in the source file in bytes, |
| * including the terminating newline */ |
| apr_uint16_t header_size; |
| |
| /* classification of the representation. values of rep_kind_t */ |
| char kind; |
| |
| /* the source content has a PLAIN header, so we may simply copy the |
| * source content into the target */ |
| char is_plain; |
| |
| } representation_t; |
| |
| /* Represents a single revision. |
| * There will be only one instance per revision. */ |
| typedef struct revision_info_t |
| { |
| /* number of this revision */ |
| svn_revnum_t revision; |
| |
| /* pack file offset (manifest value), 0 for non-packed files */ |
| apr_size_t offset; |
| |
| /* offset of the changes list relative to OFFSET */ |
| apr_size_t changes; |
| |
| /* length of the changes list in bytes */ |
| apr_size_t changes_len; |
| |
| /* NOTE(review): presumably the number of entries in the changes list */ |
| apr_size_t change_count; |
| |
| /* first offset behind the revision data in the pack file (file length |
| * for non-packed revs) */ |
| apr_size_t end; |
| |
| /* number of directory noderevs in this revision */ |
| apr_size_t dir_noderev_count; |
| |
| /* number of file noderevs in this revision */ |
| apr_size_t file_noderev_count; |
| |
| /* total size of directory noderevs (i.e. the structs - not the rep) */ |
| apr_size_t dir_noderev_size; |
| |
| /* total size of file noderevs (i.e. the structs - not the rep) */ |
| apr_size_t file_noderev_size; |
| |
| /* all representation_t of this revision, kept sorted by their offset |
| * so that they can be found by binary search */ |
| apr_array_header_t *representations; |
| } revision_info_t; |
| |
| /* Data type to identify a representation. It will be used to address |
| * cached combined (un-deltified) windows. |
| */ |
| typedef struct window_cache_key_t |
| { |
| /* revision of the representation */ |
| svn_revnum_t revision; |
| |
| /* offset of the representation (copied from representation_t.offset) */ |
| apr_size_t offset; |
| } window_cache_key_t; |
| |
| /* Description of one large representation. Its content will be reused / |
| * overwritten when it gets replaced by an even larger representation. |
| */ |
| typedef struct large_change_info_t |
| { |
| /* size of the (deltified) representation, 0 for unused instances */ |
| apr_size_t size; |
| |
| /* revision of the representation, SVN_INVALID_REVNUM for unused instances */ |
| svn_revnum_t revision; |
| |
| /* node path. "" for unused instances */ |
| svn_stringbuf_t *path; |
| } large_change_info_t; |
| |
| /* Container for the largest representations found so far. The capacity |
| * is fixed and entries will be inserted by reusing the last one and |
| * reshuffling the entry pointers. Entries are kept ordered by descending size. |
| */ |
| typedef struct largest_changes_t |
| { |
| /* number of entries allocated in CHANGES */ |
| apr_size_t count; |
| |
| /* size of the last (smallest) entry in CHANGES; starts out as 1 */ |
| apr_size_t min_size; |
| |
| /* changes kept in this struct */ |
| large_change_info_t **changes; |
| } largest_changes_t; |
| |
| /* Information we gather per size bracket. |
| */ |
| typedef struct histogram_line_t |
| { |
| /* number of items that fall into this bracket */ |
| apr_int64_t count; |
| |
| /* sum of the values of all items in this bracket */ |
| apr_int64_t sum; |
| } histogram_line_t; |
| |
| /* A histogram of 64 bit integer values. |
| */ |
| typedef struct histogram_t |
| { |
| /* count and sum over all brackets */ |
| histogram_line_t total; |
| |
| /* one bracket per binary step. lines[0] takes all values below 1; |
| * lines[i] is the 2^(i-1) <= x < 2^i bracket */ |
| histogram_line_t lines[64]; |
| } histogram_t; |
| |
| /* Information we collect per file ending. |
| */ |
| typedef struct extension_info_t |
| { |
| /* file extension, including leading "." |
| * "(none)" in the container for files w/o extension. */ |
| const char *extension; |
| |
| /* histogram of the on-disk representation sizes */ |
| histogram_t rep_histogram; |
| |
| /* histogram of the expanded sizes of changed files */ |
| histogram_t node_histogram; |
| } extension_info_t; |
| |
| /* Root data structure containing all information about a given repository. |
| */ |
| typedef struct fs_fs_t |
| { |
| /* path of the repository to analyze */ |
| const char *path; |
| |
| /* revision to start at (must be 0, ATM) */ |
| svn_revnum_t start_revision; |
| |
| /* FSFS format number */ |
| int format; |
| |
| /* highest revision number in the repo */ |
| svn_revnum_t max_revision; |
| |
| /* first non-packed revision */ |
| svn_revnum_t min_unpacked_rev; |
| |
| /* sharding size, i.e. revisions per shard directory; 0 for linear layout */ |
| int max_files_per_dir; |
| |
| /* all revisions (revision_info_t *), indexed by revision - START_REVISION */ |
| apr_array_header_t *revisions; |
| |
| /* empty representation. |
| * Used as a dummy base for DELTA reps without base. */ |
| representation_t *null_base; |
| |
| /* undeltified txdelta window cache */ |
| svn_cache__t *window_cache; |
| |
| /* track the biggest contributors to repo size */ |
| largest_changes_t *largest_changes; |
| |
| /* histogram of representation sizes */ |
| histogram_t rep_size_histogram; |
| |
| /* histogram of sizes of changed nodes */ |
| histogram_t node_size_histogram; |
| |
| /* histogram of unused representations */ |
| histogram_t unused_rep_histogram; |
| |
| /* histogram of sizes of changed files */ |
| histogram_t file_histogram; |
| |
| /* histogram of sizes of file representations */ |
| histogram_t file_rep_histogram; |
| |
| /* histogram of sizes of changed file property sets */ |
| histogram_t file_prop_histogram; |
| |
| /* histogram of sizes of file property representations */ |
| histogram_t file_prop_rep_histogram; |
| |
| /* histogram of sizes of changed directories (in bytes) */ |
| histogram_t dir_histogram; |
| |
| /* histogram of sizes of directory representations */ |
| histogram_t dir_rep_histogram; |
| |
| /* histogram of sizes of changed directory property sets */ |
| histogram_t dir_prop_histogram; |
| |
| /* histogram of sizes of directory property representations */ |
| histogram_t dir_prop_rep_histogram; |
| |
| /* extension -> extension_info_t* map */ |
| apr_hash_t *by_extension; |
| } fs_fs_t; |
| |
| /* Return the rev pack folder for revision REV in FS. |
| */ |
| static const char * |
| get_pack_folder(fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return apr_psprintf(pool, "%s/db/revs/%ld.pack", |
| fs->path, rev / fs->max_files_per_dir); |
| } |
| |
| /* Return the path of the file containing revision REV in FS. |
| */ |
| static const char * |
| rev_or_pack_file_name(fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return fs->min_unpacked_rev > rev |
| ? svn_dirent_join(get_pack_folder(fs, rev, pool), "pack", pool) |
| : apr_psprintf(pool, "%s/db/revs/%ld/%ld", fs->path, |
| rev / fs->max_files_per_dir, rev); |
| } |
| |
| /* Open the file containing revision REV in FS and return it in *FILE. |
| */ |
| static svn_error_t * |
| open_rev_or_pack_file(apr_file_t **file, |
| fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return svn_io_file_open(file, |
| rev_or_pack_file_name(fs, rev, pool), |
| APR_READ | APR_BUFFERED, |
| APR_OS_DEFAULT, |
| pool); |
| } |
| |
| /* Return the length of FILE in *FILE_SIZE. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| get_file_size(apr_off_t *file_size, |
| apr_file_t *file, |
| apr_pool_t *pool) |
| { |
| apr_finfo_t finfo; |
| |
| SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool)); |
| |
| *file_size = finfo.size; |
| return SVN_NO_ERROR; |
| } |
| |
| /* Get the file content of revision REVISION in FS and return it in *CONTENT. |
| * Read the LEN bytes starting at file OFFSET. When provided, use FILE as |
| * packed or plain rev file. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| get_content(svn_stringbuf_t **content, |
| apr_file_t *file, |
| fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_off_t offset, |
| apr_size_t len, |
| apr_pool_t *pool) |
| { |
| apr_pool_t * file_pool = svn_pool_create(pool); |
| apr_size_t large_buffer_size = 0x10000; |
| |
| if (file == NULL) |
| SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool)); |
| |
| *content = svn_stringbuf_create_ensure(len, pool); |
| (*content)->len = len; |
| |
| #if APR_VERSION_AT_LEAST(1,3,0) |
| /* for better efficiency use larger buffers on large reads */ |
| if ( (len >= large_buffer_size) |
| && (apr_file_buffer_size_get(file) < large_buffer_size)) |
| apr_file_buffer_set(file, |
| apr_palloc(apr_file_pool_get(file), |
| large_buffer_size), |
| large_buffer_size); |
| #endif |
| |
| SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); |
| SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len, |
| NULL, NULL, pool)); |
| svn_pool_destroy(file_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* In *RESULT, return the cached txdelta window stored in REPRESENTATION |
| * within FS. If that has not been found in cache, return NULL. |
| * Allocate the result in POOL. |
| */ |
| static svn_error_t * |
| get_cached_window(svn_stringbuf_t **result, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool) |
| { |
| svn_boolean_t found = FALSE; |
| window_cache_key_t key; |
| key.revision = representation->revision; |
| key.offset = representation->offset; |
| |
| *result = NULL; |
| return svn_error_trace(svn_cache__get((void**)result, &found, |
| fs->window_cache, |
| &key, pool)); |
| } |
| |
| /* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS. |
| * Use POOL for temporaries. |
| */ |
| static svn_error_t * |
| set_cached_window(fs_fs_t *fs, |
| representation_t *representation, |
| svn_stringbuf_t *window, |
| apr_pool_t *pool) |
| { |
| /* select entry */ |
| window_cache_key_t key; |
| key.revision = representation->revision; |
| key.offset = representation->offset; |
| |
| return svn_error_trace(svn_cache__set(fs->window_cache, &key, window, |
| pool)); |
| } |
| |
| /* Initialize the LARGEST_CHANGES member in FS with a capacity of COUNT |
| * entries. Use POOL for allocations. |
| */ |
| static void |
| initialize_largest_changes(fs_fs_t *fs, |
| apr_size_t count, |
| apr_pool_t *pool) |
| { |
| apr_size_t i; |
| |
| fs->largest_changes = apr_pcalloc(pool, sizeof(*fs->largest_changes)); |
| fs->largest_changes->count = count; |
| fs->largest_changes->min_size = 1; |
| fs->largest_changes->changes |
| = apr_palloc(pool, count * sizeof(*fs->largest_changes->changes)); |
| |
| /* allocate *all* entries before the path stringbufs. This increases |
| * cache locality and enhances performance significantly. */ |
| for (i = 0; i < count; ++i) |
| fs->largest_changes->changes[i] |
| = apr_palloc(pool, sizeof(**fs->largest_changes->changes)); |
| |
| /* now initialize them and allocate the stringbufs */ |
| for (i = 0; i < count; ++i) |
| { |
| fs->largest_changes->changes[i]->size = 0; |
| fs->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM; |
| fs->largest_changes->changes[i]->path |
| = svn_stringbuf_create_ensure(1024, pool); |
| } |
| } |
| |
| /* Add entry for SIZE to HISTOGRAM. |
| */ |
| static void |
| add_to_histogram(histogram_t *histogram, |
| apr_int64_t size) |
| { |
| apr_int64_t shift = 0; |
| |
| while (((apr_int64_t)(1) << shift) <= size) |
| shift++; |
| |
| histogram->total.count++; |
| histogram->total.sum += size; |
| histogram->lines[(apr_size_t)shift].count++; |
| histogram->lines[(apr_size_t)shift].sum += size; |
| } |
| |
| /* Update data aggregators in FS with this representation of type KIND, on- |
| * disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION. |
| */ |
| static void |
| add_change(fs_fs_t *fs, |
| apr_int64_t rep_size, |
| apr_int64_t expanded_size, |
| svn_revnum_t revision, |
| const char *path, |
| rep_kind_t kind) |
| { |
| /* identify largest reps */ |
| if (rep_size >= fs->largest_changes->min_size) |
| { |
| apr_size_t i; |
| large_change_info_t *info |
| = fs->largest_changes->changes[fs->largest_changes->count - 1]; |
| info->size = rep_size; |
| info->revision = revision; |
| svn_stringbuf_set(info->path, path); |
| |
| /* linear insertion but not too bad since count is low and insertions |
| * near the end are more likely than close to front */ |
| for (i = fs->largest_changes->count - 1; i > 0; --i) |
| if (fs->largest_changes->changes[i-1]->size >= rep_size) |
| break; |
| else |
| fs->largest_changes->changes[i] = fs->largest_changes->changes[i-1]; |
| |
| fs->largest_changes->changes[i] = info; |
| fs->largest_changes->min_size |
| = fs->largest_changes->changes[fs->largest_changes->count-1]->size; |
| } |
| |
| /* global histograms */ |
| add_to_histogram(&fs->rep_size_histogram, rep_size); |
| add_to_histogram(&fs->node_size_histogram, expanded_size); |
| |
| /* specific histograms by type */ |
| switch (kind) |
| { |
| case unused_rep: add_to_histogram(&fs->unused_rep_histogram, |
| rep_size); |
| break; |
| case dir_property_rep: add_to_histogram(&fs->dir_prop_rep_histogram, |
| rep_size); |
| add_to_histogram(&fs->dir_prop_histogram, |
| expanded_size); |
| break; |
| case file_property_rep: add_to_histogram(&fs->file_prop_rep_histogram, |
| rep_size); |
| add_to_histogram(&fs->file_prop_histogram, |
| expanded_size); |
| break; |
| case dir_rep: add_to_histogram(&fs->dir_rep_histogram, |
| rep_size); |
| add_to_histogram(&fs->dir_histogram, |
| expanded_size); |
| break; |
| case file_rep: add_to_histogram(&fs->file_rep_histogram, |
| rep_size); |
| add_to_histogram(&fs->file_histogram, |
| expanded_size); |
| break; |
| } |
| |
| /* by extension */ |
| if (kind == file_rep) |
| { |
| /* determine extension */ |
| extension_info_t *info; |
| const char * file_name = strrchr(path, '/'); |
| const char * extension = file_name ? strrchr(file_name, '.') : NULL; |
| |
| if (extension == NULL || extension == file_name + 1) |
| extension = "(none)"; |
| |
| /* get / auto-insert entry for this extension */ |
| info = apr_hash_get(fs->by_extension, extension, APR_HASH_KEY_STRING); |
| if (info == NULL) |
| { |
| apr_pool_t *pool = apr_hash_pool_get(fs->by_extension); |
| info = apr_pcalloc(pool, sizeof(*info)); |
| info->extension = apr_pstrdup(pool, extension); |
| |
| apr_hash_set(fs->by_extension, info->extension, |
| APR_HASH_KEY_STRING, info); |
| } |
| |
| /* update per-extension histogram */ |
| add_to_histogram(&info->node_histogram, expanded_size); |
| add_to_histogram(&info->rep_histogram, rep_size); |
| } |
| } |
| |
| /* Given rev pack PATH in FS, read the manifest file and return the offsets |
| * in *MANIFEST. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_manifest(apr_array_header_t **manifest, |
| fs_fs_t *fs, |
| const char *path, |
| apr_pool_t *pool) |
| { |
| svn_stream_t *manifest_stream; |
| apr_pool_t *iterpool; |
| |
| /* Open the manifest file. */ |
| SVN_ERR(svn_stream_open_readonly(&manifest_stream, |
| svn_dirent_join(path, "manifest", pool), |
| pool, pool)); |
| |
| /* While we're here, let's just read the entire manifest file into an array, |
| so we can cache the entire thing. */ |
| iterpool = svn_pool_create(pool); |
| *manifest = apr_array_make(pool, fs->max_files_per_dir, sizeof(apr_size_t)); |
| while (1) |
| { |
| svn_stringbuf_t *sb; |
| svn_boolean_t eof; |
| apr_uint64_t val; |
| svn_error_t *err; |
| |
| svn_pool_clear(iterpool); |
| SVN_ERR(svn_stream_readline(manifest_stream, &sb, "\n", &eof, iterpool)); |
| if (eof) |
| break; |
| |
| err = svn_cstring_strtoui64(&val, sb->data, 0, APR_SIZE_MAX, 10); |
| if (err) |
| return svn_error_createf(SVN_ERR_FS_CORRUPT, err, |
| _("Manifest offset '%s' too large"), |
| sb->data); |
| APR_ARRAY_PUSH(*manifest, apr_size_t) = (apr_size_t)val; |
| } |
| svn_pool_destroy(iterpool); |
| |
| return svn_stream_close(manifest_stream); |
| } |
| |
| /* Read header information for the revision stored in FILE_CONTENT (one |
| * whole revision). Return the offsets within FILE_CONTENT for the |
| * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. |
| * Use POOL for temporary allocations. */ |
| static svn_error_t * |
| read_revision_header(apr_size_t *changes, |
| apr_size_t *changes_len, |
| apr_size_t *root_noderev, |
| svn_stringbuf_t *file_content, |
| apr_pool_t *pool) |
| { |
| char buf[64]; |
| const char *line; |
| char *space; |
| apr_uint64_t val; |
| apr_size_t len; |
| |
| /* Read in this last block, from which we will identify the last line. */ |
| len = sizeof(buf); |
| if (len > file_content->len) |
| len = file_content->len; |
| |
| memcpy(buf, file_content->data + file_content->len - len, len); |
| |
| /* The last byte should be a newline. */ |
| if (buf[(apr_ssize_t)len - 1] != '\n') |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Revision lacks trailing newline")); |
| |
| /* Look for the next previous newline. */ |
| buf[len - 1] = 0; |
| line = strrchr(buf, '\n'); |
| if (line == NULL) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Final line in revision file longer " |
| "than 64 characters")); |
| |
| space = strchr(line, ' '); |
| if (space == NULL) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Final line in revision file missing space")); |
| |
| /* terminate the header line */ |
| *space = 0; |
| |
| /* extract information */ |
| SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10)); |
| *root_noderev = (apr_size_t)val; |
| SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10)); |
| *changes = (apr_size_t)val; |
| *changes_len = file_content->len - *changes - (buf + len - line) + 1; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the FSFS format number and sharding size from the format file at |
| * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_format(int *pformat, int *max_files_per_dir, |
| const char *path, apr_pool_t *pool) |
| { |
| svn_error_t *err; |
| apr_file_t *file; |
| char buf[80]; |
| apr_size_t len; |
| |
| /* open format file and read the first line */ |
| err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED, |
| APR_OS_DEFAULT, pool); |
| if (err && APR_STATUS_IS_ENOENT(err->apr_err)) |
| { |
| /* Treat an absent format file as format 1. Do not try to |
| create the format file on the fly, because the repository |
| might be read-only for us, or this might be a read-only |
| operation, and the spirit of FSFS is to make no changes |
| whatseover in read-only operations. See thread starting at |
| http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600 |
| for more. */ |
| svn_error_clear(err); |
| *pformat = 1; |
| *max_files_per_dir = 0; |
| |
| return SVN_NO_ERROR; |
| } |
| SVN_ERR(err); |
| |
| len = sizeof(buf); |
| err = svn_io_read_length_line(file, buf, &len, pool); |
| if (err && APR_STATUS_IS_EOF(err->apr_err)) |
| { |
| /* Return a more useful error message. */ |
| svn_error_clear(err); |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("Can't read first line of format file '%s'"), |
| svn_dirent_local_style(path, pool)); |
| } |
| SVN_ERR(err); |
| |
| /* Check that the first line contains only digits. */ |
| SVN_ERR(svn_cstring_atoi(pformat, buf)); |
| |
| /* Set the default values for anything that can be set via an option. */ |
| *max_files_per_dir = 0; |
| |
| /* Read any options. */ |
| while (1) |
| { |
| len = sizeof(buf); |
| err = svn_io_read_length_line(file, buf, &len, pool); |
| if (err && APR_STATUS_IS_EOF(err->apr_err)) |
| { |
| /* No more options; that's okay. */ |
| svn_error_clear(err); |
| break; |
| } |
| SVN_ERR(err); |
| |
| if (strncmp(buf, "layout ", 7) == 0) |
| { |
| if (strcmp(buf+7, "linear") == 0) |
| { |
| *max_files_per_dir = 0; |
| continue; |
| } |
| |
| if (strncmp(buf+7, "sharded ", 8) == 0) |
| { |
| /* Check that the argument is numeric. */ |
| SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf + 15)); |
| continue; |
| } |
| } |
| |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("'%s' contains invalid filesystem format option '%s'"), |
| svn_dirent_local_style(path, pool), buf); |
| } |
| |
| return svn_io_file_close(file, pool); |
| } |
| |
| /* Read the content of the file at PATH and return it in *RESULT. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool) |
| { |
| svn_stringbuf_t *content; |
| apr_uint64_t number; |
| |
| SVN_ERR(svn_stringbuf_from_file2(&content, path, pool)); |
| |
| content->data[content->len-1] = 0; |
| SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10)); |
| *result = (svn_revnum_t)number; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Create *FS for the repository at PATH and read the format and size info. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool) |
| { |
| *fs = apr_pcalloc(pool, sizeof(**fs)); |
| (*fs)->path = apr_pstrdup(pool, path); |
| (*fs)->max_files_per_dir = 1000; |
| |
| /* Read the FS format number. */ |
| SVN_ERR(read_format(&(*fs)->format, |
| &(*fs)->max_files_per_dir, |
| svn_dirent_join(path, "db/format", pool), |
| pool)); |
| if (((*fs)->format != 4) && ((*fs)->format != 6)) |
| return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL); |
| |
| /* read size (HEAD) info */ |
| SVN_ERR(read_number(&(*fs)->min_unpacked_rev, |
| svn_dirent_join(path, "db/min-unpacked-rev", pool), |
| pool)); |
| return read_number(&(*fs)->max_revision, |
| svn_dirent_join(path, "db/current", pool), |
| pool); |
| } |
| |
| /* Utility function that returns true if STRING->DATA matches KEY. |
| */ |
| static svn_boolean_t |
| key_matches(svn_string_t *string, const char *key) |
| { |
| return strcmp(string->data, key) == 0; |
| } |
| |
| /* Comparator used for binary search comparing the absolute file offset |
| * of a representation to some other offset. DATA is a *representation_t, |
| * KEY is a pointer to an apr_size_t. |
| */ |
| static int |
| compare_representation_offsets(const void *data, const void *key) |
| { |
| apr_ssize_t diff = (*(const representation_t *const *)data)->offset |
| - *(const apr_size_t *)key; |
| |
| /* sizeof(int) may be < sizeof(ssize_t) */ |
| if (diff < 0) |
| return -1; |
| return diff > 0 ? 1 : 0; |
| } |
| |
| /* Find the revision_info_t object to the given REVISION in FS and return |
| * it in *REVISION_INFO. For performance reasons, we skip the lookup if |
| * the info is already provided. |
| * |
| * In that revision, look for the representation_t object for offset OFFSET. |
| * If it already exists, set *IDX to its index in *REVISION_INFO's |
| * representations list and return the representation object. Otherwise, |
| * set the index to where it must be inserted and return NULL. |
| */ |
| static representation_t * |
| find_representation(int *idx, |
| fs_fs_t *fs, |
| revision_info_t **revision_info, |
| svn_revnum_t revision, |
| apr_size_t offset) |
| { |
| revision_info_t *info; |
| *idx = -1; |
| |
| /* first let's find the revision */ |
| info = revision_info ? *revision_info : NULL; |
| if (info == NULL || info->revision != revision) |
| { |
| info = APR_ARRAY_IDX(fs->revisions, |
| revision - fs->start_revision, |
| revision_info_t*); |
| if (revision_info) |
| *revision_info = info; |
| } |
| |
| /* not found -> no result */ |
| if (info == NULL) |
| return NULL; |
| |
| assert(revision == info->revision); |
| |
| /* look for the representation */ |
| *idx = svn_sort__bsearch_lower_bound(&offset, |
| info->representations, |
| compare_representation_offsets); |
| if (*idx < info->representations->nelts) |
| { |
| /* return the representation, if this is the one we were looking for */ |
| representation_t *result |
| = APR_ARRAY_IDX(info->representations, *idx, representation_t *); |
| if (result->offset == offset) |
| return result; |
| } |
| |
| /* not parsed, yet */ |
| return NULL; |
| } |
| |
| /* Read the representation header in FILE_CONTENT at OFFSET. Return its |
| * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and |
| * return the deltification base representation in *REPRESENTATION. If |
| * there is none, set it to NULL. Use FS to it look up. |
| * |
| * Use POOL for allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| read_rep_base(representation_t **representation, |
| apr_size_t *header_size, |
| svn_boolean_t *is_plain, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| char *str, *last_str; |
| int idx; |
| svn_revnum_t revision; |
| apr_uint64_t temp; |
| |
| /* identify representation header (1 line) */ |
| const char *buffer = file_content->data + offset; |
| const char *line_end = strchr(buffer, '\n'); |
| *header_size = line_end - buffer + 1; |
| |
| /* check for PLAIN rep */ |
| if (strncmp(buffer, "PLAIN\n", *header_size) == 0) |
| { |
| *is_plain = TRUE; |
| *representation = NULL; |
| return SVN_NO_ERROR; |
| } |
| |
| /* check for DELTA against empty rep */ |
| *is_plain = FALSE; |
| if (strncmp(buffer, "DELTA\n", *header_size) == 0) |
| { |
| /* This is a delta against the empty stream. */ |
| *representation = fs->null_base; |
| return SVN_NO_ERROR; |
| } |
| |
| str = apr_pstrndup(scratch_pool, buffer, line_end - buffer); |
| last_str = str; |
| |
| /* parse it. */ |
| str = svn_cstring_tokenize(" ", &last_str); |
| str = svn_cstring_tokenize(" ", &last_str); |
| SVN_ERR(svn_revnum_parse(&revision, str, NULL)); |
| |
| str = svn_cstring_tokenize(" ", &last_str); |
| SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10)); |
| |
| /* it should refer to a rep in an earlier revision. Look it up */ |
| *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp); |
| return SVN_NO_ERROR; |
| } |
| |
| /* Parse the representation reference (text: or props:) in VALUE, look |
| * it up in FS and return it in *REPRESENTATION. To be able to parse the |
| * base rep, we pass the FILE_CONTENT as well. |
| * |
| * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp. |
| * allocations. |
| */ |
| static svn_error_t * |
| parse_representation(representation_t **representation, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| svn_string_t *value, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| representation_t *result; |
| svn_revnum_t revision; |
| |
| apr_uint64_t offset; |
| apr_uint64_t size; |
| apr_uint64_t expanded_size; |
| int idx; |
| |
| /* read location (revision, offset) and size */ |
| char *c = (char *)value->data; |
| SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL)); |
| SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); |
| SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); |
| SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); |
| |
| /* look it up */ |
| result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset); |
| if (!result) |
| { |
| /* not parsed, yet (probably a rep in the same revision). |
| * Create a new rep object and determine its base rep as well. |
| */ |
| apr_size_t header_size; |
| svn_boolean_t is_plain; |
| |
| result = apr_pcalloc(pool, sizeof(*result)); |
| result->revision = revision; |
| result->expanded_size = (apr_size_t)(expanded_size ? expanded_size : size); |
| result->offset = (apr_size_t)offset; |
| result->size = (apr_size_t)size; |
| SVN_ERR(read_rep_base(&result->delta_base, &header_size, |
| &is_plain, fs, file_content, |
| (apr_size_t)offset, |
| pool, scratch_pool)); |
| |
| result->header_size = header_size; |
| result->is_plain = is_plain; |
| svn_sort__array_insert(&result, revision_info->representations, idx); |
| } |
| |
| *representation = result; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in |
| * FS and return it in *CONTENT. If no NULL, FILE_CONTENT must contain |
| * the contents of the revision that also contains the representation. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| get_rep_content(svn_stringbuf_t **content, |
| fs_fs_t *fs, |
| representation_t *representation, |
| svn_stringbuf_t *file_content, |
| apr_pool_t *pool) |
| { |
| apr_off_t offset; |
| svn_revnum_t revision = representation->revision; |
| revision_info_t *revision_info = APR_ARRAY_IDX(fs->revisions, |
| revision - fs->start_revision, |
| revision_info_t*); |
| |
| /* not in cache. Is the revision valid at all? */ |
| if (revision - fs->start_revision > fs->revisions->nelts) |
| return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, |
| _("Unknown revision %ld"), revision); |
| |
| if (file_content) |
| { |
| offset = representation->offset |
| + representation->header_size; |
| *content = svn_stringbuf_ncreate(file_content->data + offset, |
| representation->size, pool); |
| } |
| else |
| { |
| offset = revision_info->offset |
| + representation->offset |
| + representation->header_size; |
| SVN_ERR(get_content(content, NULL, fs, revision, offset, |
| representation->size, pool)); |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| |
| /* Read the delta window contents of all windows in REPRESENTATION in FS. |
| * If no NULL, FILE_CONTENT must contain the contents of the revision that |
| * also contains the representation. |
| * Return the data as svn_txdelta_window_t* instances in *WINDOWS. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_windows(apr_array_header_t **windows, |
| fs_fs_t *fs, |
| representation_t *representation, |
| svn_stringbuf_t *file_content, |
| apr_pool_t *pool) |
| { |
| svn_stringbuf_t *content; |
| svn_stream_t *stream; |
| char version; |
| apr_size_t len = sizeof(version); |
| |
| *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *)); |
| |
| /* get the whole revision content */ |
| SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool)); |
| |
| /* create a read stream and position it directly after the rep header */ |
| content->data += 3; |
| content->len -= 3; |
| stream = svn_stream_from_stringbuf(content, pool); |
| SVN_ERR(svn_stream_read(stream, &version, &len)); |
| |
| /* read the windows from that stream */ |
| while (TRUE) |
| { |
| svn_txdelta_window_t *window; |
| svn_stream_mark_t *mark; |
| char dummy; |
| |
| len = sizeof(dummy); |
| SVN_ERR(svn_stream_mark(stream, &mark, pool)); |
| SVN_ERR(svn_stream_read(stream, &dummy, &len)); |
| if (len == 0) |
| break; |
| |
| SVN_ERR(svn_stream_seek(stream, mark)); |
| SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, version, pool)); |
| APR_ARRAY_PUSH(*windows, svn_txdelta_window_t *) = window; |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Get the undeltified representation that is a result of combining all |
| * deltas from the current desired REPRESENTATION in FS with its base |
| * representation. If no NULL, FILE_CONTENT must contain the contents of |
| * the revision that also contains the representation. Store the result |
| * in *CONTENT. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| get_combined_window(svn_stringbuf_t **content, |
| fs_fs_t *fs, |
| representation_t *representation, |
| svn_stringbuf_t *file_content, |
| apr_pool_t *pool) |
| { |
| int i; |
| apr_array_header_t *windows; |
| svn_stringbuf_t *base_content, *result; |
| const char *source; |
| apr_pool_t *sub_pool; |
| apr_pool_t *iter_pool; |
| |
| /* special case: no un-deltification necessary */ |
| if (representation->is_plain) |
| { |
| SVN_ERR(get_rep_content(content, fs, representation, file_content, |
| pool)); |
| SVN_ERR(set_cached_window(fs, representation, *content, pool)); |
| return SVN_NO_ERROR; |
| } |
| |
| /* special case: data already in cache */ |
| SVN_ERR(get_cached_window(content, fs, representation, pool)); |
| if (*content) |
| return SVN_NO_ERROR; |
| |
| /* read the delta windows for this representation */ |
| sub_pool = svn_pool_create(pool); |
| iter_pool = svn_pool_create(pool); |
| SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool)); |
| |
| /* fetch the / create a base content */ |
| if (representation->delta_base && representation->delta_base->revision) |
| SVN_ERR(get_combined_window(&base_content, fs, |
| representation->delta_base, NULL, sub_pool)); |
| else |
| base_content = svn_stringbuf_create_empty(sub_pool); |
| |
| /* apply deltas */ |
| result = svn_stringbuf_create_empty(pool); |
| source = base_content->data; |
| |
| for (i = 0; i < windows->nelts; ++i) |
| { |
| svn_txdelta_window_t *window |
| = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); |
| svn_stringbuf_t *buf |
| = svn_stringbuf_create_ensure(window->tview_len, iter_pool); |
| |
| buf->len = window->tview_len; |
| svn_txdelta_apply_instructions(window, window->src_ops ? source : NULL, |
| buf->data, &buf->len); |
| |
| svn_stringbuf_appendbytes(result, buf->data, buf->len); |
| source += window->sview_len; |
| |
| svn_pool_clear(iter_pool); |
| } |
| |
| /* cache result and return it */ |
| SVN_ERR(set_cached_window(fs, representation, result, sub_pool)); |
| *content = result; |
| |
| svn_pool_destroy(iter_pool); |
| svn_pool_destroy(sub_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
/* Forward declaration.  parse_dir() and read_noderev() call each other,
 * so read_noderev() has to be declared before parse_dir() is defined.
 */
static svn_error_t *
read_noderev(fs_fs_t *fs,
             svn_stringbuf_t *file_content,
             apr_size_t offset,
             revision_info_t *revision_info,
             apr_pool_t *pool,
             apr_pool_t *scratch_pool);
| |
| /* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all |
| * DAG nodes, directories and representations linked in that tree structure. |
| * Store them in FS and REVISION_INFO. Also, read them only once. |
| * |
| * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| parse_dir(fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| representation_t *representation, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| svn_stringbuf_t *text; |
| apr_pool_t *iter_pool; |
| apr_pool_t *text_pool; |
| const char *current; |
| const char *revision_key; |
| apr_size_t key_len; |
| |
| /* special case: empty dir rep */ |
| if (representation == NULL) |
| return SVN_NO_ERROR; |
| |
| /* get the directory as unparsed string */ |
| iter_pool = svn_pool_create(scratch_pool); |
| text_pool = svn_pool_create(scratch_pool); |
| |
| SVN_ERR(get_combined_window(&text, fs, representation, file_content, |
| text_pool)); |
| current = text->data; |
| |
| /* calculate some invariants */ |
| revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision); |
| key_len = strlen(revision_key); |
| |
| /* Parse and process all directory entries. */ |
| while (*current != 'E') |
| { |
| char *next; |
| |
| /* skip "K ???\n<name>\nV ???\n" lines*/ |
| current = strchr(current, '\n'); |
| if (current) |
| current = strchr(current+1, '\n'); |
| if (current) |
| current = strchr(current+1, '\n'); |
| next = current ? strchr(++current, '\n') : NULL; |
| if (next == NULL) |
| return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, |
| _("Corrupt directory representation in rev %ld at offset %ld"), |
| representation->revision, |
| (long)representation->offset); |
| |
| /* iff this entry refers to a node in the same revision as this dir, |
| * recurse into that node */ |
| *next = 0; |
| current = strstr(current, revision_key); |
| if (current) |
| { |
| /* recurse */ |
| apr_uint64_t offset; |
| |
| SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0, |
| APR_SIZE_MAX, 10)); |
| SVN_ERR(read_noderev(fs, file_content, (apr_size_t)offset, |
| revision_info, pool, iter_pool)); |
| |
| svn_pool_clear(iter_pool); |
| } |
| current = next+1; |
| } |
| |
| svn_pool_destroy(iter_pool); |
| svn_pool_destroy(text_pool); |
| return SVN_NO_ERROR; |
| } |
| |
| /* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes, |
| * directories and representations linked in that tree structure. Store |
| * them in FS and REVISION_INFO. Also, read them only once. Return the |
| * result in *NODEREV. |
| * |
| * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| read_noderev(fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| svn_string_t *line; |
| representation_t *text = NULL; |
| representation_t *props = NULL; |
| apr_size_t start_offset = offset; |
| svn_boolean_t is_dir = FALSE; |
| const char *path = "???"; |
| |
| scratch_pool = svn_pool_create(scratch_pool); |
| |
| /* parse the noderev line-by-line until we find an empty line */ |
| while (1) |
| { |
| /* for this line, extract key and value. Ignore invalid values */ |
| svn_string_t key; |
| svn_string_t value; |
| char *sep; |
| const char *start = file_content->data + offset; |
| const char *end = strchr(start, '\n'); |
| |
| line = svn_string_ncreate(start, end - start, scratch_pool); |
| offset += end - start + 1; |
| |
| /* empty line -> end of noderev data */ |
| if (line->len == 0) |
| break; |
| |
| sep = strchr(line->data, ':'); |
| if (sep == NULL) |
| continue; |
| |
| key.data = line->data; |
| key.len = sep - key.data; |
| *sep = 0; |
| |
| if (key.len + 2 > line->len) |
| continue; |
| |
| value.data = sep + 2; |
| value.len = line->len - (key.len + 2); |
| |
| /* translate (key, value) into noderev elements */ |
| if (key_matches(&key, "type")) |
| is_dir = strcmp(value.data, "dir") == 0; |
| else if (key_matches(&key, "text")) |
| { |
| SVN_ERR(parse_representation(&text, fs, file_content, |
| &value, revision_info, |
| pool, scratch_pool)); |
| |
| /* if we are the first to use this rep, mark it as "text rep" */ |
| if (++text->ref_count == 1) |
| text->kind = is_dir ? dir_rep : file_rep; |
| } |
| else if (key_matches(&key, "props")) |
| { |
| SVN_ERR(parse_representation(&props, fs, file_content, |
| &value, revision_info, |
| pool, scratch_pool)); |
| |
| /* if we are the first to use this rep, mark it as "prop rep" */ |
| if (++props->ref_count == 1) |
| props->kind = is_dir ? dir_property_rep : file_property_rep; |
| } |
| else if (key_matches(&key, "cpath")) |
| path = value.data; |
| } |
| |
| /* record largest changes */ |
| if (text && text->ref_count == 1) |
| add_change(fs, (apr_int64_t)text->size, (apr_int64_t)text->expanded_size, |
| text->revision, path, text->kind); |
| if (props && props->ref_count == 1) |
| add_change(fs, (apr_int64_t)props->size, (apr_int64_t)props->expanded_size, |
| props->revision, path, props->kind); |
| |
| /* if this is a directory and has not been processed, yet, read and |
| * process it recursively */ |
| if (is_dir && text && text->ref_count == 1) |
| SVN_ERR(parse_dir(fs, file_content, text, revision_info, |
| pool, scratch_pool)); |
| |
| /* update stats */ |
| if (is_dir) |
| { |
| revision_info->dir_noderev_size += offset - start_offset; |
| revision_info->dir_noderev_count++; |
| } |
| else |
| { |
| revision_info->file_noderev_size += offset - start_offset; |
| revision_info->file_noderev_count++; |
| } |
| svn_pool_destroy(scratch_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
/* Return the number of changed paths encoded in the unparsed changes list
 * CHANGES, which is LEN chars long.  Every change occupies two lines, so
 * the result is half the number of newline characters found.
 */
static apr_size_t
get_change_count(const char *changes,
                 apr_size_t len)
{
  apr_size_t newlines = 0;
  apr_size_t pos;

  /* count the line terminators */
  for (pos = 0; pos < len; ++pos)
    {
      if (changes[pos] == '\n')
        newlines += 1;
    }

  /* two lines make one change */
  return newlines / 2;
}
| |
| /* Simple utility to print a REVISION number and make it appear immediately. |
| */ |
| static void |
| print_progress(svn_revnum_t revision) |
| { |
| printf("%8ld", revision); |
| fflush(stdout); |
| } |
| |
| /* Read the content of the pack file staring at revision BASE and store it |
| * in FS. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_pack_file(fs_fs_t *fs, |
| svn_revnum_t base, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *manifest = NULL; |
| apr_pool_t *local_pool = svn_pool_create(pool); |
| apr_pool_t *iter_pool = svn_pool_create(local_pool); |
| int i; |
| apr_off_t file_size = 0; |
| apr_file_t *file; |
| const char *pack_folder = get_pack_folder(fs, base, local_pool); |
| |
| /* parse the manifest file */ |
| SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool)); |
| if (manifest->nelts != fs->max_files_per_dir) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL); |
| |
| SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool)); |
| SVN_ERR(get_file_size(&file_size, file, local_pool)); |
| |
| /* process each revision in the pack file */ |
| for (i = 0; i < manifest->nelts; ++i) |
| { |
| apr_size_t root_node_offset; |
| svn_stringbuf_t *rev_content; |
| |
| /* create the revision info for the current rev */ |
| revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); |
| info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*)); |
| |
| info->revision = base + i; |
| info->offset = APR_ARRAY_IDX(manifest, i, apr_size_t); |
| info->end = i+1 < manifest->nelts |
| ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t) |
| : file_size; |
| |
| SVN_ERR(get_content(&rev_content, file, fs, info->revision, |
| info->offset, |
| info->end - info->offset, |
| iter_pool)); |
| |
| SVN_ERR(read_revision_header(&info->changes, |
| &info->changes_len, |
| &root_node_offset, |
| rev_content, |
| iter_pool)); |
| |
| info->change_count |
| = get_change_count(rev_content->data + info->changes, |
| info->changes_len); |
| SVN_ERR(read_noderev(fs, rev_content, |
| root_node_offset, info, pool, iter_pool)); |
| |
| info->representations = apr_array_copy(pool, info->representations); |
| APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; |
| |
| /* destroy temps */ |
| svn_pool_clear(iter_pool); |
| } |
| |
| /* one more pack file processed */ |
| print_progress(base); |
| svn_pool_destroy(local_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the file for REVSION and store its contents in FS. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_revision_file(fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_pool_t *pool) |
| { |
| apr_size_t root_node_offset; |
| apr_pool_t *local_pool = svn_pool_create(pool); |
| svn_stringbuf_t *rev_content; |
| revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); |
| apr_off_t file_size = 0; |
| apr_file_t *file; |
| |
| /* read the whole pack file into memory */ |
| SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool)); |
| SVN_ERR(get_file_size(&file_size, file, local_pool)); |
| |
| /* create the revision info for the current rev */ |
| info->representations = apr_array_make(pool, 4, sizeof(representation_t*)); |
| |
| info->revision = revision; |
| info->offset = 0; |
| info->end = file_size; |
| |
| SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size, |
| local_pool)); |
| |
| SVN_ERR(read_revision_header(&info->changes, |
| &info->changes_len, |
| &root_node_offset, |
| rev_content, |
| local_pool)); |
| |
| /* put it into our containers */ |
| APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; |
| |
| info->change_count |
| = get_change_count(rev_content->data + info->changes, |
| info->changes_len); |
| |
| /* parse the revision content recursively. */ |
| SVN_ERR(read_noderev(fs, rev_content, |
| root_node_offset, info, |
| pool, local_pool)); |
| |
| /* show progress every 1000 revs or so */ |
| if (revision % fs->max_files_per_dir == 0) |
| print_progress(revision); |
| |
| svn_pool_destroy(local_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the repository at PATH beginning with revision START_REVISION and |
| * return the result in *FS. Allocate caches with MEMSIZE bytes total |
| * capacity. Use POOL for non-cache allocations. |
| */ |
| static svn_error_t * |
| read_revisions(fs_fs_t **fs, |
| const char *path, |
| svn_revnum_t start_revision, |
| apr_size_t memsize, |
| apr_pool_t *pool) |
| { |
| svn_revnum_t revision; |
| svn_cache_config_t cache_config = *svn_cache_config_get(); |
| |
| /* determine cache sizes */ |
| |
| if (memsize < 100) |
| memsize = 100; |
| |
| cache_config.cache_size = memsize * 1024 * 1024; |
| svn_cache_config_set(&cache_config); |
| |
| SVN_ERR(fs_open(fs, path, pool)); |
| |
| /* create data containers and caches */ |
| (*fs)->start_revision = start_revision |
| - (start_revision % (*fs)->max_files_per_dir); |
| (*fs)->revisions = apr_array_make(pool, |
| (*fs)->max_revision + 1 - (*fs)->start_revision, |
| sizeof(revision_info_t *)); |
| (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base)); |
| initialize_largest_changes(*fs, 64, pool); |
| (*fs)->by_extension = apr_hash_make(pool); |
| |
| SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache, |
| svn_cache__get_global_membuffer_cache(), |
| NULL, NULL, |
| sizeof(window_cache_key_t), |
| "", FALSE, pool)); |
| |
| /* read all packed revs */ |
| for ( revision = start_revision |
| ; revision < (*fs)->min_unpacked_rev |
| ; revision += (*fs)->max_files_per_dir) |
| SVN_ERR(read_pack_file(*fs, revision, pool)); |
| |
| /* read non-packed revs */ |
| for ( ; revision <= (*fs)->max_revision; ++revision) |
| SVN_ERR(read_revision_file(*fs, revision, pool)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
/* Compression statistics we collect over a given set of representations.
 * All members are running totals that add_rep_pack_stats() increases for
 * every representation added to the set.
 */
typedef struct rep_pack_stats_t
{
  /* number of representations */
  apr_int64_t count;

  /* total size after deltification (i.e. on disk size) */
  apr_int64_t packed_size;

  /* total size after de-deltification (i.e. plain text size) */
  apr_int64_t expanded_size;

  /* total on-disk header size, plus 7 bytes per representation for the
   * "ENDREP\n" footer */
  apr_int64_t overhead_size;
} rep_pack_stats_t;
| |
/* Statistics we collect over a given set of representations.
 * We group them into shared and non-shared ("unique") reps.
 * Instances are filled by add_rep_stats().
 */
typedef struct representation_stats_t
{
  /* stats over all representations */
  rep_pack_stats_t total;

  /* stats over those representations with ref_count == 1 */
  rep_pack_stats_t uniques;

  /* stats over all remaining representations, i.e. those whose
   * ref_count is not 1 */
  rep_pack_stats_t shared;

  /* sum of all ref_counts */
  apr_int64_t references;

  /* sum of ref_count * expanded_size,
   * i.e. total plaintext content if there was no rep sharing */
  apr_int64_t expanded_size;
} representation_stats_t;
| |
/* Basic statistics we collect over a given set of noderevs.
 * print_stats() sums up the per-revision noderev counters in these.
 */
typedef struct node_stats_t
{
  /* number of noderev structs */
  apr_int64_t count;

  /* their total size on disk (structs only) */
  apr_int64_t size;
} node_stats_t;
| |
| /* Accumulate stats of REP in STATS. |
| */ |
| static void |
| add_rep_pack_stats(rep_pack_stats_t *stats, |
| representation_t *rep) |
| { |
| stats->count++; |
| |
| stats->packed_size += rep->size; |
| stats->expanded_size += rep->expanded_size; |
| stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; |
| } |
| |
| /* Accumulate stats of REP in STATS. |
| */ |
| static void |
| add_rep_stats(representation_stats_t *stats, |
| representation_t *rep) |
| { |
| add_rep_pack_stats(&stats->total, rep); |
| if (rep->ref_count == 1) |
| add_rep_pack_stats(&stats->uniques, rep); |
| else |
| add_rep_pack_stats(&stats->shared, rep); |
| |
| stats->references += rep->ref_count; |
| stats->expanded_size += rep->ref_count * rep->expanded_size; |
| } |
| |
| /* Print statistics for the given group of representations to console. |
| * Use POOL for allocations. |
| */ |
| static void |
| print_rep_stats(representation_stats_t *stats, |
| apr_pool_t *pool) |
| { |
| printf(_("%20s bytes in %12s reps\n" |
| "%20s bytes in %12s shared reps\n" |
| "%20s bytes expanded size\n" |
| "%20s bytes expanded shared size\n" |
| "%20s bytes with rep-sharing off\n" |
| "%20s shared references\n"), |
| svn__i64toa_sep(stats->total.packed_size, ',', pool), |
| svn__i64toa_sep(stats->total.count, ',', pool), |
| svn__i64toa_sep(stats->shared.packed_size, ',', pool), |
| svn__i64toa_sep(stats->shared.count, ',', pool), |
| svn__i64toa_sep(stats->total.expanded_size, ',', pool), |
| svn__i64toa_sep(stats->shared.expanded_size, ',', pool), |
| svn__i64toa_sep(stats->expanded_size, ',', pool), |
| svn__i64toa_sep(stats->references - stats->total.count, ',', pool)); |
| } |
| |
| /* Print the (used) contents of CHANGES. Use POOL for allocations. |
| */ |
| static void |
| print_largest_reps(largest_changes_t *changes, |
| apr_pool_t *pool) |
| { |
| apr_size_t i; |
| for (i = 0; i < changes->count && changes->changes[i]->size; ++i) |
| printf(_("%12s r%-8ld %s\n"), |
| svn__i64toa_sep(changes->changes[i]->size, ',', pool), |
| changes->changes[i]->revision, |
| changes->changes[i]->path->data); |
| } |
| |
| /* Print the non-zero section of HISTOGRAM to console. |
| * Use POOL for allocations. |
| */ |
| static void |
| print_histogram(histogram_t *histogram, |
| apr_pool_t *pool) |
| { |
| int first = 0; |
| int last = 63; |
| int i; |
| |
| /* identify non-zero range */ |
| while (last > 0 && histogram->lines[last].count == 0) |
| --last; |
| |
| while (first <= last && histogram->lines[first].count == 0) |
| ++first; |
| |
| /* display histogram lines */ |
| for (i = last; i >= first; --i) |
| printf(_(" [2^%2d, 2^%2d) %15s (%2d%%) bytes in %12s (%2d%%) items\n"), |
| i-1, i, |
| svn__i64toa_sep(histogram->lines[i].sum, ',', pool), |
| (int)(histogram->lines[i].sum * 100 / histogram->total.sum), |
| svn__i64toa_sep(histogram->lines[i].count, ',', pool), |
| (int)(histogram->lines[i].count * 100 / histogram->total.count)); |
| } |
| |
| /* COMPARISON_FUNC for svn_sort__hash. |
| * Sort extension_info_t values by total count in descending order. |
| */ |
| static int |
| compare_count(const svn_sort__item_t *a, |
| const svn_sort__item_t *b) |
| { |
| const extension_info_t *lhs = a->value; |
| const extension_info_t *rhs = b->value; |
| apr_int64_t diff = lhs->node_histogram.total.count |
| - rhs->node_histogram.total.count; |
| |
| return diff > 0 ? -1 : (diff < 0 ? 1 : 0); |
| } |
| |
| /* COMPARISON_FUNC for svn_sort__hash. |
| * Sort extension_info_t values by total uncompressed size in descending order. |
| */ |
| static int |
| compare_node_size(const svn_sort__item_t *a, |
| const svn_sort__item_t *b) |
| { |
| const extension_info_t *lhs = a->value; |
| const extension_info_t *rhs = b->value; |
| apr_int64_t diff = lhs->node_histogram.total.sum |
| - rhs->node_histogram.total.sum; |
| |
| return diff > 0 ? -1 : (diff < 0 ? 1 : 0); |
| } |
| |
| /* COMPARISON_FUNC for svn_sort__hash. |
| * Sort extension_info_t values by total prep count in descending order. |
| */ |
| static int |
| compare_rep_size(const svn_sort__item_t *a, |
| const svn_sort__item_t *b) |
| { |
| const extension_info_t *lhs = a->value; |
| const extension_info_t *rhs = b->value; |
| apr_int64_t diff = lhs->rep_histogram.total.sum |
| - rhs->rep_histogram.total.sum; |
| |
| return diff > 0 ? -1 : (diff < 0 ? 1 : 0); |
| } |
| |
| /* Return an array of extension_info_t* for the (up to) 16 most prominent |
| * extensions in FS according to the sort criterion COMPARISON_FUNC. |
| * Allocate results in POOL. |
| */ |
| static apr_array_header_t * |
| get_by_extensions(fs_fs_t *fs, |
| int (*comparison_func)(const svn_sort__item_t *, |
| const svn_sort__item_t *), |
| apr_pool_t *pool) |
| { |
| /* sort all data by extension */ |
| apr_array_header_t *sorted |
| = svn_sort__hash(fs->by_extension, comparison_func, pool); |
| |
| /* select the top (first) 16 entries */ |
| int count = MIN(sorted->nelts, 16); |
| apr_array_header_t *result |
| = apr_array_make(pool, count, sizeof(extension_info_t*)); |
| int i; |
| |
| for (i = 0; i < count; ++i) |
| APR_ARRAY_PUSH(result, extension_info_t*) |
| = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value; |
| |
| return result; |
| } |
| |
| /* Add all extension_info_t* entries of TO_ADD not already in TARGET to |
| * TARGET. |
| */ |
| static void |
| merge_by_extension(apr_array_header_t *target, |
| apr_array_header_t *to_add) |
| { |
| int i, k, count; |
| |
| count = target->nelts; |
| for (i = 0; i < to_add->nelts; ++i) |
| { |
| extension_info_t *info = APR_ARRAY_IDX(to_add, i, extension_info_t *); |
| for (k = 0; k < count; ++k) |
| if (info == APR_ARRAY_IDX(target, k, extension_info_t *)) |
| break; |
| |
| if (k == count) |
| APR_ARRAY_PUSH(target, extension_info_t*) = info; |
| } |
| } |
| |
| /* Print the (up to) 16 extensions in FS with the most changes. |
| * Use POOL for allocations. |
| */ |
| static void |
| print_extensions_by_changes(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *data = get_by_extensions(fs, compare_count, pool); |
| apr_int64_t sum = 0; |
| int i; |
| |
| for (i = 0; i < data->nelts; ++i) |
| { |
| extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); |
| sum += info->node_histogram.total.count; |
| printf(_(" %9s %12s (%2d%%) changes\n"), |
| info->extension, |
| svn__i64toa_sep(info->node_histogram.total.count, ',', pool), |
| (int)(info->node_histogram.total.count * 100 / |
| fs->file_histogram.total.count)); |
| } |
| |
| printf(_(" %9s %12s (%2d%%) changes\n"), |
| "(others)", |
| svn__i64toa_sep(fs->file_histogram.total.count - sum, ',', pool), |
| (int)((fs->file_histogram.total.count - sum) * 100 / |
| fs->file_histogram.total.count)); |
| } |
| |
| /* Print the (up to) 16 extensions in FS with the largest total size of |
| * changed file content. Use POOL for allocations. |
| */ |
| static void |
| print_extensions_by_nodes(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *data = get_by_extensions(fs, compare_node_size, pool); |
| apr_int64_t sum = 0; |
| int i; |
| |
| for (i = 0; i < data->nelts; ++i) |
| { |
| extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); |
| sum += info->node_histogram.total.sum; |
| printf(_(" %9s %20s (%2d%%) bytes\n"), |
| info->extension, |
| svn__i64toa_sep(info->node_histogram.total.sum, ',', pool), |
| (int)(info->node_histogram.total.sum * 100 / |
| fs->file_histogram.total.sum)); |
| } |
| |
| printf(_(" %9s %20s (%2d%%) bytes\n"), |
| "(others)", |
| svn__i64toa_sep(fs->file_histogram.total.sum - sum, ',', pool), |
| (int)((fs->file_histogram.total.sum - sum) * 100 / |
| fs->file_histogram.total.sum)); |
| } |
| |
| /* Print the (up to) 16 extensions in FS with the largest total size of |
| * changed file content. Use POOL for allocations. |
| */ |
| static void |
| print_extensions_by_reps(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *data = get_by_extensions(fs, compare_rep_size, pool); |
| apr_int64_t sum = 0; |
| int i; |
| |
| for (i = 0; i < data->nelts; ++i) |
| { |
| extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); |
| sum += info->rep_histogram.total.sum; |
| printf(_(" %9s %20s (%2d%%) bytes\n"), |
| info->extension, |
| svn__i64toa_sep(info->rep_histogram.total.sum, ',', pool), |
| (int)(info->rep_histogram.total.sum * 100 / |
| fs->rep_size_histogram.total.sum)); |
| } |
| |
| printf(_(" %9s %20s (%2d%%) bytes\n"), |
| "(others)", |
| svn__i64toa_sep(fs->rep_size_histogram.total.sum - sum, ',', pool), |
| (int)((fs->rep_size_histogram.total.sum - sum) * 100 / |
| fs->rep_size_histogram.total.sum)); |
| } |
| |
| /* Print per-extension histograms for the most frequent extensions in FS. |
| * Use POOL for allocations. */ |
| static void |
| print_histograms_by_extension(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *data = get_by_extensions(fs, compare_count, pool); |
| int i; |
| |
| merge_by_extension(data, get_by_extensions(fs, compare_node_size, pool)); |
| merge_by_extension(data, get_by_extensions(fs, compare_rep_size, pool)); |
| |
| for (i = 0; i < data->nelts; ++i) |
| { |
| extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); |
| printf("\nHistogram of '%s' file sizes:\n", info->extension); |
| print_histogram(&info->node_histogram, pool); |
| printf("\nHistogram of '%s' file representation sizes:\n", |
| info->extension); |
| print_histogram(&info->rep_histogram, pool); |
| } |
| } |
| |
| /* Post-process stats for FS and print them to the console. |
| * Use POOL for allocations. |
| */ |
| static void |
| print_stats(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| int i, k; |
| |
| /* initialize stats to collect */ |
| representation_stats_t file_rep_stats = { { 0 } }; |
| representation_stats_t dir_rep_stats = { { 0 } }; |
| representation_stats_t file_prop_rep_stats = { { 0 } }; |
| representation_stats_t dir_prop_rep_stats = { { 0 } }; |
| representation_stats_t total_rep_stats = { { 0 } }; |
| |
| node_stats_t dir_node_stats = { 0 }; |
| node_stats_t file_node_stats = { 0 }; |
| node_stats_t total_node_stats = { 0 }; |
| |
| apr_int64_t total_size = 0; |
| apr_int64_t change_count = 0; |
| apr_int64_t change_len = 0; |
| |
| /* aggregate info from all revisions */ |
| for (i = 0; i < fs->revisions->nelts; ++i) |
| { |
| revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i, |
| revision_info_t *); |
| |
| /* data gathered on a revision level */ |
| change_count += revision->change_count; |
| change_len += revision->changes_len; |
| total_size += revision->end - revision->offset; |
| |
| dir_node_stats.count += revision->dir_noderev_count; |
| dir_node_stats.size += revision->dir_noderev_size; |
| file_node_stats.count += revision->file_noderev_count; |
| file_node_stats.size += revision->file_noderev_size; |
| total_node_stats.count += revision->dir_noderev_count |
| + revision->file_noderev_count; |
| total_node_stats.size += revision->dir_noderev_size |
| + revision->file_noderev_size; |
| |
| /* process representations */ |
| for (k = 0; k < revision->representations->nelts; ++k) |
| { |
| representation_t *rep = APR_ARRAY_IDX(revision->representations, |
| k, representation_t *); |
| |
| /* accumulate in the right bucket */ |
| switch(rep->kind) |
| { |
| case file_rep: |
| add_rep_stats(&file_rep_stats, rep); |
| break; |
| case dir_rep: |
| add_rep_stats(&dir_rep_stats, rep); |
| break; |
| case file_property_rep: |
| add_rep_stats(&file_prop_rep_stats, rep); |
| break; |
| case dir_property_rep: |
| add_rep_stats(&dir_prop_rep_stats, rep); |
| break; |
| default: |
| break; |
| } |
| |
| add_rep_stats(&total_rep_stats, rep); |
| } |
| } |
| |
| /* print results */ |
| printf("\nGlobal statistics:\n"); |
| printf(_("%20s bytes in %12s revisions\n" |
| "%20s bytes in %12s changes\n" |
| "%20s bytes in %12s node revision records\n" |
| "%20s bytes in %12s representations\n" |
| "%20s bytes expanded representation size\n" |
| "%20s bytes with rep-sharing off\n"), |
| svn__i64toa_sep(total_size, ',', pool), |
| svn__i64toa_sep(fs->revisions->nelts, ',', pool), |
| svn__i64toa_sep(change_len, ',', pool), |
| svn__i64toa_sep(change_count, ',', pool), |
| svn__i64toa_sep(total_node_stats.size, ',', pool), |
| svn__i64toa_sep(total_node_stats.count, ',', pool), |
| svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(total_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(total_rep_stats.total.expanded_size, ',', pool), |
| svn__i64toa_sep(total_rep_stats.expanded_size, ',', pool)); |
| |
| printf("\nNoderev statistics:\n"); |
| printf(_("%20s bytes in %12s nodes total\n" |
| "%20s bytes in %12s directory noderevs\n" |
| "%20s bytes in %12s file noderevs\n"), |
| svn__i64toa_sep(total_node_stats.size, ',', pool), |
| svn__i64toa_sep(total_node_stats.count, ',', pool), |
| svn__i64toa_sep(dir_node_stats.size, ',', pool), |
| svn__i64toa_sep(dir_node_stats.count, ',', pool), |
| svn__i64toa_sep(file_node_stats.size, ',', pool), |
| svn__i64toa_sep(file_node_stats.count, ',', pool)); |
| |
| printf("\nRepresentation statistics:\n"); |
| printf(_("%20s bytes in %12s representations total\n" |
| "%20s bytes in %12s directory representations\n" |
| "%20s bytes in %12s file representations\n" |
| "%20s bytes in %12s directory property representations\n" |
| "%20s bytes in %12s file property representations\n" |
| "%20s bytes in header & footer overhead\n"), |
| svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(total_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(dir_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(dir_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(file_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(file_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(dir_prop_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(dir_prop_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(file_prop_rep_stats.total.packed_size, ',', pool), |
| svn__i64toa_sep(file_prop_rep_stats.total.count, ',', pool), |
| svn__i64toa_sep(total_rep_stats.total.overhead_size, ',', pool)); |
| |
| printf("\nDirectory representation statistics:\n"); |
| print_rep_stats(&dir_rep_stats, pool); |
| printf("\nFile representation statistics:\n"); |
| print_rep_stats(&file_rep_stats, pool); |
| printf("\nDirectory property representation statistics:\n"); |
| print_rep_stats(&dir_prop_rep_stats, pool); |
| printf("\nFile property representation statistics:\n"); |
| print_rep_stats(&file_prop_rep_stats, pool); |
| |
| printf("\nLargest representations:\n"); |
| print_largest_reps(fs->largest_changes, pool); |
| printf("\nExtensions by number of changes:\n"); |
| print_extensions_by_changes(fs, pool); |
| printf("\nExtensions by size of changed files:\n"); |
| print_extensions_by_nodes(fs, pool); |
| printf("\nExtensions by size of representations:\n"); |
| print_extensions_by_reps(fs, pool); |
| |
| printf("\nHistogram of expanded node sizes:\n"); |
| print_histogram(&fs->node_size_histogram, pool); |
| printf("\nHistogram of representation sizes:\n"); |
| print_histogram(&fs->rep_size_histogram, pool); |
| printf("\nHistogram of file sizes:\n"); |
| print_histogram(&fs->file_histogram, pool); |
| printf("\nHistogram of file representation sizes:\n"); |
| print_histogram(&fs->file_rep_histogram, pool); |
| printf("\nHistogram of file property sizes:\n"); |
| print_histogram(&fs->file_prop_histogram, pool); |
| printf("\nHistogram of file property representation sizes:\n"); |
| print_histogram(&fs->file_prop_rep_histogram, pool); |
| printf("\nHistogram of directory sizes:\n"); |
| print_histogram(&fs->dir_histogram, pool); |
| printf("\nHistogram of directory representation sizes:\n"); |
| print_histogram(&fs->dir_rep_histogram, pool); |
| printf("\nHistogram of directory property sizes:\n"); |
| print_histogram(&fs->dir_prop_histogram, pool); |
| printf("\nHistogram of directory property representation sizes:\n"); |
| print_histogram(&fs->dir_prop_rep_histogram, pool); |
| |
| print_histograms_by_extension(fs, pool); |
| } |
| |
| /* Write tool usage info text to OSTREAM using PROGNAME as a prefix and |
| * POOL for allocations. |
| */ |
| static void |
| print_usage(svn_stream_t *ostream, const char *progname, |
| apr_pool_t *pool) |
| { |
| svn_error_clear(svn_stream_printf(ostream, pool, |
| "\n" |
| "Usage: %s <repo> [cachesize]\n" |
| "\n" |
| "Read the repository at local path <repo> starting at revision 0,\n" |
| "count statistical information and write that data to stdout.\n" |
| "Use up to [cachesize] MB of memory for caching. This does not include\n" |
| "temporary representation of the repository structure, i.e. the actual\n" |
| "memory may be considerably higher. If not given, defaults to 100 MB.\n", |
| progname)); |
| } |
| |
| /* linear control flow */ |
| int main(int argc, const char *argv[]) |
| { |
| apr_pool_t *pool; |
| svn_stream_t *ostream; |
| svn_error_t *svn_err; |
| const char *repo_path = NULL; |
| svn_revnum_t start_revision = 0; |
| apr_size_t memsize = 100; |
| apr_uint64_t temp = 0; |
| fs_fs_t *fs; |
| |
| apr_initialize(); |
| atexit(apr_terminate); |
| |
| pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE)); |
| |
| svn_err = svn_stream_for_stdout(&ostream, pool); |
| if (svn_err) |
| { |
| svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); |
| return 2; |
| } |
| |
| if (argc < 2 || argc > 3) |
| { |
| print_usage(ostream, argv[0], pool); |
| return 2; |
| } |
| |
| if (argc == 3) |
| { |
| svn_err = svn_cstring_strtoui64(&temp, argv[2], 0, APR_SIZE_MAX, 10); |
| if (svn_err) |
| { |
| print_usage(ostream, argv[0], pool); |
| svn_error_clear(svn_err); |
| return 2; |
| } |
| |
| memsize = (apr_size_t)temp; |
| } |
| |
| repo_path = svn_dirent_canonicalize(argv[1], pool); |
| start_revision = 0; |
| |
| printf("Reading revisions\n"); |
| svn_err = read_revisions(&fs, repo_path, start_revision, memsize, pool); |
| printf("\n"); |
| |
| if (svn_err) |
| { |
| svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); |
| return 2; |
| } |
| |
| print_stats(fs, pool); |
| |
| return 0; |
| } |