| /* fsfs-reorg.c -- prototypic tool to reorganize packed FSFS repositories |
| * to reduce seeks |
| * |
| * ==================================================================== |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * ==================================================================== |
| */ |
| |
| |
| #include <assert.h> |
| |
| #include <apr.h> |
| #include <apr_general.h> |
| #include <apr_file_io.h> |
| #include <apr_poll.h> |
| |
| #include "svn_private_config.h" |
| #include "svn_pools.h" |
| #include "svn_diff.h" |
| #include "svn_io.h" |
| #include "svn_utf.h" |
| #include "svn_dirent_uri.h" |
| #include "svn_sorts.h" |
| #include "svn_delta.h" |
| #include "svn_hash.h" |
| |
| #include "private/svn_string_private.h" |
| #include "private/svn_subr_private.h" |
| #include "private/svn_dep_compat.h" |
| |
| #ifndef _ |
| #define _(x) x |
| #endif |
| |
| #define ERROR_TAG "fsfs-reporg: " |
| |
| /* forward declarations */ |
| typedef struct noderev_t noderev_t; |
| typedef struct revision_info_t revision_info_t; |
| |
| /* A FSFS rev file is sequence of fragments and unused space (the latter |
| * only being inserted by this tool and not during ordinary SVN operation). |
| * |
| * This type defines the type of any fragment. |
| * |
| * Please note that the classification as "property", "dir" or "file" |
| * fragments is only to be used while determining the future placement |
| * of a representation. If the rep is shared, the same rep may be used |
| * as *any* of the 3 kinds. |
| */ |
| enum fragment_kind_t |
| { |
| /* the 2 number line containing changes and root node offsets */ |
| header_fragment, |
| |
| /* list of all changes in a revision */ |
| changes_fragment, |
| |
| /* (the textual representation of) a noderev */ |
| noderev_fragment, |
| |
| /* a property rep (including PLAIN / DELTA header) */ |
| property_fragment, |
| |
| /* a directory rep (including PLAIN / DELTA header) */ |
| dir_fragment, |
| |
| /* a file rep (including PLAIN / DELTA header) */ |
| file_fragment |
| }; |
| |
| /* A fragment. This is used to represent the final ordering, i.e. there |
| * will be an array containing elements of this type that basically put |
| * a fragment at some location in the target file. |
| */ |
| typedef struct fragment_t |
| { |
| /* position in the target file */ |
| apr_size_t position; |
| |
| /* kind of fragment */ |
| enum fragment_kind_t kind; |
| |
| /* pointer to the fragment struct; type depends on KIND */ |
| void *data; |
| } fragment_t; |
| |
| /* Location info for a single revision. |
| */ |
| typedef struct revision_location_t |
| { |
| /* pack file offset (manifest value), 0 for non-packed files */ |
| apr_size_t offset; |
| |
| /* offset of the changes list relative to OFFSET */ |
| apr_size_t changes; |
| |
| /* length of the changes list on bytes */ |
| apr_size_t changes_len; |
| |
| /* first offset behind the revision data in the pack file (file length |
| * for non-packed revs) */ |
| apr_size_t end; |
| } revision_location_t; |
| |
| /* Absolute position and size of some item. |
| */ |
| typedef struct location_t |
| { |
| /* absolute offset in the file */ |
| apr_size_t offset; |
| |
| /* item length in bytes */ |
| apr_size_t size; |
| } location_t; |
| |
| /* A parsed directory entry. Note that instances of this struct may be |
| * shared between different DIRECTORY_T containers. |
| */ |
| typedef struct direntry_t |
| { |
| /* (local) entry / path name */ |
| const char *name; |
| |
| /* strlen (name) */ |
| apr_size_t name_len; |
| |
| /* node rev providing ID and representation(s) */ |
| noderev_t *node; |
| } direntry_t; |
| |
| /* Representation of a parsed directory content. |
| */ |
| typedef struct directory_t |
| { |
| /* array of pointers to DIRENTRY_T */ |
| apr_array_header_t *entries; |
| |
| /* MD5 of the textual representation. Will be set lazily as a side-effect |
| * of determining the length of this dir's textual representation. */ |
| unsigned char target_md5[16]; |
| |
| /* (expanded) length of the textual representation. |
| * Determined lazily during the write process. */ |
| apr_size_t size; |
| } directory_t; |
| |
| /* A representation fragment. |
| */ |
| typedef struct representation_t |
| { |
| /* location in the source file */ |
| location_t original; |
| |
| /* location in the reordered target file */ |
| location_t target; |
| |
| /* length of the PLAIN / DELTA line in the source file in bytes */ |
| apr_size_t header_size; |
| |
| /* deltification base, or NULL if there is none */ |
| struct representation_t *delta_base; |
| |
| /* revision that contains this representation |
| * (may be referenced by other revisions, though) */ |
| revision_info_t *revision; |
| |
| /* representation content parsed as a directory. This will be NULL, if |
| * *no* directory noderev uses this representation. */ |
| directory_t *dir; |
| |
| /* the source content has a PLAIN header, so we may simply copy the |
| * source content into the target */ |
| svn_boolean_t is_plain; |
| |
| /* coloring flag used in the reordering algorithm to keep track of |
| * representations that still need to be placed. */ |
| svn_boolean_t covered; |
| } representation_t; |
| |
| /* A node rev. |
| */ |
| struct noderev_t |
| { |
| /* location within the source file */ |
| location_t original; |
| |
| /* location within the reorganized target file. */ |
| location_t target; |
| |
| /* predecessor node, or NULL if there is none */ |
| noderev_t *predecessor; |
| |
| /* content representation; may be NULL if there is none */ |
| representation_t *text; |
| |
| /* properties representation; may be NULL if there is none */ |
| representation_t *props; |
| |
| /* revision that this noderev belongs to */ |
| revision_info_t *revision; |
| |
| /* coloring flag used in the reordering algorithm to keep track of |
| * representations that still need to be placed. */ |
| svn_boolean_t covered; |
| }; |
| |
| /* Represents a single revision. |
| * There will be only one instance per revision. */ |
| struct revision_info_t |
| { |
| /* number of this revision */ |
| svn_revnum_t revision; |
| |
| /* position in the source file */ |
| revision_location_t original; |
| |
| /* position in the reorganized target file */ |
| revision_location_t target; |
| |
| /* noderev of the root directory */ |
| noderev_t *root_noderev; |
| |
| /* all noderevs_t of this revision (ordered by source file offset), |
| * i.e. those that point back to this struct */ |
| apr_array_header_t *node_revs; |
| |
| /* all representation_t of this revision (ordered by source file offset), |
| * i.e. those that point back to this struct */ |
| apr_array_header_t *representations; |
| }; |
| |
| /* Represents a packed revision file. |
| */ |
| typedef struct revision_pack_t |
| { |
| /* first revision in the pack file */ |
| svn_revnum_t base; |
| |
| /* revision_info_t* of all revisions in the pack file; in revision order. */ |
| apr_array_header_t *info; |
| |
| /* list of fragments to place in the target pack file; in target order. */ |
| apr_array_header_t *fragments; |
| |
| /* source pack file length */ |
| apr_size_t filesize; |
| |
| /* temporary value. Equal to the number of bytes in the target pack file |
| * already allocated to fragments. */ |
| apr_size_t target_offset; |
| } revision_pack_t; |
| |
| /* Cache for revision source content. All content is stored in DATA and |
| * the HASH maps revision number to an svn_string_t instance whose data |
| * member points into DATA. |
| * |
| * Once TOTAL_SIZE exceeds LIMIT, all content will be discarded. Similarly, |
| * the hash gets cleared every 10000 insertions to keep the HASH_POOL |
| * memory usage in check. |
| */ |
| typedef struct content_cache_t |
| { |
| /* pool used for HASH */ |
| apr_pool_t *hash_pool; |
| |
| /* svn_revnum_t -> svn_string_t. |
| * The strings become (potentially) invalid when adding new cache entries. */ |
| apr_hash_t *hash; |
| |
| /* data buffer. the first TOTAL_SIZE bytes are actually being used. */ |
| char *data; |
| |
| /* DATA capacity */ |
| apr_size_t limit; |
| |
| /* number of bytes used in DATA */ |
| apr_size_t total_size; |
| |
| /* number of insertions since the last hash cleanup */ |
| apr_size_t insert_count; |
| } content_cache_t; |
| |
| /* A cached directory. In contrast to directory_t, this stored the data as |
| * the plain hash that the normal FSFS will use to serialize & diff dirs. |
| */ |
| typedef struct dir_cache_entry_t |
| { |
| /* revision containing the representation */ |
| svn_revnum_t revision; |
| |
| /* offset of the representation within that revision */ |
| apr_size_t offset; |
| |
| /* key-value representation of the directory entries */ |
| apr_hash_t *hash; |
| } dir_cache_entry_t; |
| |
| /* Directory cache. (revision, offset) will be mapped directly into the |
| * ENTRIES array of ENTRY_COUNT buckets (many entries will be NULL). |
| * Two alternating pools will be used to allocate dir content. |
| * |
| * If the INSERT_COUNT exceeds a given limit, the pools get exchanged and |
| * the older of the two will be cleared. This is to keep dir objects valid |
| * for at least one insertion. |
| */ |
| typedef struct dir_cache_t |
| { |
| /* fixed-size array of ENTRY_COUNT elements */ |
| dir_cache_entry_t *entries; |
| |
| /* currently used for entry allocations */ |
| apr_pool_t *pool1; |
| |
| /* previously used for entry allocations */ |
| apr_pool_t *pool2; |
| |
| /* size of ENTRIES in elements */ |
| apr_size_t entry_count; |
| |
| /* number of directory elements added. I.e. usually >> #cached dirs */ |
| apr_size_t insert_count; |
| } dir_cache_t; |
| |
| /* A cached, undeltified txdelta window. |
| */ |
| typedef struct window_cache_entry_t |
| { |
| /* revision containing the window */ |
| svn_revnum_t revision; |
| |
| /* offset of the deltified window within that revision */ |
| apr_size_t offset; |
| |
| /* window content */ |
| svn_stringbuf_t *window; |
| } window_cache_entry_t; |
| |
| /* Cache for undeltified txdelta windows. (revision, offset) will be mapped |
| * directly into the ENTRIES array of INSERT_COUNT buckets (most entries |
| * will be NULL). |
| * |
| * The cache will be cleared when USED exceeds CAPACITY. |
| */ |
| typedef struct window_cache_t |
| { |
| /* fixed-size array of ENTRY_COUNT elements */ |
| window_cache_entry_t *entries; |
| |
| /* used to allocate windows */ |
| apr_pool_t *pool; |
| |
| /* size of ENTRIES in elements */ |
| apr_size_t entry_count; |
| |
| /* maximum combined size of all cached windows */ |
| apr_size_t capacity; |
| |
| /* current combined size of all cached windows */ |
| apr_size_t used; |
| } window_cache_t; |
| |
| /* Root data structure containing all information about a given repository. |
| */ |
| typedef struct fs_fs_t |
| { |
| /* repository to reorg */ |
| const char *path; |
| |
| /* revision to start at (must be 0, ATM) */ |
| svn_revnum_t start_revision; |
| |
| /* FSFS format number */ |
| int format; |
| |
| /* highest revision number in the repo */ |
| svn_revnum_t max_revision; |
| |
| /* first non-packed revision */ |
| svn_revnum_t min_unpacked_rev; |
| |
| /* sharing size*/ |
| int max_files_per_dir; |
| |
| /* all revisions */ |
| apr_array_header_t *revisions; |
| |
| /* all packed files */ |
| apr_array_header_t *packs; |
| |
| /* empty representation. |
| * Used as a dummy base for DELTA reps without base. */ |
| representation_t *null_base; |
| |
| /* revision content cache */ |
| content_cache_t *cache; |
| |
| /* directory hash cache */ |
| dir_cache_t *dir_cache; |
| |
| /* undeltified txdelta window cache */ |
| window_cache_t *window_cache; |
| } fs_fs_t; |
| |
| /* Return the rev pack folder for revision REV in FS. |
| */ |
| static const char * |
| get_pack_folder(fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return apr_psprintf(pool, "%s/db/revs/%ld.pack", |
| fs->path, rev / fs->max_files_per_dir); |
| } |
| |
| /* Return the path of the file containing revision REV in FS. |
| */ |
| static const char * |
| rev_or_pack_file_name(fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return fs->min_unpacked_rev > rev |
| ? svn_dirent_join(get_pack_folder(fs, rev, pool), "pack", pool) |
| : apr_psprintf(pool, "%s/db/revs/%ld/%ld", fs->path, |
| rev / fs->max_files_per_dir, rev); |
| } |
| |
| /* Open the file containing revision REV in FS and return it in *FILE. |
| */ |
| static svn_error_t * |
| open_rev_or_pack_file(apr_file_t **file, |
| fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return svn_io_file_open(file, |
| rev_or_pack_file_name(fs, rev, pool), |
| APR_READ | APR_BUFFERED, |
| APR_OS_DEFAULT, |
| pool); |
| } |
| |
| /* Read the whole content of the file containing REV in FS and return that |
| * in *CONTENT. |
| */ |
| static svn_error_t * |
| read_rev_or_pack_file(svn_stringbuf_t **content, |
| fs_fs_t *fs, |
| svn_revnum_t rev, |
| apr_pool_t *pool) |
| { |
| return svn_stringbuf_from_file2(content, |
| rev_or_pack_file_name(fs, rev, pool), |
| pool); |
| } |
| |
| /* Return a new content cache with the given size LIMIT. Use POOL for |
| * all cache-related allocations. |
| */ |
| static content_cache_t * |
| create_content_cache(apr_pool_t *pool, |
| apr_size_t limit) |
| { |
| content_cache_t *result = apr_pcalloc(pool, sizeof(*result)); |
| |
| result->hash_pool = svn_pool_create(pool); |
| result->hash = svn_hash__make(result->hash_pool); |
| result->limit = limit; |
| result->total_size = 0; |
| result->insert_count = 0; |
| result->data = apr_palloc(pool, limit); |
| |
| return result; |
| } |
| |
| /* Return the content of revision REVISION from CACHE. Return NULL upon a |
| * cache miss. This is a cache-internal function. |
| */ |
| static svn_string_t * |
| get_cached_content(content_cache_t *cache, |
| svn_revnum_t revision) |
| { |
| return apr_hash_get(cache->hash, &revision, sizeof(revision)); |
| } |
| |
| /* Take the content in DATA and store it under REVISION in CACHE. |
| * This is a cache-internal function. |
| */ |
| static void |
| set_cached_content(content_cache_t *cache, |
| svn_revnum_t revision, |
| svn_string_t *data) |
| { |
| svn_string_t *content; |
| svn_revnum_t *key; |
| |
| /* double insertion? -> broken cache logic */ |
| assert(get_cached_content(cache, revision) == NULL); |
| |
| /* purge the cache upon overflow */ |
| if (cache->total_size + data->len > cache->limit) |
| { |
| /* the hash pool grows slowly over time; clear it once in a while */ |
| if (cache->insert_count > 10000) |
| { |
| svn_pool_clear(cache->hash_pool); |
| cache->hash = svn_hash__make(cache->hash_pool); |
| cache->insert_count = 0; |
| } |
| else |
| cache->hash = svn_hash__make(cache->hash_pool); |
| |
| cache->total_size = 0; |
| |
| /* buffer overflow / revision too large */ |
| if (data->len > cache->limit) |
| SVN_ERR_MALFUNCTION_NO_RETURN(); |
| } |
| |
| /* copy data to cache and update he index (hash) */ |
| content = apr_palloc(cache->hash_pool, sizeof(*content)); |
| content->data = cache->data + cache->total_size; |
| content->len = data->len; |
| |
| memcpy(cache->data + cache->total_size, data->data, data->len); |
| cache->total_size += data->len; |
| |
| key = apr_palloc(cache->hash_pool, sizeof(*key)); |
| *key = revision; |
| |
| apr_hash_set(cache->hash, key, sizeof(*key), content); |
| ++cache->insert_count; |
| } |
| |
| /* Get the file content of revision REVISION in FS and return it in *DATA. |
| * Use SCRATCH_POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| get_content(svn_string_t **data, |
| fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_pool_t *scratch_pool) |
| { |
| apr_file_t *file; |
| revision_info_t *revision_info; |
| svn_stringbuf_t *temp; |
| apr_off_t temp_offset; |
| |
| /* try getting the data from our cache */ |
| svn_string_t *result = get_cached_content(fs->cache, revision); |
| if (result) |
| { |
| *data = result; |
| return SVN_NO_ERROR; |
| } |
| |
| /* not in cache. Is the revision valid at all? */ |
| if (revision - fs->start_revision > fs->revisions->nelts) |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("Unknown revision %ld"), revision); |
| revision_info = APR_ARRAY_IDX(fs->revisions, |
| revision - fs->start_revision, |
| revision_info_t*); |
| |
| /* read the revision content. Assume that the file has *not* been |
| * reorg'ed, yet, i.e. all data is in one place. */ |
| temp = svn_stringbuf_create_ensure( revision_info->original.end |
| - revision_info->original.offset, |
| scratch_pool); |
| temp->len = revision_info->original.end - revision_info->original.offset; |
| SVN_ERR(open_rev_or_pack_file(&file, fs, revision, scratch_pool)); |
| |
| temp_offset = revision_info->original.offset; |
| SVN_ERR(svn_io_file_seek(file, APR_SET, &temp_offset, |
| scratch_pool)); |
| SVN_ERR_ASSERT(temp_offset < APR_SIZE_MAX); |
| revision_info->original.offset = (apr_size_t)temp_offset; |
| SVN_ERR(svn_io_file_read(file, temp->data, &temp->len, scratch_pool)); |
| |
| /* cache the result and return it */ |
| set_cached_content(fs->cache, revision, |
| svn_stringbuf__morph_into_string(temp)); |
| *data = get_cached_content(fs->cache, revision); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Return a new directory cache with ENTRY_COUNT buckets in its index. |
| * Use POOL for all cache-related allocations. |
| */ |
| static dir_cache_t * |
| create_dir_cache(apr_pool_t *pool, |
| apr_size_t entry_count) |
| { |
| dir_cache_t *result = apr_pcalloc(pool, sizeof(*result)); |
| |
| result->pool1 = svn_pool_create(pool); |
| result->pool2 = svn_pool_create(pool); |
| result->entry_count = entry_count; |
| result->insert_count = 0; |
| result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count); |
| |
| return result; |
| } |
| |
| /* Return the position within FS' dir cache ENTRIES index for the given |
| * (REVISION, OFFSET) pair. This is a cache-internal function. |
| */ |
| static apr_size_t |
| get_dir_cache_index(fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_size_t offset) |
| { |
| return (revision + offset * 0xd1f3da69) % fs->dir_cache->entry_count; |
| } |
| |
| /* Return the currently active pool of FS' dir cache. Note that it may be |
| * cleared after *2* insertions. |
| */ |
| static apr_pool_t * |
| get_cached_dir_pool(fs_fs_t *fs) |
| { |
| return fs->dir_cache->pool1; |
| } |
| |
| /* Return the cached directory content stored in REPRESENTATION within FS. |
| * If that has not been found in cache, return NULL. |
| */ |
| static apr_hash_t * |
| get_cached_dir(fs_fs_t *fs, |
| representation_t *representation) |
| { |
| svn_revnum_t revision = representation->revision->revision; |
| apr_size_t offset = representation->original.offset; |
| |
| apr_size_t i = get_dir_cache_index(fs, revision, offset); |
| dir_cache_entry_t *entry = &fs->dir_cache->entries[i]; |
| |
| return entry->offset == offset && entry->revision == revision |
| ? entry->hash |
| : NULL; |
| } |
| |
| /* Cache the directory HASH for REPRESENTATION within FS. |
| */ |
| static void |
| set_cached_dir(fs_fs_t *fs, |
| representation_t *representation, |
| apr_hash_t *hash) |
| { |
| /* select the entry to use */ |
| svn_revnum_t revision = representation->revision->revision; |
| apr_size_t offset = representation->original.offset; |
| |
| apr_size_t i = get_dir_cache_index(fs, revision, offset); |
| dir_cache_entry_t *entry = &fs->dir_cache->entries[i]; |
| |
| /* clean the cache and rotate pools at regular intervals */ |
| fs->dir_cache->insert_count += apr_hash_count(hash); |
| if (fs->dir_cache->insert_count >= fs->dir_cache->entry_count * 100) |
| { |
| apr_pool_t *pool; |
| |
| svn_pool_clear(fs->dir_cache->pool2); |
| memset(fs->dir_cache->entries, |
| 0, |
| sizeof(*fs->dir_cache->entries) * fs->dir_cache->entry_count); |
| fs->dir_cache->insert_count = 0; |
| |
| pool = fs->dir_cache->pool2; |
| fs->dir_cache->pool2 = fs->dir_cache->pool1; |
| fs->dir_cache->pool1 = pool; |
| } |
| |
| /* write data to cache */ |
| entry->hash = hash; |
| entry->offset = offset; |
| entry->revision = revision; |
| } |
| |
| /* Return a new txdelta window cache with ENTRY_COUNT buckets in its index |
| * and a the total CAPACITY given in bytes. |
| * Use POOL for all cache-related allocations. |
| */ |
| static window_cache_t * |
| create_window_cache(apr_pool_t *pool, |
| apr_size_t entry_count, |
| apr_size_t capacity) |
| { |
| window_cache_t *result = apr_pcalloc(pool, sizeof(*result)); |
| |
| result->pool = svn_pool_create(pool); |
| result->entry_count = entry_count; |
| result->capacity = capacity; |
| result->used = 0; |
| result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count); |
| |
| return result; |
| } |
| |
| /* Return the position within FS' window cache ENTRIES index for the given |
| * (REVISION, OFFSET) pair. This is a cache-internal function. |
| */ |
| static apr_size_t |
| get_window_cache_index(fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_size_t offset) |
| { |
| return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count; |
| } |
| |
| /* Return the cached txdelta window stored in REPRESENTATION within FS. |
| * If that has not been found in cache, return NULL. |
| */ |
| static svn_stringbuf_t * |
| get_cached_window(fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool) |
| { |
| svn_revnum_t revision = representation->revision->revision; |
| apr_size_t offset = representation->original.offset; |
| |
| apr_size_t i = get_window_cache_index(fs, revision, offset); |
| window_cache_entry_t *entry = &fs->window_cache->entries[i]; |
| |
| return entry->offset == offset && entry->revision == revision |
| ? svn_stringbuf_dup(entry->window, pool) |
| : NULL; |
| } |
| |
| /* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS. |
| */ |
| static void |
| set_cached_window(fs_fs_t *fs, |
| representation_t *representation, |
| svn_stringbuf_t *window) |
| { |
| /* select entry */ |
| svn_revnum_t revision = representation->revision->revision; |
| apr_size_t offset = representation->original.offset; |
| |
| apr_size_t i = get_window_cache_index(fs, revision, offset); |
| window_cache_entry_t *entry = &fs->window_cache->entries[i]; |
| |
| /* if the capacity is exceeded, clear the cache */ |
| fs->window_cache->used += window->len; |
| if (fs->window_cache->used >= fs->window_cache->capacity) |
| { |
| svn_pool_clear(fs->window_cache->pool); |
| memset(fs->window_cache->entries, |
| 0, |
| sizeof(*fs->window_cache->entries) * fs->window_cache->entry_count); |
| fs->window_cache->used = window->len; |
| } |
| |
| /* set the entry to a copy of the window data */ |
| entry->window = svn_stringbuf_dup(window, fs->window_cache->pool); |
| entry->offset = offset; |
| entry->revision = revision; |
| } |
| |
| /* Given rev pack PATH in FS, read the manifest file and return the offsets |
| * in *MANIFEST. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_manifest(apr_array_header_t **manifest, |
| fs_fs_t *fs, |
| const char *path, |
| apr_pool_t *pool) |
| { |
| svn_stream_t *manifest_stream; |
| apr_pool_t *iterpool; |
| |
| /* Open the manifest file. */ |
| SVN_ERR(svn_stream_open_readonly(&manifest_stream, |
| svn_dirent_join(path, "manifest", pool), |
| pool, pool)); |
| |
| /* While we're here, let's just read the entire manifest file into an array, |
| so we can cache the entire thing. */ |
| iterpool = svn_pool_create(pool); |
| *manifest = apr_array_make(pool, fs->max_files_per_dir, sizeof(apr_size_t)); |
| while (1) |
| { |
| svn_stringbuf_t *sb; |
| svn_boolean_t eof; |
| apr_uint64_t val; |
| svn_error_t *err; |
| |
| svn_pool_clear(iterpool); |
| SVN_ERR(svn_stream_readline(manifest_stream, &sb, "\n", &eof, iterpool)); |
| if (eof) |
| break; |
| |
| err = svn_cstring_strtoui64(&val, sb->data, 0, APR_SIZE_MAX, 10); |
| if (err) |
| return svn_error_createf(SVN_ERR_FS_CORRUPT, err, |
| _("Manifest offset '%s' too large"), |
| sb->data); |
| APR_ARRAY_PUSH(*manifest, apr_size_t) = (apr_size_t)val; |
| } |
| svn_pool_destroy(iterpool); |
| |
| return svn_stream_close(manifest_stream); |
| } |
| |
| /* Read header information for the revision stored in FILE_CONTENT at |
| * offsets START or END. Return the offsets within FILE_CONTENT for the |
| * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. |
| * Use POOL for temporary allocations. */ |
| static svn_error_t * |
| read_revision_header(apr_size_t *changes, |
| apr_size_t *changes_len, |
| apr_size_t *root_noderev, |
| svn_stringbuf_t *file_content, |
| apr_size_t start, |
| apr_size_t end, |
| apr_pool_t *pool) |
| { |
| char buf[64]; |
| const char *line; |
| char *space; |
| apr_uint64_t val; |
| apr_size_t len; |
| |
| /* Read in this last block, from which we will identify the last line. */ |
| len = sizeof(buf); |
| if (start + len > end) |
| len = end - start; |
| |
| memcpy(buf, file_content->data + end - len, len); |
| |
| /* The last byte should be a newline. */ |
| if (buf[(apr_ssize_t)len - 1] != '\n') |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Revision lacks trailing newline")); |
| |
| /* Look for the next previous newline. */ |
| buf[len - 1] = 0; |
| line = strrchr(buf, '\n'); |
| if (line == NULL) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Final line in revision file longer " |
| "than 64 characters")); |
| |
| space = strchr(line, ' '); |
| if (space == NULL) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, |
| _("Final line in revision file missing space")); |
| |
| /* terminate the header line */ |
| *space = 0; |
| |
| /* extract information */ |
| SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10)); |
| *root_noderev = (apr_size_t)val; |
| SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10)); |
| *changes = (apr_size_t)val; |
| *changes_len = end - *changes - start - (buf + len - line) + 1; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the FSFS format number and sharding size from the format file at |
| * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_format(int *pformat, int *max_files_per_dir, |
| const char *path, apr_pool_t *pool) |
| { |
| svn_error_t *err; |
| apr_file_t *file; |
| char buf[80]; |
| apr_size_t len; |
| |
| /* open format file and read the first line */ |
| err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED, |
| APR_OS_DEFAULT, pool); |
| if (err && APR_STATUS_IS_ENOENT(err->apr_err)) |
| { |
| /* Treat an absent format file as format 1. Do not try to |
| create the format file on the fly, because the repository |
| might be read-only for us, or this might be a read-only |
| operation, and the spirit of FSFS is to make no changes |
| whatseover in read-only operations. See thread starting at |
| http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600 |
| for more. */ |
| svn_error_clear(err); |
| *pformat = 1; |
| *max_files_per_dir = 0; |
| |
| return SVN_NO_ERROR; |
| } |
| SVN_ERR(err); |
| |
| len = sizeof(buf); |
| err = svn_io_read_length_line(file, buf, &len, pool); |
| if (err && APR_STATUS_IS_EOF(err->apr_err)) |
| { |
| /* Return a more useful error message. */ |
| svn_error_clear(err); |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("Can't read first line of format file '%s'"), |
| svn_dirent_local_style(path, pool)); |
| } |
| SVN_ERR(err); |
| |
| /* Check that the first line contains only digits. */ |
| SVN_ERR(svn_cstring_atoi(pformat, buf)); |
| |
| /* Set the default values for anything that can be set via an option. */ |
| *max_files_per_dir = 0; |
| |
| /* Read any options. */ |
| while (1) |
| { |
| len = sizeof(buf); |
| err = svn_io_read_length_line(file, buf, &len, pool); |
| if (err && APR_STATUS_IS_EOF(err->apr_err)) |
| { |
| /* No more options; that's okay. */ |
| svn_error_clear(err); |
| break; |
| } |
| SVN_ERR(err); |
| |
| if (strncmp(buf, "layout ", 7) == 0) |
| { |
| if (strcmp(buf+7, "linear") == 0) |
| { |
| *max_files_per_dir = 0; |
| continue; |
| } |
| |
| if (strncmp(buf+7, "sharded ", 8) == 0) |
| { |
| /* Check that the argument is numeric. */ |
| SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf + 15)); |
| continue; |
| } |
| } |
| |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("'%s' contains invalid filesystem format option '%s'"), |
| svn_dirent_local_style(path, pool), buf); |
| } |
| |
| return svn_io_file_close(file, pool); |
| } |
| |
| /* Read the content of the file at PATH and return it in *RESULT. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool) |
| { |
| svn_stringbuf_t *content; |
| apr_uint64_t number; |
| |
| SVN_ERR(svn_stringbuf_from_file2(&content, path, pool)); |
| |
| content->data[content->len-1] = 0; |
| SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10)); |
| *result = (svn_revnum_t)number; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Create *FS for the repository at PATH and read the format and size info. |
| * Use POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool) |
| { |
| *fs = apr_pcalloc(pool, sizeof(**fs)); |
| (*fs)->path = apr_pstrdup(pool, path); |
| (*fs)->max_files_per_dir = 1000; |
| |
| /* Read the FS format number. */ |
| SVN_ERR(read_format(&(*fs)->format, |
| &(*fs)->max_files_per_dir, |
| svn_dirent_join(path, "db/format", pool), |
| pool)); |
| if (((*fs)->format != 4) && ((*fs)->format != 6)) |
| return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL); |
| |
| /* read size (HEAD) info */ |
| SVN_ERR(read_number(&(*fs)->min_unpacked_rev, |
| svn_dirent_join(path, "db/min-unpacked-rev", pool), |
| pool)); |
| return read_number(&(*fs)->max_revision, |
| svn_dirent_join(path, "db/current", pool), |
| pool); |
| } |
| |
| /* Utility function that returns true if STRING->DATA matches KEY. |
| */ |
| static svn_boolean_t |
| key_matches(svn_string_t *string, const char *key) |
| { |
| return strcmp(string->data, key) == 0; |
| } |
| |
| /* Comparator used for binary search comparing the absolute file offset |
| * of a noderev to some other offset. DATA is a *noderev_t, KEY is pointer |
| * to an apr_size_t. |
| */ |
| static int |
| compare_noderev_offsets(const void *data, const void *key) |
| { |
| apr_ssize_t diff = (*(const noderev_t *const *)data)->original.offset |
| - *(const apr_size_t *)key; |
| |
| /* sizeof(int) may be < sizeof(ssize_t) */ |
| if (diff < 0) |
| return -1; |
| return diff > 0 ? 1 : 0; |
| } |
| |
| /* Get the revision and offset info from the node ID with FS. Return the |
| * data as *REVISION_INFO and *OFFSET, respectively. |
| * |
| * Note that we assume that the revision_info_t object ID's revision has |
| * already been created. That can be guaranteed for standard FSFS pack |
| * files as IDs never point to future revisions. |
| */ |
| static svn_error_t * |
| parse_revnode_pos(revision_info_t **revision_info, |
| apr_size_t *offset, |
| fs_fs_t *fs, |
| svn_string_t *id) |
| { |
| int revision; |
| apr_uint64_t temp; |
| |
| /* split the ID and verify the format */ |
| const char *revision_pos = strrchr(id->data, 'r'); |
| char *offset_pos = (char *)strchr(id->data, '/'); |
| |
| if (revision_pos == NULL || offset_pos == NULL) |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("Invalid node id '%s'"), id->data); |
| |
| /* extract the numbers (temp. modifying the ID)*/ |
| *offset_pos = 0; |
| SVN_ERR(svn_cstring_atoi(&revision, revision_pos + 1)); |
| SVN_ERR(svn_cstring_strtoui64(&temp, offset_pos + 1, 0, APR_SIZE_MAX, 10)); |
| *offset = (apr_size_t)temp; |
| *offset_pos = '/'; |
| |
| /* validate the revision number and return the revision info */ |
| if (revision - fs->start_revision > fs->revisions->nelts) |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("Unknown revision %d"), revision); |
| |
| *revision_info = APR_ARRAY_IDX(fs->revisions, |
| revision - fs->start_revision, |
| revision_info_t*); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Returns in *RESULT the noderev at OFFSET relative the revision given in |
| * REVISION_INFO. If no such noderev has been parsed, yet, error out. |
| * |
| * Since we require the noderev to already have been parsed, we can use |
| * this functions only to access "older", i.e. predecessor noderevs. |
| */ |
| static svn_error_t * |
| find_noderev(noderev_t **result, |
| revision_info_t *revision_info, |
| apr_size_t offset) |
| { |
| int idx = svn_sort__bsearch_lower_bound(&offset, |
| revision_info->node_revs, |
| compare_noderev_offsets); |
| if ((idx < 0) || (idx >= revision_info->node_revs->nelts)) |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("No noderev found at offset %ld"), |
| (long)offset); |
| |
| *result = APR_ARRAY_IDX(revision_info->node_revs, idx, noderev_t *); |
| if ((*result)->original.offset != offset) |
| return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, |
| _("No noderev found at offset %ld"), |
| (long)offset); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* In *RESULT, return the noderev given by ID in FS. The noderev must |
| * already have been parsed and put into the FS data structures. |
| */ |
| static svn_error_t * |
| parse_pred(noderev_t **result, |
| fs_fs_t *fs, |
| svn_string_t *id) |
| { |
| apr_size_t offset; |
| revision_info_t *revision_info; |
| |
| SVN_ERR(parse_revnode_pos(&revision_info, &offset, fs, id)); |
| SVN_ERR(find_noderev(result, revision_info, offset)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Comparator used for binary search comparing the absolute file offset |
| * of a representation to some other offset. DATA is a *representation_t, |
| * KEY is a pointer to an apr_size_t. |
| */ |
| static int |
| compare_representation_offsets(const void *data, const void *key) |
| { |
| apr_ssize_t diff = (*(const representation_t *const *)data)->original.offset |
| - *(const apr_size_t *)key; |
| |
| /* sizeof(int) may be < sizeof(ssize_t) */ |
| if (diff < 0) |
| return -1; |
| return diff > 0 ? 1 : 0; |
| } |
| |
| /* Find the revision_info_t object to the given REVISION in FS and return |
| * it in *REVISION_INFO. For performance reasons, we skip the lookup if |
| * the info is already provided. |
| * |
| * In that revision, look for the representation_t object for offset OFFSET. |
| * If it already exists, set *idx to its index in *REVISION_INFO's |
| * representations list and return the representation object. Otherwise, |
| * set the index to where it must be inserted and return NULL. |
| */ |
| static representation_t * |
| find_representation(int *idx, |
| fs_fs_t *fs, |
| revision_info_t **revision_info, |
| int revision, |
| apr_size_t offset) |
| { |
| revision_info_t *info; |
| *idx = -1; |
| |
| /* first let's find the revision '*/ |
| info = revision_info ? *revision_info : NULL; |
| if (info == NULL || info->revision != revision) |
| { |
| info = APR_ARRAY_IDX(fs->revisions, |
| revision - fs->start_revision, |
| revision_info_t*); |
| if (revision_info) |
| *revision_info = info; |
| } |
| |
| /* not found -> no result */ |
| if (info == NULL) |
| return NULL; |
| |
| assert(revision == info->revision); |
| |
| /* look for the representation */ |
| *idx = svn_sort__bsearch_lower_bound(&offset, |
| info->representations, |
| compare_representation_offsets); |
| if (*idx < info->representations->nelts) |
| { |
| /* return the representation, if this is the one we were looking for */ |
| representation_t *result |
| = APR_ARRAY_IDX(info->representations, *idx, representation_t *); |
| if (result->original.offset == offset) |
| return result; |
| } |
| |
| /* not parsed, yet */ |
| return NULL; |
| } |
| |
| /* Read the representation header in FILE_CONTENT at OFFSET. Return its |
| * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and |
| * return the deltification base representation in *REPRESENTATION. If |
| * there is none, set it to NULL. Use FS to it look up. |
| * |
| * Use SCRATCH_POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_rep_base(representation_t **representation, |
| apr_size_t *header_size, |
| svn_boolean_t *is_plain, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| apr_pool_t *scratch_pool) |
| { |
| char *str, *last_str; |
| int idx, revision; |
| apr_uint64_t temp; |
| |
| /* identify representation header (1 line) */ |
| const char *buffer = file_content->data + offset; |
| const char *line_end = strchr(buffer, '\n'); |
| *header_size = line_end - buffer + 1; |
| |
| /* check for PLAIN rep */ |
| if (strncmp(buffer, "PLAIN\n", *header_size) == 0) |
| { |
| *is_plain = TRUE; |
| *representation = NULL; |
| return SVN_NO_ERROR; |
| } |
| |
| /* check for DELTA against empty rep */ |
| *is_plain = FALSE; |
| if (strncmp(buffer, "DELTA\n", *header_size) == 0) |
| { |
| /* This is a delta against the empty stream. */ |
| *representation = fs->null_base; |
| return SVN_NO_ERROR; |
| } |
| |
| /* it's delta against some other rep. Duplicate the header info such |
| * that we may modify it during parsing. */ |
| str = apr_pstrndup(scratch_pool, buffer, line_end - buffer); |
| last_str = str; |
| |
| /* parse it. */ |
| str = svn_cstring_tokenize(" ", &last_str); |
| str = svn_cstring_tokenize(" ", &last_str); |
| SVN_ERR(svn_cstring_atoi(&revision, str)); |
| |
| str = svn_cstring_tokenize(" ", &last_str); |
| SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10)); |
| |
| /* it should refer to a rep in an earlier revision. Look it up */ |
| *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp); |
| return SVN_NO_ERROR; |
| } |
| |
| /* Parse the representation reference (text: or props:) in VALUE, look |
| * it up in FS and return it in *REPRESENTATION. To be able to parse the |
| * base rep, we pass the FILE_CONTENT as well. |
| * |
| * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp. |
| * allocations. |
| */ |
| static svn_error_t * |
| parse_representation(representation_t **representation, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| svn_string_t *value, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| representation_t *result; |
| int revision; |
| |
| apr_uint64_t offset; |
| apr_uint64_t size; |
| int idx; |
| |
| /* read location (revision, offset) and size */ |
| char *c = (char *)value->data; |
| SVN_ERR(svn_cstring_atoi(&revision, svn_cstring_tokenize(" ", &c))); |
| SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); |
| SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); |
| |
| /* look it up */ |
| result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset); |
| if (!result) |
| { |
| /* not parsed, yet (probably a rep in the same revision). |
| * Create a new rep object and determine its base rep as well. |
| */ |
| result = apr_pcalloc(pool, sizeof(*result)); |
| result->revision = revision_info; |
| result->original.offset = (apr_size_t)offset; |
| result->original.size = (apr_size_t)size; |
| SVN_ERR(read_rep_base(&result->delta_base, &result->header_size, |
| &result->is_plain, fs, file_content, |
| (apr_size_t)offset + revision_info->original.offset, |
| scratch_pool)); |
| |
| svn_sort__array_insert(&result, revision_info->representations, idx); |
| } |
| |
| *representation = result; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the delta window contents of all windows in REPRESENTATION in FS. |
| * Return the data as svn_txdelta_window_t* instances in *WINDOWS. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_windows(apr_array_header_t **windows, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool) |
| { |
| svn_string_t *content; |
| svn_string_t data; |
| svn_stream_t *stream; |
| apr_size_t offset = representation->original.offset |
| + representation->header_size; |
| char version; |
| apr_size_t len = sizeof(version); |
| |
| *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *)); |
| |
| /* get the whole revision content */ |
| SVN_ERR(get_content(&content, fs, representation->revision->revision, pool)); |
| |
| /* create a read stream and position it directly after the rep header */ |
| data.data = content->data + offset + 3; |
| data.len = representation->original.size - 3; |
| stream = svn_stream_from_string(&data, pool); |
| SVN_ERR(svn_stream_read(stream, &version, &len)); |
| |
| /* read the windows from that stream */ |
| while (TRUE) |
| { |
| svn_txdelta_window_t *window; |
| svn_stream_mark_t *mark; |
| char dummy; |
| |
| len = sizeof(dummy); |
| SVN_ERR(svn_stream_mark(stream, &mark, pool)); |
| SVN_ERR(svn_stream_read(stream, &dummy, &len)); |
| if (len == 0) |
| break; |
| |
| SVN_ERR(svn_stream_seek(stream, mark)); |
| SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, version, pool)); |
| APR_ARRAY_PUSH(*windows, svn_txdelta_window_t *) = window; |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of the PLAIN REPRESENTATION in FS and return it in |
| * *CONTENT. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_plain(svn_stringbuf_t **content, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool) |
| { |
| svn_string_t *data; |
| apr_size_t offset = representation->original.offset |
| + representation->header_size; |
| |
| SVN_ERR(get_content(&data, fs, representation->revision->revision, pool)); |
| |
| /* content is stored as fulltext already */ |
| *content = svn_stringbuf_ncreate(data->data + offset, |
| representation->original.size, |
| pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Get the undeltified representation that is a result of combining all |
| * deltas from the current desired REPRESENTATION in FS with its base |
| * representation. Store the result in *CONTENT. |
| * Use POOL for allocations. */ |
| static svn_error_t * |
| get_combined_window(svn_stringbuf_t **content, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool) |
| { |
| int i; |
| apr_array_header_t *windows; |
| svn_stringbuf_t *base_content, *result; |
| const char *source; |
| apr_pool_t *subpool; |
| apr_pool_t *iterpool; |
| |
| /* special case: no un-deltification necessary */ |
| if (representation->is_plain) |
| return read_plain(content, fs, representation, pool); |
| |
| /* special case: data already in cache */ |
| *content = get_cached_window(fs, representation, pool); |
| if (*content) |
| return SVN_NO_ERROR; |
| |
| /* read the delta windows for this representation */ |
| subpool = svn_pool_create(pool); |
| iterpool = svn_pool_create(pool); |
| SVN_ERR(read_windows(&windows, fs, representation, subpool)); |
| |
| /* fetch the / create a base content */ |
| if (representation->delta_base && representation->delta_base->revision) |
| SVN_ERR(get_combined_window(&base_content, fs, |
| representation->delta_base, subpool)); |
| else |
| base_content = svn_stringbuf_create_empty(subpool); |
| |
| /* apply deltas */ |
| result = svn_stringbuf_create_empty(pool); |
| source = base_content->data; |
| |
| for (i = 0; i < windows->nelts; ++i) |
| { |
| svn_txdelta_window_t *window |
| = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); |
| svn_stringbuf_t *buf |
| = svn_stringbuf_create_ensure(window->tview_len, iterpool); |
| |
| buf->len = window->tview_len; |
| svn_txdelta_apply_instructions(window, window->src_ops ? source : NULL, |
| buf->data, &buf->len); |
| |
| svn_stringbuf_appendbytes(result, buf->data, buf->len); |
| source += window->sview_len; |
| |
| svn_pool_clear(iterpool); |
| } |
| |
| svn_pool_destroy(iterpool); |
| svn_pool_destroy(subpool); |
| |
| /* cache result and return it */ |
| set_cached_window(fs, representation, result); |
| *content = result; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* forward declaration */ |
| static svn_error_t * |
| read_noderev(noderev_t **noderev, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool); |
| |
| /* Get the noderev at OFFSET in FILE_CONTENT in FS. The file content must |
| * pertain to the revision given in REVISION_INFO. If the data has not |
| * been read yet, parse it and store it in REVISION_INFO. Return the result |
| * in *NODEREV. |
| * |
| * Use POOL for allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| get_noderev(noderev_t **noderev, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| int idx = svn_sort__bsearch_lower_bound(&offset, |
| revision_info->node_revs, |
| compare_noderev_offsets); |
| if ((idx < 0) || (idx >= revision_info->node_revs->nelts)) |
| SVN_ERR(read_noderev(noderev, fs, file_content, offset, revision_info, |
| pool, scratch_pool)); |
| else |
| { |
| *noderev = APR_ARRAY_IDX(revision_info->node_revs, idx, noderev_t *); |
| if ((*noderev)->original.offset != offset) |
| SVN_ERR(read_noderev(noderev, fs, file_content, offset, revision_info, |
| pool, scratch_pool)); |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the directory stored in REPRESENTATION in FS into *HASH. The result |
| * will be allocated in FS' directory cache and it will be plain key-value |
| * hash. Use SCRATCH_POOL for temporary allocations. |
| */ |
| static svn_error_t * |
| read_dir(apr_hash_t **hash, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *scratch_pool) |
| { |
| svn_stringbuf_t *text; |
| apr_pool_t *text_pool; |
| svn_stream_t *stream; |
| apr_pool_t *pool; |
| |
| /* chances are, we find the info in cache already */ |
| *hash = get_cached_dir(fs, representation); |
| if (*hash) |
| return SVN_NO_ERROR; |
| |
| /* create the result container */ |
| pool = get_cached_dir_pool(fs); |
| *hash = svn_hash__make(pool); |
| |
| /* if this is a non-empty rep, read it and de-serialize the hash */ |
| if (representation != NULL) |
| { |
| text_pool = svn_pool_create(scratch_pool); |
| SVN_ERR(get_combined_window(&text, fs, representation, text_pool)); |
| stream = svn_stream_from_stringbuf(text, text_pool); |
| SVN_ERR(svn_hash_read2(*hash, stream, SVN_HASH_TERMINATOR, pool)); |
| svn_pool_destroy(text_pool); |
| } |
| |
| /* cache the result */ |
| set_cached_dir(fs, representation, *hash); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all |
| * DAG nodes, directories and representations linked in that tree structure. |
| * Store them in FS and read them only once. |
| * |
| * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| parse_dir(fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| representation_t *representation, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| apr_hash_t *hash; |
| apr_hash_index_t *hi; |
| apr_pool_t *iter_pool = svn_pool_create(scratch_pool); |
| apr_hash_t *base_dir = svn_hash__make(scratch_pool); |
| |
| /* special case: empty dir rep */ |
| if (representation == NULL) |
| return SVN_NO_ERROR; |
| |
| /* if we have a previous representation of that dir, hash it by name */ |
| if (representation->delta_base && representation->delta_base->dir) |
| { |
| apr_array_header_t *dir = representation->delta_base->dir->entries; |
| int i; |
| |
| for (i = 0; i < dir->nelts; ++i) |
| { |
| direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *); |
| apr_hash_set(base_dir, entry->name, entry->name_len, entry); |
| } |
| } |
| |
| /* read this directory */ |
| SVN_ERR(read_dir(&hash, fs, representation, scratch_pool)); |
| |
| /* add it as an array to the representation (entries yet to be filled) */ |
| representation->dir = apr_pcalloc(pool, sizeof(*representation->dir)); |
| representation->dir->entries |
| = apr_array_make(pool, apr_hash_count(hash), sizeof(direntry_t *)); |
| |
| /* Translate the string dir entries into real entries. Reuse existing |
| * objects as much as possible to keep memory consumption low. |
| */ |
| for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi)) |
| { |
| const char *name = svn__apr_hash_index_key(hi); |
| svn_string_t *str_val = svn__apr_hash_index_val(hi); |
| apr_size_t offset; |
| revision_info_t *revision_info; |
| |
| /* look for corresponding entry in previous version */ |
| apr_size_t name_len = strlen(name); |
| direntry_t *entry = base_dir |
| ? apr_hash_get(base_dir, name, name_len) |
| : NULL; |
| |
| /* parse the new target revnode ID (revision, offset) */ |
| SVN_ERR(parse_revnode_pos(&revision_info, &offset, fs, str_val)); |
| |
| /* if this is a new entry or if the content changed, create a new |
| * instance for it. */ |
| if ( !entry |
| || !entry->node->text |
| || entry->node->text->revision != revision_info |
| || entry->node->original.offset != offset) |
| { |
| /* create & init the new entry. Reuse the name string if possible */ |
| direntry_t *new_entry = apr_pcalloc(pool, sizeof(*entry)); |
| new_entry->name_len = name_len; |
| if (entry) |
| new_entry->name = entry->name; |
| else |
| new_entry->name = apr_pstrdup(pool, name); |
| |
| /* Link it to the content noderev. Recurse. */ |
| entry = new_entry; |
| SVN_ERR(get_noderev(&entry->node, fs, file_content, offset, |
| revision_info, pool, iter_pool)); |
| } |
| |
| /* set the directory entry */ |
| APR_ARRAY_PUSH(representation->dir->entries, direntry_t *) = entry; |
| svn_pool_clear(iter_pool); |
| } |
| |
| svn_pool_destroy(iter_pool); |
| return SVN_NO_ERROR; |
| } |
| |
| /* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes, |
| * directories and representations linked in that tree structure. Store |
| * them in FS and read them only once. Return the result in *NODEREV. |
| * |
| * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. |
| */ |
| static svn_error_t * |
| read_noderev(noderev_t **noderev, |
| fs_fs_t *fs, |
| svn_stringbuf_t *file_content, |
| apr_size_t offset, |
| revision_info_t *revision_info, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| noderev_t *result = apr_pcalloc(pool, sizeof(*result)); |
| svn_string_t *line; |
| svn_boolean_t is_dir = FALSE; |
| |
| scratch_pool = svn_pool_create(scratch_pool); |
| |
| /* parse the noderev line-by-line until we find an empty line */ |
| result->original.offset = offset; |
| while (1) |
| { |
| /* for this line, extract key and value. Ignore invalid values */ |
| svn_string_t key; |
| svn_string_t value; |
| char *sep; |
| const char *start = file_content->data + offset |
| + revision_info->original.offset; |
| const char *end = strchr(start, '\n'); |
| |
| line = svn_string_ncreate(start, end - start, scratch_pool); |
| offset += end - start + 1; |
| |
| /* empty line -> end of noderev data */ |
| if (line->len == 0) |
| break; |
| |
| sep = strchr(line->data, ':'); |
| if (sep == NULL) |
| continue; |
| |
| key.data = line->data; |
| key.len = sep - key.data; |
| *sep = 0; |
| |
| if (key.len + 2 > line->len) |
| continue; |
| |
| value.data = sep + 2; |
| value.len = line->len - (key.len + 2); |
| |
| /* translate (key, value) into noderev elements */ |
| if (key_matches(&key, "type")) |
| is_dir = strcmp(value.data, "dir") == 0; |
| else if (key_matches(&key, "pred")) |
| SVN_ERR(parse_pred(&result->predecessor, fs, &value)); |
| else if (key_matches(&key, "text")) |
| SVN_ERR(parse_representation(&result->text, fs, file_content, |
| &value, revision_info, |
| pool, scratch_pool)); |
| else if (key_matches(&key, "props")) |
| SVN_ERR(parse_representation(&result->props, fs, file_content, |
| &value, revision_info, |
| pool, scratch_pool)); |
| } |
| |
| /* link noderev to revision info */ |
| result->revision = revision_info; |
| result->original.size = offset - result->original.offset; |
| |
| svn_sort__array_insert(&result, |
| revision_info->node_revs, |
| svn_sort__bsearch_lower_bound(&offset, |
| revision_info->node_revs, |
| compare_noderev_offsets)); |
| |
| /* if this is a directory, read and process that recursively */ |
| if (is_dir) |
| SVN_ERR(parse_dir(fs, file_content, result->text, |
| pool, scratch_pool)); |
| |
| /* done */ |
| svn_pool_destroy(scratch_pool); |
| *noderev = result; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Simple utility to print a REVISION number and make it appear immediately. |
| */ |
| static void |
| print_progress(svn_revnum_t revision) |
| { |
| printf("%8ld", revision); |
| fflush(stdout); |
| } |
| |
| /* Read the content of the pack file staring at revision BASE and store it |
| * in FS. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_pack_file(fs_fs_t *fs, |
| svn_revnum_t base, |
| apr_pool_t *pool) |
| { |
| apr_array_header_t *manifest = NULL; |
| apr_pool_t *local_pool = svn_pool_create(pool); |
| apr_pool_t *iter_pool = svn_pool_create(local_pool); |
| int i; |
| svn_stringbuf_t *file_content; |
| revision_pack_t *revisions; |
| const char *pack_folder = get_pack_folder(fs, base, local_pool); |
| |
| /* read the whole pack file into memory */ |
| SVN_ERR(read_rev_or_pack_file(&file_content, fs, base, local_pool)); |
| |
| /* create the revision container */ |
| revisions = apr_pcalloc(pool, sizeof(*revisions)); |
| revisions->base = base; |
| revisions->fragments = NULL; |
| revisions->info = apr_array_make(pool, |
| fs->max_files_per_dir, |
| sizeof(revision_info_t*)); |
| revisions->filesize = file_content->len; |
| APR_ARRAY_PUSH(fs->packs, revision_pack_t*) = revisions; |
| |
| /* parse the manifest file */ |
| SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool)); |
| if (manifest->nelts != fs->max_files_per_dir) |
| return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL); |
| |
| /* process each revision in the pack file */ |
| for (i = 0; i < manifest->nelts; ++i) |
| { |
| apr_size_t root_node_offset; |
| svn_string_t rev_content; |
| |
| /* create the revision info for the current rev */ |
| revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); |
| info->node_revs = apr_array_make(iter_pool, 4, sizeof(noderev_t*)); |
| info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*)); |
| |
| info->revision = base + i; |
| info->original.offset = APR_ARRAY_IDX(manifest, i, apr_size_t); |
| info->original.end = i+1 < manifest->nelts |
| ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t) |
| : file_content->len; |
| SVN_ERR(read_revision_header(&info->original.changes, |
| &info->original.changes_len, |
| &root_node_offset, |
| file_content, |
| APR_ARRAY_IDX(manifest, i , apr_size_t), |
| info->original.end, |
| iter_pool)); |
| |
| /* put it into our containers */ |
| APR_ARRAY_PUSH(revisions->info, revision_info_t*) = info; |
| APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; |
| |
| /* cache the revision content */ |
| rev_content.data = file_content->data + info->original.offset; |
| rev_content.len = info->original.end - info->original.offset; |
| set_cached_content(fs->cache, info->revision, &rev_content); |
| |
| /* parse the revision content recursively. */ |
| SVN_ERR(read_noderev(&info->root_noderev, fs, file_content, |
| root_node_offset, info, pool, iter_pool)); |
| |
| /* copy dynamically grown containers from temp into result pool */ |
| info->node_revs = apr_array_copy(pool, info->node_revs); |
| info->representations = apr_array_copy(pool, info->representations); |
| |
| /* destroy temps */ |
| svn_pool_clear(iter_pool); |
| } |
| |
| /* one more pack file processed */ |
| print_progress(base); |
| svn_pool_destroy(local_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the content of REVSION file and store it in FS. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| read_revision_file(fs_fs_t *fs, |
| svn_revnum_t revision, |
| apr_pool_t *pool) |
| { |
| apr_size_t root_node_offset; |
| apr_pool_t *local_pool = svn_pool_create(pool); |
| svn_stringbuf_t *file_content; |
| svn_string_t rev_content; |
| revision_pack_t *revisions = apr_pcalloc(pool, sizeof(*revisions)); |
| revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); |
| |
| /* read the whole pack file into memory */ |
| SVN_ERR(read_rev_or_pack_file(&file_content, fs, revision, local_pool)); |
| |
| /* create the revision info for the current rev */ |
| info->node_revs = apr_array_make(pool, 4, sizeof(noderev_t*)); |
| info->representations = apr_array_make(pool, 4, sizeof(representation_t*)); |
| |
| info->revision = revision; |
| info->original.offset = 0; |
| info->original.end = file_content->len; |
| SVN_ERR(read_revision_header(&info->original.changes, |
| &info->original.changes_len, |
| &root_node_offset, |
| file_content, |
| 0, |
| info->original.end, |
| local_pool)); |
| |
| /* put it into our containers */ |
| APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; |
| |
| /* create a pseudo-pack file container for just this rev to keep our |
| * data structures as uniform as possible. |
| */ |
| revisions->base = revision; |
| revisions->fragments = NULL; |
| revisions->info = apr_array_make(pool, 1, sizeof(revision_info_t*)); |
| revisions->filesize = file_content->len; |
| APR_ARRAY_PUSH(revisions->info, revision_info_t*) = info; |
| APR_ARRAY_PUSH(fs->packs, revision_pack_t*) = revisions; |
| |
| /* cache the revision content */ |
| rev_content.data = file_content->data + info->original.offset; |
| rev_content.len = info->original.end - info->original.offset; |
| set_cached_content(fs->cache, info->revision, &rev_content); |
| |
| /* parse the revision content recursively. */ |
| SVN_ERR(read_noderev(&info->root_noderev, fs, file_content, |
| root_node_offset, info, |
| pool, local_pool)); |
| APR_ARRAY_PUSH(info->node_revs, noderev_t*) = info->root_noderev; |
| |
| /* show progress every 1000 revs or so */ |
| if (revision % fs->max_files_per_dir == 0) |
| print_progress(revision); |
| |
| svn_pool_destroy(local_pool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Read the repository at PATH beginning with revision START_REVISION and |
| * return the result in *FS. Allocate caches with MEMSIZE bytes total |
| * capacity. Use POOL for non-cache allocations. |
| */ |
| static svn_error_t * |
| read_revisions(fs_fs_t **fs, |
| const char *path, |
| svn_revnum_t start_revision, |
| apr_size_t memsize, |
| apr_pool_t *pool) |
| { |
| svn_revnum_t revision; |
| apr_size_t content_cache_size; |
| apr_size_t window_cache_size; |
| apr_size_t dir_cache_size; |
| |
| /* determine cache sizes */ |
| if (memsize < 100) |
| memsize = 100; |
| |
| content_cache_size = memsize * 7 / 10 > 4000 ? 4000 : memsize * 7 / 10; |
| window_cache_size = memsize * 2 / 10 * 1024 * 1024; |
| dir_cache_size = (memsize / 10) * 16000; |
| |
| /* read repo format and such */ |
| SVN_ERR(fs_open(fs, path, pool)); |
| |
| /* create data containers and caches */ |
| (*fs)->start_revision = start_revision |
| - (start_revision % (*fs)->max_files_per_dir); |
| (*fs)->revisions = apr_array_make(pool, |
| (*fs)->max_revision + 1 - (*fs)->start_revision, |
| sizeof(revision_info_t *)); |
| (*fs)->packs = apr_array_make(pool, |
| ((*fs)->min_unpacked_rev - (*fs)->start_revision) |
| / (*fs)->max_files_per_dir, |
| sizeof(revision_pack_t *)); |
| (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base)); |
| (*fs)->cache = create_content_cache |
| (apr_allocator_owner_get |
| (svn_pool_create_allocator(FALSE)), |
| content_cache_size * 1024 * 1024); |
| (*fs)->dir_cache = create_dir_cache |
| (apr_allocator_owner_get |
| (svn_pool_create_allocator(FALSE)), |
| dir_cache_size); |
| (*fs)->window_cache = create_window_cache |
| (apr_allocator_owner_get |
| (svn_pool_create_allocator(FALSE)), |
| 10000, window_cache_size); |
| |
| /* read all packed revs */ |
| for ( revision = start_revision |
| ; revision < (*fs)->min_unpacked_rev |
| ; revision += (*fs)->max_files_per_dir) |
| SVN_ERR(read_pack_file(*fs, revision, pool)); |
| |
| /* read non-packed revs */ |
| for ( ; revision <= (*fs)->max_revision; ++revision) |
| SVN_ERR(read_revision_file(*fs, revision, pool)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Return the maximum number of decimal digits required to represent offsets |
| * in the given PACK file. |
| */ |
| static apr_size_t |
| get_max_offset_len(const revision_pack_t *pack) |
| { |
| /* the pack files may grow a few percent. |
| * Fudge it up to be on safe side. |
| */ |
| apr_size_t max_future_size = pack->filesize * 2 + 10000; |
| apr_size_t result = 0; |
| |
| while (max_future_size > 0) |
| { |
| ++result; |
| max_future_size /= 10; |
| } |
| |
| return result; |
| } |
| |
| /* Create the fragments container in PACK and add revision header fragments |
| * to it. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| add_revisions_pack_heads(revision_pack_t *pack, |
| apr_pool_t *pool) |
| { |
| int i; |
| revision_info_t *info; |
| apr_size_t offset_len = get_max_offset_len(pack); |
| fragment_t fragment; |
| |
| /* allocate fragment arrays */ |
| |
| int fragment_count = 1; |
| for (i = 0; i < pack->info->nelts; ++i) |
| { |
| info = APR_ARRAY_IDX(pack->info, i, revision_info_t*); |
| fragment_count += info->node_revs->nelts |
| + info->representations->nelts |
| + 2; |
| } |
| |
| pack->target_offset = pack->info->nelts > 1 ? 64 : 0; |
| pack->fragments = apr_array_make(pool, |
| fragment_count, |
| sizeof(fragment_t)); |
| |
| /* put revision headers first */ |
| |
| for (i = 0; i < pack->info->nelts - 1; ++i) |
| { |
| info = APR_ARRAY_IDX(pack->info, i, revision_info_t*); |
| info->target.offset = pack->target_offset; |
| |
| fragment.data = info; |
| fragment.kind = header_fragment; |
| fragment.position = pack->target_offset; |
| APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment; |
| |
| pack->target_offset += 2 * offset_len + 3; |
| } |
| |
| info = APR_ARRAY_IDX(pack->info, pack->info->nelts - 1, revision_info_t*); |
| info->target.offset = pack->target_offset; |
| |
| /* followed by the changes list */ |
| |
| for (i = 0; i < pack->info->nelts; ++i) |
| { |
| info = APR_ARRAY_IDX(pack->info, i, revision_info_t*); |
| |
| info->target.changes = pack->target_offset - info->target.offset; |
| info->target.changes_len = info->original.changes_len; |
| |
| fragment.data = info; |
| fragment.kind = changes_fragment; |
| fragment.position = pack->target_offset; |
| APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment; |
| |
| pack->target_offset += info->original.changes_len; |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* For the revision given by INFO in FS, return the fragment container in |
| * *FRAGMENTS and the current placement offset in *CURRENT_POS. |
| */ |
| static svn_error_t * |
| get_target_offset(apr_size_t **current_pos, |
| apr_array_header_t **fragments, |
| fs_fs_t *fs, |
| revision_info_t *info) |
| { |
| int i; |
| revision_pack_t *pack; |
| svn_revnum_t revision = info->revision; |
| |
| /* identify the pack object */ |
| if (fs->min_unpacked_rev > revision) |
| { |
| i = (revision - fs->start_revision) / fs->max_files_per_dir; |
| } |
| else |
| { |
| i = (fs->min_unpacked_rev - fs->start_revision) / fs->max_files_per_dir; |
| i += revision - fs->min_unpacked_rev; |
| } |
| |
| /* extract the desired info from it */ |
| pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*); |
| *current_pos = &pack->target_offset; |
| *fragments = pack->fragments; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* forward declaration */ |
| static svn_error_t * |
| add_noderev_recursively(fs_fs_t *fs, |
| noderev_t *node, |
| apr_pool_t *pool); |
| |
| /* Place fragments for the given REPRESENTATION of the given KIND, iff it |
| * has not been covered, yet. Place the base reps along the deltification |
| * chain as far as those reps have not been covered, yet. If REPRESENTATION |
| * is a directory, recursively place its elements. |
| * |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| add_representation_recursively(fs_fs_t *fs, |
| representation_t *representation, |
| enum fragment_kind_t kind, |
| apr_pool_t *pool) |
| { |
| apr_size_t *current_pos; |
| apr_array_header_t *fragments; |
| fragment_t fragment; |
| |
| /* place REPRESENTATION only once and only if it exists and will not |
| * be covered later as a directory. */ |
| if ( representation == NULL |
| || representation->covered |
| || (representation->dir && kind != dir_fragment) |
| || representation == fs->null_base) |
| return SVN_NO_ERROR; |
| |
| /* add and place a fragment for REPRESENTATION */ |
| SVN_ERR(get_target_offset(¤t_pos, &fragments, |
| fs, representation->revision)); |
| representation->target.offset = *current_pos; |
| representation->covered = TRUE; |
| |
| fragment.data = representation; |
| fragment.kind = kind; |
| fragment.position = *current_pos; |
| APR_ARRAY_PUSH(fragments, fragment_t) = fragment; |
| |
| /* determine the size of data to be added to the target file */ |
| if ( kind != dir_fragment |
| && representation->delta_base && representation->delta_base->dir) |
| { |
| /* base rep is a dir -> would change -> need to store it as fulltext |
| * in our target file */ |
| apr_pool_t *text_pool = svn_pool_create(pool); |
| svn_stringbuf_t *content; |
| |
| SVN_ERR(get_combined_window(&content, fs, representation, text_pool)); |
| representation->target.size = content->len; |
| *current_pos += representation->target.size + 13; |
| |
| svn_pool_destroy(text_pool); |
| } |
| else |
| if ( kind == dir_fragment |
| || (representation->delta_base && representation->delta_base->dir)) |
| { |
| /* deltified directories may grow considerably */ |
| if (representation->original.size < 50) |
| *current_pos += 300; |
| else |
| *current_pos += representation->original.size * 3 + 150; |
| } |
| else |
| { |
| /* plain / deltified content will not change but the header may |
| * grow slightly due to larger offsets. */ |
| representation->target.size = representation->original.size; |
| |
| if (representation->delta_base && |
| (representation->delta_base != fs->null_base)) |
| *current_pos += representation->original.size + 50; |
| else |
| *current_pos += representation->original.size + 13; |
| } |
| |
| /* follow the delta chain and place base revs immediately after this */ |
| if (representation->delta_base) |
| SVN_ERR(add_representation_recursively(fs, |
| representation->delta_base, |
| kind, |
| pool)); |
| |
| /* finally, recurse into directories */ |
| if (representation->dir) |
| { |
| int i; |
| apr_array_header_t *entries = representation->dir->entries; |
| |
| for (i = 0; i < entries->nelts; ++i) |
| { |
| direntry_t *entry = APR_ARRAY_IDX(entries, i, direntry_t *); |
| if (entry->node) |
| SVN_ERR(add_noderev_recursively(fs, entry->node, pool)); |
| } |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Place fragments for the given NODE in FS, iff it has not been covered, |
| * yet. Place the reps (text, props) immediately after the node. |
| * |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| add_noderev_recursively(fs_fs_t *fs, |
| noderev_t *node, |
| apr_pool_t *pool) |
| { |
| apr_size_t *current_pos; |
| apr_array_header_t *fragments; |
| fragment_t fragment; |
| |
| /* don't add it twice */ |
| if (node->covered) |
| return SVN_NO_ERROR; |
| |
| /* add and place a fragment for NODE */ |
| SVN_ERR(get_target_offset(¤t_pos, &fragments, fs, node->revision)); |
| node->covered = TRUE; |
| node->target.offset = *current_pos; |
| |
| fragment.data = node; |
| fragment.kind = noderev_fragment; |
| fragment.position = *current_pos; |
| APR_ARRAY_PUSH(fragments, fragment_t) = fragment; |
| |
| /* size may slightly increase */ |
| *current_pos += node->original.size + 40; |
| |
| /* recurse into representations */ |
| if (node->text && node->text->dir) |
| SVN_ERR(add_representation_recursively(fs, node->text, dir_fragment, pool)); |
| else |
| SVN_ERR(add_representation_recursively(fs, node->text, file_fragment, pool)); |
| |
| SVN_ERR(add_representation_recursively(fs, node->props, property_fragment, pool)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Place a fragment for the last revision in PACK. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| add_revisions_pack_tail(revision_pack_t *pack, |
| apr_pool_t *pool) |
| { |
| int i; |
| revision_info_t *info; |
| apr_size_t offset_len = get_max_offset_len(pack); |
| fragment_t fragment; |
| |
| /* put final revision header last and fix up revision lengths */ |
| |
| info = APR_ARRAY_IDX(pack->info, pack->info->nelts-1, revision_info_t*); |
| |
| fragment.data = info; |
| fragment.kind = header_fragment; |
| fragment.position = pack->target_offset; |
| APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment; |
| |
| pack->target_offset += 2 * offset_len + 3; |
| |
| /* end of target file reached. Store that info in all revs. */ |
| for (i = 0; i < pack->info->nelts; ++i) |
| { |
| info = APR_ARRAY_IDX(pack->info, i, revision_info_t*); |
| info->target.end = pack->target_offset; |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Place all fragments for all revisions / packs in FS. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| reorder_revisions(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| int i, k; |
| |
| /* headers and changes */ |
| |
| for (i = 0; i < fs->packs->nelts; ++i) |
| { |
| revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*); |
| SVN_ERR(add_revisions_pack_heads(pack, pool)); |
| } |
| |
| /* representations & nodes */ |
| |
| for (i = fs->revisions->nelts-1; i >= 0; --i) |
| { |
| revision_info_t *info = APR_ARRAY_IDX(fs->revisions, i, revision_info_t*); |
| for (k = info->node_revs->nelts - 1; k >= 0; --k) |
| { |
| noderev_t *node = APR_ARRAY_IDX(info->node_revs, k, noderev_t*); |
| SVN_ERR(add_noderev_recursively(fs, node, pool)); |
| } |
| |
| if (info->revision % fs->max_files_per_dir == 0) |
| print_progress(info->revision); |
| } |
| |
| /* pack file tails */ |
| |
| for (i = 0; i < fs->packs->nelts; ++i) |
| { |
| revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*); |
| SVN_ERR(add_revisions_pack_tail(pack, pool)); |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* forward declaration */ |
| static svn_error_t * |
| get_fragment_content(svn_string_t **content, |
| fs_fs_t *fs, |
| fragment_t *fragment, |
| apr_pool_t *pool); |
| |
| /* Directory content may change and with it, the deltified representations |
| * may significantly. This function causes all directory target reps in |
| * PACK of FS to be built and their new MD5 as well as rep sizes be updated. |
| * We must do that before attempting to write noderevs. |
| * |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| update_noderevs(fs_fs_t *fs, |
| revision_pack_t *pack, |
| apr_pool_t *pool) |
| { |
| int i; |
| apr_pool_t *itempool = svn_pool_create(pool); |
| |
| for (i = 0; i < pack->fragments->nelts; ++i) |
| { |
| fragment_t *fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t); |
| if (fragment->kind == dir_fragment) |
| { |
| svn_string_t *content; |
| |
| /* request updated rep content but ignore the result. |
| * We are only interested in the MD5, content and rep size updates. */ |
| SVN_ERR(get_fragment_content(&content, fs, fragment, itempool)); |
| svn_pool_clear(itempool); |
| } |
| } |
| |
| svn_pool_destroy(itempool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Determine the target size of the FRAGMENT in FS and return the value |
| * in *LENGTH. If ADD_PADDING has been set, slightly fudge the numbers |
| * to account for changes in offset lengths etc. Use POOL for temporary |
| * allocations. |
| */ |
| static svn_error_t * |
| get_content_length(apr_size_t *length, |
| fs_fs_t *fs, |
| fragment_t *fragment, |
| svn_boolean_t add_padding, |
| apr_pool_t *pool) |
| { |
| svn_string_t *content; |
| |
| SVN_ERR(get_fragment_content(&content, fs, fragment, pool)); |
| if (add_padding) |
| switch (fragment->kind) |
| { |
| case dir_fragment: |
| *length = content->len + 16; |
| break; |
| case noderev_fragment: |
| *length = content->len + 3; |
| break; |
| default: |
| *length = content->len; |
| break; |
| } |
| else |
| *length = content->len; |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Move the FRAGMENT to global file offset NEW_POSITION. Update the target |
| * location info of the underlying object as well. |
| */ |
| static void |
| move_fragment(fragment_t *fragment, |
| apr_size_t new_position) |
| { |
| revision_info_t *info; |
| representation_t *representation; |
| noderev_t *node; |
| |
| /* move the fragment */ |
| fragment->position = new_position; |
| |
| /* move the underlying object */ |
| switch (fragment->kind) |
| { |
| case header_fragment: |
| info = fragment->data; |
| info->target.offset = new_position; |
| break; |
| |
| case changes_fragment: |
| info = fragment->data; |
| info->target.changes = new_position - info->target.offset; |
| break; |
| |
| case property_fragment: |
| case file_fragment: |
| case dir_fragment: |
| representation = fragment->data; |
| representation->target.offset = new_position; |
| break; |
| |
| case noderev_fragment: |
| node = fragment->data; |
| node->target.offset = new_position; |
| break; |
| } |
| } |
| |
| /* Move the fragments in PACK's target fragment list to their final offsets. |
| * This may require several iterations if the fudge factors turned out to |
| * be insufficient. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| pack_revisions(fs_fs_t *fs, |
| revision_pack_t *pack, |
| apr_pool_t *pool) |
| { |
| int i; |
| fragment_t *fragment, *next; |
| svn_boolean_t needed_to_expand; |
| revision_info_t *info; |
| apr_size_t current_pos, len, old_len; |
| |
| apr_pool_t *itempool = svn_pool_create(pool); |
| |
| /* update all directory reps. Chances are that most of the target rep |
| * sizes are now close to accurate. */ |
| SVN_ERR(update_noderevs(fs, pack, pool)); |
| |
| /* compression phase: pack all fragments tightly with only a very small |
| * fudge factor. This should cause offsets to shrink, thus all the |
| * actual fragment rate should tend to be even smaller afterwards. */ |
| current_pos = pack->info->nelts > 1 ? 64 : 0; |
| for (i = 0; i + 1 < pack->fragments->nelts; ++i) |
| { |
| fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t); |
| SVN_ERR(get_content_length(&len, fs, fragment, TRUE, itempool)); |
| move_fragment(fragment, current_pos); |
| current_pos += len; |
| |
| svn_pool_clear(itempool); |
| } |
| |
| /* don't forget the final fragment (last revision's revision header) */ |
| fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t); |
| fragment->position = current_pos; |
| |
| /* expansion phase: check whether all fragments fit into their allotted |
| * slots. Grow them geometrically if they don't fit. Retry until they |
| * all do fit. |
| * Note: there is an upper limit to which fragments can grow. So, this |
| * loop will terminate. Often, no expansion will be necessary at all. */ |
| do |
| { |
| needed_to_expand = FALSE; |
| current_pos = pack->info->nelts > 1 ? 64 : 0; |
| |
| for (i = 0; i + 1 < pack->fragments->nelts; ++i) |
| { |
| fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t); |
| next = &APR_ARRAY_IDX(pack->fragments, i + 1, fragment_t); |
| old_len = next->position - fragment->position; |
| |
| SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool)); |
| |
| if (len > old_len) |
| { |
| len = (apr_size_t)(len * 1.1) + 10; |
| needed_to_expand = TRUE; |
| } |
| else |
| len = old_len; |
| |
| if (i == pack->info->nelts - 1) |
| { |
| info = APR_ARRAY_IDX(pack->info, pack->info->nelts - 1, revision_info_t*); |
| info->target.offset = current_pos; |
| } |
| |
| move_fragment(fragment, current_pos); |
| current_pos += len; |
| |
| svn_pool_clear(itempool); |
| } |
| |
| fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t); |
| fragment->position = current_pos; |
| |
| /* update the revision |
| * sizes (they all end at the end of the pack file now) */ |
| SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool)); |
| current_pos += len; |
| |
| for (i = 0; i < pack->info->nelts; ++i) |
| { |
| info = APR_ARRAY_IDX(pack->info, i, revision_info_t*); |
| info->target.end = current_pos; |
| } |
| } |
| while (needed_to_expand); |
| |
| svn_pool_destroy(itempool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Write reorg'ed target content for PACK in FS. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| write_revisions(fs_fs_t *fs, |
| revision_pack_t *pack, |
| apr_pool_t *pool) |
| { |
| int i; |
| fragment_t *fragment = NULL; |
| svn_string_t *content; |
| |
| apr_pool_t *itempool = svn_pool_create(pool); |
| apr_pool_t *iterpool = svn_pool_create(pool); |
| |
| apr_file_t *file; |
| apr_size_t current_pos = 0; |
| svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool); |
| |
| /* create the target file */ |
| const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s", |
| fs->path, pack->base / fs->max_files_per_dir, |
| pack->info->nelts > 1 ? ".pack" : ""); |
| SVN_ERR(svn_io_make_dir_recursively(dir, pool)); |
| SVN_ERR(svn_io_file_open(&file, |
| pack->info->nelts > 1 |
| ? apr_psprintf(iterpool, "%s/pack", dir) |
| : apr_psprintf(iterpool, "%s/%ld", dir, pack->base), |
| APR_WRITE | APR_CREATE | APR_BUFFERED, |
| APR_OS_DEFAULT, |
| iterpool)); |
| |
| /* write all fragments */ |
| for (i = 0; i < pack->fragments->nelts; ++i) |
| { |
| apr_size_t padding; |
| |
| /* get fragment content to write */ |
| fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t); |
| SVN_ERR(get_fragment_content(&content, fs, fragment, itempool)); |
| SVN_ERR_ASSERT(fragment->position >= current_pos); |
| |
| /* number of bytes between this and the previous fragment */ |
| if ( fragment->kind == header_fragment |
| && i+1 < pack->fragments->nelts) |
| /* special case: header fragments are aligned to the slot end */ |
| padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position - |
| content->len - current_pos; |
| else |
| /* standard case: fragments are aligned to the slot start */ |
| padding = fragment->position - current_pos; |
| |
| /* write padding between fragments */ |
| if (padding) |
| { |
| while (null_buffer->len < padding) |
| svn_stringbuf_appendbyte(null_buffer, 0); |
| |
| SVN_ERR(svn_io_file_write_full(file, |
| null_buffer->data, |
| padding, |
| NULL, |
| itempool)); |
| current_pos += padding; |
| } |
| |
| /* write fragment content */ |
| SVN_ERR(svn_io_file_write_full(file, |
| content->data, |
| content->len, |
| NULL, |
| itempool)); |
| current_pos += content->len; |
| |
| svn_pool_clear(itempool); |
| } |
| |
| apr_file_close(file); |
| |
| /* write new manifest file */ |
| if (pack->info->nelts > 1) |
| { |
| svn_stream_t *stream; |
| SVN_ERR(svn_io_file_open(&file, |
| apr_psprintf(iterpool, "%s/manifest", dir), |
| APR_WRITE | APR_CREATE | APR_BUFFERED, |
| APR_OS_DEFAULT, |
| iterpool)); |
| stream = svn_stream_from_aprfile2(file, FALSE, iterpool); |
| |
| for (i = 0; i < pack->info->nelts; ++i) |
| { |
| revision_info_t *info = APR_ARRAY_IDX(pack->info, i, |
| revision_info_t *); |
| SVN_ERR(svn_stream_printf(stream, itempool, |
| "%" APR_SIZE_T_FMT "\n", |
| info->target.offset)); |
| svn_pool_clear(itempool); |
| } |
| } |
| |
| /* cleanup */ |
| svn_pool_destroy(itempool); |
| svn_pool_destroy(iterpool); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Write reorg'ed target content for all revisions in FS. To maximize |
| * data locality, pack and write in one go per pack file. |
| * Use POOL for allocations. |
| */ |
| static svn_error_t * |
| pack_and_write_revisions(fs_fs_t *fs, |
| apr_pool_t *pool) |
| { |
| int i; |
| |
| SVN_ERR(svn_io_make_dir_recursively(apr_psprintf(pool, "%s/new", |
| fs->path), |
| pool)); |
| |
| for (i = 0; i < fs->packs->nelts; ++i) |
| { |
| revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*); |
| if (pack->base % fs->max_files_per_dir == 0) |
| print_progress(pack->base); |
| |
| SVN_ERR(pack_revisions(fs, pack, pool)); |
| SVN_ERR(write_revisions(fs, pack, pool)); |
| } |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* For the directory REPRESENTATION in FS, construct the new (target) |
| * serialized plaintext representation and return it in *CONTENT. |
| * Allocate the result in POOL and temporaries in SCRATCH_POOL. |
| */ |
| static svn_error_t * |
| get_updated_dir(svn_string_t **content, |
| fs_fs_t *fs, |
| representation_t *representation, |
| apr_pool_t *pool, |
| apr_pool_t *scratch_pool) |
| { |
| apr_hash_t *hash; |
| apr_pool_t *hash_pool = svn_pool_create(scratch_pool); |
| apr_array_header_t *dir = representation->dir->entries; |
| int i; |
| svn_stream_t *stream; |
| svn_stringbuf_t *result; |
| |
| /* get the original content */ |
| SVN_ERR(read_dir(&hash, fs, representation, scratch_pool)); |
| hash = apr_hash_copy(hash_pool, hash); |
| |
| /* update all entries */ |
| for (i = 0; i < dir->nelts; ++i) |
| { |
| char buffer[256]; |
| svn_string_t *new_val; |
| apr_size_t pos; |
| |
| /* find the original entry for for the current name */ |
| direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *); |
| svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len); |
| if (str_val == NULL) |
| return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, |
| _("Dir entry '%s' not found"), entry->name); |
| |
| SVN_ERR_ASSERT(str_val->len < sizeof(buffer)); |
| |
| /* create and updated node ID */ |
| memcpy(buffer, str_val->data, str_val->len+1); |
| pos = strchr(buffer, '/') - buffer + 1; |
| pos += svn__ui64toa(buffer + pos, entry->node->target.offset - entry->node->revision->target.offset); |
| new_val = svn_string_ncreate(buffer, pos, hash_pool); |
| |
| /* store it in the hash */ |
| apr_hash_set(hash, entry->name, entry->name_len, new_val); |
| } |
| |
| /* serialize the updated hash */ |
| result = svn_stringbuf_create_ensure(representation->target.size, pool); |
| stream = svn_stream_from_stringbuf(result, hash_pool); |
| SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool)); |
| svn_pool_destroy(hash_pool); |
| |
| /* done */ |
| *content = svn_stringbuf__morph_into_string(result); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Calculate the delta representation for the given CONTENT and BASE. |
| * Return the rep in *DIFF. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| diff_stringbufs(svn_stringbuf_t *diff, |
| svn_string_t *base, |
| svn_string_t *content, |
| apr_pool_t *pool) |
| { |
| svn_txdelta_window_handler_t diff_wh; |
| void *diff_whb; |
| |
| svn_stream_t *stream; |
| svn_stream_t *source = svn_stream_from_string(base, pool); |
| svn_stream_t *target = svn_stream_from_stringbuf(diff, pool); |
| |
| /* Prepare to write the svndiff data. */ |
| svn_txdelta_to_svndiff3(&diff_wh, |
| &diff_whb, |
| target, |
| 1, |
| SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, |
| pool); |
| |
| /* create delta stream */ |
| stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool); |
| |
| /* run delta */ |
| SVN_ERR(svn_stream_write(stream, content->data, &content->len)); |
| SVN_ERR(svn_stream_close(stream)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Update the noderev id value for KEY in the textual noderev representation |
| * in NODE_REV. Take the new id from NODE. This is a no-op if the KEY |
| * cannot be found. |
| */ |
| static void |
| update_id(svn_stringbuf_t *node_rev, |
| const char *key, |
| noderev_t *node) |
| { |
| char *newline_pos = 0; |
| char *pos; |
| |
| /* we need to update the offset only -> find its position */ |
| pos = strstr(node_rev->data, key); |
| if (pos) |
| pos = strchr(pos, '/'); |
| if (pos) |
| newline_pos = strchr(++pos, '\n'); |
| |
| if (pos && newline_pos) |
| { |
| /* offset data has been found -> replace it */ |
| char temp[SVN_INT64_BUFFER_SIZE]; |
| apr_size_t len = svn__i64toa(temp, node->target.offset - node->revision->target.offset); |
| svn_stringbuf_replace(node_rev, |
| pos - node_rev->data, newline_pos - pos, |
| temp, len); |
| } |
| } |
| |
| /* Update the representation id value for KEY in the textual noderev |
| * representation in NODE_REV. Take the offset, sizes and new MD5 from |
| * REPRESENTATION. Use SCRATCH_POOL for allocations. |
| * This is a no-op if the KEY cannot be found. |
| */ |
| static void |
| update_text(svn_stringbuf_t *node_rev, |
| const char *key, |
| representation_t *representation, |
| apr_pool_t *scratch_pool) |
| { |
| apr_size_t key_len = strlen(key); |
| char *pos = strstr(node_rev->data, key); |
| char *val_pos; |
| |
| if (!pos) |
| return; |
| |
| val_pos = pos + key_len; |
| if (representation->dir) |
| { |
| /* for directories, we need to write all rep info anew */ |
| char *newline_pos = strchr(val_pos, '\n'); |
| svn_checksum_t checksum; |
| const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT " %" |
| APR_SIZE_T_FMT" %" APR_SIZE_T_FMT " %s", |
| representation->revision->revision, |
| representation->target.offset - representation->revision->target.offset, |
| representation->target.size, |
| representation->dir->size, |
| svn_checksum_to_cstring(&checksum, |
| scratch_pool)); |
| |
| checksum.digest = representation->dir->target_md5; |
| checksum.kind = svn_checksum_md5; |
| svn_stringbuf_replace(node_rev, |
| val_pos - node_rev->data, newline_pos - val_pos, |
| temp, strlen(temp)); |
| } |
| else |
| { |
| /* ordinary representation: replace offset and rep size only. |
| * Content size and checksums are unchanged. */ |
| const char* temp; |
| char *end_pos = strchr(val_pos, ' '); |
| |
| val_pos = end_pos + 1; |
| end_pos = strchr(strchr(val_pos, ' ') + 1, ' '); |
| temp = apr_psprintf(scratch_pool, "%" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT, |
| representation->target.offset - representation->revision->target.offset, |
| representation->target.size); |
| |
| svn_stringbuf_replace(node_rev, |
| val_pos - node_rev->data, end_pos - val_pos, |
| temp, strlen(temp)); |
| } |
| } |
| |
| /* Get the target content (data block as to be written to the file) for |
| * the given FRAGMENT in FS. Return the content in *CONTENT. Use POOL |
| * for allocations. |
| * |
| * Note that, as a side-effect, this will update the target rep. info for |
| * directories. |
| */ |
| static svn_error_t * |
| get_fragment_content(svn_string_t **content, |
| fs_fs_t *fs, |
| fragment_t *fragment, |
| apr_pool_t *pool) |
| { |
| revision_info_t *info; |
| representation_t *representation; |
| noderev_t *node; |
| svn_string_t *revision_content, *base_content; |
| svn_stringbuf_t *header, *node_rev, *text; |
| apr_size_t header_size; |
| svn_checksum_t *checksum = NULL; |
| |
| switch (fragment->kind) |
| { |
| /* revision headers can be constructed from target position info */ |
| case header_fragment: |
| info = fragment->data; |
| *content = svn_string_createf(pool, |
| "\n%" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n", |
| info->root_noderev->target.offset - info->target.offset, |
| info->target.changes); |
| return SVN_NO_ERROR; |
| |
| /* The changes list remains untouched */ |
| case changes_fragment: |
| info = fragment->data; |
| SVN_ERR(get_content(&revision_content, fs, info->revision, pool)); |
| |
| *content = svn_string_create_empty(pool); |
| (*content)->data = revision_content->data + info->original.changes; |
| (*content)->len = info->target.changes_len; |
| return SVN_NO_ERROR; |
| |
| /* property and file reps get new headers any need to be rewritten, |
| * iff the base rep is a directory. The actual (deltified) content |
| * remains unchanged, though. MD5 etc. do not change. */ |
| case property_fragment: |
| case file_fragment: |
| representation = fragment->data; |
| SVN_ERR(get_content(&revision_content, fs, |
| representation->revision->revision, pool)); |
| |
| if (representation->delta_base) |
| if (representation->delta_base->dir) |
| { |
| /* if the base happens to be a directory, reconstruct the |
| * full text and represent it as PLAIN rep. */ |
| SVN_ERR(get_combined_window(&text, fs, representation, pool)); |
| representation->target.size = text->len; |
| |
| svn_stringbuf_insert(text, 0, "PLAIN\n", 6); |
| svn_stringbuf_appendcstr(text, "ENDREP\n"); |
| *content = svn_stringbuf__morph_into_string(text); |
| |
| return SVN_NO_ERROR; |
| } |
| else |
| /* construct a new rep header */ |
| if (representation->delta_base == fs->null_base) |
| header = svn_stringbuf_create("DELTA\n", pool); |
| else |
| header = svn_stringbuf_createf(pool, |
| "DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n", |
| representation->delta_base->revision->revision, |
| representation->delta_base->target.offset |
| - representation->delta_base->revision->target.offset, |
| representation->delta_base->target.size); |
| else |
| header = svn_stringbuf_create("PLAIN\n", pool); |
| |
| /* if it exists, the actual delta base is unchanged. Hence, this |
| * rep is unchanged even if it has been deltified. */ |
| header_size = strchr(revision_content->data + |
| representation->original.offset, '\n') - |
| revision_content->data - |
| representation->original.offset + 1; |
| svn_stringbuf_appendbytes(header, |
| revision_content->data + |
| representation->original.offset + |
| header_size, |
| representation->original.size); |
| svn_stringbuf_appendcstr(header, "ENDREP\n"); |
| *content = svn_stringbuf__morph_into_string(header); |
| return SVN_NO_ERROR; |
| |
| /* directory reps need to be rewritten (and deltified) completely. |
| * As a side-effect, update the MD5 and target content size. */ |
| case dir_fragment: |
| /* construct new content and update MD5 */ |
| representation = fragment->data; |
| SVN_ERR(get_updated_dir(&revision_content, fs, representation, |
| pool, pool)); |
| SVN_ERR(svn_checksum(&checksum, svn_checksum_md5, |
| revision_content->data, revision_content->len, |
| pool)); |
| memcpy(representation->dir->target_md5, |
| checksum->digest, |
| sizeof(representation->dir->target_md5)); |
| |
| /* deltify against the base rep if necessary */ |
| if (representation->delta_base) |
| { |
| if (representation->delta_base->dir == NULL) |
| { |
| /* dummy or non-dir base rep -> self-compress only */ |
| header = svn_stringbuf_create("DELTA\n", pool); |
| base_content = svn_string_create_empty(pool); |
| } |
| else |
| { |
| /* deltify against base rep (which is a directory, too)*/ |
| representation_t *base_rep = representation->delta_base; |
| header = svn_stringbuf_createf(pool, |
| "DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n", |
| base_rep->revision->revision, |
| base_rep->target.offset - base_rep->revision->target.offset, |
| base_rep->target.size); |
| SVN_ERR(get_updated_dir(&base_content, fs, base_rep, |
| pool, pool)); |
| } |
| |
| /* run deltification and update target content size */ |
| header_size = header->len; |
| SVN_ERR(diff_stringbufs(header, base_content, |
| revision_content, pool)); |
| representation->dir->size = revision_content->len; |
| representation->target.size = header->len - header_size; |
| svn_stringbuf_appendcstr(header, "ENDREP\n"); |
| *content = svn_stringbuf__morph_into_string(header); |
| } |
| else |
| { |
| /* no delta base (not even a dummy) -> PLAIN rep */ |
| representation->target.size = revision_content->len; |
| representation->dir->size = revision_content->len; |
| *content = svn_string_createf(pool, "PLAIN\n%sENDREP\n", |
| revision_content->data); |
| } |
| |
| return SVN_NO_ERROR; |
| |
| /* construct the new noderev content. No side-effects.*/ |
| case noderev_fragment: |
| /* get the original noderev as string */ |
| node = fragment->data; |
| SVN_ERR(get_content(&revision_content, fs, |
| node->revision->revision, pool)); |
| node_rev = svn_stringbuf_ncreate(revision_content->data + |
| node->original.offset, |
| node->original.size, |
| pool); |
| |
| /* update the values that may have hanged for target */ |
| update_id(node_rev, "id: ", node); |
| update_id(node_rev, "pred: ", node->predecessor); |
| update_text(node_rev, "text: ", node->text, pool); |
| update_text(node_rev, "props: ", node->props, pool); |
| |
| *content = svn_stringbuf__morph_into_string(node_rev); |
| return SVN_NO_ERROR; |
| } |
| |
| SVN_ERR_ASSERT(0); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* In the repository at PATH, restore the original content in case we ran |
| * this reorg tool before. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| prepare_repo(const char *path, apr_pool_t *pool) |
| { |
| svn_node_kind_t kind; |
| |
| const char *old_path = svn_dirent_join(path, "db/old", pool); |
| const char *new_path = svn_dirent_join(path, "new", pool); |
| const char *revs_path = svn_dirent_join(path, "db/revs", pool); |
| const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool); |
| const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool); |
| |
| /* is there a backup? */ |
| SVN_ERR(svn_io_check_path(old_path, &kind, pool)); |
| if (kind == svn_node_dir) |
| { |
| /* yes, restore the org content from it */ |
| SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool)); |
| SVN_ERR(svn_io_file_move(revs_path, new_path, pool)); |
| SVN_ERR(svn_io_file_move(old_path, revs_path, pool)); |
| SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool)); |
| } |
| |
| /* same for the rep cache db */ |
| SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool)); |
| if (kind == svn_node_file) |
| SVN_ERR(svn_io_file_move(old_rep_cache_path, rep_cache_path, pool)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* In the repository at PATH, create a backup of the orig content and |
| * replace it with the reorg'ed. Use POOL for allocations. |
| */ |
| static svn_error_t * |
| activate_new_revs(const char *path, apr_pool_t *pool) |
| { |
| svn_node_kind_t kind; |
| |
| const char *old_path = svn_dirent_join(path, "db/old", pool); |
| const char *new_path = svn_dirent_join(path, "new", pool); |
| const char *revs_path = svn_dirent_join(path, "db/revs", pool); |
| const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool); |
| const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool); |
| |
| /* if there is no backup, yet, move the current repo content to the backup |
| * and place it with the new (reorg'ed) data. */ |
| SVN_ERR(svn_io_check_path(old_path, &kind, pool)); |
| if (kind == svn_node_none) |
| { |
| SVN_ERR(svn_io_file_move(revs_path, old_path, pool)); |
| SVN_ERR(svn_io_file_move(new_path, revs_path, pool)); |
| } |
| |
| /* same for the rep cache db */ |
| SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool)); |
| if (kind == svn_node_none) |
| SVN_ERR(svn_io_file_move(rep_cache_path, old_rep_cache_path, pool)); |
| |
| return SVN_NO_ERROR; |
| } |
| |
| /* Write tool usage info text to OSTREAM using PROGNAME as a prefix and |
| * POOL for allocations. |
| */ |
| static void |
| print_usage(svn_stream_t *ostream, const char *progname, |
| apr_pool_t *pool) |
| { |
| svn_error_clear(svn_stream_printf(ostream, pool, |
| "\n" |
| "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" |
| "!!! This is an experimental tool. Don't use it on production data !!!\n" |
| "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" |
| "\n" |
| "Usage: %s <repo> <cachesize>\n" |
| "\n" |
| "Optimize the repository at local path <repo> staring from revision 0.\n" |
| "Use up to <cachesize> MB of memory for caching. This does not include\n" |
| "temporary representation of the repository structure, i.e. the actual\n" |
| "memory will be higher and <cachesize> be the lower limit.\n", |
| progname)); |
| } |
| |
| /* linear control flow */ |
| int main(int argc, const char *argv[]) |
| { |
| apr_pool_t *pool; |
| svn_stream_t *ostream; |
| svn_error_t *svn_err; |
| const char *repo_path = NULL; |
| svn_revnum_t start_revision = 0; |
| apr_size_t memsize = 0; |
| apr_uint64_t temp = 0; |
| fs_fs_t *fs; |
| |
| apr_initialize(); |
| atexit(apr_terminate); |
| |
| pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE)); |
| |
| svn_err = svn_stream_for_stdout(&ostream, pool); |
| if (svn_err) |
| { |
| svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); |
| return 2; |
| } |
| |
| if (argc != 3) |
| { |
| print_usage(ostream, argv[0], pool); |
| return 2; |
| } |
| |
| svn_err = svn_cstring_strtoui64(&temp, argv[2], 0, APR_SIZE_MAX, 10); |
| if (svn_err) |
| { |
| print_usage(ostream, argv[0], pool); |
| svn_error_clear(svn_err); |
| return 2; |
| } |
| |
| memsize = (apr_size_t)temp; |
| repo_path = argv[1]; |
| start_revision = 0; |
| |
| printf("\nPreparing repository\n"); |
| svn_err = prepare_repo(repo_path, pool); |
| |
| if (!svn_err) |
| { |
| printf("Reading revisions\n"); |
| svn_err = read_revisions(&fs, repo_path, start_revision, memsize, pool); |
| } |
| |
| if (!svn_err) |
| { |
| printf("\nReordering revision content\n"); |
| svn_err = reorder_revisions(fs, pool); |
| } |
| |
| if (!svn_err) |
| { |
| printf("\nPacking and writing revisions\n"); |
| svn_err = pack_and_write_revisions(fs, pool); |
| } |
| |
| if (!svn_err) |
| { |
| printf("\nSwitch to new revs\n"); |
| svn_err = activate_new_revs(repo_path, pool); |
| } |
| |
| if (svn_err) |
| { |
| svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); |
| return 2; |
| } |
| |
| return 0; |
| } |