On the diff-optimizations-tokens branch:

Add skeleton implementation of suffix scanning. Getting tokens backwards
(datasource_get_previous_token) and pushing back suffix tokens
(token_pushback_suffix) are still stubs; their real implementations will be
added in a follow-up commit.

* subversion/include/svn_diff.h
  (svn_diff_fns_t): Add new function types datasource_get_previous_token
   and token_pushback_suffix. Add parameter open_at_end to datasource_open.

* subversion/libsvn_diff/diff_file.c
  (datasource_get_previous_token): New function, stub implementation.
  (token_pushback_suffix): New function, stub implementation.
  (datasource_open): Add parameter open_at_end. Add implementation to open
   the datasource at the end (read last chunk, point curp to the last byte).
   Make sure the actual file is only opened if it hasn't been opened yet, so
   this function can be reused to jump to the end or the beginning of the
   file at will. While we are at it, remove local variables curp and endp,
   since they aren't needed anymore.

* subversion/libsvn_diff/diff_memory.c
  (datasource_get_previous_token): New function, stub implementation.
  (token_pushback_suffix): New function, stub implementation.
  (datasource_open): Add parameter open_at_end.

* subversion/libsvn_diff/token.c
  (svn_diff__get_tokens): Pass FALSE for parameter open_at_end to
   datasource_open, for backwards compatibility.
  (find_identical_suffix): New function.
  (find_identical_prefix): Remove parameter reached_one_eof, and convert it
   to a local variable, since it doesn't need to be returned anymore.
  (svn_diff__get_all_tokens): Add call to find_identical_suffix, before
   calling find_identical_prefix, and call datasource_open in between to
   reposition the datasource at the end and the start respectively.

git-svn-id: https://svn.apache.org/repos/asf/subversion/branches/diff-optimizations-tokens@1039986 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/subversion/include/svn_diff.h b/subversion/include/svn_diff.h index 98481ab..ac9dbaf 100644 --- a/subversion/include/svn_diff.h +++ b/subversion/include/svn_diff.h
@@ -110,7 +110,8 @@ { /** Open the datasource of type @a datasource. */ svn_error_t *(*datasource_open)(void *diff_baton, - svn_diff_datasource_e datasource); + svn_diff_datasource_e datasource, + svn_boolean_t open_at_end); /** Close the datasource of type @a datasource. */ svn_error_t *(*datasource_close)(void *diff_baton, @@ -124,6 +125,14 @@ void *diff_baton, svn_diff_datasource_e datasource); + /** Get the previous "token" from the datasource of type @a datasource + * (reading backwards). Return a "token" in @a *token. Leave @a token + * untouched when the datasource is exhausted. + */ + svn_error_t *(*datasource_get_previous_token)(void **token, + void *diff_baton, + svn_diff_datasource_e datasource); + /** A function for ordering the tokens, resembling 'strcmp' in functionality. * @a compare should contain the return value of the comparison: * If @a ltoken and @a rtoken are "equal", return 0. If @a ltoken is @@ -139,6 +148,10 @@ void *token, svn_diff_datasource_e datasource); + svn_error_t *(*token_pushback_suffix)(void *diff_baton, + void *token, + svn_diff_datasource_e datasource); + /** Free @a token from memory, the diff algorithm is done with it. */ void (*token_discard)(void *diff_baton, void *token);
diff --git a/subversion/libsvn_diff/diff_file.c b/subversion/libsvn_diff/diff_file.c index 9b3a011..776a74a 100644 --- a/subversion/libsvn_diff/diff_file.c +++ b/subversion/libsvn_diff/diff_file.c
@@ -214,35 +214,49 @@ * * Implements svn_diff_fns_t::datasource_open. */ static svn_error_t * -datasource_open(void *baton, svn_diff_datasource_e datasource) +datasource_open(void *baton, + svn_diff_datasource_e datasource, + svn_boolean_t open_at_end) { svn_diff__file_baton_t *file_baton = baton; struct file_info *file = &file_baton->files[datasource_to_index(datasource)]; apr_finfo_t finfo; apr_off_t length; - char *curp; - char *endp; - SVN_ERR(svn_io_file_open(&file->file, file->path, - APR_READ, APR_OS_DEFAULT, file_baton->pool)); + if (file->file == NULL) + { + SVN_ERR(svn_io_file_open(&file->file, file->path, + APR_READ, APR_OS_DEFAULT, file_baton->pool)); + SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, + file->file, file_baton->pool)); + file->size = finfo.size; + } - SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, - file->file, file_baton->pool)); - - file->size = finfo.size; - length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size; - - if (length == 0) + if (file->size == 0) return SVN_NO_ERROR; - endp = curp = apr_palloc(file_baton->pool, (apr_size_t) length); - endp += length; + if (open_at_end) + { + file->chunk = (int) offset_to_chunk(file->size); /* last chunk */ + length = offset_in_chunk(file->size); + if (file->chunk == 0) /* if last chunk is the only chunk */ + file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length); + else + file->buffer = apr_palloc(file_baton->pool, CHUNK_SIZE); + file->endp = file->buffer + length; + file->curp = file->endp - 1; + } + else + { + file->chunk = 0; + length = finfo.size > CHUNK_SIZE ? 
CHUNK_SIZE : finfo.size; + file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length); + file->endp = file->buffer + length; + file->curp = file->buffer; + } - file->buffer = file->curp = curp; - file->endp = endp; - - return read_chunk(file->file, file->path, - curp, length, 0, file_baton->pool); + return read_chunk(file->file, file->path, file->buffer, length, + chunk_to_offset(file->chunk), file_baton->pool); } @@ -426,6 +440,19 @@ return SVN_NO_ERROR; } + +/* Implements svn_diff_fns_t::datasource_get_previous_token */ +static svn_error_t * +datasource_get_previous_token(void **token, void *baton, + svn_diff_datasource_e datasource) +{ + /* ### TODO */ + *token = NULL; + + return SVN_NO_ERROR; +} + + #define COMPARE_CHUNK_SIZE 4096 /* Implements svn_diff_fns_t::token_compare */ @@ -580,6 +607,17 @@ return SVN_NO_ERROR; } + +static svn_error_t * +token_pushback_suffix(void *baton, + void *token, + svn_diff_datasource_e datasource) +{ + /* ### TODO */ + return SVN_NO_ERROR; +} + + /* Implements svn_diff_fns_t::token_discard */ static void token_discard(void *baton, void *token) @@ -608,8 +646,10 @@ datasource_open, datasource_close, datasource_get_next_token, + datasource_get_previous_token, token_compare, token_pushback_prefix, + token_pushback_suffix, token_discard, token_discard_all };
diff --git a/subversion/libsvn_diff/diff_memory.c b/subversion/libsvn_diff/diff_memory.c index 35bc734..e32af32 100644 --- a/subversion/libsvn_diff/diff_memory.c +++ b/subversion/libsvn_diff/diff_memory.c
@@ -89,7 +89,9 @@ /* Implements svn_diff_fns_t::datasource_open */ static svn_error_t * -datasource_open(void *baton, svn_diff_datasource_e datasource) +datasource_open(void *baton, + svn_diff_datasource_e datasource, + svn_boolean_t open_at_end) { /* Do nothing: everything is already there and initialized to 0 */ return SVN_NO_ERROR; @@ -138,6 +140,18 @@ return SVN_NO_ERROR; } +/* Implements svn_diff_fns_t::datasource_get_previous_token */ +static svn_error_t * +datasource_get_previous_token(void **token, void *baton, + svn_diff_datasource_e datasource) +{ + /* ### TODO */ + *token = NULL; + + return SVN_NO_ERROR; +} + + /* Implements svn_diff_fns_t::token_compare */ static svn_error_t * token_compare(void *baton, void *token1, void *token2, int *result) @@ -180,6 +194,17 @@ return SVN_NO_ERROR; } + +static svn_error_t * +token_pushback_suffix(void *baton, + void *token, + svn_diff_datasource_e datasource) +{ + /* ### TODO */ + return SVN_NO_ERROR; +} + + /* Implements svn_diff_fns_t::token_discard */ static void token_discard(void *baton, void *token) @@ -205,8 +230,10 @@ datasource_open, datasource_close, datasource_get_next_token, + datasource_get_previous_token, token_compare, token_pushback_prefix, + token_pushback_suffix, token_discard, token_discard_all };
diff --git a/subversion/libsvn_diff/token.c b/subversion/libsvn_diff/token.c index d87b185..381950d 100644 --- a/subversion/libsvn_diff/token.c +++ b/subversion/libsvn_diff/token.c
@@ -152,7 +152,7 @@ *position_list = NULL; - SVN_ERR(vtable->datasource_open(diff_baton, datasource)); + SVN_ERR(vtable->datasource_open(diff_baton, datasource, FALSE)); position_ref = &start_position; offset = 0; @@ -188,22 +188,77 @@ return SVN_NO_ERROR; } -/* Find identical prefix between all datasources +/* Find identical suffix between all datasources */ static svn_error_t * -find_identical_prefix(svn_boolean_t *reached_one_eof, - apr_off_t *prefix_lines, +find_identical_suffix(svn_boolean_t *reached_one_bof, void *diff_baton, const svn_diff_fns_t *vtable, svn_diff_datasource_e datasource[], int datasource_len) { void *token[4]; - svn_boolean_t is_match, reached_all_eof; + svn_boolean_t is_match, reached_all_bof; + int i, rv; + + *reached_one_bof = FALSE; + while (1) + { + /* Keep getting tokens and matching them, until there are no tokens + left, or we encounter a non-matching token. */ + for (i = 0; i < datasource_len; i++) + { + SVN_ERR(vtable->datasource_get_previous_token(&token[i], diff_baton, + datasource[i])); + *reached_one_bof = *reached_one_bof || token[i] == NULL; + } + if (*reached_one_bof) + { + break; + } + else + { + for (i = 1, is_match = TRUE; is_match && i < datasource_len; i++) + { + SVN_ERR(vtable->token_compare(diff_baton, token[0], token[i], &rv)); + is_match = is_match && rv == 0; + } + if (!is_match) + break; + } + } + + /* If all files reached their beginning (i.e. are fully identical), + we're done. */ + for (i = 0, reached_all_bof = TRUE; i < datasource_len; i++) + reached_all_bof = reached_all_bof && token[i] == NULL; + if (reached_all_bof) + return SVN_NO_ERROR; + + /* Push back the non-matching token we read. 
*/ + for (i = 0; i < datasource_len; i++) + if (token[i] != NULL) + SVN_ERR(vtable->token_pushback_suffix(diff_baton, token[i], datasource[i])); + + return SVN_NO_ERROR; +} + + +/* Find identical prefix between all datasources + */ +static svn_error_t * +find_identical_prefix(apr_off_t *prefix_lines, + void *diff_baton, + const svn_diff_fns_t *vtable, + svn_diff_datasource_e datasource[], + int datasource_len) +{ + void *token[4]; + svn_boolean_t is_match, reached_one_eof, reached_all_eof; int i, rv; *prefix_lines = 0; - *reached_one_eof = FALSE; + reached_one_eof = FALSE; while (1) { /* Keep getting tokens and matching them, until there are no tokens @@ -212,9 +267,9 @@ { SVN_ERR(vtable->datasource_get_next_token(NULL, &token[i], diff_baton, datasource[i])); - *reached_one_eof = *reached_one_eof || token[i] == NULL; + reached_one_eof = reached_one_eof || token[i] == NULL; } - if (*reached_one_eof) + if (reached_one_eof) { break; } @@ -267,20 +322,28 @@ void *token; apr_off_t offset; apr_uint32_t hash; - svn_boolean_t reached_one_eof; + svn_boolean_t reached_one_bof; int i; for (i = 0; i < datasource_len; i++) { *position_list[i] = NULL; - SVN_ERR(vtable->datasource_open(diff_baton, datasource[i])); + SVN_ERR(vtable->datasource_open(diff_baton, datasource[i], TRUE)); } - /* find identical prefix */ - SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines, - diff_baton, vtable, datasource, datasource_len)); + /* find identical suffix */ + SVN_ERR(find_identical_suffix(&reached_one_bof, diff_baton, vtable, + datasource, datasource_len)); - /* ### TODO: find identical suffix (if not eof) */ + for (i = 0; i < datasource_len; i++) + { + SVN_ERR(vtable->datasource_open(diff_baton, datasource[i], FALSE)); + } + + /* find identical prefix (but don't bother if one file was all suffix) */ + /*if (!reached_one_bof)*/ + SVN_ERR(find_identical_prefix(prefix_lines, diff_baton, vtable, + datasource, datasource_len)); for (i = 0; i < datasource_len; i++) {