blob: 2c7c2608ac1577ca47dce80f186c46cbb29b64fd [file] [log] [blame]
/*
* text-delta.c -- Internal text delta representation
*
* ====================================================================
* Copyright (c) 2000-2002 CollabNet. All rights reserved.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://subversion.tigris.org/license-1.html.
* If newer versions of this license are posted there, you may use a
* newer version instead, at your option.
*
* This software consists of voluntary contributions made by many
* individuals. For exact contribution history, see the revision
* history and logs, available at http://subversion.tigris.org/.
* ====================================================================
*/
#include <assert.h>
#include <string.h>
#include <apr_general.h> /* for APR_INLINE */
#include <apr_md5.h> /* for, um...MD5 stuff */
#include "svn_delta.h"
#include "svn_io.h"
#include "svn_pools.h"
#include "delta.h"
/* Text delta stream descriptor. */
struct svn_txdelta_stream_t {
/* These are copied from parameters passed to svn_txdelta. */
svn_stream_t *source;
svn_stream_t *target;
/* Private data */
svn_boolean_t more; /* TRUE if there are more data in the pool. */
apr_off_t pos; /* Offset of next read in source file. */
char *buf; /* Buffer for vdelta data. */
apr_size_t saved_source_len; /* Amount of source data saved in buf. */
apr_md5_ctx_t context; /* APR's MD5 context container. */
/* Calculated digest from MD5 operations.
NOTE: This is only valid after this stream has returned the NULL
(final) window. */
unsigned char digest[MD5_DIGESTSIZE];
};
/* Text delta applicator. */
struct apply_baton {
/* These are copied from parameters passed to svn_txdelta_apply. */
svn_stream_t *source;
svn_stream_t *target;
/* Private data. Between calls, SBUF contains the data from the
* last window's source view, as specified by SBUF_OFFSET and
* SBUF_LEN. The contents of TBUF are not interesting between
* calls. */
apr_pool_t *pool; /* Pool to allocate data from */
char *sbuf; /* Source buffer */
apr_size_t sbuf_size; /* Allocated source buffer space */
apr_off_t sbuf_offset; /* Offset of SBUF data in source stream */
apr_size_t sbuf_len; /* Length of SBUF data */
char *tbuf; /* Target buffer */
apr_size_t tbuf_size; /* Allocated target buffer space */
};
/* Context/baton for building an operation sequence. */
struct build_ops_baton_t {
int num_ops; /* current number of ops */
int ops_size; /* number of ops allocated */
svn_txdelta_op_t *ops; /* the operations */
svn_stringbuf_t *new_data; /* any new data used by the operations */
};
/* Allocate a delta window. */
static svn_txdelta_window_t *
make_window (apr_pool_t *pool, struct build_ops_baton_t *bob)
{
svn_txdelta_window_t *window;
svn_string_t *new_data = apr_palloc (pool, sizeof (*new_data));
window = apr_palloc (pool, sizeof (*window));
window->sview_offset = 0;
window->sview_len = 0;
window->tview_len = 0;
window->num_ops = bob->num_ops;
window->ops = bob->ops;
/* just copy the fields over, rather than alloc/copying into a whole new
svn_string_t structure. */
/* ### would be much nicer if window->new_data were not a ptr... */
new_data->data = bob->new_data->data;
new_data->len = bob->new_data->len;
window->new_data = new_data;
return window;
}
/* Insert a delta op into a delta window. */
void
svn_txdelta__insert_op (struct build_ops_baton_t *bob,
int opcode,
apr_off_t offset,
apr_off_t length,
const char *new_data,
apr_pool_t *pool)
{
svn_txdelta_op_t *op;
/* Create space for the new op. */
if (bob->num_ops == bob->ops_size)
{
svn_txdelta_op_t *const old_ops = bob->ops;
int const new_ops_size = (bob->ops_size == 0
? 16 : 2 * bob->ops_size);
bob->ops = apr_palloc (pool, new_ops_size * sizeof (*bob->ops));
/* Copy any existing ops into the new array */
if (old_ops)
memcpy (bob->ops, old_ops,
bob->ops_size * sizeof (*bob->ops));
bob->ops_size = new_ops_size;
}
/* Insert the op. svn_delta_source and svn_delta_target are
just inserted. For svn_delta_new, the new data must be
copied into the window. */
op = &bob->ops[bob->num_ops];
switch (opcode)
{
case svn_txdelta_source:
case svn_txdelta_target:
op->action_code = opcode;
op->offset = offset;
op->length = length;
break;
case svn_txdelta_new:
op->action_code = opcode;
op->offset = bob->new_data->len;
op->length = length;
svn_stringbuf_appendbytes (bob->new_data, new_data, length);
break;
default:
assert (!"unknown delta op.");
}
++bob->num_ops;
}
/* Allocate a delta stream descriptor. */
void
svn_txdelta (svn_txdelta_stream_t **stream,
svn_stream_t *source,
svn_stream_t *target,
apr_pool_t *pool)
{
*stream = apr_palloc (pool, sizeof (**stream));
(*stream)->source = source;
(*stream)->target = target;
(*stream)->more = TRUE;
(*stream)->pos = 0;
(*stream)->buf = apr_palloc (pool, 3 * SVN_STREAM_CHUNK_SIZE);
(*stream)->saved_source_len = 0;
/* Initialize MD5 digest calculation. */
apr_md5_init (&((*stream)->context));
}
/* Pull the next delta window from a stream.
Our current algorithm for picking source and target views is one
step up from the dumbest algorithm of "compare corresponding blocks
of each file." A problem with that algorithm is that an insertion
or deletion of N bytes near the beginning of the file will result
in N bytes of non-overlap in each window from then on. Our
algorithm lessens this problem by "padding" the source view with
half a target view's worth of data on each side.
For example, suppose the target view size is 16K. The dumbest
algorithm would use bytes 0-16K for the first source view, 16-32K
for the second source view, etc.. Our algorithm uses 0-24K for the
first source view, 8-40K for the second source view, etc..
Obviously, we're chewing some extra memory by doubling the source
view size, but small (less than 8K) insertions or deletions won't
result in non-overlap in every window.
If we run out of source data before we run out of target data, we
reuse the final chunk of data for the remaining windows. No grand
scheme at work there; that's just how the code worked out. */
svn_error_t *
svn_txdelta_next_window (svn_txdelta_window_t **window,
svn_txdelta_stream_t *stream,
apr_pool_t *pool)
{
if (!stream->more)
{
apr_status_t apr_err;
apr_err = apr_md5_final (stream->digest, &(stream->context));
if (! APR_STATUS_IS_SUCCESS (apr_err))
return svn_error_create
(apr_err, 0, NULL, pool,
"svn_txdelta_next_window: MD5 finalization failed");
*window = NULL;
return SVN_NO_ERROR;
}
else
{
svn_error_t *err;
apr_size_t total_source_len;
apr_size_t new_source_len = SVN_STREAM_CHUNK_SIZE;
apr_size_t target_len = SVN_STREAM_CHUNK_SIZE;
struct build_ops_baton_t bob = { 0 };
/* If there is no saved source data yet, read an extra half
window of data this time to get things started. */
if (stream->saved_source_len == 0)
new_source_len += SVN_STREAM_CHUNK_SIZE / 2;
/* Read the source stream. */
err = svn_stream_read (stream->source,
stream->buf + stream->saved_source_len,
&new_source_len);
total_source_len = stream->saved_source_len + new_source_len;
/* Update the MD5 accumulator with the freshly-read data in
stream.
### todo: Currently, apr_md5_update() always returns
APR_SUCCESS. As such, we are proposing to the APR folks that
its interface change to be a void function. In the meantime,
we'll simply ignore the return value. */
apr_md5_update (&(stream->context),
stream->buf + stream->saved_source_len,
new_source_len);
/* Read the target stream. */
if (err == SVN_NO_ERROR)
err = svn_stream_read (stream->target, stream->buf + total_source_len,
&target_len);
if (err != SVN_NO_ERROR)
return err;
stream->pos += new_source_len;
/* Forget everything if there's no target data. */
if (target_len == 0)
{
*window = NULL;
stream->more = FALSE;
return SVN_NO_ERROR;
}
/* Compute the delta operations. */
bob.new_data = svn_stringbuf_create ("", pool);
svn_txdelta__vdelta (&bob, stream->buf,
total_source_len, target_len,
pool);
/* Create the delta window. */
*window = make_window (pool, &bob);
(*window)->sview_offset = stream->pos - total_source_len;
(*window)->sview_len = total_source_len;
(*window)->tview_len = target_len;
/* Save the last window's worth of data from the source view. */
stream->saved_source_len = (total_source_len < SVN_STREAM_CHUNK_SIZE)
? total_source_len : SVN_STREAM_CHUNK_SIZE;
memmove (stream->buf,
stream->buf + total_source_len - stream->saved_source_len,
stream->saved_source_len);
/* That's it. */
return SVN_NO_ERROR;
}
}
const unsigned char *
svn_txdelta_md5_digest (svn_txdelta_stream_t *stream)
{
/* If there are more windows for this stream, the digest has not yet
been calculated. */
if (stream->more)
return NULL;
return stream->digest;
}
/* Functions for applying deltas. */
/* Ensure that BUF has enough space for VIEW_LEN bytes. */
static APR_INLINE void
size_buffer (char **buf, apr_size_t *buf_size,
apr_size_t view_len, apr_pool_t *pool)
{
if (view_len > *buf_size)
{
*buf_size *= 2;
if (*buf_size < view_len)
*buf_size = view_len;
*buf = apr_palloc (pool, *buf_size);
}
}
/* Apply the instructions from WINDOW to a source view SBUF to produce
a target view TBUF. SBUF is assumed to have WINDOW->sview_len
bytes of data and TBUF is assumed to have room for
WINDOW->tview_len bytes of output. This is purely a memory
operation; nothing can go wrong as long as we have a valid window. */
static void
apply_instructions (svn_txdelta_window_t *window, const char *sbuf, char *tbuf)
{
const svn_txdelta_op_t *op;
apr_size_t i, tpos = 0;
for (op = window->ops; op < window->ops + window->num_ops; op++)
{
/* Check some invariants common to all instructions. */
assert (op->offset >= 0 && op->length >= 0);
assert (tpos + op->length <= window->tview_len);
switch (op->action_code)
{
case svn_txdelta_source:
/* Copy from source area. */
assert (op->offset + op->length <= window->sview_len);
memcpy (tbuf + tpos, sbuf + op->offset, op->length);
tpos += op->length;
break;
case svn_txdelta_target:
/* Copy from target area. Don't use memcpy() since its
semantics aren't guaranteed for overlapping memory areas,
and target copies are allowed to overlap to generate
repeated data. */
assert (op->offset < tpos);
for (i = op->offset; i < op->offset + op->length; i++)
tbuf[tpos++] = tbuf[i];
break;
case svn_txdelta_new:
/* Copy from window new area. */
assert (op->offset + op->length <= window->new_data->len);
memcpy (tbuf + tpos,
window->new_data->data + op->offset,
op->length);
tpos += op->length;
break;
default:
assert ("Invalid delta instruction code" == NULL);
}
}
/* Check that we produced the right amount of data. */
assert (tpos == window->tview_len);
}
/* Apply WINDOW to the streams given by APPL. */
static svn_error_t *
apply_window (svn_txdelta_window_t *window, void *baton)
{
struct apply_baton *ab = (struct apply_baton *) baton;
apr_size_t len;
svn_error_t *err;
if (window == NULL)
{
/* We're done; just clean up. */
svn_stream_close (ab->target);
svn_pool_destroy (ab->pool);
return SVN_NO_ERROR;
}
/* Make sure the source view didn't slide backwards. */
assert (window->sview_offset >= ab->sbuf_offset
&& (window->sview_offset + window->sview_len
>= ab->sbuf_offset + ab->sbuf_len));
/* Make sure there's enough room in the target buffer. */
size_buffer (&ab->tbuf, &ab->tbuf_size, window->tview_len, ab->pool);
/* Prepare the source buffer for reading from the input stream. */
if (window->sview_offset != ab->sbuf_offset
|| window->sview_len > ab->sbuf_size)
{
char *old_sbuf = ab->sbuf;
/* Make sure there's enough room. */
size_buffer (&ab->sbuf, &ab->sbuf_size, window->sview_len, ab->pool);
/* If the existing view overlaps with the new view, copy the
* overlap to the beginning of the new buffer. */
if (ab->sbuf_offset + ab->sbuf_len > window->sview_offset)
{
apr_size_t start = window->sview_offset - ab->sbuf_offset;
memmove (ab->sbuf, old_sbuf + start, ab->sbuf_len - start);
ab->sbuf_len -= start;
}
else
ab->sbuf_len = 0;
ab->sbuf_offset = window->sview_offset;
}
/* Read the remainder of the source view into the buffer. */
if (ab->sbuf_len < window->sview_len)
{
len = window->sview_len - ab->sbuf_len;
err = svn_stream_read (ab->source, ab->sbuf + ab->sbuf_len, &len);
if (err == SVN_NO_ERROR && len != window->sview_len - ab->sbuf_len)
err = svn_error_create (SVN_ERR_INCOMPLETE_DATA, 0, NULL, ab->pool,
"Delta source ended unexpectedly");
if (err != SVN_NO_ERROR)
return err;
ab->sbuf_len = window->sview_len;
}
/* Apply the window instructions to the source view to generate
the target view. */
apply_instructions (window, ab->sbuf, ab->tbuf);
/* Write out the output. */
len = window->tview_len;
err = svn_stream_write (ab->target, ab->tbuf, &len);
return err;
}
void
svn_txdelta_apply (svn_stream_t *source,
svn_stream_t *target,
apr_pool_t *pool,
svn_txdelta_window_handler_t *handler,
void **handler_baton)
{
apr_pool_t *subpool = svn_pool_create (pool);
struct apply_baton *ab;
assert (pool != NULL);
ab = apr_palloc (subpool, sizeof (*ab));
ab->source = source;
ab->target = target;
ab->pool = subpool;
ab->sbuf = NULL;
ab->sbuf_size = 0;
ab->sbuf_offset = 0;
ab->sbuf_len = 0;
ab->tbuf = NULL;
ab->tbuf_size = 0;
*handler = apply_window;
*handler_baton = ab;
}
/* Convenience routines */
svn_error_t *
svn_txdelta_send_string (const svn_string_t *string,
svn_txdelta_window_handler_t handler,
void *handler_baton,
apr_pool_t *pool)
{
svn_txdelta_window_t window = { 0 };
svn_txdelta_op_t op;
/* Build a single `new' op */
op.action_code = svn_txdelta_new;
op.offset = 0;
op.length = string->len;
/* Build a single window containing a ptr to the string. */
window.tview_len = string->len;
window.num_ops = 1;
window.ops = &op;
window.new_data = string;
/* Push the one window at the handler. */
SVN_ERR ((*handler) (&window, handler_baton));
/* Push a NULL at the handler, because we're done. */
SVN_ERR ((*handler) (NULL, handler_baton));
return SVN_NO_ERROR;
}
svn_error_t *svn_txdelta_send_stream (svn_stream_t *stream,
svn_txdelta_window_handler_t handler,
void *handler_baton,
apr_pool_t *pool)
{
svn_txdelta_stream_t *txstream;
/* ### this is a hack. we should simply read from the stream, construct
### some windows, and pass those to the handler. there isn't any reason
### to crank up a full "diff" algorithm just to copy a stream.
###
### will fix RSN. */
/* Create a delta stream which converts an *empty* bytestream into the
target bytestream. */
svn_txdelta (&txstream, svn_stream_empty (pool), stream, pool);
return svn_txdelta_send_txstream (txstream, handler, handler_baton, pool);
}
svn_error_t *svn_txdelta_send_txstream (svn_txdelta_stream_t *txstream,
svn_txdelta_window_handler_t handler,
void *handler_baton,
apr_pool_t *pool)
{
svn_txdelta_window_t *window;
/* create a pool just for the windows */
apr_pool_t *wpool = svn_pool_create (pool);
do
{
/* read in a single delta window */
SVN_ERR( svn_txdelta_next_window (&window, txstream, wpool));
/* shove it at the handler */
SVN_ERR( (*handler)(window, handler_baton));
/* free the window (if any) */
svn_pool_clear (wpool);
}
while (window != NULL);
svn_pool_destroy (wpool);
return SVN_NO_ERROR;
}
/*
* local variables:
* eval: (load-file "../../tools/dev/svn-dev.el")
* end:
*/