blob: 8b814558a214c9176422c980565e4a3391e2a784 [file] [log] [blame]
/* reps-strings.c : interpreting representations w.r.t. strings
*
* ====================================================================
* Copyright (c) 2000-2002 CollabNet. All rights reserved.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://subversion.tigris.org/license-1.html.
* If newer versions of this license are posted there, you may use a
* newer version instead, at your option.
*
* This software consists of voluntary contributions made by many
* individuals. For exact contribution history, see the revision
* history and logs, available at http://subversion.tigris.org/.
* ====================================================================
*/
#include <string.h>
#include <assert.h>
#include <apr_md5.h>
#include "db.h"
#include "svn_fs.h"
#include "svn_pools.h"
#include "fs.h"
#include "err.h"
#include "dbt.h"
#include "trail.h"
#include "reps-table.h"
#include "strings-table.h"
#include "reps-strings.h"
/*** Local prototypes. ***/
/* Forward declaration: rep_read_range() is defined further down in
   this file, but window_handler() calls it before that definition
   appears. */
static svn_error_t *rep_read_range (svn_fs_t *fs,
const char *rep_key,
char *buf,
apr_size_t offset,
apr_size_t *len,
trail_t *trail);
/*** Helper Functions ***/
/* Return the result of matching the first element of REP's header
   against the atom `fulltext'. */
static int
rep_is_fulltext (skel_t *rep)
{
  skel_t *header = rep->children;
  skel_t *kind = header->children;

  return svn_fs__matches_atom (kind, "fulltext");
}
/* Return the result of matching the first element of REP's header
   against the atom `delta'. */
static int
rep_is_delta (skel_t *rep)
{
  skel_t *header = rep->children;
  skel_t *kind = header->children;

  return svn_fs__matches_atom (kind, "delta");
}
/* Return TRUE if one of the flags in REP's header is the atom
   `mutable', FALSE otherwise. */
static int
rep_is_mutable (skel_t *rep)
{
  /* The header is the first element of the rep skel.  The header's
     own first element is skipped; everything after it is treated as
     a flag.  There may be no flags at all. */
  skel_t *hdr = rep->children;
  skel_t *cur;

  for (cur = hdr->children->next; cur != NULL; cur = cur->next)
    {
      if (svn_fs__matches_atom (cur, "mutable"))
        return TRUE;
    }

  /* Walked every flag without finding `mutable'. */
  return FALSE;
}
/* Ensure REP carries the "mutable" flag.  If it is already flagged,
   do nothing; otherwise append a new "mutable" atom, allocated in
   POOL, to REP's header.  POOL should live at least as long as the
   pool REP was allocated in. */
static void
rep_set_mutable_flag (skel_t *rep, apr_pool_t *pool)
{
  skel_t *header = rep->children;

  if (rep_is_mutable (rep))
    return;

  svn_fs__append (svn_fs__str_atom ("mutable", pool), header);
}
/* Build and return, using POOL for all allocations, a `fulltext' rep
   skel of the form (HEADER KEY), where HEADER is ("fulltext") or, if
   MUTABLE is non-zero, ("fulltext" "mutable"), and KEY is an atom
   made from STR_KEY.  If STR_KEY is NULL, KEY is a zero-length
   atom. */
static skel_t *
make_fulltext_rep_skel (const char *str_key,
                        int mutable,
                        apr_pool_t *pool)
{
  skel_t *rep_skel = svn_fs__make_empty_list (pool);
  skel_t *header = svn_fs__make_empty_list (pool);
  skel_t *key_atom;

  /* The header is assembled by prepending, so add the optional flag
     first and the rep kind last. */
  if (mutable)
    svn_fs__prepend (svn_fs__str_atom ("mutable", pool), header);
  svn_fs__prepend (svn_fs__str_atom ("fulltext", pool), header);

  if (str_key)
    key_atom = svn_fs__str_atom (str_key, pool);
  else
    key_atom = svn_fs__mem_atom (0, 0, pool);

  /* Likewise for the rep itself: key first, then the header in
     front of it. */
  svn_fs__prepend (key_atom, rep_skel);
  svn_fs__prepend (header, rep_skel);

  return rep_skel;
}
/* Store in *KEY a NUL-terminated copy, allocated in POOL, of the
   string key held in the second element of REP.  REP must be a
   `fulltext' rep; if it is not, return SVN_ERR_FS_GENERAL and leave
   *KEY untouched. */
static svn_error_t *
fulltext_string_key (const char **key, skel_t *rep, apr_pool_t *pool)
{
  skel_t *key_atom;

  if (rep_is_fulltext (rep))
    {
      key_atom = rep->children->next;
      *key = apr_pstrndup (pool, key_atom->data, key_atom->len);
      return SVN_NO_ERROR;
    }

  return svn_error_create
    (SVN_ERR_FS_GENERAL, 0, NULL, pool,
     "fulltext_string_key: representation is not of type `fulltext'");
}
/* Set *KEYS to an array of `const char *' string keys, one per window
   of the `delta' representation REP, in window order.  The array and
   the key copies are allocated in POOL.

   Return SVN_ERR_FS_GENERAL if REP is not a `delta' rep, and
   SVN_ERR_FS_CORRUPT if any window's diff skel is not tagged with
   exactly the atom `svndiff'. */
static svn_error_t *
delta_string_keys (apr_array_header_t **keys, skel_t *rep, apr_pool_t *pool)
{
  const char *key;
  skel_t *window;
  int num_windows = 0;

  if (! rep_is_delta (rep))
    return svn_error_create
      (SVN_ERR_FS_GENERAL, 0, NULL, pool,
       "delta_string_keys: representation is not of type `delta'");

  /* Count the windows so the array can be sized up front.  The
     windows are every element of REP after the header. */
  for (window = rep->children->next; window; window = window->next)
    num_windows++;

  /* Now, push the string key for each window into *KEYS. */
  *keys = apr_array_make (pool, num_windows, sizeof (key));
  for (window = rep->children->next; window; window = window->next)
    {
      skel_t *diff = window->children->next->children->children;

      /* Require an exact match.  A strncmp() bounded by the atom's
         length would accept any prefix of "svndiff", including the
         empty atom. */
      if (! svn_fs__matches_atom (diff, "svndiff"))
        return svn_error_create
          (SVN_ERR_FS_CORRUPT, 0, NULL, pool,
           "delta_string_keys: delta rep uses unknown diff format");

      key = apr_pstrndup (pool, diff->next->data, diff->next->len);
      (*((const char **)(apr_array_push (*keys)))) = key;
    }

  return SVN_NO_ERROR;
}
/* For each string key in the array KEYS (elements of type
   `const char *'), delete that string from FS as part of TRAIL.
   Stop at, and return, the first error encountered. */
static svn_error_t *
delete_strings (apr_array_header_t *keys,
                svn_fs_t *fs,
                trail_t *trail)
{
  int idx = 0;

  while (idx < keys->nelts)
    {
      const char *this_key = ((const char **)(keys->elts))[idx];

      SVN_ERR (svn_fs__string_delete (fs, this_key, trail));
      idx++;
    }

  return SVN_NO_ERROR;
}
/*** Reading the contents from a representation. ***/
/* The fulltext reconstruction code has its weak spot isolated to one
* case in the function window_handler(). By improving that case, we
* asymptotically approach having a real delta combiner; for now, it's
* just the naive reconstruction method.
*
* Here's an overview:
*
* rep_read_range() runs through the raw svndiff data, passing it into
* a stream which invokes window_handler() every time a new window is
* available. The window_handler() ignores windows until it sees one
* that reconstructs data within the range requested, at which point
* it
*
* 1. obtains the range of source fulltext used by this window in
* reconstructing whatever portion the requested target range,
* by naively making a recursive call to rep_read_range(),
*
* or
*
* 2. looks at the source rep; if it's a fulltext, does a dance
* for joy and grabs the relevant range, else if it's a delta,
* starts reading windows and reconstructs on the fly --
* wherever, this new window stream itself needs source data,
* it starts reading windows, and so on...
*
* [Got this up and running using #1, next task is to switch to #2.]
*
* When window_handler() has finished reconstructing the requested
* range, or receives the null window, it sets the `done' bit in its
* baton, so that rep_read_range() won't bother looping over the
* trailing svndiff data.
*
* We won't bother to evaluate plan #1; its weaknesses are well-known,
* although it'll probably perform acceptably for a while.
*
* Let's assume we've finished implementing plan #2. How does it
* perform? In terms of number of passes over ignored leading svndiff
* data, its worst case is probably Subversion's most common case,
* that is, looping to read a whole file from beginning to end. But
* note that the case is only bad when each loop reads a chunk that is
* small relative to the full size of the file. If you use big
* chunks, the penalty is not so bad; and if you read the whole file
* in one call, then there's no penalty at all (except, of course,
* that you held a whole file in memory, which your mother taught you
* never to do). Users of the FS interface should use their
* judgement; probably it would be good to read a whole file at a time
* on checkouts, for example, except when a file is really
* prohibitively large.
*/
/* Baton for window_handler() below.  Note that this baton can live
across multiple calls to window_handler(); rep_read_range() fills it
in once and then updates `cur_offset' and `base_rep' per window. */
struct window_handler_baton_t
{
/* Output buffer: where to store the data as we undeltify it. */
char *buf;
/* Requested starting offset into the fulltext. */
apr_size_t req_offset;
/* Current offset into the fulltext.  rep_read_range() sets this to
the start offset of each window before feeding it in;
window_handler() advances it by the number of bytes it copies to
`buf'. */
apr_size_t cur_offset;
/* The FS in which `base_rep' can be found. */
svn_fs_t *fs;
/* Key of the representation whose fulltext this delta was made
against; read via rep_read_range() when a window has source ops. */
const char *base_rep;
/* Amount of fulltext requested to reconstruct. */
apr_size_t len_req;
/* Amount of fulltext reconstructed so far;
i.e., the offset into buf. */
apr_size_t len_read;
/* False until window_handler() has either received the null (final)
window or seen `cur_offset' reach the end of the requested range. */
svn_boolean_t done;
/* Trail in which to do everything. */
trail_t *trail;
/* Pool in which to do temporary allocations.  This is cleared by
the window handler, so you probably don't want it to be the
pool in which this baton or its buf live. */
apr_pool_t *pool;
};
/* Function of type `svn_txdelta_window_handler_t';
BATON is a `struct window_handler_baton_t'.

If WINDOW is null or BATON->done is already set, set BATON->done
and return immediately.

Otherwise, play WINDOW's ops out into a temporary target buffer
allocated in BATON->pool.  The first source op triggers a single
rep_read_range() call on BATON->base_rep, as part of BATON->trail,
to fetch the window's whole source view.  Op processing stops early
once enough target data has been produced to satisfy the request.

Afterwards, skip the first (BATON->req_offset - BATON->cur_offset)
bytes of the reconstructed target (zero if that would be negative)
and copy what follows to BATON->buf + BATON->len_read, capped at
(BATON->len_req - BATON->len_read) bytes.  BATON->len_read and
BATON->cur_offset are each incremented by the number of bytes
copied.  If BATON->cur_offset has then reached
BATON->req_offset + BATON->len_req, BATON->done is set.

BATON->pool is cleared before a successful return.

This handler does not itself decide whether a window overlaps the
requested range; rep_read_range() skips non-overlapping windows
before feeding their svndiff data into the stream.

### todo: I'll go into shock if this function doesn't change as
a result of the new `delta' representation scheme.
*/
static svn_error_t *
window_handler (svn_txdelta_window_t *window, void *baton)
{
struct window_handler_baton_t *wb = baton;
/* If we're done, we're done. */
if ((window == NULL) || wb->done)
{
wb->done = TRUE; /* might be redundant */
return SVN_NO_ERROR;
}
/** Otherwise, handle the window. **/
/* Get the range of source text that's relevant to us. */
/* ### todo: if we wanted to make the naive algorithm really space
efficient, we could pass in (wb->buf + some_offset) for the data
buffer in a bunch of tiny calls to rep_read_range(), and
reconstruct the data in-place.  That would probably be, ahem,
slow. :-)  And anyway, we're going to do things differently. */
{
char *tbuf; /* Reconstructed target data. */
char *sbuf; /* Reconstructed source data. */
apr_size_t slen; /* Length of source data. */
/* Both scratch buffers are sized for the window's full source and
target views and live in the handler's clearable pool. */
slen = window->sview_len;
sbuf = apr_palloc (wb->pool, slen);
tbuf = apr_palloc (wb->pool, window->tview_len);
/* Q: Why is there a target buf allocated inside this function?
Why not just use the output buffer directly?
A: A given window (say, the current one) contains all the data
necessary to reproduce a contiguous range of bytes.  If that
range of bytes is entirely outside the range the caller
requested, the window is ignored.  If, however, any overlap
occurs between the window's "target view" and the requested
range, this window must be processed.  When considering
overlap, we have exactly one of the following situations:
1. target window and requested range have the same
starting offset.
2. target window starting offset is greater than the
starting offset of the requested range.
3. target window starting offset is less than the
starting offset of the requested range.
Case 1 and Case 2 are fairly simple to deal with.  Case 1 is
super-trivial.  Case 2 can be treated like a special Case 1
because the "overlapped" portion came from a previous window(s).
Case 3 is the weird one.  In this case, we have some amount
of data coming out of the window op handling that needs to
be discarded before we actually get to data that we care
about.  Now, one might be tempted to just literally discard
that data, and then actually begin writing to the output
buffer at the proper time.  This would be fine if the only
op types were svn_txdelta_source (which reads from a source
buffer) and svn_txdelta_new (which reads from a "new data"
buffer).  But the svn_txdelta_target op also exists, and it
reads from the target buffer.  With this op type comes the
risk that we will be asked to read from an offset that
exists in the "overlap" region -- which we just discarded!
So, in order to safeguard against the svn_txdelta_target op
making requests for data which we no longer have, we need to
"play out" this window into a temporary buffer, then copy
the range requested by the caller into the output buffer
once we're finished.
*/
/* Now we can loop over the window ops, doing them.  I think this
makes more sense than trying to use the functions in
svn_delta.h.  We'd spend a lot of effort packing things up
right, for not much gain. */
{
const svn_txdelta_op_t *op;
int i;
int src_read = 0;
apr_size_t len_read = 0;
apr_size_t copy_amt = 0;
/* Number of leading target bytes that precede the requested range
(Case 3 above); zero when the window starts at or after it. */
apr_size_t discard_amt = (wb->req_offset > wb->cur_offset)
? (wb->req_offset - wb->cur_offset)
: 0;
/* For each op, we must check to see what portion of that op's output
is meant for the "discard pile." */
for (i = 0; i < window->num_ops; i++)
{
op = window->ops + i;
switch (op->action_code)
{
case svn_txdelta_source:
{
if (! src_read)
{
/* The first time we actually have a reference to
our source data, we'll read all the source data
that this window might use.  We may end up
reading more than we need to based on the
portion of this window our caller actually
requested, but we'll at least only have the one
database access in this window_handler() call.
### todo: this is the core of the naive
algorithm, and is what has to go when we have a
true delta combiner. */
/* NOTE(review): rep_read_range() reports the byte count it
actually read back through SLEN, but that count is not
compared with window->sview_len here. */
SVN_ERR (rep_read_range (wb->fs, wb->base_rep, sbuf,
window->sview_offset, &slen,
wb->trail));
src_read = 1;
}
memcpy (tbuf + len_read, sbuf + op->offset, op->length);
len_read += op->length;
}
break;
case svn_txdelta_target:
{
/* This could be done in bigger blocks, at the expense
of some more complexity.  The copy goes byte by byte in
ascending order, so a source range that overlaps the bytes
being written picks up the freshly written data. */
int t;
for (t = op->offset; t < op->offset + op->length; t++)
tbuf[len_read++] = tbuf[t];
}
break;
case svn_txdelta_new:
{
memcpy (tbuf + len_read,
window->new_data->data + op->offset,
op->length);
len_read += op->length;
}
break;
default:
return svn_error_createf
(SVN_ERR_FS_CORRUPT, 0, NULL, wb->pool,
"window_handler: unknown delta op action code (%d)",
op->action_code);
}
/* If we've at least read into the caller's requested range
of data, figure out how much data we would copy into the
output buffer were we going to do so right now.  If that
amount is enough to "fill the request", stop handling ops
here.  (This `break' sits outside the switch, so it leaves the
op loop.) */
if (len_read >= discard_amt)
{
copy_amt = len_read - discard_amt;
if (copy_amt > (wb->len_req - wb->len_read))
{
copy_amt = wb->len_req - wb->len_read;
break;
}
}
}
/* Copy our requested range into the output buffer. */
memcpy (wb->buf + wb->len_read, tbuf + discard_amt, copy_amt);
wb->len_read += copy_amt;
/* If this window looks past relevant data, then we're done. */
/* NOTE(review): cur_offset advances by copy_amt only, not by
discard_amt + copy_amt, so after a Case 3 window it trails the true
fulltext position -- confirm this is intended. */
wb->cur_offset += copy_amt;
if (wb->cur_offset >= (wb->req_offset + wb->len_req))
wb->done = TRUE;
}
}
/* Clear out the window baton's pool.  This releases sbuf and tbuf;
it is not reached on the error returns above. */
svn_pool_clear (wb->pool);
return SVN_NO_ERROR;
}
/* Copy into BUF up to *LEN bytes of fulltext, starting at OFFSET,
from the representation REP_KEY in FS, as part of TRAIL.
The number of bytes actually copied is stored in *LEN.

For a `fulltext' rep the bytes are read straight from the rep's
string.  For any other rep, the rep is treated as a delta: each
window's svndiff string is pushed through an svndiff parser stream
whose window handler, window_handler(), reconstructs the requested
range into BUF.

Temporary allocations are made in a subpool of TRAIL->pool, which is
destroyed before a successful return. */
static svn_error_t *
rep_read_range (svn_fs_t *fs,
const char *rep_key,
char *buf,
apr_size_t offset,
apr_size_t *len,
trail_t *trail)
{
skel_t *rep;
/* NOTE(review): the SVN_ERR early returns below skip the
svn_pool_destroy() at the bottom; subpool is then presumably
reclaimed along with its parent, TRAIL->pool -- confirm. */
apr_pool_t *subpool = svn_pool_create (trail->pool);
/* Read in our REP. */
SVN_ERR (svn_fs__read_rep (&rep, fs, rep_key, trail));
if (rep_is_fulltext (rep))
{
/* Get the string key associated with REP, and read the
requested range from that string. */
const char *str_key;
SVN_ERR (fulltext_string_key (&str_key, rep, subpool));
SVN_ERR (svn_fs__string_read (fs, str_key, buf, offset, len, trail));
}
else
{
svn_stream_t *wstream;
struct window_handler_baton_t wb;
skel_t *this_window;
char chunk[4096]; /* chunk of svndiff data */
apr_size_t off; /* offset into svndiff data */
apr_size_t amt; /* how much svndiff data to/was read */
/* Initialize THIS_WINDOW to the first (OFFSET WINDOW) skel. */
this_window = rep->children->next;
assert (this_window != NULL);
/* Initialize the window handler baton.  The per-window fields
(cur_offset, base_rep) are filled in inside the loop below. */
wb.fs = fs;
wb.buf = buf;
wb.req_offset = offset;
wb.len_req = *len;
wb.len_read = 0;
wb.done = FALSE;
wb.trail = trail;
/* The handler clears its pool on every call, so give it one of its
own rather than SUBPOOL, which holds data we still need. */
wb.pool = svn_pool_create (subpool);
/* Set up a window handling stream for the svndiff data. */
wstream = svn_txdelta_parse_svndiff (window_handler, &wb,
FALSE, subpool);
/* First things first: send the "SVN\0" header through the
stream. */
chunk[0] = 'S';
chunk[1] = 'V';
chunk[2] = 'N';
chunk[3] = '\0';
amt = 4;
SVN_ERR (svn_stream_write (wstream, chunk, &amt));
/* Now, for each window, decide if the window is relevant.  That
is, do we need to use it to reconstruct data in the range
requested by the caller? */
do
{
skel_t *wnd_skel = this_window->children->next;
apr_size_t this_off, this_len;
const char *str_key;
/* Get the offset and size of this window from the skel.  The
atoms are not NUL-terminated, hence the pool copies. */
this_off = atoi (apr_pstrndup (subpool,
this_window->children->data,
this_window->children->len));
this_len = atoi (apr_pstrndup (subpool,
wnd_skel->children->next->data,
wnd_skel->children->next->len));
/* If this window is irrelevant because it reconstructs text
that is entirely before the range we're interested in,
then ignore it.  `continue' in a do-while jumps to the loop
condition at the bottom, so THIS_WINDOW must be advanced here
first. */
if ((this_off + this_len - 1) < offset)
{
this_window = this_window->next;
continue;
}
/* If this window is irrelevant because it reconstructs text
that is entirely after the range we're interested in,
we're definitely done. */
/* NOTE(review): the test is `>', so a window that starts exactly
at OFFSET + *LEN is still fed to the handler. */
if (this_off > (offset + *len))
break;
/* Get this string key which holds this window's data.
### todo: make sure this is an `svndiff' DIFF skel here. */
str_key = apr_pstrndup (subpool,
wnd_skel->children->children->next->data,
wnd_skel->children->children->next->len);
/* Finish initializing our baton with window-specific
stuff. */
wb.cur_offset = this_off;
wb.base_rep =
apr_pstrndup (subpool,
wnd_skel->children->next->next->next->data,
wnd_skel->children->next->next->next->len);
/* Run through the svndiff data, at least as far as necessary.
Stop when the handler reports it is done or the string yields
no more bytes. */
off = 0;
do {
amt = sizeof (chunk);
SVN_ERR (svn_fs__string_read (fs, str_key, chunk,
off, &amt, trail));
off += amt;
SVN_ERR (svn_stream_write (wstream, chunk, &amt));
} while ((wb.done == FALSE) && (amt != 0));
this_window = this_window->next;
}
while ((this_window) && (wb.done == FALSE));
/* Close the stream.  We should not get an error for closing the
stream early because we explicitly told the stream handlers
not to care in the call to svn_txdelta_parse_svndiff() above. */
SVN_ERR (svn_stream_close (wstream));
/* Report how many bytes the handler actually placed in BUF. */
*len = wb.len_read;
}
svn_pool_destroy (subpool);
return SVN_NO_ERROR;
}
/* Set *NEW_REP to the key of a mutable representation holding the
   same contents as REP in FS, as part of TRAIL.

   - If REP is NULL or the empty string, create a new, empty, mutable
     `fulltext' rep.
   - If REP is already mutable, set *NEW_REP to REP itself.
   - Otherwise make a deep, mutable, `fulltext' copy of REP's
     contents and store it as a new rep.

   Allocations are made in TRAIL->pool.  Return SVN_ERR_FS_CORRUPT if
   a delta rep yields no data before its reported size is reached. */
svn_error_t *
svn_fs__get_mutable_rep (const char **new_rep,
                         const char *rep,
                         svn_fs_t *fs,
                         trail_t *trail)
{
  skel_t *rep_skel;

  if (rep && (rep[0] != '\0'))
    {
      /* We were passed an existing rep, so examine it. */
      SVN_ERR (svn_fs__read_rep (&rep_skel, fs, rep, trail));

      if (rep_is_mutable (rep_skel))  /* already mutable, so return it */
        {
          *new_rep = rep;
          return SVN_NO_ERROR;
        }

      /* REP is not mutable, so we have to make a mutable copy.  It is
         a deep copy -- we copy the immutable rep's data.  Note that
         we copy it as fulltext, no matter how the immutable rep
         represents the data. */
      if (rep_is_fulltext (rep_skel))
        {
          /* The easy case -- copy the fulltext string directly. */
          const char *old_str, *new_str;

          /* Step 1: Copy the string to which the rep refers. */
          SVN_ERR (fulltext_string_key (&old_str, rep_skel, trail->pool));
          SVN_ERR (svn_fs__string_copy (fs, &new_str, old_str, trail));

          /* Step 2: Make this (in-memory) rep mutable. */
          rep_set_mutable_flag (rep_skel, trail->pool);

          /* Step 3: Change the string key to which this rep points. */
          rep_skel->children->next->data = new_str;
          rep_skel->children->next->len = strlen (new_str);
        }
      else
        {
          /* This is a bit trickier.  The immutable rep is a delta,
             but we're still making a fulltext copy of it.  So we do
             an undeltifying read loop, writing the fulltext out to
             the mutable rep.  The efficiency of this depends on the
             efficiency of rep_read_range(); fortunately, this
             circumstance is probably rare, and especially unlikely to
             happen on large contents (i.e., it's more likely to
             happen on directories than on files, because directories
             don't have to be up-to-date to receive commits, whereas
             files do). */
          char buf[10000];
          apr_size_t offset = 0;
          apr_size_t size;
          const char *new_str = NULL;

          SVN_ERR (svn_fs__rep_contents_size (&size, fs, rep, trail));

          while (offset < size)
            {
              apr_size_t amount = size - offset;

              if (amount > sizeof (buf))
                amount = sizeof (buf);

              SVN_ERR (rep_read_range (fs, rep, buf, offset, &amount, trail));

              /* A read that makes no progress would keep this loop
                 spinning forever; the rep's reported size and its
                 actual data disagree, so report corruption. */
              if (amount == 0)
                return svn_error_createf
                  (SVN_ERR_FS_CORRUPT, 0, NULL, trail->pool,
                   "svn_fs__get_mutable_rep: short read from rep \"%s\"",
                   rep);

              SVN_ERR (svn_fs__string_append (fs, &new_str, amount, buf,
                                              trail));
              offset += amount;
            }

          /* A zero-length delta never enters the loop.  Create the
             empty string explicitly, as the no-key case below does,
             so the new rep always refers to a real string. */
          if (new_str == NULL)
            SVN_ERR (svn_fs__string_append (fs, &new_str, 0, NULL, trail));

          rep_skel = make_fulltext_rep_skel (new_str, 1, trail->pool);
        }
    }
  else  /* no key, so make a new, empty, mutable, fulltext rep */
    {
      const char *new_str = NULL;

      SVN_ERR (svn_fs__string_append (fs, &new_str, 0, NULL, trail));
      rep_skel = make_fulltext_rep_skel (new_str, 1, trail->pool);
    }

  /* If we made it here, there's a new rep to store in the fs. */
  SVN_ERR (svn_fs__write_new_rep (new_rep, fs, rep_skel, trail));

  return SVN_NO_ERROR;
}
/* Remove the "mutable" flag from the representation REP in FS, as
   part of TRAIL, and write the modified rep back.  If REP has no
   "mutable" flag, nothing is written.  Only the first "mutable" flag
   found is removed; all other header elements are preserved. */
svn_error_t *
svn_fs__make_rep_immutable (svn_fs_t *fs,
                            const char *rep,
                            trail_t *trail)
{
  skel_t *rep_skel;
  skel_t *header, *flag, *prev;

  SVN_ERR (svn_fs__read_rep (&rep_skel, fs, rep, trail));
  header = rep_skel->children;

  /* The flags start at the 2nd element of the header.  PREV starts
     out as the header's first element, so unlinking works the same
     way whether the match is the first flag or a later one. */
  for (prev = header->children, flag = prev->next;
       flag;
       prev = flag, flag = flag->next)
    {
      if (flag->is_atom && svn_fs__matches_atom (flag, "mutable"))
        {
          /* We found it.  Splice it out while keeping every flag
             that follows it; setting the link to NULL here would
             discard those trailing flags. */
          prev->next = flag->next;
          SVN_ERR (svn_fs__write_rep (fs, rep, rep_skel, trail));
          break;
        }
    }

  return SVN_NO_ERROR;
}
/* If the representation REP in FS is mutable, delete the string(s)
   it refers to and then the rep itself, all as part of TRAIL.  If
   REP is not mutable, do nothing. */
svn_error_t *
svn_fs__delete_rep_if_mutable (svn_fs_t *fs,
                               const char *rep,
                               trail_t *trail)
{
  skel_t *rep_skel;

  SVN_ERR (svn_fs__read_rep (&rep_skel, fs, rep, trail));

  if (rep_is_mutable (rep_skel))
    {
      if (! rep_is_fulltext (rep_skel))
        {
          /* A delta: one string per window. */
          apr_array_header_t *window_keys;

          SVN_ERR (delta_string_keys (&window_keys, rep_skel, trail->pool));
          SVN_ERR (delete_strings (window_keys, fs, trail));
        }
      else
        {
          /* A fulltext: exactly one string. */
          const char *str_key;

          SVN_ERR (fulltext_string_key (&str_key, rep_skel, trail->pool));
          SVN_ERR (svn_fs__string_delete (fs, str_key, trail));
        }

      SVN_ERR (svn_fs__delete_rep (fs, rep, trail));
    }

  return SVN_NO_ERROR;
}
/*** Reading and writing data via representations. ***/
/** Reading. **/
/* Baton for rep_read_contents(), the read handler installed by
svn_fs__rep_contents_read_stream(). */
struct rep_read_baton
{
/* The FS from which we're reading. */
svn_fs_t *fs;
/* The key of the representation whose contents we want to read.  If
this is null, the rep has never had any contents, so all reads
fetch 0 bytes.
Formerly, we cached the entire rep skel here, not just the key.
That way we didn't have to fetch the rep from the db every time
we want to read a little bit more of the file.  Unfortunately,
this has a problem: if, say, a file's representation changes
while we're reading (changes from fulltext to delta, for
example), we'll never know it.  So for correctness, we now
refetch the representation skel every time we want to read
another chunk. */
const char *rep_key;
/* How many bytes have been read already; the next read starts
here. */
apr_size_t offset;
/* If present, the read will be done as part of this trail, and the
trail's pool will be used.  Otherwise, see `pool' below. */
trail_t *trail;
/* Used for temporary allocations, iff `trail' (above) is null. */
apr_pool_t *pool;
};
/* Allocate in POOL, fill in, and return a rep_read_baton for reading
   the rep REP_KEY in FS beginning at OFFSET.  TRAIL and POOL are
   stored in the baton as given; TRAIL may be null. */
static struct rep_read_baton *
rep_read_get_baton (svn_fs_t *fs,
                    const char *rep_key,
                    apr_size_t offset,
                    trail_t *trail,
                    apr_pool_t *pool)
{
  struct rep_read_baton *rb = apr_pcalloc (pool, sizeof (*rb));

  /* What to read, and from where... */
  rb->fs = fs;
  rb->rep_key = rep_key;
  rb->offset = offset;

  /* ...and the context to read it in. */
  rb->trail = trail;
  rb->pool = pool;

  return rb;
}
/*** Retrieving data. ***/
/* Set *SIZE_P to the size, in bytes, of the fulltext of the
   representation REP in FS, as part of TRAIL.

   For a `fulltext' rep this is the size of the underlying string.
   For a delta rep it is the offset of the last window plus that
   window's reconstructed size.  Temporary allocations are made in
   TRAIL->pool. */
svn_error_t *
svn_fs__rep_contents_size (apr_size_t *size_p,
                           svn_fs_t *fs,
                           const char *rep,
                           trail_t *trail)
{
  skel_t *rep_skel;

  SVN_ERR (svn_fs__read_rep (&rep_skel, fs, rep, trail));

  if (rep_is_fulltext (rep_skel))
    {
      /* Get the size by asking Berkeley for the string's length. */
      const char *str_key;

      SVN_ERR (fulltext_string_key (&str_key, rep_skel, trail->pool));
      SVN_ERR (svn_fs__string_size (size_p, fs, str_key, trail));
    }
  else  /* rep is delta */
    {
      /* Get the size by finding the last window pkg in the delta and
         adding its offset to its size.  This way, we won't even be
         messed up by overlapping windows, as long as the window pkgs
         are still ordered. */
      skel_t *pkg_skel = rep_skel->children->next;
      skel_t *window_skel;
      char *offset_str, *size_str;
      int offset, size;

      assert (pkg_skel != NULL);

      /* Skip to the last window pkg in this delta. */
      while (pkg_skel->next)
        pkg_skel = pkg_skel->next;

      /* The offset is the first member of a window pkg.  Atoms are
         not NUL-terminated, so make a terminated copy for atoi(). */
      offset_str = apr_pstrndup (trail->pool,
                                 pkg_skel->children->data,
                                 pkg_skel->children->len);

      /* Get the skel for the window sublist. */
      window_skel = pkg_skel->children->next;
      assert (window_skel != NULL);

      /* The window's reconstructed size is the second element. */
      size_str = apr_pstrndup (trail->pool,
                               window_skel->children->next->data,
                               window_skel->children->next->len);

      offset = atoi (offset_str);
      size = atoi (size_str);

      /* Convert each operand before adding: a sum done in `int' is
         undefined behavior once it exceeds INT_MAX, whereas the
         unsigned sum is well defined and gives the same result
         whenever the signed one would not have overflowed.
         NOTE(review): each individual value is still limited to what
         atoi() can represent. */
      *size_p = (apr_size_t) offset + (apr_size_t) size;
    }

  return SVN_NO_ERROR;
}
/* Read the entire fulltext of the representation REP in FS into STR,
   as part of TRAIL.  STR->len is set to the rep's contents size and
   STR->data to a buffer of that size allocated in TRAIL->pool.
   Return SVN_ERR_FS_CORRUPT if fewer (or more) bytes were read than
   the size promised. */
svn_error_t *
svn_fs__rep_contents (svn_string_t *str,
                      svn_fs_t *fs,
                      const char *rep,
                      trail_t *trail)
{
  apr_size_t got;

  SVN_ERR (svn_fs__rep_contents_size (&(str->len), fs, rep, trail));
  str->data = apr_palloc (trail->pool, str->len);

  /* Ask for everything, starting from the beginning. */
  got = str->len;
  SVN_ERR (rep_read_range (fs, rep, (char *) str->data, 0, &got, trail));

  /* Paranoia: the read should have delivered exactly what the size
     query promised. */
  if (got == str->len)
    return SVN_NO_ERROR;

  return svn_error_createf
    (SVN_ERR_FS_CORRUPT, 0, NULL, trail->pool,
     "svn_fs__rep_read_contents: failure reading rep \"%s\"", rep);
}
/* Argument bundle passed as the baton to txn_body_read_rep(). */
struct read_rep_args
{
struct rep_read_baton *rb; /* The data source. */
char *buf; /* Where to put what we read. */
apr_size_t *len; /* In: how much to read.  Out: how much was read. */
};
/* BATON is of type `struct read_rep_args':

   Read into BATON->buf the *(BATON->len) bytes starting at
   BATON->rb->offset from the data represented at BATON->rb->rep_key
   in BATON->rb->fs, as part of TRAIL.

   Afterwards, *(BATON->len) is the number of bytes actually read, and
   BATON->rb->offset is incremented by that amount.

   If BATON->rb->rep_key is null, this is assumed to mean the file's
   contents have no representation, i.e., the file has no contents.
   In that case, if BATON->rb->offset > 0, return the error
   SVN_ERR_FS_REP_CHANGED, else just set *(BATON->len) to zero and
   return. */
static svn_error_t *
txn_body_read_rep (void *baton, trail_t *trail)
{
  struct read_rep_args *args = baton;

  if (! args->rb->rep_key)
    {
      /* No representation at all.  That is only consistent with
         never having read anything. */
      if (args->rb->offset > 0)
        return
          svn_error_create
          (SVN_ERR_FS_REP_CHANGED, 0, NULL, trail->pool,
           "txn_body_read_rep: null rep, but offset past zero already");

      *(args->len) = 0;
      return SVN_NO_ERROR;
    }

  SVN_ERR (rep_read_range (args->rb->fs,
                           args->rb->rep_key,
                           args->buf,
                           args->rb->offset,
                           args->len,
                           trail));

  /* Remember how far we've got for the next read. */
  args->rb->offset += *(args->len);

  return SVN_NO_ERROR;
}
/* Read handler for rep content streams; BATON is a
   `struct rep_read_baton *'.  Read up to *LEN bytes into BUF via
   txn_body_read_rep(), which updates *LEN to the number of bytes
   read.  The read runs inside the baton's trail if it has one,
   otherwise inside a fresh retried transaction using the baton's
   pool. */
static svn_error_t *
rep_read_contents (void *baton, char *buf, apr_size_t *len)
{
  struct rep_read_baton *rb = baton;
  struct read_rep_args args;

  args.rb = rb;
  args.buf = buf;
  args.len = len;

  /* No trail supplied: make one. */
  if (! rb->trail)
    {
      SVN_ERR (svn_fs__retry_txn (rb->fs,
                                  txn_body_read_rep,
                                  &args,
                                  rb->pool));
      return SVN_NO_ERROR;
    }

  /* Otherwise, work within the caller's trail. */
  SVN_ERR (txn_body_read_rep (&args, rb->trail));
  return SVN_NO_ERROR;
}
/** Writing. **/
/* Baton for rep_write_contents(), the write handler installed by
svn_fs__rep_contents_write_stream(). */
struct rep_write_baton
{
/* The FS in which we're writing. */
svn_fs_t *fs;
/* The key of the representation whose contents we want to write. */
const char *rep_key;
/* If present, do the write as part of this trail, and use trail's
pool.  Otherwise, see `pool' below. */
trail_t *trail;
/* Used for temporary allocations, iff `trail' (above) is null. */
apr_pool_t *pool;
};
/* Allocate in POOL, fill in, and return a rep_write_baton for writing
   to the rep REP_KEY in FS.  TRAIL and POOL are stored in the baton
   as given; TRAIL may be null. */
static struct rep_write_baton *
rep_write_get_baton (svn_fs_t *fs,
                     const char *rep_key,
                     trail_t *trail,
                     apr_pool_t *pool)
{
  struct rep_write_baton *wb = apr_pcalloc (pool, sizeof (*wb));

  /* What to write to... */
  wb->fs = fs;
  wb->rep_key = rep_key;

  /* ...and the context to write it in. */
  wb->trail = trail;
  wb->pool = pool;

  return wb;
}
/* Append LEN bytes from BUF onto the string behind the
   representation REP_KEY in FS, as part of TRAIL.

   If the representation is not mutable, return the error
   SVN_ERR_FS_REP_NOT_MUTABLE without writing anything.  If it is
   mutable but not a `fulltext' rep, return SVN_ERR_FS_CORRUPT. */
static svn_error_t *
rep_write (svn_fs_t *fs,
           const char *rep_key,
           const char *buf,
           apr_size_t len,
           trail_t *trail)
{
  skel_t *rep;

  SVN_ERR (svn_fs__read_rep (&rep, fs, rep_key, trail));

  /* The error must actually be returned: merely creating it would
     let the write below proceed on an immutable rep. */
  if (! rep_is_mutable (rep))
    return svn_error_createf
      (SVN_ERR_FS_REP_NOT_MUTABLE, 0, NULL, trail->pool,
       "rep_write: rep \"%s\" is not mutable", rep_key);

  if (rep_is_fulltext (rep))
    {
      const char *str_key;

      SVN_ERR (fulltext_string_key (&str_key, rep, trail->pool));
      SVN_ERR (svn_fs__string_append (fs, &str_key, len, buf, trail));
    }
  else
    {
      /* There should never be a case when we have a mutable
         non-fulltext rep.  The only code that creates mutable reps is
         in this file, and it creates them fulltext. */
      return svn_error_createf
        (SVN_ERR_FS_CORRUPT, 0, NULL, trail->pool,
         "rep_write: rep \"%s\" both mutable and non-fulltext", rep_key);
    }

  return SVN_NO_ERROR;
}
/* Argument bundle passed as the baton to txn_body_write_rep(). */
struct write_rep_args
{
struct rep_write_baton *wb; /* Destination. */
const char *buf; /* Data. */
apr_size_t len; /* How much to write. */
};
/* BATON is of type `struct write_rep_args':

   Hand BATON->len bytes of data from BATON->buf to rep_write() for
   the representation BATON->wb->rep_key in BATON->wb->fs, as part of
   TRAIL.  Any error from rep_write() is returned to the caller. */
static svn_error_t *
txn_body_write_rep (void *baton, trail_t *trail)
{
  struct write_rep_args *args = baton;
  struct rep_write_baton *dest = args->wb;

  SVN_ERR (rep_write (dest->fs, dest->rep_key, args->buf, args->len, trail));

  return SVN_NO_ERROR;
}
/* Write handler for rep content streams; BATON is a
   `struct rep_write_baton *'.  Write *LEN bytes from BUF via
   txn_body_write_rep().  *LEN is never modified.  The write runs
   inside the baton's trail if it has one, otherwise inside a fresh
   retried transaction using the baton's pool. */
static svn_error_t *
rep_write_contents (void *baton, const char *buf, apr_size_t *len)
{
  struct rep_write_baton *wb = baton;
  struct write_rep_args args;

  /* LEN is passed along by value: a partial write is an error, so
     there is never a shorter count to report back through *LEN. */
  args.wb = wb;
  args.buf = buf;
  args.len = *len;

  /* No trail supplied: make one. */
  if (! wb->trail)
    {
      SVN_ERR (svn_fs__retry_txn (wb->fs,
                                  txn_body_write_rep,
                                  &args,
                                  wb->pool));
      return SVN_NO_ERROR;
    }

  /* Otherwise, work within the caller's trail. */
  SVN_ERR (txn_body_write_rep (&args, wb->trail));
  return SVN_NO_ERROR;
}
/** Public read and write stream constructors. **/
/* Return a stream, allocated in POOL, whose read handler is
   rep_read_contents() and whose baton describes the rep REP in FS,
   starting at OFFSET.  TRAIL (which may be null) and POOL are
   recorded in that baton for use by the read handler. */
svn_stream_t *
svn_fs__rep_contents_read_stream (svn_fs_t *fs,
                                  const char *rep,
                                  apr_size_t offset,
                                  trail_t *trail,
                                  apr_pool_t *pool)
{
  struct rep_read_baton *baton;
  svn_stream_t *stream;

  baton = rep_read_get_baton (fs, rep, offset, trail, pool);
  stream = svn_stream_create (baton, pool);
  svn_stream_set_read (stream, rep_read_contents);

  return stream;
}
/* Return a stream, allocated in POOL, whose write handler is
   rep_write_contents() and whose baton describes the rep REP in FS.
   TRAIL (which may be null) and POOL are recorded in that baton for
   use by the write handler. */
svn_stream_t *
svn_fs__rep_contents_write_stream (svn_fs_t *fs,
                                   const char *rep,
                                   trail_t *trail,
                                   apr_pool_t *pool)
{
  struct rep_write_baton *baton;
  svn_stream_t *stream;

  baton = rep_write_get_baton (fs, rep, trail, pool);
  stream = svn_stream_create (baton, pool);
  svn_stream_set_write (stream, rep_write_contents);

  return stream;
}
/* Empty the contents of the mutable representation REP in FS, as part
   of TRAIL.  Return SVN_ERR_FS_REP_NOT_MUTABLE if REP is not
   mutable.

   A `fulltext' rep has its string cleared (or is left alone if its
   string key is already empty).  Any other rep is rewritten as a
   mutable `fulltext' rep over a new empty string, after which the
   strings of the old delta are deleted. */
svn_error_t *
svn_fs__rep_contents_clear (svn_fs_t *fs,
                            const char *rep,
                            trail_t *trail)
{
  skel_t *rep_skel;
  const char *str_key;

  SVN_ERR (svn_fs__read_rep (&rep_skel, fs, rep, trail));

  /* Make sure it's mutable. */
  if (! rep_is_mutable (rep_skel))
    return svn_error_createf
      (SVN_ERR_FS_REP_NOT_MUTABLE, 0, NULL, trail->pool,
       "svn_fs__rep_contents_clear: rep \"%s\" is not mutable", rep);

  if (! rep_is_fulltext (rep_skel))
    {
      /* Delta case. */
      apr_array_header_t *old_keys;

      /* Collect the old strings before the rep skel is replaced. */
      SVN_ERR (delta_string_keys (&old_keys, rep_skel, trail->pool));

      /* Swap in a `fulltext' rep backed by a brand-new empty string,
         and store it under the same rep key. */
      str_key = NULL;
      SVN_ERR (svn_fs__string_append (fs, &str_key, 0, NULL, trail));
      rep_skel = make_fulltext_rep_skel (str_key, 1, trail->pool);
      SVN_ERR (svn_fs__write_rep (fs, rep, rep_skel, trail));

      /* The old delta strings are no longer referenced by this rep. */
      SVN_ERR (delete_strings (old_keys, fs, trail));

      return SVN_NO_ERROR;
    }

  /* Fulltext case. */
  SVN_ERR (fulltext_string_key (&str_key, rep_skel, trail->pool));

  /* Nothing to do if the rep is already clear. */
  if ((str_key == NULL) || (str_key[0] == '\0'))
    return SVN_NO_ERROR;

  SVN_ERR (svn_fs__string_clear (fs, str_key, trail));

  return SVN_NO_ERROR;
}
/*** Deltified storage. ***/
/* Baton for svn_write_fn_t write_string(). */
struct write_string_baton
{
/* The fs where lives the string we're writing. */
svn_fs_t *fs;
/* The key of the string we're writing to.  Typically this is
initialized to NULL, so svn_fs__string_append() can fill in a
value.  write_string() passes the address of this field to
svn_fs__string_append() on every call. */
const char *key;
/* The trail we're writing in. */
trail_t *trail;
};
/* An `svn_write_fn_t' that appends to a single string in the
   `strings' table.  BATON is a `struct write_string_baton *'.

   Each call hands the *LEN bytes at DATA, together with the address
   of BATON->key, to svn_fs__string_append().  When BATON->key is
   null on entry, that call is expected to choose a new string key
   and store it in BATON->key.  *LEN is never changed; any error from
   the append is returned to the caller. */
static svn_error_t *
write_string (void *baton, const char *data, apr_size_t *len)
{
  struct write_string_baton *wsb = baton;
  svn_error_t *err;

  err = svn_fs__string_append (wsb->fs, &(wsb->key), *len, data,
                               wsb->trail);
  return err;
}
/* Baton for the svn_write_fn_t write_svndiff_strings(). */
struct write_svndiff_strings_baton
{
/* The filesystem in which the strings being written live. */
svn_fs_t *fs;
/* The key of the string currently being written to.  Typically this
   is initialized to NULL, so that svn_fs__string_append() can fill
   in a value.  svn_fs__rep_deltify() resets it to NULL before each
   window so that every window lands in its own string. */
const char *key;
/* Running total of the byte counts passed to
   write_svndiff_strings() since the caller last reset this field to
   0.  Note that the total includes any svndiff header bytes that
   were skipped rather than stored. */
apr_size_t size;
/* The number of svndiff header bytes (out of 4) that
   write_svndiff_strings() has skipped so far.  Header bytes are
   stripped from the data, not written to the strings table. */
apr_size_t header_read;
/* The trail in which all writes are performed. */
trail_t *trail;
};
/* Function of type `svn_write_fn_t', for writing svndiff data to a
   collection of strings; BATON is `struct write_svndiff_strings_baton *'.

   The first 4 bytes that pass through this handler over the lifetime
   of BATON (counted in BATON->header_read) are treated as the svndiff
   header and are skipped instead of being stored.  The header may
   arrive in one piece or split over several calls, including calls
   that deliver fewer than 4 bytes.

   If BATON->key is null, a new string key in BATON->fs is chosen and
   stored in BATON->key; each call appends the non-header part of the
   *LEN bytes at DATA onto that string.  *LEN is never changed, not
   even when an error is returned.

   BATON->size is incremented by *LEN (skipped header bytes included)
   on success, and must be reset by the caller to 0 when appropriate. */
static svn_error_t *
write_svndiff_strings (void *baton, const char *data, apr_size_t *len)
{
  struct write_svndiff_strings_baton *wb = baton;
  const char *buf = data;
  apr_size_t nbytes = *len;     /* bytes left to store after stripping */

  /* If we haven't stripped all the header information from this
     stream yet, keep stripping.  Never strip more than this call
     actually delivered: NBYTES is unsigned, so subtracting too much
     would wrap around to a huge length and BUF would point past the
     end of DATA. */
  if (wb->header_read < 4)
    {
      apr_size_t nheader = 4 - wb->header_read;

      if (nheader > nbytes)
        nheader = nbytes;

      buf += nheader;
      nbytes -= nheader;
      wb->header_read += nheader;
    }

  /* Append to the current string we're writing (or create a new one
     if WB->key is NULL).  This is done even when NBYTES is 0 so that
     a key always exists afterwards. */
  SVN_ERR (svn_fs__string_append (wb->fs, &(wb->key), nbytes, buf,
                                  wb->trail));

  /* Make sure we (still) have a key. */
  if (wb->key == NULL)
    return svn_error_create
      (SVN_ERR_FS_GENERAL, 0, NULL, wb->trail->pool,
       "write_svndiff_strings: Failed to get new string key");

  /* Increment our running total by the full amount the caller handed
     us, i.e. the value *LEN has had all along. */
  wb->size += *len;

  return SVN_NO_ERROR;
}
/* Bookkeeping record for one svndiff window written by
   svn_fs__rep_deltify(); one of these is collected per window and
   later turned into a window entry of the new `delta' rep skel. */
typedef struct window_write_t
{
const char *key; /* key of the string holding this window's svndiff data */
apr_size_t svndiff_len; /* byte count reported by the write baton for this window */
apr_size_t text_off; /* offset in the fulltext at which this window's data starts */
apr_size_t text_len; /* amount of fulltext data this window represents (tview_len) */
} window_write_t;
/* Try to re-store representation TARGET in FS as a delta against
   representation SOURCE, as part of TRAIL.

   The contents of SOURCE and TARGET are read as streams and fed
   through svn_txdelta(); each resulting window is written as svndiff
   data into its own new string.  If that succeeds and saves space,
   TARGET's rep is replaced by a `delta' rep listing those strings
   and the strings TARGET used before are deleted.

   Returns without changing TARGET's rep when
     - TARGET's content size exceeds SVN_STREAM_CHUNK_SIZE, or
     - the svndiff data is not smaller than the storage TARGET
       currently uses (the newly written strings are deleted again).

   Returns SVN_ERR_FS_CORRUPT if TARGET and SOURCE are the same key,
   and SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT if the txdelta stream yields
   no MD5 digest.  All allocations are made in TRAIL->pool or a
   subpool of it. */
svn_error_t *
svn_fs__rep_deltify (svn_fs_t *fs,
const char *target,
const char *source,
trail_t *trail)
{
svn_stream_t *source_stream; /* stream to read the source */
svn_stream_t *target_stream; /* stream to read the target */
svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */
/* window-y things, and an array to track them; WINDOWS holds
   `window_write_t *' elements, one per window written */
window_write_t *ww;
apr_array_header_t *windows;
/* stream to write new (deltified) target data and its baton */
svn_stream_t *new_target_stream;
struct write_svndiff_strings_baton new_target_baton;
/* window handler/baton for writing to above stream */
svn_txdelta_window_handler_t new_target_handler;
void *new_target_handler_baton;
/* yes, we do windows */
svn_txdelta_window_t *window;
/* The current offset into the fulltext that our window is about to
   write.  It is advanced by each window's tview_len, so after all
   windows are written it is the sum of those lengths.  (The total
   size of the svndiff data is tracked separately, in DIFFSIZE.) */
apr_size_t tview_off = 0;
/* The total amount of diff data written while deltifying. */
apr_size_t diffsize = 0;
/* TARGET's original string keys */
apr_array_header_t *orig_str_keys;
/* MD5 digest, as reported by the txdelta stream once all windows
   have been read */
const unsigned char *digest;
/* pool for holding the windows */
apr_pool_t *wpool;
/* Paranoia: never allow a rep to be deltified against itself,
   because then there would be no fulltext reachable in the delta
   chain, and badness would ensue. */
if (strcmp (target, source) == 0)
return svn_error_createf
(SVN_ERR_FS_CORRUPT, 0, NULL, trail->pool,
"svn_fs__rep_deltify: attempt to deltify \"%s\" against itself",
target);
/* To favor time over space, we don't currently deltify files that
   are larger than the svndiff window size.  This might seem
   counterintuitive, but most files are smaller than a window
   anyway, and until we write the delta combiner or something
   approaching it, the cost of retrieval for large files becomes
   simply prohibitive after about 10 or so revisions.  See issue
   #531 for more details. */
{
apr_size_t size;
SVN_ERR (svn_fs__rep_contents_size (&size, fs, target, trail));
if (size > SVN_STREAM_CHUNK_SIZE)
return SVN_NO_ERROR;
}
/* Set up a handler for the svndiff data, which will write each
   window to its own string in the `strings' table.  The baton's
   `key' and `size' fields are (re)initialized per window in the
   loop below. */
new_target_baton.fs = fs;
new_target_baton.trail = trail;
new_target_baton.header_read = 0;
new_target_stream = svn_stream_create (&new_target_baton, trail->pool);
svn_stream_set_write (new_target_stream, write_svndiff_strings);
/* Get streams to our source and target text data. */
source_stream = svn_fs__rep_contents_read_stream (fs, source, 0,
trail, trail->pool);
target_stream = svn_fs__rep_contents_read_stream (fs, target, 0,
trail, trail->pool);
/* Setup a stream to convert the textdelta data into svndiff windows. */
svn_txdelta (&txdelta_stream, source_stream, target_stream, trail->pool);
svn_txdelta_to_svndiff (new_target_stream, trail->pool,
&new_target_handler, &new_target_handler_baton);
/* subpool for the windows */
wpool = svn_pool_create (trail->pool);
/* Now, loop, manufacturing and dispatching windows of svndiff data. */
windows = apr_array_make (trail->pool, 1, sizeof (ww));
do
{
/* Reset some baton variables, so that this window is written to a
   brand new string and its size is counted from zero. */
new_target_baton.size = 0;
new_target_baton.key = NULL;
/* Fetch the next window of txdelta data. */
SVN_ERR (svn_txdelta_next_window (&window, txdelta_stream, wpool));
/* Send off this package to be written as svndiff data.  Note that
   the handler is also invoked with the final NULL window. */
SVN_ERR (new_target_handler (window, new_target_handler_baton));
if (window)
{
/* Add a new window description to our array. */
ww = apr_pcalloc (trail->pool, sizeof (*ww));
ww->key = new_target_baton.key;
ww->svndiff_len = new_target_baton.size;
ww->text_off = tview_off;
ww->text_len = window->tview_len;
(*((window_write_t **)(apr_array_push (windows)))) = ww;
/* Update our recordkeeping variables. */
tview_off += window->tview_len;
diffsize += ww->svndiff_len;
/* Free the window. */
svn_pool_clear (wpool);
}
} while (window);
svn_pool_destroy (wpool);
/* Having processed all the windows, we can query the MD5 digest
   from the stream. */
digest = svn_txdelta_md5_digest (txdelta_stream);
/* NOTE(review): the message below names SOURCE; confirm that this is
   the rep the digest is actually computed over. */
if (! digest)
return svn_error_createf
(SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, 0, NULL, trail->pool,
"svn_fs__rep_deltify: failed to calculate MD5 digest for %s",
source);
/* Get the size of the target's original string data.  Note that we
   don't use svn_fs__rep_contents_size() for this; that function
   always returns the fulltext size, whereas we need to know the
   actual amount of storage used by this representation.  Check the
   size of the new string.  If it is larger than the old one, this
   whole deltification might not be such a bright idea.  While we're
   at it, we might as well figure out all the strings currently used
   by TARGET so we can potentially delete them later. */
{
skel_t *old_rep;
apr_size_t old_size = 0;
const char *str_key;
SVN_ERR (svn_fs__read_rep (&old_rep, fs, target, trail));
if (rep_is_fulltext (old_rep))
{
/* A fulltext rep is backed by exactly one string. */
SVN_ERR (fulltext_string_key (&str_key, old_rep, trail->pool));
SVN_ERR (svn_fs__string_size (&old_size, fs, str_key, trail));
orig_str_keys = apr_array_make (trail->pool, 1, sizeof (str_key));
(*((const char **)(apr_array_push (orig_str_keys)))) = str_key;
}
else
{
/* A delta rep may use several strings; add up all their sizes. */
int i;
apr_size_t my_size;
SVN_ERR (delta_string_keys (&orig_str_keys, old_rep, trail->pool));
for (i = 0; i < orig_str_keys->nelts; i++)
{
str_key = ((const char **)(orig_str_keys->elts))[i];
SVN_ERR (svn_fs__string_size (&my_size, fs, str_key, trail));
old_size += my_size;
}
}
/* If the new data is NOT a space optimization, destroy the
   string(s) we created, and get outta here. */
if (diffsize >= old_size)
{
int i;
for (i = 0; i < windows->nelts; i++)
{
ww = ((window_write_t **)(windows->elts))[i];
SVN_ERR (svn_fs__string_delete (fs, ww->key, trail));
}
return SVN_NO_ERROR;
}
}
/* Hook the new strings we wrote into the rest of the filesystem by
   building a new representation skel to replace our old one. */
{
int i;
skel_t *rep = svn_fs__make_empty_list (trail->pool);
skel_t *header = svn_fs__make_empty_list (trail->pool);
/* Loop backwards through the windows we wrote, creating and
   prepending skels to our rep.  Because each skel is built by
   prepending, its elements are added in reverse of their final
   order. */
for (i = windows->nelts; i > 0; i--)
{
const char *size_str;
const char *offset_str;
skel_t *win = svn_fs__make_empty_list (trail->pool);
skel_t *winhdr = svn_fs__make_empty_list (trail->pool);
skel_t *diff = svn_fs__make_empty_list (trail->pool);
skel_t *checksum = svn_fs__make_empty_list (trail->pool);
ww = ((window_write_t **)(windows->elts))[i-1];
offset_str = apr_psprintf (trail->pool, "%" APR_SIZE_T_FMT,
ww->text_off);
size_str = apr_psprintf (trail->pool, "%" APR_SIZE_T_FMT,
ww->text_len);
/* The diff: ("svndiff" STRING-KEY). */
svn_fs__prepend (svn_fs__str_atom (ww->key, trail->pool), diff);
svn_fs__prepend (svn_fs__str_atom ("svndiff", trail->pool), diff);
/* The checksum: ("md5" DIGEST). */
svn_fs__prepend (svn_fs__mem_atom (digest,
MD5_DIGESTSIZE,
trail->pool), checksum);
svn_fs__prepend (svn_fs__str_atom ("md5", trail->pool), checksum);
/* The window: (DIFF SIZE CHECKSUM SOURCE-REP-KEY). */
svn_fs__prepend (svn_fs__str_atom (source, trail->pool), win);
svn_fs__prepend (checksum, win);
svn_fs__prepend (svn_fs__str_atom (size_str, trail->pool), win);
svn_fs__prepend (diff, win);
/* The window header: (OFFSET WINDOW). */
svn_fs__prepend (win, winhdr);
svn_fs__prepend (svn_fs__str_atom (offset_str, trail->pool), winhdr);
/* Add this window item to the rep. */
svn_fs__prepend (winhdr, rep);
}
/* Don't forget to prepend the header! */
svn_fs__prepend (svn_fs__str_atom ("delta", trail->pool), header);
svn_fs__prepend (header, rep);
/* Write out the new representation, and only then delete the
   strings the old representation was using. */
SVN_ERR (svn_fs__write_rep (fs, target, rep, trail));
SVN_ERR (delete_strings (orig_str_keys, fs, trail));
}
return SVN_NO_ERROR;
}
/* Convert representation REP in FS from `delta' form back into
   `fulltext' form, as part of TRAIL.  If REP is already a fulltext
   rep, nothing is done.

   The contents of REP are read through a rep read stream and copied,
   one buffer at a time, into a brand new string.  REP is then
   rewritten as a fulltext rep pointing at that string, and the
   strings that backed the old delta rep are deleted.  Allocations
   are made in TRAIL->pool. */
svn_error_t *
svn_fs__rep_undeltify (svn_fs_t *fs,
                       const char *rep,
                       trail_t *trail)
{
  /* ### todo: Make this thing `delta'-aware! */
  skel_t *skel;
  apr_array_header_t *old_keys;
  struct write_string_baton sink_baton;
  svn_stream_t *contents_in;    /* yields REP's reconstructed contents */
  svn_stream_t *string_out;     /* appends to the new fulltext string */
  unsigned char chunk[65536];
  apr_size_t nread;
  apr_size_t nwritten;

  /* Fetch the rep; fulltext reps need no work at all. */
  SVN_ERR (svn_fs__read_rep (&skel, fs, rep, trail));
  if (rep_is_fulltext (skel))
    return SVN_NO_ERROR;

  /* Note which strings the delta rep uses now, so they can be
     removed once the replacement rep has been stored. */
  SVN_ERR (delta_string_keys (&old_keys, skel, trail->pool));

  /* The output side: a stream that appends everything written to it
     onto one string.  The NULL key lets the first append pick a new
     string key. */
  sink_baton.key = NULL;
  sink_baton.trail = trail;
  sink_baton.fs = fs;
  string_out = svn_stream_create (&sink_baton, trail->pool);
  svn_stream_set_write (string_out, write_string);

  /* The input side: REP's contents, starting at offset 0. */
  contents_in = svn_fs__rep_contents_read_stream (fs, rep, 0,
                                                  trail, trail->pool);

  /* Pump data across until a pass moves zero bytes.  That last,
     empty pass still performs a (zero-length) write. */
  for (;;)
    {
      nread = sizeof (chunk);
      SVN_ERR (svn_stream_read (contents_in, chunk, &nread));

      nwritten = nread;
      SVN_ERR (svn_stream_write (string_out, chunk, &nwritten));

      if (nwritten != nread)
        return svn_error_createf
          (SVN_ERR_FS_GENERAL, 0, NULL, trail->pool,
           "svn_fs__rep_undeltify: Error writing fulltext contents");

      if (nwritten == 0)
        break;
    }

  /* SINK_BATON.key now names the new string.  Store a fulltext rep
     that refers to it... */
  skel = make_fulltext_rep_skel (sink_baton.key, 0, trail->pool);
  SVN_ERR (svn_fs__write_rep (fs, rep, skel, trail));

  /* ...and get rid of the strings the delta rep used. */
  SVN_ERR (delete_strings (old_keys, fs, trail));

  return SVN_NO_ERROR;
}
/*
* local variables:
* eval: (load-file "../../tools/dev/svn-dev.el")
* end:
*/