blob: 8acc520ebd5fc31b36cf09b540fc32d3d759958e [file] [log] [blame]
/*
* hash.c : dumping and reading hash tables to/from files.
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*/
#include <stdlib.h>
#include <limits.h>
#include <apr_version.h>
#include <apr_pools.h>
#include <apr_hash.h>
#include <apr_file_io.h>
#ifndef SVN_HASH__GETS_SETS
#define SVN_HASH__GETS_SETS
#endif
#include "svn_hash.h"
#include "svn_types.h"
#include "svn_string.h"
#include "svn_error.h"
#include "svn_sorts.h"
#include "svn_io.h"
#include "svn_pools.h"
#include "private/svn_dep_compat.h"
#include "private/svn_sorts_private.h"
#include "private/svn_subr_private.h"
#include "svn_private_config.h"
/*
* The format of a dumped hash table is:
*
* K <nlength>
* name (a string of <nlength> bytes, followed by a newline)
* V <vlength>
* val (a string of <vlength> bytes, followed by a newline)
* [... etc, etc ...]
* END
*
*
* (Yes, there is a newline after END.)
*
* For example:
*
* K 5
* color
* V 3
* red
* K 11
* wine review
* V 376
* A forthright entrance, yet coquettish on the tongue, its deceptively
* fruity exterior hides the warm mahagony undercurrent that is the
* hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will
* be pleased to note the familiar, subtle hints of mulberries and
* carburator fluid. Its confident finish is marred only by a barely
* detectable suggestion of rancid squid ink.
* K 5
* price
* V 8
* US $6.50
* END
*
*/
/*** Dumping and loading hash files. */
/* Implements svn_hash_read2 and svn_hash_read_incremental. */
svn_error_t *
svn_hash__read_entry(svn_hash__entry_t *entry,
svn_stream_t *stream,
const char *terminator,
svn_boolean_t incremental,
apr_pool_t *pool)
{
svn_stringbuf_t *buf;
svn_boolean_t eof;
apr_size_t len;
char c;
svn_error_t *err;
apr_uint64_t ui64;
/* Read a key length line. Might be END, though. */
SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
/* Check for the end of the hash. */
if ((!terminator && eof && buf->len == 0)
|| (terminator && (strcmp(buf->data, terminator) == 0)))
{
entry->key = NULL;
entry->keylen = 0;
entry->val = NULL;
entry->vallen = 0;
return SVN_NO_ERROR;
}
/* Check for unexpected end of stream */
if (eof)
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash missing terminator"));
if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
{
/* Get the length of the key */
err = svn_cstring_strtoui64(&ui64, buf->data + 2,
0, APR_SIZE_MAX, 10);
if (err)
return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
_("Serialized hash malformed key length"));
entry->keylen = (apr_size_t)ui64;
/* Now read that much into a buffer. */
entry->key = apr_palloc(pool, entry->keylen + 1);
SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
entry->key[entry->keylen] = '\0';
/* Suck up extra newline after key data */
len = 1;
SVN_ERR(svn_stream_read_full(stream, &c, &len));
if (c != '\n')
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash malformed key data"));
/* Read a val length line */
SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
{
/* Get the length of the val */
err = svn_cstring_strtoui64(&ui64, buf->data + 2,
0, APR_SIZE_MAX, 10);
if (err)
return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
_("Serialized hash malformed value length"));
entry->vallen = (apr_size_t)ui64;
entry->val = apr_palloc(pool, entry->vallen + 1);
SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen));
entry->val[entry->vallen] = '\0';
/* Suck up extra newline after val data */
len = 1;
SVN_ERR(svn_stream_read_full(stream, &c, &len));
if (c != '\n')
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash malformed value data"));
}
else
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash malformed"));
}
else if (incremental && (buf->len >= 3)
&& (buf->data[0] == 'D') && (buf->data[1] == ' '))
{
/* Get the length of the key */
err = svn_cstring_strtoui64(&ui64, buf->data + 2,
0, APR_SIZE_MAX, 10);
if (err)
return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
_("Serialized hash malformed key length"));
entry->keylen = (apr_size_t)ui64;
/* Now read that much into a buffer. */
entry->key = apr_palloc(pool, entry->keylen + 1);
SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
entry->key[entry->keylen] = '\0';
/* Suck up extra newline after key data */
len = 1;
SVN_ERR(svn_stream_read_full(stream, &c, &len));
if (c != '\n')
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash malformed key data"));
/* Remove this hash entry. */
entry->vallen = 0;
entry->val = NULL;
}
else
{
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Serialized hash malformed"));
}
return SVN_NO_ERROR;
}
static svn_error_t *
hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
svn_boolean_t incremental, apr_pool_t *pool)
{
apr_pool_t *iterpool = svn_pool_create(pool);
while (1)
{
svn_hash__entry_t entry;
svn_pool_clear(iterpool);
SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
incremental, iterpool));
/* end of hash? */
if (entry.key == NULL)
break;
if (entry.val)
{
/* Add a new hash entry. */
apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen),
entry.keylen,
svn_string_ncreate(entry.val, entry.vallen, pool));
}
else
{
/* Remove this hash entry. */
apr_hash_set(hash, entry.key, entry.keylen, NULL);
}
}
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
/* Implements svn_hash_write2 and svn_hash_write_incremental. */
static svn_error_t *
hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
const char *terminator, apr_pool_t *pool)
{
apr_pool_t *subpool;
apr_size_t len;
apr_array_header_t *list;
int i;
subpool = svn_pool_create(pool);
list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
for (i = 0; i < list->nelts; i++)
{
svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
svn_string_t *valstr = item->value;
svn_pool_clear(subpool);
/* Don't output entries equal to the ones in oldhash, if present. */
if (oldhash)
{
svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
if (oldstr && svn_string_compare(valstr, oldstr))
continue;
}
if (item->klen < 0)
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
_("Cannot serialize negative length"));
/* Write it out. */
SVN_ERR(svn_stream_printf(stream, subpool,
"K %" APR_SIZE_T_FMT "\n%s\n"
"V %" APR_SIZE_T_FMT "\n",
(apr_size_t) item->klen,
(const char *) item->key,
valstr->len));
len = valstr->len;
SVN_ERR(svn_stream_write(stream, valstr->data, &len));
SVN_ERR(svn_stream_puts(stream, "\n"));
}
if (oldhash)
{
/* Output a deletion entry for each property in oldhash but not hash. */
list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
pool);
for (i = 0; i < list->nelts; i++)
{
svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
svn_pool_clear(subpool);
/* If it's not present in the new hash, write out a D entry. */
if (! apr_hash_get(hash, item->key, item->klen))
SVN_ERR(svn_stream_printf(stream, subpool,
"D %" APR_SSIZE_T_FMT "\n%s\n",
item->klen, (const char *) item->key));
}
}
if (terminator)
SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
svn_pool_destroy(subpool);
return SVN_NO_ERROR;
}
svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
const char *terminator, apr_pool_t *pool)
{
return hash_read(hash, stream, terminator, FALSE, pool);
}
svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
svn_stream_t *stream,
const char *terminator,
apr_pool_t *pool)
{
return hash_read(hash, stream, terminator, TRUE, pool);
}
svn_error_t *
svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
const char *terminator, apr_pool_t *pool)
{
return hash_write(hash, NULL, stream, terminator, pool);
}
svn_error_t *
svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
svn_stream_t *stream, const char *terminator,
apr_pool_t *pool)
{
SVN_ERR_ASSERT(oldhash != NULL);
return hash_write(hash, oldhash, stream, terminator, pool);
}
svn_error_t *
svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
{
return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
SVN_HASH_TERMINATOR, pool);
}
/* There are enough quirks in the deprecated svn_hash_read that we
should just preserve its implementation. */
svn_error_t *
svn_hash_read(apr_hash_t *hash,
apr_file_t *srcfile,
apr_pool_t *pool)
{
svn_error_t *err;
char buf[SVN_KEYLINE_MAXLEN];
apr_size_t num_read;
char c;
int first_time = 1;
while (1)
{
/* Read a key length line. Might be END, though. */
apr_size_t len = sizeof(buf);
err = svn_io_read_length_line(srcfile, buf, &len, pool);
if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
{
/* We got an EOF on our very first attempt to read, which
means it's a zero-byte file. No problem, just go home. */
svn_error_clear(err);
return SVN_NO_ERROR;
}
else if (err)
/* Any other circumstance is a genuine error. */
return err;
first_time = 0;
if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
|| ((len == 9)
&& (buf[0] == 'P')
&& (buf[1] == 'R') /* We formerly used just "END" to */
&& (buf[2] == 'O') /* end a property hash, but later */
&& (buf[3] == 'P') /* we added "PROPS-END", so that */
&& (buf[4] == 'S') /* the fs dump format would be */
&& (buf[5] == '-') /* more human-readable. That's */
&& (buf[6] == 'E') /* why we accept either way here. */
&& (buf[7] == 'N')
&& (buf[8] == 'D')))
{
/* We've reached the end of the dumped hash table, so leave. */
return SVN_NO_ERROR;
}
else if ((buf[0] == 'K') && (buf[1] == ' '))
{
size_t keylen;
int parsed_len;
void *keybuf;
/* Get the length of the key */
SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
keylen = parsed_len;
/* Now read that much into a buffer, + 1 byte for null terminator */
keybuf = apr_palloc(pool, keylen + 1);
SVN_ERR(svn_io_file_read_full2(srcfile,
keybuf, keylen,
&num_read, NULL, pool));
((char *) keybuf)[keylen] = '\0';
/* Suck up extra newline after key data */
SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
if (c != '\n')
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
/* Read a val length line */
len = sizeof(buf);
SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
if ((buf[0] == 'V') && (buf[1] == ' '))
{
svn_string_t *value = apr_palloc(pool, sizeof(*value));
apr_size_t vallen;
void *valbuf;
/* Get the length of the value */
SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
vallen = parsed_len;
/* Again, 1 extra byte for the null termination. */
valbuf = apr_palloc(pool, vallen + 1);
SVN_ERR(svn_io_file_read_full2(srcfile,
valbuf, vallen,
&num_read, NULL, pool));
((char *) valbuf)[vallen] = '\0';
/* Suck up extra newline after val data */
SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
if (c != '\n')
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
value->data = valbuf;
value->len = vallen;
/* The Grand Moment: add a new hash entry! */
apr_hash_set(hash, keybuf, keylen, value);
}
else
{
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
}
}
else
{
return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
}
} /* while (1) */
}
/*** Diffing hashes ***/
svn_error_t *
svn_hash_diff(apr_hash_t *hash_a,
apr_hash_t *hash_b,
svn_hash_diff_func_t diff_func,
void *diff_func_baton,
apr_pool_t *pool)
{
apr_hash_index_t *hi;
if (hash_a)
for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
{
const void *key;
apr_ssize_t klen;
apr_hash_this(hi, &key, &klen, NULL);
if (hash_b && (apr_hash_get(hash_b, key, klen)))
SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
diff_func_baton));
else
SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
diff_func_baton));
}
if (hash_b)
for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
{
const void *key;
apr_ssize_t klen;
apr_hash_this(hi, &key, &klen, NULL);
if (! (hash_a && apr_hash_get(hash_a, key, klen)))
SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
diff_func_baton));
}
return SVN_NO_ERROR;
}
/*** Misc. hash APIs ***/
svn_error_t *
svn_hash_keys(apr_array_header_t **array,
apr_hash_t *hash,
apr_pool_t *pool)
{
apr_hash_index_t *hi;
*array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
{
APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi);
}
return SVN_NO_ERROR;
}
svn_error_t *
svn_hash_from_cstring_keys(apr_hash_t **hash_p,
const apr_array_header_t *keys,
apr_pool_t *pool)
{
int i;
apr_hash_t *hash = svn_hash__make(pool);
for (i = 0; i < keys->nelts; i++)
{
const char *key =
apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
svn_hash_sets(hash, key, key);
}
*hash_p = hash;
return SVN_NO_ERROR;
}
void *
svn_hash__gets(apr_hash_t *ht, const char *key)
{
return apr_hash_get(ht, key, APR_HASH_KEY_STRING);
}
void
svn_hash__sets(apr_hash_t *ht, const char *key, const void *val)
{
apr_hash_set(ht, key, APR_HASH_KEY_STRING, val);
}
/*** Specialized getter APIs ***/
const char *
svn_hash__get_cstring(apr_hash_t *hash,
const char *key,
const char *default_value)
{
if (hash)
{
const char *value = svn_hash_gets(hash, key);
return value ? value : default_value;
}
return default_value;
}
svn_boolean_t
svn_hash__get_bool(apr_hash_t *hash, const char *key,
svn_boolean_t default_value)
{
const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
svn_tristate_t value = svn_tristate__from_word(tmp_value);
if (value == svn_tristate_true)
return TRUE;
else if (value == svn_tristate_false)
return FALSE;
return default_value;
}
/*** Optimized hash function ***/
/* apr_hashfunc_t optimized for the key that we use in SVN: paths and
* property names. Its primary goal is speed for keys of known length.
*
* Since strings tend to spawn large value spaces (usually differ in many
* bits with differences spanning a larger section of the key), we can be
* quite sloppy extracting a hash value. The more keys there are in a
* hash container, the more bits of the value returned by this function
* will be used. For a small number of string keys, choosing bits from any
* any fix location close to the tail of those keys would usually be good
* enough to prevent high collision rates.
*/
static unsigned int
hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
{
unsigned int hash = 0;
const unsigned char *key = (const unsigned char *)char_key;
const unsigned char *p;
apr_ssize_t i;
if (*klen == APR_HASH_KEY_STRING)
*klen = strlen(char_key);
#if SVN_UNALIGNED_ACCESS_IS_OK
for (p = key, i = *klen; i >= 4; i-=4, p+=4)
{
apr_uint32_t chunk = *(const apr_uint32_t *)p;
/* the ">> 17" part gives upper bits in the chunk a chance to make
some impact as well */
hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
}
#else
for (p = key, i = *klen; i >= 4; i-=4, p+=4)
{
hash = hash * 33 * 33 * 33 * 33
+ p[0] * 33 * 33 * 33
+ p[1] * 33 * 33
+ p[2] * 33
+ p[3];
}
#endif
for (; i; i--, p++)
hash = hash * 33 + *p;
return hash;
}
apr_hash_t *
svn_hash__make(apr_pool_t *pool)
{
return apr_hash_make_custom(pool, hashfunc_compatible);
}