On the fsfs-ucsnorm branch: Synced with trunk up to r1575684.
git-svn-id: https://svn.apache.org/repos/asf/subversion/branches/fsfs-ucsnorm@1575685 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/BRANCH-README b/BRANCH-README
new file mode 100644
index 0000000..e726beb
--- /dev/null
+++ b/BRANCH-README
@@ -0,0 +1,58 @@
+Enabling Normalized Path Lookup in FSFS
+=======================================
+
+The purpose of this [fsfs-ucsnorm] branch is to implement
+normalization-insensitive path lookups in FSFS. This will prevent the
+creation of paths that differ only in normalization, and will also
+remove the current constraint that paths used in the FS API must be
+byte-for-byte identical to those stored in the filesystem.
+
+The filesystem will *not* impose a particular normalization form, and
+it *will* preserve whatever representation it receives when a new path
+is created.
+
+This option would be enabled by default for all new FSFS-based
+repositories and *disabled* during repository format upgrade. The
+option can be disabled or enabled at any time during the lifetime of
+the repository; however, it is not safe to enable it without first
+running:
+
+ svnadmin verify REPOS --check-normalization
+
+
+Proposed argument to 'svnadmin create':
+
+ svnadmin create REPOS --disable-normalized-lookup
+
+ Without this option, normalized lookup will be enabled by
+ default.
+
+Proposed argument to 'svnadmin upgrade':
+
+ svnadmin upgrade REPOS --enable-normalized-lookup
+
+ Without this option, normalized lookup will be disabled during
+ format upgrade, unless it was already enabled in fsfs.conf.
+
+Proposed option in fsfs.conf:
+
+ [normalization]
+ normalized-lookup = true|false
+
+ Default value: true (in FSFSv7; false for older formats)
+
+
+References
+==========
+
+Unicode Normalization Forms
+ http://unicode.org/reports/tr15/#Norm_Forms
+
+Normalization Insensitivity (blog post)
+ https://blogs.oracle.com/nico/entry/normalization_insensitivity_should_be_the
+
+zfs(1M)
+ http://www.freebsd.org/cgi/man.cgi?query=zfs&apropos=0&sektion=0&manpath=FreeBSD+8.1-RELEASE&format=html
+
+zfs_share(1M)
+ http://docs.oracle.com/cd/E23824_01/html/821-1462/zfs-share-1m.html#scrolltoc
diff --git a/subversion/include/svn_error_codes.h b/subversion/include/svn_error_codes.h
index fd1dfa2..9ff7182 100644
--- a/subversion/include/svn_error_codes.h
+++ b/subversion/include/svn_error_codes.h
@@ -851,6 +851,11 @@
SVN_ERR_FS_CATEGORY_START + 60,
"Move without a suitable deletion")
+ /** @since New in 1.9. */
+ SVN_ERRDEF(SVN_ERR_FS_NAME_COLLISION,
+ SVN_ERR_FS_CATEGORY_START + 61,
+ "Normalized directory entry names are identical")
+
/* repos errors */
SVN_ERRDEF(SVN_ERR_REPOS_LOCKED,
diff --git a/subversion/libsvn_fs_fs/cached_data.c b/subversion/libsvn_fs_fs/cached_data.c
index ba1ec19..35aa52a 100644
--- a/subversion/libsvn_fs_fs/cached_data.c
+++ b/subversion/libsvn_fs_fs/cached_data.c
@@ -1942,39 +1942,39 @@
return SVN_NO_ERROR;
}
-/* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
- by their respective name. */
+/* Return TRUE when all svn_fs_fs__dirent_t* in ENTRIES are already sorted
+ by their respective key. */
static svn_boolean_t
sorted(apr_array_header_t *entries)
{
int i;
- const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
+ const svn_fs_fs__dirent_t *const *dirents = (const void *)entries->elts;
for (i = 0; i < entries->nelts-1; ++i)
- if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
+ if (strcmp(dirents[i]->key, dirents[i+1]->key) > 0)
return FALSE;
return TRUE;
}
-/* Compare the names of the two dirents given in **A and **B. */
+/* Compare the keys of the two dirents given in **A and **B. */
static int
compare_dirents(const void *a, const void *b)
{
- const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
- const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
+ const svn_fs_fs__dirent_t *lhs = *((const svn_fs_fs__dirent_t * const *) a);
+ const svn_fs_fs__dirent_t *rhs = *((const svn_fs_fs__dirent_t * const *) b);
- return strcmp(lhs->name, rhs->name);
+ return strcmp(lhs->key, rhs->key);
}
-/* Compare the name of the dirents given in **A with the C string in *B. */
+/* Compare the key of the dirents given in **A with the C string in *B. */
static int
-compare_dirent_name(const void *a, const void *b)
+compare_dirent_key(const void *a, const void *b)
{
- const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
+ const svn_fs_fs__dirent_t *lhs = *((const svn_fs_fs__dirent_t * const *) a);
const char *rhs = b;
- return strcmp(lhs->name, rhs);
+ return strcmp(lhs->key, rhs);
}
/* Into ENTRIES, read all directories entries from the key-value text in
@@ -1986,6 +1986,7 @@
svn_stream_t *stream,
svn_boolean_t incremental,
const svn_fs_id_t *id,
+ svn_boolean_t normalized_lookup,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
@@ -1999,7 +2000,7 @@
while (1)
{
svn_hash__entry_t entry;
- svn_fs_dirent_t *dirent;
+ svn_fs_fs__dirent_t *dirent;
char *str;
svn_pool_clear(iterpool);
@@ -2028,7 +2029,9 @@
/* Add a new directory entry. */
dirent = apr_pcalloc(result_pool, sizeof(*dirent));
- dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
+ dirent->dirent.name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
+ SVN_ERR(svn_fs_fs__set_dirent_key(dirent, normalized_lookup,
+ result_pool, scratch_pool));
str = svn_cstring_tokenize(" ", &entry.val);
if (str == NULL)
@@ -2038,11 +2041,11 @@
if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
{
- dirent->kind = svn_node_file;
+ dirent->dirent.kind = svn_node_file;
}
else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
{
- dirent->kind = svn_node_dir;
+ dirent->dirent.kind = svn_node_dir;
}
else
{
@@ -2057,7 +2060,7 @@
_("Directory entry corrupt in '%s'"),
svn_fs_fs__id_unparse(id, scratch_pool)->data);
- dirent->id = svn_fs_fs__id_parse(str, strlen(str), result_pool);
+ dirent->dirent.id = svn_fs_fs__id_parse(str, strlen(str), result_pool);
/* In incremental mode, update the hash; otherwise, write to the
* final array. Be sure to use hash keys that survive this iteration.
@@ -2065,7 +2068,7 @@
if (incremental)
apr_hash_set(hash, dirent->name, entry.keylen, dirent);
else
- APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
+ APR_ARRAY_PUSH(entries, svn_fs_fs__dirent_t *) = dirent;
}
/* Convert container to a sorted array. */
@@ -2073,7 +2076,7 @@
{
apr_hash_index_t *hi;
for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
- APR_ARRAY_PUSH(entries, svn_fs_dirent_t *)
+ APR_ARRAY_PUSH(entries, svn_fs_fs__dirent_t *)
= svn__apr_hash_index_val(hi);
}
@@ -2087,7 +2090,7 @@
/* Fetch the contents of a directory into ENTRIES. Values are stored
as filename to string mappings; further conversion is necessary to
- convert them into svn_fs_dirent_t values. */
+ convert them into svn_fs_fs__dirent_t values. */
static svn_error_t *
get_dir_contents(apr_array_header_t **entries,
svn_fs_t *fs,
@@ -2095,9 +2098,11 @@
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
+ const svn_boolean_t normalized_lookup =
+ ((fs_fs_data_t*)fs->fsap_data)->normalized_lookup;
svn_stream_t *contents;
- *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
+ *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_fs__dirent_t *));
if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
{
const char *filename
@@ -2109,7 +2114,7 @@
SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
scratch_pool));
SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id,
- result_pool, scratch_pool));
+ normalized_lookup, result_pool, scratch_pool));
SVN_ERR(svn_stream_close(contents));
}
else if (noderev->data_rep)
@@ -2133,7 +2138,7 @@
/* de-serialize hash */
contents = svn_stream_from_stringbuf(text, text_pool);
SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id,
- result_pool, scratch_pool));
+ normalized_lookup, result_pool, scratch_pool));
svn_pool_destroy(text_pool);
}
@@ -2215,21 +2220,21 @@
return SVN_NO_ERROR;
}
-svn_fs_dirent_t *
+svn_fs_fs__dirent_t *
svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
- const char *name,
+ const char *key,
int *hint)
{
- svn_fs_dirent_t **result
- = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
+ svn_fs_fs__dirent_t **result
+ = svn_sort__array_lookup(entries, key, hint, compare_dirent_key);
return result ? *result : NULL;
}
svn_error_t *
-svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
+svn_fs_fs__rep_contents_dir_entry(svn_fs_fs__dirent_t **dirent,
svn_fs_t *fs,
node_revision_t *noderev,
- const char *name,
+ const char *key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
@@ -2237,8 +2242,8 @@
/* find the cache we may use */
pair_cache_key_t pair_key = { 0 };
- const void *key;
- svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
+ const void *cache_key;
+ svn_cache__t *cache = locate_dir_cache(fs, &cache_key, &pair_key, noderev,
scratch_pool);
if (cache)
{
@@ -2246,9 +2251,9 @@
SVN_ERR(svn_cache__get_partial((void **)dirent,
&found,
cache,
- key,
+ cache_key,
svn_fs_fs__extract_dir_entry,
- (void*)name,
+ (void*)key,
result_pool));
}
@@ -2256,8 +2261,8 @@
if (! found)
{
apr_array_header_t *entries;
- svn_fs_dirent_t *entry;
- svn_fs_dirent_t *entry_copy = NULL;
+ svn_fs_fs__dirent_t *entry;
+ svn_fs_fs__dirent_t *entry_copy = NULL;
/* read the dir from the file system. It will probably be put it
into the cache for faster lookup in future calls. */
@@ -2265,13 +2270,21 @@
scratch_pool, scratch_pool));
/* find desired entry and return a copy in POOL, if found */
- entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
+ entry = svn_fs_fs__find_dir_entry(entries, key, NULL);
if (entry)
{
+ svn_fs_dirent_t *dirent_copy;
+
entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
- entry_copy->name = apr_pstrdup(result_pool, entry->name);
- entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
- entry_copy->kind = entry->kind;
+ dirent_copy = &entry_copy->dirent;
+ dirent_copy->name = apr_pstrdup(result_pool, entry->dirent.name);
+ dirent_copy->id = svn_fs_fs__id_copy(entry->dirent.id, result_pool);
+ dirent_copy->kind = entry->dirent.kind;
+
+ if (entry->key != entry->dirent.name)
+ entry_copy->key = apr_pstrdup(result_pool, entry->key);
+ else
+ entry_copy->key = dirent_copy->name;
}
*dirent = entry_copy;
diff --git a/subversion/libsvn_fs_fs/cached_data.h b/subversion/libsvn_fs_fs/cached_data.h
index 4c10f7f..66e4792 100644
--- a/subversion/libsvn_fs_fs/cached_data.h
+++ b/subversion/libsvn_fs_fs/cached_data.h
@@ -27,6 +27,7 @@
#include "svn_fs.h"
#include "fs.h"
+#include "dirent.h"
@@ -114,24 +115,32 @@
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
-/* Return the directory entry from ENTRIES that matches NAME. If no such
+/* Return the directory entry from ENTRIES that matches KEY. If no such
entry exists, return NULL. If HINT is not NULL, set *HINT to the array
index of the entry returned. Successive calls in a linear scan scenario
- will be faster called with the same HINT variable. */
-svn_fs_dirent_t *
+ will be faster called with the same HINT variable.
+
+ Note: When normalized lookups are enabled, KEY must be the
+ normalized form of the entry name; otherwise, it should be
+ the original form of the entry name. */
+svn_fs_fs__dirent_t *
svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
- const char *name,
+ const char *key,
int *hint);
-/* Set *DIRENT to the entry identified by NAME in the directory given
+/* Set *DIRENT to the entry identified by KEY in the directory given
by NODEREV in filesystem FS. If no such entry exits, *DIRENT will
be NULL. The returned object is allocated in RESULT_POOL; SCRATCH_POOL
- used for temporary allocations. */
+ used for temporary allocations.
+
+ Note: When normalized lookups are enabled, KEY must be the
+ normalized form of the entry name; otherwise, it should be
+ the original form of the entry name. */
svn_error_t *
-svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
+svn_fs_fs__rep_contents_dir_entry(svn_fs_fs__dirent_t **dirent,
svn_fs_t *fs,
node_revision_t *noderev,
- const char *name,
+ const char *key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
diff --git a/subversion/libsvn_fs_fs/dag.c b/subversion/libsvn_fs_fs/dag.c
index 9e039b4..da0caeb 100644
--- a/subversion/libsvn_fs_fs/dag.c
+++ b/subversion/libsvn_fs_fs/dag.c
@@ -74,7 +74,10 @@
/* The pool to allocate NODE_REVISION in. */
apr_pool_t *node_pool;
- /* the path at which this node was created. */
+ /* The path at which this node was created.
+
+ Note: This is the path in its original form, even when
+ normalized lookups are enabled. */
const char *created_path;
};
@@ -301,21 +304,26 @@
/* Some of these are helpers for functions outside this section. */
-/* Set *ID_P to the node-id for entry NAME in PARENT. If no such
- entry, set *ID_P to NULL but do not error. The node-id is
- allocated in POOL. */
+/* Set *ID_P to the node-id for entry KEY in PARENT, and *NAME_P to
+ its denormalized name. If no such entry exists, set both *ID_P
+ and *NAME_P to NULL but do not error. The node-id and name are
+ allocated in RESULT_POOL. */
static svn_error_t *
-dir_entry_id_from_node(const svn_fs_id_t **id_p,
- dag_node_t *parent,
- const char *name,
- apr_pool_t *result_pool,
- apr_pool_t *scratch_pool)
+dir_entry_id_and_name_from_node(const svn_fs_id_t **id_p,
+ const char **name_p,
+ dag_node_t *parent,
+ const char *key,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
{
- svn_fs_dirent_t *dirent;
+ svn_fs_fs__dirent_t *entry;
- SVN_ERR(svn_fs_fs__dag_dir_entry(&dirent, parent, name, result_pool,
+ SVN_ERR(svn_fs_fs__dag_dir_entry(&entry, parent, key, result_pool,
scratch_pool));
- *id_p = dirent ? dirent->id : NULL;
+ if (id_p)
+ *id_p = (entry ? entry->dirent.id : NULL);
+ if (name_p)
+ *name_p = (entry ? entry->dirent.name : NULL);
return SVN_NO_ERROR;
}
@@ -327,10 +335,12 @@
Assumptions:
- PARENT is a mutable directory.
- ID does not refer to an ancestor of parent
+ - KEY is the normalized form of NAME
- NAME is a single path component
*/
static svn_error_t *
set_entry(dag_node_t *parent,
+ const char *key,
const char *name,
const svn_fs_id_t *id,
svn_node_kind_t kind,
@@ -343,8 +353,8 @@
SVN_ERR(get_node_revision(&parent_noderev, parent));
/* Set the new entry. */
- return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev, name, id,
- kind, pool);
+ return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev,
+ key, name, id, kind, pool);
}
@@ -359,6 +369,7 @@
make_entry(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
+ const char *key,
const char *name,
svn_boolean_t is_dir,
const svn_fs_fs__id_part_t *txn_id,
@@ -367,11 +378,11 @@
const svn_fs_id_t *new_node_id;
node_revision_t new_noderev, *parent_noderev;
- /* Make sure that NAME is a single path component. */
- if (! svn_path_is_single_path_component(name))
+ /* Make sure that KEY is a single path component. */
+ if (! svn_path_is_single_path_component(key))
return svn_error_createf
(SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
- _("Attempted to create a node with an illegal name '%s'"), name);
+ _("Attempted to create a node with an illegal name '%s'"), key);
/* Make sure that parent is a directory */
if (parent->kind != svn_node_dir)
@@ -409,7 +420,7 @@
/* We can safely call set_entry because we already know that
PARENT is mutable, and we just created CHILD, so we know it has
no ancestors (therefore, PARENT cannot be an ancestor of CHILD) */
- return set_entry(parent, name, svn_fs_fs__dag_get_id(*child_p),
+ return set_entry(parent, key, name, svn_fs_fs__dag_get_id(*child_p),
new_noderev.kind, txn_id, pool);
}
@@ -431,9 +442,9 @@
}
svn_error_t *
-svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+svn_fs_fs__dag_dir_entry(svn_fs_fs__dirent_t **dirent,
dag_node_t *node,
- const char* name,
+ const char* key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
@@ -445,14 +456,15 @@
_("Can't get entries of non-directory"));
/* Get a dirent hash for this directory. */
- return svn_fs_fs__rep_contents_dir_entry(dirent, node->fs, noderev, name,
+ return svn_fs_fs__rep_contents_dir_entry(dirent, node->fs, noderev, key,
result_pool, scratch_pool);
}
svn_error_t *
svn_fs_fs__dag_set_entry(dag_node_t *node,
- const char *entry_name,
+ const char *key,
+ const char *name,
const svn_fs_id_t *id,
svn_node_kind_t kind,
const svn_fs_fs__id_part_t *txn_id,
@@ -470,7 +482,7 @@
(SVN_ERR_FS_NOT_MUTABLE, NULL,
_("Attempted to set entry in immutable node"));
- return set_entry(node, entry_name, id, kind, txn_id, pool);
+ return set_entry(node, key, name, id, kind, txn_id, pool);
}
@@ -660,13 +672,14 @@
svn_fs_fs__dag_clone_child(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
- const char *name,
+ const char *key,
const svn_fs_fs__id_part_t *copy_id,
const svn_fs_fs__id_part_t *txn_id,
svn_boolean_t is_parent_copyroot,
apr_pool_t *pool)
{
- dag_node_t *cur_entry; /* parent's current entry named NAME */
+ dag_node_t *cur_entry; /* parent's current entry named KEY */
+ const char *name; /* denormalized name of parent's current entry */
const svn_fs_id_t *new_node_id; /* node id we'll put into NEW_NODE */
svn_fs_t *fs = svn_fs_fs__dag_get_fs(parent);
apr_pool_t *subpool = svn_pool_create(pool);
@@ -677,14 +690,14 @@
(SVN_ERR_FS_NOT_MUTABLE, NULL,
"Attempted to clone child of non-mutable node");
- /* Make sure that NAME is a single path component. */
- if (! svn_path_is_single_path_component(name))
+ /* Make sure that KEY is a single path component. */
+ if (! svn_path_is_single_path_component(key))
return svn_error_createf
(SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
- "Attempted to make a child clone with an illegal name '%s'", name);
+ "Attempted to make a child clone with an illegal name '%s'", key);
- /* Find the node named NAME in PARENT's entries list if it exists. */
- SVN_ERR(svn_fs_fs__dag_open(&cur_entry, parent, name, pool, subpool));
+ /* Find the node named KEY in PARENT's entries list if it exists. */
+ SVN_ERR(svn_fs_fs__dag_open(&cur_entry, &name, parent, key, pool, subpool));
/* Check for mutability in the node we found. If it's mutable, we
don't need to clone it. */
@@ -721,8 +734,8 @@
/* Replace the ID in the parent's ENTRY list with the ID which
refers to the mutable clone of this child. */
- SVN_ERR(set_entry(parent, name, new_node_id, noderev->kind, txn_id,
- pool));
+ SVN_ERR(set_entry(parent, key, name, new_node_id, noderev->kind,
+ txn_id, pool));
}
/* Initialize the youngster. */
@@ -761,62 +774,64 @@
svn_error_t *
svn_fs_fs__dag_delete(dag_node_t *parent,
- const char *name,
+ const char *key,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool)
{
node_revision_t *parent_noderev;
svn_fs_t *fs = parent->fs;
- svn_fs_dirent_t *dirent;
+ svn_fs_fs__dirent_t *entry;
svn_fs_id_t *id;
+ const char *name;
apr_pool_t *subpool;
/* Make sure parent is a directory. */
if (parent->kind != svn_node_dir)
return svn_error_createf
(SVN_ERR_FS_NOT_DIRECTORY, NULL,
- "Attempted to delete entry '%s' from *non*-directory node", name);
+ "Attempted to delete entry '%s' from *non*-directory node", key);
/* Make sure parent is mutable. */
if (! svn_fs_fs__dag_check_mutable(parent))
return svn_error_createf
(SVN_ERR_FS_NOT_MUTABLE, NULL,
- "Attempted to delete entry '%s' from immutable directory node", name);
+ "Attempted to delete entry '%s' from immutable directory node", key);
- /* Make sure that NAME is a single path component. */
- if (! svn_path_is_single_path_component(name))
+ /* Make sure that KEY is a single path component. */
+ if (! svn_path_is_single_path_component(key))
return svn_error_createf
(SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
- "Attempted to delete a node with an illegal name '%s'", name);
+ "Attempted to delete a node with an illegal name '%s'", key);
/* Get a fresh NODE-REVISION for the parent node. */
SVN_ERR(get_node_revision(&parent_noderev, parent));
subpool = svn_pool_create(pool);
- /* Search this directory for a dirent with that NAME. */
- SVN_ERR(svn_fs_fs__rep_contents_dir_entry(&dirent, fs, parent_noderev,
- name, subpool, subpool));
+ /* Search this directory for a dirent with that KEY. */
+ SVN_ERR(svn_fs_fs__rep_contents_dir_entry(&entry, fs, parent_noderev,
+ key, subpool, subpool));
/* If we never found ID in ENTRIES (perhaps because there are no
ENTRIES, perhaps because ID just isn't in the existing ENTRIES
... it doesn't matter), return an error. */
- if (! dirent)
+ if (! entry)
return svn_error_createf
(SVN_ERR_FS_NO_SUCH_ENTRY, NULL,
- "Delete failed--directory has no entry '%s'", name);
+ "Delete failed--directory has no entry '%s'", key);
- /* Copy the ID out of the subpool and release the rest of the
- directory listing. */
- id = svn_fs_fs__id_copy(dirent->id, pool);
+ /* Copy the name and ID out of the subpool and release the rest of
+ the directory listing. */
+ name = apr_pstrdup(pool, entry->dirent.name);
+ id = svn_fs_fs__id_copy(entry->dirent.id, pool);
svn_pool_destroy(subpool);
/* If mutable, remove it and any mutable children from db. */
SVN_ERR(svn_fs_fs__dag_delete_if_mutable(parent->fs, id, pool));
/* Remove this entry from its parent's entries list. */
- return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev, name,
- NULL, svn_node_unknown, pool);
+ return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev,
+ key, name, NULL, svn_node_unknown, pool);
}
@@ -864,9 +879,11 @@
SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, node, pool));
if (entries)
for (i = 0; i < entries->nelts; ++i)
- SVN_ERR(svn_fs_fs__dag_delete_if_mutable(fs,
- APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *)->id,
- pool));
+ {
+ const svn_fs_id_t *const child_id =
+ APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *)->dirent.id;
+ SVN_ERR(svn_fs_fs__dag_delete_if_mutable(fs, child_id, pool));
+ }
}
/* ... then delete the node itself, after deleting any mutable
@@ -878,12 +895,14 @@
svn_fs_fs__dag_make_file(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
+ const char *key,
const char *name,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool)
{
/* Call our little helper function */
- return make_entry(child_p, parent, parent_path, name, FALSE, txn_id, pool);
+ return make_entry(child_p, parent, parent_path, key, name,
+ FALSE, txn_id, pool);
}
@@ -891,12 +910,14 @@
svn_fs_fs__dag_make_dir(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
+ const char *key,
const char *name,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool)
{
/* Call our little helper function */
- return make_entry(child_p, parent, parent_path, name, TRUE, txn_id, pool);
+ return make_entry(child_p, parent, parent_path, key, name,
+ TRUE, txn_id, pool);
}
@@ -1163,28 +1184,30 @@
svn_error_t *
svn_fs_fs__dag_open(dag_node_t **child_p,
+ const char **name_p,
dag_node_t *parent,
- const char *name,
+ const char *key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
const svn_fs_id_t *node_id;
- /* Ensure that NAME exists in PARENT's entry list. */
- SVN_ERR(dir_entry_id_from_node(&node_id, parent, name,
- scratch_pool, scratch_pool));
- if (! node_id)
+ /* Ensure that KEY exists in PARENT's entry list. */
+ SVN_ERR(dir_entry_id_and_name_from_node(&node_id, name_p, parent, key,
+ scratch_pool, scratch_pool));
+ if (! node_id || ! *name_p)
return svn_error_createf
(SVN_ERR_FS_NOT_FOUND, NULL,
- "Attempted to open non-existent child node '%s'", name);
+ "Attempted to open non-existent child node '%s'", key);
- /* Make sure that NAME is a single path component. */
- if (! svn_path_is_single_path_component(name))
+ /* Make sure that KEY is a single path component. */
+ if (! svn_path_is_single_path_component(key))
return svn_error_createf
(SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
- "Attempted to open node with an illegal name '%s'", name);
+ "Attempted to open node with an illegal name '%s'", key);
/* Now get the node that was requested. */
+ *name_p = apr_pstrdup(result_pool, *name_p);
return svn_fs_fs__dag_get_node(child_p, svn_fs_fs__dag_get_fs(parent),
node_id, result_pool);
}
@@ -1192,7 +1215,8 @@
svn_error_t *
svn_fs_fs__dag_copy(dag_node_t *to_node,
- const char *entry,
+ const char *key,
+ const char *name,
dag_node_t *from_node,
svn_boolean_t preserve_history,
svn_revnum_t from_rev,
@@ -1222,8 +1246,8 @@
if (to_noderev->predecessor_count != -1)
to_noderev->predecessor_count++;
to_noderev->created_path =
- svn_fspath__join(svn_fs_fs__dag_get_created_path(to_node), entry,
- pool);
+ svn_fspath__join(svn_fs_fs__dag_get_created_path(to_node),
+ name, pool);
to_noderev->copyfrom_path = apr_pstrdup(pool, from_path);
to_noderev->copyfrom_rev = from_rev;
@@ -1240,7 +1264,7 @@
}
/* Set the entry in to_node to the new id. */
- return svn_fs_fs__dag_set_entry(to_node, entry, id, from_node->kind,
+ return svn_fs_fs__dag_set_entry(to_node, key, name, id, from_node->kind,
txn_id, pool);
}
diff --git a/subversion/libsvn_fs_fs/dag.h b/subversion/libsvn_fs_fs/dag.h
index 6e20698..1e40ced 100644
--- a/subversion/libsvn_fs_fs/dag.h
+++ b/subversion/libsvn_fs_fs/dag.h
@@ -28,6 +28,7 @@
#include "private/svn_cache.h"
#include "id.h"
+#include "dirent.h"
#ifdef __cplusplus
extern "C" {
@@ -118,7 +119,10 @@
/* Return the created path of NODE. The value returned is shared
- with NODE, and will be deallocated when NODE is. */
+ with NODE, and will be deallocated when NODE is.
+
+ Note: This is the path in its original form, even when normalized
+ lookups are enabled. */
const char *svn_fs_fs__dag_get_created_path(dag_node_t *node);
@@ -252,14 +256,18 @@
/* Directories. */
-/* Open the node named NAME in the directory PARENT. Set *CHILD_P to
- the new node, allocated in RESULT_POOL. NAME must be a single path
- component; it cannot be a slash-separated directory path.
+/* Open the node identified by KEY in the directory PARENT. Set
+ *CHILD_P to the new node, and *NAME_P to its name in the original
+ form, allocated in RESULT_POOL. KEY must be a single path
+ component; it cannot be a slash-separated directory path, and must
+ be the normalized form of the entry name if normalized lookups are
+ enabled.
*/
svn_error_t *
svn_fs_fs__dag_open(dag_node_t **child_p,
+ const char **name_p,
dag_node_t *parent,
- const char *name,
+ const char *key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
@@ -272,40 +280,48 @@
apr_pool_t *pool);
/* Fetches the NODE's entries and returns a copy of the entry selected
- by the key value given in NAME and set *DIRENT to a copy of that
+ by the key value given in KEY and set *DIRENT to a copy of that
entry. If such entry was found, the copy will be allocated in
RESULT_POOL. Temporary data will be used in SCRATCH_POOL.
Otherwise, the *DIRENT will be set to NULL.
+
+ KEY must be the normalized form of the entry name if normalized
+ lookups are enabled.
*/
/* ### This function is currently only called from dag.c. */
-svn_error_t * svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+svn_error_t * svn_fs_fs__dag_dir_entry(svn_fs_fs__dirent_t **dirent,
dag_node_t *node,
- const char* name,
+ const char *key,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
/* Set ENTRY_NAME in NODE to point to ID (with kind KIND), allocating
- from POOL. NODE must be a mutable directory. ID can refer to a
- mutable or immutable node. If ENTRY_NAME does not exist, it will
- be created. TXN_ID is the Subversion transaction under which this
- occurs.
+ from POOL. Use KEY to find an existing entry in NODE.
+
+ KEY must be the normalized form of NAME if normalized lookups are
+ enabled; otherwise, it must be the same pointer value.
+
+ NODE must be a mutable directory. ID can refer to a mutable or
+ immutable node. If ENTRY_NAME does not exist, it will be created.
+ TXN_ID is the Subversion transaction under which this occurs.
Use POOL for all allocations, including to cache the node_revision in
NODE.
*/
svn_error_t *svn_fs_fs__dag_set_entry(dag_node_t *node,
- const char *entry_name,
+ const char *key,
+ const char *name,
const svn_fs_id_t *id,
svn_node_kind_t kind,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool);
-/* Make a new mutable clone of the node named NAME in PARENT, and
- adjust PARENT's directory entry to point to it, unless NAME in
+/* Make a new mutable clone of the node in PARENT referred to by KEY, and
+ adjust PARENT's directory entry to point to it, unless KEY in
PARENT already refers to a mutable node. In either case, set
*CHILD_P to a reference to the new node, allocated in POOL. PARENT
- must be mutable. NAME must be a single path component; it cannot
+ must be mutable. KEY must be a single path component; it cannot
be a slash-separated directory path. PARENT_PATH must be the
canonicalized absolute path of the parent directory.
@@ -318,32 +334,38 @@
TXN_ID is the Subversion transaction under which this occurs.
+ KEY must be the normalized form of the entry name if normalized
+ lookups are enabled.
+
Use POOL for all allocations.
*/
svn_error_t *svn_fs_fs__dag_clone_child(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
- const char *name,
+ const char *key,
const svn_fs_fs__id_part_t *copy_id,
const svn_fs_fs__id_part_t *txn_id,
svn_boolean_t is_parent_copyroot,
apr_pool_t *pool);
-/* Delete the directory entry named NAME from PARENT, allocating from
- POOL. PARENT must be mutable. NAME must be a single path
+/* Delete the directory entry from PARENT referred to by KEY, allocating from
+ POOL. PARENT must be mutable. KEY must be a single path
component; it cannot be a slash-separated directory path. If the
node being deleted is a mutable directory, remove all mutable nodes
reachable from it. TXN_ID is the Subversion transaction under
which this occurs.
- If return SVN_ERR_FS_NO_SUCH_ENTRY, then there is no entry NAME in
+ If this returns SVN_ERR_FS_NO_SUCH_ENTRY, then there is no entry KEY in
PARENT.
+ KEY must be the normalized form of the entry name if normalized
+ lookups are enabled.
+
Use POOL for all allocations.
*/
svn_error_t *svn_fs_fs__dag_delete(dag_node_t *parent,
- const char *name,
+ const char *key,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool);
@@ -372,13 +394,17 @@
apr_pool_t *pool);
-/* Create a new mutable directory named NAME in PARENT. Set *CHILD_P
- to a reference to the new node, allocated in POOL. The new
+/* Create a new mutable directory named NAME in PARENT. Use KEY to
+ find any existing entry in PARENT. KEY must be the normalized form of
+ NAME if normalized lookups are enabled; otherwise, it must be the
+ same pointer value.
+
+ Set *CHILD_P to a reference to the new node, allocated in POOL. The new
directory has no contents, and no properties. PARENT must be
mutable. NAME must be a single path component; it cannot be a
slash-separated directory path. PARENT_PATH must be the
canonicalized absolute path of the parent directory. PARENT must
- not currently have an entry named NAME. TXN_ID is the Subversion
+ not currently have an entry matching KEY. TXN_ID is the Subversion
transaction under which this occurs.
Use POOL for all allocations.
@@ -386,6 +412,7 @@
svn_error_t *svn_fs_fs__dag_make_dir(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
+ const char *key,
const char *name,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool);
@@ -484,12 +511,17 @@
svn_checksum_kind_t kind,
apr_pool_t *pool);
-/* Create a new mutable file named NAME in PARENT. Set *CHILD_P to a
- reference to the new node, allocated in POOL. The new file's
- contents are the empty string, and it has no properties. PARENT
- must be mutable. NAME must be a single path component; it cannot
+/* Create a new mutable file named NAME in PARENT. Use KEY to find any
+ existing entry in PARENT. KEY must be the normalized form of NAME
+ if normalized lookups are enabled; otherwise, it must be the same
+ pointer value.
+
+ Set *CHILD_P to a reference to the new node, allocated in POOL.
+ The new file's contents are the empty string, and it has no properties.
+ PARENT must be mutable. NAME must be a single path component; it cannot
be a slash-separated directory path. PARENT_PATH must be the
- canonicalized absolute path of the parent directory. TXN_ID is the
+ canonicalized absolute path of the parent directory. PARENT must
+ not currently have an entry matching KEY. TXN_ID is the
Subversion transaction under which this occurs.
Use POOL for all allocations.
@@ -497,6 +529,7 @@
svn_error_t *svn_fs_fs__dag_make_file(dag_node_t **child_p,
dag_node_t *parent,
const char *parent_path,
+ const char *key,
const char *name,
const svn_fs_fs__id_part_t *txn_id,
apr_pool_t *pool);
@@ -505,9 +538,11 @@
/* Copies */
-/* Make ENTRY in TO_NODE be a copy of FROM_NODE, allocating from POOL.
+/* Make NAME in TO_NODE be a copy of FROM_NODE, allocating from POOL.
TO_NODE must be mutable. TXN_ID is the Subversion transaction
- under which this occurs.
+ under which this occurs. Use KEY to find any existing entry in
+ TO_NODE. KEY must be the normalized form of NAME if normalized
+ lookups are enabled; otherwise, it must be the same pointer value.
If PRESERVE_HISTORY is true, the new node will record that it was
copied from FROM_PATH in FROM_REV; therefore, FROM_NODE should be
@@ -520,7 +555,8 @@
Use POOL for all allocations.
*/
svn_error_t *svn_fs_fs__dag_copy(dag_node_t *to_node,
- const char *entry,
+ const char *key,
+ const char *name,
dag_node_t *from_node,
svn_boolean_t preserve_history,
svn_revnum_t from_rev,
diff --git a/subversion/libsvn_fs_fs/dirent.h b/subversion/libsvn_fs_fs/dirent.h
new file mode 100644
index 0000000..06fe11f
--- /dev/null
+++ b/subversion/libsvn_fs_fs/dirent.h
@@ -0,0 +1,60 @@
+/* dirent.h : utilities for normalization-independent path lookup
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#ifndef SVN_LIBSVN_FS__DIRENT_H
+#define SVN_LIBSVN_FS__DIRENT_H
+
+#include "fs.h"
+
+/* Array item used for directory contents. */
+typedef struct svn_fs_fs__dirent_t
+{
+ /* The wrapped public directory entry.
+ *
+ * It must always be the first member of this structure, so that a
+ * pointer to an svn_fs_fs__dirent_t can be reinterpreted as a
+ * pointer to an svn_fs_dirent_t.
+ */
+ svn_fs_dirent_t dirent;
+
+ /* The directory entry key.
+ *
+ * When normalized lookup is disabled, or dirent.name is already
+ * normalized, this is the same pointer value as dirent.name.
+ * Otherwise, it is its normalized form.
+ */
+ const char *key;
+} svn_fs_fs__dirent_t;
+
+
+/* Given a directory entry with a valid DIRENT->dirent.name, set
+ DIRENT->key according to specification. DIRENT *must* be allocated
+ in RESULT_POOL, and DIRENT->key will be, too.
+
+ Use SCRATCH_POOL for temporary allocations. */
+svn_error_t *
+svn_fs_fs__set_dirent_key(svn_fs_fs__dirent_t *dirent,
+ svn_boolean_t normalized,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool);
+
+#endif /* SVN_LIBSVN_FS__DIRENT_H */
diff --git a/subversion/libsvn_fs_fs/fs.h b/subversion/libsvn_fs_fs/fs.h
index 6f8ee08..5b96088 100644
--- a/subversion/libsvn_fs_fs/fs.h
+++ b/subversion/libsvn_fs_fs/fs.h
@@ -117,6 +117,8 @@
#define CONFIG_OPTION_BLOCK_SIZE "block-size"
#define CONFIG_OPTION_L2P_PAGE_SIZE "l2p-page-size"
#define CONFIG_OPTION_P2L_PAGE_SIZE "p2l-page-size"
+#define CONFIG_SECTION_NORMALIZATION "normalization"
+#define CONFIG_OPTION_NORMALIZED_LOOKUP "normalized-lookup"
/* The format number of this filesystem.
This is independent of the repository format number, and
@@ -180,6 +182,9 @@
/* Minimum format number that will record moves */
#define SVN_FS_FS__MIN_MOVE_SUPPORT_FORMAT 7
+/* Minimum format where normalized lookup should be enabled by default. */
+#define SVN_FS_FS__MIN_DEFAULT_NORMALIZED_LOOKUP_FORMAT 7
+
/* The minimum format number that supports a configuration file (fsfs.conf) */
#define SVN_FS_FS__MIN_CONFIG_FILE 4
@@ -454,6 +459,9 @@
/* Compression level to use with txdelta storage format in new revs. */
int delta_compression_level;
+ /* Whether normalization-insensitive path lookup is enabled. */
+ svn_boolean_t normalized_lookup;
+
/* Pointer to svn_fs_open. */
svn_error_t *(*svn_fs_open_)(svn_fs_t **, const char *, apr_hash_t *,
apr_pool_t *);
diff --git a/subversion/libsvn_fs_fs/fs_fs.c b/subversion/libsvn_fs_fs/fs_fs.c
index be11b71..f7e1ef3 100644
--- a/subversion/libsvn_fs_fs/fs_fs.c
+++ b/subversion/libsvn_fs_fs/fs_fs.c
@@ -468,6 +468,9 @@
const char *fs_path,
apr_pool_t *pool)
{
+ const svn_boolean_t default_normalized_lookup =
+ (ffd->format >= SVN_FS_FS__MIN_DEFAULT_NORMALIZED_LOOKUP_FORMAT);
+
SVN_ERR(svn_config_read3(&ffd->config,
svn_dirent_join(fs_path, PATH_CONFIG, pool),
FALSE, FALSE, FALSE, pool));
@@ -566,7 +569,17 @@
ffd->l2p_page_size = 0x2000;
ffd->p2l_page_size = 0x1000;
}
-
+
+ /* Initialize normalization settings in ffd.
+
+ Note: the normalized-lookup option does not affect the filesystem
+ contents; therefore, its availability does not have to be
+ limited to any particular FS format version. */
+ SVN_ERR(svn_config_get_bool(ffd->config, &ffd->normalized_lookup,
+ CONFIG_SECTION_NORMALIZATION,
+ CONFIG_OPTION_NORMALIZED_LOOKUP,
+ default_normalized_lookup));
+
return SVN_NO_ERROR;
}
@@ -771,6 +784,32 @@
"### Must be a power of 2." NL
"### p2l-page-size is 64 kBytes by default." NL
"# " CONFIG_OPTION_P2L_PAGE_SIZE " = 64" NL
+"" NL
+"[" CONFIG_SECTION_NORMALIZATION "]" NL
+"### Subversion decrees that paths in the repository must be in the Unicode" NL
+"### character set, and further requires that they are encoded in UTF-8." NL
+"### Unfortunately it does not prescribe whether or how the names should" NL
+"### be normalized. Consequently, it is possible to create two paths that" NL
+"### appear to be identical on screen, but contain different Unicode code" NL
+"### points for the same glyphs. Apart from being confusing, this is not" NL
+"### supported by some filesystems (e.g., OSX HFS+, ZFS with normalization" NL
+"### enabled)." NL
+"###" NL
+"### When this option is enabled, FSFS will perform all path lookups in a" NL
+"### normalization-insensitive way. This will prevent the creation of new" NL
+"### paths with conflicting names, and will also remove the restriction on" NL
+"### clients to send paths in exactly the same form as is stored in the" NL
+"### filesystem. The representation of new paths will still be preserved;" NL
+"### FSFS will not normalize them, and will return them from queries in the" NL
+"### same form in which they were created." NL
+"### Normalized lookup is enabled by default for new FSFSv7 repositories." NL
+"# " CONFIG_OPTION_NORMALIZED_LOOKUP " = true" NL
+"###" NL
+"### WARNING: Before enabling this option for existing repositories, you " NL
+"### must verify that there are no extant name collisions by" NL
+"### running the following command:" NL
+"###" NL
+"### svnadmin verify <REPOS-PATH> --check-normalization" NL
;
#undef NL
return svn_io_file_create(svn_dirent_join(fs->path, PATH_CONFIG, pool),
diff --git a/subversion/libsvn_fs_fs/fs_fs.h b/subversion/libsvn_fs_fs/fs_fs.h
index 32239d5..977aac1 100644
--- a/subversion/libsvn_fs_fs/fs_fs.h
+++ b/subversion/libsvn_fs_fs/fs_fs.h
@@ -237,4 +237,4 @@
void
svn_fs_fs__reset_txn_caches(svn_fs_t *fs);
-#endif
+#endif /* SVN_LIBSVN_FS__FS_FS_H */
diff --git a/subversion/libsvn_fs_fs/temp_serializer.c b/subversion/libsvn_fs_fs/temp_serializer.c
index 7821573..da0dd90 100644
--- a/subversion/libsvn_fs_fs/temp_serializer.c
+++ b/subversion/libsvn_fs_fs/temp_serializer.c
@@ -167,7 +167,7 @@
apr_size_t len;
/* reference to the entries */
- svn_fs_dirent_t **entries;
+ svn_fs_fs__dirent_t **entries;
/* size of the serialized entries and don't be too wasteful
* (needed since the entries are no longer in sequence) */
@@ -180,18 +180,25 @@
*/
static void
serialize_dir_entry(svn_temp_serializer__context_t *context,
- svn_fs_dirent_t **entry_p,
+ svn_fs_fs__dirent_t **entry_p,
apr_uint32_t *length)
{
- svn_fs_dirent_t *entry = *entry_p;
+ svn_fs_fs__dirent_t *entry = *entry_p;
apr_size_t initial_length = svn_temp_serializer__get_length(context);
svn_temp_serializer__push(context,
(const void * const *)entry_p,
- sizeof(svn_fs_dirent_t));
+ sizeof(*entry));
+ svn_fs_fs__id_serialize(context, &entry->dirent.id);
+ svn_temp_serializer__add_string(context, &entry->dirent.name);
- svn_fs_fs__id_serialize(context, &entry->id);
- svn_temp_serializer__add_string(context, &entry->name);
+ /* Serialize the key. If it's the same as the dirent name, we'll
+ store a null pointer instead, as a signal to the
+ deserializer. */
+ if (entry->key != entry->dirent.name)
+ svn_temp_serializer__add_string(context, &entry->key);
+ else
+ svn_temp_serializer__set_null(context, (const void *const *)&entry->key);
*length = (apr_uint32_t)( svn_temp_serializer__get_length(context)
- APR_ALIGN_DEFAULT(initial_length));
@@ -212,7 +219,8 @@
/* calculate sizes */
int count = entries->nelts;
apr_size_t over_provision = 2 + count / 4;
- apr_size_t entries_len = (count + over_provision) * sizeof(svn_fs_dirent_t*);
+ apr_size_t entries_len =
+ (count + over_provision) * sizeof(svn_fs_fs__dirent_t *);
apr_size_t lengths_len = (count + over_provision) * sizeof(apr_uint32_t);
/* copy the hash entries to an auxiliary struct of known layout */
@@ -223,7 +231,7 @@
dir_data.lengths = apr_palloc(pool, lengths_len);
for (i = 0; i < count; ++i)
- dir_data.entries[i] = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
+ dir_data.entries[i] = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
/* Serialize that aux. structure into a new one. Also, provide a good
* estimate for the size of the buffer that we will need. */
@@ -260,11 +268,11 @@
deserialize_dir(void *buffer, dir_data_t *dir_data, apr_pool_t *pool)
{
apr_array_header_t *result
- = apr_array_make(pool, dir_data->count, sizeof(svn_fs_dirent_t *));
+ = apr_array_make(pool, dir_data->count, sizeof(svn_fs_fs__dirent_t *));
apr_size_t i;
apr_size_t count;
- svn_fs_dirent_t *entry;
- svn_fs_dirent_t **entries;
+ svn_fs_fs__dirent_t *entry;
+ svn_fs_fs__dirent_t **entries;
/* resolve the reference to the entries array */
svn_temp_deserializer__resolve(buffer, (void **)&dir_data->entries);
@@ -277,11 +285,16 @@
entry = dir_data->entries[i];
/* pointer fixup */
- svn_temp_deserializer__resolve(entry, (void **)&entry->name);
- svn_fs_fs__id_deserialize(entry, (svn_fs_id_t **)&entry->id);
+ svn_temp_deserializer__resolve(entry, (void **)&entry->key);
+ svn_temp_deserializer__resolve(entry, (void **)&entry->dirent.name);
+ svn_fs_fs__id_deserialize(entry, (svn_fs_id_t **)&entry->dirent.id);
+
+ /* fix up the entry key */
+ if (!entry->key)
+ entry->key = entry->dirent.name;
/* add the entry to the hash */
- APR_ARRAY_PUSH(result, svn_fs_dirent_t *) = entry;
+ APR_ARRAY_PUSH(result, svn_fs_fs__dirent_t *) = entry;
}
/* return the now complete hash */
@@ -755,31 +768,44 @@
return SVN_NO_ERROR;
}
-/* Utility function that returns the lowest index of the first entry in
- * *ENTRIES that points to a dir entry with a name equal or larger than NAME.
- * If an exact match has been found, *FOUND will be set to TRUE. COUNT is
- * the number of valid entries in ENTRIES.
+/* Helper function for find_entry() that returns the key for ENTRIES[INDEX] */
+static const char *
+get_entry_key(svn_fs_fs__dirent_t **entries, apr_size_t index)
+{
+ const svn_fs_fs__dirent_t *entry =
+ svn_temp_deserializer__ptr(entries, (const void *const *)&entries[index]);
+ const char* entry_key =
+ svn_temp_deserializer__ptr(entry, (const void *const *)&entry->key);
+
+ /* use the name if it's identical to the key */
+ if (!entry_key)
+ entry_key = svn_temp_deserializer__ptr(
+ entry, (const void *const *)&entry->dirent.name);
+ return entry_key;
+}
+
+/* Set *POSITION to the lowest index in *ENTRIES (COUNT valid entries)
+ * whose dir entry key is equal to or larger than KEY, and set *FOUND
+ * to TRUE iff that key equals KEY. Return SVN_ERR_FS_NAME_COLLISION,
+ * without setting *POSITION, if the next entry's key also equals KEY.
*/
-static apr_size_t
-find_entry(svn_fs_dirent_t **entries,
- const char *name,
+static svn_error_t *
+find_entry(apr_size_t *position,
+ svn_fs_fs__dirent_t **entries,
+ const char *key,
apr_size_t count,
svn_boolean_t *found)
{
- /* binary search for the desired entry by name */
+ /* binary search for the desired entry by key */
apr_size_t lower = 0;
apr_size_t upper = count;
apr_size_t middle;
for (middle = upper / 2; lower < upper; middle = (upper + lower) / 2)
{
- const svn_fs_dirent_t *entry =
- svn_temp_deserializer__ptr(entries, (const void *const *)&entries[middle]);
- const char* entry_name =
- svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);
+ const char *entry_key = get_entry_key(entries, middle);
- int diff = strcmp(entry_name, name);
- if (diff < 0)
+ if (0 > strcmp(entry_key, key)) /* strict: keep matches in range */
lower = middle + 1;
else
upper = middle;
@@ -789,16 +815,40 @@
*found = FALSE;
if (lower < count)
{
- const svn_fs_dirent_t *entry =
- svn_temp_deserializer__ptr(entries, (const void *const *)&entries[lower]);
- const char* entry_name =
- svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);
+ const char *entry_key = get_entry_key(entries, lower);
- if (strcmp(entry_name, name) == 0)
- *found = TRUE;
+ if (0 == strcmp(entry_key, key))
+ {
+ *found = TRUE;
+
+ /* Check for name collisions in the directory list.
+
+ A repository that was upgraded from FSFS v6 or earlier
+ and had not been properly verified and sanitized might
+ contain name collisions. When normalized lookups are
+ enabled, we must forbid all operations on colliding
+ names, or users might end up making modifications to the
+ wrong node.
+
+ XXX This check is really only needed when normalized
+ lookups are enabled. If it turns out to be a
+ performance problem, we can propagate the
+ normalized-lookups flag all the way through the API
+ to here. */
+ if (/* normalized_lookup && */ count > 1 && lower < count - 1)
+ {
+ entry_key = get_entry_key(entries, lower + 1);
+ if (0 == strcmp(entry_key, key))
+ return svn_error_createf(
+ SVN_ERR_FS_NAME_COLLISION, NULL,
+ _("A directory contains more than one entry named '%s'"),
+ key);
+ }
+ }
}
- return lower;
+ *position = lower;
+ return SVN_NO_ERROR;
}
svn_error_t *
@@ -809,29 +859,28 @@
apr_pool_t *pool)
{
const dir_data_t *dir_data = data;
- const char* name = baton;
+ const char* key = baton;
svn_boolean_t found;
/* resolve the reference to the entries array */
- const svn_fs_dirent_t * const *entries =
+ const svn_fs_fs__dirent_t * const *entries =
svn_temp_deserializer__ptr(data, (const void *const *)&dir_data->entries);
/* resolve the reference to the lengths array */
const apr_uint32_t *lengths =
svn_temp_deserializer__ptr(data, (const void *const *)&dir_data->lengths);
- /* binary search for the desired entry by name */
- apr_size_t pos = find_entry((svn_fs_dirent_t **)entries,
- name,
- dir_data->count,
- &found);
+ /* binary search for the desired entry by key */
+ apr_size_t pos;
+ SVN_ERR(find_entry(&pos, (svn_fs_fs__dirent_t **)entries,
+ key, dir_data->count, &found));
/* de-serialize that entry or return NULL, if no match has been found */
*out = NULL;
if (found)
{
- const svn_fs_dirent_t *source =
- svn_temp_deserializer__ptr(entries, (const void *const *)&entries[pos]);
+ const svn_fs_fs__dirent_t *source =
+ svn_temp_deserializer__ptr(entries, (const void *const *)&entries[pos]);
/* Entries have been serialized one-by-one, each time including all
* nested structures and strings. Therefore, they occupy a single
@@ -841,12 +890,20 @@
apr_size_t size = lengths[pos];
/* copy & deserialize the entry */
- svn_fs_dirent_t *new_entry = apr_palloc(pool, size);
+ svn_fs_fs__dirent_t *new_entry = apr_palloc(pool, size);
memcpy(new_entry, source, size);
- svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->name);
- svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->id);
- *(svn_fs_dirent_t **)out = new_entry;
+ /* FIXME: Extract common code from here and deserialize_dir(). */
+ /* pointer fixup */
+ svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->key);
+ svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->dirent.name);
+ svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->dirent.id);
+
+ /* fix up the entry key */
+ if (!new_entry->key)
+ new_entry->key = new_entry->dirent.name;
+
+ *(svn_fs_fs__dirent_t **)out = new_entry;
}
return SVN_NO_ERROR;
@@ -865,21 +922,21 @@
dir_data_t *dir_data = (dir_data_t *)*data;
apr_array_header_t *dir;
int idx = -1;
- svn_fs_dirent_t *entry;
+ svn_fs_fs__dirent_t *entry;
SVN_ERR(svn_fs_fs__deserialize_dir_entries((void **)&dir,
*data,
dir_data->len,
pool));
- entry = svn_fs_fs__find_dir_entry(dir, replace_baton->name, &idx);
+ entry = svn_fs_fs__find_dir_entry(dir, replace_baton->key, &idx);
/* Replacement or removal? */
if (replace_baton->new_entry)
{
/* Replace ENTRY with / insert the NEW_ENTRY */
if (entry)
- APR_ARRAY_IDX(dir, idx, svn_fs_dirent_t *) = replace_baton->new_entry;
+ APR_ARRAY_IDX(dir, idx, svn_fs_fs__dirent_t *) = replace_baton->new_entry;
else
svn_sort__array_insert(dir, &replace_baton->new_entry, idx);
}
@@ -902,7 +959,7 @@
replace_baton_t *replace_baton = (replace_baton_t *)baton;
dir_data_t *dir_data = (dir_data_t *)*data;
svn_boolean_t found;
- svn_fs_dirent_t **entries;
+ svn_fs_fs__dirent_t **entries;
apr_uint32_t *lengths;
apr_uint32_t length;
apr_size_t pos;
@@ -916,7 +973,7 @@
return slowly_replace_dir_entry(data, data_len, baton, pool);
/* resolve the reference to the entries array */
- entries = (svn_fs_dirent_t **)
+ entries = (svn_fs_fs__dirent_t **)
svn_temp_deserializer__ptr((const char *)dir_data,
(const void *const *)&dir_data->entries);
@@ -925,8 +982,9 @@
svn_temp_deserializer__ptr((const char *)dir_data,
(const void *const *)&dir_data->lengths);
- /* binary search for the desired entry by name */
- pos = find_entry(entries, replace_baton->name, dir_data->count, &found);
+ /* binary search for the desired entry by key */
+ SVN_ERR(find_entry(&pos, entries, replace_baton->key,
+ dir_data->count, &found));
/* handle entry removal (if found at all) */
if (replace_baton->new_entry == NULL)
diff --git a/subversion/libsvn_fs_fs/temp_serializer.h b/subversion/libsvn_fs_fs/temp_serializer.h
index 4fe07d9..be7f89e 100644
--- a/subversion/libsvn_fs_fs/temp_serializer.h
+++ b/subversion/libsvn_fs_fs/temp_serializer.h
@@ -24,6 +24,7 @@
#define SVN_LIBSVN_FS__TEMP_SERIALIZER_H
#include "fs.h"
+#include "dirent.h"
/**
* Prepend the @a number to the @a string in a space efficient way such that
@@ -189,8 +190,12 @@
/**
* Implements #svn_cache__partial_getter_func_t for a single
- * #svn_fs_dirent_t within a serialized directory contents hash,
- * identified by its name (const char @a *baton).
+ * #svn_fs_fs__dirent_t within a serialized directory contents hash,
+ * identified by its key (const char @a *baton).
+ *
+ * Note: When normalized lookups are enabled, @a baton must be the
+ * normalized form of the entry name; otherwise, it should be
+ * the original form of the entry name.
*/
svn_error_t *
svn_fs_fs__extract_dir_entry(void **out,
@@ -207,16 +212,20 @@
*/
typedef struct replace_baton_t
{
- /** name of the directory entry to modify */
- const char *name;
+ /** The key of the directory entry to modify.
+
+ Note: When normalized lookups are enabled, this must be the
+ normalized form of the entry name; otherwise, it should be
+ the original form of the entry name. */
+ const char *key;
/** directory entry to insert instead */
- svn_fs_dirent_t *new_entry;
+ svn_fs_fs__dirent_t *new_entry;
} replace_baton_t;
/**
* Implements #svn_cache__partial_setter_func_t for a single
- * #svn_fs_dirent_t within a serialized directory contents hash,
+ * #svn_fs_fs__dirent_t within a serialized directory contents hash,
* identified by its name in the #replace_baton_t in @a baton.
*/
svn_error_t *
diff --git a/subversion/libsvn_fs_fs/transaction.c b/subversion/libsvn_fs_fs/transaction.c
index d8c61a6..415754a 100644
--- a/subversion/libsvn_fs_fs/transaction.c
+++ b/subversion/libsvn_fs_fs/transaction.c
@@ -576,11 +576,11 @@
int i;
for (i = 0; i < entries->nelts; ++i)
{
- svn_fs_dirent_t *dirent;
+ svn_fs_fs__dirent_t *entry;
svn_pool_clear(iterpool);
- dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
- SVN_ERR(unparse_dir_entry(dirent, stream, iterpool));
+ entry = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
+ SVN_ERR(unparse_dir_entry(&entry->dirent, stream, iterpool));
}
SVN_ERR(svn_stream_printf(stream, pool, "%s\n", SVN_HASH_TERMINATOR));
@@ -1460,6 +1460,7 @@
svn_fs_fs__set_entry(svn_fs_t *fs,
const svn_fs_fs__id_part_t *txn_id,
node_revision_t *parent_noderev,
+ const char *key,
const char *name,
const svn_fs_id_t *id,
svn_node_kind_t kind,
@@ -1510,23 +1511,30 @@
if (ffd->txn_dir_cache)
{
/* build parameters: (name, new entry) pair */
- const char *key =
- svn_fs_fs__id_unparse(parent_noderev->id, subpool)->data;
+ const char *cache_key =
+ svn_fs_fs__id_unparse(parent_noderev->id, subpool)->data;
replace_baton_t baton;
- baton.name = name;
- baton.new_entry = NULL;
-
if (id)
{
baton.new_entry = apr_pcalloc(subpool, sizeof(*baton.new_entry));
- baton.new_entry->name = name;
- baton.new_entry->kind = kind;
- baton.new_entry->id = id;
+ baton.new_entry->dirent.name = name;
+ baton.new_entry->dirent.kind = kind;
+ baton.new_entry->dirent.id = id;
+ if (0 == strcmp(key, name))
+ baton.new_entry->key = baton.new_entry->dirent.name;
+ else
+ baton.new_entry->key = key;
+ baton.key = baton.new_entry->key;
+ }
+ else
+ {
+ baton.key = key;
+ baton.new_entry = NULL;
}
/* actually update the cached directory (if cached) */
- SVN_ERR(svn_cache__set_partial(ffd->txn_dir_cache, key,
+ SVN_ERR(svn_cache__set_partial(ffd->txn_dir_cache, cache_key,
svn_fs_fs__replace_dir_entry, &baton,
subpool));
}
@@ -2498,7 +2506,7 @@
return SVN_NO_ERROR;
}
-/* Implement collection_writer_t writing the svn_fs_dirent_t* array given
+/* Implement collection_writer_t writing the svn_fs_fs__dirent_t* array given
as BATON. */
static svn_error_t *
write_directory_to_stream(svn_stream_t *stream,
@@ -2898,16 +2906,16 @@
subpool));
for (i = 0; i < entries->nelts; ++i)
{
- svn_fs_dirent_t *dirent
- = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
+ svn_fs_fs__dirent_t *entry
+ = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
svn_pool_clear(subpool);
- SVN_ERR(write_final_rev(&new_id, file, rev, fs, dirent->id,
+ SVN_ERR(write_final_rev(&new_id, file, rev, fs, entry->dirent.id,
start_node_id, start_copy_id, initial_offset,
reps_to_cache, reps_hash, reps_pool, FALSE,
subpool));
if (new_id && (svn_fs_fs__id_rev(new_id) == rev))
- dirent->id = svn_fs_fs__id_copy(new_id, pool);
+ entry->dirent.id = svn_fs_fs__id_copy(new_id, pool);
}
svn_pool_destroy(subpool);
diff --git a/subversion/libsvn_fs_fs/transaction.h b/subversion/libsvn_fs_fs/transaction.h
index d201a9c..62eea06 100644
--- a/subversion/libsvn_fs_fs/transaction.h
+++ b/subversion/libsvn_fs_fs/transaction.h
@@ -126,11 +126,15 @@
/* Add or set in filesystem FS, transaction TXN_ID, in directory
PARENT_NODEREV a directory entry for NAME pointing to ID of type
- KIND. Allocations are done in POOL. */
+ KIND. Allocations are done in POOL.
+
+ KEY must be the normalized form of NAME if normalized lookups are
+ enabled; otherwise, it must be the same pointer value. */
svn_error_t *
svn_fs_fs__set_entry(svn_fs_t *fs,
const svn_fs_fs__id_part_t *txn_id,
node_revision_t *parent_noderev,
+ const char *key,
const char *name,
const svn_fs_id_t *id,
svn_node_kind_t kind,
diff --git a/subversion/libsvn_fs_fs/util.c b/subversion/libsvn_fs_fs/util.c
index 33a4702..5731733 100644
--- a/subversion/libsvn_fs_fs/util.c
+++ b/subversion/libsvn_fs_fs/util.c
@@ -25,7 +25,9 @@
#include "svn_ctype.h"
#include "svn_dirent_uri.h"
#include "private/svn_string_private.h"
+#include "private/svn_utf_private.h"
+#include "dirent.h"
#include "fs_fs.h"
#include "pack.h"
#include "util.h"
@@ -645,3 +647,33 @@
return ffd->format >= SVN_FS_FS__MIN_MOVE_SUPPORT_FORMAT;
}
+
+svn_error_t *
+svn_fs_fs__normalize(const char **normstr, const char *str, apr_pool_t *pool)
+{
+ svn_membuf_t buffer;
+ svn_membuf__create(&buffer, 0, pool);
+ return svn_error_trace(
+ svn_utf__normalize(normstr, str,
+ SVN_UTF__UNKNOWN_LENGTH, &buffer));
+}
+
+svn_error_t *
+svn_fs_fs__set_dirent_key(svn_fs_fs__dirent_t *dirent,
+ svn_boolean_t normalized,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ if (!normalized)
+ dirent->key = dirent->dirent.name;
+ else
+ {
+ SVN_ERR(svn_fs_fs__normalize(&dirent->key, dirent->dirent.name,
+ scratch_pool));
+ if (0 == strcmp(dirent->key, dirent->dirent.name))
+ dirent->key = dirent->dirent.name;
+ else if (result_pool != scratch_pool)
+ dirent->key = apr_pstrdup(result_pool, dirent->key);
+ }
+ return SVN_NO_ERROR;
+}
diff --git a/subversion/libsvn_fs_fs/util.h b/subversion/libsvn_fs_fs/util.h
index 5d62b8f..f3444ae 100644
--- a/subversion/libsvn_fs_fs/util.h
+++ b/subversion/libsvn_fs_fs/util.h
@@ -389,4 +389,8 @@
svn_boolean_t
svn_fs_fs__supports_move(svn_fs_t *fs);
+/* Set *NORMSTR to a normalized form of STR, allocated from POOL. */
+svn_error_t *
+svn_fs_fs__normalize(const char **normstr, const char *str, apr_pool_t *pool);
+
#endif