blob: 50a67debb0cd74d83a90fdae2f9c7ac95f27ed49 [file] [log] [blame]
/*
* paths.c: a path manipulation library using svn_stringbuf_t
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*/
#include <string.h>
#include <assert.h>
#include <apr_file_info.h>
#include <apr_lib.h>
#include <apr_uri.h>
#include "svn_string.h"
#include "svn_dirent_uri.h"
#include "svn_path.h"
#include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
#include "svn_utf.h"
#include "svn_io.h" /* for svn_io_stat() */
#include "svn_ctype.h"
#include "dirent_uri.h"
/* The canonical empty path, spelled as the empty string.  If this spelling
   is ever changed, the emptiness test below has to change with it.  (The
   original author noted that the path library itself would keep working,
   but was unsure about the fs/wc libraries.) */
#define SVN_EMPTY_PATH ""
/* TRUE if S is the canonical empty path, FALSE otherwise.  Only the first
   byte of S is examined. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')
/* TRUE if S, a string of N bytes, is the platform's empty path ("."),
   FALSE otherwise.  (The original author noted that the path library would
   tolerate a different spelling, but was unsure about the OS.) */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
#ifndef NDEBUG
/* Cheap, pool-free approximation of svn_path_is_canonical, intended for
 * assertions in functions that have no pool available.
 *
 * PATH (LEN bytes long) is rejected when it is the platform's empty path
 * (see SVN_PATH_IS_PLATFORM_EMPTY) or when it contains "/./".  Otherwise
 * it is accepted when any one of these holds:
 *
 *   - LEN is zero;
 *   - PATH is exactly "/";
 *   - PATH does not end with '/';
 *   - (WIN32 / Cygwin builds only) svn_dirent_is_root() accepts PATH.
 *
 * Prefer svn_path_is_canonical whenever a pool is at hand.
 */
static svn_boolean_t
is_canonical(const char *path,
             apr_size_t len)
{
  /* Disqualifying conditions first. */
  if (SVN_PATH_IS_PLATFORM_EMPTY(path, len))
    return FALSE;
  if (strstr(path, "/./") != NULL)
    return FALSE;

  /* Any single one of the following is sufficient. */
  if (len == 0)
    return TRUE;
  if (len == 1 && path[0] == '/')
    return TRUE;
  if (path[len - 1] != '/')
    return TRUE;
#if defined(WIN32) || defined(__CYGWIN__)
  if (svn_dirent_is_root(path, len))
    return TRUE;
#endif

  return FALSE;
}
#endif
/* Same answer as svn_path_is_canonical(), usable inside this file without
   going through the deprecated public entry point.  PATH counts as
   canonical when it is a canonical URI, a canonical dirent, or a canonical
   relpath; the checks are tried in that order. */
static svn_boolean_t
svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
{
  if (svn_uri_is_canonical(path, pool))
    return TRUE;

  if (svn_dirent_is_canonical(path, pool))
    return TRUE;

  return svn_relpath_is_canonical(path) ? TRUE : FALSE;
}
/* Public entry point; simply forwards to
   svn_path_is_canonical_internal(). */
svn_boolean_t
svn_path_is_canonical(const char *path, apr_pool_t *pool)
{
  svn_boolean_t canonical = svn_path_is_canonical_internal(path, pool);

  return canonical;
}
/* Same behaviour as svn_path_join(), usable inside this file without going
   through the deprecated public entry point.

   Join BASE and COMPONENT with a single '/' and return the result,
   allocated in POOL.  Both inputs must be canonical (asserted).  Special
   cases:
     - COMPONENT starting with '/' is returned (copied) unchanged;
     - if either input is empty, a copy of the other is returned;
     - if BASE is exactly "/", the result is "/" followed by COMPONENT. */
static char *
svn_path_join_internal(const char *base,
                       const char *component,
                       apr_pool_t *pool)
{
  const apr_size_t base_len = strlen(base);
  const apr_size_t comp_len = strlen(component);
  apr_size_t prefix_len;
  char *joined;
  char *cursor;

  assert(svn_path_is_canonical_internal(base, pool));
  assert(svn_path_is_canonical_internal(component, pool));

  /* An absolute COMPONENT replaces BASE; an empty BASE contributes
     nothing.  Either way the answer is a copy of COMPONENT. */
  if (component[0] == '/' || SVN_PATH_IS_EMPTY(base))
    return apr_pmemdup(pool, component, comp_len + 1);

  if (SVN_PATH_IS_EMPTY(component))
    return apr_pmemdup(pool, base, base_len + 1);

  /* A root BASE supplies only the separator that gets written below. */
  prefix_len = (base_len == 1 && base[0] == '/') ? 0 : base_len;

  /* prefix + '/' + component + NUL */
  joined = apr_palloc(pool, prefix_len + 1 + comp_len + 1);
  cursor = joined;

  memcpy(cursor, base, prefix_len);
  cursor += prefix_len;
  *cursor++ = '/';
  memcpy(cursor, component, comp_len + 1);

  return joined;
}
/* Public entry point; simply forwards to svn_path_join_internal(). */
char *svn_path_join(const char *base,
                    const char *component,
                    apr_pool_t *pool)
{
  char *joined = svn_path_join_internal(base, component, pool);

  return joined;
}
/* Join BASE with every following path component and return the result,
   allocated in POOL.  The variadic list holds `const char *' components
   and must be terminated by a NULL pointer.  BASE and every component
   must be canonical (asserted).

   Empty components are ignored.  A component that starts with '/' is
   absolute: everything before it is discarded and joining restarts from
   that component.

   The function makes two passes over the arguments: the first computes
   the total length, the second copies the pieces.  The lengths of BASE
   and of the first few components are cached in a small fixed-size array
   so the second pass can skip strlen() for them; components beyond the
   cache are simply measured again. */
char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
{
#define MAX_SAVED_LENGTHS 10
  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
  apr_size_t total_len;
  int nargs;
  va_list va;
  const char *s;
  apr_size_t len;
  char *path;
  char *p;
  svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
  int base_arg = 0;

  total_len = strlen(base);

  assert(svn_path_is_canonical_internal(base, pool));

  if (total_len == 1 && *base == '/')
    base_is_root = TRUE;
  else if (SVN_PATH_IS_EMPTY(base))
    {
      total_len = sizeof(SVN_EMPTY_PATH) - 1;
      base_is_empty = TRUE;
    }

  /* Slot 0 of the cache always belongs to BASE. */
  saved_lengths[0] = total_len;

  /* Compute the length of the resulting string. */
  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      len = strlen(s);

      assert(svn_path_is_canonical_internal(s, pool));

      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Count the component first, and only then decide whether a cache
         slot exists for it.  Valid slots are 1 .. MAX_SAVED_LENGTHS - 1;
         testing the pre-increment value would let the
         MAX_SAVED_LENGTHS'th component write one element past the end
         of saved_lengths[]. */
      ++nargs;
      if (nargs < MAX_SAVED_LENGTHS)
        saved_lengths[nargs] = len;

      if (*s == '/')
        {
          /* an absolute path. skip all components to this point and reset
             the total length. */
          total_len = len;
          base_arg = nargs;
          base_is_root = len == 1;
          base_is_empty = FALSE;
        }
      else if (nargs == base_arg
               || (nargs == base_arg + 1 && base_is_root)
               || base_is_empty)
        {
          /* if we have skipped everything up to this arg, then the base
             and all prior components are empty. just set the length to
             this component; do not add a separator.  If the base is empty
             we can now ignore it. */
          if (base_is_empty)
            {
              base_is_empty = FALSE;
              total_len = 0;
            }
          total_len += len;
        }
      else
        {
          /* separator + component */
          total_len += 1 + len;
        }
    }
  va_end(va);

  /* base == "/" and no further components. just return that. */
  if (base_is_root && total_len == 1)
    return apr_pmemdup(pool, "/", 2);

  /* we got the total size. allocate it, with room for a NUL character. */
  path = p = apr_palloc(pool, total_len + 1);

  /* if we aren't supposed to skip forward to an absolute component, and if
     this is not an empty base that we are skipping, then copy the base
     into the output. */
  if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
    {
      if (SVN_PATH_IS_EMPTY(base))
        memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
      else
        memcpy(p, base, len = saved_lengths[0]);
      p += len;
    }

  /* Second pass: copy the components, starting at the last absolute one
     (if any). */
  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      if (SVN_PATH_IS_EMPTY(s))
        continue;

      if (++nargs < base_arg)
        continue;

      /* Use the cached length when there is one; otherwise measure. */
      if (nargs < MAX_SAVED_LENGTHS)
        len = saved_lengths[nargs];
      else
        len = strlen(s);

      /* insert a separator if we aren't copying in the first component
         (which can happen when base_arg is set). also, don't put in a slash
         if the prior character is a slash (occurs when prior component
         is "/"). */
      if (p != path && p[-1] != '/')
        *p++ = '/';

      /* copy the new component and advance the pointer */
      memcpy(p, s, len);
      p += len;
    }
  va_end(va);

  *p = '\0';
  assert((apr_size_t)(p - path) == total_len);

  return path;
}
/* Return the number of '/'-separated, non-empty segments in PATH.
   Separators themselves are never counted, so "/" yields 0.  PATH must
   satisfy is_canonical() (asserted). */
apr_size_t
svn_path_component_count(const char *path)
{
  const char *cursor = path;
  apr_size_t components = 0;

  assert(is_canonical(path, strlen(path)));

  while (*cursor != '\0')
    {
      if (*cursor == '/')
        {
          /* Separators only delimit segments. */
          ++cursor;
          continue;
        }

      /* CURSOR sits on the first byte of a segment: count it, then run
         to its end. */
      ++components;
      while (*cursor != '\0' && *cursor != '/')
        ++cursor;
    }

  return components;
}
/* Return how many leading bytes of PATH (a LEN byte string) remain once
 * its final segment has been dropped.
 *
 * The separator in front of the dropped segment is not part of the
 * returned length, with one exception: when the only '/' found is the
 * very first byte of PATH, 1 is returned so that the root "/" is kept.
 * A LEN of zero yields zero.
 */
static apr_size_t
previous_segment(const char *path,
                 apr_size_t len)
{
  apr_size_t cut = len;

  if (cut == 0)
    return 0;

  /* Step backwards until CUT indexes a '/' or reaches the first byte. */
  do
    {
      --cut;
    }
  while (cut > 0 && path[cut] != '/');

  return (cut == 0 && path[0] == '/') ? 1 : cut;
}
/* Append COMPONENT to the string buffer PATH in place.  A '/' is inserted
   between the two unless PATH is empty or is exactly "/".  Both PATH and
   COMPONENT must satisfy is_canonical() (asserted). */
void
svn_path_add_component(svn_stringbuf_t *path,
                       const char *component)
{
  const apr_size_t comp_len = strlen(component);
  svn_boolean_t path_is_empty;
  svn_boolean_t path_is_root;

  assert(is_canonical(path->data, path->len));
  assert(is_canonical(component, strlen(component)));

  path_is_empty = SVN_PATH_IS_EMPTY(path->data);
  path_is_root = (path->len == 1 && path->data[0] == '/');

  /* Neither an empty path nor the root needs a separator added. */
  if (! path_is_empty && ! path_is_root)
    svn_stringbuf_appendbytes(path, "/", 1);

  svn_stringbuf_appendbytes(path, component, comp_len);
}
/* Truncate the string buffer PATH in place so that its last segment is
   removed; the new length is whatever previous_segment() reports.  PATH
   must satisfy is_canonical() (asserted). */
void
svn_path_remove_component(svn_stringbuf_t *path)
{
  apr_size_t keep;

  assert(is_canonical(path->data, path->len));

  keep = previous_segment(path->data, path->len);
  path->data[keep] = '\0';
  path->len = keep;
}
/* Call svn_path_remove_component() on PATH N times. */
void
svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
{
  for (; n > 0; --n)
    svn_path_remove_component(path);
}
/* Return a copy, allocated in POOL, of PATH with its last segment removed
   (as computed by previous_segment()).  PATH must be canonical
   (asserted). */
char *
svn_path_dirname(const char *path, apr_pool_t *pool)
{
  apr_size_t parent_len;

  assert(svn_path_is_canonical_internal(path, pool));

  parent_len = previous_segment(path, strlen(path));
  return apr_pstrmemdup(pool, path, parent_len);
}
/* Return a copy, allocated in POOL, of the last segment of PATH: the bytes
   following the final '/', or all of PATH when it contains no '/'.  The
   root path "/" is its own basename.  PATH must be canonical (asserted). */
char *
svn_path_basename(const char *path, apr_pool_t *pool)
{
  const char *end = path + strlen(path);
  const char *name = end;

  assert(svn_path_is_canonical_internal(path, pool));

  if (end - path == 1 && path[0] == '/')
    {
      /* The root directory is returned whole. */
      name = path;
    }
  else
    {
      /* Back up to just after the last separator (or to the start). */
      while (name > path && name[-1] != '/')
        --name;
    }

  return apr_pstrmemdup(pool, name, (apr_size_t)(end - name));
}
/* Return 1 if PATH is the canonical empty path, 0 otherwise.  PATH must
   satisfy is_canonical() (asserted). */
int
svn_path_is_empty(const char *path)
{
  assert(is_canonical(path, strlen(path)));

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
int
svn_path_compare_paths(const char *path1,
const char *path2)
{
apr_size_t path1_len = strlen(path1);
apr_size_t path2_len = strlen(path2);
apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
apr_size_t i = 0;
assert(is_canonical(path1, path1_len));
assert(is_canonical(path2, path2_len));
/* Skip past common prefix. */
while (i < min_len && path1[i] == path2[i])
++i;
/* Are the paths exactly the same? */
if ((path1_len == path2_len) && (i >= min_len))
return 0;
/* Children of paths are greater than their parents, but less than
greater siblings of their parents. */
if ((path1[i] == '/') && (path2[i] == 0))
return 1;
if ((path2[i] == '/') && (path1[i] == 0))
return -1;
if (path1[i] == '/')
return -1;
if (path2[i] == '/')
return 1;
/* Common prefix was skipped above, next character is compared to
determine order. We need to use an unsigned comparison, though,
so a "next character" of NULL (0x00) sorts numerically
smallest. */
return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
}
/* Return the length, in bytes, of the longest common ancestor of PATH1 and
 * PATH2, or 0 when they have none (in particular when either is empty).
 *
 * Neither argument may be a URL; URL handling is the job of
 * svn_path_get_longest_ancestor.
 *
 * POOL is accepted for signature compatibility but is not used by the
 * body.
 */
static apr_size_t
get_path_ancestor_length(const char *path1,
                         const char *path2,
                         apr_pool_t *pool)
{
  const apr_size_t len1 = strlen(path1);
  const apr_size_t len2 = strlen(path2);
  apr_size_t pos = 0;        /* first offset not known to match */
  apr_size_t last_sep = 0;   /* offset of the last '/' matched so far */
  svn_boolean_t both_absolute;

  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
    return 0;

  both_absolute = (path1[0] == '/' && path2[0] == '/');

  /* Advance over the common prefix, remembering the last separator seen,
     and stop as soon as either string is exhausted. */
  while (path1[pos] == path2[pos])
    {
      if (path1[pos] == '/')
        last_sep = pos;

      ++pos;

      if (pos == len1 || pos == len2)
        break;
    }

  /* Only the leading '/' matched ("/" vs "/foo", "/rif" vs "/raf"):
     the root is the common ancestor. */
  if (pos == 1 && both_absolute)
    return 1;

  /* One path is a whole-segment prefix of the other, or the two are
     identical: the entire matched prefix is the ancestor. */
  if (pos == len1)
    {
      if (pos == len2 || path2[pos] == '/')
        return pos;
    }
  else if (pos == len2 && path1[pos] == '/')
    {
      return pos;
    }

  /* The mismatch fell inside a segment, so back off to the last
     separator -- which for two absolute paths may be the root itself. */
  if (last_sep == 0 && both_absolute)
    return 1;

  return last_sep;
}
/* Return, allocated in POOL, the longest common ancestor of PATH1 and
   PATH2.  Two URLs are delegated to svn_uri_get_longest_ancestor(); two
   non-URLs are handled by get_path_ancestor_length(); a URL paired with a
   non-URL shares nothing, so a copy of the empty path is returned. */
char *
svn_path_get_longest_ancestor(const char *path1,
                              const char *path2,
                              apr_pool_t *pool)
{
  const svn_boolean_t first_is_url = svn_path_is_url(path1);
  const svn_boolean_t second_is_url = svn_path_is_url(path2);

  if (first_is_url && second_is_url)
    return svn_uri_get_longest_ancestor(path1, path2, pool);

  /* Exactly one of them is a URL => no common prefix. */
  if (first_is_url || second_is_url)
    return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));

  /* Two plain paths. */
  return apr_pstrndup(pool, path1,
                      get_path_ancestor_length(path1, path2, pool));
}
/* If PATH2 is a proper descendant of PATH1, return the part of PATH2 that
   lies below PATH1 (without a leading '/'); otherwise return NULL.

   When POOL is non-NULL the returned string is a copy allocated in POOL;
   when POOL is NULL the return value points into PATH2 itself.

   An empty PATH1 is treated as the parent of every non-empty, relative
   PATH2.  The inputs are not checked for canonical form. */
const char *
svn_path_is_child(const char *path1,
                  const char *path2,
                  apr_pool_t *pool)
{
  const char *remainder;

  if (SVN_PATH_IS_EMPTY(path1))
    {
      /* "" is the parent of anything except "" itself and absolute
         paths. */
      if (SVN_PATH_IS_EMPTY(path2) || path2[0] == '/')
        return NULL;

      remainder = path2;
    }
  else
    {
      apr_size_t pos = 0;

      /* PATH1 must be a byte-wise prefix of PATH2. */
      while (path1[pos] != '\0' && path2[pos] != '\0')
        {
          if (path1[pos] != path2[pos])
            return NULL;
          ++pos;
        }

      /* PATH1 has to be used up while PATH2 still has something left. */
      if (path1[pos] != '\0' || path2[pos] == '\0')
        return NULL;

      if (path2[pos] == '/')
        {
          /* parent "..."  child ".../foo" */
          remainder = path2 + pos + 1;
        }
      else if (pos == 1 && path1[0] == '/')
        {
          /* parent "/"  child "/foo" */
          remainder = path2 + 1;
        }
      else
        {
          /* PATH1 ends in the middle of one of PATH2's segments. */
          return NULL;
        }
    }

  return pool ? apr_pstrdup(pool, remainder) : remainder;
}
svn_boolean_t
svn_path_is_ancestor(const char *path1, const char *path2)
{
apr_size_t path1_len = strlen(path1);
/* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
if (SVN_PATH_IS_EMPTY(path1))
return *path2 != '/';
/* If path1 is a prefix of path2, then:
- If path1 ends in a path separator,
- If the paths are of the same length
OR
- path2 starts a new path component after the common prefix,
then path1 is an ancestor. */
if (strncmp(path1, path2, path1_len) == 0)
return path1[path1_len - 1] == '/'
|| (path2[path1_len] == '/' || path2[path1_len] == '\0');
return FALSE;
}
/* Split PATH into its segments and return them as an array of
   `const char *', allocated in POOL.  PATH must be canonical (asserted).

   An empty PATH yields an empty array.  For an absolute PATH the first
   element is the string "/".  A segment spelled "." is stored as
   SVN_EMPTY_PATH; every other segment is stored as a copy made in POOL. */
apr_array_header_t *
svn_path_decompose(const char *path,
                   apr_pool_t *pool)
{
  apr_array_header_t *parts = apr_array_make(pool, 1, sizeof(const char *));
  apr_size_t seg_start = 0;
  apr_size_t pos = 0;

  assert(svn_path_is_canonical_internal(path, pool));

  if (SVN_PATH_IS_EMPTY(path))
    return parts;  /* ### Should we return a "" component? */

  /* The leading '/' of an absolute path becomes a component of its own. */
  if (path[0] == '/')
    {
      APR_ARRAY_PUSH(parts, const char *) = apr_pstrmemdup(pool, "/", 1);

      if (path[1] == '\0')  /* path is a single '/' */
        return parts;

      seg_start = pos = 1;
    }

  /* Emit one component each time a separator or the terminating NUL is
     reached. */
  for (;;)
    {
      const char ch = path[pos];

      if (ch == '/' || ch == '\0')
        {
          const apr_size_t seg_len = pos - seg_start;
          const char *piece;

          if (SVN_PATH_IS_PLATFORM_EMPTY(path + seg_start, seg_len))
            piece = SVN_EMPTY_PATH;
          else
            piece = apr_pstrmemdup(pool, path + seg_start, seg_len);

          APR_ARRAY_PUSH(parts, const char *) = piece;

          /* The next segment begins right after this separator. */
          seg_start = pos + 1;
        }

      if (ch == '\0')
        break;

      ++pos;
    }

  return parts;
}
/* Concatenate the strings in COMPONENTS (an array of `const char *') into
   a single path, allocated in POOL, and return it.

   A '/' is written in front of every component from the third one
   onwards, and in front of the second one unless the first component is
   exactly "/" (which already supplies the separator of an absolute
   path). */
const char *
svn_path_compose(const apr_array_header_t *components,
                 apr_pool_t *pool)
{
  const int count = components->nelts;
  apr_size_t *part_len = apr_palloc(pool, count * sizeof(*part_len));
  /* Upper bound for the result: one separator per component plus the sum
     of the component lengths added below. */
  apr_size_t capacity = count;
  char *result;
  char *out;
  int idx;

  /* First pass: measure every component. */
  for (idx = 0; idx < count; ++idx)
    {
      part_len[idx] = strlen(APR_ARRAY_IDX(components, idx, const char *));
      capacity += part_len[idx];
    }

  result = apr_palloc(pool, capacity + 1);
  out = result;

  /* Second pass: copy, inserting separators where required. */
  for (idx = 0; idx < count; ++idx)
    {
      const char *part = APR_ARRAY_IDX(components, idx, const char *);

      if (idx > 1)
        {
          *out++ = '/';
        }
      else if (idx == 1)
        {
          /* A "/" first component already provides the separator. */
          if (strcmp("/", APR_ARRAY_IDX(components, 0, const char *)) != 0)
            *out++ = '/';
        }

      memcpy(out, part, part_len[idx]);
      out += part_len[idx];
    }

  *out = '\0';
  return result;
}
/* Return TRUE if NAME is usable as exactly one path segment: it is not
   empty, it is not "..", and it contains no '/'.  NAME must satisfy
   is_canonical() (asserted). */
svn_boolean_t
svn_path_is_single_path_component(const char *name)
{
  assert(is_canonical(name, strlen(name)));

  if (SVN_PATH_IS_EMPTY(name))
    return FALSE;

  if (strcmp(name, "..") == 0)
    return FALSE;

  /* Anything containing a separator spans more than one segment. */
  return strchr(name, '/') == NULL;
}
/* Return TRUE if any segment of PATH is ".": that is, PATH is ".", begins
   with "./", contains "/./", or ends with "/.". */
svn_boolean_t
svn_path_is_dotpath_present(const char *path)
{
  const char *tail;

  /* The empty string has no segments at all. */
  if (*path == '\0')
    return FALSE;

  /* "." on its own, or a leading "./". */
  if (path[0] == '.')
    {
      if (path[1] == '\0' || path[1] == '/')
        return TRUE;
    }

  /* Any other one-byte path is clean. */
  if (path[1] == '\0')
    return FALSE;

  /* A "." segment somewhere in the middle. */
  if (strstr(path, "/./") != NULL)
    return TRUE;

  /* A trailing "/."; PATH is at least two bytes long at this point. */
  tail = path + strlen(path) - 2;
  return strcmp(tail, "/.") == 0;
}
/* Return TRUE if any segment of PATH is "..": that is, PATH is "..",
   begins with "../", contains "/../", or ends with "/..". */
svn_boolean_t
svn_path_is_backpath_present(const char *path)
{
  const char *tail;

  /* Paths shorter than two bytes cannot hold "..". */
  if (path[0] == '\0' || path[1] == '\0')
    return FALSE;

  /* ".." on its own, or a leading "../". */
  if (path[0] == '.' && path[1] == '.')
    {
      if (path[2] == '\0' || path[2] == '/')
        return TRUE;
    }

  /* Any other two-byte path is clean. */
  if (path[2] == '\0')
    return FALSE;

  /* A ".." segment somewhere in the middle. */
  if (strstr(path, "/../") != NULL)
    return TRUE;

  /* A trailing "/.."; PATH is at least three bytes long at this point. */
  tail = path + strlen(path) - 3;
  return strcmp(tail, "/..") == 0;
}
/*** URI Stuff ***/
/* Treat PATH as a candidate URI.  If it starts with a non-empty scheme
   (containing no '/') followed by "://", return a pointer to the byte
   just after the "://"; otherwise return NULL.  Nothing is allocated:
   the returned pointer refers into PATH itself. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *p = path;

  /* The scheme runs up to the first ':' and may not contain a '/'. */
  while (*p != '\0' && *p != ':')
    {
      if (*p == '/')
        return NULL;
      ++p;
    }

  /* Require at least one scheme byte and the literal "://".  strncmp
     stops at a NUL, so a short PATH is never read past its end. */
  if (p > path && strncmp(p, "://", 3) == 0)
    return p + 3;

  return NULL;
}
/* Return TRUE if PATH looks like a URL.  This is deliberately simplistic
   and not a real URI parser: all that is required is the shape

      (scheme)://(optional_stuff)

   where (scheme) is non-empty and contains neither ':' nor '/', which is
   exactly what skip_uri_scheme() recognises. */
svn_boolean_t
svn_path_is_url(const char *path)
{
  const char *after_scheme = skip_uri_scheme(path);

  return after_scheme != NULL;
}
/* Lookup table indexed by byte value: svn_uri__char_validity[c] is 1 when
   byte C is accepted as-is in the path part of a URI, and 0 otherwise.

   Here is the BNF for path components in a URI.  "pchar" is a
   character in a path component.

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   Note that "escaped" doesn't really apply to what users can put in
   their paths, so that really means the set of characters is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   In addition to that set, the table marks '/' (the separator between
   path components) as valid.  Entry 0 (NUL) and every byte from 128
   upwards are 0.
*/
const char svn_uri__char_validity[256] = {
/* 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
/* 64 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
/* 128 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 192 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Return TRUE if PATH is a URL whose path portion consists only of bytes
   marked valid in svn_uri__char_validity and of well-formed "%XX" escapes
   (X being a hex digit).  Return FALSE if PATH has no URI scheme.  A URL
   without any '/' after the scheme separator -- i.e. only a host part --
   is considered safe. */
svn_boolean_t
svn_path_is_uri_safe(const char *path)
{
  const char *cursor = skip_uri_scheme(path);

  /* No scheme? Then this is not a URL at all. */
  if (cursor == NULL)
    return FALSE;

  /* Jump to the first slash after the scheme; what precedes it is the
     host portion, which is not examined. */
  cursor = strchr(cursor, '/');
  if (cursor == NULL)
    return TRUE;

  while (*cursor != '\0')
    {
      if (*cursor == '%')
        {
          /* An escape must be '%' followed by two hex digits. */
          if (! (svn_ctype_isxdigit(cursor[1])
                 && svn_ctype_isxdigit(cursor[2])))
            return FALSE;

          cursor += 3;
        }
      else
        {
          if (! svn_uri__char_validity[(unsigned char)*cursor])
            return FALSE;

          ++cursor;
        }
    }

  return TRUE;
}
/* Replace every byte C of PATH for which TABLE[C] is 0 by its "%XX"
   escape (two upper-case hex digits).  TABLE must have 256 entries and
   TABLE[0] must be 0 (asserted), because the scan relies on the
   terminating NUL being reported as "needs escaping".

   If nothing had to be escaped PATH itself is returned; otherwise the
   result is a new string allocated in POOL. */
static const char *
uri_escape(const char *path, const char table[], apr_pool_t *pool)
{
  svn_stringbuf_t *out;
  const char *first_bad;
  apr_size_t total;    /* strlen(path) */
  apr_size_t pos;      /* byte currently being examined */
  apr_size_t flushed;  /* number of input bytes already emitted */

  assert(table[0] == 0);

  /* Fast path: locate the first byte that is not allowed through. */
  first_bad = path;
  while (table[(unsigned char)*first_bad])
    ++first_bad;

  /* If that byte is the terminator, there is nothing to escape. */
  if (*first_bad == '\0')
    return path;

  pos = first_bad - path;
  total = pos + strlen(first_bad);
  out = svn_stringbuf_create_ensure(total, pool);
  flushed = 0;

  for (; pos < total; ++pos)
    {
      const unsigned char ch = (unsigned char)path[pos];

      if (table[ch])
        continue;

      /* Emit the run of acceptable bytes that precedes this one. */
      if (pos != flushed)
        svn_stringbuf_appendbytes(out, path + flushed, pos - flushed);

      /* Write '%' and two hex digits straight into the buffer.  Four
         bytes are reserved because apr_snprintf also stores a NUL; the
         value is passed as unsigned char so bytes 0x80-0xFF are not
         sign-extended. */
      svn_stringbuf_ensure(out, out->len + 4);
      apr_snprintf(out->data + out->len, 4, "%%%02X", (unsigned char)ch);
      out->len += 3;

      flushed = pos + 1;
    }

  /* Emit whatever follows the last escaped byte. */
  if (pos != flushed)
    svn_stringbuf_appendbytes(out, path + flushed, pos - flushed);

  /* OUT is NUL-terminated either by apr_snprintf or by the svn_stringbuf
     functions. */
  return out->data;
}
/* Return a copy of PATH, allocated in POOL, in which every byte that
   svn_uri__char_validity marks invalid is replaced by its "%XX" escape.
   A fresh copy is returned even when nothing needed escaping. */
const char *
svn_path_uri_encode(const char *path, apr_pool_t *pool)
{
  const char *escaped = uri_escape(path, svn_uri__char_validity, pool);

  /* uri_escape hands back its input when no escaping was needed, but
     this interface guarantees a copy. */
  return (escaped == path) ? apr_pstrdup(pool, path) : escaped;
}
/* Escape table passed to uri_escape() by svn_path_uri_from_iri():
   a 0 entry means "escape this byte".  Bytes 1-127 are left alone; bytes
   128-255 are escaped.  Entry 0 (NUL) is 0 as well, which uri_escape()
   requires in order to terminate its scan. */
static const char iri_escape_chars[256] = {
/* 0 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 128 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Escape IRI according to iri_escape_chars.  As with uri_escape(), IRI
   itself is returned when nothing needed escaping; otherwise the result
   is allocated in POOL. */
const char *
svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
{
  const char *uri = uri_escape(iri, iri_escape_chars, pool);

  return uri;
}
/* Escape table passed to uri_escape() by svn_path_uri_autoescape():
   a 0 entry means "escape this byte".  The bytes escaped are space, '"',
   '<', '>', '\', '^', '`', '{', '|' and '}'; every other non-NUL byte,
   including all bytes from 128 upwards, is left alone.  Entry 0 (NUL) is
   0 as well, which uri_escape() requires in order to terminate its
   scan. */
static const char uri_autoescape_chars[256] = {
/* 0 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
/* 64 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
/* 128 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 192 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
/* Escape URI according to uri_autoescape_chars.  As with uri_escape(),
   URI itself is returned when nothing needed escaping; otherwise the
   result is allocated in POOL. */
const char *
svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
{
  const char *escaped = uri_escape(uri, uri_autoescape_chars, pool);

  return escaped;
}
/* Return a copy of PATH, allocated in POOL, with URI escapes undone:
     - "%XX" (two hex digits) becomes the byte with that value; a '%' that
       is not followed by two hex digits is copied through unchanged;
     - '+' becomes a space, but only after the first '?' has been seen,
       i.e. inside the query string (RFC 2396, section 3.3). */
const char *
svn_path_uri_decode(const char *path, apr_pool_t *pool)
{
  svn_stringbuf_t *decoded;
  const char *in = path;
  svn_boolean_t in_query = FALSE;

  /* The output is never longer than the input, so size the buffer once
     and avoid repeated realloc. */
  decoded = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
  decoded->len = 0;

  while (*in != '\0')
    {
      char ch = *in++;

      switch (ch)
        {
        case '?':
          /* Mark the start of the query string, if it exists. */
          in_query = TRUE;
          break;

        case '+':
          if (in_query)
            ch = ' ';
          break;

        case '%':
          /* IN now points just past the '%'. */
          if (svn_ctype_isxdigit(in[0]) && svn_ctype_isxdigit(in[1]))
            {
              char hex[3];

              hex[0] = in[0];
              hex[1] = in[1];
              hex[2] = '\0';
              ch = (char)(strtol(hex, NULL, 16));
              in += 2;
            }
          break;

        default:
          break;
        }

      decoded->data[decoded->len++] = ch;
    }

  /* NUL-terminate the result. */
  decoded->data[decoded->len] = 0;

  return decoded->data;
}
/* URI-escape COMPONENT (using svn_uri__char_validity) and join it onto
   URL with svn_path_join_internal().  The result is allocated in POOL. */
const char *
svn_path_url_add_component2(const char *url,
                            const char *component,
                            apr_pool_t *pool)
{
  /* Same effect as svn_path_uri_encode(), minus the unconditional copy. */
  const char *escaped = uri_escape(component, svn_uri__char_validity, pool);

  return svn_path_join_internal(url, escaped, pool);
}
/* Set *PABSOLUTE to an absolute form of RELATIVE, allocated in POOL.
   A URL is simply copied; anything else is passed on to
   svn_dirent_get_absolute(), whose error (if any) is returned. */
svn_error_t *
svn_path_get_absolute(const char **pabsolute,
                      const char *relative,
                      apr_pool_t *pool)
{
  if (! svn_path_is_url(relative))
    return svn_dirent_get_absolute(pabsolute, relative, pool);

  *pabsolute = apr_pstrdup(pool, relative);
  return SVN_NO_ERROR;
}
#if !defined(WIN32) && !defined(DARWIN)
/** Ask APR for its file path encoding and set *PATH_IS_UTF8 to TRUE when
 * APR reports APR_FILEPATH_ENCODING_UTF8, FALSE for any other answer.
 * Return an error if APR cannot determine the encoding. */
static svn_error_t *
get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
{
  int style;
  apr_status_t status = apr_filepath_encoding(&style, pool);

  if (status != APR_SUCCESS)
    return svn_error_wrap_apr(status,
                              _("Can't determine the native path encoding"));

  /* ### APR_FILEPATH_ENCODING_UNKNOWN is lumped together with "not
     UTF-8"; callers then go through the svn_utf_ functions, which will
     at least do the ASCII-subset check. */
  *path_is_utf8 = (style == APR_FILEPATH_ENCODING_UTF8);
  return SVN_NO_ERROR;
}
#endif
/* Set *PATH_APR to PATH_UTF8 converted to the encoding APR uses for file
   paths, allocated in POOL.  On WIN32 and DARWIN builds, and whenever
   APR reports UTF-8 path encoding, this is a plain copy; otherwise the
   conversion is done by svn_utf_cstring_from_utf8(). */
svn_error_t *
svn_path_cstring_from_utf8(const char **path_apr,
                           const char *path_utf8,
                           apr_pool_t *pool)
{
#if !defined(WIN32) && !defined(DARWIN)
  svn_boolean_t native_is_utf8;

  SVN_ERR(get_path_encoding(&native_is_utf8, pool));
  if (! native_is_utf8)
    return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
#endif

  /* Paths are already UTF-8 here: a copy is all that is needed. */
  *path_apr = apr_pstrdup(pool, path_utf8);
  return SVN_NO_ERROR;
}
/* Set *PATH_UTF8 to PATH_APR converted from the encoding APR uses for
   file paths to UTF-8, allocated in POOL.  On WIN32 and DARWIN builds,
   and whenever APR reports UTF-8 path encoding, this is a plain copy;
   otherwise the conversion is done by svn_utf_cstring_to_utf8(). */
svn_error_t *
svn_path_cstring_to_utf8(const char **path_utf8,
                         const char *path_apr,
                         apr_pool_t *pool)
{
#if !defined(WIN32) && !defined(DARWIN)
  svn_boolean_t native_is_utf8;

  SVN_ERR(get_path_encoding(&native_is_utf8, pool));
  if (! native_is_utf8)
    return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
#endif

  /* Paths are already UTF-8 here: a copy is all that is needed. */
  *path_utf8 = apr_pstrdup(pool, path_apr);
  return SVN_NO_ERROR;
}
/* Return PATH with every control character (as classified by
   svn_ctype_iscntrl) replaced by a backslash followed by its three-digit
   octal value.  If PATH contains no control characters PATH itself is
   returned; otherwise the result is allocated in POOL. */
const char *
svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
{
  svn_stringbuf_t *escaped;
  apr_size_t pos = 0;      /* byte currently being examined */
  apr_size_t flushed = 0;  /* number of input bytes already emitted */

  /* Room for at least one escape:
     strlen - 1 (control) + \ + N + N + N + null . */
  escaped = svn_stringbuf_create_ensure(strlen(path) + 4, pool);

  while (path[pos] != '\0')
    {
      const unsigned char ch = (unsigned char)path[pos];

      if (svn_ctype_iscntrl(ch))
        {
          /* Emit the run of ordinary bytes that precedes this one. */
          if (pos != flushed)
            svn_stringbuf_appendbytes(escaped, path + flushed,
                                      pos - flushed);

          /* Make sure there is room for '\' 'N' 'N' 'N' (and NUL). */
          svn_stringbuf_ensure(escaped, escaped->len + 5);

          /*### The backslash separator doesn't work too great with
            Windows, but it's what we'll use for consistency with invalid
            utf8 formatting (until someone has a better idea) */
          apr_snprintf(escaped->data + escaped->len, 5, "\\%03o",
                       (unsigned char)ch);
          escaped->len += 4;

          flushed = pos + 1;
        }

      ++pos;
    }

  /* Nothing was escaped, so the input can be handed back untouched. */
  if (escaped->len == 0)
    return path;

  /* Emit whatever follows the last escaped byte. */
  if (pos != flushed)
    svn_stringbuf_appendbytes(escaped, path + flushed, pos - flushed);

  /* ESCAPED is NUL-terminated either by apr_snprintf or by the
     svn_stringbuf functions. */
  return escaped->data;
}
/* Return SVN_NO_ERROR if PATH contains no control characters (as
   classified by svn_ctype_iscntrl).  Otherwise return an
   SVN_ERR_FS_PATH_SYNTAX error that names the first offending byte and
   shows PATH in local style with its control characters escaped.  POOL
   is used for building that message. */
svn_error_t *
svn_path_check_valid(const char *path, apr_pool_t *pool)
{
  const char *scan = path;

  /* Stop at the first control character, or at the end of the string. */
  while (*scan != '\0' && ! svn_ctype_iscntrl(*scan))
    ++scan;

  if (*scan == '\0')
    return SVN_NO_ERROR;

  return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL,
                           _("Invalid control character '0x%02x' in path '%s'"),
                           (unsigned char)*scan,
                           svn_path_illegal_path_escape(
                             svn_dirent_local_style(path, pool),
                             pool));
}
/* Split PATH at the last '.' of its final segment.

   When such a dot exists, is not the first byte of the final segment and
   is not the last byte of PATH, *PATH_ROOT is set to everything up to and
   including the dot and *PATH_EXT to everything after it.  Otherwise
   *PATH_ROOT is set to a copy of PATH and *PATH_EXT to "".

   Either output pointer may be NULL if the caller does not want that
   part; if both are NULL nothing is done.  Copies are allocated in
   POOL. */
void
svn_path_splitext(const char **path_root,
                  const char **path_ext,
                  const char *path,
                  apr_pool_t *pool)
{
  const char *dot;
  svn_boolean_t has_ext = FALSE;

  /* Nowhere to report a result => nothing to compute. */
  if (path_root == NULL && path_ext == NULL)
    return;

  /* Take the rightmost period, provided something follows it. */
  dot = strrchr(path, '.');
  if (dot != NULL && dot[1] != '\0')
    {
      /* The period only counts if it lies inside the final segment and
         is not that segment's first byte. */
      const char *slash = strrchr(path, '/');
      const char *name_start = (slash != NULL) ? slash + 1 : path;

      has_ext = (dot > name_start);
    }

  if (has_ext)
    {
      if (path_root)
        *path_root = apr_pstrmemdup(pool, path,
                                    (apr_size_t)(dot - path) + 1);
      if (path_ext)
        *path_ext = apr_pstrdup(pool, dot + 1);
    }
  else
    {
      /* No suitable separator was found, so there is no split. */
      if (path_root)
        *path_root = apr_pstrdup(pool, path);
      if (path_ext)
        *path_ext = "";
    }
}
/* Repository relative URLs (^/). */

/* Return TRUE if PATH begins with the two bytes "^/". */
svn_boolean_t
svn_path_is_repos_relative_url(const char *path)
{
  /* The second byte is only looked at when the first one matched, so an
     empty PATH is never read past its terminator. */
  return path[0] == '^' && path[1] == '/';
}
/* Set *ABSOLUTE_URL to REPOS_ROOT_URL followed by RELATIVE_URL with its
   leading '^' removed, allocated in POOL.  Return an SVN_ERR_BAD_URL
   error if RELATIVE_URL does not start with "^/".

   Neither input is assumed to be canonical and the result is not
   canonicalized here; the caller is expected to canonicalize it
   afterwards, which also takes care of any doubled path separator. */
svn_error_t *
svn_path_resolve_repos_relative_url(const char **absolute_url,
                                    const char *relative_url,
                                    const char *repos_root_url,
                                    apr_pool_t *pool)
{
  if (svn_path_is_repos_relative_url(relative_url))
    {
      /* Skip only the '^'; the '/' after it is kept. */
      *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
                                  SVN_VA_NULL);
      return SVN_NO_ERROR;
    }

  return svn_error_createf(SVN_ERR_BAD_URL, NULL,
                           _("Improper relative URL '%s'"),
                           relative_url);
}