| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| #ifndef _RTL_URI_H_ |
| #define _RTL_URI_H_ |
| |
| #include "rtl/textenc.h" |
| #include "rtl/ustring.h" |
| #include "sal/types.h" |
| |
| #if defined __cplusplus |
| extern "C" { |
| #endif /* __cplusplus */ |
| |
| /** Various predefined URI 'char classes.' |
| |
| @descr |
| A 'char class' defines which (ASCII) characters can be written 'as they |
| are' in a part of a Uri, and which characters have to be written using |
| escape sequences ('%' followed by two hex digits). Characters outside |
| the ASCII range are always written using escape sequences. |
| |
| @descr |
| If there are other frequently used char classes, they can be added to |
| this enumeration; the function rtl_getUriCharClass() has to be adapted |
| then, too. |
| */ |
| typedef enum |
| { |
| /** The empty char class. |
| |
| @descr |
| All characters are written using escape sequences. |
| */ |
| rtl_UriCharClassNone, |
| |
| /** The RFC 2732 <uric> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-./:;=?@[]_~ plus digits and |
| letters. |
| */ |
| rtl_UriCharClassUric, |
| |
| /** The RFC 2396 <uric_no_slash> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-.:;=?@_~ plus digits and letters. |
| */ |
| rtl_UriCharClassUricNoSlash, |
| |
| /** The RFC 2396 <rel_segment> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-.;=@_~ plus digits and letters. |
| */ |
| rtl_UriCharClassRelSegment, |
| |
| /** The RFC 2396 <reg_name> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-.:;=@_~ plus digits and letters. |
| */ |
| rtl_UriCharClassRegName, |
| |
| /** The RFC 2396 <userinfo> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-.:;=_~ plus digits and letters. |
| */ |
| rtl_UriCharClassUserinfo, |
| |
| /** The RFC 2396 <pchar> char class. |
| |
| @descr |
| The 'valid' characters are !$&'()*+,-.:=@_~ plus digits and letters. |
| */ |
| rtl_UriCharClassPchar, |
| |
| /** The char class for the values of uno URL parameters. |
| |
| @descr |
| The 'valid' characters are !$&'()*+-./:?@_~ plus digits and letters. |
| */ |
| rtl_UriCharClassUnoParamValue, |
| |
| rtl_UriCharClass_FORCE_EQUAL_SIZE = SAL_MAX_ENUM |
| } |
| rtl_UriCharClass; |
| |
| /** The mechanism describing how escape sequences in the input of |
| rtl_uriEncode() are handled. |
| */ |
| typedef enum |
| { |
| /** The special meaning of '%' is ignored (i.e., there are by definition |
| no escape sequences in the input). |
| |
| @descr |
| This mechanism is useful to encode user input as part of a URI (e.g., |
| the user-supplied password in an ftp URL---'%20abcde' is a valid |
| password, so do not assume that the '%20' is an escaped space). |
| */ |
| rtl_UriEncodeIgnoreEscapes, |
| |
| /** All escape sequences ('%' followed by two hex digits) are kept intact, |
| even if they represent characters that need not be escaped or if they |
| do not even map to characters in the given charset. |
| |
| @descr |
| This mechanism is useful when passing on complete URIs more or less |
| unmodified (e.g., within an HTTP proxy): missing escape sequences are |
| added, but existing escape sequences are not touched (except that any |
| lower case hex digits are replaced by upper case hex digits). |
| */ |
| rtl_UriEncodeKeepEscapes, |
| |
| /** All escape sequences ('%' followed by two hex digits) are resolved in |
| a first step; only those that represent characters that need to be |
| escaped are kept intact. |
| |
| @descr |
| This mechanism is useful to properly encode complete URIs entered by |
| the user: the URI is brought into a 'canonic form,' but care is taken |
| not to damage (valid) escape sequences the (careful) user already |
| entered as such. |
| */ |
| rtl_UriEncodeCheckEscapes, |
| |
| /** Like rtl_UriEncodeIgnoreEscapes, but indicating failure when converting |
| unmappable characters. |
| |
| @since UDK 3.2.0 |
| */ |
| rtl_UriEncodeStrict, |
| |
| /** Like rtl_UriEncodeKeepEscapes, but indicating failure when converting |
| unmappable characters. |
| |
| @since UDK 3.2.7 |
| */ |
| rtl_UriEncodeStrictKeepEscapes, |
| |
| rtl_UriEncode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM |
| } |
| rtl_UriEncodeMechanism; |
| |
| /** The mechanism describing how rtl_uriDecode() translates (part of) a URI |
| into a Unicode string. |
| */ |
| typedef enum |
| { |
| /** The text is returned completely unmodified. |
| */ |
| rtl_UriDecodeNone, |
| |
| /** The text is returned in the form of an IURI (cf. |
| draft-masinter-url-i18n-05.txt). |
| |
| @descr |
| All escape sequences representing ASCII characters (%00--%7F) are |
| kept, all other escape sequences are interpreted as UTF-8 characters |
| and translated to Unicode, if possible. |
| */ |
| rtl_UriDecodeToIuri, |
| |
| /** The text is decoded. |
| |
| @descr |
| All escape sequences representing characters from the given charset |
| are decoded and translated to Unicode, if possible. |
| */ |
| rtl_UriDecodeWithCharset, |
| |
| /** Like rtl_UriDecodeWithCharset, but indicating failure when converting |
| unmappable characters. |
| |
| @since UDK 3.2.0 |
| */ |
| rtl_UriDecodeStrict, |
| |
| rtl_UriDecode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM |
| } |
| rtl_UriDecodeMechanism; |
| |
| /** Map a predefined rtl_UriCharClass to a form usable by rtl_uriEncode(). |
| |
| @descr |
| The function rtl_uriEncode() expects an array of 128 booleans, and this |
| function maps rtl_UriCharClass enumeration members to such arrays. |
| |
| @param eCharClass |
| Any valid member of rtl_UriCharClass. |
| |
| @return |
| An array of 128 booleans, to be used in calls to rtl_uriEncode(). |
| */ |
| sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) |
| SAL_THROW_EXTERN_C(); |
| |
| /** Encode a text as (part of) a URI. |
| |
| @param pText |
| Any Unicode string. Must not be null. |
| |
| @param pCharClass |
| A char class, represented as an array of 128 booleans (true means keep the |
| corresponding ASCII character unencoded, false means encode it). Must not |
| be null, and the boolean corresponding to the percent sign (0x25) must be |
| false. (See rtl_getUriCharClass() for a function mapping from |
| rtl_UriCharClass to such arrays.) |
| |
| @param eMechanism |
| The mechanism describing how escape sequences in the input text are |
| handled. |
| |
| @param eCharset |
| When Unicode characters from the input text have to be written using |
| escape sequences (because they are either outside the ASCII range or do |
| not belong to the given char class), they are first translated into this |
| charset before being encoded using escape sequences. |
| |
| Also, if the encode mechanism is rtl_UriEncodeCheckEscapes, all escape |
| sequences already present in the input text are interpreted as characters |
| from this charset. |
| |
| @param pResult |
| Returns an encoded representation of the input text. Must itself not be |
| null, and must point to either null or a valid string. |
| |
| If the encode mechanism is rtl_UriEncodeStrict, and pText cannot be |
| converted to eCharset because it contains unmappable characters (which |
| implies that pText is not empty), then an empty string is returned. |
| */ |
| void SAL_CALL rtl_uriEncode(rtl_uString * pText, |
| sal_Bool const * pCharClass, |
| rtl_UriEncodeMechanism eMechanism, |
| rtl_TextEncoding eCharset, |
| rtl_uString ** pResult) |
| SAL_THROW_EXTERN_C(); |
| |
| /** Decode (a part of) a URI. |
| |
| @param pText |
| Any Unicode string. Must not be null. (If the input is indeed part of a |
| valid URI, this string will only contain a subset of the ASCII characters, |
| but this function also handles other Unicode characters properly.) |
| |
| @param eMechanism |
| The mechanism describing how the input text is translated into a Unicode |
| string. |
| |
| @param eCharset |
| When the decode mechanism is rtl_UriDecodeWithCharset, all escape |
| sequences in the input text are interpreted as characters from this |
| charset. Those characters are translated to Unicode characters in the |
| resulting output, if possible. |
| |
| When the decode mechanism is rtl_UriDecodeNone or rtl_UriDecodeToIuri, |
| this parameter is ignored (and is best specified as |
| RTL_TEXTENCODING_UTF8). |
| |
| @param pResult |
| Returns a decoded representation of the input text. Must itself not be |
| null, and must point to either null or a valid string. |
| |
| If the decode mechanism is rtl_UriDecodeStrict, and pText cannot be |
| converted to eCharset because it contains (encodings of) unmappable |
| characters (which implies that pText is not empty), then an empty string is |
| returned. |
| */ |
| void SAL_CALL rtl_uriDecode(rtl_uString * pText, |
| rtl_UriDecodeMechanism eMechanism, |
| rtl_TextEncoding eCharset, |
| rtl_uString ** pResult) |
| SAL_THROW_EXTERN_C(); |
| |
| /** Convert a relative URI reference into an absolute one. |
| |
| A URI reference is a URI plus an optional <"#" fragment> part. |
| |
| This function uses the algorithm described in RFC 2396, section 5.2, with |
| the following clarifications: (1) Backwards-compatible relative URIs |
| starting with a scheme component (see RFC 2396, section 5.2, step 3) are not |
| supported. (2) Segments "." and ".." within the path of the base URI are |
| not considered special, RFC 2396 seems a bit unlcear about that point. |
| (3) Erroneous excess segments ".." within the path of the relative URI (if |
| it is indeed relative) are left intact, as the examples in RFC 2396, |
| section C.2, suggest. (4) If the relative URI is a reference to the |
| "current document," the "current document" is taken to be the base URI. |
| |
| This function signals exceptions by returning false and letting pException |
| point to a message explaining the exception. |
| |
| @param pBaseUriRef |
| An absolute, hierarchical URI reference that serves as the base URI. If it |
| has to be inspected (i.e., pRelUriRef is not an absolute URI already), and |
| if it either is not an absolute URI (i.e., does not begin with a |
| <scheme ":"> part) or has a path that is non-empty but does not start |
| with "/", an exception will be signaled. |
| |
| @param pRelUriRef |
| An URI reference that may be either absolute or relative. If it is |
| absolute, it will be returned unmodified (and it need not be hierarchical |
| then). |
| |
| @param pResult |
| Returns an absolute URI reference. Must itself not be null, and must point |
| to either null or a valid string. If an exception is signalled, it is left |
| unchanged. |
| |
| @param pException |
| Returns an explanatory message in case an exception is signalled. Must |
| itself not be null, and must point to either null or a valid string. If no |
| exception is signalled, it is left unchanged. |
| |
| @return |
| True if no exception is signalled, otherwise false. |
| */ |
| sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, |
| rtl_uString * pRelUriRef, |
| rtl_uString ** pResult, |
| rtl_uString ** pException) |
| SAL_THROW_EXTERN_C(); |
| |
| #if defined __cplusplus |
| } |
| #endif /* __cplusplus */ |
| |
| #endif /* _RTL_URI_H_ */ |