main/sal/rtl/source/uri.cxx - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_sal.hxx"

 #include "rtl/uri.h"

 #include "surrogates.h"

 #include "osl/diagnose.h"
 #include "rtl/strbuf.hxx"
 #include "rtl/textenc.h"
 #include "rtl/textcvt.h"
 #include "rtl/uri.h"
 #include "rtl/ustrbuf.h"
 #include "rtl/ustrbuf.hxx"
 #include "rtl/ustring.h"
 #include "rtl/ustring.hxx"
 #include "sal/types.h"

 #include <cstddef>

 namespace {

 // Number of entries in one URI character-class table.  isValid() treats any
 // code point at or above this bound as not belonging to the class.
 std::size_t const nCharClassSize = 128;

 // UTF-16 code unit that introduces a "%XX" escape sequence.
 sal_Unicode const cEscapePrefix = 0x25; // '%'

 // Tells whether nUtf32 is one of the ASCII decimal digits '0'--'9'.
 inline bool isDigit(sal_uInt32 nUtf32)
 {
     sal_uInt32 const nZero = 0x30; // '0'
     sal_uInt32 const nNine = 0x39; // '9'
     return !(nUtf32 < nZero || nUtf32 > nNine);
 }

 // Tells whether nUtf32 is an ASCII letter, upper or lower case.
 inline bool isAlpha(sal_uInt32 nUtf32)
 {
     if (nUtf32 >= 0x41 && nUtf32 <= 0x5A) // 'A'--'Z'
         return true;
     return nUtf32 >= 0x61 && nUtf32 <= 0x7A; // 'a'--'z'
 }

 // Typed wrapper around the SAL_RTL_IS_HIGH_SURROGATE macro (presumably
 // provided by "surrogates.h"): tells whether nUtf16 is a UTF-16 high
 // (leading) surrogate code unit.
 inline bool isHighSurrogate(sal_uInt32 nUtf16)
 {
     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
 }

 // Typed wrapper around the SAL_RTL_IS_LOW_SURROGATE macro (presumably
 // provided by "surrogates.h"): tells whether nUtf16 is a UTF-16 low
 // (trailing) surrogate code unit.
 inline bool isLowSurrogate(sal_uInt32 nUtf16)
 {
     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
 }

 // Typed wrapper around the SAL_RTL_COMBINE_SURROGATES macro (presumably
 // provided by "surrogates.h"): combines a high and a low surrogate into the
 // code point they encode.  Callers in this file only pass a pair that has
 // already been checked with isHighSurrogate()/isLowSurrogate() or asserted
 // to be such a pair.
 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
 {
     return SAL_RTL_COMBINE_SURROGATES(high, low);
 }

 // Maps a hexadecimal digit ('0'--'9', 'A'--'F' or 'a'--'f') to its numeric
 // value 0--15.  Any other code point yields -1.
 inline int getHexWeight(sal_uInt32 nUtf32)
 {
     if (nUtf32 >= 0x30 && nUtf32 <= 0x39) // '0'--'9'
         return static_cast< int >(nUtf32 - 0x30);
     if (nUtf32 >= 0x41 && nUtf32 <= 0x46) // 'A'--'F'
         return static_cast< int >(nUtf32 - 0x41 + 10);
     if (nUtf32 >= 0x61 && nUtf32 <= 0x66) // 'a'--'f'
         return static_cast< int >(nUtf32 - 0x61 + 10);
     return -1; // not a hex digit
 }

 // Tells whether nUtf32 is a member of the character class described by the
 // table pCharClass.  Code points outside the table are never members.
 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
 {
     if (nUtf32 >= nCharClassSize)
         return false;
     return pCharClass[nUtf32] != 0;
 }

 // Appends the single UTF-16 code unit cChar to the string buffer
 // (*pBuffer, *pCapacity) by inserting it at the buffer's current length.
 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                          sal_Unicode cChar)
 {
     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
 }

 // Classification that readUcs4() reports for the item it has just read.
 enum EscapeType
 {
     // No escape sequence was consumed; the value is a literal character.
     EscapeNo,
     // One or more "%XX" sequences were consumed and decoded to a character.
     EscapeChar,
     // A single "%XX" sequence was consumed that could not be decoded to a
     // character; the value is the raw octet.
     EscapeOctet
 };

 /* Read any of the following:

    - sequence of escape sequences representing character from eCharset,
      translated to single UCS4 character; or

    - pair of UTF-16 surrogates, translated to single UCS4 character; or

    - single UTF-16 character, extended to UCS4 character.

    pBegin    in/out: position to read from; advanced past everything that
              was consumed.  The first code unit is read unconditionally, so
              the caller must ensure *pBegin < pEnd.
    pEnd      end of the input.
    bEncoded  whether "%XX" escape sequences are recognized at all.
    eCharset  encoding in which escaped octets >= 0x80 are interpreted.
    pType     out: how the returned value was obtained (see EscapeType).

    Returns the character read, or, for EscapeOctet, the value of the single
    escaped octet.
  */
 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                     bool bEncoded, rtl_TextEncoding eCharset,
                     EscapeType * pType)
 {
     sal_uInt32 nChar = *(*pBegin)++;
     int nWeight1;
     int nWeight2;
     // An escape sequence is only recognized if '%' is followed by two hex
     // digits; otherwise the '%' is handled as an ordinary character below.
     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
     {
         *pBegin += 2;
         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
         if (nChar <= 0x7F)
             // Escaped octets up to 0x7F are taken as the character itself,
             // without consulting eCharset.
             *pType = EscapeChar;
         else if (eCharset == RTL_TEXTENCODING_UTF8)
         {
             // Hand-rolled UTF-8 decoding: nChar is a candidate lead octet,
             // the continuation octets must follow as further "%XX" escapes.
             if (nChar >= 0xC0 && nChar <= 0xF4)
             {
                 sal_uInt32 nEncoded;
                 int nShift;
                 sal_uInt32 nMin;
                 // nShift is the bit position of the first continuation
                 // octet's payload; nMin is the smallest code point that may
                 // legitimately use a sequence of this length.
                 if (nChar <= 0xDF)
                 {
                     nEncoded = (nChar & 0x1F) << 6;
                     nShift = 0;
                     nMin = 0x80;
                 }
                 else if (nChar <= 0xEF)
                 {
                     nEncoded = (nChar & 0x0F) << 12;
                     nShift = 6;
                     nMin = 0x800;
                 }
                 else
                 {
                     nEncoded = (nChar & 0x07) << 18;
                     nShift = 12;
                     nMin = 0x10000;
                 }
                 // Scan ahead with a local pointer so that nothing beyond the
                 // first escape is consumed if the sequence proves invalid.
                 sal_Unicode const * p = *pBegin;
                 bool bUTF8 = true;
                 for (; nShift >= 0; nShift -= 6)
                 {
                     // Each continuation must be "%8X".."%BX", i.e. an octet
                     // in the range 0x80--0xBF.
                     if (pEnd - p < 3 || p[0] != cEscapePrefix
                         || (nWeight1 = getHexWeight(p[1])) < 8
                         || nWeight1 > 11
                         || (nWeight2 = getHexWeight(p[2])) < 0)
                     {
                         bUTF8 = sal_False;
                         break;
                     }
                     p += 3;
                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                 }
                 // Accept only results that are not below nMin for the
                 // sequence length, are not surrogates, and do not exceed
                 // 0x10FFFF.
                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
                 {
                     *pBegin = p;
                     *pType = EscapeChar;
                     return nEncoded;
                 }
             }
             // Not a valid UTF-8 sequence: report just the first octet.
             *pType = EscapeOctet;
         }
         else
         {
             // Any other charset: collect octets in aBuf and repeatedly try
             // to convert the whole buffer until it yields a character or the
             // converter reports something other than "need more input".
             rtl::OStringBuffer aBuf;
             aBuf.append(static_cast< char >(nChar));
             rtl_TextToUnicodeConverter aConverter
                 = rtl_createTextToUnicodeConverter(eCharset);
             sal_Unicode const * p = *pBegin;
             for (;;)
             {
                 sal_Unicode aDst[2];
                 sal_uInt32 nInfo;
                 sal_Size nConverted;
                 sal_Size nDstSize = rtl_convertTextToUnicode(
                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
                     sizeof aDst / sizeof aDst[0],
                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
                     &nInfo, &nConverted);
                 if (nInfo == 0)
                 {
                     // Complete conversion: consume everything scanned so far
                     // and return the character (one or two UTF-16 units).
                     OSL_ASSERT(
                         nConverted
                         == sal::static_int_cast< sal_uInt32 >(
                             aBuf.getLength()));
                     rtl_destroyTextToUnicodeConverter(aConverter);
                     *pBegin = p;
                     *pType = EscapeChar;
                     OSL_ASSERT(
                         nDstSize == 1
                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
                             && isLowSurrogate(aDst[1])));
                     return nDstSize == 1
                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
                 }
                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                          && pEnd - p >= 3 && p[0] == cEscapePrefix
                          && (nWeight1 = getHexWeight(p[1])) >= 0
                          && (nWeight2 = getHexWeight(p[2])) >= 0)
                 {
                     // More input needed and the next item is another escape:
                     // add its octet and retry.
                     p += 3;
                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
                 }
                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                          && p != pEnd && *p <= 0x7F)
                 {
                     // More input needed and the next item is an unescaped
                     // character up to 0x7F: use it as the next octet.
                     aBuf.append(static_cast< char >(*p++));
                 }
                 else
                 {
                     // Conversion failed or input is exhausted: give up and
                     // fall back to reporting the first octet only.
                     OSL_ASSERT(
                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
                         == 0);
                     break;
                 }
             }
             rtl_destroyTextToUnicodeConverter(aConverter);
             *pType = EscapeOctet;
         }
         return nChar;
     }
     else
     {
         // No escape sequence: return the code unit itself, or the combined
         // code point if it is a high surrogate directly followed by a low
         // surrogate (which is then consumed as well).
         *pType = EscapeNo;
         return isHighSurrogate(nChar) && *pBegin < pEnd
                && isLowSurrogate(**pBegin) ?
                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
     }
 }

 // Appends the code point nUtf32 to the string buffer as UTF-16: a single
 // code unit for values up to 0xFFFF, a high/low surrogate pair otherwise.
 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
 {
     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
     if (nUtf32 > 0xFFFF)
     {
         sal_uInt32 const nOffset = nUtf32 - 0x10000;
         sal_Unicode const cHigh
             = static_cast< sal_Unicode >((nOffset >> 10) | 0xD800);
         sal_Unicode const cLow
             = static_cast< sal_Unicode >((nOffset & 0x3FF) | 0xDC00);
         writeUnicode(pBuffer, pCapacity, cHigh);
         writeUnicode(pBuffer, pCapacity, cLow);
         return;
     }
     writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
 }

 // Appends the three-character escape sequence "%XX" for nOctet to the string
 // buffer, using upper-case hexadecimal digits.  nOctet must not exceed 0xFF.
 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                       sal_uInt32 nOctet)
 {
     OSL_ENSURE(nOctet <= 0xFF, "bad octet");

     /* '0'--'9', 'A'--'F' */
     static sal_Unicode const aHexDigits[16]
         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
             0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 };

     sal_Unicode const aSequence[3]
         = { cEscapePrefix, aHexDigits[nOctet >> 4], aHexDigits[nOctet & 15] };
     for (int i = 0; i != 3; ++i)
         writeUnicode(pBuffer, pCapacity, aSequence[i]);
 }

 // Appends the code point nUtf32 to the string buffer as a run of "%XX"
 // escape sequences, one per octet of its representation in eCharset.
 //
 // If eCharset is not UTF-8 and the character cannot be converted, the result
 // depends on bStrict: in strict mode nothing is written and false is
 // returned; otherwise the character is written unescaped.  In every other
 // case the function returns true.
 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
 {
     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");

     if (eCharset != RTL_TEXTENCODING_UTF8)
     {
         rtl_UnicodeToTextConverter aConverter
             = rtl_createUnicodeToTextConverter(eCharset);

         // Present the character to the converter as UTF-16.
         sal_Unicode aUtf16[2];
         sal_Size nUtf16Len;
         if (nUtf32 > 0xFFFF)
         {
             sal_uInt32 const nOffset = nUtf32 - 0x10000;
             aUtf16[0] = static_cast< sal_Unicode >((nOffset >> 10) | 0xD800);
             aUtf16[1] = static_cast< sal_Unicode >((nOffset & 0x3FF) | 0xDC00);
             nUtf16Len = 2;
         }
         else
         {
             aUtf16[0] = static_cast< sal_Unicode >(nUtf32);
             nUtf16Len = 1;
         }

         sal_Char aOctets[32]; // FIXME  random value
         sal_uInt32 nStatus;
         sal_Size nConsumed;
         sal_Size nOctets = rtl_convertUnicodeToText(
             aConverter, 0, aUtf16, nUtf16Len, aOctets, sizeof aOctets,
             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
             &nStatus, &nConsumed);
         OSL_ASSERT((nStatus & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
         rtl_destroyUnicodeToTextConverter(aConverter);

         if (nStatus != 0)
         {
             // The character has no representation in eCharset.
             if (bStrict)
                 return false;
             writeUcs4(pBuffer, pCapacity, nUtf32);
             return true;
         }

         OSL_ENSURE(nConsumed == nUtf16Len, "bad rtl_convertUnicodeToText");
         // FIXME  all octets are escaped, even if there is no need
         for (sal_Size i = 0; i < nOctets; ++i)
             writeEscapeOctet(pBuffer, pCapacity,
                              static_cast< unsigned char >(aOctets[i]));
         return true;
     }

     // UTF-8: a single octet below 0x80, otherwise a lead octet followed by
     // one to three continuation octets carrying six bits each.
     if (nUtf32 < 0x80)
     {
         writeEscapeOctet(pBuffer, pCapacity, nUtf32);
         return true;
     }

     sal_uInt32 nLead;
     int nFirstShift; // bit position of the first continuation's payload
     if (nUtf32 < 0x800)
     {
         nLead = nUtf32 >> 6 | 0xC0;
         nFirstShift = 0;
     }
     else if (nUtf32 < 0x10000)
     {
         nLead = nUtf32 >> 12 | 0xE0;
         nFirstShift = 6;
     }
     else
     {
         nLead = nUtf32 >> 18 | 0xF0;
         nFirstShift = 12;
     }
     writeEscapeOctet(pBuffer, pCapacity, nLead);
     for (int nShift = nFirstShift; nShift >= 0; nShift -= 6)
         writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> nShift & 0x3F) | 0x80);
     return true;
 }

 struct Component
 {
     sal_Unicode const * pBegin;
     sal_Unicode const * pEnd;

     inline Component(): pBegin(0) {}

     inline bool isPresent() const { return pBegin != 0; }

     inline sal_Int32 getLength() const;
 };

 inline sal_Int32 Component::getLength() const
 {
     OSL_ENSURE(isPresent(), "taking length of non-present component");
     return static_cast< sal_Int32 >(pEnd - pBegin);
 }

 // The five components of a URI reference as filled in by parseUriRef().
 // Each stored range includes the component's delimiter: the scheme ends with
 // its ':', the authority starts with "//", the query with '?' and the
 // fragment with '#'.
 struct Components
 {
     Component aScheme;
     Component aAuthority;
     Component aPath;
     Component aQuery;
     Component aFragment;
 };

 // Splits the URI reference pUriRef into scheme, authority, path, query and
 // fragment, recording each one found in *pComponents as a range into
 // pUriRef's buffer.  The path is always recorded (it may be empty); the
 // other components are only recorded if they occur.  The recorded ranges
 // include the delimiters (trailing ':' of the scheme, leading "//", '?' and
 // '#').
 //
 // The parsing is deliberately lenient and accepts various forms of illegal
 // input.
 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
 {
     sal_Unicode const * const pStart = pUriRef->buffer;
     sal_Unicode const * const pLimit = pStart + pUriRef->length;
     sal_Unicode const * pCur = pStart;

     // Scheme: a letter, then letters, digits, '+', '-' or '.', then ':'.
     if (pCur != pLimit && isAlpha(*pCur))
     {
         sal_Unicode const * pScan = pCur + 1;
         while (pScan != pLimit
                && (isAlpha(*pScan) || isDigit(*pScan) || *pScan == '+'
                    || *pScan == '-' || *pScan == '.'))
         {
             ++pScan;
         }
         if (pScan != pLimit && *pScan == ':')
         {
             pCur = pScan + 1;
             pComponents->aScheme.pBegin = pStart;
             pComponents->aScheme.pEnd = pCur;
         }
     }

     // Authority: "//" up to the next '/', '?' or '#'.
     if (pLimit - pCur >= 2 && pCur[0] == '/' && pCur[1] == '/')
     {
         pComponents->aAuthority.pBegin = pCur;
         for (pCur += 2; pCur != pLimit; ++pCur)
         {
             if (*pCur == '/' || *pCur == '?' || *pCur == '#')
                 break;
         }
         pComponents->aAuthority.pEnd = pCur;
     }

     // Path: everything up to the next '?' or '#'.
     pComponents->aPath.pBegin = pCur;
     for (; pCur != pLimit; ++pCur)
     {
         if (*pCur == '?' || *pCur == '#')
             break;
     }
     pComponents->aPath.pEnd = pCur;

     // Query: '?' up to the next '#'.
     if (pCur != pLimit && *pCur == '?')
     {
         pComponents->aQuery.pBegin = pCur;
         for (++pCur; pCur != pLimit; ++pCur)
         {
             if (*pCur == '#')
                 break;
         }
         pComponents->aQuery.pEnd = pCur;
     }

     // Fragment: whatever remains, which can only start with '#'.
     if (pCur != pLimit)
     {
         OSL_ASSERT(*pCur == '#');
         pComponents->aFragment.pBegin = pCur;
         pComponents->aFragment.pEnd = pLimit;
     }
 }

 // Resolves the relative path rRelPath against the absolute path rBasePath
 // and returns the merged path: the last segment of the base path is dropped,
 // the segments of the relative path are appended, "." segments of the
 // relative path are removed, and each ".." segment of the relative path
 // removes the preceding segment where possible.
 //
 // rBasePath must be present and either start with a slash or be empty.  An
 // empty base path (as in "http://host") is treated like "/"; without that
 // special case the scan over the base path would start beyond its end and
 // run past the end of the string.
 // NOTE(review): the base URI's authority is not known here, so an empty base
 // path is treated as "/" whether or not an authority is present.
 //
 // rRelPath must be present; it may be empty.
 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
 {
     OSL_ASSERT(rBasePath.isPresent());
     OSL_ASSERT(rRelPath.isPresent());

     bool const bEmptyBase = rBasePath.pBegin == rBasePath.pEnd;
     OSL_ASSERT(bEmptyBase || *rBasePath.pBegin == '/');

     // The invariant of aBuffer is that it always starts and ends with a slash
     // (until probably right at the end of the algorithm, when the last segment
     // of rRelPath is added, which does not necessarily end in a slash):
     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
         // XXX  numeric overflow

     // nFixed is the length of the buffer prefix that ".." segments of
     // rRelPath must not remove.
     sal_Int32 nFixed = 1;
     sal_Unicode const * p;
     if (bEmptyBase)
     {
         // Nothing to scan; start from the root.
         sal_Unicode const cSlash = '/';
         aBuffer.append(&cSlash, 1);
     }
     else
     {
         // Segments "." and ".." within rBasePath are not considered special
         // (but are also not removed by ".." segments within rRelPath), RFC
         // 2396 seems a bit unclear about this point:
         p = rBasePath.pBegin + 1;
         for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
             if (*q == '/')
             {
                 if (
                     (q - p == 1 && p[0] == '.') ||
                     (q - p == 2 && p[0] == '.' && p[1] == '.')
                    )
                 {
                     nFixed = static_cast< sal_Int32 >(
                         q + 1 - rBasePath.pBegin);
                 }
                 p = q + 1;
             }
         // Copy the base path up to and including its last slash, i.e.
         // without its last segment:
         aBuffer.append(rBasePath.pBegin,
                        static_cast< sal_Int32 >(p - rBasePath.pBegin));
     }

     p = rRelPath.pBegin;
     if (p != rRelPath.pEnd)
         for (;;)
         {
             // [p, q) is the current segment; r additionally covers the
             // slash that terminates it, if any:
             sal_Unicode const * q = p;
             sal_Unicode const * r;
             for (;;)
             {
                 if (q == rRelPath.pEnd)
                 {
                     r = q;
                     break;
                 }
                 if (*q == '/')
                 {
                     r = q + 1;
                     break;
                 }
                 ++q;
             }
             if (q - p == 2 && p[0] == '.' && p[1] == '.')
             {
                 // Erroneous excess segments ".." within rRelPath are left
                 // intact, as the examples in RFC 2396, section C.2, suggest:
                 sal_Int32 i = aBuffer.getLength() - 1;
                 if (i < nFixed)
                 {
                     aBuffer.append(p, static_cast< sal_Int32 >(r - p));
                     nFixed += 3;
                 }
                 else
                 {
                     // Cut the buffer back to just after the slash that
                     // precedes its last segment:
                     while (aBuffer.charAt(i - 1) != '/')
                         --i;
                     aBuffer.setLength(i);
                 }
             }
             else if (q - p != 1 || *p != '.')
                 aBuffer.append(p, static_cast< sal_Int32 >(r - p));
             if (q == rRelPath.pEnd)
                 break;
             p = q + 1;
         }

     return aBuffer.makeStringAndClear();
 }

 }

 // Returns the membership table of the URI character class eCharClass: an
 // array of nCharClassSize entries, indexed by code point, in which a
 // non-zero entry marks a character that belongs to the class.
 //
 // eCharClass is used directly as the row index into the static table below;
 // the rows are, in order, None, Uric, UricNoSlash, RelSegment, RegName,
 // Userinfo, Pchar and UnoParamValue.  An out-of-range value is only reported
 // through OSL_ENSURE and then still used as an index, so callers must pass a
 // valid class.
 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     SAL_THROW_EXTERN_C()
 {
     // Each row covers code points 0x00--0x7F, sixteen per line; the trailing
     // comments show the characters for the lines 0x20--0x7F.
     static sal_Bool const aCharClass[][nCharClassSize]
     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
        }};
     // Diagnose (but do not prevent) an index outside the table:
     OSL_ENSURE(
         (eCharClass >= 0
          && (sal::static_int_cast< std::size_t >(eCharClass)
              < sizeof aCharClass / sizeof aCharClass[0])),
         "bad eCharClass");
     return aCharClass[eCharClass];
 }

 // Encodes pText for use in a URI and stores the outcome in *pResult.
 //
 // Characters that are members of pCharClass are copied verbatim; all others
 // are written as "%XX" escape sequences of their representation in eCharset.
 // eMechanism controls two things:
 //
 // - whether escape sequences already present in pText are recognized (the
 //   KeepEscapes, CheckEscapes and StrictKeepEscapes mechanisms), and, for
 //   CheckEscapes, whether a recognized escaped character that is a member of
 //   pCharClass is written unescaped;
 //
 // - whether a character that cannot be represented in eCharset aborts the
 //   encoding (the Strict and StrictKeepEscapes mechanisms).  In that case
 //   *pResult is set to the empty string.
 //
 // pCharClass must not contain the percent sign.
 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
                             rtl_UriEncodeMechanism eMechanism,
                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     SAL_THROW_EXTERN_C()
 {
     // make sure the percent sign is encoded...
     OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");

     bool const bReadEscapes
         = eMechanism == rtl_UriEncodeKeepEscapes
           || eMechanism == rtl_UriEncodeCheckEscapes
           || eMechanism == rtl_UriEncodeStrictKeepEscapes;
     bool const bCheckEscapes = eMechanism == rtl_UriEncodeCheckEscapes;
     bool const bStrict
         = eMechanism == rtl_UriEncodeStrict
           || eMechanism == rtl_UriEncodeStrictKeepEscapes;

     sal_Unicode const * pCur = pText->buffer;
     sal_Unicode const * const pLimit = pCur + pText->length;
     sal_Int32 nCapacity = 0;
     rtl_uString_new(pResult);
     while (pCur < pLimit)
     {
         EscapeType eKind;
         sal_uInt32 nCode
             = readUcs4(&pCur, pLimit, bReadEscapes, eCharset, &eKind);

         if (eKind == EscapeOctet)
         {
             // An undecodable escaped octet is passed through as it is.
             writeEscapeOctet(pResult, &nCapacity, nCode);
             continue;
         }

         // A literal character is copied if the class permits it; an already
         // escaped one only if, additionally, escapes are to be checked.
         bool const bVerbatim
             = (eKind == EscapeNo || bCheckEscapes)
               && isValid(pCharClass, nCode); // implies nCode <= 0x7F
         if (bVerbatim)
         {
             writeUnicode(pResult, &nCapacity,
                          static_cast< sal_Unicode >(nCode));
         }
         else if (!writeEscapeChar(pResult, &nCapacity, nCode, eCharset,
                                   bStrict))
         {
             rtl_uString_new(pResult);
             return;
         }
     }
 }

 // Decodes the escape sequences in pText and stores the outcome in *pResult.
 //
 // - rtl_UriDecodeNone: pText is passed through unchanged.
 //
 // - rtl_UriDecodeToIuri: escape sequences are interpreted as UTF-8
 //   (eCharset is ignored); escaped characters up to 0x7F stay escaped, all
 //   other decodable sequences are replaced by the character they denote.
 //
 // - any other mechanism (rtl_UriDecodeWithCharset, rtl_UriDecodeStrict):
 //   escape sequences are interpreted in eCharset and replaced by the
 //   character they denote.
 //
 // An escaped octet that cannot be decoded is kept as an escape sequence,
 // except with rtl_UriDecodeStrict, where it aborts the decoding and *pResult
 // is set to the empty string.
 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
                             rtl_UriDecodeMechanism eMechanism,
                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     SAL_THROW_EXTERN_C()
 {
     if (eMechanism == rtl_UriDecodeNone)
     {
         rtl_uString_assign(pResult, pText);
         return;
     }

     bool const bToIuri = eMechanism == rtl_UriDecodeToIuri;
     bool const bStrict = eMechanism == rtl_UriDecodeStrict;
     if (bToIuri)
         eCharset = RTL_TEXTENCODING_UTF8;

     sal_Unicode const * pCur = pText->buffer;
     sal_Unicode const * const pLimit = pCur + pText->length;
     sal_Int32 nCapacity = 0;
     rtl_uString_new(pResult);
     while (pCur < pLimit)
     {
         EscapeType eKind;
         sal_uInt32 nCode = readUcs4(&pCur, pLimit, true, eCharset, &eKind);

         if (eKind == EscapeOctet)
         {
             if (bStrict)
             {
                 rtl_uString_new(pResult);
                 return;
             }
             writeEscapeOctet(pResult, &nCapacity, nCode);
         }
         else if (eKind == EscapeChar && bToIuri && nCode <= 0x7F)
         {
             writeEscapeOctet(pResult, &nCapacity, nCode);
         }
         else
         {
             // literal character, or escaped character to be decoded
             writeUcs4(pResult, &nCapacity, nCode);
         }
     }
 }

 // Resolves the (possibly relative) URI reference pRelUriRef against the base
 // URI reference pBaseUriRef.
 //
 // On success, the absolute URI reference is stored in *pResult and true is
 // returned.  If pRelUriRef already starts with a scheme it is returned
 // unchanged and pBaseUriRef is not examined.
 //
 // On failure, a message naming the offending base URI reference is stored in
 // *pException and false is returned.  That happens if pBaseUriRef has no
 // scheme, or if its path is non-empty and does not start with a slash.
 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
                                          rtl_uString * pRelUriRef,
                                          rtl_uString ** pResult,
                                          rtl_uString ** pException)
     SAL_THROW_EXTERN_C()
 {
     // If pRelUriRef starts with a scheme component it is an absolute URI
     // reference, and we are done (i.e., this algorithm does not support
     // backwards-compatible relative URIs starting with a scheme component, see
     // RFC 2396, section 5.2, step 3):
     Components aRelComponents;
     parseUriRef(pRelUriRef, &aRelComponents);
     if (aRelComponents.aScheme.isPresent())
     {
         rtl_uString_assign(pResult, pRelUriRef);
         return true;
     }

     // Parse pBaseUriRef; if the scheme component is not present or not valid,
     // or the path component is not empty and starts with anything but a slash,
     // an exception is raised:
     Components aBaseComponents;
     parseUriRef(pBaseUriRef, &aBaseComponents);
     if (!aBaseComponents.aScheme.isPresent())
     {
         rtl::OUString aMessage(pBaseUriRef);
         aMessage += rtl::OUString(
                         RTL_CONSTASCII_USTRINGPARAM(
                             " does not start with a scheme component"));
         rtl_uString_assign(pException, aMessage.pData);
         return false;
     }
     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
         && *aBaseComponents.aPath.pBegin != '/')
     {
         // The leading space keeps the message separated from the URI text it
         // is appended to, like in the message above:
         rtl::OUString aMessage(pBaseUriRef);
         aMessage += rtl::OUString(
                         RTL_CONSTASCII_USTRINGPARAM(
                             " path component does not start with slash"));
         rtl_uString_assign(pException, aMessage.pData);
         return false;
     }

     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
     // into an absolute one (if the relative URI is a reference to the "current
     // document," the "current document" is here taken to be the base URI):
     rtl::OUStringBuffer aBuffer;
     aBuffer.append(aBaseComponents.aScheme.pBegin,
                    aBaseComponents.aScheme.getLength());
     if (aRelComponents.aAuthority.isPresent())
     {
         // The relative reference brings its own authority: everything after
         // the scheme is taken from it.
         aBuffer.append(aRelComponents.aAuthority.pBegin,
                        aRelComponents.aAuthority.getLength());
         aBuffer.append(aRelComponents.aPath.pBegin,
                        aRelComponents.aPath.getLength());
         if (aRelComponents.aQuery.isPresent())
             aBuffer.append(aRelComponents.aQuery.pBegin,
                            aRelComponents.aQuery.getLength());
     }
     else
     {
         if (aBaseComponents.aAuthority.isPresent())
             aBuffer.append(aBaseComponents.aAuthority.pBegin,
                            aBaseComponents.aAuthority.getLength());
         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
             && !aRelComponents.aQuery.isPresent())
         {
             // Reference to the "current document": keep the base's path and
             // query.
             aBuffer.append(aBaseComponents.aPath.pBegin,
                            aBaseComponents.aPath.getLength());
             if (aBaseComponents.aQuery.isPresent())
                 aBuffer.append(aBaseComponents.aQuery.pBegin,
                                aBaseComponents.aQuery.getLength());
         }
         else
         {
             // An absolute path replaces the base path, a relative one is
             // merged with it.  (If the path is empty, pBegin points at the
             // '?' of the query, which is present in that case.)
             if (*aRelComponents.aPath.pBegin == '/')
                 aBuffer.append(aRelComponents.aPath.pBegin,
                                aRelComponents.aPath.getLength());
             else
                 aBuffer.append(joinPaths(aBaseComponents.aPath,
                                          aRelComponents.aPath));
             if (aRelComponents.aQuery.isPresent())
                 aBuffer.append(aRelComponents.aQuery.pBegin,
                                aRelComponents.aQuery.getLength());
         }
     }
     // The fragment always comes from the relative reference:
     if (aRelComponents.aFragment.isPresent())
         aBuffer.append(aRelComponents.aFragment.pBegin,
                        aRelComponents.aFragment.getLength());
     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     return true;
 }