| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| // MARKER(update_precomp.py): autogen include statement, do not remove |
| #include "precompiled_sal.hxx" |
| |
| #include "rtl/uri.h" |
| |
| #include "surrogates.h" |
| |
| #include "osl/diagnose.h" |
| #include "rtl/strbuf.hxx" |
| #include "rtl/textenc.h" |
| #include "rtl/textcvt.h" |
| #include "rtl/uri.h" |
| #include "rtl/ustrbuf.h" |
| #include "rtl/ustrbuf.hxx" |
| #include "rtl/ustring.h" |
| #include "rtl/ustring.hxx" |
| #include "sal/types.h" |
| |
| #include <cstddef> |
| |
| namespace { |
| |
| std::size_t const nCharClassSize = 128; |
| |
| sal_Unicode const cEscapePrefix = 0x25; // '%' |
| |
| inline bool isDigit(sal_uInt32 nUtf32) |
| { |
| return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9' |
| } |
| |
| inline bool isAlpha(sal_uInt32 nUtf32) |
| { |
| // 'A'--'Z', 'a'--'z' |
| return ( |
| (nUtf32 >= 0x41 && nUtf32 <= 0x5A) || |
| (nUtf32 >= 0x61 && nUtf32 <= 0x7A) |
| ); |
| } |
| |
| inline bool isHighSurrogate(sal_uInt32 nUtf16) |
| { |
| return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); |
| } |
| |
| inline bool isLowSurrogate(sal_uInt32 nUtf16) |
| { |
| return SAL_RTL_IS_LOW_SURROGATE(nUtf16); |
| } |
| |
| inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) |
| { |
| return SAL_RTL_COMBINE_SURROGATES(high, low); |
| } |
| |
| inline int getHexWeight(sal_uInt32 nUtf32) |
| { |
| return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9' |
| static_cast< int >(nUtf32 - 0x30) : |
| nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F' |
| static_cast< int >(nUtf32 - 0x41 + 10) : |
| nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f' |
| static_cast< int >(nUtf32 - 0x61 + 10) : |
| -1; // not a hex digit |
| } |
| |
| inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) |
| { |
| return nUtf32 < nCharClassSize && pCharClass[nUtf32]; |
| } |
| |
| inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, |
| sal_Unicode cChar) |
| { |
| rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); |
| } |
| |
// Classification of what readUcs4 consumed from its input.
enum EscapeType
{
    // A plain character (or surrogate pair) that was not %-escaped.
    EscapeNo = 0,
    // One or more %XX escapes that decoded to a complete character.
    EscapeChar = 1,
    // A single %XX escape (> 0x7F) that could not be decoded as part of a
    // character; the returned value is the raw octet.
    EscapeOctet = 2
};
| |
| /* Read any of the following: |
| |
| - sequence of escape sequences representing character from eCharset, |
| translated to single UCS4 character; or |
| |
| - pair of UTF-16 surrogates, translated to single UCS4 character; or |
| |
| _ single UTF-16 character, extended to UCS4 character. |
| */ |
| sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, |
| bool bEncoded, rtl_TextEncoding eCharset, |
| EscapeType * pType) |
| { |
| sal_uInt32 nChar = *(*pBegin)++; |
| int nWeight1; |
| int nWeight2; |
| if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 |
| && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 |
| && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0) |
| { |
| *pBegin += 2; |
| nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); |
| if (nChar <= 0x7F) |
| *pType = EscapeChar; |
| else if (eCharset == RTL_TEXTENCODING_UTF8) |
| { |
| if (nChar >= 0xC0 && nChar <= 0xF4) |
| { |
| sal_uInt32 nEncoded; |
| int nShift; |
| sal_uInt32 nMin; |
| if (nChar <= 0xDF) |
| { |
| nEncoded = (nChar & 0x1F) << 6; |
| nShift = 0; |
| nMin = 0x80; |
| } |
| else if (nChar <= 0xEF) |
| { |
| nEncoded = (nChar & 0x0F) << 12; |
| nShift = 6; |
| nMin = 0x800; |
| } |
| else |
| { |
| nEncoded = (nChar & 0x07) << 18; |
| nShift = 12; |
| nMin = 0x10000; |
| } |
| sal_Unicode const * p = *pBegin; |
| bool bUTF8 = true; |
| for (; nShift >= 0; nShift -= 6) |
| { |
| if (pEnd - p < 3 || p[0] != cEscapePrefix |
| || (nWeight1 = getHexWeight(p[1])) < 8 |
| || nWeight1 > 11 |
| || (nWeight2 = getHexWeight(p[2])) < 0) |
| { |
| bUTF8 = sal_False; |
| break; |
| } |
| p += 3; |
| nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; |
| } |
| if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) |
| && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) |
| { |
| *pBegin = p; |
| *pType = EscapeChar; |
| return nEncoded; |
| } |
| } |
| *pType = EscapeOctet; |
| } |
| else |
| { |
| rtl::OStringBuffer aBuf; |
| aBuf.append(static_cast< char >(nChar)); |
| rtl_TextToUnicodeConverter aConverter |
| = rtl_createTextToUnicodeConverter(eCharset); |
| sal_Unicode const * p = *pBegin; |
| for (;;) |
| { |
| sal_Unicode aDst[2]; |
| sal_uInt32 nInfo; |
| sal_Size nConverted; |
| sal_Size nDstSize = rtl_convertTextToUnicode( |
| aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst, |
| sizeof aDst / sizeof aDst[0], |
| (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR |
| | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR |
| | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), |
| &nInfo, &nConverted); |
| if (nInfo == 0) |
| { |
| OSL_ASSERT( |
| nConverted |
| == sal::static_int_cast< sal_uInt32 >( |
| aBuf.getLength())); |
| rtl_destroyTextToUnicodeConverter(aConverter); |
| *pBegin = p; |
| *pType = EscapeChar; |
| OSL_ASSERT( |
| nDstSize == 1 |
| || (nDstSize == 2 && isHighSurrogate(aDst[0]) |
| && isLowSurrogate(aDst[1]))); |
| return nDstSize == 1 |
| ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); |
| } |
| else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL |
| && pEnd - p >= 3 && p[0] == cEscapePrefix |
| && (nWeight1 = getHexWeight(p[1])) >= 0 |
| && (nWeight2 = getHexWeight(p[2])) >= 0) |
| { |
| p += 3; |
| aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2)); |
| } |
| else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL |
| && p != pEnd && *p <= 0x7F) |
| { |
| aBuf.append(static_cast< char >(*p++)); |
| } |
| else |
| { |
| OSL_ASSERT( |
| (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) |
| == 0); |
| break; |
| } |
| } |
| rtl_destroyTextToUnicodeConverter(aConverter); |
| *pType = EscapeOctet; |
| } |
| return nChar; |
| } |
| else |
| { |
| *pType = EscapeNo; |
| return isHighSurrogate(nChar) && *pBegin < pEnd |
| && isLowSurrogate(**pBegin) ? |
| combineSurrogates(nChar, *(*pBegin)++) : nChar; |
| } |
| } |
| |
| void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) |
| { |
| OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); |
| if (nUtf32 <= 0xFFFF) { |
| writeUnicode( |
| pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); |
| } else { |
| nUtf32 -= 0x10000; |
| writeUnicode( |
| pBuffer, pCapacity, |
| static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); |
| writeUnicode( |
| pBuffer, pCapacity, |
| static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); |
| } |
| } |
| |
| void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, |
| sal_uInt32 nOctet) |
| { |
| OSL_ENSURE(nOctet <= 0xFF, "bad octet"); |
| |
| static sal_Unicode const aHex[16] |
| = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, |
| 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */ |
| |
| writeUnicode(pBuffer, pCapacity, cEscapePrefix); |
| writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); |
| writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); |
| } |
| |
| bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, |
| sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) |
| { |
| OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); |
| if (eCharset == RTL_TEXTENCODING_UTF8) { |
| if (nUtf32 < 0x80) |
| writeEscapeOctet(pBuffer, pCapacity, nUtf32); |
| else if (nUtf32 < 0x800) |
| { |
| writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); |
| } |
| else if (nUtf32 < 0x10000) |
| { |
| writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); |
| } |
| else |
| { |
| writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); |
| writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); |
| } |
| } else { |
| rtl_UnicodeToTextConverter aConverter |
| = rtl_createUnicodeToTextConverter(eCharset); |
| sal_Unicode aSrc[2]; |
| sal_Size nSrcSize; |
| if (nUtf32 <= 0xFFFF) |
| { |
| aSrc[0] = static_cast< sal_Unicode >(nUtf32); |
| nSrcSize = 1; |
| } |
| else |
| { |
| aSrc[0] = static_cast< sal_Unicode >( |
| ((nUtf32 - 0x10000) >> 10) | 0xD800); |
| aSrc[1] = static_cast< sal_Unicode >( |
| ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); |
| nSrcSize = 2; |
| } |
| sal_Char aDst[32]; // FIXME random value |
| sal_uInt32 nInfo; |
| sal_Size nConverted; |
| sal_Size nDstSize = rtl_convertUnicodeToText( |
| aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst, |
| RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR |
| | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR |
| | RTL_UNICODETOTEXT_FLAGS_FLUSH, |
| &nInfo, &nConverted); |
| OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0); |
| rtl_destroyUnicodeToTextConverter(aConverter); |
| if (nInfo == 0) { |
| OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText"); |
| for (sal_Size i = 0; i < nDstSize; ++i) |
| writeEscapeOctet(pBuffer, pCapacity, |
| static_cast< unsigned char >(aDst[i])); |
| // FIXME all octets are escaped, even if there is no need |
| } else { |
| if (bStrict) { |
| return false; |
| } else { |
| writeUcs4(pBuffer, pCapacity, nUtf32); |
| } |
| } |
| } |
| return true; |
| } |
| |
| struct Component |
| { |
| sal_Unicode const * pBegin; |
| sal_Unicode const * pEnd; |
| |
| inline Component(): pBegin(0) {} |
| |
| inline bool isPresent() const { return pBegin != 0; } |
| |
| inline sal_Int32 getLength() const; |
| }; |
| |
| inline sal_Int32 Component::getLength() const |
| { |
| OSL_ENSURE(isPresent(), "taking length of non-present component"); |
| return static_cast< sal_Int32 >(pEnd - pBegin); |
| } |
| |
| struct Components |
| { |
| Component aScheme; |
| Component aAuthority; |
| Component aPath; |
| Component aQuery; |
| Component aFragment; |
| }; |
| |
| void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) |
| { |
| // This algorithm is liberal and accepts various forms of illegal input. |
| |
| sal_Unicode const * pBegin = pUriRef->buffer; |
| sal_Unicode const * pEnd = pBegin + pUriRef->length; |
| sal_Unicode const * pPos = pBegin; |
| |
| if (pPos != pEnd && isAlpha(*pPos)) |
| for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p) |
| if (*p == ':') |
| { |
| pComponents->aScheme.pBegin = pBegin; |
| pComponents->aScheme.pEnd = ++p; |
| pPos = p; |
| break; |
| } |
| else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' |
| && *p != '.') |
| break; |
| |
| if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') |
| { |
| pComponents->aAuthority.pBegin = pPos; |
| pPos += 2; |
| while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#') |
| ++pPos; |
| pComponents->aAuthority.pEnd = pPos; |
| } |
| |
| pComponents->aPath.pBegin = pPos; |
| while (pPos != pEnd && *pPos != '?' && * pPos != '#') |
| ++pPos; |
| pComponents->aPath.pEnd = pPos; |
| |
| if (pPos != pEnd && *pPos == '?') |
| { |
| pComponents->aQuery.pBegin = pPos++; |
| while (pPos != pEnd && * pPos != '#') |
| ++pPos; |
| pComponents->aQuery.pEnd = pPos; |
| } |
| |
| if (pPos != pEnd) |
| { |
| OSL_ASSERT(*pPos == '#'); |
| pComponents->aFragment.pBegin = pPos; |
| pComponents->aFragment.pEnd = pEnd; |
| } |
| } |
| |
| rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) |
| { |
| OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/'); |
| OSL_ASSERT(rRelPath.isPresent()); |
| |
| // The invariant of aBuffer is that it always starts and ends with a slash |
| // (until probably right at the end of the algorithm, when the last segment |
| // of rRelPath is added, which does not necessarily end in a slash): |
| rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength()); |
| // XXX numeric overflow |
| |
| // Segments "." and ".." within rBasePath are not conisdered special (but |
| // are also not removed by ".." segments within rRelPath), RFC 2396 seems a |
| // bit unclear about this point: |
| sal_Int32 nFixed = 1; |
| sal_Unicode const * p = rBasePath.pBegin + 1; |
| for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q) |
| if (*q == '/') |
| { |
| if ( |
| (q - p == 1 && p[0] == '.') || |
| (q - p == 2 && p[0] == '.' && p[1] == '.') |
| ) |
| { |
| nFixed = q + 1 - rBasePath.pBegin; |
| } |
| p = q + 1; |
| } |
| aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); |
| |
| p = rRelPath.pBegin; |
| if (p != rRelPath.pEnd) |
| for (;;) |
| { |
| sal_Unicode const * q = p; |
| sal_Unicode const * r; |
| for (;;) |
| { |
| if (q == rRelPath.pEnd) |
| { |
| r = q; |
| break; |
| } |
| if (*q == '/') |
| { |
| r = q + 1; |
| break; |
| } |
| ++q; |
| } |
| if (q - p == 2 && p[0] == '.' && p[1] == '.') |
| { |
| // Erroneous excess segments ".." within rRelPath are left |
| // intact, as the examples in RFC 2396, section C.2, suggest: |
| sal_Int32 i = aBuffer.getLength() - 1; |
| if (i < nFixed) |
| { |
| aBuffer.append(p, r - p); |
| nFixed += 3; |
| } |
| else |
| { |
| while (aBuffer.charAt(i - 1) != '/') |
| --i; |
| aBuffer.setLength(i); |
| } |
| } |
| else if (q - p != 1 || *p != '.') |
| aBuffer.append(p, r - p); |
| if (q == rRelPath.pEnd) |
| break; |
| p = q + 1; |
| } |
| |
| return aBuffer.makeStringAndClear(); |
| } |
| |
| } |
| |
| sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) |
| SAL_THROW_EXTERN_C() |
| { |
| static sal_Bool const aCharClass[][nCharClassSize] |
| = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }, |
| { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ |
| }}; |
| OSL_ENSURE( |
| (eCharClass >= 0 |
| && (sal::static_int_cast< std::size_t >(eCharClass) |
| < sizeof aCharClass / sizeof aCharClass[0])), |
| "bad eCharClass"); |
| return aCharClass[eCharClass]; |
| } |
| |
| void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, |
| rtl_UriEncodeMechanism eMechanism, |
| rtl_TextEncoding eCharset, rtl_uString ** pResult) |
| SAL_THROW_EXTERN_C() |
| { |
| OSL_ENSURE(!pCharClass[0x25], "bad pCharClass"); |
| // make sure the percent sign is encoded... |
| |
| sal_Unicode const * p = pText->buffer; |
| sal_Unicode const * pEnd = p + pText->length; |
| sal_Int32 nCapacity = 0; |
| rtl_uString_new(pResult); |
| while (p < pEnd) |
| { |
| EscapeType eType; |
| sal_uInt32 nUtf32 = readUcs4( |
| &p, pEnd, |
| (eMechanism == rtl_UriEncodeKeepEscapes |
| || eMechanism == rtl_UriEncodeCheckEscapes |
| || eMechanism == rtl_UriEncodeStrictKeepEscapes), |
| eCharset, &eType); |
| switch (eType) |
| { |
| case EscapeNo: |
| if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F |
| writeUnicode(pResult, &nCapacity, |
| static_cast< sal_Unicode >(nUtf32)); |
| else if (!writeEscapeChar( |
| pResult, &nCapacity, nUtf32, eCharset, |
| (eMechanism == rtl_UriEncodeStrict |
| || eMechanism == rtl_UriEncodeStrictKeepEscapes))) |
| { |
| rtl_uString_new(pResult); |
| return; |
| } |
| break; |
| |
| case EscapeChar: |
| if (eMechanism == rtl_UriEncodeCheckEscapes |
| && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F |
| writeUnicode(pResult, &nCapacity, |
| static_cast< sal_Unicode >(nUtf32)); |
| else if (!writeEscapeChar( |
| pResult, &nCapacity, nUtf32, eCharset, |
| (eMechanism == rtl_UriEncodeStrict |
| || eMechanism == rtl_UriEncodeStrictKeepEscapes))) |
| { |
| rtl_uString_new(pResult); |
| return; |
| } |
| break; |
| |
| case EscapeOctet: |
| writeEscapeOctet(pResult, &nCapacity, nUtf32); |
| break; |
| } |
| } |
| } |
| |
| void SAL_CALL rtl_uriDecode(rtl_uString * pText, |
| rtl_UriDecodeMechanism eMechanism, |
| rtl_TextEncoding eCharset, rtl_uString ** pResult) |
| SAL_THROW_EXTERN_C() |
| { |
| switch (eMechanism) |
| { |
| case rtl_UriDecodeNone: |
| rtl_uString_assign(pResult, pText); |
| break; |
| |
| case rtl_UriDecodeToIuri: |
| eCharset = RTL_TEXTENCODING_UTF8; |
| default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict |
| { |
| sal_Unicode const * p = pText->buffer; |
| sal_Unicode const * pEnd = p + pText->length; |
| sal_Int32 nCapacity = 0; |
| rtl_uString_new(pResult); |
| while (p < pEnd) |
| { |
| EscapeType eType; |
| sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); |
| switch (eType) |
| { |
| case EscapeChar: |
| if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) |
| { |
| writeEscapeOctet(pResult, &nCapacity, nUtf32); |
| break; |
| } |
| case EscapeNo: |
| writeUcs4(pResult, &nCapacity, nUtf32); |
| break; |
| |
| case EscapeOctet: |
| if (eMechanism == rtl_UriDecodeStrict) { |
| rtl_uString_new(pResult); |
| return; |
| } |
| writeEscapeOctet(pResult, &nCapacity, nUtf32); |
| break; |
| } |
| } |
| } |
| break; |
| } |
| } |
| |
| sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, |
| rtl_uString * pRelUriRef, |
| rtl_uString ** pResult, |
| rtl_uString ** pException) |
| SAL_THROW_EXTERN_C() |
| { |
| // If pRelUriRef starts with a scheme component it is an absolute URI |
| // reference, and we are done (i.e., this algorithm does not support |
| // backwards-compatible relative URIs starting with a scheme component, see |
| // RFC 2396, section 5.2, step 3): |
| Components aRelComponents; |
| parseUriRef(pRelUriRef, &aRelComponents); |
| if (aRelComponents.aScheme.isPresent()) |
| { |
| rtl_uString_assign(pResult, pRelUriRef); |
| return true; |
| } |
| |
| // Parse pBaseUriRef; if the scheme component is not present or not valid, |
| // or the path component is not empty and starts with anything but a slash, |
| // an exception is raised: |
| Components aBaseComponents; |
| parseUriRef(pBaseUriRef, &aBaseComponents); |
| if (!aBaseComponents.aScheme.isPresent()) |
| { |
| rtl::OUString aMessage(pBaseUriRef); |
| aMessage += rtl::OUString( |
| RTL_CONSTASCII_USTRINGPARAM( |
| " does not start with a scheme component")); |
| rtl_uString_assign(pException, |
| const_cast< rtl::OUString & >(aMessage).pData); |
| return false; |
| } |
| if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd |
| && *aBaseComponents.aPath.pBegin != '/') |
| { |
| rtl::OUString aMessage(pBaseUriRef); |
| aMessage += rtl::OUString( |
| RTL_CONSTASCII_USTRINGPARAM( |
| "path component does not start with slash")); |
| rtl_uString_assign(pException, aMessage.pData); |
| return false; |
| } |
| |
| // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI |
| // into an absolute one (if the relative URI is a reference to the "current |
| // document," the "current document" is here taken to be the base URI): |
| rtl::OUStringBuffer aBuffer; |
| aBuffer.append(aBaseComponents.aScheme.pBegin, |
| aBaseComponents.aScheme.getLength()); |
| if (aRelComponents.aAuthority.isPresent()) |
| { |
| aBuffer.append(aRelComponents.aAuthority.pBegin, |
| aRelComponents.aAuthority.getLength()); |
| aBuffer.append(aRelComponents.aPath.pBegin, |
| aRelComponents.aPath.getLength()); |
| if (aRelComponents.aQuery.isPresent()) |
| aBuffer.append(aRelComponents.aQuery.pBegin, |
| aRelComponents.aQuery.getLength()); |
| } |
| else |
| { |
| if (aBaseComponents.aAuthority.isPresent()) |
| aBuffer.append(aBaseComponents.aAuthority.pBegin, |
| aBaseComponents.aAuthority.getLength()); |
| if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd |
| && !aRelComponents.aQuery.isPresent()) |
| { |
| aBuffer.append(aBaseComponents.aPath.pBegin, |
| aBaseComponents.aPath.getLength()); |
| if (aBaseComponents.aQuery.isPresent()) |
| aBuffer.append(aBaseComponents.aQuery.pBegin, |
| aBaseComponents.aQuery.getLength()); |
| } |
| else |
| { |
| if (*aRelComponents.aPath.pBegin == '/') |
| aBuffer.append(aRelComponents.aPath.pBegin, |
| aRelComponents.aPath.getLength()); |
| else |
| aBuffer.append(joinPaths(aBaseComponents.aPath, |
| aRelComponents.aPath)); |
| if (aRelComponents.aQuery.isPresent()) |
| aBuffer.append(aRelComponents.aQuery.pBegin, |
| aRelComponents.aQuery.getLength()); |
| } |
| } |
| if (aRelComponents.aFragment.isPresent()) |
| aBuffer.append(aRelComponents.aFragment.pBegin, |
| aRelComponents.aFragment.getLength()); |
| rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData); |
| return true; |
| } |