/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
| |
| |
| |
| #include "convertgb18030.h" |
| #include "context.h" |
| #include "converter.h" |
| #include "tenchelp.h" |
| #include "unichars.h" |
| #include "rtl/alloc.h" |
| #include "rtl/textcvt.h" |
| #include "sal/types.h" |
| |
/* Position of the GB 18030 decoder inside a multi-byte sequence.  The
   numeric suffix is the number of bytes of the current sequence that have
   already been consumed. */
typedef enum
{
    /* Between characters; the next byte starts a new sequence. */
    IMPL_GB_18030_TO_UNICODE_STATE_0 = 0,
    /* Lead byte consumed; a trail byte or a digit byte follows. */
    IMPL_GB_18030_TO_UNICODE_STATE_1 = 1,
    /* Lead byte and first digit byte consumed (four-byte form). */
    IMPL_GB_18030_TO_UNICODE_STATE_2 = 2,
    /* Three bytes consumed; only the final digit byte is missing. */
    IMPL_GB_18030_TO_UNICODE_STATE_3 = 3
} ImplGb18030ToUnicodeState;
| |
| typedef struct |
| { |
| ImplGb18030ToUnicodeState m_eState; |
| sal_uInt32 m_nCode; |
| } ImplGb18030ToUnicodeContext; |
| |
| void * ImplCreateGb18030ToUnicodeContext(void) |
| { |
| void * pContext |
| = rtl_allocateMemory(sizeof (ImplGb18030ToUnicodeContext)); |
| ((ImplGb18030ToUnicodeContext *) pContext)->m_eState |
| = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| return pContext; |
| } |
| |
| void ImplResetGb18030ToUnicodeContext(void * pContext) |
| { |
| if (pContext) |
| ((ImplGb18030ToUnicodeContext *) pContext)->m_eState |
| = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| } |
| |
| sal_Size ImplConvertGb18030ToUnicode(ImplTextConverterData const * pData, |
| void * pContext, |
| sal_Char const * pSrcBuf, |
| sal_Size nSrcBytes, |
| sal_Unicode * pDestBuf, |
| sal_Size nDestChars, |
| sal_uInt32 nFlags, |
| sal_uInt32 * pInfo, |
| sal_Size * pSrcCvtBytes) |
| { |
| sal_Unicode const * pGb18030Data |
| = ((ImplGb18030ConverterData const *) pData)->m_pGb18030ToUnicodeData; |
| ImplGb180302000ToUnicodeRange const * pGb18030Ranges |
| = ((ImplGb18030ConverterData const *) pData)-> |
| m_pGb18030ToUnicodeRanges; |
| ImplGb18030ToUnicodeState eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| sal_uInt32 nCode = 0; |
| sal_uInt32 nInfo = 0; |
| sal_Size nConverted = 0; |
| sal_Unicode * pDestBufPtr = pDestBuf; |
| sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; |
| |
| if (pContext) |
| { |
| eState = ((ImplGb18030ToUnicodeContext *) pContext)->m_eState; |
| nCode = ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode; |
| } |
| |
| for (; nConverted < nSrcBytes; ++nConverted) |
| { |
| sal_Bool bUndefined = sal_True; |
| sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; |
| switch (eState) |
| { |
| case IMPL_GB_18030_TO_UNICODE_STATE_0: |
| if (nChar < 0x80) |
| if (pDestBufPtr != pDestBufEnd) |
| *pDestBufPtr++ = (sal_Unicode) nChar; |
| else |
| goto no_output; |
| else if (nChar == 0x80) |
| goto bad_input; |
| else if (nChar <= 0xFE) |
| { |
| nCode = nChar - 0x81; |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_1; |
| } |
| else |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| break; |
| |
| case IMPL_GB_18030_TO_UNICODE_STATE_1: |
| if (nChar >= 0x30 && nChar <= 0x39) |
| { |
| nCode = nCode * 10 + (nChar - 0x30); |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_2; |
| } |
| else if ((nChar >= 0x40 && nChar <= 0x7E) |
| || (nChar >= 0x80 && nChar <= 0xFE)) |
| { |
| nCode = nCode * 190 + (nChar <= 0x7E ? nChar - 0x40 : |
| nChar - 0x80 + 63); |
| if (pDestBufPtr != pDestBufEnd) |
| *pDestBufPtr++ = pGb18030Data[nCode]; |
| else |
| goto no_output; |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| } |
| else |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| break; |
| |
| case IMPL_GB_18030_TO_UNICODE_STATE_2: |
| if (nChar >= 0x81 && nChar <= 0xFE) |
| { |
| nCode = nCode * 126 + (nChar - 0x81); |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_3; |
| } |
| else |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| break; |
| |
| case IMPL_GB_18030_TO_UNICODE_STATE_3: |
| if (nChar >= 0x30 && nChar <= 0x39) |
| { |
| nCode = nCode * 10 + (nChar - 0x30); |
| |
| /* 90 30 81 30 to E3 32 9A 35 maps to U+10000 to U+10FFFF: */ |
| if (nCode >= 189000 && nCode <= 1237575) |
| if (pDestBufEnd - pDestBufPtr >= 2) |
| { |
| nCode -= 189000 - 0x10000; |
| *pDestBufPtr++ |
| = (sal_Unicode) ImplGetHighSurrogate(nCode); |
| *pDestBufPtr++ |
| = (sal_Unicode) ImplGetLowSurrogate(nCode); |
| } |
| else |
| goto no_output; |
| else |
| { |
| ImplGb180302000ToUnicodeRange const * pRange |
| = pGb18030Ranges; |
| sal_uInt32 nFirstNonRange = 0; |
| for (;;) |
| { |
| if (pRange->m_nNonRangeDataIndex == -1) |
| goto bad_input; |
| else if (nCode < pRange->m_nFirstLinear) |
| { |
| if (pDestBufPtr != pDestBufEnd) |
| *pDestBufPtr++ |
| = pGb18030Data[ |
| pRange->m_nNonRangeDataIndex |
| + (nCode - nFirstNonRange)]; |
| else |
| goto no_output; |
| break; |
| } |
| else if (nCode < pRange->m_nPastLinear) |
| { |
| if (pDestBufPtr != pDestBufEnd) |
| *pDestBufPtr++ |
| = (sal_Unicode) |
| (pRange->m_nFirstUnicode |
| + (nCode |
| - pRange-> |
| m_nFirstLinear)); |
| else |
| goto no_output; |
| break; |
| } |
| nFirstNonRange = (pRange++)->m_nPastLinear; |
| } |
| } |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| } |
| else |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| break; |
| } |
| continue; |
| |
| bad_input: |
| switch (ImplHandleBadInputTextToUnicodeConversion( |
| bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, |
| &nInfo)) |
| { |
| case IMPL_BAD_INPUT_STOP: |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| break; |
| |
| case IMPL_BAD_INPUT_CONTINUE: |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| continue; |
| |
| case IMPL_BAD_INPUT_NO_OUTPUT: |
| goto no_output; |
| } |
| break; |
| |
| no_output: |
| --pSrcBuf; |
| nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; |
| break; |
| } |
| |
| if (eState != IMPL_GB_18030_TO_UNICODE_STATE_0 |
| && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR |
| | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) |
| == 0) |
| { |
| if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) |
| nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; |
| else |
| switch (ImplHandleBadInputTextToUnicodeConversion( |
| sal_False, sal_True, 0, nFlags, &pDestBufPtr, |
| pDestBufEnd, &nInfo)) |
| { |
| case IMPL_BAD_INPUT_STOP: |
| case IMPL_BAD_INPUT_CONTINUE: |
| eState = IMPL_GB_18030_TO_UNICODE_STATE_0; |
| break; |
| |
| case IMPL_BAD_INPUT_NO_OUTPUT: |
| nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; |
| break; |
| } |
| } |
| |
| if (pContext) |
| { |
| ((ImplGb18030ToUnicodeContext *) pContext)->m_eState = eState; |
| ((ImplGb18030ToUnicodeContext *) pContext)->m_nCode = nCode; |
| } |
| if (pInfo) |
| *pInfo = nInfo; |
| if (pSrcCvtBytes) |
| *pSrcCvtBytes = nConverted; |
| |
| return pDestBufPtr - pDestBuf; |
| } |
| |
| sal_Size ImplConvertUnicodeToGb18030(ImplTextConverterData const * pData, |
| void * pContext, |
| sal_Unicode const * pSrcBuf, |
| sal_Size nSrcChars, |
| sal_Char * pDestBuf, |
| sal_Size nDestBytes, |
| sal_uInt32 nFlags, |
| sal_uInt32 * pInfo, |
| sal_Size * pSrcCvtChars) |
| { |
| sal_uInt32 const * pGb18030Data |
| = ((ImplGb18030ConverterData const *) pData)-> |
| m_pUnicodeToGb18030Data; |
| ImplUnicodeToGb180302000Range const * pGb18030Ranges |
| = ((ImplGb18030ConverterData const *) pData)-> |
| m_pUnicodeToGb18030Ranges; |
| sal_Unicode nHighSurrogate = 0; |
| sal_uInt32 nInfo = 0; |
| sal_Size nConverted = 0; |
| sal_Char * pDestBufPtr = pDestBuf; |
| sal_Char * pDestBufEnd = pDestBuf + nDestBytes; |
| |
| if (pContext) |
| nHighSurrogate |
| = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; |
| |
| for (; nConverted < nSrcChars; ++nConverted) |
| { |
| sal_Bool bUndefined = sal_True; |
| sal_uInt32 nChar = *pSrcBuf++; |
| if (nHighSurrogate == 0) |
| { |
| if (ImplIsHighSurrogate(nChar)) |
| { |
| nHighSurrogate = (sal_Unicode) nChar; |
| continue; |
| } |
| } |
| else if (ImplIsLowSurrogate(nChar)) |
| nChar = ImplCombineSurrogates(nHighSurrogate, nChar); |
| else |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| |
| if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) |
| { |
| bUndefined = sal_False; |
| goto bad_input; |
| } |
| |
| if (nChar < 0x80) |
| if (pDestBufPtr != pDestBufEnd) |
| *pDestBufPtr++ = (sal_Char) nChar; |
| else |
| goto no_output; |
| else if (nChar < 0x10000) |
| { |
| ImplUnicodeToGb180302000Range const * pRange = pGb18030Ranges; |
| sal_Unicode nFirstNonRange = 0x80; |
| for (;;) |
| { |
| if (nChar < pRange->m_nFirstUnicode) |
| { |
| sal_uInt32 nCode |
| = pGb18030Data[pRange->m_nNonRangeDataIndex |
| + (nChar - nFirstNonRange)]; |
| if (pDestBufEnd - pDestBufPtr |
| >= (nCode <= 0xFFFF ? 2 : 4)) |
| { |
| if (nCode > 0xFFFF) |
| { |
| *pDestBufPtr++ = (sal_Char) (nCode >> 24); |
| *pDestBufPtr++ = (sal_Char) (nCode >> 16 & 0xFF); |
| } |
| *pDestBufPtr++ = (sal_Char) (nCode >> 8 & 0xFF); |
| *pDestBufPtr++ = (sal_Char) (nCode & 0xFF); |
| } |
| else |
| goto no_output; |
| break; |
| } |
| else if (nChar <= pRange->m_nLastUnicode) |
| { |
| if (pDestBufEnd - pDestBufPtr >= 4) |
| { |
| sal_uInt32 nCode |
| = pRange->m_nFirstLinear |
| + (nChar - pRange->m_nFirstUnicode); |
| *pDestBufPtr++ = (sal_Char) (nCode / 12600 + 0x81); |
| *pDestBufPtr++ |
| = (sal_Char) (nCode / 1260 % 10 + 0x30); |
| *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + 0x81); |
| *pDestBufPtr++ = (sal_Char) (nCode % 10 + 0x30); |
| } |
| else |
| goto no_output; |
| break; |
| } |
| nFirstNonRange |
| = (sal_Unicode) ((pRange++)->m_nLastUnicode + 1); |
| } |
| } |
| else |
| if (pDestBufEnd - pDestBufPtr >= 4) |
| { |
| sal_uInt32 nCode = nChar - 0x10000; |
| *pDestBufPtr++ = (sal_Char) (nCode / 12600 + 0x90); |
| *pDestBufPtr++ = (sal_Char) (nCode / 1260 % 10 + 0x30); |
| *pDestBufPtr++ = (sal_Char) (nCode / 10 % 126 + 0x81); |
| *pDestBufPtr++ = (sal_Char) (nCode % 10 + 0x30); |
| } |
| else |
| goto no_output; |
| nHighSurrogate = 0; |
| continue; |
| |
| bad_input: |
| switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, |
| nChar, |
| nFlags, |
| &pDestBufPtr, |
| pDestBufEnd, |
| &nInfo, |
| NULL, |
| 0, |
| NULL)) |
| { |
| case IMPL_BAD_INPUT_STOP: |
| nHighSurrogate = 0; |
| break; |
| |
| case IMPL_BAD_INPUT_CONTINUE: |
| nHighSurrogate = 0; |
| continue; |
| |
| case IMPL_BAD_INPUT_NO_OUTPUT: |
| goto no_output; |
| } |
| break; |
| |
| no_output: |
| --pSrcBuf; |
| nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; |
| break; |
| } |
| |
| if (nHighSurrogate != 0 |
| && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR |
| | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) |
| == 0) |
| { |
| if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) |
| nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; |
| else |
| switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, |
| 0, |
| nFlags, |
| &pDestBufPtr, |
| pDestBufEnd, |
| &nInfo, |
| NULL, |
| 0, |
| NULL)) |
| { |
| case IMPL_BAD_INPUT_STOP: |
| case IMPL_BAD_INPUT_CONTINUE: |
| nHighSurrogate = 0; |
| break; |
| |
| case IMPL_BAD_INPUT_NO_OUTPUT: |
| nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; |
| break; |
| } |
| } |
| |
| if (pContext) |
| ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate |
| = nHighSurrogate; |
| if (pInfo) |
| *pInfo = nInfo; |
| if (pSrcCvtChars) |
| *pSrcCvtChars = nConverted; |
| |
| return pDestBufPtr - pDestBuf; |
| } |