blob: 89986d347c76da4f100e3757bf3a3a6f9099704b [file] [log] [blame]
/*-
* Copyright (c) 2000
* Konstantin Chuguev. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Konstantin Chuguev
* and its contributors.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* iconv (Charset Conversion Library) v1.0
*/
#define ICONV_INTERNAL
#include "iconv.h"
#include <stdlib.h>
/*
 * NULL-terminated table of the charset names served by this module.
 */
static const char * const names[] = {
    "unicode-1-1-utf-7",
    "utf-7",
    NULL
};

/*
 * Name-list callback: returns the static, NULL-terminated name table.
 * The ces argument is accepted for the callback signature only and is
 * never inspected.
 */
static const char * const *
utf7_names(struct iconv_ces *ces)
{
    (void)ces;
    return names;
}
/*
 * Reserve `bytes` bytes out of the budget pointed to by `bytesleft`.
 *
 * Returns 0 and subtracts `bytes` from *bytesleft when the budget is
 * large enough; returns 1 and leaves *bytesleft untouched otherwise.
 */
static APR_INLINE int
lackofbytes(apr_size_t bytes, apr_size_t *bytesleft)
{
    if (*bytesleft >= bytes) {
        *bytesleft -= bytes;
        return 0;
    }
    return 1;
}
/*
 * The 64-symbol base64 alphabet, indexed by 6-bit value:
 * 0..25 -> 'A'..'Z', 26..51 -> 'a'..'z', 52..61 -> '0'..'9',
 * 62 -> '+', 63 -> '/'.
 */
static const char *base64_str =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Base64 symbol for the low six bits of ch; higher bits are ignored. */
#define base64(ch) (base64_str[(ch) & 0x3F])
/*
 * Store one byte at the current output position and advance the
 * position by one.  No space check is done here; callers must have
 * reserved the byte beforehand.  Always returns 1.
 */
static APR_INLINE int output(char ch, unsigned char **outbuf)
{
    unsigned char *dst = *outbuf;

    *dst = ch;
    *outbuf = dst + 1;
    return 1;
}
/*
 * Append one 16-bit character to the running base64 bit stream.
 *
 * state[0] is the encoder phase and state[1] carries the bits of the
 * previous character that have not been written yet (already shifted
 * into the upper part of a 6-bit group):
 *   phase 2     - 4 bits pending; writes 3 symbols, leaves 2 bits, -> 3
 *   phase 3     - 2 bits pending; writes 3 symbols, leaves none,   -> 1
 *   other phase - nothing pending; writes 2 symbols, leaves 4 bits, -> 2
 *
 * The caller must already have reserved the output space.
 * Always returns 1.
 */
static int
encode(char *state, ucs_t ch, unsigned char **outbuf)
{
    const char phase = state[0];

    if (phase == 2) {
        /* pending 4 bits + top 2 bits of ch, then 6 + 6 bits */
        output(base64(state[1] | (ch >> 14)), outbuf);
        output(base64(ch >> 8), outbuf);
        output(base64(ch >> 2), outbuf);
        state[1] = ch << 4;     /* low 2 bits wait for the next symbol */
        state[0] = 3;
    } else if (phase == 3) {
        /* pending 2 bits + top 4 bits of ch, then 6 + 6 bits */
        output(base64(state[1] | (ch >> 12)), outbuf);
        output(base64(ch >> 6), outbuf);
        output(base64(ch), outbuf);
        state[0] = 1;           /* stream is symbol-aligned again */
    } else {
        /* symbol-aligned: 6 + 6 bits go out directly */
        output(base64(ch >> 10), outbuf);
        output(base64(ch >> 4), outbuf);
        state[1] = ch << 2;     /* low 4 bits wait for the next symbol */
        state[0] = 2;
    }
    return 1;
}
/* Character classes used by the UTF-7 encoder and decoder. */
enum {
    utf7_printable,     /* copied as-is; ends a base64 run without '-' */
    utf7_base64,        /* member of the base64 alphabet (letters, digits, '/') */
    utf7_encoded,       /* has no direct form; must travel inside base64 */
    utf7_shift_in,      /* '+' : opens a base64 run */
    utf7_shift_out,     /* '-' : closes a base64 run */
    utf7_end            /* UCS_CHAR_NONE : end-of-data marker */
};

/* Inclusive range test.  Note that ch is evaluated twice. */
#define between(ch, min, max) ((ch) >= (min) && (ch) <= (max))
/*
 * Classify a character for UTF-7 processing.
 *
 * Returns one of the utf7_* class constants:
 *   UCS_CHAR_NONE                              -> utf7_end
 *   '+' / '-'                                  -> utf7_shift_in / utf7_shift_out
 *   '/', '0'..'9', 'A'..'Z', 'a'..'z'          -> utf7_base64
 *   ':', '?', anything <= ' ', and the range
 *   '\'' .. '.' except '*'                     -> utf7_printable
 *   everything else                            -> utf7_encoded
 */
static int char_type(ucs_t ch)
{
    switch (ch) {
    case UCS_CHAR_NONE:
        return utf7_end;
    case '+':
        return utf7_shift_in;
    case '-':
        return utf7_shift_out;
    case ':':
    case '?':
        return utf7_printable;
    default:
        break;
    }

    /* '/' sits immediately before '0', so one range covers both. */
    if (between(ch, '/', '9') || between(ch, 'A', 'Z') ||
        between(ch, 'a', 'z'))
        return utf7_base64;

    /* Space and everything below it is passed through directly. */
    if (ch <= ' ')
        return utf7_printable;

    /* ' ( ) , . ('+' and '-' were handled by the switch above). */
    if (ch != '*' && between(ch, '\'', '.'))
        return utf7_printable;

    return utf7_encoded;
}
/*
 * Encode one UCS character as UTF-7.
 *
 * module->data is used as a two-byte encoder state: byte 0 is the
 * base64 phase (0 = not inside a base64 run, 1 = inside a run with no
 * pending bits, 2/3 = inside a run with pending bits held in byte 1).
 *
 * in            character to encode; UCS_CHAR_NONE asks for the pending
 *               base64 bits (if any) to be written out.
 * outbuf        in/out: current write position, advanced past the
 *               bytes written.
 * outbytesleft  in/out: remaining room at *outbuf, reduced by the
 *               number of bytes written.
 *
 * Returns -1 when iconv_char32bit() rejects the character, 0 when the
 * output buffer is too small (nothing is written and the state is
 * unchanged), and 1 on success.
 *
 * Every write is preceded by a space reservation through
 * lackofbytes(), including the flush path and the single-byte
 * pass-through path.
 */
static apr_ssize_t
convert_from_ucs(struct iconv_ces *module, ucs_t in,
                 unsigned char **outbuf, apr_size_t *outbytesleft)
{
#define utf7_state ((char *)(module->data))
    int ch = char_type(in), needbytes = 3;

    if (iconv_char32bit(in))
        return -1;

    if (utf7_state[0]) {
        /* Inside a base64 run: one extra symbol if bits are pending. */
        needbytes = utf7_state[0] > 1 ? 1 : 0;
        switch (ch) {
        case utf7_encoded:
        case utf7_shift_in:
            /* Stays in base64: 2 symbols, or 3 when bits are pending. */
            return lackofbytes(needbytes + 2, outbytesleft)
                ? 0 : encode(utf7_state, in, outbuf);
        case utf7_base64:
        case utf7_shift_out:
            /* Needs an explicit '-' before the literal character. */
            needbytes++;
            /* fallthrough */
        case utf7_printable:
            needbytes++;            /* the literal character itself */
            break;
        default:
            /* utf7_end: write out the pending bits, if any. */
            if (needbytes) {
                if (lackofbytes(1, outbytesleft))
                    return 0;
                output(base64(utf7_state[1]), outbuf);
            }
            /*
             * NOTE(review): the state is left as it was, so a second
             * flush would emit the same symbol again; presumably the
             * caller resets the encoder afterwards - confirm.
             */
            return 1;
        }
        if (lackofbytes(needbytes, outbytesleft))
            return 0;
        if (utf7_state[0] > 1)
            output(base64(utf7_state[1]), outbuf);
        if (ch != utf7_printable)
            output('-', outbuf);
        utf7_state[0] = 0;
        return output((unsigned char)in, outbuf);
    }

    /* Not inside a base64 run. */
    switch (ch) {
    case utf7_end:
        return 1;
    case utf7_base64:
    case utf7_printable:
    case utf7_shift_out:
        /* Direct character: reserve the byte before writing it. */
        if (lackofbytes(1, outbytesleft))
            return 0;
        return output((unsigned char)in, outbuf);
    case utf7_shift_in:
        needbytes = 2;              /* literal '+' is written as "+-" */
        break;
    default:
        break;                      /* utf7_encoded: '+' and 2 symbols */
    }
    if (lackofbytes(needbytes, outbytesleft))
        return 0;
    output('+', outbuf);
    return ch == utf7_shift_in ? output('-', outbuf)
                               : encode(utf7_state, in, outbuf);
#undef utf7_state
}
/*
 * Consume one byte from *inbuf and translate it from the base64
 * alphabet to its 6-bit value (0..63).
 *
 * The input position is advanced by one byte in every case.  If the
 * byte is not a base64 symbol, *error is set to 1 and
 * UCS_CHAR_INVALID is returned; *error is never cleared here.
 */
static ucs_t base64_input(const unsigned char **inbuf, int *error)
{
    const unsigned char sym = **inbuf;

    ++*inbuf;

    if (sym == '+')
        return 62;
    if (sym == '/')
        return 63;
    if (between(sym, '0', '9'))
        return sym - '0' + 52;
    if (between(sym, 'a', 'z'))
        return sym - 'a' + 26;
    if (between(sym, 'A', 'Z'))
        return sym - 'A';

    *error = 1;
    return UCS_CHAR_INVALID;
}
/*
 * Decode the next 16-bit character from a base64 run.
 *
 * state[0] is the decoder phase and state[1] is the last 6-bit value
 * read, whose low bits still belong to the next character:
 *   phase 2     - 2 bits carried over; reads 3 symbols, -> phase 3
 *   phase 3     - 4 bits carried over; reads 2 symbols, -> phase 1
 *   other phase - nothing carried over; reads 3 symbols, -> phase 2
 *
 * The caller must already have checked that enough input bytes are
 * available; *inbuf is advanced past every symbol read, also on error.
 *
 * Returns the decoded character (masked to 16 bits), or
 * UCS_CHAR_INVALID - leaving the state untouched - when any of the
 * bytes read is not a base64 symbol.
 *
 * Each symbol is read in its own statement: base64_input() advances
 * *inbuf, and C does not define the evaluation order of the operands
 * of '|', so the reads must not share one expression.
 */
static ucs_t decode(char *state, const unsigned char **inbuf)
{
    int errflag = 0;
    ucs_t res, first, second, third;

    switch (state[0]) {
    case 2:
        first = base64_input(inbuf, &errflag);
        second = base64_input(inbuf, &errflag);
        third = base64_input(inbuf, &errflag);
        if (errflag)
            return UCS_CHAR_INVALID;
        res = ((unsigned)(state[1]) << 14)
            | (first << 8)
            | (second << 2)
            | (third >> 4);
        state[1] = (char)third;     /* low 4 bits go to the next character */
        state[0] = 3;
        break;
    case 3:
        first = base64_input(inbuf, &errflag);
        second = base64_input(inbuf, &errflag);
        if (errflag)
            return UCS_CHAR_INVALID;
        res = ((unsigned)(state[1]) << 12)
            | (first << 6)
            | second;
        state[0] = 1;
        break;
    default:
        first = base64_input(inbuf, &errflag);
        second = base64_input(inbuf, &errflag);
        third = base64_input(inbuf, &errflag);
        if (errflag)
            return UCS_CHAR_INVALID;
        res = (first << 10)
            | (second << 4)
            | (third >> 2);
        state[1] = (char)third;     /* low 2 bits go to the next character */
        state[0] = 2;
        break;
    }
    /* Drop the stale high bits contributed by the shifted state[1]. */
    return res & 0xFFFF;
}
/*
 * Decode one UCS character from UTF-7 input.
 *
 * module->data is used as a two-byte decoder state: byte 0 is non-zero
 * while inside a base64 run (see decode()), byte 1 holds carried bits.
 *
 * inbuf        in/out: current read position, advanced past the bytes
 *              consumed.
 * inbytesleft  in/out: bytes available at *inbuf, reduced by the
 *              number of bytes consumed.
 *
 * Returns the decoded character, UCS_CHAR_NONE when the available
 * input is too short to finish the current sequence, or
 * UCS_CHAR_INVALID for a byte or sequence that cannot be decoded.
 *
 * When UCS_CHAR_NONE is returned, *inbuf, *inbytesleft and the shift
 * state are left exactly as they were on entry, so the call can be
 * repeated once more input is available.  An undecodable byte is
 * consumed from both the pointer and the counter, and no byte is read
 * when *inbytesleft is zero.
 */
static ucs_t convert_to_ucs(struct iconv_ces *module,
                            const unsigned char **inbuf, apr_size_t *inbytesleft)
{
#define utf7_state ((char *)(module->data))
    ucs_t ret;
    int ch, needbytes = 0;
    const char entry_phase = utf7_state[0];

    if (*inbytesleft == 0)
        return UCS_CHAR_NONE;

    ch = char_type(**inbuf);
    if (ch == utf7_encoded) {
        /* Byte has no meaning in UTF-7: skip it and report it. */
        if (lackofbytes(1, inbytesleft))
            return UCS_CHAR_NONE;
        (*inbuf)++;
        return UCS_CHAR_INVALID;
    }

    if (utf7_state[0]) {
        switch (ch) {
        case utf7_shift_out:
            /* '-' closes the run; the byte after it is decoded now. */
            if (*inbytesleft < 2)
                return UCS_CHAR_NONE;
            needbytes = 1;          /* remember that '-' was consumed */
            (*inbuf)++;
            ch = char_type(**inbuf);
            (*inbytesleft)--;
            /* fallthrough */
        case utf7_printable:
            utf7_state[0] = 0;
            break;
        default:
            /* Still in base64: phase 3 needs 2 symbols, others need 3. */
            return lackofbytes(utf7_state[0] > 2 ? 2 : 3, inbytesleft)
                ? UCS_CHAR_NONE : decode(utf7_state, inbuf);
        }
    }

    if (ch == utf7_shift_in) {
        if (*inbytesleft < 2) {
            /* Undo the '-' consumed above, including the shift state. */
            (*inbuf) -= needbytes;
            (*inbytesleft) += needbytes;
            utf7_state[0] = entry_phase;
            return UCS_CHAR_NONE;
        }
        (*inbuf)++;                 /* step over '+'; not yet counted */
        switch (char_type(**inbuf)) {
        case utf7_shift_out:
            /* "+-" is the escape for a literal '+'. */
            (*inbuf)++;
            (*inbytesleft) -= 2;
            return '+';
        case utf7_base64:
        case utf7_shift_in:
            /* '+' plus the three symbols of the first character. */
            if (lackofbytes(4, inbytesleft)) {
                /*
                 * The pointer moved over '+' and possibly '-', but
                 * only the '-' was subtracted from the counter.
                 */
                (*inbuf) -= needbytes + 1;
                (*inbytesleft) += needbytes;
                utf7_state[0] = entry_phase;
                return UCS_CHAR_NONE;
            }
            return decode(utf7_state, inbuf);
        default:
            break;
        }
        /* '+' followed by something that cannot start a run. */
        (*inbytesleft)--;
        return UCS_CHAR_INVALID;
    }

    /* Direct character: hand the byte back unchanged. */
    (*inbytesleft)--;
    ret = **inbuf;
    (*inbuf)++;
    return ret;
#undef utf7_state
}
/*
 * CES descriptor for UTF-7.  The initialiser is positional: it mixes
 * shared apr_iconv_ces_* helpers with the three routines defined in
 * this file (utf7_names, convert_from_ucs, convert_to_ucs).
 * NOTE(review): struct iconv_ces_desc is declared elsewhere; the role
 * of each slot is inferred from the initialiser names only - confirm
 * against the declaration before reordering anything.
 */
static const struct iconv_ces_desc iconv_ces_desc = {
apr_iconv_ces_open_func,
apr_iconv_ces_close_func,
apr_iconv_ces_reset_func,
/* name table callback defined in this file */
utf7_names,
apr_iconv_ces_nbits7,
apr_iconv_ces_zero,
/* UCS -> UTF-7 encoder defined in this file */
convert_from_ucs,
/* UTF-7 -> UCS decoder defined in this file; last slot left NULL */
convert_to_ucs, NULL
};
/*
 * Module descriptor with external linkage (the only non-static symbol
 * in this file).  It ties the module type ICMOD_UC_CES and the
 * apr_iconv_mod_noevent handler to the UTF-7 CES descriptor above.
 * NOTE(review): struct iconv_module_desc is declared elsewhere; the
 * meaning of the NULL third slot is not visible here - confirm.
 */
struct iconv_module_desc iconv_module = {
ICMOD_UC_CES,
apr_iconv_mod_noevent,
NULL,
&iconv_ces_desc
};