blob: 3bd9f5d73f71ec41c43266d6797ae65ce005a3df [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "config.h"
#include "common/iconv.h"
#include <guacamole/unicode.h>
#include <stdint.h>
/**
* Lookup table for Unicode code points, indexed by CP-1252 codepoint.
*/
const static int __GUAC_RDP_CP1252_CODEPOINT[32] = {
0x20AC, /* 0x80 */
0xFFFD, /* 0x81 */
0x201A, /* 0x82 */
0x0192, /* 0x83 */
0x201E, /* 0x84 */
0x2026, /* 0x85 */
0x2020, /* 0x86 */
0x2021, /* 0x87 */
0x02C6, /* 0x88 */
0x2030, /* 0x89 */
0x0160, /* 0x8A */
0x2039, /* 0x8B */
0x0152, /* 0x8C */
0xFFFD, /* 0x8D */
0x017D, /* 0x8E */
0xFFFD, /* 0x8F */
0xFFFD, /* 0x90 */
0x2018, /* 0x91 */
0x2019, /* 0x92 */
0x201C, /* 0x93 */
0x201D, /* 0x94 */
0x2022, /* 0x95 */
0x2013, /* 0x96 */
0x2014, /* 0x97 */
0x02DC, /* 0x98 */
0x2122, /* 0x99 */
0x0161, /* 0x9A */
0x203A, /* 0x9B */
0x0153, /* 0x9C */
0xFFFD, /* 0x9D */
0x017E, /* 0x9E */
0x0178, /* 0x9F */
};
int guac_iconv(guac_iconv_read* reader, const char** input, int in_remaining,
guac_iconv_write* writer, char** output, int out_remaining) {
while (in_remaining > 0 && out_remaining > 0) {
int value;
const char* read_start;
char* write_start;
/* Read character */
read_start = *input;
value = reader(input, in_remaining);
in_remaining -= *input - read_start;
/* Write character */
write_start = *output;
writer(output, out_remaining, value);
out_remaining -= *output - write_start;
/* Stop if null terminator reached */
if (value == 0)
return 1;
}
/* Null terminator not reached */
return 0;
}
int GUAC_READ_UTF8(const char** input, int remaining) {
int value;
*input += guac_utf8_read(*input, remaining, &value);
return value;
}
int GUAC_READ_UTF16(const char** input, int remaining) {
int value;
/* Bail if not enough data */
if (remaining < 2)
return 0;
/* Read two bytes as integer */
value = *((uint16_t*) *input);
*input += 2;
return value;
}
int GUAC_READ_CP1252(const char** input, int remaining) {
int value = *((unsigned char*) *input);
/* Replace value with exception if not identical to ISO-8859-1 */
if (value >= 0x80 && value <= 0x9F)
value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80];
(*input)++;
return value;
}
int GUAC_READ_ISO8859_1(const char** input, int remaining) {
int value = *((unsigned char*) *input);
(*input)++;
return value;
}
/**
* Invokes the given reader function, automatically normalizing newline
* sequences as Unix-style newline characters ('\n'). All other charaters are
* read verbatim.
*
* @param reader
* The reader to use to read the given character.
*
* @param input
* Pointer to the location within the input buffer that the next character
* should be read from.
*
* @param remaining
* The number of bytes remaining in the input buffer.
*
* @return
* The codepoint that was read, or zero if the end of the input string has
* been reached.
*/
static int guac_iconv_read_normalized(guac_iconv_read* reader,
const char** input, int remaining) {
/* Read requested character */
const char* input_start = *input;
int value = reader(input, remaining);
/* Automatically translate CRLF pairs to simple newlines */
if (value == '\r') {
/* Peek ahead by one character, adjusting remaining bytes relative to
* last read */
int peek_remaining = remaining - (*input - input_start);
const char* peek_input = *input;
int peek_value = reader(&peek_input, peek_remaining);
/* Consider read value to be a newline if we have encountered a "\r\n"
* (CRLF) pair */
if (peek_value == '\n') {
value = '\n';
*input = peek_input;
}
}
return value;
}
int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) {
return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining);
}
int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) {
return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining);
}
int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) {
return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining);
}
int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
}
void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
*output += guac_utf8_write(value, *output, remaining);
}
void GUAC_WRITE_UTF16(char** output, int remaining, int value) {
/* Bail if not enough data */
if (remaining < 2)
return;
/* Write two bytes as integer */
*((uint16_t*) *output) = value;
*output += 2;
}
void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
/* If not in ISO-8859-1 part of CP1252, check lookup table */
if ((value >= 0x80 && value <= 0x9F) || value > 0xFF) {
int i;
int replacement_value = '?';
const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT;
/* Search lookup table for value */
for (i=0x80; i<=0x9F; i++, codepoint++) {
if (*codepoint == value) {
replacement_value = i;
break;
}
}
/* Replace value with discovered value (or question mark) */
value = replacement_value;
}
*((unsigned char*) *output) = (unsigned char) value;
(*output)++;
}
void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {
/* Translate to question mark if out of range */
if (value > 0xFF)
value = '?';
*((unsigned char*) *output) = (unsigned char) value;
(*output)++;
}
/**
* Invokes the given writer function, automatically writing newline characters
* ('\n') as CRLF ("\r\n"). All other charaters are written verbatim.
*
* @param writer
* The writer to use to write the given character.
*
* @param output
* Pointer to the location within the output buffer that the next character
* should be written.
*
* @param remaining
* The number of bytes remaining in the output buffer.
*
* @param value
* The codepoint of the character to write.
*/
static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output,
int remaining, int value) {
if (value != '\n') {
writer(output, remaining, value);
return;
}
char* output_start = *output;
writer(output, remaining, '\r');
remaining -= *output - output_start;
if (remaining > 0)
writer(output, remaining, '\n');
}
void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) {
guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value);
}
void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) {
guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value);
}
void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) {
guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value);
}
void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
}