Account for char possibly being unsigned
This sounds rather insane to me but I've managed to show that `(char)
-1` is converted to 255 on some platforms. This was reproduced on
ppc64el via Qemu on OS X. A simple program that does `fprintf(stderr,
"%d\r\n", (char) -1);` prints 255 to the console. Rather than rely on
the signedness of a char I've just updated things to use an unsigned
char (which hopefully is never signed) and replaced -1 with 255 for the
sentinel value when converting hex values.
Thanks to Balint Reczey (@rbalint) for the report.
Fixes #74
diff --git a/c_src/utf8.c b/c_src/utf8.c
index e251bc6..cd0c717 100644
--- a/c_src/utf8.c
+++ b/c_src/utf8.c
@@ -3,15 +3,23 @@
#include "jiffy.h"
#include <stdio.h>
-static const char hexvals[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
- -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+static const unsigned char hexvals[256] = {
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 255, 255, 255, 255, 255, 255,
+ 255, 10, 11, 12, 13, 14, 15, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 10, 11, 12, 13, 14, 15, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255
};
static const char hexdigits[16] = {
@@ -27,10 +35,10 @@
unsigned char* h = (unsigned char*) p;
int ret;
- if(hexvals[*(h+0)] < 0) return -1;
- if(hexvals[*(h+1)] < 0) return -1;
- if(hexvals[*(h+2)] < 0) return -1;
- if(hexvals[*(h+3)] < 0) return -1;
+ if(hexvals[*(h+0)] == 255) return -1;
+ if(hexvals[*(h+1)] == 255) return -1;
+ if(hexvals[*(h+2)] == 255) return -1;
+ if(hexvals[*(h+3)] == 255) return -1;
ret = (hexvals[*(h+0)] << 12)
+ (hexvals[*(h+1)] << 8)