blob: c7d0fc1e822c07139ffbe92645b81db54b781b1e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_DOMAIN "vm.accessors"
#include <apr_atomic.h>
#include "cxxlog.h"
#include "vm_strings.h"
#include "environment.h"
#include "vm_stats.h"
#include "exceptions.h"
#include "vm_arrays.h"
#include "port_threadunsafe.h"
/////////////////////////////////////////////////////////////
// begin utf8 support
//
// See JVM Spec, Section 4.4.7
//
// Count the 16-bit characters encoded by a NUL-terminated string in the
// "modified UTF-8" format of the JVM Spec (1-, 2- and 3-byte sequences only).
//   utf8 - NUL-terminated input
// Returns the number of encoded characters, or -1 if the input is malformed
// (stray continuation byte, truncated sequence, or a lead byte announcing
// more than three bytes).
int get_unicode_length_of_utf8(const char *utf8)
{
    const unsigned char *p = (const unsigned char *)utf8;
    int len = 0;
    unsigned ch;
    while ((ch = *p++) != 0) {
        len++;
        if (!(ch & 0x80))
            continue;                   // 1 byte encoding
        if (!(ch & 0x40))
            return -1;                  // continuation byte where a lead byte is expected
        // Validate every byte BEFORE stepping over it: a truncated sequence
        // ends in the NUL terminator, which fails the 10xxxxxx test, so the
        // scan never reads beyond the end of the string.
        unsigned ch2 = *p++;
        if ((ch2 & 0xc0) != 0x80)       // check second byte high bits
            return -1;
        if (ch & 0x20) {                // 3 byte encoding
            if ((ch & 0xf0) != 0xe0)    // check first byte high bits
                return -1;
            unsigned ch3 = *p++;
            if ((ch3 & 0xc0) != 0x80)   // check third byte high bits
                return -1;
        }
    }
    return len;
} //get_unicode_length_of_utf8
// Number of bytes needed to encode the given 16-bit characters in modified
// UTF-8, not counting a terminating NUL.
//   unicode        - characters to measure
//   unicode_length - number of characters in `unicode`
// A zero character is counted as two bytes.
unsigned get_utf8_length_of_unicode(const uint16 *unicode, unsigned unicode_length)
{
    unsigned total = 0;
    for (unsigned left = unicode_length; left != 0; --left) {
        const unsigned c = *unicode++;
        if (c >= 0x800)
            total += 3;
        else if (c == 0 || c >= 0x80)
            total += 2;
        else
            total += 1;
    }
    return total;
} //get_utf8_length_of_unicode
unsigned get_utf8_length_of_8bit(const U_8* chars, size_t length)
{
unsigned len = 0;
for(unsigned i=0; i < length; i++)
if (chars[i]!=0 && chars[i]<0x80)
len++;
else
len += 2;
return len;
}
// Encode 16-bit characters as modified UTF-8 and NUL-terminate the result.
//   utf8_string    - destination; must have room for the encoded bytes plus
//                    the terminating NUL
//   unicode        - characters to encode
//   unicode_length - number of characters in `unicode`
void pack_utf8(char *utf8_string, const uint16 *unicode, unsigned unicode_length)
{
    char *out = utf8_string;
    for (unsigned idx = 0; idx < unicode_length; idx++) {
        const unsigned c = unicode[idx];
        if (c != 0 && c < 0x80) {
            // plain 7-bit character: one byte
            *out++ = (char)c;
            continue;
        }
        if (c < 0x800) {
            // two bytes; for c == 0 this yields 0xc0 0x80
            *out++ = (char)(0xc0 | ((c >> 6) & 0x1f));
            *out++ = (char)(0x80 | (c & 0x3f));
            continue;
        }
        // three bytes: bits 15..12, 11..6, 5..0
        *out++ = (char)(0xe0 | ((c >> 12) & 0xf));
        *out++ = (char)(0x80 | ((c >> 6) & 0x3f));
        *out++ = (char)(0x80 | (c & 0x3f));
    }
    *out = 0;
} //pack_utf8
// Encode 8-bit characters as modified UTF-8 and NUL-terminate the result.
//   utf8_string - destination; must have room for the encoded bytes plus
//                 the terminating NUL
//   chars       - characters to encode
//   length      - number of characters in `chars`
void utf8_from_8bit(char* utf8_string, const U_8* chars, size_t length)
{
    char* out = utf8_string;
    for (unsigned idx = 0; idx < length; idx++) {
        const unsigned c = chars[idx];
        if (c != 0 && c < 0x80) {
            *out++ = (char)c;
        } else {
            // two bytes; for c == 0 this yields 0xc0 0x80
            *out++ = (char)(0xc0 | ((c >> 6) & 0x1f));
            *out++ = (char)(0x80 | (c & 0x3f));
        }
    }
    *out = '\0';
}
// Decode a NUL-terminated modified UTF-8 string into 16-bit characters.
//   unicode     - destination; receives one element per encoded character,
//                 no terminator is written
//   utf8_string - NUL-terminated input
// The input is not validated here beyond a debug assert on the lead byte.
void unpack_utf8(uint16 *unicode, const char *utf8_string)
{
    const U_8 *in = (const U_8 *)utf8_string;
    uint16 *out = unicode;
    for (;;) {
        const uint16 lead = (uint16)*in++;
        if (lead == 0)
            break;
        if (!(lead & 0x80)) {
            // 1 byte encoding
            *out++ = lead;
            continue;
        }
        assert(lead & 0x40);
        const uint16 second = (uint16)*in++;
        if (lead & 0x20) {
            // 3 byte encoding
            const uint16 third = (uint16)*in++;
            *out++ = (uint16)(((0x0f & lead) << 12) + ((0x3f & second) << 6) + ((0x3f & third)));
        } else {
            // 2 byte encoding
            *out++ = (uint16)(((0x1f & lead) << 6) + (0x3f & second));
        }
    }
} //unpack_utf8
// end utf8 support
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
// begin strings
// The actual characters of a string might be stored as an array of 16-bit characters or compressed
// as an array of 8-bit characters. This structure combines this information into one structure.
// If is_compressed then the characters are stored as 8-bit and compressed points to the array,
// otherwise the characters are stored as 16-bit and unicode points to the array.
struct StringBuffer {
uint16* unicode; // 16-bit characters; set when is_compressed is false
U_8* compressed; // 8-bit characters; set when is_compressed is true
bool is_compressed; // tells which of the two pointers above was filled in
};
// Forward declaration: used by the length helpers before its definition further down.
static void string_get_buffer(ManagedObject* str, StringBuffer* buf);
/////////////////////////////////////////////////////////////
// String creation
// Offset of String fields in ManagedObject.
// Filled in by init_fields(). The accessors in this file treat a zero
// f_count_offset / f_value_char_offset as "not initialized yet".
static unsigned f_count_offset, f_offset_offset, f_value_char_offset, f_value_byte_offset;
static void init_fields() {
Global_Env *global_env = VM_Global_State::loader_env;
Class* clss = global_env->JavaLangString_Class;
Field *f_count = class_lookup_field_recursive(clss, "count", "I");
Field *f_offset = class_lookup_field_recursive(clss, "offset", "I");
Field *f_value_byte = class_lookup_field_recursive(clss, "bvalue", "[B");
Field *f_value_char = class_lookup_field_recursive(clss, "value", "[C");
assert(f_count);
assert(f_offset);
f_count_offset = f_count->get_offset();
f_offset_offset = f_offset->get_offset();
f_value_char_offset = f_value_char != 0 ? f_value_char->get_offset(): 0;
f_value_byte_offset = f_value_byte != 0 ? f_value_byte->get_offset(): 0;
}
// Write the count, offset and backing-array fields of a String object.
//   str           - the String object to fill in
//   length        - value stored in the "count" field
//   offset        - value stored in the "offset" field
//   chars         - backing array stored through STORE_REFERENCE
//   is_byte_array - true: store into the byte-array field; false: the char-array field
static void string_set_fields_separate(ManagedObject* str, unsigned length, unsigned offset, Vector_Handle chars, bool is_byte_array)
{
    if (f_count_offset == 0)
        init_fields();
    const unsigned value_offset = is_byte_array ? f_value_byte_offset : f_value_char_offset;
    assert(value_offset);
    U_8* base = (U_8*)str;
    *(U_32*)(base + f_count_offset) = length;
    *(U_32*)(base + f_offset_offset) = offset;
    STORE_REFERENCE(str, base + value_offset, chars);
}
// GC must be disabled, but at a GC safe point.
// Allocate the backing array and the String object for a string of
// unicode_length characters and link the two together. The characters are
// NOT filled in here; the caller writes them through buf.
//   unicode_length - number of characters the string will hold
//   eight_bit      - true: use a byte array (characters compressed to 8 bits);
//                    false: use a char array (16-bit characters)
//   str            - out: the new String object, or NULL on failure
//   buf            - out: points at element 0 of the new array; written only on success
// Failure handling: the size check and the array allocation raise
// OutOfMemoryError here. When the String object allocation fails the function
// only returns; per the inline comment the error has already been thrown.
static void string_create(unsigned unicode_length, bool eight_bit, ManagedObject** str, StringBuffer* buf)
{
ASSERT_RAISE_AREA;
assert(!hythread_is_suspend_enabled());
Global_Env *global_env = VM_Global_State::loader_env;
// Pick the array class that matches the requested character width.
Class *clss;
clss = global_env->ArrayOfChar_Class;
if (eight_bit)
clss = global_env->ArrayOfByte_Class;
assert(clss);
unsigned sz = clss->calculate_array_size(unicode_length);
if (sz == 0) {
// string too long
*str = NULL;
exn_raise_object(VM_Global_State::loader_env->java_lang_OutOfMemoryError);
return;
}
Vector_Handle array = vm_alloc_and_report_ti(sz, clss->get_allocation_handle(),
vm_get_gc_thread_local(), clss);
if(!array) { // OutOfMemory should be thrown
*str = NULL;
exn_raise_object(VM_Global_State::loader_env->java_lang_OutOfMemoryError);
return;
}
#ifdef VM_STATS
clss->instance_allocated(sz);
#endif //VM_STATS
set_vector_length(array, unicode_length);
VTable *jls_vtable = VM_Global_State::loader_env->JavaLangString_VTable;
assert(!hythread_is_suspend_enabled());
// The array is registered with the GcFrame before the next allocation.
// NOTE(review): presumably this keeps `array` valid if allocating the String
// object triggers a collection -- confirm against GcFrame's contract.
GcFrame gc;
gc.add_object((ManagedObject**)&array);
ManagedObject* jls = (ManagedObject*)class_alloc_new_object_using_vtable(jls_vtable);
if (!jls) { // OutOfMemory is thrown
*str = NULL;
return;
}
gc.add_object((ManagedObject**)&jls);
// New strings always start at offset 0 of their backing array.
string_set_fields_separate(jls, unicode_length, 0, array, eight_bit);
*str = jls;
// Hand back a pointer to the first element so the caller can fill in the characters.
buf->is_compressed = eight_bit;
if (eight_bit)
buf->compressed = (U_8*)get_vector_element_address_int8(array, 0);
else
buf->unicode = get_vector_element_address_uint16(array, 0);
}
// Build a String object from a NUL-terminated modified UTF-8 buffer.
// GC must be disabled, but at a GC safe point.
//   buf    - the UTF-8 bytes; buf[length] must be the terminating NUL
//   length - byte length of buf; only used by the debug assert
// Returns the new String, or NULL if the bytes are malformed or the string
// could not be created.
ManagedObject* string_create_from_utf8(const char* buf, unsigned length)
{
    ASSERT_RAISE_AREA;
    assert(buf && buf[length]=='\0');
    const int char_count = get_unicode_length_of_utf8(buf);
    if (char_count < 0) {
        return NULL;    // data error
    }
    ManagedObject* result;
    StringBuffer storage;
    string_create((unsigned) char_count, false, &result, &storage);
    if (result == NULL) {
        return NULL;    // if OutOfMemory
    }
    if (!storage.is_compressed) {
        unpack_utf8(storage.unicode, buf);
    } else {
        memcpy(storage.compressed, buf, char_count);
    }
    return result;
}
// True when every one of the `length` characters fits in 8 bits (<= 0xff).
// An empty array is compressible.
static bool is_compressible_jchar_array(const uint16* unicodeChars, unsigned length)
{
    unsigned idx = 0;
    while (idx < length) {
        if (unicodeChars[idx] > 0xff) {
            return false;
        }
        ++idx;
    }
    return true;
}
// Build a String object from `length` 16-bit characters.
// GC must be disabled, but at a GC safe point.
// The string is stored compressed (8 bits per character) only when the
// environment enables compressed strings and every character fits in 8 bits.
// Returns the new String, or NULL if it could not be created.
ManagedObject* string_create_from_unicode(const uint16* buf, unsigned length)
{
    ASSERT_RAISE_AREA;
    Global_Env *env = VM_Global_State::loader_env;
    const bool use_bytes = env->strings_are_compressed && is_compressible_jchar_array(buf, length);
    ManagedObject* result;
    StringBuffer storage;
    string_create(length, use_bytes, &result, &storage);
    if (result == NULL) {
        return NULL;    // if OutOfMemory
    }
    if (!storage.is_compressed) {
        memcpy(storage.unicode, buf, sizeof(uint16) * length);
        return result;
    }
    // narrow each character to 8 bits
    for (unsigned idx = 0; idx < length; idx++) {
        storage.compressed[idx] = (U_8)buf[idx];
    }
    return result;
}
// Handle-returning variant of string_create_from_utf8.
// Must be called with suspension enabled; it is disabled only around the
// handle allocation and string creation.
// Returns a local handle to the new String, or NULL if creation failed.
ObjectHandle string_create_from_utf8_h(const char* buf, unsigned length)
{
    ASSERT_RAISE_AREA;
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    ObjectHandle handle = oh_allocate_local_handle();
    handle->object = string_create_from_utf8(buf, length);
    tmn_suspend_enable();
    if (!handle->object) {
        return NULL;
    }
    return handle;
}
// Handle-returning variant of string_create_from_unicode.
// Must be called with suspension enabled; it is disabled only around the
// handle allocation and string creation.
// Returns the local handle unconditionally: when creation fails the handle's
// object is NULL but the handle itself is still returned.
// NOTE(review): string_create_from_utf8_h returns NULL in that situation;
// confirm which behavior the callers of this function rely on.
ObjectHandle string_create_from_unicode_h(const uint16* buf, unsigned length)
{
    ASSERT_RAISE_AREA;
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    ObjectHandle handle = oh_allocate_local_handle();
    handle->object = string_create_from_unicode(buf, length);
    tmn_suspend_enable();
    return handle;
}
///////////////////////////////////////////////////////////////////
// Getting Length
// GC must be disabled.
// Returns the string's length in characters (the value of its "count" field).
unsigned string_get_length(ManagedObject* str)
{
    assert(!hythread_is_suspend_enabled());
    assert(str);
    if (f_count_offset == 0) {
        init_fields();    // first use: resolve the String field offsets
    }
    U_8* base = (U_8*)str;
    U_32* count_slot = (U_32*)(base + f_count_offset);
    return *count_slot;
}
// Handle variant of string_get_length: returns the length in characters.
// Must be called with suspension enabled; it is disabled while the object is read.
unsigned string_get_length_h(ObjectHandle str)
{
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    assert(str && str->object);
    const unsigned char_count = string_get_length(str->object);
    tmn_suspend_enable();
    return char_count;
}
// GC must be disabled.
// Returns the number of bytes in the UTF8 encoding of the string
// (terminating NUL not included).
unsigned string_get_utf8_length(ManagedObject* str)
{
    const unsigned char_count = string_get_length(str);
    StringBuffer chars;
    string_get_buffer(str, &chars);
    return chars.is_compressed
        ? get_utf8_length_of_8bit(chars.compressed, char_count)
        : get_utf8_length_of_unicode(chars.unicode, char_count);
}
// Handle variant of string_get_utf8_length: returns the byte length of the
// string's UTF8 encoding.
// Must be called with suspension enabled; it is disabled while the object is read.
unsigned string_get_utf8_length_h(ObjectHandle str)
{
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    assert(str && str->object);
    const unsigned byte_count = string_get_utf8_length(str->object);
    tmn_suspend_enable();
    return byte_count;
}
///////////////////////////////////////////////////////////////////
// Getting Characters
// Locate the characters of a String object.
//   str - the String object to inspect
//   buf - out: is_compressed plus the matching pointer (unicode or compressed),
//         already advanced by the string's "offset" field
// A non-null char-array field means the string is stored as 16-bit characters;
// otherwise the byte-array field is used and the string is reported as compressed.
static void string_get_buffer(ManagedObject* str, StringBuffer* buf)
{
    if (f_value_char_offset == 0) init_fields();
    assert(f_value_char_offset);
    U_8* str_raw = (U_8*)str;
    unsigned offset = *(U_32*)(str_raw + f_offset_offset);
    Vector_Handle char_array = get_raw_reference_pointer((ManagedObject**)(str_raw+f_value_char_offset));
    if (char_array) {
        buf->is_compressed = false;
        buf->unicode = get_vector_element_address_uint16(char_array, offset);
    } else {
        assert(f_value_byte_offset);
        // Read the byte-array slot the same way as the char-array slot above.
        // Both fields are written through STORE_REFERENCE, so a plain pointer
        // load here would disagree with the writer whenever the stored form
        // of a reference differs from a raw pointer.
        Vector_Handle byte_array = get_raw_reference_pointer((ManagedObject**)(str_raw+f_value_byte_offset));
        buf->is_compressed = true;
        buf->compressed = (U_8*)get_vector_element_address_int8(byte_array, offset);
    }
}
// GC must be disabled.
// Returns a freshly allocated, zero-terminated copy of the string's
// characters as 16-bit values, or NULL if the allocation fails.
// The caller should free the result.
const uint16* string_get_unicode_chars(ManagedObject* string)
{
    assert(string);
    const U_32 char_count = string_get_length(string);
    StringBuffer src;
    string_get_buffer(string, &src);
    uint16* copy = (uint16*)STD_MALLOC(sizeof(uint16)*(char_count+1));
    if (copy == NULL) {
        return NULL;
    }
    if (!src.is_compressed) {
        memcpy(copy, src.unicode, sizeof(uint16)*char_count);
    } else {
        // widen each 8-bit character to 16 bits
        for (unsigned idx = 0; idx < char_count; idx++) {
            copy[idx] = src.compressed[idx];
        }
    }
    copy[char_count] = 0;
    return copy;
}
// GC must be disabled.
// Returns a freshly allocated, zero-terminated modified UTF-8 encoding of the
// string, or NULL if the allocation fails (same convention as
// string_get_unicode_chars).
// The caller should free the result.
const char* string_get_utf8_chars(ManagedObject* string)
{
    assert(string);
    U_32 unicode_size = string_get_length(string);
    StringBuffer buf;
    string_get_buffer(string, &buf);
    unsigned utf_size = buf.is_compressed
        ? get_utf8_length_of_8bit(buf.compressed, unicode_size)
        : get_utf8_length_of_unicode(buf.unicode, unicode_size);
    // Widen before adding the terminator so the +1 cannot wrap in `unsigned`.
    size_t sz = (size_t)utf_size + 1;
    char* utf_chars = (char*)STD_MALLOC(sz);
    // An assert alone disappears in release builds and would let the encoder
    // write through a NULL pointer; report the failure to the caller instead.
    if (NULL == utf_chars)
        return NULL;
    if (buf.is_compressed)
        utf8_from_8bit(utf_chars, buf.compressed, unicode_size);
    else
        pack_utf8(utf_chars, buf.unicode, unicode_size);
    assert(strlen(utf_chars) < sz);
    return utf_chars;
} //string_get_utf8_chars
// Handle variant of string_get_utf8_chars.
// Must be called with suspension enabled; it is disabled while the object is read.
// The caller should free the result.
const char* string_get_utf8_chars_h(ObjectHandle string)
{
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    assert(string && string->object);
    const char* utf8_copy = string_get_utf8_chars(string->object);
    tmn_suspend_enable();
    return utf8_copy;
}
// GC must be disabled.
// Copy characters offset..offset+count-1 of the string into res as 16-bit
// values. No bounds check and no terminator: res must hold `count` elements.
void string_get_unicode_region(ManagedObject* str, unsigned offset, unsigned count, uint16* res)
{
    StringBuffer src;
    string_get_buffer(str, &src);
    if (!src.is_compressed) {
        memcpy(res, src.unicode+offset, count*sizeof(uint16));
        return;
    }
    // widen each 8-bit character to 16 bits
    for (unsigned idx = 0; idx < count; idx++) {
        res[idx] = src.compressed[idx+offset];
    }
}
// Handle variant of string_get_unicode_region: copy characters
// offset..offset+count-1 into buf.
// Must be called with suspension enabled; it is disabled while the object is read.
// The requested range is checked against the string length by a debug assert only.
void string_get_unicode_region_h(ObjectHandle str, unsigned offset, unsigned count, uint16* buf)
{
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    assert(str && str->object && offset+count<=string_get_length(str->object));
    ManagedObject* target = str->object;
    string_get_unicode_region(target, offset, count, buf);
    tmn_suspend_enable();
}
// GC must be disabled.
// Encode characters offset..offset+count-1 of the string as UTF8 into res.
// The encoders NUL-terminate the output; no bounds check is made here.
void string_get_utf8_region(ManagedObject* str, unsigned offset, unsigned count, char* res)
{
    StringBuffer src;
    string_get_buffer(str, &src);
    if (!src.is_compressed) {
        pack_utf8(res, src.unicode+offset, count);
    } else {
        utf8_from_8bit(res, src.compressed+offset, count);
    }
}
// Handle variant of string_get_utf8_region: encode characters
// offset..offset+count-1 as UTF8 into buf.
// Must be called with suspension enabled; it is disabled while the object is read.
// The requested range is checked against the string length by a debug assert only.
void string_get_utf8_region_h(ObjectHandle str, unsigned offset, unsigned count, char* buf)
{
    assert(hythread_is_suspend_enabled());
    tmn_suspend_disable();
    assert(str && str->object && offset+count<=string_get_length(str->object));
    ManagedObject* target = str->object;
    string_get_utf8_region(target, offset, count, buf);
    tmn_suspend_enable();
}
///////////////////////////////////////////////////////////////////
// Old interface
// Given a VM String, return its interned Java_java_lang_String object.
// GC must be disabled.
// If str already carries a cached reference in str->intern (compressed_ref or
// raw_ref), that object is returned directly; otherwise the request is
// forwarded to the global string pool's intern().
// The result may be NULL: String_to_interned_jstring checks for that and
// expects an exception to have been raised in that case.
// NOTE(review): REFS_RUNTIME_SWITCH_IF/ELSE/ENDIF and the REFS_RUNTIME_OR_*
// guards are defined elsewhere; presumably they select between compressed and
// raw references -- confirm against their definitions.
VMEXPORT // temporary solution for interpreter unplug
Java_java_lang_String *vm_instantiate_cp_string_resolved(String *str)
{
ASSERT_RAISE_AREA;
assert(!hythread_is_suspend_enabled());
REFS_RUNTIME_SWITCH_IF
#ifdef REFS_RUNTIME_OR_COMPRESSED
// Cached compressed reference: expand it and return.
if (str->intern.compressed_ref != 0) {
return uncompress_compressed_reference(str->intern.compressed_ref);
}
#endif // REFS_RUNTIME_OR_COMPRESSED
REFS_RUNTIME_SWITCH_ELSE
#ifdef REFS_RUNTIME_OR_UNCOMPRESSED
// Cached raw reference: return it as is.
if (str->intern.raw_ref != NULL) {
return str->intern.raw_ref;
}
#endif // REFS_RUNTIME_OR_UNCOMPRESSED
REFS_RUNTIME_SWITCH_ENDIF
// Nothing cached yet: let the string pool produce the interned object.
return VM_Global_State::loader_env->string_pool.intern(str);
} //vm_instantiate_cp_string_resolved
// Interning of strings.
// Returns a local handle (as jstring) to the interned Java string for `str`,
// or NULL when interning fails; in that case an exception is expected to
// have been raised.
// Suspension is disabled for the duration of the lookup and handle setup.
jstring String_to_interned_jstring(String* str)
{
    ASSERT_RAISE_AREA;
    tmn_suspend_disable();
    Java_java_lang_String *interned = vm_instantiate_cp_string_resolved(str);
    if (interned != NULL) {
        ObjectHandle handle = oh_allocate_local_handle();
        handle->object = interned;
        tmn_suspend_enable();
        return (jstring)handle;
    }
    tmn_suspend_enable();
    assert(exn_raised());
    return NULL;
}
// Return the interned Java string for the String constant at index cp_index
// in the constant pool of class c.
// The lookup itself is delegated to vm_instantiate_cp_string_resolved, run
// between BEGIN_RAISE_AREA and END_RAISE_AREA; afterwards
// exn_rethrow_if_pending() is called before the result is returned.
// NOTE(review): the exact semantics of ASSERT_THROW_AREA and the
// BEGIN/END_RAISE_AREA macros are defined elsewhere -- presumably they switch
// between "throw" and "raise" exception modes; confirm against exceptions.h.
Java_java_lang_String*
vm_instantiate_cp_string_slow(Class* c, unsigned cp_index)
{
ASSERT_THROW_AREA;
#ifdef VM_STATS
// Statistics only: count how often this path is taken.
UNSAFE_REGION_START
VM_Statistics::get_vm_stats().num_instantiate_cp_string_slow++;
UNSAFE_REGION_END
#endif
Java_java_lang_String* result;
ConstantPool& cp = c->get_constant_pool();
String* str = cp.get_string(cp_index);
BEGIN_RAISE_AREA;
result = vm_instantiate_cp_string_resolved(str);
END_RAISE_AREA;
exn_rethrow_if_pending();
return result;
} //vm_instantiate_cp_string_slow
// end strings
/////////////////////////////////////////////////////////////