blob: 2c4a1128878c559f1e135cd353c0af6fec11b7ec [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
inert class Lucy::Util::StringHelper cnick StrHelp {
/* Lookup table of UTF-8 sequence lengths: each value is the number of
 * bytes in the UTF-8 sequence implied by the corresponding leading
 * utf8 byte.
 */
inert const uint8_t[] UTF8_COUNT;
/** Return the number of bytes that two strings have in common.
 *
 * NOTE(review): the wording does not say whether only a shared leading
 * prefix is counted -- confirm against the implementation.
 *
 * @param a The first string.
 * @param b The second string.
 * @param a_len Length of <code>a</code> (presumably in bytes).
 * @param b_len Length of <code>b</code> (presumably in bytes).
 * @return the number of bytes in common.
 */
inert int32_t
overlap(const char *a, const char *b, size_t a_len, size_t b_len);
/** Encode a NUL-terminated string representation of a value in base 36
 * into <code>buffer</code>.
 *
 * @param value The number to be encoded.
 * @param buffer A buffer at least MAX_BASE36_BYTES bytes long.
 * @return the number of digits encoded (not including the terminating
 * NUL).
 */
inert uint32_t
to_base36(uint64_t value, void *buffer);
/** Return true if the string is valid UTF-8, false otherwise.
 *
 * @param ptr Start of the string to check.
 * @param len Length of the string (presumably in bytes).
 */
inert bool_t
utf8_valid(const char *ptr, size_t len);
/** Return true if the code point qualifies as Unicode whitespace.
 *
 * @param code_point The Unicode code point to test.
 */
inert bool_t
is_whitespace(uint32_t code_point);
/** Encode a Unicode code point to a UTF-8 sequence.
 *
 * @param code_point A legal unicode code point.
 * @param buffer Write buffer which must hold at least 4 bytes (the
 * maximum legal length for a UTF-8 char).
 * @return NOTE(review): undocumented in the original; presumably the
 * number of bytes written to <code>buffer</code> -- confirm against the
 * implementation.
 */
inert uint32_t
encode_utf8_char(uint32_t code_point, void *buffer);
/** Decode a UTF-8 sequence to a Unicode code point. Assumes valid UTF-8.
 *
 * No length is passed, so the signature gives the function no way to
 * bounds-check its input; the caller is responsible for supplying a
 * complete, valid sequence.
 *
 * @param utf8 Pointer to the first byte of the sequence to decode.
 * @return the decoded code point.
 */
inert uint32_t
decode_utf8_char(const char *utf8);
/** Return the first non-continuation byte before the supplied pointer.
 * If backtracking progresses beyond the supplied start, return NULL.
 *
 * NOTE(review): <code>start</code> is declared as a non-const
 * <code>char*</code> although <code>utf8</code> and the return value are
 * const -- confirm whether that is intentional.
 *
 * @param utf8 Position to backtrack from.
 * @param start Lower bound for backtracking.
 * @return a pointer to the located byte, or NULL.
 */
inert nullable const char*
back_utf8_char(const char *utf8, char *start);
}
__C__
/** Size in bytes of the largest string to_base36() can produce, counting
 * the terminating NUL byte.  A uint64_t needs at most 13 base-36 digits,
 * so 14 bytes covers the digits plus the terminator.
 */
#define lucy_StrHelp_MAX_BASE36_BYTES 14
/* Expose the unprefixed alias only when short names are requested. */
#if defined(LUCY_USE_SHORT_NAMES)
#define StrHelp_MAX_BASE36_BYTES lucy_StrHelp_MAX_BASE36_BYTES
#endif
__END_C__