| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| parcel Lucy; |
| |
| inert class Lucy::Util::StringHelper cnick StrHelp { |
| |
| /* A table where the values indicate the number of bytes in a UTF-8 |
| * sequence implied by the leading utf8 byte. |
| * |
| * Index the table with the first byte of a sequence to obtain the |
| * total length of that sequence in bytes. |
| */ |
| inert const uint8_t[] UTF8_COUNT; |
| |
| /** Return the number of bytes that two strings have in common. |
| * |
| * NOTE(review): presumably this is the length of the shared leading |
| * prefix rather than a count of matching bytes at arbitrary positions -- |
| * confirm against the implementation. |
| * |
| * @param a The first string. |
| * @param b The second string. |
| * @param a_len Presumably the length of <code>a</code> in bytes. |
| * @param b_len Presumably the length of <code>b</code> in bytes. |
| * @return the number of bytes in common. NOTE(review): the result is an |
| * int32_t while the lengths are size_t; confirm the behavior for inputs |
| * longer than INT32_MAX. |
| */ |
| inert int32_t |
| overlap(const char *a, const char *b, size_t a_len, size_t b_len); |
| |
| /** Encode a NUL-terminated string representation of a value in base 36 |
| * into <code>buffer</code>. |
| * |
| * @param value The number to be encoded. |
| * @param buffer A buffer at least MAX_BASE36_BYTES bytes long. That |
| * constant is defined in the C block at the end of this file and already |
| * includes room for the terminating NUL. |
| * @return the number of digits encoded (not including the terminating |
| * NUL). |
| */ |
| inert uint32_t |
| to_base36(uint64_t value, void *buffer); |
| |
| /** Return true if the string is valid UTF-8, false otherwise. |
| * |
| * @param ptr Start of the bytes to check. |
| * @param len Presumably the number of bytes to check, starting at |
| * <code>ptr</code>. |
| */ |
| inert bool_t |
| utf8_valid(const char *ptr, size_t len); |
| |
| /** Returns true if the code point qualifies as Unicode whitespace. |
| * |
| * @param code_point The Unicode code point to test. |
| */ |
| inert bool_t |
| is_whitespace(uint32_t code_point); |
| |
| /** Encode a Unicode code point to a UTF-8 sequence. |
| * |
| * @param code_point A legal unicode code point. |
| * @param buffer Write buffer which must hold at least 4 bytes (the |
| * maximum legal length for a UTF-8 char). |
| * @return NOTE(review): presumably the number of bytes written to |
| * <code>buffer</code> -- confirm against the implementation. |
| */ |
| inert uint32_t |
| encode_utf8_char(uint32_t code_point, void *buffer); |
| |
| /** Decode a UTF-8 sequence to a Unicode code point. Assumes valid UTF-8. |
| * |
| * No length is passed, so the caller is responsible for making sure that |
| * a complete, valid sequence is readable at <code>utf8</code>. |
| * |
| * @param utf8 Pointer to the sequence to decode. |
| * @return the decoded code point. |
| */ |
| inert uint32_t |
| decode_utf8_char(const char *utf8); |
| |
| /** Return the first non-continuation byte before the supplied pointer. |
| * If backtracking progresses beyond the supplied start, return NULL. |
| * |
| * @param utf8 Position to backtrack from. |
| * @param start Lower bound for the backtracking. |
| * @return a pointer to the located byte, or NULL (the return type is |
| * declared <code>nullable</code>). |
| * |
| * NOTE(review): <code>start</code> is declared <code>char*</code> while |
| * <code>utf8</code> and the return value are <code>const char*</code>; the |
| * bound looks like it could be const as well -- confirm against the |
| * definition before changing the signature. |
| */ |
| inert nullable const char* |
| back_utf8_char(const char *utf8, char *start); |
| } |
| |
| __C__ |
| /** The maximum number of bytes encoded by to_base36(), including the |
| * terminating NUL. |
| * |
| * to_base36() takes a uint64_t, and the largest uint64_t needs 13 base-36 |
| * digits (36^12 < 2^64 - 1 < 36^13); the 14th byte holds the terminator. |
| * |
| * The unprefixed StrHelp_MAX_BASE36_BYTES alias is only defined when |
| * LUCY_USE_SHORT_NAMES is defined. |
| */ |
| #define lucy_StrHelp_MAX_BASE36_BYTES 14 |
| #ifdef LUCY_USE_SHORT_NAMES |
| #define StrHelp_MAX_BASE36_BYTES lucy_StrHelp_MAX_BASE36_BYTES |
| #endif |
| __END_C__ |
| |
| |