| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define C_LUCY_CASEFOLDER |
| #define C_LUCY_BYTEBUF |
| #define C_LUCY_TOKEN |
| #include "XSBind.h" |
| |
| #include "Lucy/Analysis/CaseFolder.h" |
| #include "Lucy/Analysis/Token.h" |
| #include "Lucy/Analysis/Inversion.h" |
| #include "Lucy/Object/ByteBuf.h" |
| #include "Lucy/Util/Memory.h" |
| #include "Lucy/Util/StringHelper.h" |
| |
| static size_t |
| S_lc_to_work_buf(lucy_CaseFolder *self, uint8_t *source, size_t len, |
| uint8_t **buf, uint8_t **limit) { |
| lucy_ByteBuf *const work_buf = self->work_buf; |
| uint8_t *dest = *buf; |
| uint8_t *dest_start = dest; |
| uint8_t *const end = source + len; |
| uint8_t utf8_buf[7]; |
| |
| while (source < end) { |
| STRLEN buf_utf8_len; |
| (void)to_utf8_lower(source, utf8_buf, &buf_utf8_len); |
| |
| // Grow if necessary. |
| if (((STRLEN)(*limit - dest)) < buf_utf8_len) { |
| size_t bytes_so_far = dest - dest_start; |
| size_t amount = bytes_so_far + (end - source) + 10; |
| Lucy_BB_Set_Size(work_buf, bytes_so_far); |
| *buf = (uint8_t*)Lucy_BB_Grow(work_buf, amount); |
| dest_start = *buf; |
| dest = dest_start + bytes_so_far; |
| *limit = dest_start + work_buf->cap; |
| } |
| memcpy(dest, utf8_buf, buf_utf8_len); |
| |
| source += lucy_StrHelp_UTF8_COUNT[*source]; |
| dest += buf_utf8_len; |
| } |
| |
| { |
| size_t size = dest - dest_start; |
| Lucy_BB_Set_Size(work_buf, size); |
| return size; |
| } |
| } |
| |
| lucy_Inversion* |
| lucy_CaseFolder_transform(lucy_CaseFolder *self, lucy_Inversion *inversion) { |
| lucy_Token *token; |
| uint8_t *buf = (uint8_t*)Lucy_BB_Get_Buf(self->work_buf); |
| uint8_t *limit = buf + Lucy_BB_Get_Capacity(self->work_buf); |
| while (NULL != (token = Lucy_Inversion_Next(inversion))) { |
| size_t size = S_lc_to_work_buf(self, (uint8_t*)token->text, |
| token->len, &buf, &limit); |
| if (size > token->len) { |
| LUCY_FREEMEM(token->text); |
| token->text = (char*)LUCY_MALLOCATE(size + 1); |
| } |
| memcpy(token->text, buf, size); |
| token->text[size] = '\0'; |
| token->len = size; |
| } |
| Lucy_Inversion_Reset(inversion); |
| return (lucy_Inversion*)LUCY_INCREF(inversion); |
| } |
| |
| lucy_Inversion* |
| lucy_CaseFolder_transform_text(lucy_CaseFolder *self, lucy_CharBuf *text) { |
| lucy_Inversion *retval; |
| lucy_Token *token; |
| uint8_t *buf = (uint8_t*)Lucy_BB_Get_Buf(self->work_buf); |
| uint8_t *limit = buf + Lucy_BB_Get_Capacity(self->work_buf); |
| size_t size = S_lc_to_work_buf(self, Lucy_CB_Get_Ptr8(text), |
| Lucy_CB_Get_Size(text), &buf, &limit); |
| token = lucy_Token_new((char*)buf, size, 0, size, 1.0f, 1); |
| retval = lucy_Inversion_new(token); |
| LUCY_DECREF(token); |
| return retval; |
| } |
| |
| |