| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| parcel Lucy; |
| |
| /** Warehouse for information about one segment of an inverted index. |
| * |
| * Apache Lucy's indexes are made up of individual "segments", each of which is |
| * an independent inverted index. On the file system, each segment is a |
| * directory within the main index directory whose name starts with "seg_": |
| * "seg_2", "seg_5a", etc. |
| * |
| * Each Segment object keeps track of information about an index segment: its |
| * fields, document count, and so on. The Segment object itself writes one |
| * file, <code>segmeta.json</code>; besides storing info needed by Segment |
| * itself, the "segmeta" file serves as a central repository for metadata |
| * generated by other index components -- relieving them of the burden of |
| * storing metadata themselves. |
| */ |
| |
| class Lucy::Index::Segment cnick Seg inherits Lucy::Object::Obj { |
| |
| CharBuf *name; /* seg name (see Get_Name) */ |
| int64_t count; /* document count (see Get_Count) */ |
| int64_t number; /* segment number (see Get_Number) */ |
| Hash *by_name; /* field numbers by name */ |
| VArray *by_num; /* field names by num */ |
| Hash *metadata; /* values stored via Store_Metadata, by key */ |
| |
| /** Create a new Segment. |
| * |
| * @param number Segment number. |
| */ |
| inert incremented Segment* |
| new(int64_t number); |
| |
| /** Initialize a Segment. |
| * |
| * @param number Segment number. |
| */ |
| public inert Segment* |
| init(Segment *self, int64_t number); |
| |
| /** Return a segment name with a base-36-encoded segment number. |
| * |
| * @param number Segment number to encode. |
| */ |
| inert incremented CharBuf* |
| num_to_name(int64_t number); |
| |
| /** Return true if the CharBuf is a segment name, i.e. matches this |
| * pattern: /^seg_\w+$/ |
| * |
| * @param name Candidate segment name. |
| */ |
| inert bool_t |
| valid_seg_name(const CharBuf *name); |
| |
| /** Register a new field and assign it a field number. If the field was |
| * already known, nothing happens. |
| * |
| * @param field Field name. |
| * @return the field's field number, which is a positive integer. |
| */ |
| public int32_t |
| Add_Field(Segment *self, const CharBuf *field); |
| |
| /** Store arbitrary information in the segment's metadata Hash, to be |
| * serialized later. Throws an error if <code>key</code> is used twice. |
| * |
| * @param key String identifying an index component. |
| * @param metadata JSON-izable data structure. |
| */ |
| public void |
| Store_Metadata(Segment *self, const CharBuf *key, |
| decremented Obj *metadata); |
| |
| /** Variant of Store_Metadata() whose key is supplied as a char buffer |
| * plus length rather than as a CharBuf. |
| * |
| * NOTE(review): presumably the key is UTF-8 and the duplicate-key error |
| * behavior matches Store_Metadata() -- confirm against the implementation. |
| * |
| * @param key Pointer to the key's character data. |
| * @param len Length of <code>key</code>. |
| * @param value Data to store under <code>key</code>. |
| */ |
| void |
| Store_Metadata_Str(Segment *self, const char *key, size_t len, |
| decremented Obj *value); |
| |
| /** Fetch a value from the Segment's metadata hash. |
| * |
| * @param key Key the value was stored under. |
| * @return the stored value, or NULL (presumably when nothing is stored |
| * under <code>key</code> -- confirm against the implementation). |
| */ |
| public nullable Obj* |
| Fetch_Metadata(Segment *self, const CharBuf *key); |
| |
| /** Variant of Fetch_Metadata() whose key is supplied as a char buffer |
| * plus length rather than as a CharBuf. May return NULL. |
| * |
| * @param key Pointer to the key's character data. |
| * @param len Length of <code>key</code>. |
| */ |
| nullable Obj* |
| Fetch_Metadata_Str(Segment *self, const char *key, size_t len); |
| |
| /** Given a field name, return its field number for this segment (which |
| * may differ from its number in other segments). Return 0 (an invalid |
| * field number) if the field name can't be found. |
| * |
| * @param field Field name. |
| */ |
| public int32_t |
| Field_Num(Segment *self, const CharBuf *field); |
| |
| /** Given a field number, return the name of its field, or NULL if the |
| * field name can't be found. |
| * |
| * @param field_num Field number, as returned by Add_Field() or Field_Num(). |
| */ |
| public nullable CharBuf* |
| Field_Name(Segment *self, int32_t field_num); |
| |
| /** Getter for the object's seg name. |
| */ |
| public CharBuf* |
| Get_Name(Segment *self); |
| |
| /** Getter for the segment number. |
| */ |
| public int64_t |
| Get_Number(Segment *self); |
| |
| /** Setter for the object's document count. |
| * |
| * @param count New document count. |
| */ |
| public void |
| Set_Count(Segment *self, int64_t count); |
| |
| /** Getter for the object's document count. |
| */ |
| public int64_t |
| Get_Count(Segment *self); |
| |
| /** Add <code>increment</code> to the object's document count, then return |
| * the new, modified total. |
| * |
| * @param increment Amount to add to the document count. |
| * @return the document count after the addition. |
| */ |
| int64_t |
| Increment_Count(Segment *self, int64_t increment); |
| |
| /** Get the segment metadata. |
| * |
| * @return the metadata Hash. |
| */ |
| Hash* |
| Get_Metadata(Segment *self); |
| |
| /** Write the segmeta file. |
| * |
| * @param folder Folder to write to. NOTE(review): whether this is the main |
| * index folder or the segment's own directory is not visible here -- confirm. |
| */ |
| public void |
| Write_File(Segment *self, Folder *folder); |
| |
| /** Read the segmeta file for this segment. |
| * |
| * @param folder Folder to read from. NOTE(review): whether this is the main |
| * index folder or the segment's own directory is not visible here -- confirm. |
| * @return true if the file is read and decoded successfully, false |
| * otherwise. |
| */ |
| public bool_t |
| Read_File(Segment *self, Folder *folder); |
| |
| /** Compare by segment number. |
| * |
| * @param other Object to compare against. NOTE(review): presumably must be |
| * a Segment -- confirm against the implementation. |
| * @return the comparison result. NOTE(review): sign convention is not |
| * visible here; presumably negative/zero/positive -- confirm. |
| */ |
| public int32_t |
| Compare_To(Segment *self, Obj *other); |
| |
| /** Tear down the Segment. |
| * |
| * NOTE(review): presumably releases name, by_name, by_num and metadata -- |
| * confirm against the implementation. |
| */ |
| public void |
| Destroy(Segment *self); |
| } |
| |
| |