// blob: c43e8c0e2d259c4b63e9e8fe6e547b0d5610b6e0 [file] [log] [blame]
/**
* SPDX-FileCopyrightText: 2016-2021 The Apache Software Foundation
* SPDX-License-Identifier: Apache-2.0
* @license
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* Represents a piece of text in any kind of ‘file’.
*
* Its purpose is to enable generic algorithms to deal with text content of any
* type of ‘file’ that consists of many pieces of text (e.g. a DOM, PDF, …).
* Each Chunk represents one piece of text ({@link Chunk.data}). An object
* implementing this interface would typically have other attributes as well to
* map the chunk back to its position in the file (e.g. a Text node in the DOM).
*
* @typeParam TData - Piece of text, typically `string`
*
* @public
*/
export interface Chunk<TData> {
/**
* The piece of text this chunk represents.
*/
readonly data: TData;
/**
* Optional custom equality test.
*
* When a chunk defines this method, {@link chunkEquals} calls it instead of
* comparing the two objects by identity. Implementations are expected to
* return true if `otherChunk` represents the same piece of text (i.e. at the
* same position) in the file.
*
* @param otherChunk - The chunk to compare this chunk with.
* @returns Whether `otherChunk` is considered equal to this chunk.
*/
equals?(otherChunk: this): boolean;
}
/**
 * Decide whether two {@link Chunk}s denote the same piece of text, i.e. the
 * same position in the file.
 *
 * The comparison is delegated to a custom {@link Chunk.equals} method when one
 * is available: the method of `chunk1` is preferred, and the method of
 * `chunk2` is used only if `chunk1` does not define one. If neither chunk
 * defines `equals`, the chunks are equal only when they are the very same
 * object (`chunk1 === chunk2`).
 *
 * @param chunk1 - The first chunk; its `equals` method takes precedence.
 * @param chunk2 - The second chunk.
 * @returns The result of the custom `equals` call, or of the identity check.
 *
 * @public
 */
export function chunkEquals(chunk1: Chunk<any>, chunk2: Chunk<any>): boolean {
  if (chunk1.equals) {
    // The first chunk's own notion of equality wins.
    return chunk1.equals(chunk2);
  }
  // Otherwise ask the second chunk, or fall back to object identity.
  return chunk2.equals ? chunk2.equals(chunk1) : chunk1 === chunk2;
}
/**
* Points at a range of characters between two points inside {@link Chunk}s.
*
* Analogous to the DOM’s ({@link https://developer.mozilla.org/en-US/docs/Web/API/AbstractRange
* | Abstract}){@link https://developer.mozilla.org/en-US/docs/Web/API/Range |
* Range}. Each index expresses an offset inside the value of the corresponding
* {@link Chunk.data}, and can equal the length of that data in order to point
* to the position right after the chunk’s last character.
*
* @public
*/
export interface ChunkRange<TChunk extends Chunk<any>> {
/**
* The chunk in which the range starts.
*/
startChunk: TChunk;
/**
* Offset of the range's start inside the value of `startChunk.data`. May
* equal the length of that data, to point right after its last character.
*/
startIndex: number;
/**
* The chunk in which the range ends.
*/
endChunk: TChunk;
/**
* Offset of the range's end inside the value of `endChunk.data`. May equal
* the length of that data, to point right after its last character.
*/
endIndex: number;
}
/**
 * Decide whether two {@link ChunkRange}s are equal.
 *
 * Two ranges are equal when all four of their properties match:
 * {@link ChunkRange.startChunk}, {@link ChunkRange.startIndex},
 * {@link ChunkRange.endChunk} and {@link ChunkRange.endIndex}. The chunks are
 * compared with {@link chunkEquals} (so a custom {@link Chunk.equals} method
 * is honoured when defined); the indices are compared with `===`.
 *
 * This is a structural comparison, not a semantic one. A boundary at the end
 * of one chunk and a boundary at the start of a subsequent chunk are treated
 * as different, even though they may delimit the same characters. To compare
 * ranges semantically, normalise both inputs first: typically that means
 * shrinking each range to its narrowest equivalent and, if it is empty,
 * positioning it at its first equivalent.
 *
 * @param range1 - The first range.
 * @param range2 - The second range.
 * @returns True if both ranges have equal chunks and equal indices.
 *
 * @public
 */
export function chunkRangeEquals(
  range1: ChunkRange<any>,
  range2: ChunkRange<any>,
): boolean {
  // Chunks are checked first, start before end, then the plain offsets.
  if (!chunkEquals(range1.startChunk, range2.startChunk)) return false;
  if (!chunkEquals(range1.endChunk, range2.endChunk)) return false;
  if (range1.startIndex !== range2.startIndex) return false;
  return range1.endIndex === range2.endIndex;
}
/**
* Presents the pieces of text contained in some underlying ‘file’ as a sequence
* of {@link Chunk}s.
*
* Rather than presenting a list of all pieces, the `Chunker` provides methods
* to walk through the file piece by piece. This permits implementations to read
* and convert the file to `Chunk`s lazily.
*
* For those familiar with the DOM APIs, it is similar to a NodeIterator (but
* unlike NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk).
*
* @typeParam TChunk - (sub)type of `Chunk` being used.
*
* @public
*/
export interface Chunker<TChunk extends Chunk<any>> {
/**
* The chunk currently being pointed at.
*
* Initially, this should normally be the first chunk in the file.
*/
readonly currentChunk: TChunk;
/**
* Point {@link currentChunk} at the chunk following it, and return that chunk.
* If there are no chunks following it, keep `currentChunk` unchanged and
* return null.
*
* @returns The new current chunk, or null if there is no following chunk.
*/
nextChunk(): TChunk | null;
/**
* Point {@link currentChunk} at the chunk preceding it, and return that chunk.
* If there are no chunks preceding it, keep `currentChunk` unchanged and
* return null.
*
* @returns The new current chunk, or null if there is no preceding chunk.
*/
previousChunk(): TChunk | null;
/**
* Test if a given `chunk` is before the {@link currentChunk|current
* chunk}.
*
* Returns true if `chunk` is before `this.currentChunk`, false otherwise
* (i.e. if `chunk` follows it or is the current chunk).
*
* The given `chunk` need not necessarily be obtained from the same `Chunker`,
* but the chunkers would need to represent the same file. Otherwise behaviour
* is unspecified (an implementation might throw or just return `false`).
*
* @param chunk - A chunk, typically obtained from the same `Chunker`.
* @returns Whether `chunk` is positioned before the current chunk.
*/
precedesCurrentChunk(chunk: TChunk): boolean;
}