// blob: ff4b8b81089b80b915bb125c01e4eb0f91427b83 [file] [log] [blame]
using System;
using System.IO;
namespace Lucene.Net.Analysis
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Subclasses of <see cref="CharFilter"/> can be chained to filter a <see cref="TextReader"/>.
/// They can be used as <see cref="TextReader"/> with additional offset
/// correction. <see cref="Tokenizer"/>s will automatically use <see cref="CorrectOffset"/>
/// if a <see cref="CharFilter"/> subclass is used.
/// <para/>
/// This class is abstract: at a minimum you must implement <see cref="TextReader.Read(char[], int, int)"/>,
/// transforming the input in some way from <see cref="m_input"/>, and <see cref="Correct(int)"/>
/// to adjust the offsets to match the originals.
/// <para/>
/// You can optionally provide more efficient implementations of additional methods
/// like <see cref="TextReader.Read()"/>, but this is not required.
/// <para/>
/// For examples and integration with <see cref="Analyzer"/>, see the
/// <see cref="Lucene.Net.Analysis"/> namespace documentation.
/// </summary>
// the way java.io.FilterReader should work!
public abstract class CharFilter : TextReader
{
    /// <summary>
    /// The underlying character-input stream.
    /// </summary>
    protected internal readonly TextReader m_input;

    /// <summary>
    /// Create a new <see cref="CharFilter"/> wrapping the provided reader.
    /// </summary>
    /// <param name="input"> a <see cref="TextReader"/>, can also be a <see cref="CharFilter"/> for chaining. </param>
    public CharFilter(TextReader input)
    {
        this.m_input = input;
    }

    /// <summary>
    /// Closes the underlying input stream.
    /// <para/>
    /// <b>NOTE:</b>
    /// The default implementation closes the input <see cref="TextReader"/>, so
    /// be sure to call <c>base.Dispose(disposing)</c> when overriding this method.
    /// </summary>
    /// <param name="disposing"><c>true</c> to also dispose the wrapped <see cref="m_input"/>;
    /// when <c>false</c> only the base class cleanup runs.</param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            // The constructor does not reject a null reader, so guard here:
            // Dispose must not fail with a NullReferenceException, and the
            // base class cleanup below must still run.
            m_input?.Dispose();
        }
        base.Dispose(disposing);
    }

    /// <summary>
    /// Subclasses override to correct the current offset.
    /// </summary>
    /// <param name="currentOff"> current offset </param>
    /// <returns> corrected offset </returns>
    protected abstract int Correct(int currentOff);

    /// <summary>
    /// Chains the corrected offset through the input
    /// <see cref="CharFilter"/>(s).
    /// </summary>
    /// <param name="currentOff"> current offset, as seen by this filter </param>
    /// <returns> the offset after this filter's <see cref="Correct(int)"/> has been applied and,
    /// if <see cref="m_input"/> is itself a <see cref="CharFilter"/>, after that filter's
    /// <see cref="CorrectOffset(int)"/> has been applied to the result </returns>
    public int CorrectOffset(int currentOff)
    {
        int corrected = Correct(currentOff);

        // Only a wrapped CharFilter can correct further; any other reader
        // (or no reader at all) ends the chain.
        CharFilter inner = m_input as CharFilter;
        return inner != null ? inner.CorrectOffset(corrected) : corrected;
    }

    // LUCENENET specific - force subclasses to implement Read(char[] buffer, int index, int count),
    // since it is required (and the .NET implementation calls Read(), which would cause infinite
    // recursion if it were called).
    public abstract override int Read(char[] buffer, int index, int count);

    // LUCENENET specific - need to override read, as it returns -1 by default in .NET.
    /// <summary>
    /// Reads a single character by delegating to <see cref="Read(char[], int, int)"/>
    /// with a one-element buffer.
    /// </summary>
    /// <returns>The character read, or <c>-1</c> if fewer than one character was read.</returns>
    public override int Read()
    {
        var buffer = new char[1];
        int count = Read(buffer, 0, 1);
        return (count < 1) ? -1 : buffer[0];
    }

    // LUCENENET specific
    #region From Reader Class

    /// <summary>
    /// Skips characters. This method will block until some characters are available, an I/O error occurs, or the end of the stream is reached.
    /// <para/>
    /// The implementation in this class always throws <see cref="NotSupportedException"/>;
    /// subclasses that support skipping must override it.
    /// <para/>
    /// LUCENENET specific. Moved here from the Reader class (in Java) so it can be overridden to provide reader buffering.
    /// </summary>
    /// <param name="n">The number of characters to skip</param>
    /// <returns>The number of characters actually skipped</returns>
    /// <exception cref="NotSupportedException">Always thrown by this base implementation.</exception>
    public virtual long Skip(int n)
    {
        throw new NotSupportedException("Skip() not supported");
    }

    /// <summary>
    /// Resets the stream. The implementation in this class always throws
    /// <see cref="NotSupportedException"/>; subclasses that support resetting must override it.
    /// <para/>
    /// LUCENENET specific. Moved here from the Reader class (in Java) so it can be overridden to provide reader buffering.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown by this base implementation.</exception>
    public virtual void Reset()
    {
        throw new NotSupportedException("Reset() not supported");
    }

    /// <summary>
    /// Tells whether this stream is ready to be read.
    /// <para/>
    /// True if the next <see cref="TextReader.Read()"/> is guaranteed not to block for input, false otherwise. Note
    /// that returning false does not guarantee that the next read will block.
    /// The implementation in this class always returns false.
    /// <para/>
    /// LUCENENET specific. Moved here from the Reader class (in Java) so it can be overridden to provide reader buffering.
    /// </summary>
    public virtual bool IsReady => false;

    /// <summary>
    /// Tells whether this stream supports the <see cref="Mark(int)"/> operation. The default implementation always
    /// returns false. Subclasses should override this property.
    /// <para/>
    /// LUCENENET specific. Moved here from the Reader class (in Java) so it can be overridden to provide reader buffering.
    /// </summary>
    /// <value>true if and only if this stream supports the mark operation.</value>
    public virtual bool IsMarkSupported => false;

    /// <summary>
    /// Marks the present position in the stream. Subsequent calls to <see cref="Reset"/> will attempt to
    /// reposition the stream to this point. Not all character-input streams support the <see cref="Mark(int)"/> operation.
    /// The implementation in this class always throws <see cref="IOException"/>.
    /// <para/>
    /// LUCENENET specific. Moved here from the Reader class (in Java) so it can be overridden to provide reader buffering.
    /// </summary>
    /// <param name="readAheadLimit">Limit on the number of characters that may be read while still preserving the mark. After
    /// reading this many characters, attempting to reset the stream may fail.</param>
    /// <exception cref="IOException">Always thrown by this base implementation.</exception>
    public virtual void Mark(int readAheadLimit)
    {
        throw new IOException("Mark() not supported");
    }

    #endregion
}
}