// blob: a354310608971dd69aa44b4b075d6f3ae908d508 [file] [log] [blame]
using System;
using System.Diagnostics;
using System.IO;
using Lucene.Net.Util;
namespace Lucene.Net.Analysis
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// A <see cref="Tokenizer"/> is a <see cref="TokenStream"/> whose input is a <see cref="TextReader"/>.
/// <para>
/// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()"/>.
/// </para>
/// <para>
/// NOTE: Subclasses overriding <see cref="TokenStream.IncrementToken()"/> must
/// call <see cref="AttributeSource.ClearAttributes()"/> before
/// setting attributes.
/// </para>
/// </summary>
public abstract class Tokenizer : TokenStream
{
    /// <summary>
    /// The text source for this Tokenizer. It holds a sentinel reader (whose read
    /// methods throw <see cref="InvalidOperationException"/>) until <see cref="Reset()"/>
    /// installs the pending reader, and again after <see cref="Dispose()"/>.
    /// </summary>
    protected internal TextReader input = ILLEGAL_STATE_READER;

    /// <summary>
    /// Pending reader: not actually assigned to <see cref="input"/> until <see cref="Reset()"/>.
    /// </summary>
    private TextReader inputPending = ILLEGAL_STATE_READER;

    /// <summary>
    /// Construct a token stream processing the given input.
    /// </summary>
    /// <param name="input"> the reader to tokenize; it becomes the active input on the next <see cref="Reset()"/> </param>
    /// <exception cref="ArgumentNullException"> if <paramref name="input"/> is <c>null</c> </exception>
    protected internal Tokenizer(TextReader input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input", "input must not be null");
        }

        this.inputPending = input;
    }

    /// <summary>
    /// Construct a token stream processing the given input using the given AttributeFactory.
    /// </summary>
    /// <param name="factory"> the attribute factory passed to the base class constructor </param>
    /// <param name="input"> the reader to tokenize; it becomes the active input on the next <see cref="Reset()"/> </param>
    /// <exception cref="ArgumentNullException"> if <paramref name="input"/> is <c>null</c> </exception>
    protected internal Tokenizer(AttributeFactory factory, TextReader input)
        : base(factory)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input", "input must not be null");
        }

        this.inputPending = input;
    }

    /// <summary>
    /// Disposes the current input reader and puts this tokenizer back into its
    /// "no reader" state.
    /// <para>
    /// <b>NOTE:</b>
    /// The default implementation disposes the input reader, so
    /// be sure to call <c>base.Dispose()</c> when overriding this method.
    /// </para>
    /// <para>
    /// Only <see cref="input"/> is disposed. A pending reader that was never activated
    /// by <see cref="Reset()"/> is dropped without being disposed.
    /// </para>
    /// </summary>
    public override void Dispose()
    {
        try
        {
            input.Dispose();
        }
        finally
        {
            // LUCENE-2387: don't hold onto the reader after close, so the GC can
            // reclaim it. Done in a finally block so the references are released
            // (and a new reader can be set) even when disposing the reader throws.
            inputPending = ILLEGAL_STATE_READER;
            input = ILLEGAL_STATE_READER;
        }
    }

    /// <summary>
    /// Return the corrected offset. If <see cref="input"/> is a <see cref="CharFilter"/> subclass
    /// this method calls <see cref="CharFilter.CorrectOffset(int)"/>, else returns <paramref name="currentOff"/>.
    /// </summary>
    /// <param name="currentOff"> offset as seen in the output </param>
    /// <returns> corrected offset based on the input </returns>
    /// <seealso cref="CharFilter.CorrectOffset(int)"/>
    protected internal int CorrectOffset(int currentOff)
    {
        // A single 'as' conversion replaces the previous type test followed by a cast.
        CharFilter charFilter = input as CharFilter;
        return charFilter != null ? charFilter.CorrectOffset(currentOff) : currentOff;
    }

    /// <summary>
    /// Expert: Set a new reader on the Tokenizer. Typically, an
    /// analyzer (in its tokenStream method) will use
    /// this to re-use a previously created tokenizer.
    /// <para>
    /// The reader is only stored as pending; it becomes the active
    /// <see cref="input"/> on the next call to <see cref="Reset()"/>.
    /// </para>
    /// </summary>
    /// <exception cref="ArgumentNullException"> if the assigned value is <c>null</c> </exception>
    /// <exception cref="InvalidOperationException">
    /// if the previous reader is still active, i.e. <see cref="Dispose()"/> was not called
    /// </exception>
    public TextReader Reader
    {
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value", "input must not be null");
            }
            else if (this.input != ILLEGAL_STATE_READER)
            {
                throw new InvalidOperationException("TokenStream contract violation: close() call missing");
            }

            this.inputPending = value;
            // Test hook; like the rest of Debug.Assert, the call is compiled out of non-DEBUG builds.
            Debug.Assert(SetReaderTestPoint());
        }
    }

    /// <summary>
    /// Calls <c>base.Reset()</c> and then activates the pending reader: it becomes
    /// <see cref="input"/>, and the pending slot is refilled with the sentinel reader.
    /// Calling this again without setting a new reader therefore leaves <see cref="input"/>
    /// pointing at the sentinel, whose read methods throw.
    /// </summary>
    public override void Reset()
    {
        base.Reset();
        input = inputPending;
        inputPending = ILLEGAL_STATE_READER;
    }

    /// <summary>
    /// Test hook invoked (inside a <c>Debug.Assert</c>) whenever a new reader is set.
    /// The default implementation does nothing and returns <c>true</c>.
    /// </summary>
    // only used by assert, for testing
    internal virtual bool SetReaderTestPoint()
    {
        return true;
    }

    /// <summary>
    /// Sentinel reader used while no real reader is active; reading from it signals a
    /// violation of the TokenStream consuming workflow.
    /// </summary>
    private static readonly TextReader ILLEGAL_STATE_READER = new ReaderAnonymousInnerClassHelper();

    /// <summary>
    /// Reader whose read operations always throw <see cref="InvalidOperationException"/>.
    /// </summary>
    private sealed class ReaderAnonymousInnerClassHelper : TextReader
    {
        private const string ContractViolationMessage = "TokenStream contract violation: reset()/close() call missing, "
            + "reset() called multiple times, or subclass does not call super.reset(). "
            + "Please see Javadocs of TokenStream class for more information about the correct consuming workflow.";

        // Unlike java.io.Reader, the base TextReader.Peek() and TextReader.Read()
        // simply return -1. Without these overrides a consumer using the single-character
        // API would see a silent end-of-stream instead of the contract-violation error.
        public override int Peek()
        {
            throw new InvalidOperationException(ContractViolationMessage);
        }

        public override int Read()
        {
            throw new InvalidOperationException(ContractViolationMessage);
        }

        public override int Read(char[] cbuf, int off, int len)
        {
            throw new InvalidOperationException(ContractViolationMessage);
        }

        public override void Close()
        {
            // Intentionally empty: the shared sentinel holds no resources and must stay usable.
        }
    }
}
}