| // This class was sourced from the Apache Harmony project's BufferedReader |
| // https://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/ |
| |
| using Lucene.Net.Analysis.CharFilters; |
| using System; |
| using System.IO; |
| using System.Text; |
| using System.Threading.Tasks; |
| |
| namespace Lucene.Net.Analysis.Util |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// LUCENENET specific class that mimics Java's BufferedReader (that is, a reader that is seekable) |
| /// so that it supports Mark() and Reset() (which are part of the Java Reader class), while also |
| /// providing the Correct() method of BaseCharFilter. |
| /// </summary> |
| public class BufferedCharFilter : BaseCharFilter |
| { |
| public const int DEFAULT_CHAR_BUFFER_SIZE = 8192; |
| |
| /// <summary> |
| /// The object used to synchronize access to the reader. |
| /// </summary> |
| protected object m_lock = new object(); |
| |
| private TextReader @in; |
| |
| /// <summary> |
| /// The characters that can be read and refilled in bulk. We maintain three |
| /// indices into this buffer: |
| /// <code> |
| /// { X X X X X X X X X X X X - - } |
| /// ^ ^ ^ |
| /// | | | |
| /// mark pos end |
| /// </code> |
| /// Pos points to the next readable character. End is one greater than the |
| /// last readable character. When <c>pos == end</c>, the buffer is empty and |
| /// must be refilled by <see cref="FillBuf()"/> before characters can be read. |
| /// |
| /// <para/> Mark is the value pos will be set to on calls to |
| /// <see cref="Reset()"/>. Its value is in the range <c>[0...pos]</c>. If the mark is <c>-1</c>, the |
| /// buffer cannot be reset. |
| /// |
| /// <para/> MarkLimit limits the distance between the mark and the pos. When this |
| /// limit is exceeded, <see cref="Reset()"/> is permitted (but not required) to |
| /// throw an exception. For shorter distances, <see cref="Reset()"/> shall not throw |
| /// (unless the reader is closed). |
| /// </summary> |
| private char[] buf; |
| private int pos; |
| private int end; |
| private int mark = -1; |
| private int markLimit = -1; |
| |
| #if FEATURE_TEXTWRITER_CLOSE |
| /// <summary> |
| /// LUCENENET specific to throw an exception if the user calls <see cref="Close()"/> instead of <see cref="TextReader.Dispose()"/> |
| /// </summary> |
| private bool isDisposing = false; |
| #endif |
| |
/// <summary>
/// Initializes a buffering character-input stream over <paramref name="in"/> whose
/// internal buffer holds <see cref="DEFAULT_CHAR_BUFFER_SIZE"/> characters.
/// </summary>
/// <param name="in">The source reader to buffer.</param>
public BufferedCharFilter(TextReader @in)
    : this(@in, DEFAULT_CHAR_BUFFER_SIZE)
{
}
| |
/// <summary>
/// Creates a buffering character-input stream that uses an input buffer of the specified size.
/// </summary>
/// <param name="in">The source reader to buffer.</param>
/// <param name="size">Capacity of the internal buffer, in characters. Must be greater than zero.</param>
/// <exception cref="ArgumentOutOfRangeException">if <paramref name="size"/> is zero or negative.</exception>
public BufferedCharFilter(TextReader @in, int size)
    : base(@in)
{
    if (size <= 0)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the name and the message are passed separately.
        throw new ArgumentOutOfRangeException(nameof(size), "Buffer size <= 0");
    }
    this.@in = @in;
    buf = new char[size];
}
| |
/// <summary>
/// Disposes this reader. This implementation disposes the buffered source reader
/// and releases the buffer. Nothing is done if this reader has already been
/// disposed.
/// </summary>
/// <param name="disposing">
/// <c>true</c> to dispose the source reader and release the buffer; when <c>false</c>
/// this method does nothing.
/// </param>
/// <exception cref="IOException">if an error occurs while closing this reader.</exception>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

#if FEATURE_TEXTWRITER_CLOSE
    this.isDisposing = true;
#endif
    try
    {
        lock (m_lock)
        {
            if (!IsClosed)
            {
                @in.Dispose();
                @in = null;
                buf = null;
            }
        }
    }
    finally
    {
#if FEATURE_TEXTWRITER_CLOSE
        // Reset the flag even if the source reader throws, so that a later direct
        // call to Close() is still rejected instead of being silently ignored.
        this.isDisposing = false;
#endif
    }
}
| |
/// <summary>
/// Refills the internal buffer from the source reader. It is an error to call this
/// method while the buffer still contains unread data; i.e. if <c>pos &lt; end</c>.
/// </summary>
/// <returns>
/// The number of characters read into the buffer, or -1 if the source reader
/// returned no characters (end of input).
/// </returns>
private int FillBuf()
{
    // assert(pos == end);

    bool keepMark = mark != -1 && (pos - mark < markLimit);
    if (!keepMark)
    {
        /* No mark to preserve (unset, or its limit has been reached): reuse the whole buffer. */
        int fresh = @in.Read(buf, 0, buf.Length);
        if (fresh > 0)
        {
            mark = -1;
            pos = 0;
            end = fresh;
        }
        // LUCENENET specific: .NET readers signal end of input with 0; report -1 to mimic Java's reader
        return fresh == 0 ? -1 : fresh;
    }

    if (mark > 0)
    {
        /* Slide the marked data to the start of the buffer to free room at the tail. */
        System.Array.Copy(buf, mark, buf, 0, buf.Length - mark);
        pos -= mark;
        end -= mark;
        mark = 0;
    }
    else if (mark == 0 && markLimit > buf.Length)
    {
        /* The mark already sits at index 0, so the only way to make room is a bigger buffer:
         * double the capacity, capped at the mark limit. */
        int doubled = buf.Length * 2;
        int capacity = doubled > markLimit ? markLimit : doubled;
        char[] larger = new char[capacity];
        System.Array.Copy(buf, 0, larger, 0, buf.Length);
        buf = larger;
    }

    /* Append new characters after the data that is being kept for the mark. */
    int appended = @in.Read(buf, pos, buf.Length - pos);
    if (appended > 0)
    {
        end += appended;
    }
    // LUCENENET specific: .NET readers signal end of input with 0; report -1 to mimic Java's reader
    return appended == 0 ? -1 : appended;
}
| |
/// <summary>
/// Guard used by the public members: throws when this reader has already been closed.
/// </summary>
/// <exception cref="IOException">if <see cref="IsClosed"/> is <c>true</c>.</exception>
private void EnsureOpen()
{
    if (!IsClosed)
    {
        return;
    }
    throw new IOException("Reader already closed");
}
| |
/// <summary>
/// Gets a value indicating whether this reader is closed, i.e. whether the
/// internal buffer has been released.
/// </summary>
private bool IsClosed
{
    get { return buf == null; }
}
| |
/// <summary>
/// Sets a mark position in this reader. The parameter <paramref name="markLimit"/>
/// indicates how many characters can be read before the mark is invalidated.
/// Calling <see cref="Reset()"/> will reposition the reader back to the marked
/// position if <paramref name="markLimit"/> has not been surpassed.
/// </summary>
/// <param name="markLimit">
/// the number of characters that can be read before the mark is
/// invalidated.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">if <c>markLimit &lt; 0</c></exception>
/// <exception cref="IOException">if this reader has already been closed.</exception>
public override void Mark(int markLimit)
{
    if (markLimit < 0)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the name and the message are passed separately.
        throw new ArgumentOutOfRangeException(nameof(markLimit), "Read-ahead limit < 0");
    }
    lock (m_lock)
    {
        EnsureOpen();
        this.markLimit = markLimit;
        mark = pos;
    }
}
| |
/// <summary>
/// Gets a value indicating whether this reader supports <see cref="Mark(int)"/> and
/// <see cref="Reset()"/>. This implementation always returns <c>true</c>.
/// </summary>
/// <seealso cref="Mark(int)"/>
/// <seealso cref="Reset()"/>
public override bool IsMarkSupported
{
    get { return true; }
}
| |
| |
/// <summary>
/// Reads a single character from this reader. A character is taken from the
/// internal buffer when one is available; otherwise the buffer is refilled from
/// the source reader first.
/// </summary>
/// <returns>The character read (as an <see cref="int"/>), or -1 if the end of the source reader has been reached.</returns>
/// <exception cref="IOException">If this reader is disposed or some other I/O error occurs.</exception>
public override int Read()
{
    lock (m_lock)
    {
        EnsureOpen();

        /* Nothing buffered and nothing more to fetch: end of input. */
        if (pos >= end && FillBuf() == -1)
        {
            return -1;
        }
        return buf[pos++];
    }
}
| |
/// <summary>
/// Reads at most <paramref name="length"/> characters from this reader and stores them
/// at <paramref name="offset"/> in the character array <paramref name="buffer"/>.
/// If all the buffered characters have been used, no mark is in effect and the
/// remaining request is at least as large as this reader's buffer, the internal
/// buffer is bypassed and the characters are read directly into <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">the character array to store the characters read.</param>
/// <param name="offset">the initial position in <paramref name="buffer"/> to store the characters read from this reader.</param>
/// <param name="length">the maximum number of characters to read, must be non-negative.</param>
/// <returns>
/// The number of characters actually read. LUCENENET specific: 0 (not -1) is returned
/// when the end of the source reader has been reached.
/// </returns>
/// <exception cref="ArgumentNullException">if <paramref name="buffer"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// if <c>offset &lt; 0</c> or <c>length &lt; 0</c>, or if
/// <c>offset + length</c> is greater than the size of
/// <paramref name="buffer"/>.
/// </exception>
/// <exception cref="IOException">if this reader is disposed or some other I/O error occurs.</exception>
public override int Read(char[] buffer, int offset, int length)
{
    lock (m_lock)
    {
        EnsureOpen();
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset), "offset must be non-negative");
        }
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), "length must be non-negative");
        }
        if (offset > buffer.Length - length)
        {
            throw new ArgumentOutOfRangeException(nameof(length), "offset + length exceeds the size of buffer");
        }

        // LUCENENET specific: only CharFilter derived types support IsReady.
        // The source reader is not reassigned inside the loop, so resolve the cast once.
        var charFilter = @in as CharFilter;

        int outstanding = length;
        while (outstanding > 0)
        {
            /*
             * If there are characters in the buffer, grab those first.
             */
            int available = end - pos;
            if (available > 0)
            {
                int chunk = available >= outstanding ? outstanding : available;
                System.Array.Copy(buf, pos, buffer, offset, chunk);
                pos += chunk;
                offset += chunk;
                outstanding -= chunk;
            }

            /*
             * Before attempting to read from the underlying reader, make
             * sure we really, really want to. We won't bother if we're
             * done, or if we've already got some characters and reading from
             * the underlying reader would block.
             */
            if (outstanding == 0
                || (outstanding < length && charFilter != null && !charFilter.IsReady))
            {
                break;
            }

            // assert(pos == end);

            /*
             * If no mark is in effect and the remaining request is at least as
             * large as our buffer, read directly into the caller's buffer. We
             * don't do this for smaller requests because that could result in
             * many small reads.
             */
            if ((mark == -1 || (pos - mark >= markLimit))
                && outstanding >= buf.Length)
            {
                int direct = @in.Read(buffer, offset, outstanding);
                if (direct > 0)
                {
                    offset += direct;
                    outstanding -= direct;
                    mark = -1;
                }

                break; // assume the source reader gave us all that it could
            }

            if (FillBuf() == -1)
            {
                break; // source is exhausted
            }
        }

        // LUCENENET specific: follow the .NET convention of returning 0 (not -1) at end of input.
        return length - outstanding;
    }
}
| |
/// <summary>
/// Returns the next line of text available from this reader. A line is
/// represented by zero or more characters followed by <c>'\n'</c>,
/// <c>'\r'</c>, <c>"\r\n"</c> or the end of the reader. The string does
/// not include the newline sequence.
/// </summary>
/// <returns>The contents of the line or <c>null</c> if no characters were
/// read before the end of the reader has been reached.</returns>
/// <exception cref="IOException">if this reader is disposed or some other I/O error occurs.</exception>
public override string ReadLine()
{
    lock (m_lock)
    {
        EnsureOpen();

        /* Nothing buffered and the source is exhausted: no line to return. */
        if (pos == end && FillBuf() == -1)
        {
            return null;
        }

        /* Fast path: the whole line (and its terminator) is already in the buffer. */
        for (int i = pos; i < end; i++)
        {
            char current = buf[i];
            if (current != '\n' && current != '\r')
            {
                continue;
            }

            // Capture the line before any refill below can overwrite the buffer.
            string line = new string(buf, pos, i - pos);
            pos = i + 1;
            if (current == '\r')
            {
                /* Swallow the '\n' of a "\r\n" pair, refilling if the '\r' was the last buffered char. */
                bool hasNext = (pos < end) || (FillBuf() != -1);
                if (hasNext && buf[pos] == '\n')
                {
                    pos++;
                }
            }
            return line;
        }

        /* Slow path: the line continues past the buffered data; accumulate it piecewise. */
        char terminator = '\0';
        StringBuilder text = new StringBuilder(80); /* Typical Line Length */

        text.Append(buf, pos, end - pos);
        while (true)
        {
            pos = end;

            /* A '\n' ended the previous chunk: the line is complete. */
            if (terminator == '\n')
            {
                return text.ToString();
            }

            // attempt to fill buffer
            if (FillBuf() == -1)
            {
                // End of input: return what was gathered, or null if there was nothing at all.
                if (text.Length > 0 || terminator != '\0')
                {
                    return text.ToString();
                }
                return null;
            }

            for (int i = pos; i < end; i++)
            {
                char current = buf[i];
                if (terminator == '\0')
                {
                    if (current == '\n' || current == '\r')
                    {
                        terminator = current;
                    }
                    continue;
                }

                /* The previous character was the terminator; append everything before it. */
                if (i > pos)
                {
                    text.Append(buf, pos, i - pos - 1);
                }

                /* Consume the '\n' of a "\r\n" pair; otherwise leave the current char unread. */
                pos = (terminator == '\r' && current == '\n') ? i + 1 : i;
                return text.ToString();
            }

            /* Chunk exhausted: append it, leaving out a terminator found at its last position. */
            int pending = end - pos;
            text.Append(buf, pos, terminator == '\0' ? pending : pending - 1);
        }
    }
}
| |
/// <summary>
/// Gets a value indicating whether this reader can be read without blocking.
/// </summary>
/// <returns>
/// <c>true</c> if unread characters remain in the internal buffer, or if the source
/// reader is a <see cref="CharFilter"/> that reports itself ready; otherwise <c>false</c>.
/// </returns>
/// <exception cref="IOException">if this reader has already been closed.</exception>
public override bool IsReady
{
    get
    {
        lock (m_lock)
        {
            EnsureOpen();
            if (end - pos > 0)
            {
                return true;
            }

            // LUCENENET specific: only CharFilter derived types support IsReady
            var source = @in as CharFilter;
            return source != null && source.IsReady;
        }
    }
}
| |
/// <summary>
/// Moves this reader's position back to the location recorded by the last call to
/// <see cref="Mark(int)"/>. Subsequent calls to <see cref="Read()"/> and
/// <see cref="Skip(int)"/> continue from that location.
/// </summary>
/// <exception cref="IOException">If this reader is disposed or no mark has been set.</exception>
/// <seealso cref="Mark(int)"/>
/// <seealso cref="IsMarkSupported"/>
public override void Reset()
{
    lock (m_lock)
    {
        EnsureOpen();
        if (mark >= 0)
        {
            pos = mark;
            return;
        }
        throw new IOException("Reader not marked");
    }
}
| |
/// <summary>
/// Skips <paramref name="amount"/> characters in this reader. Subsequent
/// <see cref="Read()"/>s will not return these characters unless <see cref="Reset()"/>
/// is used. Skipping characters may invalidate a mark if its mark limit
/// is surpassed.
/// </summary>
/// <param name="amount">the maximum number of characters to skip.</param>
/// <returns>
/// the number of characters actually skipped; less than <paramref name="amount"/>
/// when the source reader is exhausted first.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">if <c>amount &lt; 0</c>.</exception>
/// <exception cref="IOException">If this reader is disposed or some other I/O error occurs.</exception>
/// <seealso cref="Mark(int)"/>
/// <seealso cref="IsMarkSupported"/>
/// <seealso cref="Reset()"/>
public override long Skip(int amount)
{
    if (amount < 0)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the name and the message are passed separately.
        throw new ArgumentOutOfRangeException(nameof(amount), "skip value is negative");
    }
    lock (m_lock)
    {
        EnsureOpen();
        if (amount < 1)
        {
            return 0;
        }

        /* The request can be satisfied entirely from buffered characters. */
        int buffered = end - pos;
        if (buffered >= amount)
        {
            pos += amount;
            return amount;
        }

        /* Consume what is buffered, then keep refilling until enough has been skipped. */
        int skipped = buffered;
        pos = end;
        while (skipped < amount)
        {
            if (FillBuf() == -1)
            {
                return skipped; // source exhausted before the full amount was skipped
            }

            int remaining = amount - skipped;
            if (end - pos >= remaining)
            {
                pos += remaining;
                return amount;
            }

            // Couldn't get all the characters, skip what we read
            skipped += end - pos;
            pos = end;
        }
        return amount;
    }
}
| |
| #region LUCENENET Specific Methods |
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override int Peek()
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="buffer">Unused.</param>
/// <param name="index">Unused.</param>
/// <param name="count">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override Task<int> ReadAsync(char[] buffer, int index, int count)
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="buffer">Unused.</param>
/// <param name="index">Unused.</param>
/// <param name="count">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override int ReadBlock(char[] buffer, int index, int count)
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="buffer">Unused.</param>
/// <param name="index">Unused.</param>
/// <param name="count">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override Task<int> ReadBlockAsync(char[] buffer, int index, int count)
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override Task<string> ReadLineAsync()
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override string ReadToEnd()
{
    throw new NotImplementedException();
}
| |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override Task<string> ReadToEndAsync()
{
    throw new NotImplementedException();
}
| #if FEATURE_TEXTWRITER_INITIALIZELIFETIMESERVICE |
/// <summary>
/// Not implemented by this reader; every call throws <see cref="NotImplementedException"/>.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always.</exception>
public override object InitializeLifetimeService()
{
    throw new NotImplementedException();
}
| #endif |
| |
| #if FEATURE_TEXTWRITER_CLOSE |
/// <summary>
/// LUCENENET specific: rejects direct calls to <c>Close()</c>. The call is a no-op only
/// while the <c>isDisposing</c> flag is set, which happens for the duration of
/// <see cref="Dispose(bool)"/>.
/// </summary>
/// <exception cref="NotSupportedException">if called while this reader is not being disposed.</exception>
public override void Close()
{
    if (isDisposing)
    {
        return;
    }
    throw new NotSupportedException("Close() is not supported. Call Dispose() instead.");
}
| #endif |
| #endregion |
| } |
| } |