/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.commons.io.input; |
| |
| import java.io.Closeable; |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.UnsupportedEncodingException; |
| import java.nio.ByteBuffer; |
| import java.nio.channels.SeekableByteChannel; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.StandardCharsets; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.nio.file.StandardOpenOption; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.List; |
| |
| import org.apache.commons.io.Charsets; |
| import org.apache.commons.io.FileSystem; |
| import org.apache.commons.io.StandardLineSeparator; |
| import org.apache.commons.io.build.AbstractStreamBuilder; |
| |
/**
 * Reads the lines of a file in reverse order (similar to a BufferedReader, but starting at the last line). Useful, for example, for searching in log files.
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 *
 * @see Builder
 * @since 2.2
 */
| public class ReversedLinesFileReader implements Closeable { |
| |
| // @formatter:off |
| /** |
| * Builds a new {@link ReversedLinesFileReader}. |
| * |
| * <p> |
| * For example: |
| * </p> |
| * <pre>{@code |
| * ReversedLinesFileReader r = ReversedLinesFileReader.builder() |
| * .setPath(path) |
| * .setBufferSize(4096) |
| * .setCharset(StandardCharsets.UTF_8) |
| * .get();} |
| * </pre> |
| * |
| * @see #get() |
| * @since 2.12.0 |
| */ |
| // @formatter:on |
| public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { |
| |
| /** |
| * Constructs a new {@link Builder}. |
| */ |
| public Builder() { |
| setBufferSizeDefault(DEFAULT_BLOCK_SIZE); |
| setBufferSize(DEFAULT_BLOCK_SIZE); |
| } |
| |
| /** |
| * Builds a new {@link ReversedLinesFileReader}. |
| * <p> |
| * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception. |
| * </p> |
| * <p> |
| * This builder use the following aspects: |
| * </p> |
| * <ul> |
| * <li>{@link #getInputStream()}</li> |
| * <li>{@link #getBufferSize()}</li> |
| * <li>{@link #getCharset()}</li> |
| * </ul> |
| * |
| * @return a new instance. |
| * @throws IllegalStateException if the {@code origin} is {@code null}. |
| * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}. |
| * @throws IOException if an I/O error occurs. |
| * @see #getPath() |
| * @see #getBufferSize() |
| * @see #getCharset() |
| */ |
| @Override |
| public ReversedLinesFileReader get() throws IOException { |
| return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset()); |
| } |
| |
| } |
| |
| private final class FilePart { |
| private final long no; |
| |
| private final byte[] data; |
| |
| private byte[] leftOver; |
| |
| private int currentLastBytePos; |
| |
| /** |
| * Constructs a new instance. |
| * |
| * @param no the part number |
| * @param length its length |
| * @param leftOverOfLastFilePart remainder |
| * @throws IOException if there is a problem reading the file |
| */ |
| private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { |
| this.no = no; |
| final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); |
| this.data = new byte[dataLength]; |
| final long off = (no - 1) * blockSize; |
| |
| // read data |
| if (no > 0 /* file not empty */) { |
| channel.position(off); |
| final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); |
| if (countRead != length) { |
| throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); |
| } |
| } |
| // copy left over part into data arr |
| if (leftOverOfLastFilePart != null) { |
| System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); |
| } |
| this.currentLastBytePos = data.length - 1; |
| this.leftOver = null; |
| } |
| |
| /** |
| * Constructs the buffer containing any leftover bytes. |
| */ |
| private void createLeftOver() { |
| final int lineLengthBytes = currentLastBytePos + 1; |
| if (lineLengthBytes > 0) { |
| // create left over for next block |
| leftOver = Arrays.copyOf(data, lineLengthBytes); |
| } else { |
| leftOver = null; |
| } |
| currentLastBytePos = -1; |
| } |
| |
| /** |
| * Finds the new-line sequence and return its length. |
| * |
| * @param data buffer to scan |
| * @param i start offset in buffer |
| * @return length of newline sequence or 0 if none found |
| */ |
| private int getNewLineMatchByteCount(final byte[] data, final int i) { |
| for (final byte[] newLineSequence : newLineSequences) { |
| boolean match = true; |
| for (int j = newLineSequence.length - 1; j >= 0; j--) { |
| final int k = i + j - (newLineSequence.length - 1); |
| match &= k >= 0 && data[k] == newLineSequence[j]; |
| } |
| if (match) { |
| return newLineSequence.length; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * Reads a line. |
| * |
| * @return the line or null |
| */ |
| private String readLine() { //NOPMD Bug in PMD |
| |
| String line = null; |
| int newLineMatchByteCount; |
| |
| final boolean isLastFilePart = no == 1; |
| |
| int i = currentLastBytePos; |
| while (i > -1) { |
| |
| if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { |
| // avoidNewlineSplitBuffer: for all except the last file part we |
| // take a few bytes to the next file part to avoid splitting of newlines |
| createLeftOver(); |
| break; // skip last few bytes and leave it to the next file part |
| } |
| |
| // --- check for newline --- |
| if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { |
| final int lineStart = i + 1; |
| final int lineLengthBytes = currentLastBytePos - lineStart + 1; |
| |
| if (lineLengthBytes < 0) { |
| throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); |
| } |
| final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); |
| |
| line = new String(lineData, charset); |
| |
| currentLastBytePos = i - newLineMatchByteCount; |
| break; // found line |
| } |
| |
| // --- move cursor --- |
| i -= byteDecrement; |
| |
| // --- end of file part handling --- |
| if (i < 0) { |
| createLeftOver(); |
| break; // end of file part |
| } |
| } |
| |
| // --- last file part handling --- |
| if (isLastFilePart && leftOver != null) { |
| // there will be no line break anymore, this is the first line of the file |
| line = new String(leftOver, charset); |
| leftOver = null; |
| } |
| |
| return line; |
| } |
| |
| /** |
| * Handles block rollover |
| * |
| * @return the new FilePart or null |
| * @throws IOException if there was a problem reading the file |
| */ |
| private FilePart rollOver() throws IOException { |
| |
| if (currentLastBytePos > -1) { |
| throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " |
| + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); |
| } |
| |
| if (no > 1) { |
| return new FilePart(no - 1, blockSize, leftOver); |
| } |
| // NO 1 was the last FilePart, we're finished |
| if (leftOver != null) { |
| throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" |
| + new String(leftOver, charset)); |
| } |
| return null; |
| } |
| } |
| |
| private static final String EMPTY_STRING = ""; |
| |
| private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); |
| |
| /** |
| * Constructs a new {@link Builder}. |
| * |
| * @return a new {@link Builder}. |
| * @since 2.12.0 |
| */ |
| public static Builder builder() { |
| return new Builder(); |
| } |
| |
| private final int blockSize; |
| private final Charset charset; |
| private final SeekableByteChannel channel; |
| private final long totalByteLength; |
| private final long totalBlockCount; |
| private final byte[][] newLineSequences; |
| private final int avoidNewlineSplitBufferSize; |
| private final int byteDecrement; |
| private FilePart currentFilePart; |
| private boolean trailingNewlineOfFileSkipped; |
| |
/**
 * Constructs a ReversedLinesFileReader with the default block size (the block size of the current file system) and
 * the platform's default encoding.
 *
 * @param file the file to be read
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
| |
/**
 * Constructs a ReversedLinesFileReader for the given file and encoding, using the default block size.
 * <p>
 * The file is converted to a {@link Path} and handed to the {@code Path}-based constructor.
 * </p>
 *
 * @param file    the file to be read
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.5
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    this(file.toPath(), charset);
}
| |
/**
 * Constructs a ReversedLinesFileReader for the given file, block size and encoding.
 * <p>
 * The file is converted to a {@link Path} and handed to the {@code Path}-based constructor.
 * </p>
 *
 * @param file      the file to be read
 * @param blockSize size of the internal buffer (for ideal performance this should match with the block size of the
 *                  underlying file system).
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.3
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this(file.toPath(), blockSize, charset);
}
| |
/**
 * Constructs a ReversedLinesFileReader for the given file and block size, with the encoding given by name.
 * <p>
 * The file is converted to a {@link Path} and handed to the {@code Path}-based constructor.
 * </p>
 *
 * @param file        the file to be read
 * @param blockSize   size of the internal buffer (for ideal performance this should match with the block size of the
 *                    underlying file system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    this(file.toPath(), blockSize, charsetName);
}
| |
/**
 * Constructs a ReversedLinesFileReader for the given path and encoding, using the default block size (the block size
 * of the current file system).
 *
 * @param file    the file to be read
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
    this(file, DEFAULT_BLOCK_SIZE, charset);
}
| |
/**
 * Constructs a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * Supported encodings are: every encoding that needs at most one byte per character, UTF-8, Shift_JIS, windows-31j,
 * x-windows-949, GBK, x-windows-950, UTF-16BE and UTF-16LE. Plain UTF-16 is rejected because the byte order must be
 * specified.
 * </p>
 * <p>
 * If construction fails after the file has been opened, the file is closed again before the exception is propagated.
 * </p>
 *
 * @param file      the file to be read
 * @param blockSize size of the internal buffer (for ideal performance this should match with the block size of the
 *                  underlying file system); must be positive.
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException                  if an I/O error occurs.
 * @throws UnsupportedEncodingException if the encoding is not supported by this reader.
 * @throws IllegalArgumentException     if {@code blockSize} is not positive.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    if (blockSize <= 0) {
        // fail early and clearly instead of dividing by zero / allocating a negative-size array later
        throw new IllegalArgumentException("blockSize must be positive, was " + blockSize);
    }
    this.blockSize = blockSize;
    this.charset = Charsets.toCharset(charset);

    // --- check & prepare encoding ---
    final CharsetEncoder charsetEncoder = this.charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(this.charset)) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (isCharset(this.charset, "Shift_JIS") // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            || isCharset(this.charset, "windows-31j") // Windows code page 932 (Japanese)
            || isCharset(this.charset, "x-windows-949") // Windows code page 949 (Korean)
            || isCharset(this.charset, "gbk") // Windows code page 936 (Simplified Chinese)
            || isCharset(this.charset, "x-windows-950")) { // Windows code page 950 (Traditional Chinese)
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(this.charset) || StandardCharsets.UTF_16LE.equals(this.charset)) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte
        // sequences, however byte order has to be specified
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(this.charset)) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
    } else {
        // report the resolved charset: the parameter may be null
        throw new UnsupportedEncodingException(
                "Encoding " + this.charset + " is not supported yet (feel free to " + "submit a patch)");
    }

    // NOTE: The new line sequences are matched in the order given, so it is
    // important that \r\n is BEFORE \n
    this.newLineSequences = new byte[][] {
        StandardLineSeparator.CRLF.getBytes(this.charset),
        StandardLineSeparator.LF.getBytes(this.charset),
        StandardLineSeparator.CR.getBytes(this.charset)
    };

    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // Open file
    this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
    try {
        this.totalByteLength = channel.size();
        int lastBlockLength = (int) (this.totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            // the file ends with a partially filled block
            this.totalBlockCount = this.totalByteLength / blockSize + 1;
        } else {
            this.totalBlockCount = this.totalByteLength / blockSize;
            if (this.totalByteLength > 0) {
                // the file size is an exact multiple of the block size
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // nobody can call close() on a half-constructed reader, so release the channel here
        try {
            channel.close();
        } catch (final IOException closeException) {
            e.addSuppressed(closeException);
        }
        throw e;
    }
}

/**
 * Tests whether {@code charset} is the charset registered under {@code name}, without failing when the running JVM
 * does not provide a charset of that name.
 *
 * @param charset the charset to test
 * @param name    a legal charset name or alias
 * @return {@code true} if a charset named {@code name} is available and equal to {@code charset}
 */
private static boolean isCharset(final Charset charset, final String name) {
    return Charset.isSupported(name) && Charset.forName(name).equals(charset);
}
| |
/**
 * Constructs a ReversedLinesFileReader for the given path and block size, with the encoding given by name.
 * <p>
 * The name is resolved to a {@link Charset} and handed to the {@code Charset}-based constructor.
 * </p>
 *
 * @param file        the file to be read
 * @param blockSize   size of the internal buffer (for ideal performance this should match with the block size of the
 *                    underlying file system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
    this(file, blockSize, Charsets.toCharset(charsetName));
}
| |
| /** |
| * Closes underlying resources. |
| * |
| * @throws IOException if an I/O error occurs. |
| */ |
| @Override |
| public void close() throws IOException { |
| channel.close(); |
| } |
| |
| /** |
| * Returns the lines of the file from bottom to top. |
| * |
| * @return the next line or null if the start of the file is reached |
| * @throws IOException if an I/O error occurs. |
| */ |
| public String readLine() throws IOException { |
| |
| String line = currentFilePart.readLine(); |
| while (line == null) { |
| currentFilePart = currentFilePart.rollOver(); |
| if (currentFilePart == null) { |
| // no more FileParts: we're done, leave line set to null |
| break; |
| } |
| line = currentFilePart.readLine(); |
| } |
| |
| // aligned behavior with BufferedReader that doesn't return a last, empty line |
| if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { |
| trailingNewlineOfFileSkipped = true; |
| line = readLine(); |
| } |
| |
| return line; |
| } |
| |
| /** |
| * Returns {@code lineCount} lines of the file from bottom to top. |
| * <p> |
| * If there are less than {@code lineCount} lines in the file, then that's what |
| * you get. |
| * </p> |
| * <p> |
| * Note: You can easily flip the result with {@link Collections#reverse(List)}. |
| * </p> |
| * |
| * @param lineCount How many lines to read. |
| * @return A new list |
| * @throws IOException if an I/O error occurs. |
| * @since 2.8.0 |
| */ |
| public List<String> readLines(final int lineCount) throws IOException { |
| if (lineCount < 0) { |
| throw new IllegalArgumentException("lineCount < 0"); |
| } |
| final ArrayList<String> arrayList = new ArrayList<>(lineCount); |
| for (int i = 0; i < lineCount; i++) { |
| final String line = readLine(); |
| if (line == null) { |
| return arrayList; |
| } |
| arrayList.add(line); |
| } |
| return arrayList; |
| } |
| |
| /** |
| * Returns the last {@code lineCount} lines of the file. |
| * <p> |
| * If there are less than {@code lineCount} lines in the file, then that's what |
| * you get. |
| * </p> |
| * |
| * @param lineCount How many lines to read. |
| * @return A String. |
| * @throws IOException if an I/O error occurs. |
| * @since 2.8.0 |
| */ |
| public String toString(final int lineCount) throws IOException { |
| final List<String> lines = readLines(lineCount); |
| Collections.reverse(lines); |
| return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); |
| } |
| |
| } |