modules/core/src/main/java/org/apache/ignite/internal/util/io/GridReversedLinesFileReader.java - ignite - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.ignite.internal.util.io;

 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.UnsupportedCharsetException;

 /**
  * Reads the lines of a file in reverse order (similar to a BufferedReader, but starting at
  * the last line). Useful for e.g. searching in log files.
  * <p>
  * The file is scanned backwards in blocks of {@code blockSize} bytes. Accepted encodings are the
  * single-byte charsets, UTF-8, Shift_JIS, UTF-16BE and UTF-16LE; {@code \r\n}, {@code \n} and
  * {@code \r} are recognized as line terminators.
  */
 @SuppressWarnings("ALL")
 public class GridReversedLinesFileReader implements Closeable {
     /** Block size used by the single-argument constructor. */
     private static final int DFLT_BLOCK_SIZE = 4096;

     /** Number of bytes read from the file per block. */
     private final int blockSize;

     /** Charset used to decode lines and to encode the newline sequences. */
     private final Charset encoding;

     /** Underlying file, opened read-only. */
     private final RandomAccessFile randomAccessFile;

     /** Length of the file in bytes, sampled when the reader was created. */
     private final long totalByteLength;

     /** Number of blocks the file is divided into (0 for an empty file). */
     private final long totalBlockCount;

     /** Encoded newline sequences in matching order: CRLF, LF, CR. */
     private final byte[][] newLineSequences;

     /** Length in bytes of the encoded CRLF sequence. */
     private final int avoidNewlineSplitBufferSize;

     /** Number of bytes the scan cursor moves backwards per step. */
     private final int byteDecrement;

     /** Block currently being scanned; {@code null} once the start of the file has been passed. */
     private FilePart currentFilePart;

     /** Set once the first line has been inspected for the artifact produced by a trailing newline. */
     private boolean trailingNewlineOfFileSkipped = false;

     /**
      * Creates a reader with the default block size of 4KB and the platform's default encoding.
      *
      * @param file the file to be read
      * @throws IOException if an I/O error occurs
      */
     public GridReversedLinesFileReader(final File file) throws IOException {
         this(file, DFLT_BLOCK_SIZE, Charset.defaultCharset());
     }

     /**
      * Creates a reader with the given block size and encoding.
      *
      * @param file the file to be read
      * @param blockSize size of the internal buffer in bytes, must be positive (for ideal performance
      *            this should match the block size of the underlying file system)
      * @param charset the encoding of the file
      * @throws IllegalArgumentException if {@code blockSize} is not positive
      * @throws UnsupportedEncodingException if the encoding is not one of the supported ones
      * @throws IOException if an I/O error occurs
      */
     public GridReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
         if (blockSize <= 0) {
             throw new IllegalArgumentException("Block size must be positive: " + blockSize);
         }
         this.blockSize = blockSize;
         this.encoding = charset;

         // --- check & prepare encoding ---
         // Done before the file is opened so that a rejected encoding cannot leave an open file handle behind.
         final CharsetEncoder charsetEncoder = charset.newEncoder();
         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
         // Charsets are equal when their canonical names are equal, so compare names instead of instances.
         final String charsetName = charset.name();
         if (maxBytesPerChar == 1f) {
             // all one byte encodings are no problem
             byteDecrement = 1;
         } else if ("UTF-8".equals(charsetName)) {
             // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
             // http://en.wikipedia.org/wiki/UTF-8
             byteDecrement = 1;
         } else if ("Shift_JIS".equals(charsetName)) {
             // Same as for UTF-8
             // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
             byteDecrement = 1;
         } else if ("UTF-16BE".equals(charsetName) || "UTF-16LE".equals(charsetName)) {
             // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
             // however byte order has to be specified
             byteDecrement = 2;
         } else if ("UTF-16".equals(charsetName)) {
             throw new UnsupportedEncodingException(
                 "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
         } else {
             throw new UnsupportedEncodingException(
                 "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
         }
         // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
         newLineSequences = new byte[][] {"\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset)};

         avoidNewlineSplitBufferSize = newLineSequences[0].length;

         // --- open the file and load its last block ---
         randomAccessFile = new RandomAccessFile(file, "r");
         try {
             totalByteLength = randomAccessFile.length();
             int lastBlockLength = (int) (totalByteLength % blockSize);
             if (lastBlockLength > 0) {
                 totalBlockCount = totalByteLength / blockSize + 1;
             } else {
                 totalBlockCount = totalByteLength / blockSize;
                 if (totalByteLength > 0) {
                     // the file length is an exact multiple of the block size: the last block is a full one
                     lastBlockLength = blockSize;
                 }
             }
             currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
         } catch (IOException | RuntimeException e) {
             // the caller never receives the reader, so nobody else could close the file
             try {
                 randomAccessFile.close();
             } catch (IOException closeErr) {
                 e.addSuppressed(closeErr);
             }
             throw e;
         }
     }

     /**
      * Creates a reader with the given block size and encoding.
      *
      * @param file the file to be read
      * @param blockSize size of the internal buffer in bytes, must be positive (for ideal performance
      *            this should match the block size of the underlying file system)
      * @param encoding the name of the encoding of the file
      * @throws IllegalArgumentException if {@code blockSize} is not positive
      * @throws UnsupportedCharsetException if no charset with the given name is available
      * @throws UnsupportedEncodingException if the encoding is not one of the supported ones
      * @throws IOException if an I/O error occurs
      */
     public GridReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
         this(file, blockSize, Charset.forName(encoding));
     }

     /**
      * Returns the lines of the file from bottom to top.
      *
      * @return the next line, or {@code null} if the start of the file is reached; further calls keep
      *         returning {@code null}
      * @throws IOException if an I/O error occurs
      */
     public String readLine() throws IOException {
         if (currentFilePart == null) {
             // start of the file was already reached by an earlier call
             return null;
         }

         String line = currentFilePart.readLine();
         while (line == null) {
             currentFilePart = currentFilePart.rollOver();
             if (currentFilePart == null) {
                 // no more file parts: we're done, leave line set to null
                 break;
             }
             line = currentFilePart.readLine();
         }

         // Aligned behaviour with BufferedReader, which doesn't return a last, empty line.
         // Only the very first line can be that artifact; later empty lines are real content.
         if (!trailingNewlineOfFileSkipped) {
             trailingNewlineOfFileSkipped = true;
             if ("".equals(line)) {
                 line = readLine();
             }
         }

         return line;
     }

     /**
      * Closes underlying resources.
      *
      * @throws IOException if an I/O error occurs
      */
     @Override
     public void close() throws IOException {
         randomAccessFile.close();
     }

     /** One block of the file plus the unfinished line handed over from the block that follows it. */
     private class FilePart {
         /** 1-based number of this block; block 1 starts at file offset 0. */
         private final long no;

         /** Bytes of this block followed by the left-over bytes of the previously scanned block. */
         private final byte[] data;

         /** Bytes at the beginning of {@link #data} that belong to a line starting in an earlier block. */
         private byte[] leftOver;

         /** Index in {@link #data} of the last byte not yet returned as part of a line; -1 when exhausted. */
         private int currentLastBytePos;

         /**
          * Loads a block from the file and appends the left-over bytes of the previously scanned block.
          *
          * @param no the 1-based part number (0 for an empty file)
          * @param length number of bytes to load from the file
          * @param leftOverOfLastFilePart remainder of the previously scanned part, may be {@code null}
          * @throws IOException if there is a problem reading the file
          */
         private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
             this.no = no;
             int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
             this.data = new byte[dataLength];
             final long off = (no - 1) * blockSize;

             // read data
             if (no > 0 /* file not empty */) {
                 randomAccessFile.seek(off);
                 // a single read() may deliver fewer bytes than requested, so keep reading until full or EOF
                 int countRead = 0;
                 while (countRead < length) {
                     final int chunk = randomAccessFile.read(data, countRead, length - countRead);
                     if (chunk < 0) {
                         break;
                     }
                     countRead += chunk;
                 }
                 if (countRead != length) {
                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                 }
             }
             // copy left over part into data arr
             if (leftOverOfLastFilePart != null) {
                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
             }
             this.currentLastBytePos = data.length - 1;
             this.leftOver = null;
         }

         /**
          * Handles block rollover: moves on to the block that precedes this one in the file.
          *
          * @return the new FilePart, or {@code null} if this was the first block of the file
          * @throws IOException if there was a problem reading the file
          */
         private FilePart rollOver() throws IOException {

             if (currentLastBytePos > -1) {
                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                     + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
             }

             if (no > 1) {
                 return new FilePart(no - 1, blockSize, leftOver);
             } else {
                 // NO 1 was the last FilePart, we're finished
                 if (leftOver != null) {
                     throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                         + new String(leftOver, encoding));
                 }
                 return null;
             }
         }

         /**
          * Reads the next line (scanning backwards) from this part.
          *
          * @return the line, or {@code null} if this part holds no further complete line
          * @throws IOException if there is an error reading from the file
          */
         private String readLine() throws IOException {

             String line = null;
             int newLineMatchByteCount;

             // part number 1 is the beginning of the file, i.e. the last part to be scanned
             boolean isLastFilePart = no == 1;

             int i = currentLastBytePos;
             while (i > -1) {

                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                     // avoidNewlineSplitBuffer: for all except the last file part we
                     // take a few bytes to the next file part to avoid splitting of newlines
                     createLeftOver();
                     break; // skip last few bytes and leave it to the next file part
                 }

                 // --- check for newline ---
                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                     final int lineStart = i + 1;
                     int lineLengthBytes = currentLastBytePos - lineStart + 1;

                     if (lineLengthBytes < 0) {
                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                     }
                     byte[] lineData = new byte[lineLengthBytes];
                     System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                     line = new String(lineData, encoding);

                     // continue the next scan just before the newline sequence
                     currentLastBytePos = i - newLineMatchByteCount;
                     break; // found line
                 }

                 // --- move cursor ---
                 i -= byteDecrement;

                 // --- end of file part handling ---
                 if (i < 0) {
                     createLeftOver();
                     break; // end of file part
                 }
             }

             // --- last file part handling ---
             if (isLastFilePart && leftOver != null) {
                 // there will be no line break anymore, this is the first line of the file
                 line = new String(leftOver, encoding);
                 leftOver = null;
             }

             return line;
         }

         /**
          * Stores the not yet consumed bytes at the start of the buffer as left-over for the next
          * part and marks this part as exhausted.
          */
         private void createLeftOver() {
             int lineLengthBytes = currentLastBytePos + 1;
             if (lineLengthBytes > 0) {
                 // create left over for next block
                 leftOver = new byte[lineLengthBytes];
                 System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
             } else {
                 leftOver = null;
             }
             currentLastBytePos = -1;
         }

         /**
          * Checks whether a newline sequence ends at the given position and returns its length.
          *
          * @param data buffer to scan
          * @param i index of the last byte of the candidate newline sequence
          * @return length in bytes of the matching newline sequence, or 0 if none ends at {@code i}
          */
         private int getNewLineMatchByteCount(byte[] data, int i) {
             for (byte[] newLineSequence : newLineSequences) {
                 boolean match = true;
                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
                     // k is the buffer index aligned with byte j of the sequence; negative when the
                     // sequence would start before the buffer
                     int k = i + j - (newLineSequence.length - 1);
                     match &= k >= 0 && data[k] == newLineSequence[j];
                 }
                 if (match) {
                     return newLineSequence.length;
                 }
             }
             return 0;
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.ignite.internal.util.io;

	import java.io.Closeable;
	import java.io.File;
	import java.io.IOException;
	import java.io.RandomAccessFile;
	import java.io.UnsupportedEncodingException;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.UnsupportedCharsetException;

	/**
	 * Reads the lines of a file in reverse order (similar to a BufferedReader, but starting at
	 * the last line). Useful for e.g. searching in log files.
	 * <p>
	 * The file is scanned backwards in blocks of {@code blockSize} bytes. Accepted encodings are the
	 * single-byte charsets, UTF-8, Shift_JIS, UTF-16BE and UTF-16LE; {@code \r\n}, {@code \n} and
	 * {@code \r} are recognized as line terminators.
	 */
	@SuppressWarnings("ALL")
	public class GridReversedLinesFileReader implements Closeable {
	    /** Block size used by the single-argument constructor. */
	    private static final int DFLT_BLOCK_SIZE = 4096;

	    /** Number of bytes read from the file per block. */
	    private final int blockSize;

	    /** Charset used to decode lines and to encode the newline sequences. */
	    private final Charset encoding;

	    /** Underlying file, opened read-only. */
	    private final RandomAccessFile randomAccessFile;

	    /** Length of the file in bytes, sampled when the reader was created. */
	    private final long totalByteLength;

	    /** Number of blocks the file is divided into (0 for an empty file). */
	    private final long totalBlockCount;

	    /** Encoded newline sequences in matching order: CRLF, LF, CR. */
	    private final byte[][] newLineSequences;

	    /** Length in bytes of the encoded CRLF sequence. */
	    private final int avoidNewlineSplitBufferSize;

	    /** Number of bytes the scan cursor moves backwards per step. */
	    private final int byteDecrement;

	    /** Block currently being scanned; {@code null} once the start of the file has been passed. */
	    private FilePart currentFilePart;

	    /** Set once the first line has been inspected for the artifact produced by a trailing newline. */
	    private boolean trailingNewlineOfFileSkipped = false;

	    /**
	     * Creates a reader with the default block size of 4KB and the platform's default encoding.
	     *
	     * @param file the file to be read
	     * @throws IOException if an I/O error occurs
	     */
	    public GridReversedLinesFileReader(final File file) throws IOException {
	        this(file, DFLT_BLOCK_SIZE, Charset.defaultCharset());
	    }

	    /**
	     * Creates a reader with the given block size and encoding.
	     *
	     * @param file the file to be read
	     * @param blockSize size of the internal buffer in bytes, must be positive (for ideal performance
	     *            this should match the block size of the underlying file system)
	     * @param charset the encoding of the file
	     * @throws IllegalArgumentException if {@code blockSize} is not positive
	     * @throws UnsupportedEncodingException if the encoding is not one of the supported ones
	     * @throws IOException if an I/O error occurs
	     */
	    public GridReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
	        if (blockSize <= 0) {
	            throw new IllegalArgumentException("Block size must be positive: " + blockSize);
	        }
	        this.blockSize = blockSize;
	        this.encoding = charset;

	        // --- check & prepare encoding ---
	        // Done before the file is opened so that a rejected encoding cannot leave an open file handle behind.
	        final CharsetEncoder charsetEncoder = charset.newEncoder();
	        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
	        // Charsets are equal when their canonical names are equal, so compare names instead of instances.
	        final String charsetName = charset.name();
	        if (maxBytesPerChar == 1f) {
	            // all one byte encodings are no problem
	            byteDecrement = 1;
	        } else if ("UTF-8".equals(charsetName)) {
	            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
	            // http://en.wikipedia.org/wiki/UTF-8
	            byteDecrement = 1;
	        } else if ("Shift_JIS".equals(charsetName)) {
	            // Same as for UTF-8
	            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
	            byteDecrement = 1;
	        } else if ("UTF-16BE".equals(charsetName) || "UTF-16LE".equals(charsetName)) {
	            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
	            // however byte order has to be specified
	            byteDecrement = 2;
	        } else if ("UTF-16".equals(charsetName)) {
	            throw new UnsupportedEncodingException(
	                "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
	        } else {
	            throw new UnsupportedEncodingException(
	                "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
	        }
	        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
	        newLineSequences = new byte[][] {"\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset)};

	        avoidNewlineSplitBufferSize = newLineSequences[0].length;

	        // --- open the file and load its last block ---
	        randomAccessFile = new RandomAccessFile(file, "r");
	        try {
	            totalByteLength = randomAccessFile.length();
	            int lastBlockLength = (int) (totalByteLength % blockSize);
	            if (lastBlockLength > 0) {
	                totalBlockCount = totalByteLength / blockSize + 1;
	            } else {
	                totalBlockCount = totalByteLength / blockSize;
	                if (totalByteLength > 0) {
	                    // the file length is an exact multiple of the block size: the last block is a full one
	                    lastBlockLength = blockSize;
	                }
	            }
	            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
	        } catch (IOException | RuntimeException e) {
	            // the caller never receives the reader, so nobody else could close the file
	            try {
	                randomAccessFile.close();
	            } catch (IOException closeErr) {
	                e.addSuppressed(closeErr);
	            }
	            throw e;
	        }
	    }

	    /**
	     * Creates a reader with the given block size and encoding.
	     *
	     * @param file the file to be read
	     * @param blockSize size of the internal buffer in bytes, must be positive (for ideal performance
	     *            this should match the block size of the underlying file system)
	     * @param encoding the name of the encoding of the file
	     * @throws IllegalArgumentException if {@code blockSize} is not positive
	     * @throws UnsupportedCharsetException if no charset with the given name is available
	     * @throws UnsupportedEncodingException if the encoding is not one of the supported ones
	     * @throws IOException if an I/O error occurs
	     */
	    public GridReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
	        this(file, blockSize, Charset.forName(encoding));
	    }

	    /**
	     * Returns the lines of the file from bottom to top.
	     *
	     * @return the next line, or {@code null} if the start of the file is reached; further calls keep
	     *         returning {@code null}
	     * @throws IOException if an I/O error occurs
	     */
	    public String readLine() throws IOException {
	        if (currentFilePart == null) {
	            // start of the file was already reached by an earlier call
	            return null;
	        }

	        String line = currentFilePart.readLine();
	        while (line == null) {
	            currentFilePart = currentFilePart.rollOver();
	            if (currentFilePart == null) {
	                // no more file parts: we're done, leave line set to null
	                break;
	            }
	            line = currentFilePart.readLine();
	        }

	        // Aligned behaviour with BufferedReader, which doesn't return a last, empty line.
	        // Only the very first line can be that artifact; later empty lines are real content.
	        if (!trailingNewlineOfFileSkipped) {
	            trailingNewlineOfFileSkipped = true;
	            if ("".equals(line)) {
	                line = readLine();
	            }
	        }

	        return line;
	    }

	    /**
	     * Closes underlying resources.
	     *
	     * @throws IOException if an I/O error occurs
	     */
	    @Override
	    public void close() throws IOException {
	        randomAccessFile.close();
	    }

	    /** One block of the file plus the unfinished line handed over from the block that follows it. */
	    private class FilePart {
	        /** 1-based number of this block; block 1 starts at file offset 0. */
	        private final long no;

	        /** Bytes of this block followed by the left-over bytes of the previously scanned block. */
	        private final byte[] data;

	        /** Bytes at the beginning of {@link #data} that belong to a line starting in an earlier block. */
	        private byte[] leftOver;

	        /** Index in {@link #data} of the last byte not yet returned as part of a line; -1 when exhausted. */
	        private int currentLastBytePos;

	        /**
	         * Loads a block from the file and appends the left-over bytes of the previously scanned block.
	         *
	         * @param no the 1-based part number (0 for an empty file)
	         * @param length number of bytes to load from the file
	         * @param leftOverOfLastFilePart remainder of the previously scanned part, may be {@code null}
	         * @throws IOException if there is a problem reading the file
	         */
	        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
	            this.no = no;
	            int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
	            this.data = new byte[dataLength];
	            final long off = (no - 1) * blockSize;

	            // read data
	            if (no > 0 /* file not empty */) {
	                randomAccessFile.seek(off);
	                // a single read() may deliver fewer bytes than requested, so keep reading until full or EOF
	                int countRead = 0;
	                while (countRead < length) {
	                    final int chunk = randomAccessFile.read(data, countRead, length - countRead);
	                    if (chunk < 0) {
	                        break;
	                    }
	                    countRead += chunk;
	                }
	                if (countRead != length) {
	                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
	                }
	            }
	            // copy left over part into data arr
	            if (leftOverOfLastFilePart != null) {
	                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
	            }
	            this.currentLastBytePos = data.length - 1;
	            this.leftOver = null;
	        }

	        /**
	         * Handles block rollover: moves on to the block that precedes this one in the file.
	         *
	         * @return the new FilePart, or {@code null} if this was the first block of the file
	         * @throws IOException if there was a problem reading the file
	         */
	        private FilePart rollOver() throws IOException {

	            if (currentLastBytePos > -1) {
	                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
	                    + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
	            }

	            if (no > 1) {
	                return new FilePart(no - 1, blockSize, leftOver);
	            } else {
	                // NO 1 was the last FilePart, we're finished
	                if (leftOver != null) {
	                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
	                        + new String(leftOver, encoding));
	                }
	                return null;
	            }
	        }

	        /**
	         * Reads the next line (scanning backwards) from this part.
	         *
	         * @return the line, or {@code null} if this part holds no further complete line
	         * @throws IOException if there is an error reading from the file
	         */
	        private String readLine() throws IOException {

	            String line = null;
	            int newLineMatchByteCount;

	            // part number 1 is the beginning of the file, i.e. the last part to be scanned
	            boolean isLastFilePart = no == 1;

	            int i = currentLastBytePos;
	            while (i > -1) {

	                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
	                    // avoidNewlineSplitBuffer: for all except the last file part we
	                    // take a few bytes to the next file part to avoid splitting of newlines
	                    createLeftOver();
	                    break; // skip last few bytes and leave it to the next file part
	                }

	                // --- check for newline ---
	                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
	                    final int lineStart = i + 1;
	                    int lineLengthBytes = currentLastBytePos - lineStart + 1;

	                    if (lineLengthBytes < 0) {
	                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
	                    }
	                    byte[] lineData = new byte[lineLengthBytes];
	                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

	                    line = new String(lineData, encoding);

	                    // continue the next scan just before the newline sequence
	                    currentLastBytePos = i - newLineMatchByteCount;
	                    break; // found line
	                }

	                // --- move cursor ---
	                i -= byteDecrement;

	                // --- end of file part handling ---
	                if (i < 0) {
	                    createLeftOver();
	                    break; // end of file part
	                }
	            }

	            // --- last file part handling ---
	            if (isLastFilePart && leftOver != null) {
	                // there will be no line break anymore, this is the first line of the file
	                line = new String(leftOver, encoding);
	                leftOver = null;
	            }

	            return line;
	        }

	        /**
	         * Stores the not yet consumed bytes at the start of the buffer as left-over for the next
	         * part and marks this part as exhausted.
	         */
	        private void createLeftOver() {
	            int lineLengthBytes = currentLastBytePos + 1;
	            if (lineLengthBytes > 0) {
	                // create left over for next block
	                leftOver = new byte[lineLengthBytes];
	                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
	            } else {
	                leftOver = null;
	            }
	            currentLastBytePos = -1;
	        }

	        /**
	         * Checks whether a newline sequence ends at the given position and returns its length.
	         *
	         * @param data buffer to scan
	         * @param i index of the last byte of the candidate newline sequence
	         * @return length in bytes of the matching newline sequence, or 0 if none ends at {@code i}
	         */
	        private int getNewLineMatchByteCount(byte[] data, int i) {
	            for (byte[] newLineSequence : newLineSequences) {
	                boolean match = true;
	                for (int j = newLineSequence.length - 1; j >= 0; j--) {
	                    // k is the buffer index aligned with byte j of the sequence; negative when the
	                    // sequence would start before the buffer
	                    int k = i + j - (newLineSequence.length - 1);
	                    match &= k >= 0 && data[k] == newLineSequence[j];
	                }
	                if (match) {
	                    return newLineSequence.length;
	                }
	            }
	            return 0;
	        }
	    }
	}