indexer-reader/src/main/java/org/apache/maven/index/reader/ChunkReader.java - maven-indexer - Git at Google

 package org.apache.maven.index.reader;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 import java.io.Closeable;
 import java.io.DataInput;
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UTFDataFormatException;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.zip.GZIPInputStream;

 /**
  * Maven 2 Index published binary chunk reader, it reads raw Maven Indexer records from the transport binary format.
  * <p>
  * A chunk is a GZIP compressed stream that starts with a one byte version and an eight byte timestamp, followed by
  * the records. The header is consumed by the constructor, the records are parsed lazily while iterating.
  * <p>
  * Every iterator returned by {@link #iterator()} reads from the same underlying stream, so the records of a chunk
  * can be traversed only once.
  *
  * @since 5.1.2
  */
 public class ChunkReader
     implements Closeable, Iterable<Map<String, String>>
 {
     private final String chunkName;

     private final DataInputStream dataInputStream;

     private final int version;

     private final Date timestamp;

     /**
      * Opens the chunk and reads its header (version and timestamp).
      *
      * @param chunkName the name of the chunk, must not be {@code null}; surrounding whitespace is trimmed
      * @param inputStream the GZIP compressed chunk content; it is closed by {@link #close()}
      * @throws IOException if the GZIP stream cannot be opened or the header cannot be read
      */
     public ChunkReader( final String chunkName, final InputStream inputStream )
         throws IOException
     {
         this.chunkName = chunkName.trim();
         this.dataInputStream = new DataInputStream( new GZIPInputStream( inputStream, 2 * 1024 ) );
         // the version is stored as a single unsigned byte
         this.version = dataInputStream.readUnsignedByte();
         this.timestamp = new Date( dataInputStream.readLong() );
     }

     /**
      * Returns the chunk name.
      */
     public String getName()
     {
         return chunkName;
     }

     /**
      * Returns index version. All releases so far always returned {@code 1}.
      */
     public int getVersion()
     {
         return version;
     }

     /**
      * Returns the index timestamp of last update of the index.
      */
     public Date getTimestamp()
     {
         return timestamp;
     }

     /**
      * Returns the record iterator; each record is a map of field names to field values.
      * <p>
      * The first record is read eagerly, so this call already fails if the stream cannot be read.
      *
      * @return iterator over the records not yet consumed from this chunk
      * @throws RuntimeException wrapping the {@link IOException} if the first record cannot be read
      */
     public Iterator<Map<String, String>> iterator()
     {
         return new IndexIterator( dataInputStream );
     }

     /**
      * Closes this reader and its underlying input.
      */
     public void close()
         throws IOException
     {
         dataInputStream.close();
     }

     /**
      * Low memory footprint index iterator that incrementally parses the underlying stream. It always holds exactly
      * one pre-read record, {@code null} once the stream is depleted.
      */
     private static class IndexIterator
         implements Iterator<Map<String, String>>
     {
         private final DataInputStream dataInputStream;

         private Map<String, String> nextRecord;

         private IndexIterator( final DataInputStream dataInputStream )
         {
             this.dataInputStream = dataInputStream;
             this.nextRecord = nextRecord();
         }

         public boolean hasNext()
         {
             return nextRecord != null;
         }

         public Map<String, String> next()
         {
             if ( nextRecord == null )
             {
                 throw new NoSuchElementException( "chunk depleted" );
             }
             Map<String, String> result = nextRecord;
             nextRecord = nextRecord();
             return result;
         }

         public void remove()
         {
             throw new UnsupportedOperationException( "remove" );
         }

         /**
          * Reads the next record, converting the checked {@link IOException} into an unchecked one as the
          * {@link Iterator} methods cannot declare it.
          */
         private Map<String, String> nextRecord()
         {
             try
             {
                 return readRecord( dataInputStream );
             }
             catch ( IOException e )
             {
                 throw new RuntimeException( "read error", e );
             }
         }
     }

     /**
      * Reads and returns next record from the underlying stream, or {@code null} if no more records.
      *
      * @throws IOException if the record cannot be read or announces a negative number of fields
      */
     private static Map<String, String> readRecord( final DataInput dataInput )
         throws IOException
     {
         int fieldCount;
         try
         {
             fieldCount = dataInput.readInt();
         }
         catch ( EOFException ex )
         {
             return null; // no more documents
         }
         if ( fieldCount < 0 )
         {
             // a negative count would otherwise yield an empty record and leave the stream misaligned
             throw new IOException( "Index data content is corrupt: negative field count " + fieldCount );
         }

         Map<String, String> recordMap = new HashMap<String, String>();
         for ( int i = 0; i < fieldCount; i++ )
         {
             readField( recordMap, dataInput );
         }
         return recordMap;
     }

     /**
      * Reads one field (flags byte, name, value) and stores it into the passed in record.
      */
     private static void readField( final Map<String, String> record, final DataInput dataInput )
         throws IOException
     {
         dataInput.readByte(); // flags: neglect them
         String name = dataInput.readUTF();
         String value = readUTF( dataInput );
         record.put( name, value );
     }

     /**
      * Reads a field value: a four byte length followed by that many bytes holding one to three byte UTF-8
      * sequences. It differs from {@link DataInput#readUTF()} only in the width of the length prefix, which is two
      * bytes there.
      *
      * @throws UTFDataFormatException if the bytes are not a valid sequence
      * @throws IOException if the length is negative, the buffers cannot be allocated or the bytes cannot be read
      */
     private static String readUTF( final DataInput dataInput )
         throws IOException
     {
         final int utflen = dataInput.readInt();
         if ( utflen < 0 )
         {
             // report it like any other corruption instead of failing with NegativeArraySizeException
             throw new IOException( "Index data content is corrupt: negative value length " + utflen );
         }

         byte[] bytearr;
         char[] chararr;

         try
         {
             bytearr = new byte[utflen];
             chararr = new char[utflen];
         }
         catch ( OutOfMemoryError e )
         {
             // an absurdly large length is treated as corrupt data
             IOException ioex = new IOException( "Index data content is corrupt" );
             ioex.initCause( e );
             throw ioex;
         }

         int c, char2, char3;
         int count = 0;
         int chararrCount = 0;

         dataInput.readFully( bytearr, 0, utflen );

         // fast path: leading run of single byte (ASCII) characters
         while ( count < utflen )
         {
             c = bytearr[count] & 0xff;
             if ( c > 127 )
             {
                 break;
             }
             count++;
             chararr[chararrCount++] = (char) c;
         }

         // general path: the high nibble of the lead byte tells the length of the sequence
         while ( count < utflen )
         {
             c = bytearr[count] & 0xff;
             switch ( c >> 4 )
             {
                 case 0:
                 case 1:
                 case 2:
                 case 3:
                 case 4:
                 case 5:
                 case 6:
                 case 7:
                     /* 0xxxxxxx */
                     count++;
                     chararr[chararrCount++] = (char) c;
                     break;

                 case 12:
                 case 13:
                     /* 110x xxxx 10xx xxxx */
                     count += 2;
                     if ( count > utflen )
                     {
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
                     }
                     // continuation bytes are sign extended here, only the masked low bits are used below
                     char2 = bytearr[count - 1];
                     if ( ( char2 & 0xC0 ) != 0x80 )
                     {
                         throw new UTFDataFormatException( "malformed input around byte " + count );
                     }
                     chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
                     break;

                 case 14:
                     /* 1110 xxxx 10xx xxxx 10xx xxxx */
                     count += 3;
                     if ( count > utflen )
                     {
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
                     }
                     char2 = bytearr[count - 2];
                     char3 = bytearr[count - 1];
                     if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
                     {
                         throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
                     }
                     chararr[chararrCount++] =
                         (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( char3 & 0x3F ) );
                     break;

                 default:
                     /* 10xx xxxx, 1111 xxxx */
                     throw new UTFDataFormatException( "malformed input around byte " + count );
             }
         }

         // The number of chars produced may be less than utflen
         return new String( chararr, 0, chararrCount );
     }
 }
	package org.apache.maven.index.reader;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	import java.io.Closeable;
	import java.io.DataInput;
	import java.io.DataInputStream;
	import java.io.EOFException;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.UTFDataFormatException;
	import java.util.Date;
	import java.util.HashMap;
	import java.util.Iterator;
	import java.util.Map;
	import java.util.NoSuchElementException;
	import java.util.zip.GZIPInputStream;

	/**
	 * Maven 2 Index published binary chunk reader, it reads raw Maven Indexer records from the transport binary format.
	 * <p>
	 * A chunk is a GZIP compressed stream that starts with a one byte version and an eight byte timestamp, followed by
	 * the records. The header is consumed by the constructor, the records are parsed lazily while iterating.
	 * <p>
	 * Every iterator returned by {@link #iterator()} reads from the same underlying stream, so the records of a chunk
	 * can be traversed only once.
	 *
	 * @since 5.1.2
	 */
	public class ChunkReader
	    implements Closeable, Iterable<Map<String, String>>
	{
	    private final String chunkName;

	    private final DataInputStream dataInputStream;

	    private final int version;

	    private final Date timestamp;

	    /**
	     * Opens the chunk and reads its header (version and timestamp).
	     *
	     * @param chunkName the name of the chunk, must not be {@code null}; surrounding whitespace is trimmed
	     * @param inputStream the GZIP compressed chunk content; it is closed by {@link #close()}
	     * @throws IOException if the GZIP stream cannot be opened or the header cannot be read
	     */
	    public ChunkReader( final String chunkName, final InputStream inputStream )
	        throws IOException
	    {
	        this.chunkName = chunkName.trim();
	        this.dataInputStream = new DataInputStream( new GZIPInputStream( inputStream, 2 * 1024 ) );
	        // the version is stored as a single unsigned byte
	        this.version = dataInputStream.readUnsignedByte();
	        this.timestamp = new Date( dataInputStream.readLong() );
	    }

	    /**
	     * Returns the chunk name.
	     */
	    public String getName()
	    {
	        return chunkName;
	    }

	    /**
	     * Returns index version. All releases so far always returned {@code 1}.
	     */
	    public int getVersion()
	    {
	        return version;
	    }

	    /**
	     * Returns the index timestamp of last update of the index.
	     */
	    public Date getTimestamp()
	    {
	        return timestamp;
	    }

	    /**
	     * Returns the record iterator; each record is a map of field names to field values.
	     * <p>
	     * The first record is read eagerly, so this call already fails if the stream cannot be read.
	     *
	     * @return iterator over the records not yet consumed from this chunk
	     * @throws RuntimeException wrapping the {@link IOException} if the first record cannot be read
	     */
	    public Iterator<Map<String, String>> iterator()
	    {
	        return new IndexIterator( dataInputStream );
	    }

	    /**
	     * Closes this reader and its underlying input.
	     */
	    public void close()
	        throws IOException
	    {
	        dataInputStream.close();
	    }

	    /**
	     * Low memory footprint index iterator that incrementally parses the underlying stream. It always holds exactly
	     * one pre-read record, {@code null} once the stream is depleted.
	     */
	    private static class IndexIterator
	        implements Iterator<Map<String, String>>
	    {
	        private final DataInputStream dataInputStream;

	        private Map<String, String> nextRecord;

	        private IndexIterator( final DataInputStream dataInputStream )
	        {
	            this.dataInputStream = dataInputStream;
	            this.nextRecord = nextRecord();
	        }

	        public boolean hasNext()
	        {
	            return nextRecord != null;
	        }

	        public Map<String, String> next()
	        {
	            if ( nextRecord == null )
	            {
	                throw new NoSuchElementException( "chunk depleted" );
	            }
	            Map<String, String> result = nextRecord;
	            nextRecord = nextRecord();
	            return result;
	        }

	        public void remove()
	        {
	            throw new UnsupportedOperationException( "remove" );
	        }

	        /**
	         * Reads the next record, converting the checked {@link IOException} into an unchecked one as the
	         * {@link Iterator} methods cannot declare it.
	         */
	        private Map<String, String> nextRecord()
	        {
	            try
	            {
	                return readRecord( dataInputStream );
	            }
	            catch ( IOException e )
	            {
	                throw new RuntimeException( "read error", e );
	            }
	        }
	    }

	    /**
	     * Reads and returns next record from the underlying stream, or {@code null} if no more records.
	     *
	     * @throws IOException if the record cannot be read or announces a negative number of fields
	     */
	    private static Map<String, String> readRecord( final DataInput dataInput )
	        throws IOException
	    {
	        int fieldCount;
	        try
	        {
	            fieldCount = dataInput.readInt();
	        }
	        catch ( EOFException ex )
	        {
	            return null; // no more documents
	        }
	        if ( fieldCount < 0 )
	        {
	            // a negative count would otherwise yield an empty record and leave the stream misaligned
	            throw new IOException( "Index data content is corrupt: negative field count " + fieldCount );
	        }

	        Map<String, String> recordMap = new HashMap<String, String>();
	        for ( int i = 0; i < fieldCount; i++ )
	        {
	            readField( recordMap, dataInput );
	        }
	        return recordMap;
	    }

	    /**
	     * Reads one field (flags byte, name, value) and stores it into the passed in record.
	     */
	    private static void readField( final Map<String, String> record, final DataInput dataInput )
	        throws IOException
	    {
	        dataInput.readByte(); // flags: neglect them
	        String name = dataInput.readUTF();
	        String value = readUTF( dataInput );
	        record.put( name, value );
	    }

	    /**
	     * Reads a field value: a four byte length followed by that many bytes holding one to three byte UTF-8
	     * sequences. It differs from {@link DataInput#readUTF()} only in the width of the length prefix, which is two
	     * bytes there.
	     *
	     * @throws UTFDataFormatException if the bytes are not a valid sequence
	     * @throws IOException if the length is negative, the buffers cannot be allocated or the bytes cannot be read
	     */
	    private static String readUTF( final DataInput dataInput )
	        throws IOException
	    {
	        final int utflen = dataInput.readInt();
	        if ( utflen < 0 )
	        {
	            // report it like any other corruption instead of failing with NegativeArraySizeException
	            throw new IOException( "Index data content is corrupt: negative value length " + utflen );
	        }

	        byte[] bytearr;
	        char[] chararr;

	        try
	        {
	            bytearr = new byte[utflen];
	            chararr = new char[utflen];
	        }
	        catch ( OutOfMemoryError e )
	        {
	            // an absurdly large length is treated as corrupt data
	            IOException ioex = new IOException( "Index data content is corrupt" );
	            ioex.initCause( e );
	            throw ioex;
	        }

	        int c, char2, char3;
	        int count = 0;
	        int chararrCount = 0;

	        dataInput.readFully( bytearr, 0, utflen );

	        // fast path: leading run of single byte (ASCII) characters
	        while ( count < utflen )
	        {
	            c = bytearr[count] & 0xff;
	            if ( c > 127 )
	            {
	                break;
	            }
	            count++;
	            chararr[chararrCount++] = (char) c;
	        }

	        // general path: the high nibble of the lead byte tells the length of the sequence
	        while ( count < utflen )
	        {
	            c = bytearr[count] & 0xff;
	            switch ( c >> 4 )
	            {
	                case 0:
	                case 1:
	                case 2:
	                case 3:
	                case 4:
	                case 5:
	                case 6:
	                case 7:
	                    /* 0xxxxxxx */
	                    count++;
	                    chararr[chararrCount++] = (char) c;
	                    break;

	                case 12:
	                case 13:
	                    /* 110x xxxx 10xx xxxx */
	                    count += 2;
	                    if ( count > utflen )
	                    {
	                        throw new UTFDataFormatException( "malformed input: partial character at end" );
	                    }
	                    // continuation bytes are sign extended here, only the masked low bits are used below
	                    char2 = bytearr[count - 1];
	                    if ( ( char2 & 0xC0 ) != 0x80 )
	                    {
	                        throw new UTFDataFormatException( "malformed input around byte " + count );
	                    }
	                    chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
	                    break;

	                case 14:
	                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
	                    count += 3;
	                    if ( count > utflen )
	                    {
	                        throw new UTFDataFormatException( "malformed input: partial character at end" );
	                    }
	                    char2 = bytearr[count - 2];
	                    char3 = bytearr[count - 1];
	                    if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
	                    {
	                        throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
	                    }
	                    chararr[chararrCount++] =
	                        (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( char3 & 0x3F ) );
	                    break;

	                default:
	                    /* 10xx xxxx, 1111 xxxx */
	                    throw new UTFDataFormatException( "malformed input around byte " + count );
	            }
	        }

	        // The number of chars produced may be less than utflen
	        return new String( chararr, 0, chararrCount );
	    }
	}