| package org.apache.maven.index.reader; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import java.io.Closeable; |
| import java.io.DataInput; |
| import java.io.DataInputStream; |
| import java.io.EOFException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.UTFDataFormatException; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.NoSuchElementException; |
| import java.util.zip.GZIPInputStream; |
| |
/**
 * Maven 2 Index published binary chunk reader, it reads raw Maven Indexer records from the transport binary format.
 * <p>
 * The chunk is a GZIP compressed stream laid out as follows:
 * <ul>
 * <li>one byte: index version (read as an unsigned value),</li>
 * <li>one {@code long}: index timestamp in milliseconds,</li>
 * <li>zero or more records, each being an {@code int} field count followed by that many fields,</li>
 * <li>each field being one flag byte (ignored), a name written in {@link DataInput#readUTF()} format and a value
 * made of an {@code int} byte length followed by that many bytes of one-, two- or three-byte encoded
 * characters.</li>
 * </ul>
 * Records are exposed as plain {@code Map<String, String>} instances keyed by field name. All iterators obtained
 * from one reader consume the same underlying stream, so a chunk can be traversed only once.
 *
 * @since 5.1.2
 */
public class ChunkReader
    implements Closeable, Iterable<Map<String, String>>
{
    private final String chunkName;

    private final DataInputStream dataInputStream;

    private final int version;

    private final Date timestamp;

    /**
     * Wraps the given stream into a GZIP decompressing stream and eagerly reads the chunk header (version byte and
     * timestamp).
     * <p>
     * If this constructor throws, the passed in stream is not closed by this class; the caller remains responsible
     * for closing it.
     *
     * @param chunkName the chunk name, stored trimmed; must not be {@code null}
     * @param inputStream the GZIP compressed chunk content
     * @throws IOException if the stream is not a GZIP stream or the header cannot be read
     */
    public ChunkReader( final String chunkName, final InputStream inputStream )
        throws IOException
    {
        this.chunkName = chunkName.trim();
        this.dataInputStream = new DataInputStream( new GZIPInputStream( inputStream, 2 * 1024 ) );
        this.version = ( (int) dataInputStream.readByte() ) & 0xff;
        this.timestamp = new Date( dataInputStream.readLong() );
    }

    /**
     * Returns the chunk name.
     */
    public String getName()
    {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion()
    {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index.
     */
    public Date getTimestamp()
    {
        return timestamp;
    }

    /**
     * Returns an iterator over the raw records of this chunk, each record being a map of field names to field
     * values.
     * <p>
     * The iterator reads from the reader's single underlying stream and pre-fetches one record as soon as it is
     * created, so every call of this method consumes data: records handed to (or pre-fetched by) an earlier iterator
     * are not seen by a later one.
     *
     * @throws RuntimeException with the causing {@link IOException} if the first record cannot be read; the same
     *             applies to {@link Iterator#next()} for subsequent records
     */
    public Iterator<Map<String, String>> iterator()
    {
        return new IndexIterator( dataInputStream );
    }

    /**
     * Closes this reader and its underlying input.
     */
    public void close()
        throws IOException
    {
        dataInputStream.close();
    }

    /**
     * Low memory footprint index iterator that incrementally parses the underlying stream.
     */
    private static class IndexIterator
        implements Iterator<Map<String, String>>
    {
        private final DataInputStream dataInputStream;

        /** The pre-fetched record that the next call of {@link #next()} returns, {@code null} once depleted. */
        private Map<String, String> nextRecord;

        private IndexIterator( final DataInputStream dataInputStream )
        {
            this.dataInputStream = dataInputStream;
            this.nextRecord = fetchRecord();
        }

        public boolean hasNext()
        {
            return nextRecord != null;
        }

        public Map<String, String> next()
        {
            if ( nextRecord == null )
            {
                throw new NoSuchElementException( "chunk depleted" );
            }
            Map<String, String> result = nextRecord;
            nextRecord = fetchRecord();
            return result;
        }

        public void remove()
        {
            throw new UnsupportedOperationException( "remove" );
        }

        /**
         * Reads the next record, converting the checked {@link IOException} into a {@link RuntimeException} because
         * the {@link Iterator} contract leaves no room for checked exceptions.
         */
        private Map<String, String> fetchRecord()
        {
            try
            {
                return readRecord( dataInputStream );
            }
            catch ( IOException e )
            {
                throw new RuntimeException( "read error", e );
            }
        }
    }

    /**
     * Reads and returns next record from the underlying stream, or {@code null} if no more records.
     * <p>
     * Only an end of stream hit while reading the field count is treated as "no more records"; an end of stream in
     * the middle of a record propagates as {@link EOFException}.
     */
    private static Map<String, String> readRecord( final DataInput dataInput )
        throws IOException
    {
        int fieldCount;
        try
        {
            fieldCount = dataInput.readInt();
        }
        catch ( EOFException ex )
        {
            return null; // no more documents
        }

        Map<String, String> recordMap = new HashMap<String, String>();
        for ( int i = 0; i < fieldCount; i++ )
        {
            readField( recordMap, dataInput );
        }
        return recordMap;
    }

    /**
     * Reads one field (flag byte, name, value) and stores it into the given record; a repeated name overwrites the
     * previously stored value.
     */
    private static void readField( final Map<String, String> record, final DataInput dataInput )
        throws IOException
    {
        dataInput.readByte(); // flags: neglect them
        String name = dataInput.readUTF();
        String value = readUTF( dataInput );
        record.put( name, value );
    }

    /**
     * Reads a string value: an {@code int} byte length followed by that many bytes, decoded as one-, two- and
     * three-byte character sequences in the manner of {@link DataInput#readUTF()}, which is limited to a two-byte
     * length prefix.
     *
     * @throws UTFDataFormatException if the bytes are not a valid encoding
     * @throws IOException if the announced length is negative or too large to allocate (corrupt data), or reading
     *             fails
     */
    private static String readUTF( final DataInput dataInput )
        throws IOException
    {
        int utflen = dataInput.readInt();

        // The length comes straight from the (possibly corrupt) index data: reject negative values here, otherwise
        // the array allocation below would fail with an unchecked NegativeArraySizeException.
        if ( utflen < 0 )
        {
            throw new IOException( "Index data content is corrupt: negative string length " + utflen );
        }

        byte[] bytearr;
        char[] chararr;

        try
        {
            bytearr = new byte[utflen];
            chararr = new char[utflen];
        }
        catch ( OutOfMemoryError e )
        {
            // an absurdly large length is a symptom of corrupt data as well
            IOException ioex = new IOException( "Index data content is corrupt" );
            ioex.initCause( e );
            throw ioex;
        }

        int c, char2, char3;
        int count = 0;
        int chararrCount = 0;

        dataInput.readFully( bytearr, 0, utflen );

        // fast path: copy the leading run of plain 7-bit characters without going through the switch
        while ( count < utflen )
        {
            c = bytearr[count] & 0xff;
            if ( c > 127 )
            {
                break;
            }
            count++;
            chararr[chararrCount++] = (char) c;
        }

        // general path: dispatch on the high nibble of the lead byte
        while ( count < utflen )
        {
            c = bytearr[count] & 0xff;
            switch ( c >> 4 )
            {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    count++;
                    chararr[chararrCount++] = (char) c;
                    break;

                case 12:
                case 13:
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if ( count > utflen )
                    {
                        throw new UTFDataFormatException( "malformed input: partial character at end" );
                    }
                    char2 = bytearr[count - 1];
                    if ( ( char2 & 0xC0 ) != 0x80 )
                    {
                        throw new UTFDataFormatException( "malformed input around byte " + count );
                    }
                    chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
                    break;

                case 14:
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if ( count > utflen )
                    {
                        throw new UTFDataFormatException( "malformed input: partial character at end" );
                    }
                    char2 = bytearr[count - 2];
                    char3 = bytearr[count - 1];
                    if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
                    {
                        throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
                    }
                    chararr[chararrCount++] =
                        (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( char3 & 0x3F ) );
                    break;

                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new UTFDataFormatException( "malformed input around byte " + count );
            }
        }

        // The number of chars produced may be less than utflen
        return new String( chararr, 0, chararrCount );
    }
}