package org.apache.maven.index.updater;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.BufferedInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import com.google.common.base.Strings;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
/**
* An index data reader used to parse the transfer index format.
*
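* <p>
* A minimal usage sketch (the {@code indexWriter} and {@code indexingContext} arguments are assumed to be
* set up elsewhere; the file name is only illustrative):
* </p>
* <pre>{@code
* try ( InputStream is = new BufferedInputStream( new FileInputStream( "nexus-maven-repository-index.gz" ) ) )
* {
*     IndexDataReader reader = new IndexDataReader( is );
*     IndexDataReader.IndexDataReadResult result = reader.readIndex( indexWriter, indexingContext );
*     System.out.println( "Read " + result.getDocumentCount() + " documents" );
* }
* }</pre>
*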
* @author Eugene Kuleshov
*/
public class IndexDataReader
{
private final DataInputStream dis;
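/**
* Creates a reader over the given stream. The index data may already have been transparently decompressed
* by the transport (see MINDEXER-13), so the first two bytes are peeked at and the stream is only wrapped
* in a {@link GZIPInputStream} when the GZIP magic header is present. The supplied stream must therefore
* support {@link InputStream#mark(int)} / {@link InputStream#reset()}.
*/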
public IndexDataReader( final InputStream is )
throws IOException
{
// MINDEXER-13
// LightweightHttpWagon may have performed automatic decompression
// Handle it transparently
is.mark( 2 );
InputStream data;
if ( is.read() == 0x1f && is.read() == 0x8b ) // GZIPInputStream.GZIP_MAGIC
{
is.reset();
data = new BufferedInputStream( new GZIPInputStream( is, 1024 * 8 ), 1024 * 8 );
}
else
{
is.reset();
data = new BufferedInputStream( is, 1024 * 8 );
}
this.dis = new DataInputStream( data );
}
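/**
* Reads the transferred index data into the given {@link IndexWriter}: the header is read and the index
* timestamp applied to the writer's directory, then every serialized document is re-added to the writer,
* collecting root groups and all groups from the {@link ArtifactInfo} records reconstructed along the way.
*
* @param w the index writer to populate
* @param context the indexing context the documents belong to
* @return statistics about the read data (document count, timestamp, groups)
* @throws IOException in case of an IO exception during index data access
*/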
public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
throws IOException
{
long timestamp = readHeader();
Date date = null;
if ( timestamp != -1 )
{
date = new Date( timestamp );
IndexUtils.updateTimestamp( w.getDirectory(), date );
}
int n = 0;
Document doc;
Set<String> rootGroups = new LinkedHashSet<>();
Set<String> allGroups = new LinkedHashSet<>();
while ( ( doc = readDocument() ) != null )
{
ArtifactInfo ai = IndexUtils.constructArtifactInfo( doc, context );
if ( ai != null )
{
w.addDocument( IndexUtils.updateDocument( doc, context, false, ai ) );
rootGroups.add( ai.getRootGroup() );
allGroups.add( ai.getGroupId() );
}
else
{
w.addDocument( doc );
}
n++;
}
w.commit();
IndexDataReadResult result = new IndexDataReadResult();
result.setDocumentCount( n );
result.setTimestamp( date );
result.setRootGroups( rootGroups );
result.setAllGroups( allGroups );
return result;
}
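/**
* Reads and validates the one-byte data format version written by {@code IndexDataWriter}, then returns
* the index timestamp that follows it (milliseconds since the epoch, or -1 when no timestamp is present).
*
* @return the index timestamp in milliseconds, or -1 if none
* @throws IOException if the version byte does not match the expected format version
*/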
public long readHeader()
throws IOException
{
final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
if ( HDRBYTE != dis.readByte() )
{
// data format version mismatch
throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
}
return dis.readLong();
}
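/**
* Reads the next serialized document: a field count followed by that many fields. After the fields are
* read, the UINFO field is amended with the extension when the classifier part is "NA" (MINDEXER-41).
*
* @return the next document, or {@code null} when the end of the stream has been reached
* @throws IOException in case of an IO exception during index data access
*/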
public Document readDocument()
throws IOException
{
int fieldCount;
try
{
fieldCount = dis.readInt();
}
catch ( EOFException ex )
{
return null; // no more documents
}
Document doc = new Document();
for ( int i = 0; i < fieldCount; i++ )
{
doc.add( readField() );
}
// Fix up UINFO field wrt MINDEXER-41
final Field uinfoField = (Field) doc.getField( ArtifactInfo.UINFO );
final String info = doc.get( ArtifactInfo.INFO );
if ( uinfoField != null && !Strings.isNullOrEmpty( info ) )
{
final String[] splitInfo = ArtifactInfo.FS_PATTERN.split( info );
if ( splitInfo.length > 6 )
{
final String extension = splitInfo[6];
final String uinfoString = uinfoField.stringValue();
if ( uinfoString.endsWith( ArtifactInfo.FS + ArtifactInfo.NA ) )
{
uinfoField.setStringValue( uinfoString + ArtifactInfo.FS + ArtifactInfo.nvl( extension ) );
}
}
}
return doc;
}
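/**
* Reads a single field: a flag byte describing how the field is indexed, tokenized and stored, the field
* name in standard modified-UTF-8, and the field value via {@link #readUTF(DataInput)}, which allows
* values longer than 64k bytes.
*/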
private Field readField()
throws IOException
{
int flags = dis.read();
Index index = Index.NO;
if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
{
boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
}
Store store = Store.NO;
if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
{
store = Store.YES;
}
String name = dis.readUTF();
String value = readUTF( dis );
return new Field( name, value, store, index );
}
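/**
* Variant of {@link DataInputStream#readUTF()} that reads a 4-byte length prefix instead of an unsigned
* short, so field values larger than 65535 bytes can be decoded. A corrupt length can lead to an
* {@link OutOfMemoryError}, which is caught and rethrown as an {@link IOException} (MINDEXER-28); the body
* is otherwise the standard modified-UTF-8 decoding loop.
*/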
private static String readUTF( DataInput in )
throws IOException
{
int utflen = in.readInt();
byte[] bytearr;
char[] chararr;
try
{
bytearr = new byte[utflen];
chararr = new char[utflen];
}
catch ( OutOfMemoryError e )
{
final IOException ex =
new IOException( "Index data content is inappropriate (is junk?), leads to OutOfMemoryError!"
+ " See MINDEXER-28 for more information!" );
ex.initCause( e );
throw ex;
}
int c, char2, char3;
int count = 0;
int chararr_count = 0;
in.readFully( bytearr, 0, utflen );
while ( count < utflen )
{
c = bytearr[count] & 0xff;
if ( c > 127 )
{
break;
}
count++;
chararr[chararr_count++] = (char) c;
}
while ( count < utflen )
{
c = bytearr[count] & 0xff;
switch ( c >> 4 )
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
/* 0xxxxxxx */
count++;
chararr[chararr_count++] = (char) c;
break;
case 12:
case 13:
/* 110x xxxx 10xx xxxx */
count += 2;
if ( count > utflen )
{
throw new UTFDataFormatException( "malformed input: partial character at end" );
}
char2 = bytearr[count - 1];
if ( ( char2 & 0xC0 ) != 0x80 )
{
throw new UTFDataFormatException( "malformed input around byte " + count );
}
chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if ( count > utflen )
{
throw new UTFDataFormatException( "malformed input: partial character at end" );
}
char2 = bytearr[count - 2];
char3 = bytearr[count - 1];
if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
{
throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
}
chararr[chararr_count++] =
(char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) );
break;
default:
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException( "malformed input around byte " + count );
}
}
// The number of chars produced may be less than utflen
return new String( chararr, 0, chararr_count );
}
/**
* An index data read result holder
*/
public static class IndexDataReadResult
{
private Date timestamp;
private int documentCount;
private Set<String> rootGroups;
private Set<String> allGroups;
public void setDocumentCount( int documentCount )
{
this.documentCount = documentCount;
}
public int getDocumentCount()
{
return documentCount;
}
public void setTimestamp( Date timestamp )
{
this.timestamp = timestamp;
}
public Date getTimestamp()
{
return timestamp;
}
public void setRootGroups( Set<String> rootGroups )
{
this.rootGroups = rootGroups;
}
public Set<String> getRootGroups()
{
return rootGroups;
}
public void setAllGroups( Set<String> allGroups )
{
this.allGroups = allGroups;
}
public Set<String> getAllGroups()
{
return allGroups;
}
}
/**
* Reads index content by using a visitor. <br>
* The visitor is called for each document read, after it has been populated with Lucene fields.
*
* @param visitor an index data visitor
* @param context indexing context
* @return statistics about read data
* @throws IOException in case of an IO exception during index file access
*/
public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
throws IOException
{
dis.readByte(); // data format version
long timestamp = dis.readLong();
Date date = null;
if ( timestamp != -1 )
{
date = new Date( timestamp );
}
int n = 0;
Document doc;
while ( ( doc = readDocument() ) != null )
{
visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
n++;
}
IndexDataReadResult result = new IndexDataReadResult();
result.setDocumentCount( n );
result.setTimestamp( date );
return result;
}
/**
* Visitor of indexed Lucene documents.
*/
public interface IndexDataReadVisitor
{
/**
* Called on each read document. The document is already populated with fields.
*
* @param document read document
*/
void visitDocument( Document document );
}
}