| package org.apache.maven.index.updater; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import java.io.BufferedOutputStream; |
| import java.io.DataOutput; |
| import java.io.DataOutputStream; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.util.ArrayList; |
| import java.util.Date; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.zip.GZIPOutputStream; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexableField; |
| import org.apache.lucene.index.MultiFields; |
| import org.apache.lucene.util.Bits; |
| import org.apache.maven.index.ArtifactInfo; |
| import org.apache.maven.index.context.DefaultIndexingContext; |
| import org.apache.maven.index.context.IndexingContext; |
| |
| /** |
| * An index data writer used to write transfer index format. |
| * |
| * @author Eugene Kuleshov |
| */ |
| public class IndexDataWriter |
| { |
| static final int VERSION = 1; |
| |
| static final int F_INDEXED = 1; |
| |
| static final int F_TOKENIZED = 2; |
| |
| static final int F_STORED = 4; |
| |
| static final int F_COMPRESSED = 8; |
| |
| private final DataOutputStream dos; |
| |
| private final GZIPOutputStream gos; |
| |
| private final BufferedOutputStream bos; |
| |
| private final Set<String> allGroups; |
| |
| private final Set<String> rootGroups; |
| |
| private boolean descriptorWritten; |
| |
| public IndexDataWriter( OutputStream os ) |
| throws IOException |
| { |
| bos = new BufferedOutputStream( os, 1024 * 8 ); |
| gos = new GZIPOutputStream( bos, 1024 * 2 ); |
| dos = new DataOutputStream( gos ); |
| |
| this.allGroups = new HashSet<String>(); |
| this.rootGroups = new HashSet<String>(); |
| this.descriptorWritten = false; |
| } |
| |
| public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes ) |
| throws IOException |
| { |
| writeHeader( context ); |
| |
| int n = writeDocuments( indexReader, docIndexes ); |
| |
| writeGroupFields(); |
| |
| close(); |
| |
| return n; |
| } |
| |
| public void close() |
| throws IOException |
| { |
| dos.flush(); |
| |
| gos.flush(); |
| gos.finish(); |
| |
| bos.flush(); |
| } |
| |
| public void writeHeader( IndexingContext context ) |
| throws IOException |
| { |
| dos.writeByte( VERSION ); |
| |
| Date timestamp = context.getTimestamp(); |
| dos.writeLong( timestamp == null ? -1 : timestamp.getTime() ); |
| } |
| |
| public void writeGroupFields() |
| throws IOException |
| { |
| { |
| List<IndexableField> allGroupsFields = new ArrayList<>( 2 ); |
| allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Store.YES ) ); |
| allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ), |
| Store.YES ) ); |
| writeDocumentFields( allGroupsFields ); |
| } |
| |
| { |
| List<IndexableField> rootGroupsFields = new ArrayList<>( 2 ); |
| rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, |
| Store.YES ) ); |
| rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ), |
| Store.YES ) ); |
| writeDocumentFields( rootGroupsFields ); |
| } |
| } |
| |
| public int writeDocuments( IndexReader r, List<Integer> docIndexes ) |
| throws IOException |
| { |
| int n = 0; |
| Bits liveDocs = MultiFields.getLiveDocs( r ); |
| |
| if ( docIndexes == null ) |
| { |
| for ( int i = 0; i < r.maxDoc(); i++ ) |
| { |
| if ( liveDocs == null || liveDocs.get( i ) ) |
| { |
| if ( writeDocument( r.document( i ) ) ) |
| { |
| n++; |
| } |
| } |
| } |
| } |
| else |
| { |
| for ( int i : docIndexes ) |
| { |
| if ( liveDocs == null || liveDocs.get( i ) ) |
| { |
| if ( writeDocument( r.document( i ) ) ) |
| { |
| n++; |
| } |
| } |
| } |
| } |
| |
| return n; |
| } |
| |
| public boolean writeDocument( final Document document ) |
| throws IOException |
| { |
| List<IndexableField> fields = document.getFields(); |
| |
| List<IndexableField> storedFields = new ArrayList<>( fields.size() ); |
| |
| for ( IndexableField field : fields ) |
| { |
| if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) ) |
| { |
| if ( descriptorWritten ) |
| { |
| return false; |
| } |
| else |
| { |
| descriptorWritten = true; |
| } |
| } |
| |
| if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) ) |
| { |
| final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST ); |
| |
| if ( groupList != null && groupList.trim().length() > 0 ) |
| { |
| allGroups.addAll( ArtifactInfo.str2lst( groupList ) ); |
| } |
| |
| return false; |
| } |
| |
| if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) ) |
| { |
| final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST ); |
| |
| if ( groupList != null && groupList.trim().length() > 0 ) |
| { |
| rootGroups.addAll( ArtifactInfo.str2lst( groupList ) ); |
| } |
| |
| return false; |
| } |
| |
| if ( field.fieldType().stored() ) |
| { |
| storedFields.add( field ); |
| } |
| } |
| |
| writeDocumentFields( storedFields ); |
| |
| return true; |
| } |
| |
| public void writeDocumentFields( List<IndexableField> fields ) |
| throws IOException |
| { |
| dos.writeInt( fields.size() ); |
| |
| for ( IndexableField field : fields ) |
| { |
| writeField( field ); |
| } |
| } |
| |
| public void writeField( IndexableField field ) |
| throws IOException |
| { |
| int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0 ) // |
| + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 ) // |
| + ( field.fieldType().stored() ? F_STORED : 0 ); // |
| // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore |
| |
| String name = field.name(); |
| String value = field.stringValue(); |
| |
| dos.write( flags ); |
| dos.writeUTF( name ); |
| writeUTF( value, dos ); |
| } |
| |
| private static void writeUTF( String str, DataOutput out ) |
| throws IOException |
| { |
| int strlen = str.length(); |
| int utflen = 0; |
| int c; |
| |
| // use charAt instead of copying String to char array |
| for ( int i = 0; i < strlen; i++ ) |
| { |
| c = str.charAt( i ); |
| if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) |
| { |
| utflen++; |
| } |
| else if ( c > 0x07FF ) |
| { |
| utflen += 3; |
| } |
| else |
| { |
| utflen += 2; |
| } |
| } |
| |
| // TODO optimize storing int value |
| out.writeInt( utflen ); |
| |
| byte[] bytearr = new byte[utflen]; |
| |
| int count = 0; |
| |
| int i = 0; |
| for ( ; i < strlen; i++ ) |
| { |
| c = str.charAt( i ); |
| if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) ) |
| { |
| break; |
| } |
| bytearr[count++] = (byte) c; |
| } |
| |
| for ( ; i < strlen; i++ ) |
| { |
| c = str.charAt( i ); |
| if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) |
| { |
| bytearr[count++] = (byte) c; |
| |
| } |
| else if ( c > 0x07FF ) |
| { |
| bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) ); |
| bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) ); |
| bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); |
| } |
| else |
| { |
| bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) ); |
| bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); |
| } |
| } |
| |
| out.write( bytearr, 0, utflen ); |
| } |
| |
| } |