package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.search.Similarity;
// TODO FI: norms could actually be stored as doc store

/** Writes norms.  Each thread/field pair accumulates the norms
 *  for the docs it saw, then the flush method below merges all
 *  of these together into a single _X.nrm file.
 */
final class NormsWriter extends InvertedDocEndConsumer {
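
  // Norm byte corresponding to a field boost of 1.0f; written for any
  // document that has no norm recorded for a field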
  private static final byte defaultNorm = Similarity.encodeNorm(1.0f);

  private FieldInfos fieldInfos;

  @Override
  public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
    return new NormsWriterPerThread(docInverterPerThread, this);
  }

  @Override
  public void abort() {}

  // We only write the _X.nrm file at flush
  void files(Collection<String> files) {}

  @Override
  void setFieldInfos(FieldInfos fieldInfos) {
    this.fieldInfos = fieldInfos;
  }

  /** Produce _X.nrm if any document had a field with norms
   *  not disabled */
  @Override
  public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {

    final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
      final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
      final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();

      while (fieldsIt.hasNext()) {
        final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
        if (perField.upto > 0) {
          // It has some norms
          List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
          if (l == null) {
            l = new ArrayList<NormsWriterPerField>();
            byField.put(perField.fieldInfo, l);
          }
          l.add(perField);
        } else {
          // Remove this field since we haven't seen it
          // since the previous flush
          fieldsIt.remove();
        }
      }
    }
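
    // All norms for this segment go into a single <segmentName>.nrm file:
    // a header followed by one byte per document for each field that
    // gets norms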
    final String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles.add(normsFileName);
    IndexOutput normsOut = state.directory.createOutput(normsFileName);

    try {
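      // Same 4-byte norms header that SegmentMerger writes; the size
      // assert below accounts for it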
      normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);

      final int numField = fieldInfos.size();

      int normCount = 0;
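
      // Write norms one field at a time, in field number order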
      for(int fieldNumber=0;fieldNumber<numField;fieldNumber++) {
        final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        List<NormsWriterPerField> toMerge = byField.get(fieldInfo);
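
        // upto is the next docID to be written for this field; gaps are
        // filled with defaultNorm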
        int upto = 0;
        if (toMerge != null) {

          final int numFields = toMerge.size();

          normCount++;

          final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
          int[] uptos = new int[numFields];

          for(int j=0;j<numFields;j++)
            fields[j] = toMerge.get(j);

          int numLeft = numFields;
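
          // N-way merge: repeatedly pick the per-thread instance with the
          // smallest next docID and copy its norm byte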
          while(numLeft > 0) {

            assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);

            int minLoc = 0;
            int minDocID = fields[0].docIDs[uptos[0]];

            for(int j=1;j<numLeft;j++) {
              final int docID = fields[j].docIDs[uptos[j]];
              if (docID < minDocID) {
                minDocID = docID;
                minLoc = j;
              }
            }

            assert minDocID < state.numDocs;

            // Fill hole
            for(;upto<minDocID;upto++)
              normsOut.writeByte(defaultNorm);

            normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
            (uptos[minLoc])++;
            upto++;
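
            // This per-thread instance is exhausted: recycle it and shrink
            // the set of instances still being merged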
            if (uptos[minLoc] == fields[minLoc].upto) {
              fields[minLoc].reset();
              if (minLoc != numLeft-1) {
                fields[minLoc] = fields[numLeft-1];
                uptos[minLoc] = uptos[numLeft-1];
              }
              numLeft--;
            }
          }

          // Fill final hole with defaultNorm
          for(;upto<state.numDocs;upto++)
            normsOut.writeByte(defaultNorm);
        } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
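          // Field is indexed with norms enabled, but no norms were recorded
          // for it during this flush: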
          normCount++;
          // Fill entire field with default norm:
          for(;upto<state.numDocs;upto++)
            normsOut.writeByte(defaultNorm);
        }
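
        // Sanity check: 4-byte header plus one norm byte per document for
        // each field written so far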
        assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
      }

    } finally {
      normsOut.close();
    }
  }

  @Override
  void closeDocStore(SegmentWriteState state) {}
}