blob: 23cb6bdc03d23dcd4d72e24407484821e115c151 [file] [log] [blame]
package org.apache.lucene.codecs.perfield;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.ServiceLoader; // javadocs
import java.util.Set;
import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
* Enables per field format support.
* <p>
* Note, when extending this class, the name ({@link #getName}) is
* written into the index. In order for the field to be read, the
* name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} to resolve format names.
* <p>
* @see ServiceLoader
* @lucene.experimental
*/
public abstract class PerFieldPostingsFormat extends PostingsFormat {
/** Extension of the per-segment map file that stores the format and field tables. */
public static final String PER_FIELD_EXTENSION = "per";
/** Name passed to the superclass constructor; also used as the codec name in the map file's header. */
public static final String PER_FIELD_NAME = "PerField40";
/** Initial version of the map file; passed as the minimum version when the header is checked. */
public static final int VERSION_START = 0;
/** Version written into the map file's header; passed as the maximum version when the header is checked. */
public static final int VERSION_LATEST = VERSION_START;
/** Sole constructor; passes {@link #PER_FIELD_NAME} to the superclass as this format's name. */
public PerFieldPostingsFormat() {
super(PER_FIELD_NAME);
}
/**
 * Creates the write-side consumer for a segment.
 *
 * @param state write state of the segment being written
 * @return a new {@code FieldsWriter} bound to {@code state}
 * @throws IOException declared by the {@code FieldsWriter} constructor
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  return new FieldsWriter(state);
}
/**
 * Pairs the {@link FieldsConsumer} opened for one postings format with the
 * segment suffix that consumer was created with.
 * <p>
 * NOTE: not private to avoid $accessN at runtime!!
 */
static class FieldsConsumerAndID implements Closeable {
  /** Consumer that receives the fields routed to this format. */
  final FieldsConsumer fieldsConsumer;
  /** Segment suffix the consumer was created with. */
  final String segmentSuffix;

  public FieldsConsumerAndID(FieldsConsumer consumer, String suffix) {
    fieldsConsumer = consumer;
    segmentSuffix = suffix;
  }

  /** Closes the wrapped consumer. */
  @Override
  public void close() throws IOException {
    fieldsConsumer.close();
  }
}
/**
 * Write-side consumer. Each added field is routed to the consumer of the
 * format returned by {@code getPostingsFormatForField}; on {@link #close()}
 * the sub-consumers are closed and the routing tables are written to the
 * segment's map file.
 */
private class FieldsWriter extends FieldsConsumer {
  /** One open consumer (with its segment suffix) per distinct format instance. */
  private final Map<PostingsFormat,FieldsConsumerAndID> formats = new IdentityHashMap<PostingsFormat,FieldsConsumerAndID>();
  /** Records all fields we wrote. */
  private final Map<String,PostingsFormat> fieldToFormat = new HashMap<String,PostingsFormat>();
  private final SegmentWriteState segmentWriteState;

  public FieldsWriter(SegmentWriteState state) throws IOException {
    this.segmentWriteState = state;
  }

  /**
   * Looks up the format for {@code field}, lazily opens that format's
   * consumer the first time the format is seen, and delegates to it.
   *
   * @throws IllegalStateException if no format is returned for the field
   */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    final PostingsFormat chosen = getPostingsFormatForField(field.name);
    if (chosen == null) {
      throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\"");
    }
    assert !fieldToFormat.containsKey(field.name);
    fieldToFormat.put(field.name, chosen);

    FieldsConsumerAndID entry = formats.get(chosen);
    if (entry == null) {
      // First field for this format: the number of formats seen so far
      // becomes its id, and the id becomes its segment suffix.
      final String nextId = Integer.toString(formats.size());
      final String suffix = getFullSegmentSuffix(field.name, segmentWriteState.segmentSuffix, nextId);
      final FieldsConsumer consumer = chosen.fieldsConsumer(new SegmentWriteState(segmentWriteState, suffix));
      entry = new FieldsConsumerAndID(consumer, suffix);
      formats.put(chosen, entry);
    }
    return entry.fieldsConsumer.addField(field);
  }

  /**
   * Closes every sub-consumer, then writes the map file (_X.per): header,
   * the format table, and the field table.
   */
  @Override
  public void close() throws IOException {
    // Sub-consumers are closed before the map file is created.
    IOUtils.close(formats.values());

    final String mapFileName = IndexFileNames.segmentFileName(segmentWriteState.segmentName, segmentWriteState.segmentSuffix, PER_FIELD_EXTENSION);
    final IndexOutput mapOut = segmentWriteState.directory.createOutput(mapFileName, segmentWriteState.context);
    boolean finished = false;
    try {
      CodecUtil.writeHeader(mapOut, PER_FIELD_NAME, VERSION_LATEST);
      writeFormatTable(mapOut);
      writeFieldTable(mapOut);
      finished = true;
    } finally {
      if (finished) {
        IOUtils.close(mapOut);
      } else {
        IOUtils.closeWhileHandlingException(mapOut);
      }
    }
  }

  /** Writes the entry count, then (segment suffix, format name) for each format. */
  private void writeFormatTable(IndexOutput mapOut) throws IOException {
    mapOut.writeVInt(formats.size());
    for (Map.Entry<PostingsFormat,FieldsConsumerAndID> e : formats.entrySet()) {
      mapOut.writeString(e.getValue().segmentSuffix);
      mapOut.writeString(e.getKey().getName());
    }
  }

  /** Writes the entry count, then (field name, format name) for each field. */
  private void writeFieldTable(IndexOutput mapOut) throws IOException {
    mapOut.writeVInt(fieldToFormat.size());
    for (Map.Entry<String,PostingsFormat> e : fieldToFormat.entrySet()) {
      mapOut.writeString(e.getKey());
      mapOut.writeString(e.getValue().getName());
    }
  }
}
/**
 * Determines the segment suffix to hand to a delegate format.
 *
 * @param fieldName field being written; only used in the error message
 * @param outerSegmentSuffix suffix this format itself was given; must be empty
 * @param segmentSuffix suffix assigned to the delegate format
 * @return {@code segmentSuffix} unchanged
 * @throws IllegalStateException if {@code outerSegmentSuffix} is not empty
 */
static String getFullSegmentSuffix(String fieldName, String outerSegmentSuffix, String segmentSuffix) {
  if (outerSegmentSuffix.length() != 0) {
    // TODO: support embedding; I think it should work but
    // we need a test to confirm
    // return outerSegmentSuffix + "_" + segmentSuffix;
    throw new IllegalStateException("cannot embed PerFieldPostingsFormat inside itself (field \"" + fieldName + "\" returned PerFieldPostingsFormat)");
  }
  return segmentSuffix;
}
/**
 * Read-side producer. The constructor reads the segment's map file (_X.per),
 * opens one {@link FieldsProducer} per listed format, and maps every listed
 * field to the producer of its format. Per-field requests are then delegated
 * to that producer.
 */
private class FieldsReader extends FieldsProducer {
  /** Field name -> producer holding that field; a TreeMap, so iteration follows key order. */
  private final Map<String,FieldsProducer> fields = new TreeMap<String,FieldsProducer>();
  /** Format instance -> producer opened for it (keyed by identity). */
  private final Map<PostingsFormat,FieldsProducer> formats = new IdentityHashMap<PostingsFormat,FieldsProducer>();

  /**
   * @throws IllegalStateException if a field in the map file refers to a
   *         format for which no producer was opened
   */
  public FieldsReader(final SegmentReadState readState) throws IOException {
    // Read _X.per and init each format:
    boolean success = false;
    try {
      new VisitPerFieldFile(readState.dir, readState.segmentInfo.name, readState.segmentSuffix) {
        @Override
        protected void visitOneFormat(String segmentSuffix, PostingsFormat postingsFormat) throws IOException {
          final FieldsProducer opened = postingsFormat.fieldsProducer(new SegmentReadState(readState, segmentSuffix));
          final FieldsProducer displaced = formats.put(postingsFormat, opened);
          if (displaced != null) {
            // The same format instance was listed twice. The earlier producer
            // is no longer reachable from the map, so close it here instead
            // of leaking it; the later entry still wins, as before.
            displaced.close();
          }
        }
        @Override
        protected void visitOneField(String fieldName, PostingsFormat postingsFormat) throws IOException {
          final FieldsProducer producer = formats.get(postingsFormat);
          if (producer == null) {
            // Fail here rather than storing null, which would only surface
            // later as a NullPointerException when the field is read.
            throw new IllegalStateException("no FieldsProducer was opened for PostingsFormat " + postingsFormat + " referenced by field=\"" + fieldName + "\"");
          }
          fields.put(fieldName, producer);
        }
      };
      success = true;
    } finally {
      if (!success) {
        // Construction failed: release whatever producers were opened so far.
        IOUtils.closeWhileHandlingException(formats.values());
      }
    }
  }

  /** Walks the field names in key order; {@link #terms()} reads the field last returned by {@link #next()}. */
  private final class FieldsIterator extends FieldsEnum {
    private final Iterator<String> it;
    private String current;

    public FieldsIterator() {
      it = fields.keySet().iterator();
    }

    /** Advances to the next field name, or returns null once all fields were returned. */
    @Override
    public String next() throws IOException {
      if (it.hasNext()) {
        current = it.next();
      } else {
        current = null;
      }
      return current;
    }

    /** Returns the terms of the current field, as supplied by that field's producer. */
    @Override
    public Terms terms() throws IOException {
      return fields.get(current).terms(current);
    }
  }

  @Override
  public FieldsEnum iterator() throws IOException {
    return new FieldsIterator();
  }

  /** Returns the terms for {@code field}, or null when the field is not in the map file. */
  @Override
  public Terms terms(String field) throws IOException {
    FieldsProducer fieldsProducer = fields.get(field);
    return fieldsProducer == null ? null : fieldsProducer.terms(field);
  }

  /** Number of fields listed in the map file. */
  @Override
  public int size() {
    return fields.size();
  }

  /** Closes every per-format producer. */
  @Override
  public void close() throws IOException {
    IOUtils.close(formats.values());
  }
}
/**
 * Creates the read-side producer for a segment.
 *
 * @param state read state of the segment being opened
 * @return a new {@code FieldsReader} built from {@code state}
 * @throws IOException if constructing the {@code FieldsReader} fails
 */
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  return new FieldsReader(state);
}
/**
 * Parses a segment's map file inside its constructor and reports the content
 * through two callbacks: first every (segment suffix, format) entry, then
 * every (field name, format) entry.
 */
private abstract class VisitPerFieldFile {

  /**
   * Opens the map file of the given segment, checks its header, and invokes
   * the callbacks for each entry. The input is always closed before returning.
   *
   * @throws IllegalStateException if a format name in the format table resolves to null
   */
  public VisitPerFieldFile(Directory dir, String segmentName, String outerSegmentSuffix) throws IOException {
    final IndexInput in = dir.openInput(
        IndexFileNames.segmentFileName(segmentName, outerSegmentSuffix, PER_FIELD_EXTENSION),
        IOContext.READONCE);
    boolean done = false;
    try {
      CodecUtil.checkHeader(in, PER_FIELD_NAME, VERSION_START, VERSION_LATEST);

      // Format table: (segment suffix, format name) pairs
      for (int remaining = in.readVInt(); remaining > 0; remaining--) {
        final String suffix = in.readString();
        final String name = in.readString();
        // Better be defined, because it was defined
        // during indexing:
        final PostingsFormat resolved = PostingsFormat.forName(name);
        if (resolved == null) {
          throw new IllegalStateException("unable to lookup PostingsFormat for name=\"" + name + "\": got null");
        }
        visitOneFormat(suffix, resolved);
      }

      // Field table: (field name, format name) pairs
      for (int remaining = in.readVInt(); remaining > 0; remaining--) {
        final String field = in.readString();
        final String name = in.readString();
        visitOneField(field, PostingsFormat.forName(name));
      }
      done = true;
    } finally {
      if (done) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }
  }

  /** Called first, once per entry of the format table. */
  protected abstract void visitOneFormat(String segmentSuffix, PostingsFormat format) throws IOException;

  /** Called afterwards, once per entry of the field table. */
  protected abstract void visitOneField(String fieldName, PostingsFormat format) throws IOException;
}
/**
 * Adds the map file name to {@code files}, then lets every format listed in
 * the map file add its own files. If the map file cannot be opened, only the
 * map file name is added.
 *
 * @param info segment whose files are collected
 * @param segmentSuffix suffix used to build the map file name
 * @param files set that receives the file names
 */
@Override
public void files(final SegmentInfo info, String segmentSuffix, final Set<String> files) throws IOException {
  files.add(IndexFileNames.segmentFileName(info.name, segmentSuffix, PER_FIELD_EXTENSION));
  try {
    new VisitPerFieldFile(info.dir, info.name, segmentSuffix) {
      @Override
      protected void visitOneFormat(String formatSuffix, PostingsFormat delegate) throws IOException {
        delegate.files(info, formatSuffix, files);
      }

      @Override
      protected void visitOneField(String fieldName, PostingsFormat delegate) {
        // The field table is not needed to list files.
      }
    };
  } catch (FileNotFoundException missingMapFile) {
    // TODO: this is somewhat shady... if we can't open
    // the .per file then most likely someone is calling
    // .files() after this segment was deleted, so, they
    // wouldn't be able to do anything with the files even
    // if we could return them, so we don't add any files
    // in this case.
  }
}
/**
 * Returns the postings format that should write the given field.
 * A null return makes the writer fail with an IllegalStateException.
 *
 * @param field name of the field being written
 */
// NOTE: only called during writing; for reading we read
// all we need from the index (ie we save the field ->
// format mapping)
public abstract PostingsFormat getPostingsFormatForField(String field);
}