blob: aa08344646bb763a1a19c42aedefdc23905f05bb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.fst;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IntsRef; // javadocs
import org.apache.lucene.util.RamUsageEstimator;
/**
* Wraps another Outputs implementation and encodes one or
* more of its output values. You can use this when a single
* input may need to map to more than one output,
* maintaining order: pass the same input with a different
* output by calling {@link Builder#add(IntsRef,Object)} multiple
* times. The builder will then combine the outputs using
* the {@link Outputs#merge(Object,Object)} method.
*
* <p>The resulting FST may not be minimal when an input has
* more than one output, as this requires pushing all
* multi-output values to a final state.
*
* <p>NOTE: the only way to create multiple outputs is to
* add the same input to the FST multiple times in a row. This is
* how the FST maps a single input to multiple outputs (e.g. you
* cannot pass a List&lt;Object&gt; to {@link Builder#add}). If
* your outputs are longs, and you need at most 2, then use
* {@link UpToTwoPositiveIntOutputs} instead since it stores
* the outputs more compactly (by stealing a bit from each
* long value).
*
* <p>NOTE: this cannot wrap itself (ie you cannot make an
* FST with List&lt;List&lt;Object&gt;&gt; outputs using this).
*
* @lucene.experimental
*/
// NOTE: i think we could get a more compact FST if, instead
// of adding the same input multiple times with a different
// output each time, we added it only once with a
// pre-constructed List<T> output. This way the "multiple
// values" is fully opaque to the Builder/FST. It would
// require implementing the full algebra using set
// arithmetic (I think?); maybe SetOfOutputs is a good name.
@SuppressWarnings("unchecked")
public final class ListOfOutputs<T> extends Outputs<Object> {
private final Outputs<T> outputs;
public ListOfOutputs(Outputs<T> outputs) {
this.outputs = outputs;
}
@Override
public Object common(Object output1, Object output2) {
// These will never be a list:
return outputs.common((T) output1, (T) output2);
}
@Override
public Object subtract(Object object, Object inc) {
// These will never be a list:
return outputs.subtract((T) object, (T) inc);
}
@Override
public Object add(Object prefix, Object output) {
assert !(prefix instanceof List);
if (!(output instanceof List)) {
return outputs.add((T) prefix, (T) output);
} else {
List<T> outputList = (List<T>) output;
List<T> addedList = new ArrayList<>(outputList.size());
for(T _output : outputList) {
addedList.add(outputs.add((T) prefix, _output));
}
return addedList;
}
}
@Override
public void write(Object output, DataOutput out) throws IOException {
assert !(output instanceof List);
outputs.write((T) output, out);
}
@Override
public void writeFinalOutput(Object output, DataOutput out) throws IOException {
if (!(output instanceof List)) {
out.writeVInt(1);
outputs.write((T) output, out);
} else {
List<T> outputList = (List<T>) output;
out.writeVInt(outputList.size());
for(T eachOutput : outputList) {
outputs.write(eachOutput, out);
}
}
}
@Override
public Object read(DataInput in) throws IOException {
return outputs.read(in);
}
@Override
public void skipOutput(DataInput in) throws IOException {
outputs.skipOutput(in);
}
@Override
public Object readFinalOutput(DataInput in) throws IOException {
int count = in.readVInt();
if (count == 1) {
return outputs.read(in);
} else {
List<T> outputList = new ArrayList<>(count);
for(int i=0;i<count;i++) {
outputList.add(outputs.read(in));
}
return outputList;
}
}
@Override
public void skipFinalOutput(DataInput in) throws IOException {
int count = in.readVInt();
for(int i=0;i<count;i++) {
outputs.skipOutput(in);
}
}
@Override
public Object getNoOutput() {
return outputs.getNoOutput();
}
@Override
public String outputToString(Object output) {
if (!(output instanceof List)) {
return outputs.outputToString((T) output);
} else {
List<T> outputList = (List<T>) output;
StringBuilder b = new StringBuilder();
b.append('[');
for(int i=0;i<outputList.size();i++) {
if (i > 0) {
b.append(", ");
}
b.append(outputs.outputToString(outputList.get(i)));
}
b.append(']');
return b.toString();
}
}
@Override
public Object merge(Object first, Object second) {
List<T> outputList = new ArrayList<>();
if (!(first instanceof List)) {
outputList.add((T) first);
} else {
outputList.addAll((List<T>) first);
}
if (!(second instanceof List)) {
outputList.add((T) second);
} else {
outputList.addAll((List<T>) second);
}
//System.out.println("MERGE: now " + outputList.size() + " first=" + outputToString(first) + " second=" + outputToString(second));
//System.out.println(" return " + outputToString(outputList));
return outputList;
}
@Override
public String toString() {
return "OneOrMoreOutputs(" + outputs + ")";
}
public List<T> asList(Object output) {
if (!(output instanceof List)) {
List<T> result = new ArrayList<>(1);
result.add((T) output);
return result;
} else {
return (List<T>) output;
}
}
private static final long BASE_LIST_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new ArrayList<Object>());
@Override
public long ramBytesUsed(Object output) {
long bytes = 0;
if (output instanceof List) {
bytes += BASE_LIST_NUM_BYTES;
List<T> outputList = (List<T>) output;
for(T _output : outputList) {
bytes += outputs.ramBytesUsed(_output);
}
// 2 * to allow for ArrayList's oversizing:
bytes += 2 * outputList.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
} else {
bytes += outputs.ramBytesUsed((T) output);
}
return bytes;
}
}