// blob: 59b570cd972f6ac31fbcb756ba279326efbfcc9b [file] [log] [blame]
package org.apache.lucene.index;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* For each field, stores term vector information position by position. Frequency information is ignored.
* <p/>
* This class is not thread-safe.
*/
public class PositionBasedTermVectorMapper extends TermVectorMapper {
  /**
   * Maps a field name to that field's position table. Created lazily by the first
   * {@link #setExpectations} call and kept afterwards, so every field reported to
   * this mapper is retained rather than only the most recent one.
   */
  private Map<String,Map<Integer,TVPositionInfo>> fieldToTerms;

  /** Name of the field most recently announced through {@link #setExpectations}. */
  private String currentField;

  /** Position table of the current field: position -> terms recorded at that position. */
  private Map<Integer,TVPositionInfo> currentPositions;

  /** The {@code storeOffsets} flag passed to the most recent {@link #setExpectations} call. */
  private boolean storeOffsets;

  /**
   * Creates a mapper, passing {@code false} for both flags of the superclass constructor.
   */
  public PositionBasedTermVectorMapper() {
    super(false, false);
  }

  /**
   * Creates a mapper, passing {@code false} as the first superclass constructor flag and
   * {@code ignoringOffsets} as the second.
   *
   * @param ignoringOffsets forwarded unchanged to the superclass constructor
   */
  public PositionBasedTermVectorMapper(boolean ignoringOffsets) {
    super(false, ignoringOffsets);
  }

  /**
   * Never ignores positions. This mapper doesn't make much sense unless there are positions.
   *
   * @return false
   */
  @Override
  public boolean isIgnoringPositions() {
    return false;
  }

  /**
   * Callback for the TermVectorReader. Records {@code term} under every entry of
   * {@code positions} in the current field's position table, creating the
   * {@link TVPositionInfo} for a position the first time it is seen.
   *
   * @param term the term being mapped
   * @param frequency the term frequency; not used by this mapper
   * @param offsets offsets indexed in parallel with {@code positions}; may be null, in which
   *        case a null offset is handed to the position entry
   * @param positions the positions at which the term occurs
   */
  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    for (int i = 0; i < positions.length; i++) {
      Integer posVal = Integer.valueOf(positions[i]);
      TVPositionInfo pos = currentPositions.get(posVal);
      if (pos == null) {
        pos = new TVPositionInfo(positions[i], storeOffsets);
        currentPositions.put(posVal, pos);
      }
      pos.addTerm(term, offsets != null ? offsets[i] : null);
    }
  }

  /**
   * Callback mechanism used by the TermVectorReader. Starts a fresh position table for
   * {@code field} and registers it in the field map. Tables of fields announced earlier
   * are kept; announcing the same field name again replaces that field's table.
   *
   * @param field The field being read
   * @param numTerms The number of terms in the vector; used only as the initial capacity
   *        of the new position table
   * @param storeOffsets Whether offsets are available
   * @param storePositions Whether positions are available
   * @throws RuntimeException if {@code storePositions} is false
   */
  @Override
  public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
    if (!storePositions) {
      throw new RuntimeException("You must store positions in order to use this Mapper");
    }
    // Built before any state is touched so a rejected capacity leaves the mapper unchanged.
    Map<Integer,TVPositionInfo> positionsForField = new HashMap<Integer,TVPositionInfo>(numTerms);
    // Allocate the field map only once: this callback runs once per field, and
    // re-creating the map here would discard every previously mapped field.
    if (fieldToTerms == null) {
      fieldToTerms = new HashMap<String,Map<Integer,TVPositionInfo>>();
    }
    this.storeOffsets = storeOffsets;
    currentField = field;
    currentPositions = positionsForField;
    fieldToTerms.put(currentField, currentPositions);
  }

  /**
   * Get the mapping between fields and the terms found at each position.
   *
   * @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link org.apache.lucene.index.PositionBasedTermVectorMapper.TVPositionInfo}.
   *         Returns null if {@link #setExpectations} has not been called yet.
   */
  public Map<String,Map<Integer,TVPositionInfo>> getFieldToTerms() {
    return fieldToTerms;
  }

  /**
   * Container for the terms (and optionally their offsets) at a single position.
   */
  public static class TVPositionInfo {
    private final int position;
    private final List<String> terms;
    /** Parallel to {@link #terms}; null when this entry was created without offset storage. */
    private final List<TermVectorOffsetInfo> offsets;

    /**
     * @param position the position this entry describes
     * @param storeOffsets whether to keep an offsets list alongside the terms
     */
    public TVPositionInfo(int position, boolean storeOffsets) {
      this.position = position;
      this.terms = new ArrayList<String>();
      this.offsets = storeOffsets ? new ArrayList<TermVectorOffsetInfo>() : null;
    }

    /**
     * Appends a term to this position. When an offsets list is kept, {@code info} is
     * appended to it as well (even if null) so both lists stay index-aligned.
     *
     * @param term the term to add
     * @param info the offset belonging to the term; may be null
     */
    void addTerm(String term, TermVectorOffsetInfo info) {
      terms.add(term);
      if (offsets != null) {
        offsets.add(info);
      }
    }

    /**
     *
     * @return The position of the term
     */
    public int getPosition() {
      return position;
    }

    /**
     * Note, there may be multiple terms at the same position
     *
     * @return A List of Strings
     */
    public List<String> getTerms() {
      return terms;
    }

    /**
     * Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position
     *
     * @return A List of TermVectorOffsetInfo objects, if offsets are stored; null otherwise.
     */
    public List<TermVectorOffsetInfo> getOffsets() {
      return offsets;
    }
  }
}