blob: 13046350ebce3ee721faaccc8fbe33fc87928a05 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.luke.models.util;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Constructor;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.*;
import org.apache.lucene.luke.util.LoggerFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.util.Bits;
/**
* Utilities for various raw index operations.
*
* <p>
* This is for internal uses, DO NOT call from UI components or applications.
* </p>
*/
public final class IndexUtils {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* Opens index(es) reader for given index path.
*
* @param indexPath - path to the index directory
* @param dirImpl - class name for the specific directory implementation
* @return index reader
* @throws Exception - if there is a low level IO error.
*/
public static IndexReader openIndex(String indexPath, String dirImpl)
throws Exception {
final Path root = FileSystems.getDefault().getPath(Objects.requireNonNull(indexPath));
final List<DirectoryReader> readers = new ArrayList<>();
// find all valid index directories in this directory
Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs) throws IOException {
Directory dir = openDirectory(path, dirImpl);
try {
DirectoryReader dr = DirectoryReader.open(dir);
readers.add(dr);
} catch (IOException e) {
log.warn("Error opening directory", e);
}
return FileVisitResult.CONTINUE;
}
});
if (readers.isEmpty()) {
throw new RuntimeException("No valid directory at the location: " + indexPath);
}
if (log.isInfoEnabled()) {
log.info(String.format(Locale.ENGLISH, "IndexReaders (%d leaf readers) successfully opened. Index path=%s", readers.size(), indexPath));
}
if (readers.size() == 1) {
return readers.get(0);
} else {
return new MultiReader(readers.toArray(new IndexReader[readers.size()]));
}
}
/**
* Opens an index directory for given index path.
*
* <p>This can be used to open/repair corrupted indexes.</p>
*
* @param dirPath - index directory path
* @param dirImpl - class name for the specific directory implementation
* @return directory
* @throws IOException - if there is a low level IO error.
*/
public static Directory openDirectory(String dirPath, String dirImpl) throws IOException {
final Path path = FileSystems.getDefault().getPath(Objects.requireNonNull(dirPath));
Directory dir = openDirectory(path, dirImpl);
if (log.isInfoEnabled()) {
log.info(String.format(Locale.ENGLISH, "DirectoryReader successfully opened. Directory path=%s", dirPath));
}
return dir;
}
private static Directory openDirectory(Path path, String dirImpl) throws IOException {
if (!Files.exists(Objects.requireNonNull(path))) {
throw new IllegalArgumentException("Index directory doesn't exist.");
}
Directory dir;
if (dirImpl == null || dirImpl.equalsIgnoreCase("org.apache.lucene.store.FSDirectory")) {
dir = FSDirectory.open(path);
} else {
try {
Class<?> implClazz = Class.forName(dirImpl);
Constructor<?> constr = implClazz.getConstructor(Path.class);
if (constr != null) {
dir = (Directory) constr.newInstance(path);
} else {
constr = implClazz.getConstructor(Path.class, LockFactory.class);
dir = (Directory) constr.newInstance(path, null);
}
} catch (Exception e) {
log.warn("Invalid directory implementation class: {}", dirImpl, e);
throw new IllegalArgumentException("Invalid directory implementation class: " + dirImpl);
}
}
return dir;
}
/**
* Close index directory.
*
* @param dir - index directory to be closed
*/
public static void close(Directory dir) {
try {
if (dir != null) {
dir.close();
log.info("Directory successfully closed.");
}
} catch (IOException e) {
log.error("Error closing directory", e);
}
}
/**
* Close index reader.
*
* @param reader - index reader to be closed
*/
public static void close(IndexReader reader) {
try {
if (reader != null) {
reader.close();
log.info("IndexReader successfully closed.");
if (reader instanceof DirectoryReader) {
Directory dir = ((DirectoryReader) reader).directory();
dir.close();
log.info("Directory successfully closed.");
}
}
} catch (IOException e) {
log.error("Error closing index reader", e);
}
}
/**
* Create an index writer.
*
* @param dir - index directory
* @param analyzer - analyzer used by the index writer
* @param useCompound - if true, compound index files are used
* @param keepAllCommits - if true, all commit generations are kept
* @return new index writer
* @throws IOException - if there is a low level IO error.
*/
public static IndexWriter createWriter(Directory dir, Analyzer analyzer, boolean useCompound, boolean keepAllCommits) throws IOException {
return createWriter(Objects.requireNonNull(dir), analyzer, useCompound, keepAllCommits, null);
}
/**
* Create an index writer.
*
* @param dir - index directory
* @param analyzer - analyser used by the index writer
* @param useCompound - if true, compound index files are used
* @param keepAllCommits - if true, all commit generations are kept
* @param ps - information stream
* @return new index writer
* @throws IOException - if there is a low level IO error.
*/
public static IndexWriter createWriter(Directory dir, Analyzer analyzer, boolean useCompound, boolean keepAllCommits,
PrintStream ps) throws IOException {
Objects.requireNonNull(dir);
IndexWriterConfig config = new IndexWriterConfig(analyzer == null ? new WhitespaceAnalyzer() : analyzer);
config.setUseCompoundFile(useCompound);
if (ps != null) {
config.setInfoStream(ps);
}
if (keepAllCommits) {
config.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
} else {
config.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
}
return new IndexWriter(dir, config);
}
/**
* Execute force merge with the index writer.
*
* @param writer - index writer
* @param expunge - if true, only segments having deleted documents are merged
* @param maxNumSegments - max number of segments
* @throws IOException - if there is a low level IO error.
*/
public static void optimizeIndex(IndexWriter writer, boolean expunge, int maxNumSegments) throws IOException {
Objects.requireNonNull(writer);
if (expunge) {
writer.forceMergeDeletes(true);
} else {
writer.forceMerge(maxNumSegments, true);
}
}
/**
* Check the index status.
*
* @param dir - index directory for checking
* @param ps - information stream
* @return - index status
* @throws IOException - if there is a low level IO error.
*/
public static CheckIndex.Status checkIndex(Directory dir, PrintStream ps) throws IOException {
Objects.requireNonNull(dir);
try (CheckIndex ci = new CheckIndex(dir)) {
if (ps != null) {
ci.setInfoStream(ps);
}
return ci.checkIndex();
}
}
/**
* Try to repair the corrupted index using previously returned index status.
*
* @param dir - index directory for repairing
* @param st - index status
* @param ps - information stream
* @throws IOException - if there is a low level IO error.
*/
public static void tryRepairIndex(Directory dir, CheckIndex.Status st, PrintStream ps) throws IOException {
Objects.requireNonNull(dir);
Objects.requireNonNull(st);
try (CheckIndex ci = new CheckIndex(dir)) {
if (ps != null) {
ci.setInfoStream(ps);
}
ci.exorciseIndex(st);
}
}
/**
* Returns the string representation for Lucene codec version when the index was written.
*
* @param dir - index directory
* @throws IOException - if there is a low level IO error.
*/
public static String getIndexFormat(Directory dir) throws IOException {
Objects.requireNonNull(dir);
return new SegmentInfos.FindSegmentsFile<String>(dir) {
@Override
protected String doBody(String segmentFileName) throws IOException {
String format = "unknown";
try (IndexInput in = dir.openInput(segmentFileName, IOContext.READ)) {
if (CodecUtil.CODEC_MAGIC == in.readInt()) {
int actualVersion = CodecUtil.checkHeaderNoMagic(in, "segments", SegmentInfos.VERSION_70, Integer.MAX_VALUE);
if (actualVersion == SegmentInfos.VERSION_70) {
format = "Lucene 7.0 or later";
} else if (actualVersion == SegmentInfos.VERSION_72) {
format = "Lucene 7.2 or later";
} else if (actualVersion == SegmentInfos.VERSION_74) {
format = "Lucene 7.4 or later";
} else if (actualVersion == SegmentInfos.VERSION_86) {
format = "Lucene 8.6 or later";
} else if (actualVersion > SegmentInfos.VERSION_86) {
format = "Lucene 8.6 or later (UNSUPPORTED)";
}
} else {
format = "Lucene 6.x or prior (UNSUPPORTED)";
}
}
return format;
}
}.run();
}
/**
* Returns user data written with the specified commit.
*
* @param ic - index commit
* @throws IOException - if there is a low level IO error.
*/
public static String getCommitUserData(IndexCommit ic) throws IOException {
Map<String, String> userDataMap = Objects.requireNonNull(ic).getUserData();
if (userDataMap != null) {
return userDataMap.toString();
} else {
return "--";
}
}
/**
* Collect all terms and their counts in the specified fields.
*
* @param reader - index reader
* @param fields - field names
* @return a map contains terms and their occurrence frequencies
* @throws IOException - if there is a low level IO error.
*/
public static Map<String, Long> countTerms(IndexReader reader, Collection<String> fields) throws IOException {
Map<String, Long> res = new HashMap<>();
for (String field : fields) {
if (!res.containsKey(field)) {
res.put(field, 0L);
}
Terms terms = MultiTerms.getTerms(reader, field);
if (terms != null) {
TermsEnum te = terms.iterator();
while (te.next() != null) {
res.put(field, res.get(field) + 1);
}
}
}
return res;
}
/**
* Returns the {@link Bits} representing live documents in the index.
*
* @param reader - index reader
*/
public static Bits getLiveDocs(IndexReader reader) {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getLiveDocs();
} else {
return MultiBits.getLiveDocs(reader);
}
}
/**
* Returns field {@link FieldInfos} in the index.
*
* @param reader - index reader
*/
public static FieldInfos getFieldInfos(IndexReader reader) {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getFieldInfos();
} else {
return FieldInfos.getMergedFieldInfos(reader);
}
}
/**
* Returns the {@link FieldInfo} referenced by the field.
*
* @param reader - index reader
* @param fieldName - field name
*/
public static FieldInfo getFieldInfo(IndexReader reader, String fieldName) {
return getFieldInfos(reader).fieldInfo(fieldName);
}
/**
* Returns all field names in the index.
*
* @param reader - index reader
*/
public static Collection<String> getFieldNames(IndexReader reader) {
return StreamSupport.stream(getFieldInfos(reader).spliterator(), false)
.map(f -> f.name)
.collect(Collectors.toList());
}
/**
* Returns the {@link Terms} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static Terms getTerms(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).terms(field);
} else {
return MultiTerms.getTerms(reader, field);
}
}
/**
* Returns the {@link BinaryDocValues} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static BinaryDocValues getBinaryDocValues(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getBinaryDocValues(field);
} else {
return MultiDocValues.getBinaryValues(reader, field);
}
}
/**
* Returns the {@link NumericDocValues} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static NumericDocValues getNumericDocValues(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getNumericDocValues(field);
} else {
return MultiDocValues.getNumericValues(reader, field);
}
}
/**
* Returns the {@link SortedNumericDocValues} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static SortedNumericDocValues getSortedNumericDocValues(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getSortedNumericDocValues(field);
} else {
return MultiDocValues.getSortedNumericValues(reader, field);
}
}
/**
* Returns the {@link SortedDocValues} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static SortedDocValues getSortedDocValues(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getSortedDocValues(field);
} else {
return MultiDocValues.getSortedValues(reader, field);
}
}
/**
* Returns the {@link SortedSetDocValues} for the specified field.
*
* @param reader - index reader
* @param field - field name
* @throws IOException - if there is a low level IO error.
*/
public static SortedSetDocValues getSortedSetDocvalues(IndexReader reader, String field) throws IOException {
if (reader instanceof LeafReader) {
return ((LeafReader) reader).getSortedSetDocValues(field);
} else {
return MultiDocValues.getSortedSetValues(reader, field);
}
}
private IndexUtils() {
}
}