/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.admin;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.lucene.index.IndexOptions.DOCS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
/**
* This handler exposes details of the internal Lucene index. It is inspired by and
* modeled on Luke, the Lucene Index Browser by Andrzej Bialecki.
* http://www.getopt.org/luke/
*
* For more documentation see:
* http://wiki.apache.org/solr/LukeRequestHandler
*
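* Example requests, using parameter names defined below (the handler is typically
* reachable at /admin/luke on each core; the id value and field name here are
* placeholders):
* <pre>
*   /admin/luke?show=index                 top-level index stats only
*   /admin/luke?show=schema                fields, dynamic fields, types and similarity
*   /admin/luke?id=SOLR1000                per-field details of the doc with that uniqueKey
*   /admin/luke?fl=name&amp;numTerms=20    top terms and a docFreq histogram for field "name"
* </pre>
*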
* @since solr 1.2
*/
public class LukeRequestHandler extends RequestHandlerBase
{
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String NUMTERMS = "numTerms";
public static final String INCLUDE_INDEX_FIELD_FLAGS = "includeIndexFieldFlags";
public static final String DOC_ID = "docId";
public static final String ID = CommonParams.ID;
public static final int DEFAULT_COUNT = 10;
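// Doc-frequency histogram size: 33 buckets, where slot i (0..32) counts terms whose docFreq rounds up
// to 2^i; see getDetailedFieldInfo and TermHistogram below.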
static final int HIST_ARRAY_SIZE = 33;
private enum ShowStyle {
ALL,
DOC,
SCHEMA,
INDEX;
public static ShowStyle get(String v) {
if(v==null) return null;
if("schema".equalsIgnoreCase(v)) return SCHEMA;
if("index".equalsIgnoreCase(v)) return INDEX;
if("doc".equalsIgnoreCase(v)) return DOC;
if("all".equalsIgnoreCase(v)) return ALL;
throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: "+v);
}
}
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
{
IndexSchema schema = req.getSchema();
SolrIndexSearcher searcher = req.getSearcher();
DirectoryReader reader = searcher.getIndexReader();
SolrParams params = req.getParams();
ShowStyle style = ShowStyle.get(params.get("show"));
// If no doc is given, show all fields and top terms
rsp.add("index", getIndexInfo(reader));
if(ShowStyle.INDEX==style) {
return; // that's all we need
}
Integer docId = params.getInt( DOC_ID );
if( docId == null && params.get( ID ) != null ) {
// Look for something with a given solr ID
SchemaField uniqueKey = schema.getUniqueKeyField();
String v = uniqueKey.getType().toInternal( params.get(ID) );
Term t = new Term( uniqueKey.getName(), v );
docId = searcher.getFirstMatch( t );
if( docId < 0 ) {
throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: "+params.get( ID ) );
}
}
// Read the document from the index
if( docId != null ) {
if( style != null && style != ShowStyle.DOC ) {
throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style");
}
Document doc = null;
try {
doc = reader.document( docId );
}
catch( Exception ex ) { /* ignore: doc stays null and is handled just below */ }
if( doc == null ) {
throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: "+docId );
}
SimpleOrderedMap<Object> info = getDocumentFieldsInfo( doc, docId, reader, schema );
SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<>();
docinfo.add( "docId", docId );
docinfo.add( "lucene", info );
docinfo.add( "solr", doc );
rsp.add( "doc", docinfo );
}
else if ( ShowStyle.SCHEMA == style ) {
rsp.add( "schema", getSchemaInfo( req.getSchema() ) );
}
else {
rsp.add( "fields", getIndexedFieldsInfo(req) ) ;
}
// Add some generally helpful information
NamedList<Object> info = new SimpleOrderedMap<>();
info.add( "key", getFieldFlagsKey() );
info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents." );
rsp.add( "info", info );
rsp.setHttpCaching(false);
}
/**
* @return a string representing an IndexableField's flags.
*/
private static String getFieldFlags( IndexableField f )
{
IndexOptions opts = (f == null) ? null : f.fieldType().indexOptions();
StringBuilder flags = new StringBuilder();
flags.append( (f != null && f.fieldType().indexOptions() != IndexOptions.NONE) ? FieldFlag.INDEXED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().tokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().docValuesType() != DocValuesType.NONE) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (false) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (false) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorPositions()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorPayloads()) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null && DOCS == opts ) ?
FieldFlag.OMIT_TF.getAbbreviation() : '-' );
flags.append((f != null && DOCS_AND_FREQS == opts) ?
FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-');
flags.append((f != null && DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS == opts) ?
FieldFlag.STORE_OFFSETS_WITH_POSITIONS.getAbbreviation() : '-');
flags.append( (f != null && f.getClass().getSimpleName().equals("LazyField")) ? FieldFlag.LAZY.getAbbreviation() : '-' );
flags.append( (f != null && f.binaryValue()!=null) ? FieldFlag.BINARY.getAbbreviation() : '-' );
flags.append( (false) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (false) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-' ); // SchemaField Specific
return flags.toString();
}
/**
* @return a string representing a SchemaField's flags.
*/
private static String getFieldFlags( SchemaField f )
{
FieldType t = (f==null) ? null : f.getType();
// see: http://www.nabble.com/schema-field-properties-tf3437753.html#a9585549
boolean lazy = false; // "lazy" is purely a property of reading fields
boolean binary = false; // Currently not possible
StringBuilder flags = new StringBuilder();
flags.append( (f != null && f.indexed()) ? FieldFlag.INDEXED.getAbbreviation() : '-' );
flags.append( (t != null && t.isTokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
flags.append( (f != null && f.stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.hasDocValues()) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (f != null && f.isUninvertible()) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : "-" );
flags.append( (f != null && f.multiValued()) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPayloads() ) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null &&
f.omitTermFreqAndPositions() ) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
flags.append( (f != null && f.omitPositions() ) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
flags.append( (f != null && f.storeOffsetsWithPositions() ) ? FieldFlag.STORE_OFFSETS_WITH_POSITIONS.getAbbreviation() : '-' );
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );
flags.append( (f != null && f.sortMissingLast() ) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-' );
return flags.toString();
}
/**
* @return a key to what each character means
*/
public static SimpleOrderedMap<String> getFieldFlagsKey() {
SimpleOrderedMap<String> key = new SimpleOrderedMap<>();
for (FieldFlag f : FieldFlag.values()) {
key.add(String.valueOf(f.getAbbreviation()), f.getDisplay() );
}
return key;
}
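/**
* Reports, for each field of one stored document: its schema type, its schema and index flags, the external
* and internal values, the docFreq of the corresponding indexed term, and the term vector if one is stored.
*/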
private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader,
IndexSchema schema ) throws IOException
{
final CharsRefBuilder spare = new CharsRefBuilder();
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
for( Object o : doc.getFields() ) {
Field field = (Field)o;
SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();
SchemaField sfield = schema.getFieldOrNull( field.name() );
FieldType ftype = (sfield==null)?null:sfield.getType();
f.add( "type", (ftype==null)?null:ftype.getTypeName() );
f.add( "schema", getFieldFlags( sfield ) );
f.add( "flags", getFieldFlags( field ) );
f.add( "value", (ftype==null)?null:ftype.toExternal( field ) );
// TODO: this really should be "stored"
f.add( "internal", field.stringValue() ); // may be a binary number
BytesRef bytes = field.binaryValue();
if (bytes != null) {
f.add( "binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
}
if (ftype != null && !ftype.isPointField()) { // guard against a null ftype, which would NPE here
Term t = new Term(field.name(), ftype.storedToIndexed(field));
f.add( "docFreq", t.text()==null ? 0 : reader.docFreq( t ) ); // this can be 0 for non-indexed fields
}// TODO: Calculate docFreq for point fields
// If we have a term vector, return that
if( field.fieldType().storeTermVectors() ) {
try {
Terms v = reader.getTermVector( docId, field.name() );
if( v != null ) {
SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
final TermsEnum termsEnum = v.iterator();
BytesRef text;
while((text = termsEnum.next()) != null) {
final int freq = (int) termsEnum.totalTermFreq();
spare.copyUTF8Bytes(text);
tfv.add(spare.toString(), freq);
}
f.add( "termVector", tfv );
}
}
catch( Exception ex ) {
log.warn( "error writing term vector", ex );
}
}
finfo.add( field.name(), f );
}
return finfo;
}
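/**
* Reports every field present in the index, alphabetized, with schema flags, index flags sampled from the
* first live document containing the field, and doc counts; fields matched by the fl param additionally get
* the expensive per-term statistics from getDetailedFieldInfo.
*/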
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
throws Exception {
SolrIndexSearcher searcher = req.getSearcher();
SolrParams params = req.getParams();
Set<String> fields = null;
String fl = params.get(CommonParams.FL);
if (fl != null) {
fields = new TreeSet<>(Arrays.asList(fl.split( "[,\\s]+" )));
}
LeafReader reader = searcher.getSlowAtomicReader();
IndexSchema schema = searcher.getSchema();
// Don't be tempted to put this in the loop below; the whole point here is to alphabetize the fields!
Set<String> fieldNames = new TreeSet<>();
for(FieldInfo fieldInfo : reader.getFieldInfos()) {
fieldNames.add(fieldInfo.name);
}
// Walk the term enum and keep a priority queue for each map in our set
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
for (String fieldName : fieldNames) {
if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
continue; // we're not interested in this field
}
SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();
SchemaField sfield = schema.getFieldOrNull( fieldName );
FieldType ftype = (sfield==null)?null:sfield.getType();
fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
fieldMap.add("schema", getFieldFlags(sfield));
if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
}
Terms terms = reader.terms(fieldName);
if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
finfo.add( fieldName, fieldMap );
continue;
}
if(sfield != null && sfield.indexed() ) {
if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS,true)) {
Document doc = getFirstLiveDoc(terms, reader);
if (doc != null) {
// Found a document with this field
try {
IndexableField fld = doc.getField(fieldName);
if (fld != null) {
fieldMap.add("index", getFieldFlags(fld));
} else {
// it is a non-stored field...
fieldMap.add("index", "(unstored field)");
}
} catch (Exception ex) {
log.warn("error reading field: {}", fieldName);
}
}
}
fieldMap.add("docs", terms.getDocCount());
}
if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
getDetailedFieldInfo(req, fieldName, fieldMap);
}
// Add the field
finfo.add( fieldName, fieldMap );
}
return finfo;
}
// Just get a document with the term in it, the first one will do!
// Is there a better way to do this? Shouldn't actually be very costly
// to do it this way.
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
PostingsEnum postingsEnum = null;
TermsEnum termsEnum = terms.iterator();
BytesRef text;
// Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) {
text = termsEnum.next();
if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
return null;
}
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
final Bits liveDocs = reader.getLiveDocs();
if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (liveDocs != null && liveDocs.get(postingsEnum.docID())) {
continue;
}
return reader.document(postingsEnum.docID());
}
}
return null;
}
/**
* Return info from the index
*/
private static SimpleOrderedMap<Object> getSchemaInfo( IndexSchema schema ) {
Map<String, List<String>> typeusemap = new TreeMap<>();
Map<String, Object> fields = new TreeMap<>();
SchemaField uniqueField = schema.getUniqueKeyField();
for( SchemaField f : schema.getFields().values() ) {
populateFieldInfo(schema, typeusemap, fields, uniqueField, f);
}
Map<String, Object> dynamicFields = new TreeMap<>();
for (SchemaField f : schema.getDynamicFieldPrototypes()) {
populateFieldInfo(schema, typeusemap, dynamicFields, uniqueField, f);
}
SimpleOrderedMap<Object> types = new SimpleOrderedMap<>();
Map<String, FieldType> sortedTypes = new TreeMap<>(schema.getFieldTypes());
for( FieldType ft : sortedTypes.values() ) {
SimpleOrderedMap<Object> field = new SimpleOrderedMap<>();
field.add("fields", typeusemap.get( ft.getTypeName() ) );
field.add("tokenized", ft.isTokenized() );
field.add("className", ft.getClass().getName());
field.add("indexAnalyzer", getAnalyzerInfo(ft.getIndexAnalyzer()));
field.add("queryAnalyzer", getAnalyzerInfo(ft.getQueryAnalyzer()));
field.add("similarity", getSimilarityInfo(ft.getSimilarity()));
types.add( ft.getTypeName(), field );
}
// Must go through this to maintain binary compatibility. Putting a TreeMap into a response leads to casting errors
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
SimpleOrderedMap<Object> fieldsSimple = new SimpleOrderedMap<>();
for (Map.Entry<String, Object> ent : fields.entrySet()) {
fieldsSimple.add(ent.getKey(), ent.getValue());
}
finfo.add("fields", fieldsSimple);
SimpleOrderedMap<Object> dynamicSimple = new SimpleOrderedMap<>();
for (Map.Entry<String, Object> ent : dynamicFields.entrySet()) {
dynamicSimple.add(ent.getKey(), ent.getValue());
}
finfo.add("dynamicFields", dynamicSimple);
finfo.add("uniqueKeyField",
null == uniqueField ? null : uniqueField.getName());
finfo.add("similarity", getSimilarityInfo(schema.getSimilarity()));
finfo.add("types", types);
return finfo;
}
private static SimpleOrderedMap<Object> getSimilarityInfo(Similarity similarity) {
SimpleOrderedMap<Object> toReturn = new SimpleOrderedMap<>();
if (similarity != null) {
toReturn.add("className", similarity.getClass().getName());
toReturn.add("details", similarity.toString());
}
return toReturn;
}
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<>();
aninfo.add("className", analyzer.getClass().getName());
if (analyzer instanceof TokenizerChain) {
TokenizerChain tchain = (TokenizerChain)analyzer;
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
if (0 < cfiltfacs.length) {
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
for (CharFilterFactory cfiltfac : cfiltfacs) {
Map<String, Object> tok = new HashMap<>();
String className = cfiltfac.getClass().getName();
tok.put("className", className);
tok.put("args", cfiltfac.getOriginalArgs());
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
}
aninfo.add("charFilters", cfilters);
}
SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<>();
TokenizerFactory tfac = tchain.getTokenizerFactory();
tokenizer.add("className", tfac.getClass().getName());
tokenizer.add("args", tfac.getOriginalArgs());
aninfo.add("tokenizer", tokenizer);
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
if (0 < filtfacs.length) {
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
for (TokenFilterFactory filtfac : filtfacs) {
Map<String, Object> tok = new HashMap<>();
String className = filtfac.getClass().getName();
tok.put("className", className);
tok.put("args", filtfac.getOriginalArgs());
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
}
aninfo.add("filters", filters);
}
}
return aninfo;
}
private static void populateFieldInfo(IndexSchema schema,
Map<String, List<String>> typeusemap, Map<String, Object> fields,
SchemaField uniqueField, SchemaField f) {
FieldType ft = f.getType();
SimpleOrderedMap<Object> field = new SimpleOrderedMap<>();
field.add( "type", ft.getTypeName() );
field.add( "flags", getFieldFlags(f) );
if( f.isRequired() ) {
field.add( "required", f.isRequired() );
}
if( f.getDefaultValue() != null ) {
field.add( "default", f.getDefaultValue() );
}
if (f == uniqueField){
field.add("uniqueKey", true);
}
if (ft.getIndexAnalyzer().getPositionIncrementGap(f.getName()) != 0) {
field.add("positionIncrementGap", ft.getIndexAnalyzer().getPositionIncrementGap(f.getName()));
}
field.add("copyDests", toListOfStringDests(schema.getCopyFieldsList(f.getName())));
field.add("copySources", schema.getCopySources(f.getName()));
fields.put( f.getName(), field );
List<String> v = typeusemap.get( ft.getTypeName() );
if( v == null ) {
v = new ArrayList<>();
}
v.add( f.getName() );
typeusemap.put( ft.getTypeName(), v );
}
// This method just gets the top-most level of information. It was formerly conflated with getting
// detailed info for *all* the fields, and is called from CoreAdminHandler etc.
public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
Directory dir = reader.directory();
SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<>();
indexInfo.add("numDocs", reader.numDocs());
indexInfo.add("maxDoc", reader.maxDoc());
indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
indexInfo.add("indexHeapUsageBytes", getIndexHeapUsed(reader));
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
indexInfo.add("segmentCount", reader.leaves().size());
indexInfo.add("current", closeSafe( reader::isCurrent));
indexInfo.add("hasDeletions", reader.hasDeletions() );
indexInfo.add("directory", dir );
IndexCommit indexCommit = reader.getIndexCommit();
String segmentsFileName = indexCommit.getSegmentsFileName();
indexInfo.add("segmentsFile", segmentsFileName);
indexInfo.add("segmentsFileSizeInBytes", getSegmentsFileLength(indexCommit));
Map<String,String> userData = indexCommit.getUserData();
indexInfo.add("userData", userData);
String s = userData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
if (s != null) {
indexInfo.add("lastModified", new Date(Long.parseLong(s)));
}
return indexInfo;
}
@FunctionalInterface
interface IOSupplier {
boolean get() throws IOException;
}
private static Object closeSafe(IOSupplier isCurrent) {
try {
return isCurrent.get();
} catch (AlreadyClosedException | IOException exception) {
// The underlying reader can be closed out from under us (e.g. by a core reload); report "not current".
}
return false;
}
/**
* <p>A helper method that attempts to determine the file length of the the segments file for the
* specified IndexCommit from it's Directory.
* </p>
* <p>
* If any sort of {@link IOException} occurs, this method will return "-1" and swallow the exception since
* this may be normal if the IndexCommit is no longer "on disk". The specific type of the Exception will
* affect how severely it is logged: {@link NoSuchFileException} is considered more "acceptible" then other
* types of IOException which may indicate an actual problem with the Directory.
*/
private static long getSegmentsFileLength(IndexCommit commit) {
try {
return commit.getDirectory().fileLength(commit.getSegmentsFileName());
} catch (NoSuchFileException okException) {
log.debug("Unable to determine the (optional) fileSize for the current IndexReader's segments file because it is "
+ "no longer in the Directory, this can happen if there are new commits since the Reader was opened"
, okException);
} catch (IOException strangeException) {
log.warn("Ignoring IOException wile attempting to determine the (optional) fileSize stat for the current IndexReader's segments file",
strangeException);
}
return -1;
}
/** Returns the sum of RAM bytes used by each segment */
private static long getIndexHeapUsed(DirectoryReader reader) {
return reader.leaves().stream()
.map(LeafReaderContext::reader)
.map(FilterLeafReader::unwrap)
.map(leafReader -> {
if (leafReader instanceof Accountable) {
return ((Accountable) leafReader).ramBytesUsed();
} else {
return -1L; // unsupported
}
})
.mapToLong(Long::longValue)
.reduce(0, (left, right) -> left == -1 || right == -1 ? -1 : left + right);
// if any leaves are unsupported (-1), we ultimately return -1.
}
// Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
// especially on large indexes!
@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
throws IOException {
SolrParams params = req.getParams();
final int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.
final CharsRefBuilder spare = new CharsRefBuilder();
Terms terms = MultiTerms.getTerms(req.getSearcher().getIndexReader(), field);
if (terms == null) { // field does not exist
return;
}
TermsEnum termsEnum = terms.iterator();
BytesRef text;
int[] buckets = new int[HIST_ARRAY_SIZE];
while ((text = termsEnum.next()) != null) {
++tiq.distinctTerms;
int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
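// slot == ceil(log2(freq)): freq 1 -> slot 0, freq 2 -> slot 1, freq 3..4 -> slot 2, and so on; each
// bucket's label in TermHistogram.toNamedList is the upper bound (1 << slot) of its docFreq range.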
buckets[slot] = buckets[slot] + 1;
if (numTerms > 0 && freq > tiq.minFreq) {
spare.copyUTF8Bytes(text);
String t = spare.toString();
tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
if (tiq.size() > numTerms) { // if tiq full
tiq.pop(); // remove lowest in tiq
tiq.minFreq = tiq.getTopTermInfo().docFreq;
}
}
}
tiq.histogram.add(buckets);
fieldMap.add("distinct", tiq.distinctTerms);
// Include top terms
fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));
// Add a histogram
fieldMap.add("histogram", tiq.histogram.toNamedList());
}
private static List<String> toListOfStrings(SchemaField[] raw) {
List<String> result = new ArrayList<>(raw.length);
for (SchemaField f : raw) {
result.add(f.getName());
}
return result;
}
private static List<String> toListOfStringDests(List<CopyField> raw) {
List<String> result = new ArrayList<>(raw.size());
for (CopyField f : raw) {
result.add(f.getDestination().getName());
}
return result;
}
//////////////////////// SolrInfoMBeans methods //////////////////////
@Override
public String getDescription() {
return "Lucene Index Browser. Inspired and modeled after Luke: http://www.getopt.org/luke/";
}
@Override
public Category getCategory() {
return Category.ADMIN;
}
///////////////////////////////////////////////////////////////////////////////////////
static class TermHistogram
{
int _maxBucket = -1;
int[] _buckets = new int[HIST_ARRAY_SIZE];
public void add(int[] buckets) {
for (int idx = 0; idx < buckets.length; ++idx) {
if (buckets[idx] != 0) _maxBucket = idx;
}
for (int idx = 0; idx <= _maxBucket; ++idx) {
_buckets[idx] = buckets[idx];
}
}
// TODO? should this be a list or a map?
public NamedList<Integer> toNamedList()
{
NamedList<Integer> nl = new NamedList<>();
for( int bucket = 0; bucket <= _maxBucket; bucket++ ) {
nl.add( ""+ (1 << bucket), _buckets[bucket] );
}
return nl;
}
}
/**
* Private internal class that counts up frequent terms
*/
@SuppressWarnings("rawtypes")
private static class TopTermQueue extends PriorityQueue
{
static class TermInfo {
TermInfo(Term t, int df) {
term = t;
docFreq = df;
}
int docFreq;
Term term;
}
public int minFreq = 0;
public int distinctTerms = 0;
public TermHistogram histogram;
TopTermQueue(int size) {
super(size);
histogram = new TermHistogram();
}
@Override
protected final boolean lessThan(TermInfo termInfoA, TermInfo termInfoB) {
return termInfoA.docFreq < termInfoB.docFreq;
}
/**
* This is a destructive call... the queue is empty at the end
*/
public NamedList<Integer> toNamedList( IndexSchema schema )
{
// reverse the list..
List<TermInfo> aslist = new LinkedList<>();
while( size() > 0 ) {
aslist.add( 0, pop() );
}
NamedList<Integer> list = new NamedList<>();
for (TermInfo i : aslist) {
String txt = i.term.text();
SchemaField ft = schema.getFieldOrNull( i.term.field() );
if( ft != null ) {
txt = ft.getType().indexedToReadable( txt );
}
list.add( txt, i.docFreq );
}
return list;
}
public TermInfo getTopTermInfo() {
return top();
}
}
}