blob: c6d12938bd9efab1fe54d90695f074a9e6f8aecc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.blocktree;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* BlockTree statistics for a single field
* returned by {@link FieldReader#getStats()}.
* @lucene.internal
*/
public class Stats {
/** Byte size of the index. */
public long indexNumBytes;
/** Total number of terms in the field. */
public long totalTermCount;
/** Total number of bytes (sum of term lengths) across all terms in the field. */
public long totalTermBytes;
/** The number of normal (non-floor) blocks in the terms file. */
public int nonFloorBlockCount;
/** The number of floor blocks (meta-blocks larger than the
* allowed {@code maxItemsPerBlock}) in the terms file. */
public int floorBlockCount;
/** The number of sub-blocks within the floor blocks. */
public int floorSubBlockCount;
/** The number of "internal" blocks (that have both
* terms and sub-blocks). */
public int mixedBlockCount;
/** The number of "leaf" blocks (blocks that have only
* terms). */
public int termsOnlyBlockCount;
/** The number of "internal" blocks that do not contain
* terms (have only sub-blocks). */
public int subBlocksOnlyBlockCount;
/** Total number of blocks. */
public int totalBlockCount;
/** Number of blocks at each prefix depth. */
public int[] blockCountByPrefixLen = new int[10];
private int startBlockCount;
private int endBlockCount;
/** Total number of bytes used to store term suffixes. */
public long totalBlockSuffixBytes;
/**
* Number of times each compression method has been used.
* 0 = uncompressed
* 1 = lowercase_ascii
* 2 = LZ4
*/
public final long[] compressionAlgorithms = new long[3];
/** Total number of suffix bytes before compression. */
public long totalUncompressedBlockSuffixBytes;
/** Total number of bytes used to store term stats (not
* including what the {@link PostingsReaderBase}
* stores. */
public long totalBlockStatsBytes;
/** Total bytes stored by the {@link PostingsReaderBase},
* plus the other few vInts stored in the frame. */
public long totalBlockOtherBytes;
/** Segment name. */
public final String segment;
/** Field name. */
public final String field;
Stats(String segment, String field) {
this.segment = segment;
this.field = field;
}
void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) {
totalBlockCount++;
if (isFloor) {
if (frame.fp == frame.fpOrig) {
floorBlockCount++;
}
floorSubBlockCount++;
} else {
nonFloorBlockCount++;
}
if (blockCountByPrefixLen.length <= frame.prefix) {
blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
}
blockCountByPrefixLen[frame.prefix]++;
startBlockCount++;
totalBlockSuffixBytes += frame.totalSuffixBytes;
totalUncompressedBlockSuffixBytes += frame.suffixesReader.length();
if (frame.suffixesReader != frame.suffixLengthsReader) {
totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length();
}
totalBlockStatsBytes += frame.statsReader.length();
compressionAlgorithms[frame.compressionAlg.code]++;
}
void endBlock(SegmentTermsEnumFrame frame) {
final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
final int subBlockCount = frame.entCount - termCount;
totalTermCount += termCount;
if (termCount != 0 && subBlockCount != 0) {
mixedBlockCount++;
} else if (termCount != 0) {
termsOnlyBlockCount++;
} else if (subBlockCount != 0) {
subBlocksOnlyBlockCount++;
} else {
throw new IllegalStateException();
}
endBlockCount++;
final long otherBytes = frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length();
assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
totalBlockOtherBytes += otherBytes;
}
void term(BytesRef term) {
totalTermBytes += term.length;
}
void finish() {
assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
}
@Override
public String toString() {
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream out;
try {
out = new PrintStream(bos, false, IOUtils.UTF_8);
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
out.println(" index FST:");
out.println(" " + indexNumBytes + " bytes");
out.println(" terms:");
out.println(" " + totalTermCount + " terms");
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" blocks:");
out.println(" " + totalBlockCount + " blocks");
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
out.println(" " + mixedBlockCount + " mixed blocks");
out.println(" " + floorBlockCount + " floor blocks");
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
out.println(" " + floorSubBlockCount + " floor sub-blocks");
out.println(" " + totalUncompressedBlockSuffixBytes + " term suffix bytes before compression" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
StringBuilder compressionCounts = new StringBuilder();
for (int code = 0; code < compressionAlgorithms.length; ++code) {
if (compressionAlgorithms[code] == 0) {
continue;
}
if (compressionCounts.length() > 0) {
compressionCounts.append(", ");
}
compressionCounts.append(CompressionAlgorithm.byCode(code));
compressionCounts.append(": ");
compressionCounts.append(compressionAlgorithms[code]);
}
out.println(" " + totalBlockSuffixBytes + " compressed term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.2f", ((double) totalBlockSuffixBytes)/totalUncompressedBlockSuffixBytes) +
" compression ratio - compression count by algorithm: " + compressionCounts : "") + ")");
out.println(" " + totalBlockStatsBytes + " term stats bytes " + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
if (totalBlockCount != 0) {
out.println(" by prefix length:");
int total = 0;
for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
final int blockCount = blockCountByPrefixLen[prefix];
total += blockCount;
if (blockCount != 0) {
out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
}
}
assert totalBlockCount == total;
}
try {
return bos.toString(IOUtils.UTF_8);
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
}
}