blob: 9c0a819694b894856278c392f7940809062b2e35 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene40.blocktree;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
 * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}.
 *
 * <p>The counters are accumulated through the package-private callbacks {@code startBlock},
 * {@code endBlock} and {@code term}; {@code finish} cross-checks the accumulated counters with
 * assertions, and {@link #toString()} renders them as a multi-line, human-readable report.
 *
 * @lucene.internal
 */
public class Stats {
  /** Byte size of the index. */
  public long indexNumBytes;

  /** Total number of terms in the field. */
  public long totalTermCount;

  /** Total number of bytes (sum of term lengths) across all terms in the field. */
  public long totalTermBytes;

  /** The number of normal (non-floor) blocks in the terms file. */
  public int nonFloorBlockCount;

  /**
   * The number of floor blocks (meta-blocks larger than the allowed {@code maxItemsPerBlock}) in
   * the terms file.
   */
  public int floorBlockCount;

  /** The number of sub-blocks within the floor blocks. */
  public int floorSubBlockCount;

  /** The number of "internal" blocks (that have both terms and sub-blocks). */
  public int mixedBlockCount;

  /** The number of "leaf" blocks (blocks that have only terms). */
  public int termsOnlyBlockCount;

  /** The number of "internal" blocks that do not contain terms (have only sub-blocks). */
  public int subBlocksOnlyBlockCount;

  /** Total number of blocks. */
  public int totalBlockCount;

  /** Number of blocks at each prefix depth. The array is grown on demand. */
  public int[] blockCountByPrefixLen = new int[10];

  /** Number of {@code startBlock} calls; compared against {@link #endBlockCount} on finish. */
  private int startBlockCount;

  /** Number of {@code endBlock} calls; compared against {@link #startBlockCount} on finish. */
  private int endBlockCount;

  /** Total number of bytes used to store term suffixes. */
  public long totalBlockSuffixBytes;

  /**
   * Number of times each compression method has been used, indexed by compression code: 0 =
   * uncompressed, 1 = lowercase_ascii, 2 = LZ4.
   */
  public final long[] compressionAlgorithms = new long[3];

  /** Total number of suffix bytes before compression. */
  public long totalUncompressedBlockSuffixBytes;

  /**
   * Total number of bytes used to store term stats (not including what the {@link
   * PostingsReaderBase} stores).
   */
  public long totalBlockStatsBytes;

  /**
   * Total bytes stored by the {@link PostingsReaderBase}, plus the other few vInts stored in the
   * frame.
   */
  public long totalBlockOtherBytes;

  /** Segment name. */
  public final String segment;

  /** Field name. */
  public final String field;

  /**
   * Creates an empty statistics holder.
   *
   * @param segment name of the segment these statistics belong to
   * @param field name of the field these statistics belong to
   */
  Stats(String segment, String field) {
    this.segment = segment;
    this.field = field;
  }

  /**
   * Records the beginning of a block: bumps the block counters and the per-prefix-length
   * histogram, and adds the frame's suffix, stats and compression figures to the totals.
   *
   * @param frame the frame describing the block being visited
   * @param isFloor whether the block is part of a floor block
   */
  void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) {
    totalBlockCount++;
    if (isFloor) {
      // The floor block itself is counted only while the frame still sits at its original file
      // pointer; every call with isFloor set counts as one floor sub-block.
      if (frame.fp == frame.fpOrig) {
        floorBlockCount++;
      }
      floorSubBlockCount++;
    } else {
      nonFloorBlockCount++;
    }

    // Make room for this prefix length before incrementing its slot.
    if (blockCountByPrefixLen.length <= frame.prefix) {
      blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefix);
    }
    blockCountByPrefixLen[frame.prefix]++;
    startBlockCount++;

    totalBlockSuffixBytes += frame.totalSuffixBytes;
    totalUncompressedBlockSuffixBytes += frame.suffixesReader.length();
    // Only add the suffix-lengths reader when it is a distinct object, so that a shared reader is
    // not counted twice.
    if (frame.suffixesReader != frame.suffixLengthsReader) {
      totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length();
    }
    totalBlockStatsBytes += frame.statsReader.length();
    compressionAlgorithms[frame.compressionAlg.code]++;
  }

  /**
   * Records the end of a block: adds its terms to the term total, classifies the block as mixed,
   * terms-only or sub-blocks-only, and accounts for the bytes that are neither suffix nor stats
   * bytes.
   *
   * @param frame the frame describing the block that was just visited
   * @throws IllegalStateException if the block contains neither terms nor sub-blocks
   */
  void endBlock(SegmentTermsEnumFrame frame) {
    final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
    final int subBlockCount = frame.entCount - termCount;
    totalTermCount += termCount;

    final boolean hasTerms = termCount != 0;
    final boolean hasSubBlocks = subBlockCount != 0;
    if (hasTerms && hasSubBlocks) {
      mixedBlockCount++;
    } else if (hasTerms) {
      termsOnlyBlockCount++;
    } else if (hasSubBlocks) {
      subBlocksOnlyBlockCount++;
    } else {
      throw new IllegalStateException(
          "block has neither terms nor sub-blocks: frame.fp="
              + frame.fp
              + " frame.entCount="
              + frame.entCount);
    }
    endBlockCount++;

    // Whatever lies between fp and fpEnd that is not suffix or stats data counts as "other".
    final long otherBytes =
        frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length();
    assert otherBytes > 0
        : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
    totalBlockOtherBytes += otherBytes;
  }

  /**
   * Adds the length of one term to {@link #totalTermBytes}.
   *
   * @param term the term that was visited
   */
  void term(BytesRef term) {
    totalTermBytes += term.length;
  }

  /**
   * Verifies, via assertions only, that the accumulated counters are mutually consistent: every
   * started block was ended, and the total block count matches both the floor/non-floor split and
   * the mixed/terms-only/sub-blocks-only split.
   */
  void finish() {
    assert startBlockCount == endBlockCount
        : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
    assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount
        : "floorSubBlockCount="
            + floorSubBlockCount
            + " nonFloorBlockCount="
            + nonFloorBlockCount
            + " totalBlockCount="
            + totalBlockCount;
    assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount
        : "totalBlockCount="
            + totalBlockCount
            + " mixedBlockCount="
            + mixedBlockCount
            + " subBlocksOnlyBlockCount="
            + subBlocksOnlyBlockCount
            + " termsOnlyBlockCount="
            + termsOnlyBlockCount;
  }

  /**
   * Formats a parenthesised per-block average such as {@code " (12.3 stats-bytes/block)"}.
   *
   * @param totalBytes the byte total to divide by {@link #totalBlockCount}
   * @param unit the label printed after the average
   * @return the formatted fragment, or the empty string when no block has been recorded
   */
  private String perBlockAverage(long totalBytes, String unit) {
    if (totalBlockCount == 0) {
      return "";
    }
    return " ("
        + String.format(Locale.ROOT, "%.1f", ((double) totalBytes) / totalBlockCount)
        + " "
        + unit
        + ")";
  }

  /**
   * Renders the statistics as a multi-line report covering the index size, term totals, block
   * counts, suffix/stats/other byte totals and, when at least one block was recorded, the block
   * count per prefix length.
   *
   * @return the formatted report
   */
  @Override
  public String toString() {
    final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    final PrintStream out;
    try {
      out = new PrintStream(bos, false, IOUtils.UTF_8);
    } catch (UnsupportedEncodingException bogus) {
      throw new RuntimeException(bogus);
    }

    out.println(" index FST:");
    out.println(" " + indexNumBytes + " bytes");

    out.println(" terms:");
    out.println(" " + totalTermCount + " terms");
    out.println(
        " "
            + totalTermBytes
            + " bytes"
            + (totalTermCount != 0
                ? " ("
                    + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount)
                    + " bytes/term)"
                : ""));

    out.println(" blocks:");
    out.println(" " + totalBlockCount + " blocks");
    out.println(" " + termsOnlyBlockCount + " terms-only blocks");
    out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
    out.println(" " + mixedBlockCount + " mixed blocks");
    out.println(" " + floorBlockCount + " floor blocks");
    out.println(" " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks");
    out.println(" " + floorSubBlockCount + " floor sub-blocks");

    // NOTE(review): this line reports the uncompressed suffix byte total, yet the per-block
    // average is derived from the compressed total (totalBlockSuffixBytes). Kept as-is to
    // preserve the existing output; confirm which figure is intended.
    out.println(
        " "
            + totalUncompressedBlockSuffixBytes
            + " term suffix bytes before compression"
            + perBlockAverage(totalBlockSuffixBytes, "suffix-bytes/block"));

    // Summarise only the compression algorithms that were actually used.
    final StringBuilder compressionCounts = new StringBuilder();
    for (int code = 0; code < compressionAlgorithms.length; ++code) {
      if (compressionAlgorithms[code] == 0) {
        continue;
      }
      if (compressionCounts.length() > 0) {
        compressionCounts.append(", ");
      }
      compressionCounts.append(CompressionAlgorithm.byCode(code));
      compressionCounts.append(": ");
      compressionCounts.append(compressionAlgorithms[code]);
    }

    // The closing parenthesis belongs inside the conditional so that a field without blocks does
    // not print a dangling ")".
    out.println(
        " "
            + totalBlockSuffixBytes
            + " compressed term suffix bytes"
            + (totalBlockCount != 0
                ? " ("
                    + String.format(
                        Locale.ROOT,
                        "%.2f",
                        ((double) totalBlockSuffixBytes) / totalUncompressedBlockSuffixBytes)
                    + " compression ratio - compression count by algorithm: "
                    + compressionCounts
                    + ")"
                : ""));

    out.println(
        " "
            + totalBlockStatsBytes
            + " term stats bytes "
            + perBlockAverage(totalBlockStatsBytes, "stats-bytes/block"));
    out.println(
        " "
            + totalBlockOtherBytes
            + " other bytes"
            + perBlockAverage(totalBlockOtherBytes, "other-bytes/block"));

    if (totalBlockCount != 0) {
      out.println(" by prefix length:");
      int total = 0;
      for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) {
        final int blockCount = blockCountByPrefixLen[prefix];
        total += blockCount;
        if (blockCount != 0) {
          out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
        }
      }
      // The histogram must account for every block that was started.
      assert totalBlockCount == total;
    }

    // Make sure everything written so far has reached the backing byte array.
    out.flush();
    try {
      return bos.toString(IOUtils.UTF_8);
    } catch (UnsupportedEncodingException bogus) {
      throw new RuntimeException(bogus);
    }
  }
}