| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene40.blocktree; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.PrintStream; |
| import java.io.UnsupportedEncodingException; |
| import java.util.Locale; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| |
| /** |
| * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}. |
| * |
| * @lucene.internal |
| */ |
| public class Stats { |
| /** Byte size of the index. */ |
| public long indexNumBytes; |
| |
| /** Total number of terms in the field. */ |
| public long totalTermCount; |
| |
| /** Total number of bytes (sum of term lengths) across all terms in the field. */ |
| public long totalTermBytes; |
| |
| /** The number of normal (non-floor) blocks in the terms file. */ |
| public int nonFloorBlockCount; |
| |
| /** |
| * The number of floor blocks (meta-blocks larger than the allowed {@code maxItemsPerBlock}) in |
| * the terms file. |
| */ |
| public int floorBlockCount; |
| |
| /** The number of sub-blocks within the floor blocks. */ |
| public int floorSubBlockCount; |
| |
| /** The number of "internal" blocks (that have both terms and sub-blocks). */ |
| public int mixedBlockCount; |
| |
| /** The number of "leaf" blocks (blocks that have only terms). */ |
| public int termsOnlyBlockCount; |
| |
| /** The number of "internal" blocks that do not contain terms (have only sub-blocks). */ |
| public int subBlocksOnlyBlockCount; |
| |
| /** Total number of blocks. */ |
| public int totalBlockCount; |
| |
| /** Number of blocks at each prefix depth. */ |
| public int[] blockCountByPrefixLen = new int[10]; |
| |
| private int startBlockCount; |
| private int endBlockCount; |
| |
| /** Total number of bytes used to store term suffixes. */ |
| public long totalBlockSuffixBytes; |
| |
| /** |
| * Number of times each compression method has been used. 0 = uncompressed 1 = lowercase_ascii 2 = |
| * LZ4 |
| */ |
| public final long[] compressionAlgorithms = new long[3]; |
| |
| /** Total number of suffix bytes before compression. */ |
| public long totalUncompressedBlockSuffixBytes; |
| |
| /** |
| * Total number of bytes used to store term stats (not including what the {@link |
| * PostingsReaderBase} stores. |
| */ |
| public long totalBlockStatsBytes; |
| |
| /** |
| * Total bytes stored by the {@link PostingsReaderBase}, plus the other few vInts stored in the |
| * frame. |
| */ |
| public long totalBlockOtherBytes; |
| |
| /** Segment name. */ |
| public final String segment; |
| |
| /** Field name. */ |
| public final String field; |
| |
| Stats(String segment, String field) { |
| this.segment = segment; |
| this.field = field; |
| } |
| |
| void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) { |
| totalBlockCount++; |
| if (isFloor) { |
| if (frame.fp == frame.fpOrig) { |
| floorBlockCount++; |
| } |
| floorSubBlockCount++; |
| } else { |
| nonFloorBlockCount++; |
| } |
| |
| if (blockCountByPrefixLen.length <= frame.prefix) { |
| blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefix); |
| } |
| blockCountByPrefixLen[frame.prefix]++; |
| startBlockCount++; |
| totalBlockSuffixBytes += frame.totalSuffixBytes; |
| totalUncompressedBlockSuffixBytes += frame.suffixesReader.length(); |
| if (frame.suffixesReader != frame.suffixLengthsReader) { |
| totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length(); |
| } |
| totalBlockStatsBytes += frame.statsReader.length(); |
| compressionAlgorithms[frame.compressionAlg.code]++; |
| } |
| |
| void endBlock(SegmentTermsEnumFrame frame) { |
| final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd; |
| final int subBlockCount = frame.entCount - termCount; |
| totalTermCount += termCount; |
| if (termCount != 0 && subBlockCount != 0) { |
| mixedBlockCount++; |
| } else if (termCount != 0) { |
| termsOnlyBlockCount++; |
| } else if (subBlockCount != 0) { |
| subBlocksOnlyBlockCount++; |
| } else { |
| throw new IllegalStateException(); |
| } |
| endBlockCount++; |
| final long otherBytes = |
| frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length(); |
| assert otherBytes > 0 |
| : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd; |
| totalBlockOtherBytes += otherBytes; |
| } |
| |
| void term(BytesRef term) { |
| totalTermBytes += term.length; |
| } |
| |
| void finish() { |
| assert startBlockCount == endBlockCount |
| : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount; |
| assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount |
| : "floorSubBlockCount=" |
| + floorSubBlockCount |
| + " nonFloorBlockCount=" |
| + nonFloorBlockCount |
| + " totalBlockCount=" |
| + totalBlockCount; |
| assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount |
| : "totalBlockCount=" |
| + totalBlockCount |
| + " mixedBlockCount=" |
| + mixedBlockCount |
| + " subBlocksOnlyBlockCount=" |
| + subBlocksOnlyBlockCount |
| + " termsOnlyBlockCount=" |
| + termsOnlyBlockCount; |
| } |
| |
| @Override |
| public String toString() { |
| final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); |
| PrintStream out; |
| try { |
| out = new PrintStream(bos, false, IOUtils.UTF_8); |
| } catch (UnsupportedEncodingException bogus) { |
| throw new RuntimeException(bogus); |
| } |
| |
| out.println(" index FST:"); |
| out.println(" " + indexNumBytes + " bytes"); |
| out.println(" terms:"); |
| out.println(" " + totalTermCount + " terms"); |
| out.println( |
| " " |
| + totalTermBytes |
| + " bytes" |
| + (totalTermCount != 0 |
| ? " (" |
| + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount) |
| + " bytes/term)" |
| : "")); |
| out.println(" blocks:"); |
| out.println(" " + totalBlockCount + " blocks"); |
| out.println(" " + termsOnlyBlockCount + " terms-only blocks"); |
| out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks"); |
| out.println(" " + mixedBlockCount + " mixed blocks"); |
| out.println(" " + floorBlockCount + " floor blocks"); |
| out.println(" " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks"); |
| out.println(" " + floorSubBlockCount + " floor sub-blocks"); |
| out.println( |
| " " |
| + totalUncompressedBlockSuffixBytes |
| + " term suffix bytes before compression" |
| + (totalBlockCount != 0 |
| ? " (" |
| + String.format( |
| Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes) / totalBlockCount) |
| + " suffix-bytes/block)" |
| : "")); |
| StringBuilder compressionCounts = new StringBuilder(); |
| for (int code = 0; code < compressionAlgorithms.length; ++code) { |
| if (compressionAlgorithms[code] == 0) { |
| continue; |
| } |
| if (compressionCounts.length() > 0) { |
| compressionCounts.append(", "); |
| } |
| compressionCounts.append(CompressionAlgorithm.byCode(code)); |
| compressionCounts.append(": "); |
| compressionCounts.append(compressionAlgorithms[code]); |
| } |
| out.println( |
| " " |
| + totalBlockSuffixBytes |
| + " compressed term suffix bytes" |
| + (totalBlockCount != 0 |
| ? " (" |
| + String.format( |
| Locale.ROOT, |
| "%.2f", |
| ((double) totalBlockSuffixBytes) / totalUncompressedBlockSuffixBytes) |
| + " compression ratio - compression count by algorithm: " |
| + compressionCounts |
| : "") |
| + ")"); |
| out.println( |
| " " |
| + totalBlockStatsBytes |
| + " term stats bytes " |
| + (totalBlockCount != 0 |
| ? " (" |
| + String.format( |
| Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes) / totalBlockCount) |
| + " stats-bytes/block)" |
| : "")); |
| out.println( |
| " " |
| + totalBlockOtherBytes |
| + " other bytes" |
| + (totalBlockCount != 0 |
| ? " (" |
| + String.format( |
| Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes) / totalBlockCount) |
| + " other-bytes/block)" |
| : "")); |
| if (totalBlockCount != 0) { |
| out.println(" by prefix length:"); |
| int total = 0; |
| for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) { |
| final int blockCount = blockCountByPrefixLen[prefix]; |
| total += blockCount; |
| if (blockCount != 0) { |
| out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount); |
| } |
| } |
| assert totalBlockCount == total; |
| } |
| |
| try { |
| return bos.toString(IOUtils.UTF_8); |
| } catch (UnsupportedEncodingException bogus) { |
| throw new RuntimeException(bogus); |
| } |
| } |
| } |