| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.benchmark.byTask.feeds; |
| |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.nio.file.FileVisitResult; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.nio.file.SimpleFileVisitor; |
| import java.nio.file.attribute.BasicFileAttributes; |
| import java.util.ArrayList; |
| |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| import org.apache.lucene.benchmark.byTask.utils.Format; |
| |
| /** |
| * Base class for source of data for benchmarking |
| * <p> |
| * Keeps track of various statistics, such as how many data items were generated, |
| * size in bytes etc. |
| * <p> |
| * Supports the following configuration parameters: |
| * <ul> |
| * <li><b>content.source.forever</b> - specifies whether to generate items |
| * forever (<b>default=true</b>). |
| * <li><b>content.source.verbose</b> - specifies whether messages should be |
| * output by the content source (<b>default=false</b>). |
| * <li><b>content.source.encoding</b> - specifies which encoding to use when |
| * reading the files of that content source. Certain implementations may define |
| * a default value if this parameter is not specified. (<b>default=null</b>). |
| * <li><b>content.source.log.step</b> - specifies for how many items a |
| * message should be logged. If set to 0 it means no logging should occur. |
| * <b>NOTE:</b> if verbose is set to false, logging should not occur even if |
| * logStep is not 0 (<b>default=0</b>). |
| * </ul> |
| */ |
| public abstract class ContentItemsSource implements Closeable { |
| |
| private long bytesCount; |
| private long totalBytesCount; |
| private int itemCount; |
| private int totalItemCount; |
| private Config config; |
| |
| private int lastPrintedNumUniqueTexts = 0; |
| private long lastPrintedNumUniqueBytes = 0; |
| private int printNum = 0; |
| |
| protected boolean forever; |
| protected int logStep; |
| protected boolean verbose; |
| protected String encoding; |
| |
| /** update count of bytes generated by this source */ |
| protected final synchronized void addBytes(long numBytes) { |
| bytesCount += numBytes; |
| totalBytesCount += numBytes; |
| } |
| |
| /** update count of items generated by this source */ |
| protected final synchronized void addItem() { |
| ++itemCount; |
| ++totalItemCount; |
| } |
| |
| /** |
| * A convenience method for collecting all the files of a content source from |
| * a given directory. The collected {@link Path} instances are stored in the |
| * given <code>files</code>. |
| */ |
| protected final void collectFiles(Path dir, final ArrayList<Path> files) throws IOException { |
| Files.walkFileTree(dir, new SimpleFileVisitor<Path>() { |
| @Override |
| public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { |
| if (Files.isReadable(file)) { |
| files.add(file.toRealPath()); |
| } |
| return FileVisitResult.CONTINUE; |
| } |
| }); |
| } |
| |
| /** |
| * Returns true whether it's time to log a message (depending on verbose and |
| * the number of items generated). |
| */ |
| protected final boolean shouldLog() { |
| return verbose && logStep > 0 && itemCount % logStep == 0; |
| } |
| |
| /** Called when reading from this content source is no longer required. */ |
| @Override |
| public abstract void close() throws IOException; |
| |
| /** Returns the number of bytes generated since last reset. */ |
| public final long getBytesCount() { return bytesCount; } |
| |
| /** Returns the number of generated items since last reset. */ |
| public final int getItemsCount() { return itemCount; } |
| |
| public final Config getConfig() { return config; } |
| |
| /** Returns the total number of bytes that were generated by this source. */ |
| public final long getTotalBytesCount() { return totalBytesCount; } |
| |
| /** Returns the total number of generated items. */ |
| public final int getTotalItemsCount() { return totalItemCount; } |
| |
| /** |
| * Resets the input for this content source, so that the test would behave as |
| * if it was just started, input-wise. |
| * <p> |
| * <b>NOTE:</b> the default implementation resets the number of bytes and |
| * items generated since the last reset, so it's important to call |
| * super.resetInputs in case you override this method. |
| */ |
| public void resetInputs() throws IOException { |
| bytesCount = 0; |
| itemCount = 0; |
| } |
| |
| /** |
| * Sets the {@link Config} for this content source. If you override this |
| * method, you must call super.setConfig. |
| */ |
| public void setConfig(Config config) { |
| this.config = config; |
| forever = config.get("content.source.forever", true); |
| logStep = config.get("content.source.log.step", 0); |
| verbose = config.get("content.source.verbose", false); |
| encoding = config.get("content.source.encoding", null); |
| } |
| |
| public void printStatistics(String itemsName) { |
| if (!verbose) { |
| return; |
| } |
| boolean print = false; |
| String col = " "; |
| StringBuilder sb = new StringBuilder(); |
| String newline = System.getProperty("line.separator"); |
| sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); |
| int nut = getTotalItemsCount(); |
| if (nut > lastPrintedNumUniqueTexts) { |
| print = true; |
| sb.append("total count of ").append(itemsName).append(": ").append(Format.format(0,nut,col)).append(newline); |
| lastPrintedNumUniqueTexts = nut; |
| } |
| long nub = getTotalBytesCount(); |
| if (nub > lastPrintedNumUniqueBytes) { |
| print = true; |
| sb.append("total bytes of ").append(itemsName).append(": ").append(Format.format(0,nub,col)).append(newline); |
| lastPrintedNumUniqueBytes = nub; |
| } |
| if (getItemsCount() > 0) { |
| print = true; |
| sb.append("num ").append(itemsName).append(" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline); |
| sb.append("total bytes added for ").append(itemsName).append(" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); |
| } |
| if (print) { |
| System.out.println(sb.append(newline).toString()); |
| printNum++; |
| } |
| } |
| |
| } |