blob: 385edc8f2f67cf1a02f250dd88de5b36bb229720 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.byTask.feeds;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/**
 * Base class for sources of data for benchmarking.
 * <p>
 * Keeps track of various statistics, such as how many data items were generated,
 * size in bytes etc.
 * <p>
 * Supports the following configuration parameters:
 * <ul>
 * <li><b>content.source.forever</b> - specifies whether to generate items
 * forever (<b>default=true</b>).
 * <li><b>content.source.verbose</b> - specifies whether messages should be
 * output by the content source (<b>default=false</b>).
 * <li><b>content.source.encoding</b> - specifies which encoding to use when
 * reading the files of that content source. Certain implementations may define
 * a default value if this parameter is not specified. (<b>default=null</b>).
 * <li><b>content.source.log.step</b> - specifies for how many items a
 * message should be logged. If set to 0 it means no logging should occur.
 * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
 * logStep is not 0 (<b>default=0</b>).
 * </ul>
 * <p>
 * The byte and item counters, as well as the bookkeeping used by
 * {@link #printStatistics(String)}, are read and written only while holding
 * this instance's monitor, so counter updates made by one thread are visible
 * to readers on other threads.
 */
public abstract class ContentItemsSource implements Closeable {

  // Counters since the last resetInputs() and over the lifetime of this source.
  // Guarded by this instance's monitor.
  private long bytesCount;
  private long totalBytesCount;
  private int itemCount;
  private int totalItemCount;

  private Config config;

  // Bookkeeping for printStatistics(): the totals shown by the previous report
  // and the number of reports produced so far. Guarded by this instance's monitor.
  private int lastPrintedNumUniqueTexts = 0;
  private long lastPrintedNumUniqueBytes = 0;
  private int printNum = 0;

  // Values read from the Config in setConfig().
  protected boolean forever;
  protected int logStep;
  protected boolean verbose;
  protected String encoding;

  /**
   * Updates the count of bytes generated by this source.
   *
   * @param numBytes number of bytes to add to both the since-reset and the
   *        total byte counters
   */
  protected final synchronized void addBytes(long numBytes) {
    bytesCount += numBytes;
    totalBytesCount += numBytes;
  }

  /** Updates the count of items generated by this source by one. */
  protected final synchronized void addItem() {
    ++itemCount;
    ++totalItemCount;
  }

  /**
   * A convenience method for collecting all the files of a content source from
   * a given directory. The collected {@link Path} instances are stored in the
   * given <code>files</code>.
   *
   * @param dir root of the file tree to walk
   * @param files list that receives the real path of every readable file visited
   * @throws IOException if walking the tree or resolving a real path fails
   */
  protected final void collectFiles(Path dir, final ArrayList<Path> files) throws IOException {
    Files.walkFileTree(dir, new SimpleFileVisitor<Path>() {
      @Override
      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
        // Unreadable files are skipped rather than reported as errors.
        if (Files.isReadable(file)) {
          files.add(file.toRealPath());
        }
        return FileVisitResult.CONTINUE;
      }
    });
  }

  /**
   * Returns true if it's time to log a message (depending on verbose and
   * the number of items generated).
   *
   * @return true when verbose is set, logStep is positive and the number of
   *         items generated since the last reset is a multiple of logStep
   */
  protected final synchronized boolean shouldLog() {
    return verbose && logStep > 0 && itemCount % logStep == 0;
  }

  /** Called when reading from this content source is no longer required. */
  @Override
  public abstract void close() throws IOException;

  /** Returns the number of bytes generated since last reset. */
  public final synchronized long getBytesCount() {
    return bytesCount;
  }

  /** Returns the number of generated items since last reset. */
  public final synchronized int getItemsCount() {
    return itemCount;
  }

  /** Returns the {@link Config} last passed to {@link #setConfig(Config)}. */
  public final Config getConfig() {
    return config;
  }

  /** Returns the total number of bytes that were generated by this source. */
  public final synchronized long getTotalBytesCount() {
    return totalBytesCount;
  }

  /** Returns the total number of generated items. */
  public final synchronized int getTotalItemsCount() {
    return totalItemCount;
  }

  /**
   * Resets the input for this content source, so that the test would behave as
   * if it was just started, input-wise.
   * <p>
   * <b>NOTE:</b> the default implementation resets the number of bytes and
   * items generated since the last reset, so it's important to call
   * super.resetInputs in case you override this method.
   *
   * @throws IOException declared for overriding implementations; the default
   *         implementation does not throw it
   */
  public synchronized void resetInputs() throws IOException {
    // Taken under the same monitor as addBytes()/addItem() so that a reset
    // cannot be lost to a concurrent increment.
    bytesCount = 0;
    itemCount = 0;
  }

  /**
   * Sets the {@link Config} for this content source. If you override this
   * method, you must call super.setConfig.
   *
   * @param config configuration from which the <code>content.source.*</code>
   *        parameters are read
   */
  public void setConfig(Config config) {
    this.config = config;
    forever = config.get("content.source.forever", true);
    logStep = config.get("content.source.log.step", 0);
    verbose = config.get("content.source.verbose", false);
    encoding = config.get("content.source.encoding", null);
  }

  /**
   * Prints a statistics report to standard output. Nothing is printed when
   * verbose is false, or when neither total counter has grown since the
   * previous report and no items were generated since the last reset.
   *
   * @param itemsName name of the items, used in the report's labels
   */
  public void printStatistics(String itemsName) {
    if (!verbose) {
      return;
    }
    // The report is assembled under the lock from one consistent view of the
    // counters; the actual printing happens outside of it.
    final String report = buildStatisticsReport(itemsName);
    if (report != null) {
      System.out.println(report);
    }
  }

  /**
   * Builds the text of the next statistics report and updates the report
   * bookkeeping, or returns null when there is nothing new to report.
   */
  private synchronized String buildStatisticsReport(String itemsName) {
    final String col = " ";
    final String newline = System.lineSeparator();
    final StringBuilder sb = new StringBuilder();
    boolean print = false;

    sb.append("------------> ").append(getClass().getSimpleName())
        .append(" statistics (").append(printNum).append("): ").append(newline);

    // Totals are only reported when they grew since the previous report.
    int nut = totalItemCount;
    if (nut > lastPrintedNumUniqueTexts) {
      print = true;
      sb.append("total count of ").append(itemsName).append(": ")
          .append(Format.format(0, nut, col)).append(newline);
      lastPrintedNumUniqueTexts = nut;
    }
    long nub = totalBytesCount;
    if (nub > lastPrintedNumUniqueBytes) {
      print = true;
      sb.append("total bytes of ").append(itemsName).append(": ")
          .append(Format.format(0, nub, col)).append(newline);
      lastPrintedNumUniqueBytes = nub;
    }
    if (itemCount > 0) {
      print = true;
      sb.append("num ").append(itemsName).append(" added since last inputs reset: ")
          .append(Format.format(0, itemCount, col)).append(newline);
      sb.append("total bytes added for ").append(itemsName).append(" since last inputs reset: ")
          .append(Format.format(0, bytesCount, col)).append(newline);
    }

    if (!print) {
      return null;
    }
    printNum++;
    return sb.append(newline).toString();
  }
}