// blob: f6e7de5c00116adbe31194f027b138391d4b12d3 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Text;
using Console = Lucene.Net.Support.SystemConsole;
namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Base class for source of data for benchmarking.
/// </summary>
/// <remarks>
/// Keeps track of various statistics, such as how many data items were generated,
/// size in bytes etc.
/// <para/>
/// Supports the following configuration parameters:
/// <list type="bullet">
/// <item><term>content.source.forever</term><description>specifies whether to generate items forever (<b>default=true</b>).</description></item>
/// <item><term>content.source.verbose</term><description>specifies whether messages should be output by the content source (<b>default=false</b>).</description></item>
/// <item><term>content.source.encoding</term><description>
/// specifies which encoding to use when
/// reading the files of that content source. If this parameter is missing or
/// blank, <see cref="SetConfig(Config)"/> falls back to <c>Encoding.GetEncoding(0)</c>
/// (the system default encoding).
/// </description></item>
/// <item><term>content.source.log.step</term><description>
/// specifies for how many items a
/// message should be logged. If set to 0 it means no logging should occur.
/// <b>NOTE:</b> if verbose is set to false, logging should not occur even if
/// logStep is not 0 (<b>default=0</b>).
/// </description></item>
/// </list>
/// </remarks>
public abstract class ContentItemsSource : IDisposable
{
    // Guards the four counters below. A private object is used instead of
    // lock (this) so that code outside this class cannot contend on (or
    // deadlock with) the monitor that protects the counters.
    private readonly object syncLock = new object();

    private long bytesCount;
    private long totalBytesCount;
    private int itemCount;
    private int totalItemCount;
    private Config config;

    // State used by PrintStatistics to avoid re-printing unchanged totals.
    private int lastPrintedNumUniqueTexts = 0;
    private long lastPrintedNumUniqueBytes = 0;
    private int printNum = 0;

    protected bool m_forever;
    protected int m_logStep;
    protected bool m_verbose;
    protected Encoding m_encoding;

    /// <summary>update count of bytes generated by this source</summary>
    /// <param name="numBytes">Number of bytes to add to both the since-reset and the total counters.</param>
    protected void AddBytes(long numBytes)
    {
        lock (syncLock)
        {
            bytesCount += numBytes;
            totalBytesCount += numBytes;
        }
    }

    /// <summary>update count of items generated by this source</summary>
    protected void AddItem()
    {
        lock (syncLock)
        {
            ++itemCount;
            ++totalItemCount;
        }
    }

    /// <summary>
    /// A convenience method for collecting all the files of a content source from
    /// a given directory. The collected <see cref="FileInfo"/> instances are stored in the
    /// given <paramref name="files"/>, which is then sorted by full file name (ordinal comparison).
    /// </summary>
    /// <param name="dir">Root directory; all of its sub-directories are visited recursively.</param>
    /// <param name="files">List that receives the collected files.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dir"/> or <paramref name="files"/> is <c>null</c>.</exception>
    protected void CollectFiles(DirectoryInfo dir, IList<FileInfo> files)
    {
        if (dir == null)
        {
            throw new ArgumentNullException(nameof(dir));
        }
        if (files == null)
        {
            throw new ArgumentNullException(nameof(files));
        }

        CollectFilesImpl(dir, files);
        files.Sort(new FileNameComparer());
    }

    /// <summary>
    /// Recursively appends the files of every sub-directory of <paramref name="dir"/>,
    /// followed by the files directly inside <paramref name="dir"/>, to <paramref name="files"/>.
    /// </summary>
    private void CollectFilesImpl(DirectoryInfo dir, IList<FileInfo> files)
    {
        foreach (var sub in dir.EnumerateDirectories())
        {
            CollectFilesImpl(sub, files);
        }
        files.AddRange(dir.GetFiles());
    }

    /// <summary>Orders <see cref="FileInfo"/> instances by their full path using ordinal comparison.</summary>
    private sealed class FileNameComparer : IComparer<FileInfo>
    {
        public int Compare(FileInfo x, FileInfo y)
        {
            return x.FullName.CompareToOrdinal(y.FullName);
        }
    }

    /// <summary>
    /// Returns <c>true</c> if it's time to log a message (depending on verbose and
    /// the number of items generated).
    /// </summary>
    /// <returns>
    /// <c>true</c> when verbose is enabled, the log step is greater than 0 and the number of
    /// items generated since the last reset is a multiple of the log step; otherwise <c>false</c>.
    /// </returns>
    protected bool ShouldLog()
    {
        return m_verbose && m_logStep > 0 && ItemsCount % m_logStep == 0;
    }

    /// <summary>Called when reading from this content source is no longer required.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Called when reading from this content source is no longer required.</summary>
    /// <param name="disposing"><c>true</c> when invoked from <see cref="Dispose()"/>.</param>
    protected abstract void Dispose(bool disposing);

    /// <summary>Returns the number of bytes generated since last reset.</summary>
    public long BytesCount
    {
        get
        {
            // Read under the lock: a 64-bit read is not guaranteed atomic on 32-bit runtimes.
            lock (syncLock)
            {
                return bytesCount;
            }
        }
    }

    /// <summary>Returns the number of generated items since last reset.</summary>
    public int ItemsCount
    {
        get
        {
            lock (syncLock)
            {
                return itemCount;
            }
        }
    }

    /// <summary>Returns the configuration last passed to <see cref="SetConfig(Config)"/>.</summary>
    public Config Config { get { return config; } }

    /// <summary>Returns the total number of bytes that were generated by this source.</summary>
    public long TotalBytesCount
    {
        get
        {
            // Read under the lock: a 64-bit read is not guaranteed atomic on 32-bit runtimes.
            lock (syncLock)
            {
                return totalBytesCount;
            }
        }
    }

    /// <summary>Returns the total number of generated items.</summary>
    public int TotalItemsCount
    {
        get
        {
            lock (syncLock)
            {
                return totalItemCount;
            }
        }
    }

    /// <summary>
    /// Resets the input for this content source, so that the test would behave as
    /// if it was just started, input-wise.
    /// <para/>
    /// <b>NOTE:</b> the default implementation resets the number of bytes and
    /// items generated since the last reset, so it's important to call
    /// <c>base.ResetInputs()</c> in case you override this method.
    /// </summary>
    public virtual void ResetInputs()
    {
        // Take the same lock as AddBytes/AddItem so that a reset cannot be
        // lost by interleaving with an in-flight increment.
        lock (syncLock)
        {
            bytesCount = 0;
            itemCount = 0;
        }
    }

    /// <summary>
    /// Sets the <see cref="Utils.Config"/> for this content source. If you override this
    /// method, you must call <c>base.SetConfig(config)</c>.
    /// </summary>
    /// <param name="config">Configuration from which the <c>content.source.*</c> parameters are read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is <c>null</c>.</exception>
    public virtual void SetConfig(Config config)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        this.config = config;
        m_forever = config.Get("content.source.forever", true);
        m_logStep = config.Get("content.source.log.step", 0);
        m_verbose = config.Get("content.source.verbose", false);

        string encodingStr = config.Get("content.source.encoding", null);
        if (!string.IsNullOrWhiteSpace(encodingStr))
        {
            m_encoding = Encoding.GetEncoding(encodingStr);
        }
        else
        {
            m_encoding = Encoding.GetEncoding(0); // Default system encoding
        }
    }

    /// <summary>
    /// Writes the item/byte statistics of this source to the console. Nothing is
    /// written when verbose is disabled, or when the totals have not grown since the
    /// previous call and no items were generated since the last inputs reset.
    /// </summary>
    /// <param name="itemsName">Name of the kind of items, used in the printed labels.</param>
    public virtual void PrintStatistics(string itemsName)
    {
        if (!m_verbose)
        {
            return;
        }

        bool print = false;
        string col = " ";
        string newline = Environment.NewLine;
        StringBuilder sb = new StringBuilder();
        sb.Append("------------> ").Append(GetType().Name).Append(" statistics (").Append(printNum).Append("): ").Append(newline);

        // Totals are only reported when they grew since the previous printout.
        int nut = TotalItemsCount;
        if (nut > lastPrintedNumUniqueTexts)
        {
            print = true;
            sb.Append("total count of ").Append(itemsName).Append(": ").Append(Formatter.Format(0, nut, col)).Append(newline);
            lastPrintedNumUniqueTexts = nut;
        }

        long nub = TotalBytesCount;
        if (nub > lastPrintedNumUniqueBytes)
        {
            print = true;
            sb.Append("total bytes of ").Append(itemsName).Append(": ").Append(Formatter.Format(0, nub, col)).Append(newline);
            lastPrintedNumUniqueBytes = nub;
        }

        // Snapshot once so the value that is tested is the value that is printed.
        int itemsSinceReset = ItemsCount;
        if (itemsSinceReset > 0)
        {
            long bytesSinceReset = BytesCount;
            print = true;
            sb.Append("num ").Append(itemsName).Append(" added since last inputs reset: ").Append(Formatter.Format(0, itemsSinceReset, col)).Append(newline);
            sb.Append("total bytes added for ").Append(itemsName).Append(" since last inputs reset: ").Append(Formatter.Format(0, bytesSinceReset, col)).Append(newline);
        }

        if (print)
        {
            Console.WriteLine(sb.Append(newline).ToString());
            printNum++;
        }
    }
}
}