blob: c4dcfe80d5eceeaf06781ff12efebbea92e91e46 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Support;
using System;
using System.Globalization;
#nullable enable
namespace Lucene.Net.Documents
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Provides support for converting dates to strings and vice-versa.
/// The strings are structured so that lexicographic sorting orders
/// them by date, which makes them suitable for use as field values
/// and search terms.
///
/// <para/>This class also helps you to limit the resolution of your dates. Do not
/// save dates with a finer resolution than you really need, as then
/// <see cref="Search.TermRangeQuery"/> and <see cref="Search.PrefixQuery"/> will require more memory and become slower.
///
/// <para/>
/// Another approach is <see cref="Util.NumericUtils"/>, which provides
/// a sortable binary representation (prefix encoded) of numeric values, which
/// date/time are.
///
/// For indexing a <see cref="DateTime"/>, just get the <see cref="UnixTimeMillisecondsToTicks(long)"/> from <see cref="DateTime.Ticks"/> and index
/// this as a numeric value with <see cref="Int64Field"/> and use <see cref="Search.NumericRangeQuery{T}"/>
/// to query it.
/// </summary>
// LUCENENET: This class was refactored significantly to be usable on the .NET platform, but still allows
// all of the same features as Java and prior versions of Lucene.NET
public static class DateTools
{
/// <summary>
/// Returns the date format string for the specified <paramref name="resolution"/>
/// or <c>null</c> if the resolution is invalid.
/// </summary>
private static string? ToDateFormat(DateResolution resolution) => resolution switch
{
DateResolution.YEAR => "yyyy",
DateResolution.MONTH => "yyyyMM",
DateResolution.DAY => "yyyyMMdd",
DateResolution.HOUR => "yyyyMMddHH",
DateResolution.MINUTE => "yyyyMMddHHmm",
DateResolution.SECOND => "yyyyMMddHHmmss",
DateResolution.MILLISECOND => "yyyyMMddHHmmssfff",
_ => null, // Invalid option
};
/// <summary>
/// Converts a <see cref="DateTime"/> to a string suitable for indexing using the specified
/// <paramref name="resolution"/>.
/// <para/>
/// The <paramref name="date"/> is converted according to its <see cref="DateTime.Kind"/> property
/// to the Universal Coordinated Time (UTC) prior to rounding to the the specified
/// <paramref name="resolution"/>. If <see cref="DateTime.Kind"/> is <see cref="DateTimeKind.Unspecified"/>,
/// <see cref="DateTimeKind.Local"/> is assumed.
/// </summary>
/// <param name="date">The date to be converted.</param>
/// <param name="resolution">The desired resolution, see
/// <see cref="Round(DateTime, DateResolution)"/>.</param>
/// <returns>An invariant string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
/// depending on <paramref name="resolution"/>; using UTC as the timezone.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="resolution"/> is not defined in the <see cref="DateResolution"/> enum.
/// </exception>
public static string DateToString(DateTime date, DateResolution resolution)
{
return DateToStringInternal(date, timeZone: null, resolution);
}
/// <summary>
/// Converts a <see cref="DateTime"/> to a string suitable for indexing using the specified <paramref name="timeZone"/>
/// and <paramref name="resolution"/>.
/// <para/>
/// The <paramref name="date"/> is converted from the specified <paramref name="timeZone"/> to Universal Coordinated Time
/// (UTC) prior to rounding to the the specified <paramref name="resolution"/>.
/// </summary>
/// <param name="date">The date to be converted.</param>
/// <param name="timeZone">The time zone of the specified <paramref name="date"/>.</param>
/// <param name="resolution">The desired resolution, see
/// <see cref="Round(DateTime, DateResolution)"/>.</param>
/// <returns>An invariant string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
/// depending on <paramref name="resolution"/>; using UTC as the timezone.</returns>
/// <exception cref="ArgumentNullException"><paramref name="timeZone"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="resolution"/> is not defined in the <see cref="DateResolution"/> enum.
/// </exception>
public static string DateToString(DateTime date, TimeZoneInfo timeZone, DateResolution resolution)
{
if (timeZone is null)
throw new ArgumentNullException(nameof(timeZone));
return DateToStringInternal(date, timeZone, resolution);
}
private static string DateToStringInternal(DateTime date, TimeZoneInfo? timeZone, DateResolution resolution)
{
string? format = ToDateFormat(resolution);
if (format is null)
throw new ArgumentException($"Unknown resolution {resolution}.");
DateTimeOffset timeZoneAdjusted = new DateTimeOffset(date.ToUniversalTime(), TimeSpan.Zero);
if (timeZone is not null && !TimeZoneInfo.Utc.Equals(timeZone))
{
timeZoneAdjusted = TimeZoneInfo.ConvertTime(timeZoneAdjusted, timeZone);
}
DateTime d = Round(timeZoneAdjusted.UtcDateTime, resolution);
return d.ToString(format, CultureInfo.InvariantCulture);
}
/// <summary>
/// Converts a <see cref="DateTimeOffset"/> to a string suitable for indexing using the specified
/// <paramref name="resolution"/>.
/// <para/>
/// The <paramref name="date"/> is converted using its <see cref="DateTimeOffset.UtcDateTime"/> property.
/// </summary>
/// <param name="date">The date to be converted.</param>
/// <param name="resolution">The desired resolution, see <see cref="Round(DateTime, DateResolution)"/>.</param>
/// <returns>An invariant string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
/// depending on <paramref name="resolution"/>; using UTC as the timezone.</returns>
public static string DateToString(DateTimeOffset date, DateResolution resolution)
{
string? format = ToDateFormat(resolution);
if (format is null)
throw new ArgumentException($"Unknown resolution {resolution}.");
DateTime d = Round(date.UtcDateTime, resolution);
return d.ToString(format, CultureInfo.InvariantCulture);
}
/// <summary>
/// Converts from a numeric representation of a time to a string suitable for indexing.
/// <para/>
/// <b>NOTE:</b> For compatibility with Lucene.NET 3.0.3 and Lucene.NET 4.8.0-beta00001 through 4.8.0-beta00015
/// specify <paramref name="inputRepresentation"/> as <see cref="NumericRepresentation.TICKS_AS_MILLISECONDS"/>.
/// </summary>
/// <param name="time">The ticks that represent the date to be converted.</param>
/// <param name="resolution">The desired resolution, see
/// <see cref="Round(long, DateResolution, NumericRepresentation, NumericRepresentation)"/>.</param>
/// <param name="inputRepresentation">The numeric representation of <paramref name="time"/>.</param>
/// <returns>An invariant string in format <c>yyyyMMddHHmmssSSS</c> or shorter,
/// depending on <paramref name="resolution"/>; using GMT as timezone.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="inputRepresentation"/> is not defined in the <see cref="NumericRepresentation"/> enum.
/// </exception>
public static string TimeToString(long time, DateResolution resolution,
NumericRepresentation inputRepresentation = NumericRepresentation.UNIX_TIME_MILLISECONDS)
{
string? format = ToDateFormat(resolution);
if (format is null)
throw new ArgumentException($"Unknown resolution {resolution}.");
DateTime date = new DateTime(Round(time, resolution, inputRepresentation, NumericRepresentation.TICKS), DateTimeKind.Utc);
return date.ToString(format, CultureInfo.InvariantCulture);
}
/// <summary>
/// Converts a string produced by <see cref="TimeToString(long, DateResolution, NumericRepresentation)"/> or
/// <see cref="DateToString(DateTime, DateResolution)"/> back to a time, represented as a <see cref="long"/>.
/// <para/>
/// <b>NOTE:</b> For compatibility with Lucene.NET 3.0.3 and Lucene.NET 4.8.0-beta00001 through 4.8.0-beta00015
/// specify <paramref name="outputRepresentation"/> as <see cref="NumericRepresentation.TICKS"/>.
/// </summary>
/// <param name="dateString"> The date string to be converted. </param>
/// <param name="outputRepresentation">The numeric representation of the return value.</param>
/// <returns>A numeric representation of <paramref name="dateString"/> represented as specified by
/// <paramref name="outputRepresentation"/>.</returns>
/// <exception cref="ParseException"><paramref name="dateString"/> is not in the expected format.</exception>
/// <exception cref="ArgumentNullException"><paramref name="dateString"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="outputRepresentation"/> is not defined in the <see cref="NumericRepresentation"/> enum.
/// </exception>
public static long StringToTime(string dateString, NumericRepresentation outputRepresentation = NumericRepresentation.UNIX_TIME_MILLISECONDS)
{
long ticks = StringToDate(dateString).Ticks;
return outputRepresentation switch
{
NumericRepresentation.UNIX_TIME_MILLISECONDS => TicksToUnixTimeMilliseconds(ticks),
NumericRepresentation.TICKS => ticks,
NumericRepresentation.TICKS_AS_MILLISECONDS => ticks / TimeSpan.TicksPerMillisecond,
_ => throw new ArgumentException($"'{outputRepresentation}' is not a valid {nameof(outputRepresentation)}.")
};
}
/// <summary>
/// Converts a string produced by <see cref="TimeToString(long, DateResolution, NumericRepresentation)"/> or
/// <see cref="DateToString(DateTime, DateResolution)"/> back to a time, represented as a
/// <see cref="DateTime"/> object.
/// </summary>
/// <param name="dateString"> the date string to be converted </param>
/// <returns> the parsed time as a <see cref="DateTime"/> object </returns>
/// <exception cref="ParseException"> if <paramref name="dateString"/> is not in the
/// expected format </exception>
/// <exception cref="ArgumentNullException"><paramref name="dateString"/> is <c>null</c>.</exception>
public static DateTime StringToDate(string dateString)
{
if (dateString is null)
throw new ArgumentNullException(nameof(dateString));
string? format = ToDateFormat((DateResolution)dateString.Length);
if (format is null || !DateTimeOffset.TryParseExact(dateString, format, DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal, out DateTimeOffset dateOffset))
throw new ParseException($"Input is not valid date string: '{dateString}'.", 0);
return dateOffset.DateTime;
}
/// <summary>
/// Limit a date's resolution. For example, the date <c>2004-09-21 13:50:11</c>
/// will be changed to <c>2004-09-01 00:00:00</c> when using
/// <see cref="DateResolution.MONTH"/>.
/// </summary>
/// <param name="date"> The <see cref="DateTime"/> to be rounded.</param>
/// <param name="resolution"> The desired resolution of the <see cref="DateTime"/> to be returned. </param>
/// <returns> The <see cref="DateTime"/> with all values more precise than <paramref name="resolution"/>
/// set to their minimum value (0 or 1 depending on the field).</returns>
/// <exception cref="ArgumentException">
/// <paramref name="resolution"/> is not defined in the <see cref="DateResolution"/> enum.
/// </exception>
public static DateTime Round(DateTime date, DateResolution resolution)
{
return new DateTime(Round(date.Ticks, resolution,
inputRepresentation: NumericRepresentation.TICKS,
outputRepresentation: NumericRepresentation.TICKS));
}
/// <summary>
/// Limit a date's resolution.
/// <para/>
/// For example, the time <c>1095774611000</c>
/// (which represents 2004-09-21 13:50:11) will be changed to
/// <c>1093996800000</c> (2004-09-01 00:00:00) when using
/// <see cref="DateResolution.MONTH"/> and <see cref="NumericRepresentation.UNIX_TIME_MILLISECONDS"/>
/// for both <paramref name="inputRepresentation"/> and <paramref name="outputRepresentation"/>.
/// <para/>
/// The ticks <c>632313714110000000</c>
/// (which represents 2004-09-21 13:50:11) will be changed to
/// <c>632295936000000000</c> (2004-09-01 00:00:00) when using
/// <see cref="DateResolution.MONTH"/> and <see cref="NumericRepresentation.TICKS"/>
/// for both <paramref name="inputRepresentation"/> and <paramref name="outputRepresentation"/>.
/// <para/>
/// <b>NOTE:</b> For compatibility with Lucene.NET 3.0.3 and Lucene.NET 4.8.0-beta00001 through 4.8.0-beta00015
/// specify <paramref name="inputRepresentation"/> as <see cref="NumericRepresentation.TICKS_AS_MILLISECONDS"/> and
/// <paramref name="outputRepresentation"/> as <see cref="NumericRepresentation.TICKS"/>.
/// </summary>
/// <param name="time">The ticks that represent the date to be rounded.</param>
/// <param name="resolution">The desired resolution of the date to be returned.</param>
/// <param name="inputRepresentation">The numeric representation of <paramref name="time"/>.</param>
/// <param name="outputRepresentation">The numeric representation of the return value.</param>
/// <returns>The date with all values more precise than <paramref name="resolution"/>
/// set to their minimum value (0 or 1 depending on the field). The return value is expressed in ticks.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="resolution"/> is not defined in the <see cref="DateResolution"/> enum.
/// <para/>
/// -or-
/// <para/>
/// <paramref name="inputRepresentation"/> is not defined in the <see cref="NumericRepresentation"/> enum.
/// <para/>
/// -or-
/// <para/>
/// <paramref name="outputRepresentation"/> is not defined in the <see cref="NumericRepresentation"/> enum.
/// </exception>
public static long Round(long time, DateResolution resolution,
NumericRepresentation inputRepresentation = NumericRepresentation.UNIX_TIME_MILLISECONDS,
NumericRepresentation outputRepresentation = NumericRepresentation.UNIX_TIME_MILLISECONDS)
{
long ticks = inputRepresentation switch
{
NumericRepresentation.UNIX_TIME_MILLISECONDS => UnixTimeMillisecondsToTicks(time),
NumericRepresentation.TICKS => time,
NumericRepresentation.TICKS_AS_MILLISECONDS => time * TimeSpan.TicksPerMillisecond,
_ => throw new ArgumentException($"'{inputRepresentation}' is not a valid {nameof(inputRepresentation)}.")
};
DateTimeOffset dt = new DateTimeOffset(ticks, TimeSpan.Zero);
// Remove extra ticks beyond milliseconds
dt = new DateTimeOffset(dt.Year, dt.Month, dt.Day, dt.Hour, dt.Minute, dt.Second, dt.Millisecond, TimeSpan.Zero);
if (resolution == DateResolution.YEAR)
{
dt = dt.AddMonths(1 - dt.Month);
dt = dt.AddDays(1 - dt.Day);
dt = dt.AddHours(0 - dt.Hour);
dt = dt.AddMinutes(0 - dt.Minute);
dt = dt.AddSeconds(0 - dt.Second);
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.MONTH)
{
dt = dt.AddDays(1 - dt.Day);
dt = dt.AddHours(0 - dt.Hour);
dt = dt.AddMinutes(0 - dt.Minute);
dt = dt.AddSeconds(0 - dt.Second);
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.DAY)
{
dt = dt.AddHours(0 - dt.Hour);
dt = dt.AddMinutes(0 - dt.Minute);
dt = dt.AddSeconds(0 - dt.Second);
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.HOUR)
{
dt = dt.AddMinutes(0 - dt.Minute);
dt = dt.AddSeconds(0 - dt.Second);
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.MINUTE)
{
dt = dt.AddSeconds(0 - dt.Second);
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.SECOND)
{
dt = dt.AddMilliseconds(0 - dt.Millisecond);
}
else if (resolution == DateResolution.MILLISECOND)
{
// don't cut off anything
}
else
{
throw new ArgumentException("unknown resolution " + resolution);
}
return outputRepresentation switch
{
NumericRepresentation.UNIX_TIME_MILLISECONDS => TicksToUnixTimeMilliseconds(dt.Ticks),
NumericRepresentation.TICKS => dt.Ticks,
NumericRepresentation.TICKS_AS_MILLISECONDS => dt.Ticks / TimeSpan.TicksPerMillisecond,
_ => throw new ArgumentException($"'{outputRepresentation}' is not a valid {nameof(outputRepresentation)}.")
};
}
/// <summary>
/// Converts from .NET ticks to the number of milliseconds since January 1, 1970, 00:00:00
/// UTC (also known as the "epoch").
/// <para/>
/// This is the value that is stored in Java Lucene indexes and can be used for storing values
/// that can be read by Java Lucene.
/// </summary>
/// <param name="ticks">The .NET ticks to be converted.</param>
/// <returns>The converted ticks to number of milliseconds since January 1, 1970, 00:00:00
/// UTC (also known as the "epoch").</returns>
public static long TicksToUnixTimeMilliseconds(long ticks)
{
return DateTimeOffsetUtil.ToUnixTimeMilliseconds(ticks);
}
/// <summary>
/// Converts from the number of milliseconds since January 1, 1970, 00:00:00 UTC
/// (also known as the "epoch") to .NET ticks.
/// </summary>
/// <param name="unixTimeMilliseconds">The number of milliseconds since January 1, 1970, 00:00:00
/// UTC (also known as the "epoch") to be converted.</param>
/// <returns>The converted .NET ticks that can be used to create a <see cref="DateTime"/> or <see cref="DateTimeOffset"/>.</returns>
public static long UnixTimeMillisecondsToTicks(long unixTimeMilliseconds)
{
return DateTimeOffsetUtil.GetTicksFromUnixTimeMilliseconds(unixTimeMilliseconds);
}
}
/// <summary>
/// Specifies the time granularity. </summary>
public enum DateResolution
{
/// <summary>
/// Limit a date's resolution to year granularity. </summary>
YEAR = 4,
/// <summary>
/// Limit a date's resolution to month granularity. </summary>
MONTH = 6,
/// <summary>
/// Limit a date's resolution to day granularity. </summary>
DAY = 8,
/// <summary>
/// Limit a date's resolution to hour granularity. </summary>
HOUR = 10,
/// <summary>
/// Limit a date's resolution to minute granularity. </summary>
MINUTE = 12,
/// <summary>
/// Limit a date's resolution to second granularity. </summary>
SECOND = 14,
/// <summary>
/// Limit a date's resolution to millisecond granularity. </summary>
MILLISECOND = 17
}
/// <summary>
/// Specifies how a time will be represented as a <see cref="long"/>.
/// </summary>
public enum NumericRepresentation
{
/// <summary>
/// The number of milliseconds since January 1, 1970, 00:00:00
/// UTC (also known as the "epoch"). This is the format that Lucene
/// uses, and it is recommended to store this value in the index for compatibility.
/// </summary>
UNIX_TIME_MILLISECONDS = 0,
/// <summary>
/// The .NET ticks representing a date. Specify this to pass the raw ticks from <see cref="DateTime.Ticks"/>
/// or to instantiate a new <see cref="DateTime"/> from the result.
/// </summary>
TICKS = 1,
/// <summary>
/// .NET ticks as total milliseconds.
/// Input values must be converted using the formula <c>ticks / <see cref="TimeSpan.TicksPerMillisecond"/></c>.
/// Output values can be converted to ticks using <c>ticks * <see cref="TimeSpan.TicksPerMillisecond"/></c>.
/// <para/>
/// This option is provided for compatibility with Lucene.NET 3.0.3 and Lucene.NET 4.8.0-beta00001 through 4.8.0-beta00015,
/// since it was the only option for input representation.
/// </summary>
TICKS_AS_MILLISECONDS = 2,
}
}