blob: 1820cad48bd09accfdcd6c75dec866b91d92bf09 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Diagnostics;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using WritableArrayAttribute = Lucene.Net.Support.WritableArrayAttribute;
namespace Lucene.Net.Util
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Represents <see cref="T:byte[]"/>, as a slice (offset + length) into an
/// existing <see cref="T:byte[]"/>. The <see cref="Bytes"/> property should never be <c>null</c>;
/// use <see cref="EMPTY_BYTES"/> if necessary.
///
/// <para/><b>Important note:</b> Unless otherwise noted, Lucene uses this class to
/// represent terms that are encoded as <b>UTF8</b> bytes in the index. To
/// convert them to a .NET <see cref="string"/> (which is UTF16), use <see cref="Utf8ToString()"/>.
/// Using code like <c>new String(bytes, offset, length)</c> to do this
/// is <b>wrong</b>, as it does not respect the correct character set
/// and may return wrong results (depending on the platform's defaults)!
/// </summary>
#if FEATURE_SERIALIZABLE
[Serializable]
#endif
// LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
[DebuggerDisplay("{ToString()} {Utf8ToString()}")]
public sealed class BytesRef : IComparable<BytesRef>, IComparable, IEquatable<BytesRef> // LUCENENET specific - implemented IComparable for FieldComparator, IEquatable<BytesRef>
{
/// <summary>
/// A shared zero-length byte array, provided for convenience. It is the backing
/// array used by the parameterless <see cref="BytesRef()"/> constructor.
/// </summary>
// The suppressions below silence analyzer warnings about exposing an array through a
// public static readonly field; a zero-length array has no elements that could be modified.
[SuppressMessage("CodeQuality", "IDE0079:Remove unnecessary suppression", Justification = "This is a SonarCloud issue")]
[SuppressMessage("Performance", "S3887:Use an immutable collection or reduce the accessibility of the non-private readonly field", Justification = "Collection is immutable")]
[SuppressMessage("Performance", "S2386:Use an immutable collection or reduce the accessibility of the public static field", Justification = "Collection is immutable")]
public static readonly byte[] EMPTY_BYTES = Array.Empty<byte>();
/// <summary>
/// Gets or sets the backing array that this slice points into.
/// The array is exposed directly (no copy is made) and should never be <c>null</c>.
/// </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public byte[] Bytes
{
    get { return bytes; }

    // LUCENENET NOTE: Although the comments state this cannot be null,
    // some of the tests depend on setting it to null, so no validation is done here.
    set { bytes = value; }
}

// Backing field for the Bytes property.
private byte[] bytes;
/// <summary>
/// Index into <see cref="Bytes"/> of the first valid byte of this slice.
/// The setter performs no validation; <see cref="IsValid()"/> can be used
/// to check the consistency of the instance.
/// </summary>
public int Offset { get; set; }
/// <summary>
/// Number of valid bytes in this slice, counted from <see cref="Offset"/>.
/// The setter performs no validation; <see cref="IsValid()"/> can be used
/// to check the consistency of the instance.
/// </summary>
public int Length { get; set; }
/// <summary>
/// Create a <see cref="BytesRef"/> backed by <see cref="EMPTY_BYTES"/>.
/// The instance is created through <see cref="BytesRef(byte[])"/>, so the
/// offset is zero and the length is the length of <see cref="EMPTY_BYTES"/>.
/// </summary>
public BytesRef()
: this(EMPTY_BYTES)
{
}
/// <summary>
/// Creates a slice over an existing array. The array is referenced directly;
/// no copy of <paramref name="bytes"/> is made.
/// </summary>
/// <param name="bytes">The backing array. Should not be <c>null</c>.</param>
/// <param name="offset">Index of the first valid byte in <paramref name="bytes"/>.</param>
/// <param name="length">Number of valid bytes starting at <paramref name="offset"/>.</param>
public BytesRef(byte[] bytes, int offset, int length)
{
    this.bytes = bytes;
    Offset = offset;
    Length = length;

    // Consistency is only verified when assertions are enabled.
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(IsValid());
    }
}
/// <summary>
/// This instance will directly reference <paramref name="bytes"/> w/o making a copy.
/// The slice covers the whole array (offset zero, length <c>bytes.Length</c>).
/// </summary>
/// <param name="bytes">
/// The backing array. Must not be <c>null</c>: its length is read in the
/// constructor initializer, so a <c>null</c> argument results in a
/// <see cref="NullReferenceException"/>.
/// </param>
public BytesRef(byte[] bytes)
: this(bytes, 0, bytes.Length)
{
}
/// <summary>
/// Creates a new <see cref="BytesRef"/> holding a private copy of the bytes in
/// <paramref name="value"/>. A new array of exactly <c>value.Length</c> bytes is
/// allocated; the resulting offset is zero and the length equals the span length.
/// </summary>
/// <param name="value">The bytes to copy.</param>
public BytesRef(ReadOnlySpan<byte> value) // LUCENENET specific
{
    byte[] copy = new byte[value.Length];
    value.CopyTo(copy);

    bytes = copy;
    Offset = 0;
    Length = copy.Length;
}
/// <summary>
/// Create a <see cref="BytesRef"/> pointing to a new array of size <paramref name="capacity"/>.
/// Offset and length will both be zero.
/// </summary>
/// <param name="capacity">The size of the backing array to allocate.</param>
public BytesRef(int capacity)
{
// Offset and Length are not assigned here, so they keep their default value of zero.
this.bytes = new byte[capacity];
}
/// <summary>
/// Initialize the <see cref="T:byte[]"/> from the UTF8 bytes
/// for the provided <see cref="ICharSequence"/>.
/// <para/>
/// The instance is first initialized through the parameterless constructor and
/// then filled by <see cref="CopyChars(ICharSequence)"/>.
/// </summary>
/// <param name="text"> This must be well-formed
/// unicode text, with no unpaired surrogates. </param>
public BytesRef(ICharSequence text)
: this()
{
CopyChars(text);
}
/// <summary>
/// Initialize the <see cref="T:byte[]"/> from the UTF8 bytes
/// for the provided <see cref="ReadOnlySpan{Char}"/>.
/// <para/>
/// The instance is first initialized through the parameterless constructor and
/// then filled by <see cref="CopyChars(ReadOnlySpan{char})"/>.
/// </summary>
/// <param name="text"> This must be well-formed
/// unicode text, with no unpaired surrogates. </param>
public BytesRef(ReadOnlySpan<char> text) // LUCENENET specific
: this()
{
CopyChars(text);
}
/// <summary>
/// Initialize the <see cref="T:byte[]"/> from the UTF8 bytes
/// for the provided <see cref="string"/>.
/// <para/>
/// The instance is first initialized through the parameterless constructor and
/// then filled by <see cref="CopyChars(string)"/>.
/// </summary>
/// <param name="text"> This must be well-formed
/// unicode text, with no unpaired surrogates. </param>
public BytesRef(string text)
: this()
{
CopyChars(text);
}
/// <summary>
/// Replaces the content of this instance with the UTF8 encoding of
/// <paramref name="text"/>. The conversion is delegated to
/// <see cref="UnicodeUtil"/>, which writes into this instance.
/// </summary>
/// <param name="text"> Must be well-formed unicode text, with no
/// unpaired surrogates or invalid UTF16 code units. </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void CopyChars(ICharSequence text)
{
    // TODO broken if offset != 0
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(Offset == 0);
    }

    int charCount = text.Length;
    UnicodeUtil.UTF16toUTF8(text, 0, charCount, this);
}
/// <summary>
/// Replaces the content of this instance with the UTF8 encoding of
/// <paramref name="text"/>. The conversion is delegated to
/// <see cref="UnicodeUtil"/>, which writes into this instance.
/// </summary>
/// <param name="text"> Must be well-formed unicode text, with no
/// unpaired surrogates or invalid UTF16 code units. </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void CopyChars(ReadOnlySpan<char> text) // LUCENENET specific
{
    // TODO broken if offset != 0
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(Offset == 0);
    }

    UnicodeUtil.UTF16toUTF8(text, this);
}
/// <summary>
/// Replaces the content of this instance with the UTF8 encoding of
/// <paramref name="text"/>. The conversion is delegated to
/// <see cref="UnicodeUtil"/>, which writes into this instance.
/// </summary>
/// <param name="text"> Must be well-formed unicode text, with no
/// unpaired surrogates or invalid UTF16 code units. </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void CopyChars(string text)
{
    // TODO broken if offset != 0
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(Offset == 0);
    }

    int charCount = text.Length;
    UnicodeUtil.UTF16toUTF8(text, 0, charCount, this);
}
/// <summary>
/// Expert: checks whether the valid bytes of this instance are identical to the
/// valid bytes of <paramref name="other"/>. Two slices are equal when they have
/// the same length and the same byte at every position; their offsets and
/// backing arrays may differ.
/// <para/>
/// @lucene.internal
/// </summary>
/// <param name="other"> Another <see cref="BytesRef"/>, should not be <c>null</c>. </param>
/// <returns><c>true</c> if both slices contain the same bytes; otherwise, <c>false</c>.</returns>
public bool BytesEquals(BytesRef other)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(other != null);
    }

    // Slices of different length can never be equal.
    if (Length != other.Length)
    {
        return false;
    }

    byte[] theirBytes = other.bytes;
    int theirIndex = other.Offset;
    int myIndex = Offset;
    int myEnd = Offset + Length;

    // Walk both slices in lock-step and stop at the first mismatch.
    while (myIndex < myEnd)
    {
        if (bytes[myIndex] != theirBytes[theirIndex])
        {
            return false;
        }
        myIndex++;
        theirIndex++;
    }

    return true;
}
/// <summary>
/// Creates a shallow clone of this instance: the new <see cref="BytesRef"/> has the
/// same offset and length and references the <b>same</b> backing array, so the
/// bytes are shared between the returned object and this object.
/// </summary>
/// <returns>A new <see cref="BytesRef"/> sharing this instance's array.</returns>
/// <seealso cref="DeepCopyOf(BytesRef)"/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public object Clone()
    => new BytesRef(bytes, Offset, Length);
/// <summary>
/// Computes the hash code as required by <see cref="Index.TermsHash"/> during indexing.
/// <para/> The current implementation is MurmurHash3 (32 bit) seeded with
/// <see cref="StringHelper.GoodFastHashSeed"/>; this is subject to
/// change from release to release.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
    => StringHelper.Murmurhash3_x86_32(this, StringHelper.GoodFastHashSeed);
/// <summary>
/// Determines whether <paramref name="other"/> is a <see cref="BytesRef"/> whose
/// valid bytes are identical to those of this instance.
/// </summary>
/// <param name="other">The object to compare with; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if <paramref name="other"/> is a <see cref="BytesRef"/> with equal
/// bytes; <c>false</c> if it is <c>null</c>, of another type, or has different bytes.
/// </returns>
public override bool Equals(object other)
    => other is BytesRef otherBytes && BytesEquals(otherBytes);
/// <summary>
/// Determines whether the valid bytes of <paramref name="other"/> are identical
/// to those of this instance.
/// </summary>
/// <param name="other">The instance to compare with; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if <paramref name="other"/> is not <c>null</c> and contains the same
/// bytes; otherwise, <c>false</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
bool IEquatable<BytesRef>.Equals(BytesRef other) // LUCENENET specific - implemented IEquatable<BytesRef>
    // BytesEquals dereferences its argument, so null must be filtered out here to
    // honor the IEquatable<T> contract (and to agree with Equals(object)).
    => other is not null && BytesEquals(other);
/// <summary>
/// Decodes the valid bytes of this instance as UTF8 and returns the
/// resulting <see cref="string"/>.
/// </summary>
/// <returns>The decoded text.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string Utf8ToString()
{
    var chars = new CharsRef(Length);
    UnicodeUtil.UTF8toUTF16(bytes, Offset, Length, chars);
    return chars.ToString();
}
/// <summary>
/// Decodes the valid bytes of this instance as UTF8 and returns the
/// resulting <see cref="string"/>.
/// </summary>
/// <remarks>
/// LUCENENET specific version that does not throw exceptions on invalid UTF-8.
/// It is intended for ToString() and similar code paths that must not throw,
/// such as when building a message for another exception.
/// </remarks>
/// <returns>The decoded text.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string Utf8ToStringWithFallback()
{
    var chars = new CharsRef(Length);
    UnicodeUtil.UTF8toUTF16WithFallback(bytes, Offset, Length, chars);
    return chars.ToString();
}
#nullable enable
/// <summary>
/// Attempts to decode the valid bytes of this instance as UTF8, handing back the
/// decoded <see cref="string"/> through <paramref name="result"/>.
/// </summary>
/// <param name="result">
/// Receives the decoded string on success; <c>null</c> when the conversion fails.
/// </param>
/// <returns><c>true</c> if successful, <c>false</c> otherwise.</returns>
public bool TryUtf8ToString([NotNullWhen(true)] out string? result)
{
    if (!UnicodeUtil.TryUTF8toUTF16(bytes, Offset, Length, out CharsRef? chars))
    {
        result = null;
        return false;
    }

    result = chars.ToString();
    return true;
}
#nullable restore
/// <summary>
/// Returns the valid bytes as space-separated lowercase hexadecimal values
/// enclosed in square brackets, eg <c>[6c 75 63 65 6e 65]</c>.
/// </summary>
public override string ToString()
{
    var builder = new StringBuilder();
    builder.Append('[');

    int first = Offset;
    int end = first + Length;
    for (int index = first; index < end; index++)
    {
        // Separate every byte from its predecessor with a single space.
        if (index != first)
        {
            builder.Append(' ');
        }

        int unsignedValue = bytes[index] & 0xff;
        builder.Append(unsignedValue.ToString("x"));
    }

    builder.Append(']');
    return builder.ToString();
}
/// <summary>
/// Overwrites the content of this instance with a copy of the valid bytes of
/// <paramref name="other"/>.
/// <para/>
/// NOTE: if the bytes do not fit into the current array starting at
/// <see cref="Offset"/>, a new array of exactly <c>other.Length</c> bytes is
/// allocated and the offset is reset to zero.
/// </summary>
/// <param name="other">The source of the bytes to copy.</param>
public void CopyBytes(BytesRef other)
{
    int count = other.Length;
    int available = Bytes.Length - Offset;

    if (available < count)
    {
        // Not enough room behind the current offset: switch to a fresh array.
        bytes = new byte[count];
        Offset = 0;
    }

    Arrays.Copy(other.bytes, other.Offset, bytes, Offset, count);
    Length = count;
}
/// <summary>
/// Appends the valid bytes of <paramref name="other"/> to the end of this slice.
/// <para/>
/// NOTE: if the combined content does not fit into the current array starting at
/// <see cref="Offset"/>, a new array of exactly the combined length is allocated,
/// the existing content is moved to its beginning and the offset is reset to zero.
/// </summary>
/// <param name="other">The bytes to append.</param>
public void Append(BytesRef other)
{
    int currentLength = Length;
    int combinedLength = currentLength + other.Length;

    if (bytes.Length - Offset < combinedLength)
    {
        // Relocate the existing content to the start of a larger array.
        byte[] enlarged = new byte[combinedLength];
        Arrays.Copy(bytes, Offset, enlarged, 0, currentLength);
        Offset = 0;
        bytes = enlarged;
    }

    Arrays.Copy(other.bytes, other.Offset, bytes, currentLength + Offset, other.Length);
    Length = combinedLength;
}
/// <summary>
/// Appends the bytes of the given <see cref="ReadOnlySpan{Byte}"/> to the end of this slice.
/// <para/>
/// NOTE: if the combined content does not fit into the current array starting at
/// <see cref="Offset"/>, a new array of exactly the combined length is allocated,
/// the existing content is moved to its beginning and the offset is reset to zero.
/// </summary>
/// <param name="other">The bytes to append.</param>
public void Append(ReadOnlySpan<byte> other) // LUCENENET specific
{
    int currentLength = Length;
    int combinedLength = currentLength + other.Length;

    if (bytes.Length - Offset < combinedLength)
    {
        // Relocate the existing content to the start of a larger array.
        byte[] enlarged = new byte[combinedLength];
        Arrays.Copy(bytes, Offset, enlarged, 0, currentLength);
        Offset = 0;
        bytes = enlarged;
    }

    Span<byte> destination = bytes.AsSpan(currentLength + Offset, other.Length);
    other.CopyTo(destination);
    Length = combinedLength;
}
/// <summary>
/// Grows the backing array via <see cref="ArrayUtil"/> so that it can hold
/// <paramref name="newLength"/> bytes.
/// <para/>
/// In general this should not be used as it does not take the offset into account.
/// <para/>
/// @lucene.internal
/// </summary>
/// <param name="newLength">The requested minimum size passed to <see cref="ArrayUtil"/>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Grow(int newLength)
{
    // NOTE: senseless if offset != 0
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(Offset == 0);
    }

    byte[] grown = ArrayUtil.Grow(bytes, newLength);
    bytes = grown;
}
/// <summary>
/// Unsigned byte order comparison against an arbitrary object.
/// </summary>
/// <param name="other">
/// The object to compare with. Must be a <see cref="BytesRef"/> or <c>null</c>.
/// </param>
/// <returns>
/// A negative value, zero, or a positive value when this instance sorts before,
/// equal to, or after <paramref name="other"/>. A <c>null</c> argument always
/// sorts before this instance, so a positive value is returned.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <see cref="BytesRef"/>.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompareTo(object other) // LUCENENET specific: Implemented IComparable for FieldComparer
{
    // Per the IComparable contract, any instance compares greater than null.
    if (other is null)
    {
        return 1;
    }

    // A foreign type previously surfaced as a NullReferenceException from inside
    // the comparer; report it explicitly instead.
    if (other is not BytesRef br)
    {
        throw new ArgumentException("Object must be of type BytesRef.", nameof(other));
    }

    return utf8SortedAsUnicodeSortOrder.Compare(this, br);
}
/// <summary>
/// Unsigned byte order comparison.
/// </summary>
/// <param name="other">The instance to compare with; may be <c>null</c>.</param>
/// <returns>
/// A negative value, zero, or a positive value when this instance sorts before,
/// equal to, or after <paramref name="other"/>. A <c>null</c> argument always
/// sorts before this instance, so a positive value is returned.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompareTo(BytesRef other)
{
    // Per the IComparable<T> contract, any instance compares greater than null.
    // Without this guard the comparer dereferences null, which also made the
    // relational operators throw when their right-hand operand was null.
    if (other is null)
    {
        return 1;
    }

    return utf8SortedAsUnicodeSortOrder.Compare(this, other);
}
// Cached reference to the singleton comparer; used by both CompareTo overloads.
private static readonly IComparer<BytesRef> utf8SortedAsUnicodeSortOrder = Utf8SortedAsUnicodeComparer.Instance;
/// <summary>
/// Gets the shared comparer that orders <see cref="BytesRef"/> instances by
/// unsigned byte order.
/// </summary>
public static IComparer<BytesRef> UTF8SortedAsUnicodeComparer => utf8SortedAsUnicodeSortOrder;
// LUCENENET NOTE: De-nested Utf8SortedAsUnicodeComparer class to prevent naming conflict
// Deprecated: this comparer is only a transition mechanism.
[Obsolete("this comparer is only a transition mechanism")]
private static readonly IComparer<BytesRef> utf8SortedAsUTF16SortOrder = new Utf8SortedAsUtf16Comparer();
/// <summary>
/// Gets the shared comparer that orders UTF8 encoded <see cref="BytesRef"/>
/// instances in UTF16 sort order.
/// Deprecated: this comparer is only a transition mechanism.
/// </summary>
[Obsolete("this comparer is only a transition mechanism")]
public static IComparer<BytesRef> UTF8SortedAsUTF16Comparer => utf8SortedAsUTF16SortOrder;
// LUCENENET NOTE: De-nested Utf8SortedAsUtf16Comparer class to prevent naming conflict
/// <summary>
/// Creates a new, independent <see cref="BytesRef"/> containing a copy of the
/// valid bytes of <paramref name="other"/>.
/// <para/>
/// The returned <see cref="BytesRef"/> will have a length of <c>other.Length</c>
/// and an offset of zero.
/// </summary>
/// <param name="other">The instance whose bytes are copied.</param>
/// <returns>A new instance that does not share its array with <paramref name="other"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BytesRef DeepCopyOf(BytesRef other)
{
    var duplicate = new BytesRef();
    duplicate.CopyBytes(other);
    return duplicate;
}
/// <summary>
/// Performs internal consistency checks on the array, offset and length.
/// Always returns true (or throws <see cref="InvalidOperationException"/>)
/// </summary>
/// <returns><c>true</c> when every check passes.</returns>
public bool IsValid()
{
    byte[] array = Bytes;
    if (array is null)
    {
        throw IllegalStateException.Create("bytes is null");
    }

    int length = Length;
    int capacity = array.Length;
    if (length < 0)
    {
        throw IllegalStateException.Create("length is negative: " + length);
    }
    if (length > capacity)
    {
        throw IllegalStateException.Create("length is out of bounds: " + length + ",bytes.length=" + capacity);
    }

    int offset = Offset;
    if (offset < 0)
    {
        throw IllegalStateException.Create("offset is negative: " + offset);
    }
    if (offset > capacity)
    {
        throw IllegalStateException.Create("offset out of bounds: " + offset + ",bytes.length=" + capacity);
    }

    // The sum is computed once; a negative value indicates integer overflow.
    int end = offset + length;
    if (end < 0)
    {
        throw IllegalStateException.Create("offset+length is negative: offset=" + offset + ",length=" + length);
    }
    if (end > capacity)
    {
        throw IllegalStateException.Create("offset+length out of bounds: offset=" + offset + ",length=" + length + ",bytes.length=" + capacity);
    }

    return true;
}
#region Operator overrides
#nullable enable
// LUCENENET specific - per csharpsquid:S1210, IComparable<T> should override comparison operators.
// In all relational operators a null left operand is handled here without calling
// CompareTo; otherwise the decision is delegated to left.CompareTo(right).

/// <summary>Returns <c>true</c> if <paramref name="left"/> sorts before <paramref name="right"/>.</summary>
public static bool operator <(BytesRef? left, BytesRef? right)
{
    if (left is null)
    {
        return right is not null;
    }
    return left.CompareTo(right) < 0;
}

/// <summary>Returns <c>true</c> if <paramref name="left"/> sorts before or equal to <paramref name="right"/>.</summary>
public static bool operator <=(BytesRef? left, BytesRef? right)
{
    if (left is null)
    {
        return true;
    }
    return left.CompareTo(right) <= 0;
}

/// <summary>Returns <c>true</c> if <paramref name="left"/> sorts after <paramref name="right"/>.</summary>
public static bool operator >(BytesRef? left, BytesRef? right)
{
    if (left is null)
    {
        return false;
    }
    return left.CompareTo(right) > 0;
}

/// <summary>Returns <c>true</c> if <paramref name="left"/> sorts after or equal to <paramref name="right"/>.</summary>
public static bool operator >=(BytesRef? left, BytesRef? right)
{
    if (left is null)
    {
        return right is null;
    }
    return left.CompareTo(right) >= 0;
}

/// <summary>Returns <c>true</c> if both operands are <c>null</c> or <paramref name="left"/> equals <paramref name="right"/>.</summary>
public static bool operator ==(BytesRef? left, BytesRef? right)
{
    if (left is null)
    {
        return right is null;
    }
    return left.Equals(right);
}

/// <summary>Returns the negation of the equality operator.</summary>
public static bool operator !=(BytesRef? left, BytesRef? right)
{
    bool areEqual = left == right;
    return !areEqual;
}
#nullable restore
#endregion
#region AsSpan
/// <summary>
/// Creates a new readonly span over the valid portion of the target bytes, i.e.
/// <see cref="Length"/> bytes of the backing array starting at <see cref="Offset"/>.
/// </summary>
/// <returns>The read-only span representation of the bytes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ReadOnlySpan<byte> AsSpan() // LUCENENET specific
{
#if FEATURE_MEMORYMARSHAL_CREATEREADONLYSPAN && FEATURE_MEMORYMARSHAL_GETARRAYDATAREFERENCE
// NOTE: this path builds the span directly from a reference into the array and
// contains no explicit validation of Offset or Length against the array bounds.
return MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bytes), Offset), Length);
#else
// Fallback when the MemoryMarshal features are not available: use the span constructor.
return new ReadOnlySpan<byte>(bytes, Offset, Length);
#endif
}
/// <summary>
/// Creates a new read-only span over a portion of the target bytes from
/// a specified position to the end of the bytes.
/// </summary>
/// <param name="start">
/// The index, relative to <see cref="Offset"/>, at which to begin this slice.
/// </param>
/// <returns>The read-only span representation of the bytes.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="start"/> is less than 0 or greater than <see cref="Length"/>,
/// or the resulting slice would not lie within the backing array.
/// </exception>
public ReadOnlySpan<byte> AsSpan(int start) // LUCENENET specific
{
    // A single unsigned comparison rejects both negative values and values beyond Length.
    if ((uint)start > (uint)Length)
        throw new ArgumentOutOfRangeException(nameof(start));

    // Compute in 64 bits so that neither the offset nor the end of the slice can wrap around.
    long totalOffset = (long)Offset + start;
    int remaining = Length - start;

    // Validate the WHOLE slice, not just its first byte: the MemoryMarshal path below
    // performs no bounds checks of its own, so an inconsistent Offset/Length must be
    // rejected here to avoid producing a span that extends past the backing array.
    if (totalOffset < 0 || remaining < 0 || totalOffset + remaining > bytes.Length)
        throw new ArgumentOutOfRangeException(nameof(start));

#if FEATURE_MEMORYMARSHAL_CREATEREADONLYSPAN && FEATURE_MEMORYMARSHAL_GETARRAYDATAREFERENCE
    return MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bytes),
        (nint)totalOffset), remaining);
#else
    return new ReadOnlySpan<byte>(bytes, checked((int)totalOffset), remaining);
#endif
}
/// <summary>
/// Creates a new read-only span over a portion of the target bytes from a
/// specified position for a specified number of bytes.
/// </summary>
/// <param name="start">
/// The index, relative to <see cref="Offset"/>, at which to begin this slice.
/// </param>
/// <param name="length">The desired length for the slice.</param>
/// <returns>The read-only span representation of the bytes.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="start"/>, <paramref name="length"/>, or
/// <paramref name="start"/> + <paramref name="length"/> is not
/// in the range of <see cref="Length"/>, or the resulting slice would not
/// lie within the backing array.
/// </exception>
public ReadOnlySpan<byte> AsSpan(int start, int length) // LUCENENET specific
{
    if (IntPtr.Size == 8) // 64-bit process
    {
        // See comment in Span<T>.Slice for how this works.
        if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)Length)
            throw new ArgumentOutOfRangeException(nameof(start));
    }
    else
    {
        if ((uint)start > (uint)Length || (uint)length > (uint)(Length - start))
            throw new ArgumentOutOfRangeException(nameof(start));
    }

    // Compute in 64 bits: a 32-bit unsigned sum can wrap around when Offset is
    // inconsistent (e.g. negative), which would let an out-of-range offset pass
    // the check and reach the unchecked MemoryMarshal path below.
    long totalOffset = (long)Offset + start;

    // Ensure we stay within the backing array bounds
    if (totalOffset < 0 || length < 0 || totalOffset + length > bytes.Length)
        throw new ArgumentOutOfRangeException(nameof(start));

#if FEATURE_MEMORYMARSHAL_CREATEREADONLYSPAN && FEATURE_MEMORYMARSHAL_GETARRAYDATAREFERENCE
    return MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bytes),
        (nint)totalOffset), length);
#else
    return new ReadOnlySpan<byte>(bytes, checked((int)totalOffset), length);
#endif
}
/// <summary>
/// Creates a new read-only span over a portion of the
/// target bytes from a specified position to the end of the bytes.
/// </summary>
/// <param name="startIndex">
/// The index, resolved against <see cref="Length"/>, at which to begin this slice.
/// </param>
/// <returns>The read-only span representation of the bytes.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> is less than 0 or greater than <see cref="Length"/>,
/// or the resulting slice would not lie within the backing array.
/// </exception>
public ReadOnlySpan<byte> AsSpan(System.Index startIndex) // LUCENENET specific
{
    // Index.GetOffset does not validate its result, so check it explicitly.
    int actualIndex = startIndex.GetOffset(Length);
    if ((uint)actualIndex > (uint)Length)
        throw new ArgumentOutOfRangeException(nameof(startIndex));

    // Compute in 64 bits so that neither the offset nor the end of the slice can wrap around.
    long totalOffset = (long)Offset + actualIndex;
    int remaining = Length - actualIndex;

    // Validate the WHOLE slice, not just its first byte: the MemoryMarshal path below
    // performs no bounds checks of its own, so an inconsistent Offset/Length must be
    // rejected here to avoid producing a span that extends past the backing array.
    if (totalOffset < 0 || remaining < 0 || totalOffset + remaining > bytes.Length)
        throw new ArgumentOutOfRangeException(nameof(startIndex));

#if FEATURE_MEMORYMARSHAL_CREATEREADONLYSPAN && FEATURE_MEMORYMARSHAL_GETARRAYDATAREFERENCE
    return MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bytes),
        (nint)totalOffset), remaining);
#else
    return new ReadOnlySpan<byte>(bytes, checked((int)totalOffset), remaining);
#endif
}
/// <summary>
/// Creates a new read-only span over a portion of the target bytes
/// using the range start and end indexes.
/// </summary>
/// <param name="range">
/// The range, resolved against <see cref="Length"/>, that has start and end
/// indexes to use for slicing the bytes.
/// </param>
/// <returns>The read-only span representation of the bytes.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="range"/>'s start or end index is not within the bounds of the bytes.
/// -or-
/// <paramref name="range"/>'s start index is greater than its end index.
/// -or-
/// The resulting slice would not lie within the backing array.
/// </exception>
public ReadOnlySpan<byte> AsSpan(Range range) // LUCENENET specific
{
    // GetOffsetAndLength validates the range against Length and throws if it does not fit.
    (int start, int length) = range.GetOffsetAndLength(Length);

    // Compute in 64 bits: a 32-bit unsigned sum can wrap around when Offset is
    // inconsistent (e.g. negative), which would let an out-of-range offset pass
    // the check and reach the unchecked MemoryMarshal path below.
    long totalOffset = (long)Offset + start;

    // Ensure we stay within the backing array bounds. The exception names the
    // actual parameter of this method rather than a local variable.
    if (totalOffset < 0 || totalOffset + length > bytes.Length)
        throw new ArgumentOutOfRangeException(nameof(range));

#if FEATURE_MEMORYMARSHAL_CREATEREADONLYSPAN && FEATURE_MEMORYMARSHAL_GETARRAYDATAREFERENCE
    return MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bytes),
        (nint)totalOffset), length);
#else
    return new ReadOnlySpan<byte>(bytes, checked((int)totalOffset), length);
#endif
}
#endregion AsSpan
#region AsMemory
/// <summary>
/// Creates a new <see cref="ReadOnlyMemory{T}"/> over the valid portion of
/// <see cref="Bytes"/>: <see cref="Length"/> bytes starting at <see cref="Offset"/>.
/// </summary>
/// <returns>The read-only byte memory representation of the backing array.</returns>
public ReadOnlyMemory<byte> AsMemory() // LUCENENET specific
    => new ReadOnlyMemory<byte>(bytes, Offset, Length);
/// <summary>
/// Creates a new <see cref="ReadOnlyMemory{T}"/> over the portion of <see cref="Bytes"/>
/// that begins at <see cref="Offset"/> + <paramref name="start"/> and spans
/// <see cref="Length"/> - <paramref name="start"/> bytes.
/// </summary>
/// <param name="start">The index into the usable portion of this instance at which to begin this slice.</param>
/// <returns>The read-only byte memory representation of the backing array.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="start"/> is not in range of this instance
/// (<paramref name="start"/> is &lt;0 or &gt;<see cref="Length"/>).
/// </exception>
public ReadOnlyMemory<byte> AsMemory(int start) // LUCENENET specific
{
    // A single unsigned comparison rejects both negative values and values beyond Length.
    if ((uint)start > (uint)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(start));
    }

    // Unsigned arithmetic prevents overflow; the start must lie within the backing array.
    uint absoluteStart = (uint)Offset + (uint)start;
    if (absoluteStart > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(start));
    }

    return new ReadOnlyMemory<byte>(bytes, checked((int)absoluteStart), Length - start);
}
/// <summary>
/// Creates a new <see cref="ReadOnlyMemory{T}"/> over the portion of <see cref="Bytes"/>
/// that begins at <see cref="Offset"/> + <paramref name="start"/> and spans
/// <paramref name="length"/> bytes.
/// </summary>
/// <param name="start">The index into the usable portion of this instance at which to begin this slice.</param>
/// <param name="length">The desired length for the slice, in bytes.</param>
/// <returns>The read-only byte memory representation of the backing array.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="start"/>, <paramref name="length"/>,
/// or <paramref name="start"/> + <paramref name="length"/> is not in the range of this instance.</exception>
public ReadOnlyMemory<byte> AsMemory(int start, int length) // LUCENENET specific
{
// Validate start/length against the logical slice (Length). Two variants are
// used depending on the pointer size of the running process.
if (IntPtr.Size == 8) // 64-bit process
{
// See comment in Span<T>.Slice for how this works.
// Both values are zero-extended to 64 bits, so a negative start or length
// becomes a large value and the sum exceeds Length.
if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)Length)
throw new ArgumentOutOfRangeException(nameof(start));
}
else
{
// 32-bit variant: check start first, then length against what remains after start.
if ((uint)start > (uint)Length || (uint)length > (uint)(Length - start))
throw new ArgumentOutOfRangeException(nameof(start));
}
// Compute offset in uint to prevent overflow
uint totalOffset = (uint)Offset + (uint)start;
// Ensure we stay within the backing array bounds
if (totalOffset + (uint)length > (uint)bytes.Length)
throw new ArgumentOutOfRangeException(nameof(start));
// The checked cast throws if the unsigned offset does not fit into an int.
return new ReadOnlyMemory<byte>(bytes, checked((int)totalOffset), length);
}
/// <summary>
/// Creates a new <see cref="ReadOnlyMemory{T}"/> over the portion of <see cref="Bytes"/>
/// that begins at <see cref="Offset"/> + <paramref name="startIndex"/> and runs to the
/// end of the valid bytes.
/// </summary>
/// <param name="startIndex">The index into the usable portion of this instance at which to begin this slice.</param>
/// <returns>The read-only byte memory representation of the backing array.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex"/> is less
/// than 0 or greater than <see cref="Length"/>.</exception>
public ReadOnlyMemory<byte> AsMemory(System.Index startIndex) // LUCENENET specific
{
    // Resolve the (possibly from-end) index against the logical length and validate it.
    int resolvedStart = startIndex.GetOffset(Length);
    if ((uint)resolvedStart > (uint)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(startIndex));
    }

    // Unsigned arithmetic prevents overflow; the start must lie within the backing array.
    uint absoluteStart = (uint)Offset + (uint)resolvedStart;
    if (absoluteStart > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(startIndex));
    }

    return new ReadOnlyMemory<byte>(bytes, checked((int)absoluteStart), Length - resolvedStart);
}
/// <summary>
/// Creates a new <see cref="ReadOnlyMemory{T}"/> over the portion of <see cref="Bytes"/>
/// described by <paramref name="range"/>, resolved against <see cref="Length"/> and
/// shifted by <see cref="Offset"/>.
/// </summary>
/// <param name="range">The range used to indicate the start and length of the slice.</param>
/// <returns>The read-only byte memory representation of the backing array.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="range"/>'s start or end index is not within the bounds of the array.
/// -or-
/// <paramref name="range"/>'s start index is greater than its end index.
/// </exception>
public ReadOnlyMemory<byte> AsMemory(Range range) // LUCENENET specific
{
    // GetOffsetAndLength validates the range against Length and throws if it does not fit.
    (int start, int length) = range.GetOffsetAndLength(Length);

    // Compute offset in uint to prevent overflow
    uint totalOffset = (uint)Offset + (uint)start;

    // Ensure we stay within the backing array bounds. The exception names the
    // actual parameter of this method rather than a local variable.
    if (totalOffset + (uint)length > (uint)bytes.Length)
        throw new ArgumentOutOfRangeException(nameof(range));

    return new ReadOnlyMemory<byte>(bytes, checked((int)totalOffset), length);
}
#endregion AsMemory
}
// LUCENENET: It is no longer good practice to use binary serialization.
// See: https://github.com/dotnet/corefx/issues/23584#issuecomment-325724568
#if FEATURE_SERIALIZABLE
[Serializable]
#endif
/// <summary>
/// Orders <see cref="BytesRef"/> instances by comparing their valid bytes as
/// unsigned values; when one slice is a prefix of the other, the shorter one sorts first.
/// </summary>
internal class Utf8SortedAsUnicodeComparer : IComparer<BytesRef>
{
    /// <summary>The single shared instance of this comparer.</summary>
    public static readonly Utf8SortedAsUnicodeComparer Instance = new Utf8SortedAsUnicodeComparer();

    // Only singleton: the constructor is private, use Instance.
    private Utf8SortedAsUnicodeComparer()
    {
    }

    /// <summary>
    /// Compares two slices byte by byte in unsigned order.
    /// </summary>
    /// <returns>
    /// The difference of the first pair of differing bytes, or the difference of
    /// the lengths when all compared bytes are equal.
    /// </returns>
    public virtual int Compare(BytesRef a, BytesRef b)
    {
        byte[] left = a.Bytes;
        int leftIndex = a.Offset;
        byte[] right = b.Bytes;
        int rightIndex = b.Offset;

        // Only the common prefix of both slices needs to be inspected.
        int leftStop = leftIndex + Math.Min(a.Length, b.Length);

        for (; leftIndex < leftStop; leftIndex++, rightIndex++)
        {
            int delta = (left[leftIndex] & 0xff) - (right[rightIndex] & 0xff);
            if (delta != 0)
            {
                return delta;
            }
        }

        // One is a prefix of the other, or, they are equal:
        return a.Length - b.Length;
    }
}
/// <summary>
/// Orders UTF8 encoded <see cref="BytesRef"/> instances so that the result matches
/// UTF16 sort order. Deprecated: this comparer is only a transition mechanism.
/// </summary>
[Obsolete("this comparer is only a transition mechanism")]
// LUCENENET: It is no longer good practice to use binary serialization.
// See: https://github.com/dotnet/corefx/issues/23584#issuecomment-325724568
#if FEATURE_SERIALIZABLE
[Serializable]
#endif
internal class Utf8SortedAsUtf16Comparer : IComparer<BytesRef>
{
    // Only singleton
    internal Utf8SortedAsUtf16Comparer()
    {
    }

    /// <summary>
    /// Compares two slices byte by byte, adjusting the first differing pair of
    /// bytes where necessary to reproduce UTF16 ordering.
    /// </summary>
    /// <returns>
    /// The difference of the first pair of differing (possibly adjusted) bytes, or
    /// the difference of the lengths when all compared bytes are equal.
    /// </returns>
    public virtual int Compare(BytesRef a, BytesRef b)
    {
        byte[] left = a.Bytes;
        int leftIndex = a.Offset;
        byte[] right = b.Bytes;
        int rightIndex = b.Offset;

        // Only the common prefix of both slices needs to be inspected.
        int leftStop = leftIndex + Math.Min(a.Length, b.Length);

        for (; leftIndex < leftStop; leftIndex++, rightIndex++)
        {
            int leftByte = left[leftIndex] & 0xff;
            int rightByte = right[rightIndex] & 0xff;
            if (leftByte == rightByte)
            {
                continue;
            }

            // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order
            // We know the terms are not equal, but, we may
            // have to carefully fixup the bytes at the
            // difference to match UTF16's sort order:
            // NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff,
            // we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences]
            // this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such
            // that 6-byte sequences are needed we have much bigger problems anyway.
            bool bothInFixupRange = leftByte >= 0xee && rightByte >= 0xee;
            if (bothInFixupRange)
            {
                if ((leftByte & 0xfe) == 0xee)
                {
                    leftByte += 0xe;
                }
                if ((rightByte & 0xfe) == 0xee)
                {
                    rightByte += 0xe;
                }
            }

            return leftByte - rightByte;
        }

        // One is a prefix of the other, or, they are equal:
        return a.Length - b.Length;
    }
}
// LUCENENET specific
/// <summary>
/// Selects how <see cref="BytesRefFormatter"/> renders a <see cref="BytesRef"/>.
/// </summary>
internal enum BytesRefFormat // For assert/test/logging
{
/// <summary>
/// Render the bytes as the string obtained from decoding them as UTF8; the
/// formatter uses <see cref="BytesRef.ToString()"/> when decoding fails.
/// </summary>
UTF8,
/// <summary>
/// Decode the bytes as UTF8 and render the result through <c>UnicodeUtil.ToHexString</c>.
/// </summary>
UTF8AsHex
}
// LUCENENET specific - when this object is a parameter of
// a method that calls string.Format(),
// defers execution of building a string until
// string.Format() is called.
// This struct is meant to wrap a BytesRef parameter when passed as a string.Format() argument.
internal struct BytesRefFormatter // For assert/test/logging
{
#pragma warning disable IDE0044 // Add readonly modifier
    private BytesRef bytesRef;
    private BytesRefFormat format;
#pragma warning restore IDE0044 // Add readonly modifier

    /// <summary>
    /// Wraps <paramref name="bytesRef"/> for deferred formatting.
    /// </summary>
    /// <param name="bytesRef">The instance to format; may be <c>null</c>.</param>
    /// <param name="format">The representation to produce in <see cref="ToString()"/>.</param>
    public BytesRefFormatter(BytesRef bytesRef, BytesRefFormat format)
    {
        this.bytesRef = bytesRef; // Allow null
        this.format = format;
    }

    /// <summary>
    /// Builds the string representation selected by the format passed to the constructor.
    /// A wrapped <c>null</c> is rendered as the text <c>"null"</c>.
    /// </summary>
    public override string ToString()
    {
        // Special case: null
        if (bytesRef is null)
        {
            return "null";
        }

        if (format == BytesRefFormat.UTF8)
        {
            // Prefer the decoded text; use the hex representation when decoding fails.
            return bytesRef.TryUtf8ToString(out var decoded)
                ? decoded
                : bytesRef.ToString();
        }

        if (format == BytesRefFormat.UTF8AsHex)
        {
            return UnicodeUtil.ToHexString(bytesRef.Utf8ToString());
        }

        // Any other value renders the raw hex representation.
        return bytesRef.ToString();
    }
}
}