blob: 78a822eb1ac1500d8c125be0b645017ae45ec86e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
using System.Text;
namespace Apache.Fory;
/// <summary>
/// Serializer for <see cref="string"/> values.
/// </summary>
/// <remarks>
/// Every string is written as a variable-length header followed by the payload bytes.
/// The header value is <c>(payloadByteLength &lt;&lt; 2) | encoding</c>, where the low two
/// bits hold the <see cref="ForyStringEncoding"/> tag. The payload is encoded with
/// <see cref="Encoding.Latin1"/>, <see cref="Encoding.UTF8"/> or
/// <see cref="Encoding.Unicode"/> (UTF-16 little-endian) depending on the tag.
/// </remarks>
public sealed class StringSerializer : Serializer<string>
{
    /// <summary>
    /// Size of the scratch buffer used to encode a header. Headers are built from an
    /// <see cref="int"/> byte length shifted left by two, so they need at most five
    /// 7-bit groups; six bytes is always sufficient.
    /// </summary>
    private const int MaxVarUInt36SmallBytes = 6;

    /// <summary>Number of low header bits that carry the encoding tag.</summary>
    private const int EncodingBits = 2;

    /// <summary>Mask selecting the encoding tag from a header.</summary>
    private const ulong EncodingMask = 0x03;

    /// <inheritdoc/>
    public override string DefaultValue => null!;

    /// <summary>
    /// Writes <paramref name="value"/> using <see cref="WriteString"/>. A <c>null</c>
    /// value is written as the empty string.
    /// </summary>
    /// <param name="context">Context whose writer receives the bytes.</param>
    /// <param name="value">String to write.</param>
    /// <param name="hasGenerics">Ignored; strings carry no generic arguments.</param>
    public override void WriteData(WriteContext context, in string value, bool hasGenerics)
    {
        _ = hasGenerics;

        // WriteString already maps null to the empty string.
        WriteString(context, value);
    }

    /// <summary>Reads a string previously written by <see cref="WriteData"/>.</summary>
    /// <param name="context">Context whose reader supplies the bytes.</param>
    /// <returns>The decoded string.</returns>
    public override string ReadData(ReadContext context)
    {
        return ReadString(context);
    }

    /// <summary>
    /// Writes a string as a header plus payload, picking the encoding with
    /// <see cref="SelectEncoding"/>.
    /// </summary>
    /// <param name="context">Context whose writer receives the bytes.</param>
    /// <param name="value">String to write; <c>null</c> is treated as empty.</param>
    /// <exception cref="EncodingException">
    /// The selected encoding is not one of Latin-1, UTF-8 or UTF-16.
    /// </exception>
    public static void WriteString(WriteContext context, string value)
    {
        string safe = value ?? string.Empty;
        ForyStringEncoding encoding = SelectEncoding(safe);
        switch (encoding)
        {
            case ForyStringEncoding.Latin1:
                WriteLatin1(context, safe);
                break;
            case ForyStringEncoding.Utf8:
                WriteUtf8(context, safe);
                break;
            case ForyStringEncoding.Utf16:
                WriteUtf16(context, safe);
                break;
            default:
                throw new EncodingException($"unsupported string encoding {encoding}");
        }
    }

    /// <summary>
    /// Reads a header, then the number of payload bytes it declares, and decodes them
    /// according to the encoding tag in the header's low two bits.
    /// </summary>
    /// <param name="context">Context whose reader supplies the bytes.</param>
    /// <returns>The decoded string.</returns>
    /// <exception cref="EncodingException">
    /// The declared byte length does not fit in an <see cref="int"/>, the encoding tag is
    /// unknown, or a UTF-16 payload has an odd byte length.
    /// </exception>
    public static string ReadString(ReadContext context)
    {
        ulong header = context.Reader.ReadVarUInt36Small();
        ulong encoding = header & EncodingMask;
        ulong declaredLength = header >> EncodingBits;

        // A header can declare more bytes than a span can address. Report that as a
        // decoding failure, like the other malformed-input cases below, instead of
        // letting an arithmetic OverflowException escape.
        if (declaredLength > (ulong)int.MaxValue)
        {
            throw new EncodingException($"string byte length {declaredLength} exceeds the supported maximum");
        }

        ReadOnlySpan<byte> bytes = context.Reader.ReadSpan((int)declaredLength);
        return encoding switch
        {
            (ulong)ForyStringEncoding.Utf8 => Encoding.UTF8.GetString(bytes),
            (ulong)ForyStringEncoding.Latin1 => DecodeLatin1(bytes),
            (ulong)ForyStringEncoding.Utf16 => DecodeUtf16(bytes),
            _ => throw new EncodingException($"unsupported string encoding {encoding}"),
        };
    }

    /// <summary>Decodes a Latin-1 payload (one byte per character).</summary>
    private static string DecodeLatin1(ReadOnlySpan<byte> bytes)
    {
        return Encoding.Latin1.GetString(bytes);
    }

    /// <summary>Decodes a UTF-16 payload (two bytes per code unit).</summary>
    /// <exception cref="EncodingException">The payload length is odd.</exception>
    private static string DecodeUtf16(ReadOnlySpan<byte> bytes)
    {
        if ((bytes.Length & 1) != 0)
        {
            throw new EncodingException("utf16 byte length is not even");
        }

        return Encoding.Unicode.GetString(bytes);
    }

    /// <summary>
    /// Chooses the wire encoding for <paramref name="value"/>: Latin-1 when every
    /// character is at most U+00FF (this includes the empty string); otherwise UTF-8
    /// when at least half of the characters are ASCII, else UTF-16.
    /// </summary>
    private static ForyStringEncoding SelectEncoding(string value)
    {
        int asciiCount = 0;
        bool allLatin1 = true;
        foreach (char c in value)
        {
            if (c < 0x80)
            {
                asciiCount++;
            }
            else if (c > 0xFF)
            {
                allLatin1 = false;
            }
        }

        if (allLatin1)
        {
            return ForyStringEncoding.Latin1;
        }

        return asciiCount * 2 >= value.Length ? ForyStringEncoding.Utf8 : ForyStringEncoding.Utf16;
    }

    /// <summary>Writes <paramref name="value"/> as Latin-1, one byte per character.</summary>
    private static void WriteLatin1(WriteContext context, string value)
    {
        WriteFixedWidth(context, value, Encoding.Latin1, ForyStringEncoding.Latin1, value.Length);
    }

    /// <summary>
    /// Writes <paramref name="value"/> as UTF-8. The payload length is only known after
    /// encoding, so the payload is first encoded behind a maximum-size header
    /// reservation and then moved forward to sit directly after the actual header.
    /// </summary>
    private static void WriteUtf8(WriteContext context, string value)
    {
        int maxByteLength = Encoding.UTF8.GetMaxByteCount(value.Length);

        // checked: the sum must not wrap to a negative size request.
        Span<byte> destination = context.Writer.GetSpan(checked(MaxVarUInt36SmallBytes + maxByteLength));
        Span<byte> payload = destination.Slice(MaxVarUInt36SmallBytes);
        int written = Encoding.UTF8.GetBytes(value.AsSpan(), payload);

        ulong header = ((ulong)written << EncodingBits) | (ulong)ForyStringEncoding.Utf8;
        Span<byte> headerBuf = stackalloc byte[MaxVarUInt36SmallBytes];
        int headerBytes = EncodeVarUInt36Small(headerBuf, header);
        if (headerBytes != MaxVarUInt36SmallBytes)
        {
            // Close the gap left by the unused header bytes. The regions overlap,
            // which Span<T>.CopyTo handles correctly.
            payload.Slice(0, written).CopyTo(destination.Slice(headerBytes));
        }

        headerBuf.Slice(0, headerBytes).CopyTo(destination);
        context.Writer.Advance(headerBytes + written);
    }

    /// <summary>Writes <paramref name="value"/> as UTF-16, two bytes per code unit.</summary>
    private static void WriteUtf16(WriteContext context, string value)
    {
        WriteFixedWidth(context, value, Encoding.Unicode, ForyStringEncoding.Utf16, checked(value.Length * 2));
    }

    /// <summary>
    /// Writes header and payload for an encoding whose payload size is known up front.
    /// </summary>
    /// <param name="context">Context whose writer receives the bytes.</param>
    /// <param name="value">String to encode.</param>
    /// <param name="codec">Encoder that produces the payload bytes.</param>
    /// <param name="tag">Encoding tag stored in the header's low bits.</param>
    /// <param name="byteLength">Payload size in bytes, as declared in the header.</param>
    private static void WriteFixedWidth(
        WriteContext context,
        string value,
        Encoding codec,
        ForyStringEncoding tag,
        int byteLength)
    {
        ulong header = ((ulong)byteLength << EncodingBits) | (ulong)tag;
        Span<byte> headerBuf = stackalloc byte[MaxVarUInt36SmallBytes];
        int headerBytes = EncodeVarUInt36Small(headerBuf, header);

        // checked: the sum must not wrap to a negative size request.
        Span<byte> destination = context.Writer.GetSpan(checked(headerBytes + byteLength));
        headerBuf.Slice(0, headerBytes).CopyTo(destination);
        int written = codec.GetBytes(value.AsSpan(), destination.Slice(headerBytes));
        context.Writer.Advance(headerBytes + written);
    }

    /// <summary>
    /// Encodes <paramref name="value"/> into <paramref name="destination"/> as 7-bit
    /// groups, least-significant group first, with the high bit of each byte set when
    /// another byte follows.
    /// </summary>
    /// <returns>The number of bytes written.</returns>
    private static int EncodeVarUInt36Small(Span<byte> destination, ulong value)
    {
        int index = 0;
        ulong remaining = value;
        while (remaining >= 0x80)
        {
            destination[index] = unchecked((byte)((remaining & 0x7FuL) | 0x80uL));
            index += 1;
            remaining >>= 7;
        }

        destination[index] = unchecked((byte)remaining);
        return index + 1;
    }
}