blob: 3a1b6136122715ccf6098fd69784b8332fe144df [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import Foundation
/// Exclusive upper bound on a meta string's character count: `MetaString.init`
/// and the `MetaStringEncoder` entry points throw `ForyError.encodingError`
/// when the input's `count` is greater than or equal to this value.
private let maxMetaStringLength = 32_767
/// Identifies how a `MetaString`'s `bytes` payload is encoded.
///
/// The raw value is what `writeMetaString` emits and `readMetaString` parses
/// to identify the encoding of a payload.
public enum MetaStringEncoding: UInt8, CaseIterable, Sendable {
/// The payload is the string's UTF-8 bytes.
case utf8 = 0
/// 5 bits per character over `a`-`z`, `.`, `_`, `$` and `|`.
case lowerSpecial = 1
/// 6 bits per character over `a`-`z`, `A`-`Z`, `0`-`9` and the two configured special characters.
case lowerUpperDigitSpecial = 2
/// The first character is lowercased before packing with the `lowerSpecial` alphabet;
/// decoding uppercases it again.
case firstToLowerSpecial = 3
/// Every uppercase character is rewritten as `|` followed by its lowercase form,
/// then packed with the `lowerSpecial` alphabet.
case allToLowerSpecial = 4
}
/// An immutable pairing of a meta string's text with its encoded byte payload.
///
/// Equality and hashing take every stored property into account.
public final class MetaString: Equatable, Hashable, @unchecked Sendable {
    /// The plain-text form of the string.
    public let value: String
    /// The encoding that `bytes` is expressed in.
    public let encoding: MetaStringEncoding
    /// First of the two special characters supplied when this value was created.
    public let specialChar1: Character
    /// Second of the two special characters supplied when this value was created.
    public let specialChar2: Character
    /// The encoded payload.
    public let bytes: [UInt8]
    /// `true` when `encoding` is not `.utf8` and the high bit of the first payload byte is set.
    public let stripLastChar: Bool

    /// Creates a meta string from already-encoded parts.
    ///
    /// - Throws: `ForyError.encodingError` when `value.count` reaches the
    ///   maximum meta string length, or when a non-UTF-8 encoding is paired
    ///   with an empty payload.
    public init(
        value: String,
        encoding: MetaStringEncoding,
        specialChar1: Character,
        specialChar2: Character,
        bytes: [UInt8]
    ) throws {
        guard value.count < maxMetaStringLength else {
            throw ForyError.encodingError("meta string too long")
        }
        let isPacked = encoding != .utf8
        if isPacked && bytes.isEmpty {
            throw ForyError.encodingError("encoded meta string cannot be empty")
        }

        self.value = value
        self.encoding = encoding
        self.specialChar1 = specialChar1
        self.specialChar2 = specialChar2
        self.bytes = bytes

        // A packed payload is never empty here, so `first` always yields the header byte.
        if isPacked, let header = bytes.first {
            self.stripLastChar = (header & 0x80) != 0
        } else {
            self.stripLastChar = false
        }
    }

    /// Returns the empty meta string (UTF-8 encoding, no payload) for the given special characters.
    public static func empty(specialChar1: Character, specialChar2: Character) -> MetaString {
        do {
            return try MetaString(
                value: "",
                encoding: .utf8,
                specialChar1: specialChar1,
                specialChar2: specialChar2,
                bytes: []
            )
        } catch {
            preconditionFailure("failed to create empty MetaString")
        }
    }

    public static func == (lhs: MetaString, rhs: MetaString) -> Bool {
        guard lhs.encoding == rhs.encoding,
              lhs.stripLastChar == rhs.stripLastChar,
              lhs.specialChar1 == rhs.specialChar1,
              lhs.specialChar2 == rhs.specialChar2
        else {
            return false
        }
        return lhs.bytes == rhs.bytes && lhs.value == rhs.value
    }

    public func hash(into hasher: inout Hasher) {
        hasher.combine(value)
        hasher.combine(encoding)
        hasher.combine(specialChar1)
        hasher.combine(specialChar2)
        hasher.combine(bytes)
        hasher.combine(stripLastChar)
    }
}
/// Encodes strings into `MetaString` values, either with a caller-chosen
/// encoding or with one selected automatically from the string's contents.
public struct MetaStringEncoder: Sendable {
    /// Character mapped to code 62 by the `lowerUpperDigitSpecial` encoding.
    public let specialChar1: Character
    /// Character mapped to code 63 by the `lowerUpperDigitSpecial` encoding.
    public let specialChar2: Character

    public init(specialChar1: Character, specialChar2: Character) {
        self.specialChar1 = specialChar1
        self.specialChar2 = specialChar2
    }

    /// Encoder whose special characters are `.` and `_`.
    public static let namespace = MetaStringEncoder(specialChar1: ".", specialChar2: "_")
    /// Encoder whose special characters are `$` and `_`.
    public static let typeName = MetaStringEncoder(specialChar1: "$", specialChar2: "_")
    /// Encoder whose special characters are `$` and `_`.
    public static let fieldName = MetaStringEncoder(specialChar1: "$", specialChar2: "_")

    /// Encodes `input`, picking the encoding automatically with no restriction.
    public func encode(_ input: String) throws -> MetaString {
        return try encodeAuto(input, allowedEncodings: nil)
    }

    /// Encodes `input`, picking the encoding automatically among `allowedEncodings`.
    ///
    /// `.utf8` is the fallback when no allowed packed encoding fits.
    public func encode(_ input: String, allowedEncodings: [MetaStringEncoding]) throws -> MetaString {
        return try encodeAuto(input, allowedEncodings: allowedEncodings)
    }

    /// Encodes `input` with exactly the requested `encoding`.
    ///
    /// An empty `input` yields `MetaString.empty` regardless of `encoding`.
    ///
    /// - Throws: `ForyError.encodingError` when the input is too long, when a
    ///   packed encoding is requested for a string containing a scalar above
    ///   255, or when a character is outside the chosen encoding's alphabet.
    public func encode(_ input: String, encoding: MetaStringEncoding) throws -> MetaString {
        guard input.count < maxMetaStringLength else {
            throw ForyError.encodingError("meta string too long")
        }
        guard !input.isEmpty else {
            return MetaString.empty(specialChar1: specialChar1, specialChar2: specialChar2)
        }
        if encoding != .utf8 && !isLatin(input) {
            throw ForyError.encodingError("non-ASCII characters are not allowed for packed meta string")
        }

        let payload: [UInt8]
        switch encoding {
        case .utf8:
            payload = Array(input.utf8)
        case .lowerSpecial:
            payload = try encodeGeneric(input, bitsPerChar: 5, mapper: mapLowerSpecial)
        case .lowerUpperDigitSpecial:
            payload = try encodeGeneric(input, bitsPerChar: 6, mapper: mapLowerUpperDigitSpecial)
        case .firstToLowerSpecial:
            payload = try encodeGeneric(lowerFirstAscii(input), bitsPerChar: 5, mapper: mapLowerSpecial)
        case .allToLowerSpecial:
            payload = try encodeGeneric(escapeAllUpper(input), bitsPerChar: 5, mapper: mapLowerSpecial)
        }

        return try MetaString(
            value: input,
            encoding: encoding,
            specialChar1: specialChar1,
            specialChar2: specialChar2,
            bytes: payload
        )
    }

    /// Shared implementation of the automatic `encode` overloads.
    ///
    /// Strings containing a scalar above 255 are always stored as UTF-8;
    /// everything else is routed through `chooseEncoding`.
    private func encodeAuto(_ input: String, allowedEncodings: [MetaStringEncoding]?) throws -> MetaString {
        guard input.count < maxMetaStringLength else {
            throw ForyError.encodingError("meta string too long")
        }
        guard !input.isEmpty else {
            return MetaString.empty(specialChar1: specialChar1, specialChar2: specialChar2)
        }
        guard isLatin(input) else {
            return try MetaString(
                value: input,
                encoding: .utf8,
                specialChar1: specialChar1,
                specialChar2: specialChar2,
                bytes: Array(input.utf8)
            )
        }
        return try encode(input, encoding: chooseEncoding(input, allowedEncodings: allowedEncodings))
    }

    /// Selects an encoding for `input`; `nil` for `allowedEncodings` means "anything goes".
    private func chooseEncoding(_ input: String, allowedEncodings: [MetaStringEncoding]?) -> MetaStringEncoding {
        func isAllowed(_ candidate: MetaStringEncoding) -> Bool {
            guard let allowedEncodings = allowedEncodings else {
                return true
            }
            return allowedEncodings.contains(candidate)
        }

        var digits = 0
        var uppers = 0
        var fitsLowerSpecial = true
        var fitsLowerUpperDigitSpecial = true

        for scalar in input.unicodeScalars {
            let asCharacter = Character(scalar)
            let code = scalar.value
            let isLower = (97...122).contains(code)
            let isUpper = (65...90).contains(code)
            let isDigit = (48...57).contains(code)

            if fitsLowerSpecial && !isLower {
                switch asCharacter {
                case ".", "_", "$", "|":
                    break
                default:
                    fitsLowerSpecial = false
                }
            }
            if fitsLowerUpperDigitSpecial && !(isLower || isUpper || isDigit) {
                if asCharacter != specialChar1 && asCharacter != specialChar2 {
                    fitsLowerUpperDigitSpecial = false
                }
            }
            if isDigit {
                digits += 1
            }
            if isUpper {
                uppers += 1
            }
        }

        if fitsLowerSpecial && isAllowed(.lowerSpecial) {
            return .lowerSpecial
        }
        guard fitsLowerUpperDigitSpecial else {
            return .utf8
        }
        if digits != 0 && isAllowed(.lowerUpperDigitSpecial) {
            return .lowerUpperDigitSpecial
        }
        if uppers == 1, input.first?.isUppercase == true, isAllowed(.firstToLowerSpecial) {
            return .firstToLowerSpecial
        }
        // Escaping costs one extra 5-bit symbol per uppercase letter; use it only
        // when that is strictly smaller than 6 bits for every character.
        let length = input.count
        if (length + uppers) * 5 < length * 6, isAllowed(.allToLowerSpecial) {
            return .allToLowerSpecial
        }
        return isAllowed(.lowerUpperDigitSpecial) ? .lowerUpperDigitSpecial : .utf8
    }

    /// Packs `input` at `bitsPerChar` bits per character, most significant bit first.
    ///
    /// Bit 0 of the first byte is a flag rather than data: it is set when the
    /// padding at the end of the payload is wide enough to hold one more
    /// symbol. Character data starts at bit 1.
    private func encodeGeneric(
        _ input: String,
        bitsPerChar: Int,
        mapper: (Character) throws -> UInt8
    ) throws -> [UInt8] {
        let symbols = try input.map(mapper)
        let totalBits = symbols.count * bitsPerChar + 1
        let byteLength = (totalBits + 7) / 8
        var packed = [UInt8](repeating: 0, count: byteLength)

        var cursor = 1
        for symbol in symbols {
            for shift in (0..<bitsPerChar).reversed() {
                if (symbol >> UInt8(shift)) & 0x01 == 1 {
                    let (byteIndex, bitIndex) = cursor.quotientAndRemainder(dividingBy: 8)
                    packed[byteIndex] |= UInt8(0x80) >> UInt8(bitIndex)
                }
                cursor += 1
            }
        }

        let paddingBits = byteLength * 8 - totalBits
        if paddingBits >= bitsPerChar {
            packed[0] |= 0x80
        }
        return packed
    }

    /// Maps a character to its 5-bit code: `a`-`z` → 0-25, then `.`, `_`, `$`, `|` → 26-29.
    private func mapLowerSpecial(_ character: Character) throws -> UInt8 {
        let scalars = character.unicodeScalars
        guard scalars.count == 1, let scalar = scalars.first else {
            throw ForyError.encodingError("unsupported character in LOWER_SPECIAL")
        }
        switch scalar.value {
        case 97...122:
            return UInt8(scalar.value - 97)
        default:
            break
        }
        switch character {
        case ".":
            return 26
        case "_":
            return 27
        case "$":
            return 28
        case "|":
            return 29
        default:
            throw ForyError.encodingError("unsupported character in LOWER_SPECIAL")
        }
    }

    /// Maps a character to its 6-bit code: `a`-`z` → 0-25, `A`-`Z` → 26-51,
    /// `0`-`9` → 52-61, `specialChar1` → 62, `specialChar2` → 63.
    private func mapLowerUpperDigitSpecial(_ character: Character) throws -> UInt8 {
        let scalars = character.unicodeScalars
        guard scalars.count == 1, let scalar = scalars.first else {
            throw ForyError.encodingError("unsupported character in LOWER_UPPER_DIGIT_SPECIAL")
        }
        switch scalar.value {
        case 97...122:
            return UInt8(scalar.value - 97)
        case 65...90:
            return UInt8(scalar.value - 65 + 26)
        case 48...57:
            return UInt8(scalar.value - 48 + 52)
        default:
            break
        }
        if character == specialChar1 {
            return 62
        }
        if character == specialChar2 {
            return 63
        }
        throw ForyError.encodingError("unsupported character in LOWER_UPPER_DIGIT_SPECIAL")
    }

    /// Returns `input` with its first character lowercased.
    private func lowerFirstAscii(_ input: String) -> String {
        guard let head = input.first else {
            return input
        }
        var result = String(head).lowercased()
        result.append(contentsOf: input.dropFirst())
        return result
    }

    /// Replaces every uppercase character with `|` followed by its lowercase form.
    private func escapeAllUpper(_ input: String) -> String {
        var escaped = String()
        escaped.reserveCapacity(input.count * 2)
        for character in input {
            guard character.isUppercase else {
                escaped.append(character)
                continue
            }
            escaped.append("|")
            escaped.append(contentsOf: String(character).lowercased())
        }
        return escaped
    }

    /// Returns `true` when no Unicode scalar in `input` exceeds 255.
    private func isLatin(_ input: String) -> Bool {
        return input.unicodeScalars.allSatisfy { $0.value <= 255 }
    }
}
/// Decodes meta string payloads back into `MetaString` values.
public struct MetaStringDecoder: Sendable {
    /// Character produced for code 62 of the `lowerUpperDigitSpecial` encoding.
    public let specialChar1: Character
    /// Character produced for code 63 of the `lowerUpperDigitSpecial` encoding.
    public let specialChar2: Character

    public init(specialChar1: Character, specialChar2: Character) {
        self.specialChar1 = specialChar1
        self.specialChar2 = specialChar2
    }

    /// Decoder whose special characters are `.` and `_`.
    public static let namespace = MetaStringDecoder(specialChar1: ".", specialChar2: "_")
    /// Decoder whose special characters are `$` and `_`.
    public static let typeName = MetaStringDecoder(specialChar1: "$", specialChar2: "_")
    /// Decoder whose special characters are `$` and `_`.
    public static let fieldName = MetaStringDecoder(specialChar1: "$", specialChar2: "_")

    /// Decodes `bytes` according to `encoding` and wraps the result, together
    /// with the original payload, in a `MetaString`.
    ///
    /// - Throws: `ForyError.encodingError` for invalid UTF-8, for symbol values
    ///   outside the encoding's alphabet, or when `MetaString.init` rejects
    ///   the decoded result.
    public func decode(bytes: [UInt8], encoding: MetaStringEncoding) throws -> MetaString {
        let text: String
        switch encoding {
        case .utf8:
            guard let utf8Text = String(bytes: bytes, encoding: .utf8) else {
                throw ForyError.encodingError("invalid UTF-8 meta string payload")
            }
            text = utf8Text
        case .lowerSpecial:
            text = try decodeGeneric(bytes: bytes, bitsPerChar: 5, mapper: unmapLowerSpecial)
        case .lowerUpperDigitSpecial:
            text = try decodeGeneric(bytes: bytes, bitsPerChar: 6, mapper: unmapLowerUpperDigitSpecial)
        case .firstToLowerSpecial:
            let lowered = try decodeGeneric(bytes: bytes, bitsPerChar: 5, mapper: unmapLowerSpecial)
            if let head = lowered.first {
                // Undo the encoder's lowercasing of the first character.
                var restored = String(head).uppercased()
                restored.append(contentsOf: lowered.dropFirst())
                text = restored
            } else {
                text = lowered
            }
        case .allToLowerSpecial:
            let escaped = try decodeGeneric(bytes: bytes, bitsPerChar: 5, mapper: unmapLowerSpecial)
            text = unescapeAllUpper(escaped)
        }

        return try MetaString(
            value: text,
            encoding: encoding,
            specialChar1: specialChar1,
            specialChar2: specialChar2,
            bytes: bytes
        )
    }

    /// Unpacks `bitsPerChar`-bit symbols, most significant bit first, starting at bit 1.
    ///
    /// Bit 0 of the first byte is a flag: when set, the final `bitsPerChar`
    /// bits of the payload are excluded, so one fewer symbol is read than
    /// would otherwise fit.
    private func decodeGeneric(
        bytes: [UInt8],
        bitsPerChar: Int,
        mapper: (UInt8) throws -> Character
    ) throws -> String {
        guard let header = bytes.first else {
            return ""
        }
        let excludedTailBits = (header & 0x80) != 0 ? bitsPerChar : 0
        let usableBits = bytes.count * 8 - excludedTailBits

        var text = String()
        text.reserveCapacity(bytes.count)

        var cursor = 1
        while cursor + bitsPerChar <= usableBits {
            var symbol: UInt8 = 0
            for position in cursor..<(cursor + bitsPerChar) {
                let mask = UInt8(0x80) >> UInt8(position & 7)
                let bit: UInt8 = (bytes[position >> 3] & mask) != 0 ? 1 : 0
                symbol = (symbol << 1) | bit
            }
            cursor += bitsPerChar
            text.append(try mapper(symbol))
        }
        return text
    }

    /// Inverse of the 5-bit alphabet: 0-25 → `a`-`z`, 26-29 → `.`, `_`, `$`, `|`.
    private func unmapLowerSpecial(_ value: UInt8) throws -> Character {
        switch value {
        case 0...25:
            return Character(UnicodeScalar(97 + value))
        case 26:
            return "."
        case 27:
            return "_"
        case 28:
            return "$"
        case 29:
            return "|"
        default:
            throw ForyError.encodingError("invalid LOWER_SPECIAL value")
        }
    }

    /// Inverse of the 6-bit alphabet: 0-25 → `a`-`z`, 26-51 → `A`-`Z`,
    /// 52-61 → `0`-`9`, 62 → `specialChar1`, 63 → `specialChar2`.
    private func unmapLowerUpperDigitSpecial(_ value: UInt8) throws -> Character {
        switch value {
        case 0...25:
            return Character(UnicodeScalar(97 + value))
        case 26...51:
            return Character(UnicodeScalar(65 + (value - 26)))
        case 52...61:
            return Character(UnicodeScalar(48 + (value - 52)))
        case 62:
            return specialChar1
        case 63:
            return specialChar2
        default:
            throw ForyError.encodingError("invalid LOWER_UPPER_DIGIT_SPECIAL value")
        }
    }

    /// Turns each `|x` pair back into the uppercase form of `x`.
    ///
    /// A `|` with nothing after it is kept as-is.
    private func unescapeAllUpper(_ input: String) -> String {
        var restored = String()
        restored.reserveCapacity(input.count)

        var escapePending = false
        for character in input {
            if escapePending {
                restored.append(contentsOf: String(character).uppercased())
                escapePending = false
            } else if character == "|" {
                escapePending = true
            } else {
                restored.append(character)
            }
        }
        if escapePending {
            restored.append("|")
        }
        return restored
    }
}
/// Writes `value` to the context's buffer, emitting a back-reference when the
/// same meta string has already been written through this context.
///
/// Header layout (var-uint32): the low bit is 1 for a back-reference, in which
/// case the remaining bits hold `index + 1`; otherwise the remaining bits hold
/// the payload length. A new payload longer than 16 bytes is preceded by its
/// 64-bit `metaStringHash`; a shorter non-empty one by a single encoding byte.
///
/// - Throws: `ForyError.encodingError` when `value` cannot be expressed in one
///   of `encodings`, or when re-encoding fails.
@inline(__always)
func writeMetaString(
    context: WriteContext,
    value: MetaString,
    encodings: [MetaStringEncoding],
    encoder: MetaStringEncoder
) throws {
    // Re-encode only when the incoming value uses an encoding this position does not accept.
    let normalized = try encodings.contains(value.encoding)
        ? value
        : encoder.encode(value.value, allowedEncodings: encodings)
    guard encodings.contains(normalized.encoding) else {
        throw ForyError.encodingError("failed to normalize meta string encoding")
    }

    context.markMetaStringWriteStateUsed()
    let payload = normalized.bytes
    let assignment = context.metaStringWriteState.assignIndexIfAbsent(for: normalized)

    guard assignment.isNew else {
        context.buffer.writeVarUInt32(((assignment.index + 1) << 1) | 1)
        return
    }

    context.buffer.writeVarUInt32(UInt32(payload.count) << 1)
    if payload.count > 16 {
        context.buffer.writeInt64(Int64(bitPattern: metaStringHash(normalized)))
    } else if !payload.isEmpty {
        context.buffer.writeUInt8(normalized.encoding.rawValue)
    }
    context.buffer.writeBytes(payload)
}
/// Reads a meta string written by `writeMetaString`, resolving back-references
/// against the meta strings already read through `context`.
///
/// Header layout (var-uint32): the low bit is 1 for a back-reference, in which
/// case the remaining bits hold `index + 1`; otherwise the remaining bits hold
/// the payload length. Payloads longer than 16 bytes are preceded by a 64-bit
/// hash whose low byte is the encoding; shorter non-empty payloads by a single
/// encoding byte. A zero-length payload decodes to `MetaString.empty`.
///
/// - Parameters:
///   - context: Supplies the input buffer and the table of previously read meta strings.
///   - decoder: Decoder used to turn the payload into text.
///   - encodings: Encodings accepted at this position.
/// - Returns: The decoded (or previously cached) meta string.
/// - Throws: `ForyError.invalidData` for an unknown reference index, an
///   unrecognised encoding tag, or an encoding not listed in `encodings`;
///   any error thrown by the buffer reads or by `decoder.decode`.
@inline(__always)
func readMetaString(
    context: ReadContext,
    decoder: MetaStringDecoder,
    encodings: [MetaStringEncoding]
) throws -> MetaString {
    let header = try context.buffer.readVarUInt32()
    let length = Int(header >> 1)
    let isRef = (header & 1) == 1

    if isRef {
        let index = length - 1
        // A reference header with a zero payload would produce index -1; reject it
        // here instead of handing a negative index to the context lookup.
        guard index >= 0, let cached = context.getReadMetaString(at: index) else {
            throw ForyError.invalidData("unknown meta string ref index \(index)")
        }
        return cached
    }

    let value: MetaString
    if length == 0 {
        value = MetaString.empty(
            specialChar1: decoder.specialChar1,
            specialChar2: decoder.specialChar2
        )
    } else {
        let rawEncoding: UInt8
        if length > 16 {
            // Long payloads carry a hash; its low byte holds the encoding tag.
            let hash = try context.buffer.readInt64()
            rawEncoding = UInt8(truncatingIfNeeded: hash & 0xFF)
        } else {
            rawEncoding = try context.buffer.readUInt8()
        }
        guard let encoding = MetaStringEncoding(rawValue: rawEncoding) else {
            throw ForyError.invalidData("invalid meta string encoding \(rawEncoding)")
        }
        guard encodings.contains(encoding) else {
            throw ForyError.invalidData("meta string encoding \(encoding) not allowed in this context")
        }
        let bytes = try context.buffer.readBytes(count: length)
        value = try decoder.decode(bytes: bytes, encoding: encoding)
    }

    // Register the new value so later back-references can resolve to it.
    context.appendReadMetaString(value)
    return value
}
/// Reads a meta string in the compact single-byte-header form directly from `buffer`.
///
/// The low two bits of the header byte index into `encodings`; the upper six
/// bits hold the payload length. A length field of 63 means the real length is
/// 63 plus a var-uint32 that follows.
///
/// - Throws: `ForyError.invalidData` when the encoding index is out of range
///   for `encodings`; any error thrown by the buffer reads or by `decoder.decode`.
@inline(__always)
func readMetaString(
    buffer: ByteBuffer,
    decoder: MetaStringDecoder,
    encodings: [MetaStringEncoding]
) throws -> MetaString {
    let header = try buffer.readUInt8()

    let encodingIndex = Int(header & 0b11)
    guard encodings.indices.contains(encodingIndex) else {
        throw ForyError.invalidData("invalid meta string encoding index")
    }

    let inlineLength = Int(header >> 2)
    let length: Int
    if inlineLength < 0b11_1111 {
        length = inlineLength
    } else {
        let overflow = try buffer.readVarUInt32()
        length = 0b11_1111 + Int(overflow)
    }

    let payload = try buffer.readBytes(count: length)
    return try decoder.decode(bytes: payload, encoding: encodings[encodingIndex])
}
/// Computes the 64-bit tag written ahead of long meta string payloads.
///
/// The upper 56 bits come from the absolute value of the first MurmurHash3
/// x64-128 word (seed 47) of the payload, with a raw zero replaced by 256
/// before masking; the low byte is the encoding's raw value.
@inline(__always)
func metaStringHash(_ metaString: MetaString) -> UInt64 {
    let signedHash = Int64(bitPattern: MurmurHash3.x64_128(metaString.bytes, seed: 47).0)

    // `magnitude` is |signedHash| as an unsigned value; for `Int64.min` it is
    // that value's own bit pattern, so no overflow can occur.
    var hashBits = signedHash.magnitude
    if hashBits == 0 {
        hashBits = 256
    }

    let upperBits = hashBits & 0xffff_ffff_ffff_ff00
    return upperBits | UInt64(metaString.encoding.rawValue)
}