blob: dc7a53e21f81e6d72fe980f3dcae44b80de0d019 [file]
<?php
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
declare(strict_types=1);
namespace Test\Thrift\Unit\Lib\Protocol;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
use Thrift\Protocol\TBinaryProtocol;
use Thrift\Protocol\TCompactProtocol;
use Thrift\Transport\TFramedTransport;
use Thrift\Transport\TMemoryBuffer;
/**
* Pins the byte-count contract for string serialization across protocols
* and transports.
*
* Thrift's binary protocols length-prefix strings with the *byte* count of
* the UTF-8 payload, not the character count. Until THRIFT-5759 this was
* guaranteed by a TStringFunc abstraction that detected the (since-removed)
* mbstring.func_overload PHP ini setting. After the cleanup, native
* strlen()/substr() must continue to deliver the same byte-oriented
* behavior — these tests fail if anything regresses to character counting.
*/
class StringByteCountTest extends TestCase
{
    /**
     * Serializes one string with TBinaryProtocol into a memory buffer and
     * inspects the raw bytes: the first four bytes, decoded as a big-endian
     * 32-bit integer, must equal the byte length, and everything after them
     * must be the original string unchanged.
     */
    #[DataProvider('utf8StringsProvider')]
    public function testTBinaryProtocolWritesByteLengthPrefix(string $value, int $expectedByteLength): void
    {
        $buffer = new TMemoryBuffer();
        (new TBinaryProtocol($buffer))->writeString($value);

        $wire = $buffer->getBuffer();
        $header = substr($wire, 0, 4);
        $payload = substr($wire, 4);

        // Total size first: a 4-byte prefix plus the payload bytes.
        self::assertSame($expectedByteLength + 4, strlen($wire));

        $decodedHeader = unpack('N', $header);
        self::assertSame($expectedByteLength, $decodedHeader[1]);
        self::assertSame($value, $payload);
    }

    /**
     * Writes and then reads a string through the same TBinaryProtocol
     * instance backed by a memory buffer.
     */
    #[DataProvider('utf8StringsProvider')]
    public function testTBinaryProtocolRoundTripsUtf8(string $value, int $expectedByteLength): void
    {
        $codec = new TBinaryProtocol(new TMemoryBuffer());
        $codec->writeString($value);

        self::assertReadsBackIntact($codec, $value, $expectedByteLength);
    }

    /**
     * Writes and then reads a string through the same TCompactProtocol
     * instance backed by a memory buffer.
     */
    #[DataProvider('utf8StringsProvider')]
    public function testTCompactProtocolRoundTripsUtf8(string $value, int $expectedByteLength): void
    {
        $codec = new TCompactProtocol(new TMemoryBuffer());
        $codec->writeString($value);

        self::assertReadsBackIntact($codec, $value, $expectedByteLength);
    }

    /**
     * Writes a string through a TFramedTransport, flushes it into the
     * underlying memory buffer, then reads those flushed bytes back through
     * a second TFramedTransport wrapped around a fresh memory buffer.
     */
    #[DataProvider('utf8StringsProvider')]
    public function testTFramedTransportRoundTripsUtf8(string $value, int $expectedByteLength): void
    {
        $sink = new TMemoryBuffer();
        $outbound = new TFramedTransport($sink);
        (new TBinaryProtocol($outbound))->writeString($value);
        $outbound->flush();

        $inbound = new TFramedTransport(new TMemoryBuffer($sink->getBuffer()));

        self::assertReadsBackIntact(new TBinaryProtocol($inbound), $value, $expectedByteLength);
    }

    /**
     * Named datasets pairing a string with the byte length the tests expect
     * strlen() to report for it.
     *
     * @return iterable<string, array{string, int}>
     */
    public static function utf8StringsProvider(): iterable
    {
        yield from [
            'ascii' => ['Afrikaans', 9],
            'latin-extended' => ['Aragonés', 9],
            'arabic' => ['العربية', 14],
            'malayalam' => ['മലയാളം', 18],
            'slavic-diacritics' => ['Slovenščina', 13],
            'cyrillic' => ['Українська', 20],
            'urdu' => ['اردو', 8],
            'cjk-han' => ['中文', 6],
            'cjk-cantonese' => ['粵語', 6],
            'emoji-4byte' => ['🚀', 4],
        ];
    }

    /**
     * Reads one string from the given protocol and asserts that it is
     * identical to the original and has the expected byte length.
     */
    private static function assertReadsBackIntact(
        TBinaryProtocol|TCompactProtocol $source,
        string $original,
        int $byteLength,
    ): void {
        $decoded = '';
        $source->readString($decoded);

        self::assertSame($original, $decoded);
        self::assertSame($byteLength, strlen($decoded));
    }
}