| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import pytest |
| from pyfory.meta.metastring import ( |
| MetaStringEncoder, |
| MetaStringDecoder, |
| Encoding, |
| ) |
| |
| |
| def test_encode_metastring_lower_special(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| # Test for encoding and decoding |
| encoded = encoder._encode_lower_special("abc_def") |
| assert len(encoded) == 5 |
| assert len(encoder.encode("org.apache.fory.benchmark.data").encoded_data) == 19 |
| assert len(encoder.encode("MediaContent").encoded_data) == 9 |
| decoded = decoder.decode(encoded, Encoding.LOWER_SPECIAL) |
| assert decoded == "abc_def" |
| |
| for i in range(128): |
| builder = "".join(chr(ord("a") + j % 26) for j in range(i)) |
| encoded = encoder._encode_lower_special(builder) |
| decoded = decoder.decode(encoded, Encoding.LOWER_SPECIAL) |
| assert decoded == builder |
| |
| |
| def test_encode_metastring_lower_upper_digit_special(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| # Test for encoding and decoding |
| encoded = encoder._encode_lower_upper_digit_special("ExampleInput123") |
| assert len(encoded) == 12 |
| decoded = decoder.decode(encoded, Encoding.LOWER_UPPER_DIGIT_SPECIAL) |
| assert decoded == "ExampleInput123" |
| |
| for i in range(1, 128): |
| string = create_string(i) |
| encoded = encoder._encode_lower_upper_digit_special(string) |
| decoded = decoder.decode(encoded, Encoding.LOWER_UPPER_DIGIT_SPECIAL) |
| assert decoded == string, f"Failed at {i}" |
| |
| |
| def create_string(length): |
| chars = [] |
| for j in range(length): |
| n = j % 64 |
| if n < 26: |
| chars.append(chr(ord("a") + n)) |
| elif n < 52: |
| chars.append(chr(ord("A") + (n - 26))) |
| elif n < 62: |
| chars.append(chr(ord("0") + (n - 52))) |
| elif n == 62: |
| chars.append(".") |
| else: |
| chars.append("_") |
| return "".join(chars) |
| |
| |
| def test_metastring(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| for i in range(1, 128): |
| try: |
| string = create_string(i) |
| metastring = encoder.encode(string) |
| assert metastring.encoding != Encoding.UTF_8 |
| assert metastring.original == string |
| |
| new_string = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert new_string == string |
| except Exception as e: |
| pytest.fail(f"Failed at {i} with exception: {str(e)}") |
| |
| |
| def test_encode_empty_string(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| for encoding in [ |
| Encoding.LOWER_SPECIAL, |
| Encoding.LOWER_UPPER_DIGIT_SPECIAL, |
| Encoding.FIRST_TO_LOWER_SPECIAL, |
| Encoding.ALL_TO_LOWER_SPECIAL, |
| Encoding.UTF_8, |
| ]: |
| metastring = encoder.encode_with_encoding("", encoding) |
| assert len(metastring.encoded_data) == 0 |
| decoded = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert decoded == "" |
| |
| |
| def test_encode_characters_outside_of_lower_special(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| |
| test_string = "abcdefABCDEF1234!@#" |
| metastring = encoder.encode(test_string) |
| assert metastring.encoding == Encoding.UTF_8 |
| |
| |
| def test_all_to_upper_special_encoding(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| test_string = "ABC_DEF" |
| metastring = encoder.encode(test_string) |
| assert metastring.encoding == Encoding.LOWER_UPPER_DIGIT_SPECIAL |
| decoded_string = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert decoded_string == test_string |
| |
| |
| def test_first_to_lower_special_encoding(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| test_string = "Aabcdef" |
| metastring = encoder.encode(test_string) |
| assert metastring.encoding == Encoding.FIRST_TO_LOWER_SPECIAL |
| decoded_string = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert decoded_string == test_string |
| |
| |
| def test_utf8_encoding(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| test_string = "你好,世界" # Non-Latin characters |
| metastring = encoder.encode(test_string) |
| assert metastring.encoding == Encoding.UTF_8 |
| decoded_string = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert decoded_string == test_string |
| |
| |
| def test_strip_last_char(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| |
| test_string = "abc" # encoded as 1|00000|00, 001|00010, exactly two bytes |
| encoded_metastring = encoder.encode(test_string) |
| assert not encoded_metastring.strip_last_char |
| |
| test_string = "abcde" # encoded as 1|00000|00, 001|00010, 00011|001, 00xxxxxx, stripped last char |
| encoded_metastring = encoder.encode(test_string) |
| assert encoded_metastring.strip_last_char |
| |
| |
| def test_empty_string(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| decoder = MetaStringDecoder(special_char1=".", special_char2="_") |
| |
| metastring = encoder.encode("") |
| assert metastring.encoded_data == bytes() |
| |
| decoded = decoder.decode(metastring.encoded_data, metastring.encoding) |
| assert decoded == "" |
| |
| |
| def test_ascii_encoding(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| |
| test_string = "asciiOnly" |
| encoded_metastring = encoder.encode(test_string) |
| assert encoded_metastring.encoding != Encoding.UTF_8 |
| assert encoded_metastring.encoding == Encoding.ALL_TO_LOWER_SPECIAL |
| |
| |
| def test_non_ascii_encoding(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| |
| test_string = "こんにちは" # Non-ASCII string |
| encoded_metastring = encoder.encode(test_string) |
| assert encoded_metastring.encoding == Encoding.UTF_8 |
| |
| |
| def test_non_ascii_encoding_and_non_utf8(): |
| encoder = MetaStringEncoder(special_char1=".", special_char2="_") |
| |
| non_ascii_string = "こんにちは" # Non-ASCII string |
| |
| with pytest.raises(ValueError, match="Unsupported character for LOWER_SPECIAL encoding: こ"): |
| encoder.encode_with_encoding(non_ascii_string, Encoding.LOWER_SPECIAL) |