| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| import copy |
| import math |
| import uuid |
| from datetime import date, datetime, time, timedelta |
| from decimal import Decimal |
| from unittest.mock import MagicMock |
| |
| import numpy as np |
| import pandas as pd |
| import pytest |
| |
| from superset.utils import json |
| from superset.utils.core import ( |
| zlib_compress, |
| zlib_decompress, |
| ) |
| |
| |
def test_json_loads():
    """Decode a flat JSON object and check every scalar member."""
    payload = (
        '{"str": "Hello World", "int": 123456789, "float": 0.12345, "bool": true}'
    )
    parsed = json.loads(payload)

    expected_scalars = {"str": "Hello World", "int": 123456789, "float": 0.12345}
    for key, expected in expected_scalars.items():
        assert parsed[key] == expected
    # Identity check: the JSON literal must come back as the bool singleton.
    assert parsed["bool"] is True
| |
| |
def test_json_loads_exception():
    """Malformed JSON raises ``JSONDecodeError`` with the expected message."""
    # The closing quote after ``h`` is missing.
    malformed = '{"a": 5, "b": [1, 5, ["g", "h]]}'
    expected_message = "Unterminated string starting at: line 1 column 28 (char 27)"

    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.loads(malformed)

    assert str(excinfo.value) == expected_message
| |
| |
def test_json_loads_encoding():
    """Decode JSON supplied as bytes, with and without an explicit encoding."""
    # Raw bytes literal: the backslash-u sequences must reach the JSON decoder
    # verbatim. Without the ``r`` prefix they are invalid escapes in a bytes
    # literal, which Python reports with a warning at compile time. The byte
    # value is identical either way.
    unicode_data = rb'{"a": "\u0073\u0074\u0072"}'
    data = json.loads(unicode_data)
    assert data["a"] == "str"

    # UTF-16 payload (little-endian, with BOM) spelling ``{"a": "str"}``.
    utf16_data = b'\xff\xfe{\x00"\x00a\x00"\x00:\x00 \x00"\x00s\x00t\x00r\x00"\x00}\x00'
    data = json.loads(utf16_data, encoding="utf-16")
    assert data["a"] == "str"
| |
| |
def test_json_loads_allow_nan():
    """``NaN`` is rejected by default and accepted only with ``allow_nan=True``."""
    serialized_data = '{"float": NaN}'

    # Default mode: the bare ``NaN`` token is not valid JSON.
    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.loads(serialized_data)
    assert str(excinfo.value) == "Expecting value: line 1 column 11 (char 10)"

    data = json.loads(serialized_data, allow_nan=True)
    # ``isinstance(data, object)`` holds for every Python value and so verified
    # nothing; check for the mapping a JSON object decodes to instead.
    assert isinstance(data, dict)
    assert math.isnan(data["float"])
| |
| |
def test_json_dumps():
    """Round-trip plain scalars through ``json.dumps`` and ``json.loads``."""
    original = {
        "str": "Hello World",
        "int": 123456789,
        "float": 0.12345,
        "bool": True,
    }
    encoded = json.dumps(original, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)

    assert decoded["str"] == "Hello World"
    assert decoded["int"] == 123456789
    assert decoded["float"] == 0.12345
    # The boolean must survive as the ``True`` singleton, not merely be truthy.
    assert decoded["bool"] is True
| |
| |
def test_json_dumps_encoding():
    """Bytes values are emitted as decoded text, or as a ``[bytes]`` placeholder."""
    utf16_hello = b"\xff\xfeH\x00e\x00l\x00l\x00o\x00 \x00W\x00o\x00r\x00l\x00d\x00"
    raw = {
        "utf8": b"Hello World",
        "utf16": utf16_hello,
        "bytes": b"\xff",
    }
    encoded = json.dumps(raw, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)

    expected = {
        "utf8": "Hello World",
        "utf16": "Hello World",
        "bytes": "[bytes]",
    }
    for key, text in expected.items():
        assert decoded[key] == text
| |
| |
def test_json_iso_dttm_ser():
    """``json_iso_dttm_ser`` renders date/time values as ISO-format strings."""
    samples = {
        "datetime": datetime(2021, 1, 1, 0, 0, 0),
        "date": date(2021, 1, 1),
        "dttm": datetime(2020, 1, 1),
        "dt": date(2020, 1, 1),
        "t": time(),
    }
    expected = {
        "datetime": "2021-01-01T00:00:00",
        "date": "2021-01-01",
        "dttm": "2020-01-01T00:00:00",
        "dt": "2020-01-01",
        "t": "00:00:00",
    }

    decoded = json.loads(json.dumps(samples, default=json.json_iso_dttm_ser))
    for key, iso_text in expected.items():
        assert decoded[key] == iso_text

    # A numpy integer comes back equal to the plain number.
    assert json.json_iso_dttm_ser(np.int64(1)) == 1

    # An unsupported value yields a descriptive string in pessimistic mode...
    described = json.json_iso_dttm_ser(np.datetime64(), pessimistic=True)
    assert described == "Unserializable [<class 'numpy.datetime64'>]"

    # ...and raises in the default mode.
    with pytest.raises(TypeError):
        json.json_iso_dttm_ser(np.datetime64())
| |
| |
def test_pessimistic_json_iso_dttm_ser():
    """Dates serialize through the pessimistic hook; a mock without it fails."""
    samples = {
        "datetime": datetime(2021, 1, 1, 0, 0, 0),
        "date": date(2021, 1, 1),
    }
    encoded = json.dumps(samples, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)
    assert decoded["datetime"] == "2021-01-01T00:00:00"
    assert decoded["date"] == "2021-01-01"

    # No ``default`` hook here: dumping a MagicMock must raise TypeError.
    unserializable = {"UNSERIALIZABLE": MagicMock()}
    with pytest.raises(TypeError) as excinfo:
        json.dumps(unserializable)
    assert str(excinfo.value) == "_asdict() must return a dict, not MagicMock"
| |
| |
def test_pessimistic_json_iso_dttm_ser_nonutf8():
    """A lone ``0xff`` byte is serialized as the ``[bytes]`` placeholder."""
    key = "INVALID_UTF8_BYTES"
    payload = {key: b"\xff"}
    assert isinstance(payload[key], bytes)

    encoded = json.dumps(payload, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)
    assert decoded[key] == "[bytes]"
| |
| |
def test_pessimistic_json_iso_dttm_ser_utf16():
    """BOM-prefixed UTF-16 bytes are serialized as their decoded text."""
    key = "VALID_UTF16_BYTES"
    payload = {key: b"\xff\xfeS0\x930k0a0o0\x16NLu"}
    assert isinstance(payload[key], bytes)

    encoded = json.dumps(payload, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)
    assert decoded[key] == "こんにちは世界"
| |
| |
def test_validate_json():
    """``validate_json`` returns ``None`` for valid input and raises otherwise."""
    well_formed = '{"a": 5, "b": [1, 5, ["g", "h"]]}'
    assert json.validate_json(well_formed) is None

    # Same document with the closing quote after ``h`` removed.
    malformed = '{"a": 5, "b": [1, 5, ["g", "h]]}'
    expected_message = "Unterminated string starting at: line 1 column 28 (char 27)"
    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.validate_json(malformed)
    assert str(excinfo.value) == expected_message
| |
| |
def test_sensitive_fields() -> None:
    """
    Test masking/unmasking of sensitive fields.

    Redaction replaces the listed paths with a mask; revealing restores the
    original secret where the mask is still present and keeps any value the
    caller has since overwritten.
    """
    mask = "XXXXXXXXXX"
    original = {
        "password": "SECRET",
        "credentials": {
            "user_id": "alice",
            "user_token": "TOKEN",
        },
    }
    sensitive_fields = {"$.password", "$.credentials.user_token"}

    redacted = json.redact_sensitive(original, sensitive_fields)
    assert redacted == {
        "password": mask,
        "credentials": {
            "user_id": "alice",
            "user_token": mask,
        },
    }

    # Editing a non-sensitive field: both masked secrets are restored.
    edited_user = copy.deepcopy(redacted)
    edited_user["credentials"]["user_id"] = "bob"
    revealed = json.reveal_sensitive(original, edited_user, sensitive_fields)
    assert revealed == {
        "password": "SECRET",
        "credentials": {
            "user_id": "bob",
            "user_token": "TOKEN",
        },
    }

    # Editing a sensitive field: the new value is kept, the other is restored.
    edited_token = copy.deepcopy(redacted)
    edited_token["credentials"]["user_token"] = "NEW_TOKEN"  # noqa: S105
    revealed = json.reveal_sensitive(original, edited_token, sensitive_fields)
    assert revealed == {
        "password": "SECRET",
        "credentials": {
            "user_id": "alice",
            "user_token": "NEW_TOKEN",
        },
    }
| |
| |
def test_base_json_conv():
    """``base_json_conv`` maps assorted non-JSON-native values to plain ones."""
    convert = json.base_json_conv

    # Identity-sensitive results.
    assert convert(np.bool_(1)) is True
    assert convert(np.array(None)) is None

    # Results compared by value.
    equal_cases = [
        (np.int64(1), 1),
        (np.array([1, 2, 3]), [1, 2, 3]),
        ({1}, [1]),
        (Decimal("1.0"), 1.0),
        (time(12, 0), "12:00:00"),
        (timedelta(0), "0:00:00"),
        (b"", ""),
        (pd.DateOffset(days=1), "DateOffset(days=1)"),
        (b"\xff", "[bytes]"),
    ]
    for value, expected in equal_cases:
        assert convert(value) == expected

    # Results where only the type is checked.
    assert isinstance(convert(uuid.uuid4()), str)
    assert isinstance(convert(b"\xff\xfe"), str)

    # Unsupported input is rejected.
    with pytest.raises(TypeError):
        convert(np.datetime64())
| |
| |
def test_zlib_compression():
    """Compressing and then decompressing returns the original string."""
    original = '{"test": 1}'
    restored = zlib_decompress(zlib_compress(original))
    assert original == restored
| |
| |
def test_json_int_dttm_ser():
    """``json_int_dttm_ser`` converts dates and datetimes to epoch milliseconds."""
    new_year_2020 = datetime(2020, 1, 1)
    new_year_2020_ms = 1577836800000.0
    serialize = json.json_int_dttm_ser

    # A datetime and the matching date give the same timestamp.
    assert serialize(new_year_2020) == new_year_2020_ms
    assert serialize(date(2020, 1, 1)) == new_year_2020_ms

    # The epoch itself maps to zero.
    assert serialize(datetime(1970, 1, 1)) == 0
    assert serialize(date(1970, 1, 1)) == 0

    # One millisecond later moves the result by exactly one.
    one_ms_later = new_year_2020 + timedelta(milliseconds=1)
    assert serialize(one_ms_later) == (new_year_2020_ms + 1)

    assert serialize(np.int64(1)) == 1

    with pytest.raises(TypeError):
        serialize(np.datetime64())
| |
| |
def test_format_timedelta():
    """``format_timedelta`` renders zero, positive and negative durations."""
    zero = timedelta(0)
    cases = [
        (zero, "0:00:00"),
        (timedelta(days=1), "1 day, 0:00:00"),
        (timedelta(minutes=-6), "-0:06:00"),
        # Negative durations are written as a sign followed by the magnitude.
        (zero - timedelta(days=1, hours=5, minutes=6), "-1 day, 5:06:00"),
        (zero - timedelta(days=16, hours=4, minutes=3), "-16 days, 4:03:00"),
    ]
    for delta, rendered in cases:
        assert json.format_timedelta(delta) == rendered