blob: 33565774077f71aa1885968dee4021dbad7cdbfc [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import copy
import math
import uuid
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from unittest.mock import MagicMock
import numpy as np
import pandas as pd
import pytest
from superset.utils import json
from superset.utils.core import (
zlib_compress,
zlib_decompress,
)
def test_json_loads():
    """Parse a flat JSON object and check each scalar value that comes back."""
    raw = '{"str": "Hello World", "int": 123456789, "float": 0.12345, "bool": true}'
    parsed = json.loads(raw)

    expected = {"str": "Hello World", "int": 123456789, "float": 0.12345}
    for key, value in expected.items():
        assert parsed[key] == value
    # Identity check: JSON ``true`` must come back as the ``True`` singleton.
    assert parsed["bool"] is True
def test_json_loads_exception():
    """Malformed JSON must raise ``JSONDecodeError`` with the pinned message."""
    malformed = '{"a": 5, "b": [1, 5, ["g", "h]]}'
    expected_message = "Unterminated string starting at: line 1 column 28 (char 27)"

    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.loads(malformed)

    assert str(excinfo.value) == expected_message
def test_json_loads_encoding():
    """Check that ``json.loads`` accepts byte input in more than one encoding."""
    # Raw bytes literal: ``\u`` is not a recognised escape inside a bytes
    # literal, so a non-raw literal emits an "invalid escape sequence" warning.
    # The bytes are the same either way; the JSON parser is what interprets
    # the \uXXXX escapes.
    unicode_data = rb'{"a": "\u0073\u0074\u0072"}'
    data = json.loads(unicode_data)
    assert data["a"] == "str"

    # UTF-16 payload starting with the FF FE byte-order mark, decoded through
    # the explicit ``encoding`` argument.
    utf16_data = b'\xff\xfe{\x00"\x00a\x00"\x00:\x00 \x00"\x00s\x00t\x00r\x00"\x00}\x00'
    data = json.loads(utf16_data, encoding="utf-16")
    assert data["a"] == "str"
def test_json_loads_allow_nan():
    """``NaN`` is rejected by default and accepted when ``allow_nan=True``."""
    serialized_data = '{"float": NaN}'

    # Default call: the bare NaN token is expected to be refused.
    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.loads(serialized_data)
    assert str(excinfo.value) == "Expecting value: line 1 column 11 (char 10)"

    data = json.loads(serialized_data, allow_nan=True)
    # Check the concrete container type; an ``object`` check would be vacuous
    # because every Python value is an instance of ``object``.
    assert isinstance(data, dict)
    assert math.isnan(data["float"]) is True
def test_json_dumps():
    """Serialize a dict of plain scalars and confirm it reloads unchanged."""
    original = {
        "str": "Hello World",
        "int": 123456789,
        "float": 0.12345,
        "bool": True,
    }
    encoded = json.dumps(original, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)

    for key in ("str", "int", "float"):
        assert decoded[key] == original[key]
    # The boolean is checked by identity, not just equality.
    assert decoded["bool"] is True
def test_json_dumps_encoding():
    """Dump bytes values and check the text each one is expected to become."""
    payload = {
        "utf8": b"Hello World",
        "utf16": b"\xff\xfeH\x00e\x00l\x00l\x00o\x00 \x00W\x00o\x00r\x00l\x00d\x00",
        "bytes": b"\xff",
    }
    round_tripped = json.loads(
        json.dumps(payload, default=json.pessimistic_json_iso_dttm_ser)
    )

    # UTF-8 and UTF-16 inputs should decode to text; the lone 0xFF byte is
    # expected to be replaced by the "[bytes]" placeholder.
    expected = {"utf8": "Hello World", "utf16": "Hello World", "bytes": "[bytes]"}
    for key, text in expected.items():
        assert round_tripped[key] == text
def test_json_iso_dttm_ser():
    """Exercise ``json_iso_dttm_ser`` on temporal, numpy and unsupported values."""
    temporal_values = {
        "datetime": datetime(2021, 1, 1, 0, 0, 0),
        "date": date(2021, 1, 1),
        "dttm": datetime(2020, 1, 1),
        "dt": date(2020, 1, 1),
        "t": time(),
    }
    decoded = json.loads(json.dumps(temporal_values, default=json.json_iso_dttm_ser))

    expected_iso = {
        "datetime": "2021-01-01T00:00:00",
        "date": "2021-01-01",
        "dttm": "2020-01-01T00:00:00",
        "dt": "2020-01-01",
        "t": "00:00:00",
    }
    for key, iso_text in expected_iso.items():
        assert decoded[key] == iso_text

    assert json.json_iso_dttm_ser(np.int64(1)) == 1

    # An unsupported value yields a marker string in pessimistic mode and a
    # TypeError otherwise.
    unserializable_marker = "Unserializable [<class 'numpy.datetime64'>]"
    pessimistic_result = json.json_iso_dttm_ser(np.datetime64(), pessimistic=True)
    assert pessimistic_result == unserializable_marker
    with pytest.raises(TypeError):
        json.json_iso_dttm_ser(np.datetime64())
def test_pessimistic_json_iso_dttm_ser():
    """Cover the pessimistic serializer on date and datetime values.

    Also pins the ``TypeError`` message produced when a ``MagicMock`` is
    dumped without passing any ``default`` hook.
    """
    values = {
        "datetime": datetime(2021, 1, 1, 0, 0, 0),
        "date": date(2021, 1, 1),
    }
    encoded = json.dumps(values, default=json.pessimistic_json_iso_dttm_ser)
    decoded = json.loads(encoded)
    assert decoded["datetime"] == "2021-01-01T00:00:00"
    assert decoded["date"] == "2021-01-01"

    expected_message = "_asdict() must return a dict, not MagicMock"
    with pytest.raises(TypeError) as excinfo:
        json.dumps({"UNSERIALIZABLE": MagicMock()})
    assert str(excinfo.value) == expected_message
def test_pessimistic_json_iso_dttm_ser_nonutf8():
    """A byte string that is not valid UTF-8 is expected to dump as ``[bytes]``."""
    key = "INVALID_UTF8_BYTES"
    payload = {key: b"\xff"}
    assert isinstance(payload[key], bytes)

    decoded = json.loads(
        json.dumps(payload, default=json.pessimistic_json_iso_dttm_ser)
    )
    assert decoded[key] == "[bytes]"
def test_pessimistic_json_iso_dttm_ser_utf16():
    """UTF-16 bytes with a byte-order mark are expected to dump as decoded text."""
    key = "VALID_UTF16_BYTES"
    payload = {key: b"\xff\xfeS0\x930k0a0o0\x16NLu"}
    assert isinstance(payload[key], bytes)

    decoded = json.loads(
        json.dumps(payload, default=json.pessimistic_json_iso_dttm_ser)
    )
    assert decoded[key] == "こんにちは世界"
def test_validate_json():
    """``validate_json`` returns ``None`` for valid input and raises otherwise."""
    well_formed = '{"a": 5, "b": [1, 5, ["g", "h"]]}'
    assert json.validate_json(well_formed) is None

    malformed = '{"a": 5, "b": [1, 5, ["g", "h]]}'
    expected_message = "Unterminated string starting at: line 1 column 28 (char 27)"
    with pytest.raises(json.JSONDecodeError) as excinfo:
        json.validate_json(malformed)
    assert str(excinfo.value) == expected_message
def test_sensitive_fields() -> None:
    """
    Test masking/unmasking of sensitive fields.

    Two fields are redacted by path; revealing is then checked both when a
    non-sensitive field was edited and when a masked field was overwritten.
    """
    original = {
        "password": "SECRET",
        "credentials": {
            "user_id": "alice",
            "user_token": "TOKEN",
        },
    }
    sensitive_fields = {"$.password", "$.credentials.user_token"}
    mask = "XXXXXXXXXX"

    redacted = json.redact_sensitive(original, sensitive_fields)
    assert redacted == {
        "password": mask,
        "credentials": {"user_id": "alice", "user_token": mask},
    }

    # Case 1: only a non-sensitive field changed -> the edit is kept and the
    # masked values are restored from the original payload.
    edited = copy.deepcopy(redacted)
    edited["credentials"]["user_id"] = "bob"
    revealed = json.reveal_sensitive(original, edited, sensitive_fields)
    assert revealed == {
        "password": "SECRET",
        "credentials": {"user_id": "bob", "user_token": "TOKEN"},
    }

    # Case 2: a masked field was overwritten -> the new value is kept.
    edited = copy.deepcopy(redacted)
    edited["credentials"]["user_token"] = "NEW_TOKEN"  # noqa: S105
    revealed = json.reveal_sensitive(original, edited, sensitive_fields)
    assert revealed == {
        "password": "SECRET",
        "credentials": {"user_id": "alice", "user_token": "NEW_TOKEN"},
    }
def test_base_json_conv():
    """Check ``base_json_conv`` across the value types covered by this suite."""
    convert = json.base_json_conv

    # numpy scalars and arrays
    assert convert(np.bool_(1)) is True
    assert convert(np.int64(1)) == 1
    assert convert(np.array([1, 2, 3])) == [1, 2, 3]
    assert convert(np.array(None)) is None

    # sets, decimals and UUIDs
    assert convert({1}) == [1]
    assert convert(Decimal("1.0")) == 1.0
    assert isinstance(convert(uuid.uuid4()), str)

    # time-like values
    assert convert(time(12, 0)) == "12:00:00"
    assert convert(timedelta(0)) == "0:00:00"

    # bytes, plus a pandas offset
    assert convert(b"") == ""
    assert isinstance(convert(b"\xff\xfe"), str)
    assert convert(pd.DateOffset(days=1)) == "DateOffset(days=1)"
    assert convert(b"\xff") == "[bytes]"

    # unsupported input
    with pytest.raises(TypeError):
        convert(np.datetime64())
def test_zlib_compression():
    """Compressing then decompressing a JSON string yields the original text."""
    original = '{"test": 1}'
    restored = zlib_decompress(zlib_compress(original))
    assert original == restored
def test_json_int_dttm_ser():
    """Check ``json_int_dttm_ser`` against known epoch-millisecond values."""
    to_epoch_ms = json.json_int_dttm_ser
    new_year_2020 = datetime(2020, 1, 1)
    new_year_2020_ms = 1577836800000.0

    assert to_epoch_ms(new_year_2020) == new_year_2020_ms
    assert to_epoch_ms(date(2020, 1, 1)) == new_year_2020_ms

    # The Unix epoch itself maps to zero for both datetime and date.
    assert to_epoch_ms(datetime(1970, 1, 1)) == 0
    assert to_epoch_ms(date(1970, 1, 1)) == 0

    # One extra millisecond moves the result by exactly one.
    one_ms_later = new_year_2020 + timedelta(milliseconds=1)
    assert to_epoch_ms(one_ms_later) == (new_year_2020_ms + 1)

    assert to_epoch_ms(np.int64(1)) == 1
    with pytest.raises(TypeError):
        to_epoch_ms(np.datetime64())
def test_format_timedelta():
    """Check ``format_timedelta`` output for zero, positive and negative spans."""
    zero = timedelta(0)
    cases = [
        (zero, "0:00:00"),
        (timedelta(days=1), "1 day, 0:00:00"),
        (timedelta(minutes=-6), "-0:06:00"),
        (zero - timedelta(days=1, hours=5, minutes=6), "-1 day, 5:06:00"),
        (zero - timedelta(days=16, hours=4, minutes=3), "-16 days, 4:03:00"),
    ]
    for delta, rendered in cases:
        assert json.format_timedelta(delta) == rendered