blob: 44239ec6254f2f883ec9cd233a5401edef0c5193 [file] [log] [blame]
import pathlib
import pandas as pd
from hamilton.io.utils import SQL_METADATA, get_file_metadata, get_sql_metadata
def test_get_sql_metadata():
    """Exercise ``get_sql_metadata`` with three kinds of result argument.

    The cases are: a table name paired with a one-row DataFrame, a query
    paired with an integer, and a query paired with a plain string. The
    test expects the row count to be 1, 5 and ``None`` respectively.
    """
    table_name = "foo"
    select_stmt = "SELECT foo FROM bar"
    int_result = 5
    one_row_frame = pd.DataFrame({"foo": ["bar"]})

    # Gather every metadata payload up front, then verify each in turn.
    from_frame = get_sql_metadata(table_name, one_row_frame)[SQL_METADATA]
    from_int = get_sql_metadata(select_stmt, int_result)[SQL_METADATA]
    from_str = get_sql_metadata(select_stmt, "foo")[SQL_METADATA]

    # DataFrame result: keyed by table name, one row.
    assert from_frame["table_name"] == table_name
    assert from_frame["rows"] == 1

    # Integer result: keyed by query, the integer is taken as the row count.
    assert from_int["query"] == select_stmt
    assert from_int["rows"] == 5

    # String result: no row count is expected.
    assert from_str["rows"] is None
def test_get_file_metadata(tmp_path: pathlib.Path):
    """Check the ``file_metadata`` entry produced for a real file on disk.

    A small text file is written under pytest's ``tmp_path`` and the
    reported path, size, modification time and timestamp are verified.
    """
    target = tmp_path / "test.txt"
    target.write_text("test")

    file_info = get_file_metadata(target)["file_metadata"]

    assert file_info["path"] == str(target)
    assert file_info["size"] > 0
    # The modification time must match what the filesystem reports.
    assert file_info["last_modified"] == target.stat().st_mtime
    assert file_info["timestamp"] is not None
def test_get_file_metadata_url_schema():
    """Check that a URL-style path keeps its text and reports its scheme."""
    s3_url = "s3://bucket/key"

    file_info = get_file_metadata(s3_url)["file_metadata"]

    assert file_info["path"] == s3_url
    assert file_info["scheme"] == "s3"