blob: c27674287d7d32bf1a87b81e110d5b2622695be8 [file]
import pandas as pd
import pytest
import ray
from hamilton import base, driver
from hamilton.plugins import h_ray
from .resources import example_module, smoke_screen_module
"""
For reference https://docs.ray.io/en/latest/auto_examples/testing-tips.html
"""
@pytest.fixture(scope="module")
def init():
ray.init()
yield "initialized"
ray.shutdown()
def test_ray_graph_adapter(init):
initial_columns = {
"signups": pd.Series([1, 10, 50, 100, 200, 400]),
"spend": pd.Series([10, 10, 20, 40, 40, 50]),
}
dr = driver.Driver(
initial_columns, example_module, adapter=h_ray.RayGraphAdapter(base.PandasDataFrameResult())
)
output_columns = [
"spend",
"signups",
"avg_3wk_spend",
"spend_per_signup",
]
df = dr.execute(output_columns)
assert set(df) == set(output_columns)
expected_column = pd.Series(
[0.0, 0.0, 13.33333, 23.33333, 33.33333, 43.33333], name="avg_3wk_spend"
)
pd.testing.assert_series_equal(
df.avg_3wk_spend.fillna(0.0), expected_column
) # fill na to get around NaN
# TODO: do some more asserting?
def test_smoke_screen_module(init):
config = {"region": "US"}
dr = driver.Driver(
config, smoke_screen_module, adapter=h_ray.RayGraphAdapter(base.PandasDataFrameResult())
)
output_columns = [
"raw_acquisition_cost",
"pessimistic_net_acquisition_cost",
"neutral_net_acquisition_cost",
"optimistic_net_acquisition_cost",
"series_with_start_date_end_date",
]
df = dr.execute(
inputs={"date_range": {"start_date": "20200101", "end_date": "20220801"}},
final_vars=output_columns,
)
epsilon = 0.00001
assert abs(df["raw_acquisition_cost"].mean() - 0.393808) < epsilon
assert abs(df["pessimistic_net_acquisition_cost"].mean() - 0.420769) < epsilon
assert abs(df["neutral_net_acquisition_cost"].mean() - 0.405582) < epsilon
assert abs(df["optimistic_net_acquisition_cost"].mean() - 0.399363) < epsilon
assert df["series_with_start_date_end_date"].iloc[0] == "date_20200101_date_20220801"