| import asyncio |
| from unittest import mock |
| |
| import pandas as pd |
| import pytest |
| |
| from hamilton import async_driver, base |
| from hamilton.lifecycle.base import ( |
| BasePostGraphConstruct, |
| BasePostGraphConstructAsync, |
| BasePostGraphExecute, |
| BasePostGraphExecuteAsync, |
| BasePostNodeExecute, |
| BasePostNodeExecuteAsync, |
| BasePreGraphExecute, |
| BasePreGraphExecuteAsync, |
| BasePreNodeExecute, |
| BasePreNodeExecuteAsync, |
| ) |
| |
| from .resources import simple_async_module |
| |
| |
async def async_identity(n: int) -> int:
    """Return ``n`` unchanged after sleeping on the event loop for 0.01 seconds.

    The tests in this module use it as a minimal awaitable with a known result.
    """
    # asyncio.sleep hands back its ``result`` argument once the delay has elapsed.
    return await asyncio.sleep(0.01, result=n)
| |
| |
@pytest.mark.asyncio
async def test_await_dict_of_coroutines():
    """``await_dict_of_tasks`` is given a dict whose values are bare coroutine objects."""
    pending = {}
    for key in range(10):
        pending[key] = async_identity(key)
    resolved = await async_driver.await_dict_of_tasks(pending)
    # Build the expectation by awaiting the same helper one key at a time.
    expected = {}
    for key in range(10):
        expected[key] = await async_identity(key)
    assert resolved == expected
| |
| |
@pytest.mark.asyncio
async def test_await_dict_of_tasks():
    """``await_dict_of_tasks`` is given a dict whose values are already-scheduled tasks."""
    scheduled = {}
    for key in range(10):
        scheduled[key] = asyncio.create_task(async_identity(key))
    resolved = await async_driver.await_dict_of_tasks(scheduled)
    # Build the expectation by awaiting the same helper one key at a time.
    expected = {}
    for key in range(10):
        expected[key] = await async_identity(key)
    assert resolved == expected
| |
| |
| # The following are not parameterized as we need to use the event loop -- fixtures will complicate this |
@pytest.mark.asyncio
async def test_process_value_raw():
    """A plain integer passed to ``process_value`` comes back equal to itself."""
    processed = await async_driver.process_value(1)
    assert processed == 1
| |
| |
@pytest.mark.asyncio
async def test_process_value_coroutine():
    """A coroutine object passed to ``process_value`` yields the coroutine's result."""
    coroutine = async_identity(1)
    processed = await async_driver.process_value(coroutine)
    assert processed == 1
| |
| |
@pytest.mark.asyncio
async def test_process_value_task():
    """An ``asyncio`` task passed to ``process_value`` yields the task's result."""
    task = asyncio.create_task(async_identity(1))
    processed = await async_driver.process_value(task)
    assert processed == 1
| |
| |
@pytest.mark.asyncio
async def test_driver_end_to_end():
    """Run every variable of ``simple_async_module`` except ``return_df`` via ``raw_execute``."""
    driver = async_driver.AsyncDriver({}, simple_async_module)
    requested = []
    for variable in driver.list_available_variables():
        if variable.name != "return_df":
            requested.append(variable.name)
    outputs = await driver.raw_execute(final_vars=requested, inputs={"external_input": 1})
    # Convert "a" and "b" to plain dicts so the whole result can be compared with ==.
    for series_key in ("a", "b"):
        outputs[series_key] = outputs[series_key].to_dict()
    expected = {
        "a": pd.Series([1, 2, 3]).to_dict(),
        "another_async_func": 8,
        "async_func_with_param": 4,
        "b": pd.Series([4, 5, 6]).to_dict(),
        "external_input": 1,
        "non_async_func_with_decorator": {"result_1": 9, "result_2": 5},
        "result_1": 9,
        "result_2": 5,
        "result_3": 1,
        "result_4": 2,
        "return_dict": {"result_3": 1, "result_4": 2},
        "simple_async_func": 2,
        "simple_non_async_func": 7,
    }
    assert outputs == expected
| |
| |
@pytest.mark.asyncio
@mock.patch("hamilton.telemetry.send_event_json")
@mock.patch("hamilton.telemetry.g_telemetry_enabled", True)
async def test_driver_end_to_end_telemetry(send_event_json):
    """Execute the graph with telemetry patched on and count the events sent.

    ``send_event_json`` is replaced by a mock; the test expects exactly two calls
    to it once every other outstanding task has finished.
    """
    driver = async_driver.AsyncDriver({}, simple_async_module, result_builder=base.DictResult())
    # Telemetry is patched off while listing variables so that invocation is not counted.
    with mock.patch("hamilton.telemetry.g_telemetry_enabled", False):
        requested = [
            variable.name
            for variable in driver.list_available_variables()
            if variable.name != "return_df"
        ]
    outputs = await driver.execute(final_vars=requested, inputs={"external_input": 1})
    for series_key in ("a", "b"):
        outputs[series_key] = outputs[series_key].to_dict()
    expected = {
        "a": pd.Series([1, 2, 3]).to_dict(),
        "another_async_func": 8,
        "async_func_with_param": 4,
        "b": pd.Series([4, 5, 6]).to_dict(),
        "external_input": 1,
        "non_async_func_with_decorator": {"result_1": 9, "result_2": 5},
        "result_1": 9,
        "result_2": 5,
        "result_3": 1,
        "result_4": 2,
        "return_dict": {"result_3": 1, "result_4": 2},
        "simple_async_func": 2,
        "simple_non_async_func": 7,
    }
    assert outputs == expected
    # Await every task except this one so the last telemetry invocation has
    # finished executing before the mock is inspected.
    every_task = asyncio.all_tasks()
    this_task = asyncio.current_task()
    others = [task for task in every_task if task != this_task]
    await asyncio.gather(*others)
    assert send_event_json.called
    assert len(send_event_json.call_args_list) == 2
| |
| |
@pytest.mark.asyncio
async def test_async_driver_end_to_end_async_lifecycle_methods():
    """Run the driver with an adapter whose lifecycle hooks are all coroutines.

    Checks that the set of hook names recorded by the adapter is exactly the
    five hooks it implements, and that execution returns the expected values.
    """
    recorded = []

    class AsyncTrackingAdapter(
        BasePostGraphConstructAsync,
        BasePreGraphExecuteAsync,
        BasePostGraphExecuteAsync,
        BasePreNodeExecuteAsync,
        BasePostNodeExecuteAsync,
    ):
        """Adapter that pauses in each hook, then appends ``(hook_name, kwargs)`` to a list."""

        def __init__(self, calls: list, pause_time: float = 0.01):
            self.calls = calls
            self.pause_time = pause_time

        async def _pause(self):
            return await asyncio.sleep(self.pause_time)

        async def _track(self, hook_name, hook_kwargs):
            # Shared body of every hook: pause first, then record the call.
            await self._pause()
            self.calls.append((hook_name, hook_kwargs))

        async def post_graph_construct(self, **kwargs):
            await self._track("post_graph_construct", kwargs)

        async def pre_graph_execute(self, **kwargs):
            await self._track("pre_graph_execute", kwargs)

        async def pre_node_execute(self, **kwargs):
            await self._track("pre_node_execute", kwargs)

        async def post_node_execute(self, **kwargs):
            await self._track("post_node_execute", kwargs)

        async def post_graph_execute(self, **kwargs):
            await self._track("post_graph_execute", kwargs)

    adapter = AsyncTrackingAdapter(recorded)

    unstarted = async_driver.AsyncDriver(
        {}, simple_async_module, result_builder=base.DictResult(), adapters=[adapter]
    )
    driver = await unstarted.ainit()
    requested = []
    for variable in driver.list_available_variables():
        if variable.name != "return_df":
            requested.append(variable.name)
    outputs = await driver.execute(final_vars=requested, inputs={"external_input": 1})

    seen_hooks = {entry[0] for entry in recorded}
    expected_hooks = {
        "pre_graph_execute",
        "post_graph_execute",
        "pre_node_execute",
        "post_node_execute",
        "post_graph_construct",
    }
    assert seen_hooks == expected_hooks

    for series_key in ("a", "b"):
        outputs[series_key] = outputs[series_key].to_dict()
    expected = {
        "a": pd.Series([1, 2, 3]).to_dict(),
        "another_async_func": 8,
        "async_func_with_param": 4,
        "b": pd.Series([4, 5, 6]).to_dict(),
        "external_input": 1,
        "non_async_func_with_decorator": {"result_1": 9, "result_2": 5},
        "result_1": 9,
        "result_2": 5,
        "result_3": 1,
        "result_4": 2,
        "return_dict": {"result_3": 1, "result_4": 2},
        "simple_async_func": 2,
        "simple_non_async_func": 7,
    }
    assert outputs == expected
| |
| |
@pytest.mark.asyncio
async def test_async_driver_end_to_end_sync_lifecycle_methods():
    """Run the async driver with an adapter whose lifecycle hooks are plain methods.

    Checks that the set of hook names recorded by the adapter is exactly the
    five hooks it implements, and that execution returns the expected values.
    """
    tracked_calls = []

    class SyncTrackingAdapter(
        BasePostGraphConstruct,
        BasePreGraphExecute,
        BasePostGraphExecute,
        BasePreNodeExecute,
        BasePostNodeExecute,
    ):
        """Adapter that appends ``(hook_name, kwargs)`` to a list from each synchronous hook."""

        def __init__(self, calls: list):
            # No pause time here: the synchronous hooks never sleep.
            self.calls = calls

        def pre_graph_execute(self, **kwargs):
            self.calls.append(("pre_graph_execute", kwargs))

        def post_graph_execute(self, **kwargs):
            self.calls.append(("post_graph_execute", kwargs))

        def pre_node_execute(self, **kwargs):
            self.calls.append(("pre_node_execute", kwargs))

        def post_node_execute(self, **kwargs):
            self.calls.append(("post_node_execute", kwargs))

        def post_graph_construct(self, **kwargs):
            self.calls.append(("post_graph_construct", kwargs))

    adapter = SyncTrackingAdapter(tracked_calls)

    dr = await async_driver.AsyncDriver(
        {}, simple_async_module, result_builder=base.DictResult(), adapters=[adapter]
    ).ainit()
    all_vars = [var.name for var in dr.list_available_variables() if var.name != "return_df"]
    result = await dr.execute(final_vars=all_vars, inputs={"external_input": 1})
    hooks_called = {hook_name for hook_name, _ in tracked_calls}
    assert hooks_called == {
        "pre_graph_execute",
        "post_graph_execute",
        "pre_node_execute",
        "post_node_execute",
        "post_graph_construct",
    }
    # Convert "a" and "b" to plain dicts so the whole result can be compared with ==.
    result["a"] = result["a"].to_dict()
    result["b"] = result["b"].to_dict()
    assert result == {
        "a": pd.Series([1, 2, 3]).to_dict(),
        "another_async_func": 8,
        "async_func_with_param": 4,
        "b": pd.Series([4, 5, 6]).to_dict(),
        "external_input": 1,
        "non_async_func_with_decorator": {"result_1": 9, "result_2": 5},
        "result_1": 9,
        "result_2": 5,
        "result_3": 1,
        "result_4": 2,
        "return_dict": {"result_3": 1, "result_4": 2},
        "simple_async_func": 2,
        "simple_non_async_func": 7,
    }