blob: 3f2ee73e1553aba49e8316e824bd5f44599f05a7 [file]
import pandas as pd
import unique_users
from hamilton.experimental.decorators import reuse
from hamilton.experimental.decorators.reuse import reuse_functions
from hamilton.function_modifiers import value
def website_interactions() -> pd.DataFrame:
    """Build a small, hard-coded table of mock website interaction events.

    Each event is a ``(timestamp, user_id, region)`` record. The records are
    loaded into a frame, indexed by their timestamp string, sorted on that
    index, and finally the index is converted to a ``pd.DatetimeIndex``.

    :return: Mock event data with columns ``user_id`` and ``region``,
        indexed by event timestamp.
    """
    us_events = [
        ("20220901-14:00:00", 1, "US"),
        ("20220901-18:30:00", 2, "US"),
        ("20220901-19:00:00", 1, "US"),
        ("20220902-08:00:00", 3, "US"),
        ("20220903-16:00:00", 1, "US"),
        ("20220907-13:00:00", 4, "US"),
        ("20220910-14:00:00", 1, "US"),
        ("20220911-12:00:00", 3, "US"),
        ("20220914-11:00:00", 1, "US"),
        ("20220915-07:30:00", 2, "US"),
        ("20220916-06:00:00", 1, "US"),
        ("20220917-16:00:00", 2, "US"),
        ("20220920-17:00:00", 5, "US"),
        ("20220922-09:30:00", 2, "US"),
        ("20220922-10:00:00", 1, "US"),
        ("20220924-07:00:00", 6, "US"),
        ("20220924-08:00:00", 1, "US"),
        ("20220925-21:00:00", 1, "US"),
        ("20220926-15:30:00", 2, "US"),
    ]
    ca_events = [
        ("20220901-14:00:00", 7, "CA"),
        ("20220901-18:30:00", 8, "CA"),
        ("20220901-19:00:00", 9, "CA"),
        ("20220902-08:00:00", 7, "CA"),
        ("20220903-16:00:00", 10, "CA"),
        ("20220907-13:00:00", 9, "CA"),
        ("20220910-14:00:00", 8, "CA"),
        ("20220911-12:00:00", 11, "CA"),
        ("20220914-11:00:00", 12, "CA"),
        ("20220915-07:30:00", 7, "CA"),
        ("20220916-06:00:00", 9, "CA"),
        ("20220917-16:00:00", 10, "CA"),
        ("20220920-17:00:00", 7, "CA"),
        ("20220922-09:30:00", 11, "CA"),
        ("20220922-10:00:00", 8, "CA"),
        ("20220924-07:00:00", 9, "CA"),
        ("20220924-08:00:00", 10, "CA"),
        ("20220925-21:00:00", 13, "CA"),
        ("20220926-15:30:00", 14, "CA"),
    ]

    # US rows first, then CA rows: the input order is kept fixed so that the
    # sort below sees exactly the same sequence of (duplicated) timestamps.
    events = us_events + ca_events
    column_names = ["timestamp", "user_id", "region"]

    raw_frame = pd.DataFrame(events, columns=column_names)
    indexed = raw_frame.set_index("timestamp")
    # Sorting happens on the timestamp strings, before they are parsed.
    ordered = indexed.sort_index()
    ordered.index = pd.DatetimeIndex(ordered.index)
    return ordered
@reuse_functions(
    load_from=[unique_users],
    namespace="daily_users_US",
    with_config={"region": "US"},
    with_inputs={"grain": value("day"), "region": value("US")},
    outputs={"unique_users": "unique_users_daily_US"},
)
def daily_user_data_US() -> reuse.MultiOutput(unique_users_daily_US=pd.Series):
    """Declare the daily unique-user output for the US region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``daily_users_US`` namespace, with
    ``grain`` bound to the literal ``"day"`` and ``region`` to ``"US"``, and
    the ``unique_users`` result is exposed as ``unique_users_daily_US``.
    """
@reuse_functions(
    load_from=[unique_users],
    namespace="weekly_users_US",
    with_config={"region": "US"},
    with_inputs={"grain": value("week"), "region": value("US")},
    outputs={"unique_users": "unique_users_weekly_US"},
)
def weekly_user_data_US() -> reuse.MultiOutput(unique_users_weekly_US=pd.Series):
    """Declare the weekly unique-user output for the US region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``weekly_users_US`` namespace, with
    ``grain`` bound to the literal ``"week"`` and ``region`` to ``"US"``, and
    the ``unique_users`` result is exposed as ``unique_users_weekly_US``.
    """
@reuse_functions(
    load_from=[unique_users],
    namespace="monthly_users_US",
    with_config={"region": "US"},
    with_inputs={"grain": value("month"), "region": value("US")},
    outputs={"unique_users": "unique_users_monthly_US"},
)
def monthly_user_data_US() -> reuse.MultiOutput(unique_users_monthly_US=pd.Series):
    """Declare the monthly unique-user output for the US region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``monthly_users_US`` namespace,
    with ``grain`` bound to the literal ``"month"`` and ``region`` to
    ``"US"``, and the ``unique_users`` result is exposed as
    ``unique_users_monthly_US``.
    """
@reuse_functions(
    load_from=[unique_users],
    namespace="daily_user_data_CA",
    with_config={"region": "CA"},
    with_inputs={"grain": value("day"), "region": value("CA")},
    outputs={"unique_users": "unique_users_daily_CA"},
)
def daily_user_data_CA() -> reuse.MultiOutput(unique_users_daily_CA=pd.Series):
    """Declare the daily unique-user output for the CA region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``daily_user_data_CA`` namespace,
    with ``grain`` bound to the literal ``"day"`` and ``region`` to ``"CA"``,
    and the ``unique_users`` result is exposed as ``unique_users_daily_CA``.
    """
@reuse_functions(
    # Fixed: the grain was "month", which contradicted the weekly name,
    # namespace and output of this block (the US weekly block uses "week").
    with_inputs={"grain": value("week"), "region": value("CA")},
    namespace="weekly_user_data_CA",
    outputs={"unique_users": "unique_users_weekly_CA"},
    with_config={"region": "CA"},
    load_from=[unique_users],
)
def weekly_user_data_CA() -> reuse.MultiOutput(unique_users_weekly_CA=pd.Series):
    """Declare the weekly unique-user output for the CA region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``weekly_user_data_CA`` namespace,
    with ``grain`` bound to the literal ``"week"`` and ``region`` to
    ``"CA"``, and the ``unique_users`` result is exposed as
    ``unique_users_weekly_CA``.
    """
@reuse_functions(
    # Fixed: the grain was "day", which contradicted the monthly name,
    # namespace and output of this block (the US monthly block uses "month").
    with_inputs={"grain": value("month"), "region": value("CA")},
    namespace="monthly_user_data_CA",
    outputs={"unique_users": "unique_users_monthly_CA"},
    with_config={"region": "CA"},
    load_from=[unique_users],
)
def monthly_user_data_CA() -> reuse.MultiOutput(unique_users_monthly_CA=pd.Series):
    """Declare the monthly unique-user output for the CA region.

    This function has no logic of its own; everything is expressed through
    the ``reuse_functions`` decorator arguments: functions are loaded from
    the ``unique_users`` module under the ``monthly_user_data_CA`` namespace,
    with ``grain`` bound to the literal ``"month"`` and ``region`` to
    ``"CA"``, and the ``unique_users`` result is exposed as
    ``unique_users_monthly_CA``.
    """