blob: 5363f5383e706c3dfc389e4a9038b9c1233567e9 [file]
"""
Invoke tasks for otava-test-data project.
Usage:
inv --list # List available tasks
inv test # Run tests
inv lint # Run linter
inv docs # Build documentation
inv generate # Generate test data
"""
from invoke import task, Context
import os
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
@task
def install(c: Context, dev: bool = True, web: bool = False):
"""Install the package and dependencies."""
extras = ["dev", "docs"]
if web:
extras.append("web")
if dev:
c.run(f"uv pip install -e '.[{','.join(extras)}]'", pty=True)
else:
c.run("uv pip install -e .", pty=True)
@task
def test(c: Context, verbose: bool = False, coverage: bool = False, marker: str = ""):
"""Run the test suite."""
cmd = "uv run pytest"
if verbose:
cmd += " -v"
if coverage:
cmd += " --cov=otava_test_data --cov-report=term-missing"
if marker:
cmd += f" -m '{marker}'"
cmd += " src/otava_test_data/tests/"
c.run(cmd, pty=True)
@task
def test_generators(c: Context):
"""Run only the generator tests (no Otava required)."""
c.run("uv run pytest src/otava_test_data/tests/test_generators.py -v", pty=True)
@task
def test_otava(c: Context):
"""Run only the Otava integration tests."""
c.run("uv run pytest src/otava_test_data/tests/test_otava_integration.py -v", pty=True)
@task
def lint(c: Context, fix: bool = False):
"""Run the linter (ruff)."""
cmd = "uv run ruff check src/"
if fix:
cmd += " --fix"
c.run(cmd, pty=True)
@task
def format(c: Context, check: bool = False):
"""Format code with ruff."""
cmd = "uv run ruff format src/"
if check:
cmd += " --check"
c.run(cmd, pty=True)
@task
def generate(
c: Context,
output_dir: str = "./test_data",
lengths: str = "50,500",
seed: int = 42,
):
"""Generate test data CSV files."""
length_args = " ".join(f"-l {l}" for l in lengths.split(","))
c.run(
f"uv run python -m otava_test_data.cli generate "
f"--output-dir {output_dir} {length_args} --seed {seed}",
pty=True,
)
@task
def docs_build(c: Context):
"""Build Sphinx documentation."""
c.run("uv run sphinx-build -b html docs/ docs/_build/html", pty=True)
@task
def docs_serve(c: Context, port: int = 8000):
"""Serve documentation locally."""
c.run(
f"uv run python -m http.server {port} --directory docs/_build/html",
pty=True,
)
@task
def docs_init(c: Context):
"""Initialize Sphinx documentation structure."""
docs_dir = os.path.join(PROJECT_DIR, "docs")
os.makedirs(docs_dir, exist_ok=True)
# Create conf.py
conf_content = '''"""Sphinx configuration for otava-test-data."""
project = "otava-test-data"
copyright = "2025, Joe Drumgoole"
author = "Joe Drumgoole"
version = "0.1.0"
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"myst_parser",
]
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
# MyST settings for markdown
myst_enable_extensions = [
"colon_fence",
"deflist",
]
# Napoleon settings for docstrings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
'''
conf_path = os.path.join(docs_dir, "conf.py")
with open(conf_path, "w") as f:
f.write(conf_content)
# Create index.md
index_content = '''# Otava Test Data
Test data generators for Apache Otava change point detection.
## Overview
This package provides generators for creating synthetic time series data
with known change points for testing and benchmarking change point detection
algorithms.
## Installation
```bash
pip install otava-test-data
```
## Quick Start
```python
from otava_test_data import step_function, noise_normal, combine
# Generate a step function with noise
step = step_function(length=500, value_before=100, value_after=120)
noise = noise_normal(length=500, mean=0, sigma=5)
ts = combine(step, noise)
# Export to CSV for Otava
ts.to_csv("test_data.csv")
# Access change point information
for cp in ts.change_points:
print(f"Change at index {cp.index}: {cp.description}")
```
## Contents
```{toctree}
:maxdepth: 2
generators
api
benchmark
```
'''
index_path = os.path.join(docs_dir, "index.md")
with open(index_path, "w") as f:
f.write(index_content)
# Create generators.md
generators_content = '''# Time Series Generators
## Basic Building Blocks
These generators create fundamental time series patterns used in performance
testing scenarios.
### Constant
A constant time series: `S = x, x, x, x...`
Represents an ideal performance test with no variation.
### Noise (Normal)
Normally distributed noise: `S = x1, x2, x3...` where `X ~ N(mean, sigma)`
Represents typical performance test output with random variation.
### Noise (Uniform)
Uniformly distributed noise (white noise): `random(min, max)`
### Outlier
Single deviating point (anomaly): `S = x, x, x, x, x, x', x, x...`
### Step Function
Single change point: `S = x1, x1, x1, x2, x2, x2...`
Represents a performance regression or improvement that persists.
### Regression + Fix
Temporary regression: `S = x1, x1... x2, ...x2, x3, x3...`
## Advanced Phenomena
### Banding
Oscillation between two values: `S = x1, x2, x2, x1, x2, x1...`
### Variance Change
Constant mean, changing variance: `S = N(mean, sigma1)..., N(mean, sigma2)...`
### Phase Change
Constant mean and variance, but phase shifts: `S = cos(x)..., sin(x)...`
### Multiple Changes
Multiple consecutive changes: `S = x0, x0... x1, x2, ... xn, xn...`
'''
gen_path = os.path.join(docs_dir, "generators.md")
with open(gen_path, "w") as f:
f.write(generators_content)
# Create api.md
api_content = '''# API Reference
```{eval-rst}
.. automodule:: otava_test_data
:members:
:undoc-members:
:show-inheritance:
.. automodule:: otava_test_data.generators.basic
:members:
:undoc-members:
:show-inheritance:
.. automodule:: otava_test_data.generators.advanced
:members:
:undoc-members:
:show-inheritance:
.. automodule:: otava_test_data.generators.combiner
:members:
:undoc-members:
:show-inheritance:
```
'''
api_path = os.path.join(docs_dir, "api.md")
with open(api_path, "w") as f:
f.write(api_content)
# Create benchmark.md
benchmark_content = '''# Benchmark Guide
## Generating Benchmark Data
Use the CLI to generate a comprehensive benchmark suite:
```bash
otava-gen generate --output-dir ./benchmark --lengths 50 500 --seed 42
```
This creates:
- CSV files for each test case
- `manifest.json` with metadata about each file
- `summary.json` with overall statistics
## Running Otava
```bash
# Example Otava invocation (adjust based on Otava's actual CLI)
otava analyze --input ./benchmark/0001_step_function_L500.csv
```
## Comparing Algorithms
The manifest.json file contains ground truth for each test case:
```python
import json
with open("benchmark/manifest.json") as f:
manifest = json.load(f)
for entry in manifest:
print(f"{entry['filename']}: {entry['n_change_points']} change points")
print(f" Expected indices: {entry['change_point_indices']}")
```
## Metrics
When comparing algorithms, consider:
1. **True Positive Rate**: % of actual change points detected
2. **False Positive Rate**: % of non-change-points flagged
3. **Location Accuracy**: How close detected points are to actual
4. **Latency**: How many points after change before detection
'''
bench_path = os.path.join(docs_dir, "benchmark.md")
with open(bench_path, "w") as f:
f.write(benchmark_content)
# Create _static and _templates directories
os.makedirs(os.path.join(docs_dir, "_static"), exist_ok=True)
os.makedirs(os.path.join(docs_dir, "_templates"), exist_ok=True)
print(f"Documentation initialized in {docs_dir}")
@task
def clean(c: Context):
"""Clean build artifacts."""
patterns = [
"build/",
"dist/",
"*.egg-info/",
"**/__pycache__/",
".pytest_cache/",
".ruff_cache/",
"docs/_build/",
".coverage",
"htmlcov/",
]
for pattern in patterns:
c.run(f"rm -rf {pattern}", warn=True)
print("Cleaned build artifacts")
@task
def check(c: Context):
"""Run all checks (lint, format check, tests)."""
print("Running lint...")
c.run("uv run ruff check src/", warn=True, pty=True)
print("\nRunning format check...")
c.run("uv run ruff format src/ --check", warn=True, pty=True)
print("\nRunning tests...")
c.run("uv run pytest src/otava_test_data/tests/test_generators.py -v", pty=True)
@task(pre=[check])
def release(c: Context, version: str = ""):
"""Prepare a release (run checks, update version, build)."""
if not version:
print("Error: Please specify version with --version")
return
# Update version in pyproject.toml
c.run(f"sed -i '' 's/version = \".*\"/version = \"{version}\"/' pyproject.toml")
# Update version in __init__.py
c.run(
f"sed -i '' 's/__version__ = \".*\"/__version__ = \"{version}\"/' "
"src/otava_test_data/__init__.py"
)
print(f"Updated version to {version}")
print("Remember to: git add, commit, tag, and push")
@task
def web(c: Context, host: str = "127.0.0.1", port: int = 8000, reload: bool = True):
"""Run the web visualization server (foreground)."""
reload_flag = "--reload" if reload else ""
c.run(
f"uv run uvicorn otava_test_data.web.main:app "
f"--host {host} --port {port} {reload_flag}",
pty=True,
)
WEB_PID_FILE = "/tmp/otava-web.pid"
WEB_DEFAULT_PORT = 8100
@task
def web_start(c: Context, host: str = "127.0.0.1", port: int = WEB_DEFAULT_PORT, reload: bool = True):
"""Start the web server in the background."""
import subprocess
import time
import urllib.request
# Check if already running
if os.path.exists(WEB_PID_FILE):
with open(WEB_PID_FILE, "r") as f:
pid = f.read().strip()
# Check if process is still running
result = c.run(f"ps -p {pid}", warn=True, hide=True)
if result.ok:
print(f"Web server already running (PID: {pid})")
return
else:
# Stale PID file
os.remove(WEB_PID_FILE)
cmd = [
"uv", "run", "uvicorn", "otava_test_data.web.main:app",
"--host", host, "--port", str(port),
]
if reload:
cmd.append("--reload")
# Start in background
proc = subprocess.Popen(
cmd,
# stdout=subprocess.DEVNULL,
# stderr=subprocess.DEVNULL,
start_new_session=True,
)
# Save PID
with open(WEB_PID_FILE, "w") as f:
f.write(str(proc.pid))
# Wait and check if server started
time.sleep(2)
try:
urllib.request.urlopen(f"http://{host}:{port}/api/generators", timeout=5)
print(f"Web server started at http://{host}:{port} (PID: {proc.pid})")
except Exception as e:
print(f"Warning: Server may not be ready yet - {e}")
print(f"Check http://{host}:{port} manually")
@task
def web_stop(c: Context):
"""Stop the web server running in the background."""
if not os.path.exists(WEB_PID_FILE):
print("Web server is not running (no PID file found)")
return
with open(WEB_PID_FILE, "r") as f:
pid = f.read().strip()
# Kill the process group (handles child processes from --reload)
result = c.run(f"pkill -P {pid}", warn=True, hide=True)
result = c.run(f"kill {pid}", warn=True, hide=True)
if result.ok:
print(f"Web server stopped (PID: {pid})")
else:
print(f"Process {pid} may already be stopped")
os.remove(WEB_PID_FILE)
@task
def web_restart(c: Context, host: str = "127.0.0.1", port: int = WEB_DEFAULT_PORT, reload: bool = True):
"""Restart the web server."""
web_stop(c)
import time
time.sleep(1)
web_start(c, host=host, port=port, reload=reload)
@task
def web_status(c: Context):
"""Check if the web server is running."""
if not os.path.exists(WEB_PID_FILE):
print("Web server is not running (no PID file)")
return
with open(WEB_PID_FILE, "r") as f:
pid = f.read().strip()
result = c.run(f"ps -p {pid} -o pid,command", warn=True, hide=True)
if result.ok:
print(f"Web server is running:")
print(result.stdout)
else:
print(f"Web server is not running (stale PID: {pid})")
os.remove(WEB_PID_FILE)
@task
def web_check(c: Context):
"""Check if the web server starts correctly."""
import subprocess
import time
import urllib.request
# Start server in background
proc = subprocess.Popen(
["uv", "run", "uvicorn", "otava_test_data.web.main:app",
"--host", "127.0.0.1", "--port", "8765"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
try:
time.sleep(3) # Wait for server to start
# Check if server responds
response = urllib.request.urlopen("http://127.0.0.1:8765/api/generators")
data = response.read()
print("Web server check: OK")
print(f"Generators endpoint returned {len(data)} bytes")
except Exception as e:
print(f"Web server check: FAILED - {e}")
finally:
proc.terminate()
proc.wait()