blob: a8bd6d689a2362f4a51f3c4d856f3fdbec24a063 [file]
"""
Command-line interface for generating Otava test data.
Usage:
otava-gen --help
otava-gen generate --output-dir ./test_data
otava-gen generate --lengths 50 500 --seed 42
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Optional, Sequence

from otava_test_data.generators.combiner import CombinationGenerator
def create_parser() -> argparse.ArgumentParser:
    """Create the argument parser.

    The parser exposes three sub-commands; the chosen one is stored on the
    parsed namespace as ``command`` (``None`` when no sub-command is given):

    * ``generate`` -- options ``--output-dir``, ``--lengths``, ``--seed``,
      ``--noise-levels``, ``--no-combinations`` and ``--manifest-only``.
    * ``list`` -- no arguments.
    * ``info`` -- one positional argument, ``generator``.

    Returns:
        The configured top-level ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog="otava-gen",
        description="Generate test data for Apache Otava change point detection",
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Generate command
    gen_parser = subparsers.add_parser(
        "generate",
        help="Generate test data CSV files",
    )
    gen_parser.add_argument(
        "--output-dir", "-o",
        type=str,
        default="./otava_test_data",
        help="Output directory for CSV files (default: ./otava_test_data)",
    )
    gen_parser.add_argument(
        "--lengths", "-l",
        type=int,
        nargs="+",
        default=[50, 500],
        help="Time series lengths to generate (default: 50 500)",
    )
    gen_parser.add_argument(
        "--seed", "-s",
        type=int,
        default=42,
        help="Random seed for reproducibility (default: 42)",
    )
    gen_parser.add_argument(
        "--noise-levels",
        type=float,
        nargs="+",
        default=[0.0, 2.0, 5.0, 10.0],
        help="Noise sigma levels to apply (default: 0 2 5 10)",
    )
    gen_parser.add_argument(
        "--no-combinations",
        action="store_true",
        help="Skip generating pairwise combinations",
    )
    gen_parser.add_argument(
        "--manifest-only",
        action="store_true",
        help="Only generate manifest JSON, no CSV files",
    )

    # List command: it takes no arguments, so the returned sub-parser does
    # not need to be kept.
    subparsers.add_parser(
        "list",
        help="List available generators",
    )

    # Info command
    info_parser = subparsers.add_parser(
        "info",
        help="Show information about a generator",
    )
    info_parser.add_argument(
        "generator",
        type=str,
        help="Generator name",
    )

    return parser
def cmd_generate(args: argparse.Namespace) -> int:
    """Execute the generate command.

    Builds every test series through ``CombinationGenerator``, writes one CSV
    file per series unless ``--manifest-only`` was given, and always writes
    ``manifest.json`` (one entry per series) and ``summary.json`` (run-level
    totals) into the output directory, which is created if missing.

    Args:
        args: Parsed arguments of the ``generate`` sub-command. Reads
            ``output_dir``, ``lengths``, ``seed``, ``noise_levels``,
            ``no_combinations`` and ``manifest_only``.

    Returns:
        ``0`` once the manifest and summary have been written.
    """
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating test data in {output_dir}")
    print(f" Lengths: {args.lengths}")
    print(f" Seed: {args.seed}")
    print(f" Noise levels: {args.noise_levels}")
    print(f" Include combinations: {not args.no_combinations}")

    generator = CombinationGenerator(lengths=args.lengths, seed=args.seed)
    all_series = generator.generate_all_test_cases(
        include_combinations=not args.no_combinations,
        noise_levels=args.noise_levels,
    )

    manifest = []
    for i, ts in enumerate(all_series):
        # Create a filename-friendly name from the generator description.
        # The name part is capped at 50 characters; the zero-padded index
        # prefix keeps filenames distinct even when the capped names collide.
        safe_name = (
            ts.generator_name
            .replace("(", "_")
            .replace(")", "")
            .replace(", ", "_")
            .replace(" ", "_")
            .replace("+", "_plus_")
        )[:50]
        filename = f"{i:04d}_{safe_name}_L{len(ts)}.csv"

        manifest.append({
            "id": i,
            "filename": filename,
            "generator": ts.generator_name,
            "length": len(ts),
            "n_change_points": len(ts.change_points),
            "change_point_indices": ts.get_change_point_indices(),
            "change_point_types": [cp.change_type for cp in ts.change_points],
            "parameters": ts.parameters,
        })

        if not args.manifest_only:
            ts.to_csv(str(output_dir / filename))

        if (i + 1) % 50 == 0:
            print(f" Generated {i + 1} files...")

    # Write manifest
    manifest_path = output_dir / "manifest.json"
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)

    # Write summary
    summary = {
        "total_files": len(manifest),
        "lengths": args.lengths,
        "seed": args.seed,
        "noise_levels": args.noise_levels,
        # Sorted so that the summary is identical across runs; plain set
        # iteration order for strings varies with hash randomization.
        "generators_used": sorted(
            {e["generator"].split("(")[0] for e in manifest}
        ),
        "total_change_points": sum(e["n_change_points"] for e in manifest),
    }
    summary_path = output_dir / "summary.json"
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print(f"\nGenerated {len(manifest)} test cases")
    print(f" Manifest: {manifest_path}")
    print(f" Summary: {summary_path}")
    return 0
def cmd_list(args: argparse.Namespace) -> int:
    """Execute the list command.

    Prints a header followed by one row per entry returned by
    ``CombinationGenerator().generate_basic_blocks()``: the generator name
    padded to 25 characters, then the first line of the generator's
    docstring ("No description" when it has none).

    Args:
        args: Parsed command-line arguments; not read by this command.

    Returns:
        Always ``0``.
    """
    basic_blocks = CombinationGenerator().generate_basic_blocks()

    print("Available generators:")
    print()
    for block_name, block_func in basic_blocks:
        description = block_func.__doc__ or "No description"
        # Only the text before the first newline is shown.
        headline, _, _ = description.strip().partition("\n")
        print(f" {block_name:25s} {headline}")

    return 0
def cmd_info(args: argparse.Namespace) -> int:
    """Execute the info command.

    Looks up ``args.generator`` among the entries returned by
    ``CombinationGenerator().generate_basic_blocks()`` and prints that
    generator's docstring.

    Args:
        args: Parsed arguments of the ``info`` sub-command; reads
            ``generator``.

    Returns:
        ``0`` when the generator is known, ``1`` when it is not (the list
        of available names is printed in that case).
    """
    registry = dict(CombinationGenerator().generate_basic_blocks())
    requested = args.generator

    try:
        target = registry[requested]
    except KeyError:
        print(f"Unknown generator: {requested}")
        print(f"Available: {', '.join(registry)}")
        return 1

    print(f"Generator: {requested}")
    print()
    print(target.__doc__ or "No documentation available")
    return 0
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Main entry point.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing ``main()`` callers behave as before. Passing an
            explicit list lets the CLI be driven programmatically.

    Returns:
        The process exit status: ``0`` when no sub-command was given (help
        is printed), the selected command's own return value otherwise,
        ``1`` for an unrecognized command, and ``130`` when interrupted
        with Ctrl-C.
    """
    parser = create_parser()
    args = parser.parse_args(argv)

    if args.command is None:
        parser.print_help()
        return 0

    # Sub-command name -> handler; every handler takes the parsed namespace
    # and returns an exit status.
    handlers = {
        "generate": cmd_generate,
        "list": cmd_list,
        "info": cmd_info,
    }

    try:
        handler = handlers.get(args.command)
        if handler is None:
            parser.print_help()
            return 1
        return handler(args)
    except KeyboardInterrupt:
        print("\nInterrupted")
        return 130
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)