# blob: 95a6dbcf86f3cb10bfcf1661cf9376411b7a2ba4 [file] [log] [blame]
from typing import Any
import pandas as pd
def _is_listy(v: Any) -> bool:
    """Tell whether *v* is one of the supported sequence containers.

    Only ``list``, ``tuple``, ``numpy.ndarray`` and ``pandas.Series``
    instances (including subclasses) qualify. Strings, dicts, sets and any
    other iterable are reported as not list-like.
    """
    # Imported here because numpy is not imported at module level.
    import numpy as np

    if isinstance(v, (list, tuple)):
        return True
    return isinstance(v, (np.ndarray, pd.Series))
def _to_list(v: Any) -> list:
    """Return *v* as a plain Python list.

    Args:
        v: A numpy array, pandas Series, list, tuple, or any other value.

    Returns:
        * numpy array: the result of ``tolist()``. A zero-dimensional array
          is first promoted to one dimension, so it yields a one-element
          list instead of a bare scalar.
        * pandas Series: the result of ``to_list()``.
        * list or tuple: a new shallow-copied list.
        * anything else (including ``None`` and strings): a one-element list
          wrapping the value.
    """
    # Imported here because numpy is not imported at module level.
    import numpy as np

    if isinstance(v, np.ndarray):
        # ndarray.tolist() on a 0-d array returns a scalar, which would break
        # the "always a list" contract and any caller that takes len() of it.
        return np.atleast_1d(v).tolist()
    if isinstance(v, pd.Series):
        return v.to_list()
    if isinstance(v, (list, tuple)):
        return list(v)
    # Scalar: wrap in a list of one.
    return [v]
def _explode_row(model_id: str, ts: Any, params: dict, outputs: dict) -> pd.DataFrame:
    """Expand one record into a dataframe with one row per array element.

    ``params`` and ``outputs`` are merged into a single set of columns; when
    a key appears in both, the value from ``params`` is used. Values that
    ``_is_listy`` reports as list-like are laid out one element per row,
    with shorter ones padded with ``None`` up to the longest. Every other
    value is repeated on each row.

    Args:
        model_id: Value stored in the ``model_id`` column of every row.
        ts: Value stored in the ``ts`` column of every row.
        params: Mapping of names to scalar or list-like values.
        outputs: Mapping of names to scalar or list-like values.

    Returns:
        A dataframe whose columns are ``model_id``, ``ts``, ``step`` and then
        the merged keys (``params`` keys first, followed by keys found only
        in ``outputs``). The column order is the same whether or not any
        value is list-like. With no list-like value the frame has exactly one
        row with ``step`` 0. Otherwise it has as many rows as the longest
        list-like value, with ``step`` counting up from 0; this is zero rows
        when every list-like value is empty. A key named ``model_id``, ``ts``
        or ``step`` replaces the corresponding bookkeeping column.
    """
    # Resolve every key to a single value up front (params win on collisions)
    # so both code paths below see the same columns in the same order.
    merged = dict(params)
    for key, value in outputs.items():
        merged.setdefault(key, value)

    # Convert each list-like value exactly once; the result serves both the
    # length computation and the column data.
    sequences = {
        key: _to_list(value) for key, value in merged.items() if _is_listy(value)
    }

    if not sequences:
        # No arrays: single-row record.
        row = {"model_id": model_id, "ts": ts, "step": 0}
        row.update(merged)
        return pd.DataFrame([row])

    n_rows = max(len(seq) for seq in sequences.values())

    columns = {
        "model_id": [model_id] * n_rows,
        "ts": [ts] * n_rows,
        "step": list(range(n_rows)),
    }
    for key, value in merged.items():
        if key in sequences:
            seq = sequences[key]
            # n_rows is the maximum length, so a sequence can only be shorter
            # or equal; pad the shortfall with None.
            columns[key] = seq + [None] * (n_rows - len(seq))
        else:
            # Scalar: repeat on every row.
            columns[key] = [value] * n_rows
    return pd.DataFrame(columns)