Client of a submarine server that creates and manages experiments and logs.
create_experiment(experiment_spec: json) -> dict
Create an experiment.
Parameters
Returns: The detailed info about the submarine experiment.
Example
from submarine import *
client = ExperimentClient()
client.create_experiment({ "meta": { "name": "tf-mnist-json", "namespace": "default", "framework": "TensorFlow", "cmd": "python /var/tf_mnist/mnist_with_summaries.py --log_dir=/train/log --learning_rate=0.01 --batch_size=150", "envVars": { "ENV_1": "ENV1" } }, "environment": { "image": "apache/submarine:tf-mnist-with-summaries-1.0" }, "spec": { "Ps": { "replicas": 1, "resources": "cpu=1,memory=1024M" }, "Worker": { "replicas": 1, "resources": "cpu=1,memory=1024M" } } })
patch_experiment(id: str, experiment_spec: json) -> dict
Patch an experiment.
Parameters
Returns
Example
client.patch_experiment("experiment_1626160071451_0008", { "meta": { "name": "tf-mnist-json", "namespace": "default", "framework": "TensorFlow", "cmd": "python /var/tf_mnist/mnist_with_summaries.py --log_dir=/train/log --learning_rate=0.01 --batch_size=150", "envVars": { "ENV_1": "ENV1" } }, "environment": { "image": "apache/submarine:tf-mnist-with-summaries-1.0" }, "spec": { "Worker": { "replicas": 2, "resources": "cpu=1,memory=1024M" } } })
get_experiment(id: str) -> dict
Get the experiment's detailed info by id.
Parameters
Returns
Example
experiment = client.get_experiment("experiment_1626160071451_0008")
list_experiments(status: Optional[str]=None) -> list[dict]
List all experiments for the user.
Parameters
Returns
Example
experiments = client.list_experiments()
delete_experiment(id: str) -> dict
Delete the submarine experiment.
Parameters
Returns
Example
client.delete_experiment("experiment_1626160071451_0008")
get_log(id: str, onlyMaster: Optional[bool]=False) -> None
Print the training logs of the experiment's pods. By default, the logs of all pods are printed.
Parameters
Returns
Example
client.get_log("experiment_1626160071451_0009")
list_log(status: str) -> list[dict]
List experiment log.
Parameters
Returns
Example
logs = client.list_log("Succeeded")
wait_for_finish(id: str, polling_interval: Optional[int]=10) -> dict
Wait until the experiment has finished or failed.
Parameters
Returns
Example
logs = client.wait_for_finish("experiment_1626160071451_0009", 5)