{
"apiVersion": "kubeflow.org/v1",
"kind": "TFJob",
"metadata": {
"creationTimestamp": "2022-07-30T15:53:52.000+08:00",
"generation": 1,
"labels": {
"submarine-experiment-name": "tensorflow-dist-mnist"
},
"name": "experiment-1659167632755-0001",
"namespace": "default",
"resourceVersion": "39556",
"uid": "d9b3c2dd-ce17-400a-a4f7-2781294ff3d5"
},
"spec": {
"tfReplicaSpecs": {
"Ps": {
"replicas": 1,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
},
"spec": {
"containers": [
{
"command": [
"python",
"/var/tf_mnist/mnist_with_summaries.py",
"--log_dir\u003d/train/log",
"--learning_rate\u003d0.01",
"--batch_size\u003d150"
],
"env": [
{
"name": "ENV_1",
"value": "ENV1"
}
],
"image": "apache/submarine:tf-mnist-with-summaries-1.0",
"name": "tensorflow",
"resources": {
"limits": {
"cpu": "4",
"memory": "4096M"
},
"requests": {
"cpu": "4",
"memory": "2048M"
}
},
"volumeMounts": [
{
"mountPath": "/logs",
"name": "volume",
"subPath": "submarine-tensorboard/tensorflow-dist-mnist"
}
]
}
],
"volumes": [
{
"name": "volume",
"persistentVolumeClaim": {
"claimName": "submarine-tensorboard-pvc"
}
}
]
}
},
"restartPolicy": "OnFailure"
},
"Worker": {
"replicas": 2,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
},
"spec": {
"containers": [
{
"command": [
"python",
"/var/tf_mnist/mnist_with_summaries.py",
"--log_dir\u003d/train/log",
"--learning_rate\u003d0.01",
"--batch_size\u003d150"
],
"env": [
{
"name": "ENV_1",
"value": "ENV1"
}
],
"image": "apache/submarine:tf-mnist-with-summaries-1.0",
"name": "tensorflow",
"resources": {
"limits": {
"cpu": "2",
"memory": "2048M",
"nvidia.com/gpu": "1"
},
"requests": {
"cpu": "2",
"memory": "1024M",
"nvidia.com/gpu": "1"
}
},
"volumeMounts": [
{
"mountPath": "/logs",
"name": "volume",
"subPath": "submarine-tensorboard/tensorflow-dist-mnist"
}
]
}
],
"volumes": [
{
"name": "volume",
"persistentVolumeClaim": {
"claimName": "submarine-tensorboard-pvc"
}
}
]
}
},
"restartPolicy": "OnFailure"
}
},
"backoffLimit": 3
},
"status": {
"conditions": [
{
"lastTransitionTime": "2022-07-30T07:59:01Z",
"lastUpdateTime": "2022-07-30T07:59:01Z",
"message": "TFJob experiment-1659167632755-0001 is created.",
"reason": "TFJobCreated",
"status": "True",
"type": "Created"
},
{
"lastTransitionTime": "2022-07-30T07:59:02Z",
"lastUpdateTime": "2022-07-30T07:59:02Z",
"message": "TFJob default/experiment-1659167632755-0001 is running.",
"reason": "TFJobRunning",
"status": "True",
"type": "Running"
}
],
"replicaStatuses": {
"PS": {
"active": 1
},
"Worker": {
"active": 2
}
}
}
}