blob: ea5625a267aaabb7096c3a9dfc6484b44ed2f5a7 [file] [log] [blame]
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Configure the processing job: the container image to run, the IAM role it assumes,\n",
"# the command used to launch the script, and the instance fleet (one ml.t3.medium).\n",
"script_processor = ScriptProcessor(\n",
"    role='arn:aws:iam::<account_number>:role/SageMakerScriptProcessorRole',  # Change to the actual role ARN\n",
"    image_uri='<account_number>.dkr.ecr.<region>.amazonaws.com/aws-sagemaker-hamilton:latest',  # Change to the actual image URI\n",
"    command=['python3'],\n",
"    instance_type='ml.t3.medium',\n",
"    instance_count=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Container-side paths for every input and output must live under /opt/ml/processing/.\n",
"script_processor.run(\n",
"    code='processing.py',\n",
"    inputs=[\n",
"        # The data/ directory is made available to the job at the destination path.\n",
"        ProcessingInput(\n",
"            destination='/opt/ml/processing/input/data',\n",
"            source='data/',\n",
"        ),\n",
"        # The app/ directory is placed under the job's input/code directory.\n",
"        ProcessingInput(\n",
"            destination='/opt/ml/processing/input/code/app',\n",
"            source='app/',\n",
"        ),\n",
"    ],\n",
"    outputs=[\n",
"        # Files written to the container's output directory are sent to the S3 destination.\n",
"        ProcessingOutput(\n",
"            destination='s3://path/to/output/directory',  # Change to the actual URI\n",
"            source='/opt/ml/processing/output/',\n",
"        ),\n",
"    ],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After the job finishes, the new files will appear in `s3://path/to/output/directory`:\n",
"- `output_table.csv`\n",
"- `dag_visualization.svg`"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hamilton",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}