blob: 6e194d4a055eed42486b2838ef75afcb4454a055 [file] [log] [blame]

:py:mod:`tests.system.providers.amazon.aws.example_sagemaker`
=============================================================

.. py:module:: tests.system.providers.amazon.aws.example_sagemaker

Module Contents
---------------


Functions
~~~~~~~~~

.. autoapisummary::

   tests.system.providers.amazon.aws.example_sagemaker.set_up
   tests.system.providers.amazon.aws.example_sagemaker.delete_ecr_repository
   tests.system.providers.amazon.aws.example_sagemaker.delete_logs


Attributes
~~~~~~~~~~

.. autoapisummary::

   tests.system.providers.amazon.aws.example_sagemaker.DAG_ID
   tests.system.providers.amazon.aws.example_sagemaker.ROLE_ARN_KEY
   tests.system.providers.amazon.aws.example_sagemaker.KNN_IMAGE_URI_KEY
   tests.system.providers.amazon.aws.example_sagemaker.sys_test_context_task
   tests.system.providers.amazon.aws.example_sagemaker.DATASET
   tests.system.providers.amazon.aws.example_sagemaker.SAMPLE_SIZE
   tests.system.providers.amazon.aws.example_sagemaker.PREPROCESS_SCRIPT_TEMPLATE
   tests.system.providers.amazon.aws.example_sagemaker.test_context
   tests.system.providers.amazon.aws.example_sagemaker.test_run


.. py:data:: DAG_ID
   :annotation: = example_sagemaker

.. py:data:: ROLE_ARN_KEY
   :annotation: = ROLE_ARN

.. py:data:: KNN_IMAGE_URI_KEY
   :annotation: = KNN_IMAGE_URI

.. py:data:: sys_test_context_task


.. py:data:: DATASET
   :annotation: = Multiline-String

   .. raw:: html

      <details><summary>Show Value</summary>

   .. code-block:: text
      :linenos:

      5.1,3.5,1.4,0.2,Iris-setosa
      4.9,3.0,1.4,0.2,Iris-setosa
      7.0,3.2,4.7,1.4,Iris-versicolor
      6.4,3.2,4.5,1.5,Iris-versicolor
      4.9,2.5,4.5,1.7,Iris-virginica
      7.3,2.9,6.3,1.8,Iris-virginica

   .. raw:: html

      </details>

.. py:data:: SAMPLE_SIZE

.. py:data:: PREPROCESS_SCRIPT_TEMPLATE
   :annotation: = Multiline-String

   .. raw:: html

      <details><summary>Show Value</summary>

   .. code-block:: text
      :linenos:

      import boto3
      import numpy as np
      import pandas as pd
      def main():
          # Load the Iris dataset from {input_path}/input.csv, split it into train/test
          # subsets, and write them to {output_path}/ for the Processing Operator.
          columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
          iris = pd.read_csv('{input_path}/input.csv', names=columns)
          # Process data
          iris['species'] = iris['species'].replace({{'Iris-virginica': 0, 'Iris-versicolor': 1, 'Iris-setosa': 2}})
          iris = iris[['species', 'sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
          # Split into test and train data
          iris_train, iris_test = np.split(
              iris.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(iris))]
          )
          # Remove the "answers" from the test set
          iris_test.drop(['species'], axis=1, inplace=True)
          # Write the splits to disk
          iris_train.to_csv('{output_path}/train.csv', index=False, header=False)
          iris_test.to_csv('{output_path}/test.csv', index=False, header=False)
          print('Preprocessing Done.')
      if __name__ == "__main__":
          main()

   .. raw:: html

      </details>

.. py:function:: set_up(env_id, knn_image_uri, role_arn)
.. py:function:: delete_ecr_repository(repository_name)
.. py:function:: delete_logs(env_id)
.. py:data:: test_context
.. py:data:: test_run