:mod:`airflow.contrib.hooks.gcp_transfer_hook`
==============================================
.. py:module:: airflow.contrib.hooks.gcp_transfer_hook
Module Contents
---------------
.. data:: TIME_TO_SLEEP_IN_SECONDS
:annotation: = 10
.. py:class:: GcpTransferJobsStatus
.. attribute:: ENABLED
:annotation: = ENABLED
.. attribute:: DISABLED
:annotation: = DISABLED
.. attribute:: DELETED
:annotation: = DELETED
.. py:class:: GcpTransferOperationStatus
.. attribute:: IN_PROGRESS
:annotation: = IN_PROGRESS
.. attribute:: PAUSED
:annotation: = PAUSED
.. attribute:: SUCCESS
:annotation: = SUCCESS
.. attribute:: FAILED
:annotation: = FAILED
.. attribute:: ABORTED
:annotation: = ABORTED
.. data:: ACCESS_KEY_ID
:annotation: = accessKeyId
.. data:: ALREADY_EXISTING_IN_SINK
:annotation: = overwriteObjectsAlreadyExistingInSink
.. data:: AWS_ACCESS_KEY
:annotation: = awsAccessKey
.. data:: AWS_S3_DATA_SOURCE
:annotation: = awsS3DataSource
.. data:: BODY
:annotation: = body
.. data:: BUCKET_NAME
:annotation: = bucketName
.. data:: DAY
:annotation: = day
.. data:: DESCRIPTION
:annotation: = description
.. data:: FILTER
:annotation: = filter
.. data:: FILTER_JOB_NAMES
:annotation: = job_names
.. data:: FILTER_PROJECT_ID
:annotation: = project_id
.. data:: GCS_DATA_SINK
:annotation: = gcsDataSink
.. data:: GCS_DATA_SOURCE
:annotation: = gcsDataSource
.. data:: HOURS
:annotation: = hours
.. data:: HTTP_DATA_SOURCE
:annotation: = httpDataSource
.. data:: LIST_URL
:annotation: = list_url
.. data:: METADATA
:annotation: = metadata
.. data:: MINUTES
:annotation: = minutes
.. data:: MONTH
:annotation: = month
.. data:: NAME
:annotation: = name
.. data:: OBJECT_CONDITIONS
:annotation: = object_conditions
.. data:: OPERATIONS
:annotation: = operations
.. data:: PROJECT_ID
:annotation: = projectId
.. data:: SCHEDULE
:annotation: = schedule
.. data:: SCHEDULE_END_DATE
:annotation: = scheduleEndDate
.. data:: SCHEDULE_START_DATE
:annotation: = scheduleStartDate
.. data:: SECONDS
:annotation: = seconds
.. data:: SECRET_ACCESS_KEY
:annotation: = secretAccessKey
.. data:: START_TIME_OF_DAY
:annotation: = startTimeOfDay
.. data:: STATUS
:annotation: = status
.. data:: STATUS1
:annotation: = status
.. data:: TRANSFER_JOB
:annotation: = transfer_job
.. data:: TRANSFER_JOB_FIELD_MASK
:annotation: = update_transfer_job_field_mask
.. data:: TRANSFER_JOBS
:annotation: = transferJobs
.. data:: TRANSFER_OPERATIONS
:annotation: = transferOperations
.. data:: TRANSFER_OPTIONS
:annotation: = transfer_options
.. data:: TRANSFER_SPEC
:annotation: = transferSpec
.. data:: YEAR
:annotation: = year
.. data:: NEGATIVE_STATUSES
.. py:class:: GCPTransferServiceHook(api_version='v1', gcp_conn_id='google_cloud_default', delegate_to=None)
Bases: :class:`airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook`
Hook for Google Storage Transfer Service.
.. attribute:: _conn
.. method:: get_conn(self)
Retrieves connection to Google Storage Transfer service.
:return: Google Storage Transfer service object
:rtype: dict
.. method:: create_transfer_job(self, body)
Creates a transfer job that runs periodically.
:param body: (Required) A request body, as described in
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/create#request-body
:type body: dict
:return: transfer job.
See:
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob
:rtype: dict
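As an illustrative sketch (the bucket names, description, and dates below are placeholders, not values from this module), a daily GCS-to-GCS transfer job could be created like this; the field names follow the ``TransferJob`` REST resource linked above:

.. code-block:: python

    from airflow.contrib.hooks.gcp_transfer_hook import GCPTransferServiceHook

    hook = GCPTransferServiceHook(gcp_conn_id='google_cloud_default')

    body = {
        'description': 'Nightly copy between buckets',  # placeholder
        'status': 'ENABLED',
        # 'projectId' is omitted here on the assumption that the GCP
        # connection defines a default project.
        'schedule': {
            'scheduleStartDate': {'day': 1, 'month': 1, 'year': 2019},
        },
        'transferSpec': {
            'gcsDataSource': {'bucketName': 'source-bucket'},  # placeholder
            'gcsDataSink': {'bucketName': 'sink-bucket'},      # placeholder
        },
    }
    job = hook.create_transfer_job(body=body)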
.. method:: get_transfer_job(self, job_name, project_id=None)
Gets the latest state of a transfer job in Google Storage
Transfer Service.
:param job_name: (Required) Name of the job to be fetched
:type job_name: str
:param project_id: (Optional) the ID of the project that owns the Transfer
Job. If set to None or missing, the default project_id from the GCP
connection is used.
:type project_id: str
:return: Transfer Job
:rtype: dict
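Continuing the sketch above, a minimal call; the job name is a placeholder (server-assigned names take the form ``transferJobs/<id>``):

.. code-block:: python

    job = hook.get_transfer_job(job_name='transferJobs/12345')
    print(job['status'])  # e.g. ENABLED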
.. method:: list_transfer_job(self, filter)
Lists transfer jobs in Google Storage Transfer
Service that match the specified filter.
:param filter: (Required) A request filter, as described in
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter
:type filter: dict
:return: List of Transfer Jobs
:rtype: list[dict]
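The filter keys here are the snake_case ones named by the ``FILTER_PROJECT_ID`` and ``FILTER_JOB_NAMES`` constants above, not the camelCase REST parameters. A sketch, with placeholder values:

.. code-block:: python

    jobs = hook.list_transfer_job(
        filter={
            'project_id': 'my-project-id',        # placeholder
            'job_names': ['transferJobs/12345'],  # placeholder
        }
    )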
.. method:: update_transfer_job(self, job_name, body)
Updates a transfer job that runs periodically.
:param job_name: (Required) Name of the job to be updated
:type job_name: str
:param body: (Required) A request body, as described in
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body
:type body: dict
:return: The updated transfer job.
:rtype: dict
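The key names in the update body appear to follow the ``TRANSFER_JOB`` and ``TRANSFER_JOB_FIELD_MASK`` constants above rather than the raw camelCase REST fields; treat this as a sketch under that assumption, with a placeholder job name:

.. code-block:: python

    updated = hook.update_transfer_job(
        job_name='transferJobs/12345',  # placeholder
        body={
            'transfer_job': {'description': 'Updated description'},
            'update_transfer_job_field_mask': 'description',
        },
    )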
.. method:: delete_transfer_job(self, job_name, project_id)
Deletes a transfer job. This is a soft delete. After a transfer job is
deleted, the job and all the transfer executions are subject to garbage
collection. Transfer jobs become eligible for garbage collection
30 days after soft delete.
:param job_name: (Required) Name of the job to be deleted
:type job_name: str
:param project_id: (Optional) the ID of the project that owns the Transfer
Job. If set to None or missing, the default project_id from the GCP
connection is used.
:type project_id: str
:rtype: None
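Continuing the sketch (placeholder names), a soft delete looks like this:

.. code-block:: python

    # The job is only soft-deleted and becomes eligible for garbage
    # collection 30 days later.
    hook.delete_transfer_job(
        job_name='transferJobs/12345', project_id='my-project-id'
    )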
.. method:: cancel_transfer_operation(self, operation_name)
Cancels a transfer operation in Google Storage Transfer Service.
:param operation_name: Name of the transfer operation.
:type operation_name: str
:rtype: None
.. method:: get_transfer_operation(self, operation_name)
Gets a transfer operation in Google Storage Transfer Service.
:param operation_name: (Required) Name of the transfer operation.
:type operation_name: str
:return: transfer operation
See:
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/Operation
:rtype: dict
.. method:: list_transfer_operations(self, filter)
Lists transfer operations in Google Storage Transfer Service that
match the specified filter.
:param filter: (Required) A request filter, as described in
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations/list#body.QUERY_PARAMETERS.filter
With one addition:
* project_id is optional if you have a project id defined
in the connection
See: :ref:`howto/connection:gcp`
:type filter: dict
:return: List of transfer operations
:rtype: list[dict]
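A sketch with placeholder values; as noted above, ``project_id`` can be left out when the connection defines a default project. The ``metadata``/``status`` fields used below match the ``METADATA`` and ``STATUS`` constants above:

.. code-block:: python

    operations = hook.list_transfer_operations(
        filter={'project_id': 'my-project-id', 'job_names': ['transferJobs/12345']}
    )
    for operation in operations:
        print(operation['name'], operation['metadata']['status'])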
.. method:: pause_transfer_operation(self, operation_name)
Pauses a transfer operation in Google Storage Transfer Service.
:param operation_name: (Required) Name of the transfer operation.
:type operation_name: str
:rtype: None
.. method:: resume_transfer_operation(self, operation_name)
Resumes a transfer operation in Google Storage Transfer Service.
:param operation_name: (Required) Name of the transfer operation.
:type operation_name: str
:rtype: None
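``cancel_transfer_operation``, ``pause_transfer_operation``, and ``resume_transfer_operation`` all take the operation name, which can be read from the ``name`` field of entries returned by ``list_transfer_operations``. A sketch, continuing the listing example above:

.. code-block:: python

    operation_name = operations[0]['name']  # e.g. 'transferOperations/...'
    hook.pause_transfer_operation(operation_name=operation_name)
    hook.resume_transfer_operation(operation_name=operation_name)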
.. method:: wait_for_transfer_job(self, job, expected_statuses=(GcpTransferOperationStatus.SUCCESS, ), timeout=60)
Waits until an operation of the given transfer job reaches one of the
expected statuses.
:param job: Transfer job
See:
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob
:type job: dict
:param expected_statuses: Statuses that are expected
See:
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status
:type expected_statuses: set[str]
:param timeout: Time in seconds in which the operation must end
:type timeout: int
:rtype: None
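A sketch that blocks until an operation of the job succeeds or the timeout elapses; given the ``TIME_TO_SLEEP_IN_SECONDS`` constant above, polling presumably happens every 10 seconds. The job dict and timeout are placeholders:

.. code-block:: python

    from airflow.contrib.hooks.gcp_transfer_hook import (
        GcpTransferOperationStatus,
    )

    hook.wait_for_transfer_job(
        job,  # dict returned by create_transfer_job / get_transfer_job
        expected_statuses=(GcpTransferOperationStatus.SUCCESS,),
        timeout=120,
    )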
.. method:: _inject_project_id(self, body, param_name, target_key)
.. staticmethod:: operations_contain_expected_statuses(operations, expected_statuses)
Checks whether the operation list contains an operation with the
expected status and returns True if so.
If it encounters an operation in the FAILED or ABORTED state, it
raises :class:`airflow.exceptions.AirflowException`.
:param operations: (Required) List of transfer operations to check.
:type operations: list[dict]
:param expected_statuses: (Required) Statuses that are expected
See:
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status
:type expected_statuses: set[str]
:return: True if an operation with an expected status is present in the
operation list, False otherwise
:raises: airflow.exceptions.AirflowException if it encounters operations
with a state in NEGATIVE_STATUSES
:rtype: bool
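This static method is what a manual polling loop could use instead of ``wait_for_transfer_job``; a sketch, with a placeholder project id:

.. code-block:: python

    from airflow.contrib.hooks.gcp_transfer_hook import (
        GCPTransferServiceHook, GcpTransferOperationStatus,
    )

    operations = hook.list_transfer_operations(
        filter={'project_id': 'my-project-id'}  # placeholder
    )
    # Returns True if any operation has reached SUCCESS; raises
    # AirflowException if a FAILED or ABORTED operation is found.
    done = GCPTransferServiceHook.operations_contain_expected_statuses(
        operations=operations,
        expected_statuses={GcpTransferOperationStatus.SUCCESS},
    )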