blob: 86f27d4588eef75f265690bc0a208aebc834cf65 [file] [log] [blame]
#
# Copyright (C) 2014-2015 LinkedIn Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy of the
# License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied.
#
# =======================================
# General Configuration
# =======================================
# Location of the trash directory. It must be accessible by the user running the job.
# If the location doesn't exist, it is created automatically with permissions 700.
# If the parent of the location doesn't exist, it is created automatically with permissions 777.
# OPTIONAL. Defaults to /path/to/user/home/_GOBBLIN_TRASH
gobblin.trash.location=/data/trash
# =======================================
# DatasetCleaner configuration
# =======================================
# ---------------------------------------
# General Configuration
# ---------------------------------------
# OPTIONAL:
# Simulate deletes: when true, no actual deletions are executed. Default: false
gobblin.retention.simulate=false
# Skip the trash and permanently delete files instead. Default: false
gobblin.retention.skip.trash=false
# For each dataset version deleted, also remove any ancestor directories left empty. Default: true
gobblin.retention.delete.empty.directories=true
# ---------------------------------------
# For tracking data
# ---------------------------------------
# Dataset profile class used for tracking data.
gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.TrackingDatasetProfile
# Glob pattern used to look for datasets.
gobblin.retention.dataset.pattern=/data/datasets/*
# Datetime directory pattern (year/month/day/hour).
# Uses HH (hour of day, 00-23). In Java/Joda-style date patterns the lowercase hh is the
# 12-hour clock hour (01-12), which cannot represent hourly directories 13-23.
# NOTE(review): assumes the pattern is parsed with Java/Joda pattern letters - confirm.
gobblin.retention.datetime.pattern=yyyy/MM/dd/HH
# OPTIONAL:
# Regex for datasets to skip (regex will be applied to absolute path of each dataset).
# gobblin.retention.dataset.blacklist=IGNORE
# Minutes to retain data (1440 minutes = 24 hours).
gobblin.retention.minutes.retained=1440
# ----------------------------------------
# For snapshot data
# ----------------------------------------
# gobblin.retention.dataset.profile.class=gobblin.data.management.retention.profile.SnapshotDatasetProfile
# Glob pattern to look for datasets.
# gobblin.retention.dataset.pattern=/data/datasets/*
# OPTIONAL:
# Regex for datasets to skip (regex will be applied to absolute path of each dataset).
# gobblin.retention.dataset.blacklist=IGNORE
# Regex to extract watermark from directory name.
# gobblin.retention.watermark.regex=(.*)
# Number of versions to retain.
# gobblin.retention.version.retained=2
# =========================================
# Trash Collector Job
# =========================================
# Class of trash snapshot cleanup policy. Default: TimeBasedSnapshotCleanupPolicy
gobblin.trash.snapshot.cleanup.policy.class=gobblin.data.management.trash.TimeBasedSnapshotCleanupPolicy
# Trash snapshot retention in minutes. Default: 1440 (one day)
gobblin.trash.snapshot.retention.minutes=1440