# Gobblin system configuration properties
taskexecutor.threadpool.size=1
taskretry.threadpool.coresize=0
taskretry.threadpool.maxsize=1
task.status.reportintervalinms=5000
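# Note: taskexecutor.threadpool.size controls how many tasks each task runner
# executes concurrently, and the retry pool handles re-runs of failed tasks;
# task status is reported every 5000 ms (5 seconds). The single-threaded
# values here are a minimal example, not a tuned production setting.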
# This assumes all dependent jars are put into the 'lib' directory
classpath=lib/*
mapred.job.queue.name=<queue name>
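# mapred.job.queue.name selects the Hadoop scheduler queue the MapReduce job
# is submitted to.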
# Azkaban config
type=java
job.class=gobblin.azkaban.AzkabanJobLauncher
launcher.type=MAPREDUCE
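# AzkabanJobLauncher is the entry point Azkaban invokes for a 'java'-type job;
# launcher.type=MAPREDUCE submits the Gobblin job to Hadoop as a MapReduce
# job (LOCAL would instead run it inside the Azkaban JVM).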
# Job properties
job.jars=lib/*
job.lock.enabled=false
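# With job.lock.enabled=false no job lock is acquired, so overlapping runs of
# this job are not prevented; enable it (and set job.lock.dir below) if the
# scheduler may trigger concurrent executions.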
# Writer properties
writer.destination.type=HDFS
writer.output.format=AVRO
# Qualitychecker properties
qualitychecker.task.policies=gobblin.policies.count.RowCountPolicy,gobblin.policies.schema.SchemaCompatibilityPolicy
qualitychecker.task.policy.types=OPTIONAL,OPTIONAL
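# Each policy in qualitychecker.task.policies is paired positionally with a
# type in qualitychecker.task.policy.types: OPTIONAL records a failed check
# without failing the task, whereas FAIL would abort the task.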
# Publisher properties
data.publisher.type=gobblin.publisher.BaseDataPublisher
data.publisher.replace.final.dir=false
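# BaseDataPublisher moves committed task output into data.publisher.final.dir;
# with replace.final.dir=false, data already in the final directory is kept
# rather than deleted before publishing.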
# Extract properties
extract.delta.fields=SystemModstamp
extract.primary.key.fields=Id
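# SystemModstamp, Salesforce's system last-modified timestamp, serves as the
# delta field for incremental pulls; the record Id is the primary key used to
# identify records.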
# Source class properties
source.class=gobblin.salesforce.SalesforceSource
source.timezone=UTC
source.querybased.schema=Core
source.querybased.fetch.size=<records per batch ex: 5000>
source.querybased.is.specific.api.active=true
source.querybased.start.value=CURRENTDAY-1
workunit.retry.enabled=false
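# CURRENTDAY-1 sets the initial low watermark to one day before the current
# date, so the first run pulls records modified within the last day; later
# runs resume from the watermark saved in the state store. With
# workunit.retry.enabled=false, failed work units are not carried over for
# retry in subsequent runs.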
# Converter properties
converter.classes=gobblin.converter.avro.JsonIntermediateToAvroConverter
converter.avro.timestamp.format=yyyy-MM-dd'T'HH:mm:ss.SSS'Z',yyyy-MM-dd'T'HH:mm:ss.000+0000
converter.avro.date.format=yyyy-MM-dd
converter.avro.time.format=HH:mm:ss,HH:mm:ss.000'Z'
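# Each converter.avro.*.format property takes a comma-separated list of
# candidate patterns; the converter tries them in turn when parsing the
# Salesforce timestamp, date, and time values into Avro fields.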
# Proxy settings - optional
source.conn.use.proxy.url=<Proxy host name - optional>
source.conn.use.proxy.port=<Proxy port number - optional>
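# Set both properties only if Salesforce is reached through an HTTP proxy;
# otherwise they can be omitted.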
# Salesforce connection details
source.conn.client.id=<client id>
source.conn.client.secret=<client secret>
source.conn.username=<username>
source.conn.password=<password>
source.conn.security.token=<token>
source.conn.host=<salesforce host name>
source.conn.version=<salesforce version>
source.conn.timeout=<connection timeout in milliseconds ex: 7200000>
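# These map to Salesforce's OAuth username-password flow: the connected app's
# client id/secret plus the user's username, password, and security token.
# source.conn.version is the Salesforce API version the connector calls.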
# Config properties
user.to.proxy=<user to proxy - optional>
writer.fs.uri=<namenode uri>
# Target directory in HDFS
data.publisher.final.dir=<directory to publish data in Hadoop>
# Staging, state, and working directories - see the Gobblin documentation for details
writer.staging.dir=<dir name>
writer.output.dir=<dir name>
qualitychecker.row.err.file=<dir name>
state.store.dir=<dir name>
mr.job.root.dir=<dir name>
job.lock.dir=<dir name>
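# How data moves between these directories: each task writes under
# writer.staging.dir, its output is moved to writer.output.dir when the task
# commits, and the publisher moves it to data.publisher.final.dir when the
# job commits. state.store.dir persists watermarks between runs, and
# mr.job.root.dir is the job's working directory on HDFS in MapReduce mode.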