#######
# SQL #
#######
config:
  sql.connection:
    url: "jdbc:mysql://localhost:3306/crawl"
    user: "myuser"
    password: "mypassword"
    rewriteBatchedStatements: "true"
    useBatchMultiSend: "true"
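
  # Any additional keys under sql.connection besides url/user/password (such as
  # rewriteBatchedStatements above) are assumed to be handed to the JDBC driver as
  # connection properties, i.e. roughly equivalent to appending them to the URL:
  #
  #   jdbc:mysql://localhost:3306/crawl?rewriteBatchedStatements=true&useBatchMultiSend=true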
  sql.max.urls.per.bucket: 5
  sql.status.table: "urls"
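
  # A minimal sketch of a MySQL table that the status table name above could point
  # at; the column names and types here are illustrative assumptions, not the
  # authoritative schema:
  #
  #   CREATE TABLE urls (
  #     url           VARCHAR(512) NOT NULL,
  #     status        VARCHAR(16) DEFAULT 'DISCOVERED',
  #     nextfetchdate TIMESTAMP,
  #     metadata      TEXT,
  #     bucket        SMALLINT DEFAULT 0,
  #     host          VARCHAR(128),
  #     PRIMARY KEY (url)
  #   );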
  sql.spout.max.results: 100
  # Time in secs for which acked or failed URLs are kept in the spout's purgatory
  # and not re-emitted
  spout.ttl.purgatory: 30
  # Min time (in msecs) to allow between 2 successive queries to the SQL backend
  spout.min.delay.queries: 2000
  # Delay since the previous query date (in secs) after which the nextFetchDate value will be reset to the current time
  # Setting this to -1 or a large value means that the spout will keep reusing the same nextFetchDate, but also that
  # fewer and fewer results might be returned.
  spout.reset.fetchdate.after: 120
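
  # As an informal illustration (not the exact statement issued by the spout), the
  # settings above roughly amount to a bucketed query of this shape, where the
  # per-bucket cap comes from sql.max.urls.per.bucket, the overall limit from
  # sql.spout.max.results, and the column/partition names are assumptions:
  #
  #   SELECT url, metadata, nextfetchdate FROM (
  #     SELECT url, metadata, nextfetchdate,
  #            ROW_NUMBER() OVER (PARTITION BY host ORDER BY nextfetchdate) AS ranking
  #     FROM urls
  #     WHERE nextfetchdate <= NOW()
  #   ) AS ranked
  #   WHERE ranking <= 5
  #   LIMIT 100;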
  sql.metrics.table: "metrics"
  sql.index.table: "content"
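
  # Purely illustrative sketch of a metrics table matching the name above; the
  # actual columns are determined by the MetricsConsumer implementation and may
  # differ:
  #
  #   CREATE TABLE metrics (
  #     srccomponentid VARCHAR(128),
  #     srcworkerhost  VARCHAR(128),
  #     name           VARCHAR(256),
  #     value          DOUBLE,
  #     timestamp      TIMESTAMP
  #   );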
  # Metrics consumers:
  topology.metrics.consumer.register:
    - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
      parallelism.hint: 1