blob: 0b27acfb5dda9e126dac024595defd8866236e30 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#######
# SQL #
#######
config:
  # JDBC connection settings for the crawl database.
  # NOTE(review): entries other than url/user/password look like JDBC driver
  # properties and are kept as quoted strings -- confirm against the consumer.
  sql.connection:
    url: "jdbc:mysql://localhost:3306/crawl"
    user: "myuser"
    # NOTE(review): placeholder credential -- override per deployment rather
    # than committing a real password.
    password: "mypassword"
    rewriteBatchedStatements: "true"
    useBatchMultiSend: "true"

  sql.max.urls.per.bucket: 5

  sql.status.table: "urls"

  sql.spout.max.results: 100

  # Time in secs for which the URLs will be considered for fetching after
  # an ack or fail
  spout.ttl.purgatory: 30

  # Min time (in msecs) to allow between 2 successive queries to SQL
  spout.min.delay.queries: 2000

  # Delay since previous query date (in secs) after which the nextFetchDate
  # value will be reset to the current time.
  # Setting this to -1 or a large value means that the SQL backend will cache
  # the results but also that fewer and fewer results might be returned.
  spout.reset.fetchdate.after: 120

  sql.metrics.table: "metrics"

  sql.index.table: "content"

  # Metrics consumers:
  topology.metrics.consumer.register:
    - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
      parallelism.hint: 1