# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#######
# SQL #
#######
config:
  # JDBC connection settings. The credentials below are placeholders:
  # replace them for a real deployment and keep real secrets out of
  # version control.
  sql.connection:
    url: "jdbc:mysql://localhost:3306/crawl"
    user: "myuser"
    password: "mypassword"
    # Deliberately quoted strings, not YAML booleans.
    # NOTE(review): presumably passed through to the JDBC driver as
    # connection properties - confirm against the code reading this map.
    rewriteBatchedStatements: "true"
    useBatchMultiSend: "true"

  sql.max.urls.per.bucket: 5

  sql.status.table: "urls"

  sql.spout.max.results: 100

  # Time in secs for which the URLs will be considered for fetching after
  # an ack or fail
  spout.ttl.purgatory: 30

  # Min time (in msecs) to allow between 2 successive queries to SQL
  spout.min.delay.queries: 2000

  # Delay since previous query date (in secs) after which the nextFetchDate
  # value will be reset to the current time.
  # Setting this to -1 or a large value means that the backend will cache
  # the results but also that fewer and fewer results might be returned.
  # NOTE(review): this comment originally said "the ES", which looks like a
  # leftover from another backend's configuration - confirm the caching
  # behaviour for SQL.
  spout.reset.fetchdate.after: 120

  sql.metrics.table: "metrics"

  sql.index.table: "content"

  # Metrics consumers:
  topology.metrics.consumer.register:
    - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
      parallelism.hint: 1