# external/sql/sql-conf.yaml - stormcrawler - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 #######
 # SQL #
 #######
 config:
   # Connection settings for the SQL backend.
   # The user/password values appear to be sample placeholders: supply real
   # credentials at deployment time and keep real secrets out of version control.
   # NOTE(review): rewriteBatchedStatements / useBatchMultiSend look like JDBC
   # driver options passed along with the connection - confirm against the
   # driver in use.
   sql.connection:
     url: "jdbc:mysql://localhost:3306/crawl"
     user: "myuser"
     password: "mypassword"
     rewriteBatchedStatements: "true"
     useBatchMultiSend: "true"

   sql.max.urls.per.bucket: 5

   sql.status.table: "urls"

   sql.spout.max.results: 100

   # Time (in secs) for which the URLs will be considered for fetching after
   # an ack or a fail
   spout.ttl.purgatory: 30

   # Min time (in msecs) to allow between 2 successive queries to SQL
   spout.min.delay.queries: 2000

   # Delay since the previous query date (in secs) after which the
   # nextFetchDate value will be reset to the current time.
   # Setting this to -1 or a large value means that the results will be cached
   # but also that fewer and fewer results might be returned.
   # NOTE(review): the original wording was "the ES will cache the results",
   # which looks carried over from an Elasticsearch configuration - confirm
   # what actually applies to the SQL backend.
   spout.reset.fetchdate.after: 120

   sql.metrics.table: "metrics"

   sql.index.table: "content"

   # Metrics consumers:
   topology.metrics.consumer.register:
     - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
       parallelism.hint: 1
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 #######
 # SQL #
 #######
 # NOTE(review): this stanza repeats the `config` mapping defined earlier in
 # this file, with the same keys and values. A second top-level `config` key
 # is a duplicate key (invalid per the YAML spec; most parsers silently keep
 # the last one), so one of the two copies should be removed once it is
 # confirmed to be redundant.
 # The original lines were indented with tabs and had lost their nesting; the
 # structure below mirrors the earlier copy.
 config:
   sql.connection:
     url: "jdbc:mysql://localhost:3306/crawl"
     user: "myuser"
     password: "mypassword"
     rewriteBatchedStatements: "true"
     useBatchMultiSend: "true"

   sql.max.urls.per.bucket: 5

   sql.status.table: "urls"

   sql.spout.max.results: 100

   # Time (in secs) for which the URLs will be considered for fetching after
   # an ack or a fail
   spout.ttl.purgatory: 30

   # Min time (in msecs) to allow between 2 successive queries to SQL
   spout.min.delay.queries: 2000

   # Delay since the previous query date (in secs) after which the
   # nextFetchDate value will be reset to the current time.
   # Setting this to -1 or a large value means that the results will be cached
   # but also that fewer and fewer results might be returned.
   # NOTE(review): the original wording was "the ES will cache the results",
   # which looks carried over from an Elasticsearch configuration - confirm
   # what actually applies to the SQL backend.
   spout.reset.fetchdate.after: 120

   sql.metrics.table: "metrics"

   sql.index.table: "content"

   # Metrics consumers:
   topology.metrics.consumer.register:
     - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
       parallelism.hint: 1