# external/sql/sql-conf.yaml - stormcrawler - Git at Google

#######
# SQL #
#######
config:
  # JDBC connection settings.
  # NOTE(review): the user/password below look like sample values; replace
  # them per deployment and avoid committing real secrets.
  sql.connection:
    url: "jdbc:mysql://localhost:3306/crawl"
    user: "myuser"
    password: "mypassword"
    # Deliberately quoted strings, not YAML booleans. Presumably passed
    # through to the JDBC driver as connection properties - TODO confirm.
    rewriteBatchedStatements: "true"
    useBatchMultiSend: "true"

  sql.max.urls.per.bucket: 5

  # NOTE(review): presumably the table holding the URL status records;
  # verify against the schema.
  sql.status.table: "urls"

  sql.spout.max.results: 100

  # Time in secs for which the URLs will be considered for fetching after
  # an ack or fail
  spout.ttl.purgatory: 30

  # Min time (in msecs) to allow between 2 successive queries to SQL
  spout.min.delay.queries: 2000

  # Delay since previous query date (in secs) after which the nextFetchDate
  # value will be reset to the current time.
  # Setting this to -1 or a large value means that the backend will cache the
  # results but also that fewer and fewer results might be returned.
  # NOTE(review): this note originally referred to "the ES"; confirm that it
  # applies unchanged to the SQL backend.
  spout.reset.fetchdate.after: 120

  # NOTE(review): presumably the table the metrics consumer writes to;
  # verify against the schema.
  sql.metrics.table: "metrics"

  # NOTE(review): presumably the table receiving indexed content; verify
  # against the schema.
  sql.index.table: "content"

  # Metrics consumers:
  topology.metrics.consumer.register:
    - class: "org.apache.stormcrawler.sql.metrics.MetricsConsumer"
      parallelism.hint: 1