###############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
###############################################################################
##
## Required Properties
##
#dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format
pir.dataInputFormat=
#outputFile -- required -- Fully qualified name of output file in hdfs
pir.outputFile=
#One of the following two options is required - launcher is preferred
#launcher -- required -- Full class name of a class implementing ResponderPlugin,
#e.g. org.apache.pirk.responder.wideskies.standalone.StandaloneResponderPlugin
#launcher=
#platform -- required -- 'mapreduce', 'spark', 'sparkstreaming', 'standalone', or 'storm'
#Processing platform technology for the responder
platform=
#queryInput -- required -- Fully qualified dir in hdfs of Query files
pir.queryInput=
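#Example (illustrative values only) of the required properties for a MapReduce run over 'base'
#format input; the HDFS paths below are placeholders. Note that 'base' input also requires
#pir.inputData and pir.baseInputFormat (see the optional section below).
#pir.dataInputFormat=base
#pir.outputFile=/user/pirk/output/responder-results
#platform=mapreduce
#pir.queryInput=/user/pirk/queries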
##
## Optional Args - Leave empty if not using/not changing default values
##
#inputData -- required if dataInputFormat = 'base'
#Fully qualified name of input file/directory in hdfs; used if dataInputFormat = 'base'
#pir.inputData=
#dataSchemas -- optional -- Comma separated list of data schema file names to load
#responder.dataSchemas=
#querySchemas -- optional -- Comma separated list of query schema file names to load
#responder.querySchemas=
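#Example (illustrative file names only) of the comma-separated schema lists:
#responder.dataSchemas=dataSchemaOne.xml,dataSchemaTwo.xml
#responder.querySchemas=querySchemaOne.xml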
#allowAdHocQuerySchemas -- 'true' or 'false'
#If true, allows embedded QuerySchemas for a query.
#Defaults to 'false'
#pir.allowEmbeddedQuerySchemas=
#colMultReduceByKey -- 'true' or 'false' -- Spark only
#If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce
#Defaults to 'false'
#pir.colMultReduceByKey=
#baseInputFormat -- required if dataInputFormat = 'base'
#Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat
#pir.baseInputFormat=
#esQuery -- required if dataInputFormat = 'elasticsearch' -- ElasticSearch query
#used with the 'elasticsearch' input format
#pir.esQuery=
#esResource -- required if dataInputFormat = 'elasticsearch'
#Elasticsearch resource from which data is read and to which it is written; must be in the format <index>/<type>
#pir.esResource=
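#Example (illustrative values only) for 'elasticsearch' input; the index, type, and query
#below are placeholders:
#pir.esResource=myindex/mytype
#pir.esQuery=?q=*:*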
#useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the
#hdfs lookup table for modular exponentiation
#Defaults to 'false'
#pir.useHDFSLookupTable=
#baseQuery -- ElasticSearch-like query used when reading 'base' format input;
#filters records in the RecordReader
#Defaults to ?q=*
#pir.baseQuery=
#limitHitsPerSelector -- 'true' or 'false'
#Whether or not to limit the number of hits per selector
#Defaults to 'true'
#pir.limitHitsPerSelector=
#mapreduceMapJavaOpts -- JVM heap setting for each map task
#Defaults to -Xmx2800m
#mapreduce.map.java.opts=
#mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task
#Defaults to 3000
#mapreduce.map.memory.mb=
#mapreduceReduceJavaOpts
#JVM heap setting for each reduce task
#Defaults to -Xmx2800m
#mapreduce.reduce.java.opts=
#mapreduceReduceMemoryMb
#Amount of memory (in MB) to allocate per reduce task
#Defaults to 3000
#mapreduce.reduce.memory.mb=
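#Example (the stated defaults): the JVM heap is typically set somewhat below the
#corresponding container memory:
#mapreduce.map.java.opts=-Xmx2800m
#mapreduce.map.memory.mb=3000
#mapreduce.reduce.java.opts=-Xmx2800m
#mapreduce.reduce.memory.mb=3000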
#stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs
#containing stoplist terms; used by the StopListFilter
#pir.stopListFile=
#useLocalCache -- 'true' or 'false'
#Whether or not to use the local cache for modular exponentiation
#Defaults to 'true'
#pir.useLocalCache=
#useModExpJoin -- 'true' or 'false' -- Spark only
#Whether or not to pre-compute the modular exponentiation table and join it to the data
#partitions when performing the encrypted row calculations
#Defaults to 'false'
#pir.useModExpJoin=
#numReduceTasks -- optional -- Number of reduce tasks
#pir.numReduceTasks=
#numColMultPartitions -- optional, Spark only
#Number of partitions to use when performing column multiplication
#pir.numColMultPartitions=
#maxHitsPerSelector -- optional -- Max number of hits encrypted per selector
#pir.maxHitsPerSelector=
#dataParts -- optional -- Number of partitions for the input data
#pir.numDataPartitions=
#numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table
#pir.numExpLookupPartitions=
##Properties for Spark Streaming
#batchSeconds - optional - Batch size (in seconds) for Spark Streaming - defaults to 30 sec
#pir.sparkstreaming.batchSeconds=
#windowLength - optional - Window size (in seconds) for Spark Streaming - defaults to 60 sec
#pir.sparkstreaming.windowLength=
#queueStream - optional - Use queue stream for Spark Streaming - defaults to false
#pir.sparkstreaming.useQueueStream=
#pir.sparkstreaming.maxBatches - optional - Spark Streaming - Max number of batches to process
#defaults to -1 (no maximum)
#pir.sparkstreaming.maxBatches=
#spark.streaming.stopGracefullyOnShutdown - Spark Streaming - Whether or not to stop 'gracefully' during shutdown
#default is false
#spark.streaming.stopGracefullyOnShutdown=
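#Example (the stated defaults) for Spark Streaming: 30-second batches aggregated into
#60-second windows:
#pir.sparkstreaming.batchSeconds=30
#pir.sparkstreaming.windowLength=60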
##Properties for Kafka
#kafka.topic = topicName
#kafka.clientId = pirk_spout
# Kafka Zookeepers
#kafka.zk = localhost:2181
# Read from beginning of Kafka topic on startup
#kafka.forceFromStart = false
##Properties for Storm
#storm.topoName = pir
#storm.workers = 1
#storm.numAckers = 1
#storm.maxSpoutPending=10
#storm.worker.heapMemory=6000
#storm.componentOnheapMem= 600.0
# This should be set to the number of Kafka partitions
#storm.spout.parallelism = 1
#storm.hashbolt.parallelism = 1
#storm.encrowcalcbolt.parallelism = 1
# This bolt is the most computationally expensive and should have the highest parallelism
#storm.enccolmultbolt.parallelism = 2
# These may be useful for tuning
#storm.executor.receiveBufferSize = 1024
#storm.executor.sendBufferSize = 1024
#storm.transferBufferSize = 8
# Frequency with which PIR matrix elements are flushed out
#storm.encrowcalcbolt.ticktuple = 60
# Design configurations:
# When splitPartitions = true, the hash bolt emits an individual tuple for each data partition;
# when false, it emits all of a record's data partitions in a single tuple
#storm.splitPartitions = true
# When saltColumns = true, each EncColMultBolt task multiplies only a subset of the row values
# for any given column; when false, all multiplication for a single column is done on a
# single EncColMultBolt instance
#storm.saltColumns = true
# Only makes sense to tune if saltColumns=true
#storm.rowDivs = 1