blob: bb38b1d4111121162192934bffc9a53449f209a2 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
## Required Properties
#dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format
#outputFile -- required -- Fully qualified name of output file in hdfs
#One of the following two options is required - launcher preferred
#launcher -- required -- full class name of a class implementing ResponderPlugin
#e.g. org.apache.pirk.responder.wideskies.standalone.StandaloneResponderPlugin
#Processing platform technology for the responder
#platform -- required -- 'mapreduce', 'spark', 'sparkstreaming', 'standalone', or 'storm'
#Processing platform technology for the responder
#queryInput -- required -- Fully qualified dir in hdfs of Query files
## Optional Args - Leave empty if not using/not changing default values
#inputData -- required if dataInputFormat = 'base'
#Fully qualified name of input file/directory in hdfs; used if dataInputFormat = 'base'
#dataSchemas -- optional -- Comma separated list of data schema file names to load
#querySchemas -- optional -- Comma separated list of query schema file names to load
#allowAdHocQuerySchemas -- 'true' or 'false'
#If true, allows embedded QuerySchemas for a query.
#Defaults to 'false'
#colMultReduceByKey -- 'true' or 'false' -- Spark only
#If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce
#Defaults to 'false'
#baseInputFormat -- required if dataInputFormat = 'base'
#Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat
#esQuery -- required if dataInputFormat = 'elasticsearch' -- ElasticSearch query
#if using 'elasticsearch' input format
#esResource -- required if dataInputFormat = 'elasticsearch'
#Requires the format <index>/<type> : Elasticsearch resource where data is read and written to
#useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the
#hdfs lookup table for modular exponentiation
#Defaults to 'false'
#baseQuery -- ElasticSearch-like query if using 'base' input format -
#used to filter records in the RecordReader
#Defaults to ?q=*
#limitHitsPerSelector -- 'true' or 'false'
#Whether or not to limit the number of hits per selector
#Defaults to 'true'
#mapreduceMapJavaOpts -- Amount of heap (in MB) to allocate per map task
#Defaults to -Xmx2800m
#mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task
#Defaults to 3000
#Amount of heap (in MB) to allocate per reduce task
#Defaults to -Xmx2800m
#Amount of memory (in MB) to allocate per reduce task
#Defaults to 3000
#stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs
#containing stoplist terms; used by the StopListFilter
#useLocalCache -- 'true' or 'false'
#Whether or not to use the local cache for modular exponentiation
#Defaults to 'true'
#useModExpJoin -- 'true' or 'false' -- Spark only
#Whether or not to pre-compute the modular exponentiation table and join it to the data
#partitions when performing the encrypted row calculations
#Defaults to 'false'
#numReduceTasks -- optional -- Number of reduce tasks
#numColMultPartitions -- optional, Spark only
#Number of partitions to use when performing column multiplication
#maxHitsPerSelector -- optional -- Max number of hits encrypted per selector
#dataParts -- optional -- Number of partitions for the input data
#numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table
##Props for Spark Streaming
#batchSeconds - optional - Batch size (in seconds) for Spark Streaming - defaults to 30 sec
#windowLength - optional - Window size (in seconds) for Spark Streaming - defaults to 60 sec
#queueStream - optional - Use queue stream for Spark Streaming - defaults to false
#pir.sparkstreaming.maxBatches - optional - Spark Streaming - Max number of batches to process
#defaults to -1 (no maximum)
#spark.streaming.stopGracefullyOnShutdown - Spark Streaming - Whether or not to stop 'gracefully' during shutdown
#default is false
##Properties for Kafka
#kafka.topic = topicName
#kafka.clientId = pirk_spout
# Kafka Zookeepers
#kafka.zk = localhost:2181
# Read from beginning of Kafka topic on startup
#kafka.forceFromStart = false
##Properties for Storm
#storm.topoName = pir
#storm.workers = 1
#storm.numAckers = 1
#storm.componentOnheapMem = 600.0
# This should be set to the number of Kafka partitions
#storm.spout.parallelism = 1
#storm.hashbolt.parallelism = 1
#storm.encrowcalcbolt.parallelism = 1
# This bolt is most computationally expensive and should have the highest value
#storm.enccolmultbolt.parallelism = 2
# These may be useful for tuning
#storm.executor.receiveBufferSize = 1024
#storm.executor.sendBufferSize = 1024
#storm.transferBufferSize = 8
# Frequency with which PIR matrix elements are flushed out
#storm.encrowcalcbolt.ticktuple = 60
# Design configurations:
# Hashbolt emits individual tuples for each data partition when splitPartitions = true;
# it emits the batch of data partitions for a record in a single tuple when splitPartitions = false
#storm.splitPartitions = true
# A task running EncColMultBolt will only be responsible for multiplying a subset of the row
# for any individual column when saltColumns = true
# All multiplication for a single column is done on a single EncColMultBolt instance when = false
#storm.saltColumns = true
# Only makes sense to tune if saltColumns=true
#storm.rowDivs = 1