| ############################################################################### |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| ############################################################################### |
| |
| ## |
| ## Required Properties |
| ## |
| |
| #dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format |
| pir.dataInputFormat= |
| |
| #outputFile -- required -- Fully qualified name of output file in hdfs |
| pir.outputFile= |
| |
#One of the following two options is required - launcher preferred
| |
#launcher -- required -- Full class name of a class implementing ResponderPlugin
#e.g. org.apache.pirk.responder.wideskies.standalone.StandaloneResponderPlugin
| #launcher= |
| |
| #platform -- required -- 'mapreduce', 'spark', 'sparkstreaming', 'standalone', or 'storm' |
| #Processing platform technology for the responder |
| platform= |
| |
| #queryInput -- required -- Fully qualified dir in hdfs of Query files |
| pir.queryInput= |
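
# Example of a minimal set of required properties (illustrative values only; the HDFS paths
# are hypothetical, and additional properties from the optional section below, such as
# inputData and baseInputFormat, may also be needed depending on these choices):
#pir.dataInputFormat = base
#pir.outputFile = /user/pirk/output/results
#platform = mapreduce
#pir.queryInput = /user/pirk/queries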
| |
| |
| ## |
## Optional Properties - Leave empty if not using or not changing default values
| ## |
| |
#inputData -- required if dataInputFormat = 'base'
#Fully qualified name of the input file/directory in hdfs
| #pir.inputData= |
| |
| #dataSchemas -- optional -- Comma separated list of data schema file names to load |
| #responder.dataSchemas= |
| |
| #querySchemas -- optional -- Comma separated list of query schema file names to load |
| #responder.querySchemas= |
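
# Example (the schema file names below are hypothetical; point these at your own schema files):
#responder.dataSchemas = dataSchemaOne.xml,dataSchemaTwo.xml
#responder.querySchemas = querySchemaOne.xml,querySchemaTwo.xml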
| |
| #allowAdHocQuerySchemas -- 'true' or 'false' |
| #If true, allows embedded QuerySchemas for a query. |
| #Defaults to 'false' |
| #pir.allowEmbeddedQuerySchemas= |
| |
| #colMultReduceByKey -- 'true' or 'false' -- Spark only |
| #If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce |
| #Defaults to 'false' |
| #pir.colMultReduceByKey= |
| |
#baseInputFormat -- required if dataInputFormat = 'base'
| #Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat |
| #pir.baseInputFormat= |
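
# Example -- Pirk ships a JSON-based BaseInputFormat implementation; verify the class name
# against your Pirk version before using it:
#pir.baseInputFormat = org.apache.pirk.inputformat.hadoop.json.JSONInputFormatBase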
| |
#esQuery -- required if dataInputFormat = 'elasticsearch'
#ElasticSearch query to use when reading the input data
| #pir.esQuery= |
| |
#esResource -- required if dataInputFormat = 'elasticsearch'
#Elasticsearch resource (in the format <index>/<type>) where data is read from and written to
| #pir.esResource= |
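
# Example (the index and type names below are hypothetical):
#pir.esQuery = ?q=*
#pir.esResource = myindex/mytype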
| |
| #useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the |
| #hdfs lookup table for modular exponentiation |
| #Defaults to 'false' |
| #pir.useHDFSLookupTable= |
| |
#baseQuery -- ElasticSearch-like query if using 'base' input format;
| #used to filter records in the RecordReader |
| #Defaults to ?q=* |
| #pir.baseQuery= |
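
# Example -- restrict the records read to those matching a field value ('rcode' is a
# hypothetical field name from your data schema):
#pir.baseQuery = ?q=rcode:0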
| |
| #limitHitsPerSelector -- 'true' or 'false' |
| #Whether or not to limit the number of hits per selector |
| #Defaults to 'true' |
| #pir.limitHitsPerSelector= |
| |
#mapreduceMapJavaOpts -- JVM heap options for each map task
| #Defaults to -Xmx2800m |
| #mapreduce.map.java.opts= |
| |
| #mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task |
| #Defaults to 3000 |
| #mapreduce.map.memory.mb= |
| |
#mapreduceReduceJavaOpts
#JVM heap options for each reduce task
| #Defaults to -Xmx2800m |
| #mapreduce.reduce.java.opts= |
| |
| #mapreduceReduceMemoryMb |
| #Amount of memory (in MB) to allocate per reduce task |
| #Defaults to 3000 |
| #mapreduce.reduce.memory.mb= |
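
# Example -- the -Xmx heap sizes should be somewhat smaller than the corresponding container
# sizes in MB (the values below simply restate the defaults):
#mapreduce.map.java.opts = -Xmx2800m
#mapreduce.map.memory.mb = 3000
#mapreduce.reduce.java.opts = -Xmx2800m
#mapreduce.reduce.memory.mb = 3000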
| |
| #stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs |
| #containing stoplist terms; used by the StopListFilter |
| #pir.stopListFile= |
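
# Example (the HDFS path below is hypothetical):
#pir.stopListFile = /user/pirk/stopList.txt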
| |
| #useLocalCache -- 'true' or 'false' |
| #Whether or not to use the local cache for modular exponentiation |
| #Defaults to 'true' |
| #pir.useLocalCache= |
| |
| #useModExpJoin -- 'true' or 'false' -- Spark only |
| #Whether or not to pre-compute the modular exponentiation table and join it to the data |
| #partitions when performing the encrypted row calculations |
| #Defaults to 'false' |
| #pir.useModExpJoin= |
| |
| #numReduceTasks -- optional -- Number of reduce tasks |
| #pir.numReduceTasks= |
| |
| #numColMultPartitions -- optional, Spark only |
| #Number of partitions to use when performing column multiplication |
| #pir.numColMultPartitions= |
| |
| #maxHitsPerSelector -- optional -- Max number of hits encrypted per selector |
| #pir.maxHitsPerSelector= |
| |
| #dataParts -- optional -- Number of partitions for the input data |
| #pir.numDataPartitions= |
| |
| #numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table |
| #pir.numExpLookupPartitions= |
| |
##Properties for Spark Streaming
| |
| #batchSeconds - optional - Batch size (in seconds) for Spark Streaming - defaults to 30 sec |
| #pir.sparkstreaming.batchSeconds= |
| |
| #windowLength - optional - Window size (in seconds) for Spark Streaming - defaults to 60 sec |
| #pir.sparkstreaming.windowLength= |
| |
| #queueStream - optional - Use queue stream for Spark Streaming - defaults to false |
| #pir.sparkstreaming.useQueueStream= |
| |
#maxBatches - optional - Max number of batches to process for Spark Streaming
| #defaults to -1 (no maximum) |
| #pir.sparkstreaming.maxBatches= |
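
# Example -- the values below simply restate the documented defaults:
#pir.sparkstreaming.batchSeconds = 30
#pir.sparkstreaming.windowLength = 60
#pir.sparkstreaming.useQueueStream = false
#pir.sparkstreaming.maxBatches = -1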
| |
| #spark.streaming.stopGracefullyOnShutdown - Spark Streaming - Whether or not to stop 'gracefully' during shutdown |
| #default is false |
| #spark.streaming.stopGracefullyOnShutdown= |
| |
| ##Properties for Kafka |
| #kafka.topic = topicName |
| #kafka.clientId = pirk_spout |
| |
| # Kafka Zookeepers |
| #kafka.zk = localhost:2181 |
| # Read from beginning of Kafka topic on startup |
| #kafka.forceFromStart = false |
| |
| |
| ##Properties for Storm |
| #storm.topoName = pir |
| #storm.workers = 1 |
| #storm.numAckers = 1 |
#storm.maxSpoutPending = 10
#storm.worker.heapMemory = 6000
#storm.componentOnheapMem = 600.0
| |
| # This should be set to the number of Kafka partitions |
| #storm.spout.parallelism = 1 |
| |
| #storm.hashbolt.parallelism = 1 |
| #storm.encrowcalcbolt.parallelism = 1 |
# This bolt is the most computationally expensive and should have the highest parallelism
| #storm.enccolmultbolt.parallelism = 2 |
| |
| # These may be useful for tuning |
| #storm.executor.receiveBufferSize = 1024 |
| #storm.executor.sendBufferSize = 1024 |
| #storm.transferBufferSize = 8 |
| |
| # Frequency with which PIR matrix elements are flushed out |
| #storm.encrowcalcbolt.ticktuple = 60 |
| |
# Design configurations:
# When splitPartitions = true, the hash bolt emits an individual tuple for each data partition;
# when false, it emits all data partitions for a record in a single tuple
#storm.splitPartitions = true
# When saltColumns = true, each EncColMultBolt task is responsible for multiplying only a subset
# of the row values for any individual column; when false, all multiplication for a single column
# is done on a single EncColMultBolt instance
#storm.saltColumns = true
# rowDivs is only worth tuning when saltColumns = true
#storm.rowDivs = 1