NUTCH-2781 Increase default Java heap size
- increase default value for NUTCH_HEAPSIZE in bin/nutch to 4096 MB (from 1000 MB)
- remove "-D mapred.child.java.opts=-Xmx1000m" from the default commonOptions in bin/crawl
diff --git a/src/bin/crawl b/src/bin/crawl
index 56bb237..2e85bad 100755
--- a/src/bin/crawl
+++ b/src/bin/crawl
@@ -218,7 +218,7 @@
# note that some of the options listed here could be set in the
# corresponding hadoop site xml param file
-commonOptions="-D mapreduce.job.reduces=$NUM_TASKS -D mapred.child.java.opts=-Xmx1000m -D mapreduce.reduce.speculative=false -D mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
+commonOptions="-D mapreduce.job.reduces=$NUM_TASKS -D mapreduce.reduce.speculative=false -D mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
# check that hadoop can be found on the path
if [ $mode = "distributed" ]; then
diff --git a/src/bin/nutch b/src/bin/nutch
index 2b3d2a0..e79b391 100755
--- a/src/bin/nutch
+++ b/src/bin/nutch
@@ -22,7 +22,7 @@
# NUTCH_JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# NUTCH_HEAPSIZE The maximum amount of heap to use, in MB.
-# Default is 1000.
+# Default is 4096.
#
# NUTCH_OPTS Extra Java runtime options.
# Multiple options must be separated by white space.
@@ -136,7 +136,7 @@
fi
JAVA="$JAVA_HOME/bin/java"
-JAVA_HEAP_MAX=-Xmx1000m
+JAVA_HEAP_MAX=-Xmx4096m
# check envvars which might override default args
if [ "$NUTCH_HEAPSIZE" != "" ]; then