NUTCH-2781 Increase default Java heap size
- increase default value for NUTCH_HEAPSIZE to 4096 MB (from 1000 MB)
- remove "-D mapred.child.java.opts=-Xmx1000m" from the default commonOptions in src/bin/crawl
diff --git a/src/bin/crawl b/src/bin/crawl
index 56bb237..2e85bad 100755
--- a/src/bin/crawl
+++ b/src/bin/crawl
@@ -218,7 +218,7 @@
 
 # note that some of the options listed here could be set in the
 # corresponding hadoop site xml param file
-commonOptions="-D mapreduce.job.reduces=$NUM_TASKS -D mapred.child.java.opts=-Xmx1000m -D mapreduce.reduce.speculative=false -D mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
+commonOptions="-D mapreduce.job.reduces=$NUM_TASKS -D mapreduce.reduce.speculative=false -D mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
 
  # check that hadoop can be found on the path
 if [ $mode = "distributed" ]; then
diff --git a/src/bin/nutch b/src/bin/nutch
index 2b3d2a0..e79b391 100755
--- a/src/bin/nutch
+++ b/src/bin/nutch
@@ -22,7 +22,7 @@
 #   NUTCH_JAVA_HOME The java implementation to use.  Overrides JAVA_HOME.
 #
 #   NUTCH_HEAPSIZE  The maximum amount of heap to use, in MB. 
-#                   Default is 1000.
+#                   Default is 4096.
 #
 #   NUTCH_OPTS      Extra Java runtime options.
 #                   Multiple options must be separated by white space.
@@ -136,7 +136,7 @@
 fi
 
 JAVA="$JAVA_HOME/bin/java"
-JAVA_HEAP_MAX=-Xmx1000m 
+JAVA_HEAP_MAX=-Xmx4096m
 
 # check envvars which might override default args
 if [ "$NUTCH_HEAPSIZE" != "" ]; then