Merge pull request #7 from tpalsulich/drat-script

Add go option and add minimal error handling to drat script
diff --git a/README.md b/README.md
index b3e2f2e..42ead85 100644
--- a/README.md
+++ b/README.md
@@ -30,18 +30,18 @@
 
 1. Set your `$DRAT_HOME` environment variable, e.g., to `/usr/local/drat/deploy`
 
+2. Start Apache™ OODT:  
+   `$DRAT_HOME/bin/oodt start`
+
 ### Automated method:
-2. Go!  
+3. Go!  
    `cd $DRAT_HOME/bin`  
    `./drat go $HOME/your-repo`  
-   This will start up OODT, crawl the repo, index it, map it, and reduce it.
+   This will crawl the repo, index it, map it, and reduce it.
 
 ### Manual method:
 If you would rather run the individual commands yourself, use the manual method:
 
-2. Start Apache™ OODT:  
-   `$DRAT_HOME/bin/oodt start`
-
 3. Crawl the repository of interest, e.g., `$HOME/your-repo`:  
    `$DRAT_HOME/bin/drat crawl $HOME/your-repo`
 
@@ -103,24 +103,19 @@
 You should be good to go to re-run the analysis at that point.
 
 ##If you want to analyze an entirely new code base
+   `$DRAT_HOME/bin/oodt stop`
    `$DRAT_HOME/bin/drat reset`
+   `$DRAT_HOME/bin/oodt start`
 
 **You shouldn't need to run these**, but the manual version of `reset` is:
 
-1. Shut down OODT with
-   `cd $DRAT_HOME/bin && ./oodt stop`  
-
-2. Blow away the following dirs:  
+1. Blow away the following dirs:  
    `rm -rf $DRAT_HOME/data/workflow`  
    `rm -rf $DRAT_HOME/filemgr/catalog`  
    `rm -rf $DRAT_HOME/solr/drat/data`
    
-3. Blow away files in following dirs:  
+2. Blow away files in following dirs:  
    `rm -rf $DRAT_HOME/data/archive/*`  
-   
-4. Restart OODT by:  
-   `cd $DRAT_HOME/bin && ./oodt start`  
-   
 
 Useful Environment Variables
 ==
diff --git a/distribution/src/main/resources/bin/drat b/distribution/src/main/resources/bin/drat
index 82224bb..aa28e89 100755
--- a/distribution/src/main/resources/bin/drat
+++ b/distribution/src/main/resources/bin/drat
@@ -15,45 +15,131 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Stop on first error.
+set -e
+
+# Print out usage information for this script.
 function print_help {
-    echo "Usage: Call drat [crawl, index, map, and reduce] in order to analyze a repository." 
+    echo "Usage: drat [crawl, index, map, reduce] in order to analyze a repository." 
     echo "       Alternatively, call 'drat go' to run all four automatically."
-    echo "       Navigate to http://localhost:8080/opsui/ to view the OODT browser."
-    echo "drat"
-    echo "     go <path to repo>     | start OODT and analyze the repository"
-    echo "     crawl <path to repo>  | crawl the repositories files"
-    echo "     index <path to repo>  | index the crawled files"
-    echo "     map                   | fire off the MapReduce mapper"
-    echo "     reduce                | fire off the MapReduce reducer"
-    echo "     help                  | print this message"
-    echo "     reset                 | prepare to analyze an entirely new repo"
-    echo "                           | CAUTION: will delete previous crawls!"
+    echo "       drat"
+    echo "            go <path to repo>     | crawl, index, map, and reduce the repository"
+    echo "            crawl <path to repo>  | crawl the repository files"
+    echo "            index <path to repo>  | index the crawled files"
+    echo "            map                   | fire off the MapReduce mapper"
+    echo "            reduce                | fire off the MapReduce reducer"
+    echo "            help                  | print this message"
+    echo "            reset                 | prepare to analyze an entirely new repo"
+    echo "                                  | CAUTION: will delete previous crawls!"
 }
 
-if [ "$1" = "crawl" ]; then
-    $DRAT_HOME/crawler/bin/crawler_launcher --operation --metPC --metExtractorConfig $DRAT_HOME/extractors/code/default.cpr.conf --metExtractor org.apache.oodt.cas.metadata.extractors.CopyAndRewriteExtractor --filemgrUrl http://localhost:9000 --clientTransferer org.apache.oodt.cas.filemgr.datatransfer.InPlaceDataTransferFactory --productPath $2
-elif [[ "$1" = "index" ]]; then
-    java -Djava.ext.dirs=$DRAT_HOME/filemgr/lib -DSOLR_INDEXER_CONFIG=$DRAT_HOME/filemgr/etc/indexer.properties org.apache.oodt.cas.filemgr.tools.SolrIndexer --all --fmUrl http://localhost:9000 --optimize --solrUrl http://localhost:8080/solr/drat $2
-elif [[ "$1" = "map" ]]; then
-    $DRAT_HOME/workflow/bin/wmgr-client --url http://localhost:9001 --operation --dynWorkflow --taskIds urn:drat:MimePartitioner
-elif [[ "$1" = "reduce" ]]; then
-    $DRAT_HOME/workflow/bin/wmgr-client --url http://localhost:9001 --operation --dynWorkflow --taskIds urn:drat:RatAggregator
-elif [[ "$1" = "reset" ]]; then
-    echo "This will remove any previous or current crawls and restart OODT."
-    read -p "Do you wish to continue?" yN
-        case $yn in
-            [Yy]* )
-                $DRAT_HOME/bin/oodt stop
-                rm -rf $DRAT_HOME/data/workflow
-                rm -rf $DRAT_HOME/filemgr/catalog
-                rm -rf $DRAT_HOME/solr/drat/data
-                rm -rf $DRAT_HOME/data/archive/*
-                $DRAT_HOME/bin/oodt start;;
-            [Nn]* ) echo "Exiting..."; exit;;
-            * ) echo "Aborting..."; exit;;
-        esac
-elif [[ "$1" = "help" ]]; then
-    print_help
-else
-    print_help
-fi
\ No newline at end of file
+FILEMGR_URL=http://localhost:9000
+SOLR_URL=http://localhost:8080/solr/drat
+CLIENT_URL=http://localhost:9001
+
+# Crawl the given repository. Expects one argument -- the file path of the repo to be crawled.
+function crawl {
+    $DRAT_HOME/crawler/bin/crawler_launcher --operation --metPC --metExtractorConfig \
+    $DRAT_HOME/extractors/code/default.cpr.conf --metExtractor org.apache.oodt.cas.metadata.extractors.CopyAndRewriteExtractor \
+    --filemgrUrl $FILEMGR_URL --clientTransferer org.apache.oodt.cas.filemgr.datatransfer.InPlaceDataTransferFactory --productPath $1
+}
+
+# Index the crawled files of the given repo. Expects one argument -- the file path of the repo to be indexed.
+function index {
+    java -Djava.ext.dirs=$DRAT_HOME/filemgr/lib -DSOLR_INDEXER_CONFIG=$DRAT_HOME/filemgr/etc/indexer.properties \
+    org.apache.oodt.cas.filemgr.tools.SolrIndexer --all --fmUrl $FILEMGR_URL --optimize --solrUrl $SOLR_URL $1
+}
+
+# Fire off the MapReduce mapper. Expects no arguments.
+function map {
+    $DRAT_HOME/workflow/bin/wmgr-client --url $CLIENT_URL --operation --dynWorkflow --taskIds urn:drat:MimePartitioner
+}
+
+# Fire off the MapReduce reducer. Expects no arguments.
+function reduce {
+    $DRAT_HOME/workflow/bin/wmgr-client --url $CLIENT_URL --operation --dynWorkflow --taskIds urn:drat:RatAggregator
+}
+
+# Ensure the number of arguments matches the expected number. Expects three arguments:
+# the option name, the actual number of arguments, and the expected number of arguments.
+function check_num_args {
+    if [[ "$2" != "$3" ]]; then
+            echo "Expected $(($3 - 1)) args for $1, but got $(($2 - 1))."   # Use (( )) for arithmetic evaluation.
+            print_help
+            exit 1
+    fi
+}
+
+# Start parsing the arguments.
+case $1 in
+    crawl)
+        check_num_args $1 $# 2
+        crawl $2
+    ;;
+    index)
+        check_num_args $1 $# 2
+        index $2
+    ;;
+    map)
+        check_num_args $1 $# 1
+        map
+    ;;
+    reduce)
+        check_num_args $1 $# 1
+        reduce
+    ;;
+    go)
+        # Add in some sleep just to give commands time to finish up. Some issues with Solr, otherwise.
+        check_num_args $1 $# 2
+        echo "Crawling $2"
+        crawl $2
+        sleep 1
+        echo
+        echo "Indexing $2"
+        index $2
+        sleep 1
+        echo
+        echo "Firing off the MapReduce mapper"
+        map
+        echo
+        echo "Firing off the MapReduce reducer"
+        reduce
+        echo "Navigate to http://localhost:8080/opsui/ to view the OODT browser and http://localhost:8080/solr to view the Solr catalog."
+    ;;
+    reset)
+        check_num_args $1 $# 1
+        echo "Please stop OODT by running oodt stop before running reset."
+        echo "This will remove any previous or current crawls."
+        read -p "Do you wish to continue? [yN] " yn
+            case $yn in
+                [Yy]*)
+                    echo
+                    echo "rm -rf $DRAT_HOME/data/workflow"
+                    rm -rf $DRAT_HOME/data/workflow
+                    echo "rm -rf $DRAT_HOME/filemgr/catalog"
+                    rm -rf $DRAT_HOME/filemgr/catalog
+                    echo "rm -rf $DRAT_HOME/solr/drat/data"
+                    rm -rf $DRAT_HOME/solr/drat/data
+                    echo "rm -rf $DRAT_HOME/data/archive/*"
+                    rm -rf $DRAT_HOME/data/archive/*
+                    echo "Please restart OODT with oodt start if you wish to run another crawl."
+                ;;
+                [Nn]*)
+                    echo "Reset cancelled. Exiting..."
+                    exit 0
+                ;;
+                *) 
+                    echo "Aborting..."
+                    exit 1
+                ;;
+            esac
+    ;;
+    help)
+        print_help
+    ;;
+    *)
+        echo "Unrecognized option: '$1'"
+        print_help
+        exit 1
+    ;;
+esac
\ No newline at end of file