Merge pull request #7 from tpalsulich/drat-script
Add go option and add minimal error handling to drat script
diff --git a/README.md b/README.md
index b3e2f2e..42ead85 100644
--- a/README.md
+++ b/README.md
@@ -30,18 +30,18 @@
1. Set your `$DRAT_HOME` environment variable, e.g., to `/usr/local/drat/deploy`
+2. Start Apache™ OODT:
+ `$DRAT_HOME/bin/oodt start`
+
### Automated method:
-2. Go!
+3. Go!
`cd $DRAT_HOME/bin`
`./drat go $HOME/your-repo`
- This will start up OODT, crawl the repo, index it, map it, and reduce it.
+ This will crawl the repo, index it, map it, and reduce it.
### Manual method:
If you would rather run the individual commands yourself, use the manual method:
-2. Start Apache™ OODT:
- `$DRAT_HOME/bin/oodt start`
-
3. Crawl the repository of interest, e.g., `$HOME/your-repo`:
`$DRAT_HOME/bin/drat crawl $HOME/your-repo`
@@ -103,24 +103,19 @@
You should be good to go to re-run the analysis at that point.
##If you want to analyze an entirely new code base
+ `$DRAT_HOME/bin/oodt stop`
`$DRAT_HOME/bin/drat reset`
+ `$DRAT_HOME/bin/oodt start`
**You shouldn't need to run these**, but the manual version of `reset` is:
-1. Shut down OODT with
- `cd $DRAT_HOME/bin && ./oodt stop`
-
-2. Blow away the following dirs:
+1. Blow away the following dirs:
`rm -rf $DRAT_HOME/data/workflow`
`rm -rf $DRAT_HOME/filemgr/catalog`
`rm -rf $DRAT_HOME/solr/drat/data`
-3. Blow away files in following dirs:
+2. Blow away files in the following dirs:
`rm -rf $DRAT_HOME/data/archive/*`
-
-4. Restart OODT by:
- `cd $DRAT_HOME/bin && ./oodt start`
-
Useful Environment Variables
==
diff --git a/distribution/src/main/resources/bin/drat b/distribution/src/main/resources/bin/drat
index 82224bb..aa28e89 100755
--- a/distribution/src/main/resources/bin/drat
+++ b/distribution/src/main/resources/bin/drat
@@ -15,45 +15,131 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# Stop on first error.
+set -e
+
+# Print out usage information for this script.
function print_help {
- echo "Usage: Call drat [crawl, index, map, and reduce] in order to analyze a repository."
+ echo "Usage: drat [crawl, index, map, reduce] in order to analyze a repository."
echo " Alternatively, call 'drat go' to run all four automatically."
- echo " Navigate to http://localhost:8080/opsui/ to view the OODT browser."
- echo "drat"
- echo " go <path to repo> | start OODT and analyze the repository"
- echo " crawl <path to repo> | crawl the repositories files"
- echo " index <path to repo> | index the crawled files"
- echo " map | fire off the MapReduce mapper"
- echo " reduce | fire off the MapReduce reducer"
- echo " help | print this message"
- echo " reset | prepare to analyze an entirely new repo"
- echo " | CAUTION: will delete previous crawls!"
+ echo " drat"
+ echo " go <path to repo> | crawl, index, map, and reduce the repository (OODT must already be running)"
+ echo " crawl <path to repo> | crawl the repository files"
+ echo " index <path to repo> | index the crawled files"
+ echo " map | fire off the MapReduce mapper"
+ echo " reduce | fire off the MapReduce reducer"
+ echo " help | print this message"
+ echo " reset | prepare to analyze an entirely new repo"
+ echo " | CAUTION: will delete previous crawls!"
}
-if [ "$1" = "crawl" ]; then
- $DRAT_HOME/crawler/bin/crawler_launcher --operation --metPC --metExtractorConfig $DRAT_HOME/extractors/code/default.cpr.conf --metExtractor org.apache.oodt.cas.metadata.extractors.CopyAndRewriteExtractor --filemgrUrl http://localhost:9000 --clientTransferer org.apache.oodt.cas.filemgr.datatransfer.InPlaceDataTransferFactory --productPath $2
-elif [[ "$1" = "index" ]]; then
- java -Djava.ext.dirs=$DRAT_HOME/filemgr/lib -DSOLR_INDEXER_CONFIG=$DRAT_HOME/filemgr/etc/indexer.properties org.apache.oodt.cas.filemgr.tools.SolrIndexer --all --fmUrl http://localhost:9000 --optimize --solrUrl http://localhost:8080/solr/drat $2
-elif [[ "$1" = "map" ]]; then
- $DRAT_HOME/workflow/bin/wmgr-client --url http://localhost:9001 --operation --dynWorkflow --taskIds urn:drat:MimePartitioner
-elif [[ "$1" = "reduce" ]]; then
- $DRAT_HOME/workflow/bin/wmgr-client --url http://localhost:9001 --operation --dynWorkflow --taskIds urn:drat:RatAggregator
-elif [[ "$1" = "reset" ]]; then
- echo "This will remove any previous or current crawls and restart OODT."
- read -p "Do you wish to continue?" yN
- case $yn in
- [Yy]* )
- $DRAT_HOME/bin/oodt stop
- rm -rf $DRAT_HOME/data/workflow
- rm -rf $DRAT_HOME/filemgr/catalog
- rm -rf $DRAT_HOME/solr/drat/data
- rm -rf $DRAT_HOME/data/archive/*
- $DRAT_HOME/bin/oodt start;;
- [Nn]* ) echo "Exiting..."; exit;;
- * ) echo "Aborting..."; exit;;
- esac
-elif [[ "$1" = "help" ]]; then
- print_help
-else
- print_help
-fi
\ No newline at end of file
+FILEMGR_URL=http://localhost:9000
+SOLR_URL=http://localhost:8080/solr/drat
+CLIENT_URL=http://localhost:9001
+
+# Crawl the given repository. Expects one argument -- the file path of the repo to be crawled.
+function crawl {
+ $DRAT_HOME/crawler/bin/crawler_launcher --operation --metPC --metExtractorConfig \
+ $DRAT_HOME/extractors/code/default.cpr.conf --metExtractor org.apache.oodt.cas.metadata.extractors.CopyAndRewriteExtractor \
+ --filemgrUrl $FILEMGR_URL --clientTransferer org.apache.oodt.cas.filemgr.datatransfer.InPlaceDataTransferFactory --productPath $1
+}
+
+# Index the crawled files of the given repo. Expects one argument -- the file path of the repo to be indexed.
+function index {
+ java -Djava.ext.dirs=$DRAT_HOME/filemgr/lib -DSOLR_INDEXER_CONFIG=$DRAT_HOME/filemgr/etc/indexer.properties \
+ org.apache.oodt.cas.filemgr.tools.SolrIndexer --all --fmUrl $FILEMGR_URL --optimize --solrUrl $SOLR_URL $1
+}
+
+# Fire off the MapReduce mapper. Expects no arguments.
+function map {
+ $DRAT_HOME/workflow/bin/wmgr-client --url $CLIENT_URL --operation --dynWorkflow --taskIds urn:drat:MimePartitioner
+}
+
+# Fire off the MapReduce reducer. Expects no arguments.
+function reduce {
+ $DRAT_HOME/workflow/bin/wmgr-client --url $CLIENT_URL --operation --dynWorkflow --taskIds urn:drat:RatAggregator
+}
+
+# Ensure the number of arguments matches the expected number. Expects three arguments:
+# the option name, the actual number of arguments, and the expected number of arguments (both counts include the option name itself).
+function check_num_args {
+ if [[ "$2" != "$3" ]]; then
+ echo "Expected $(($3 - 1)) args for $1, but got $(($2 - 1))." # $(( )) is arithmetic expansion; subtract 1 so the option name itself is not counted.
+ print_help
+ exit 1
+ fi
+}
+
+# Start parsing the arguments.
+case $1 in
+ crawl)
+ check_num_args $1 $# 2
+ crawl $2
+ ;;
+ index)
+ check_num_args $1 $# 2
+ index $2
+ ;;
+ map)
+ check_num_args $1 $# 1
+ map
+ ;;
+ reduce)
+ check_num_args $1 $# 1
+ reduce
+ ;;
+ go)
+ # Add in some sleep just to give commands time to finish up. Some issues with Solr, otherwise.
+ check_num_args $1 $# 2
+ echo "Crawling $2"
+ crawl $2
+ sleep 1
+ echo
+ echo "Indexing $2"
+ index $2
+ sleep 1
+ echo
+ echo "Firing off the MapReduce mapper"
+ map
+ echo
+ echo "Firing off the MapReduce reducer"
+ reduce
+ echo "Navigate to http://localhost:8080/opsui/ to view the OODT browser and http://localhost:8080/solr to view the Solr catalog."
+ ;;
+ reset)
+ check_num_args $1 $# 1
+ echo "Please stop OODT by running oodt stop before running reset."
+ echo "This will remove any previous or current crawls."
+ read -p "Do you wish to continue? [yN] " yn
+ case $yn in
+ [Yy]*)
+ echo
+ echo "rm -rf $DRAT_HOME/data/workflow"
+ rm -rf $DRAT_HOME/data/workflow
+ echo "rm -rf $DRAT_HOME/filemgr/catalog"
+ rm -rf $DRAT_HOME/filemgr/catalog
+ echo "rm -rf $DRAT_HOME/solr/drat/data"
+ rm -rf $DRAT_HOME/solr/drat/data
+ echo "rm -rf $DRAT_HOME/data/archive/*"
+ rm -rf $DRAT_HOME/data/archive/*
+ echo "Please restart OODT with oodt start if you wish to run another crawl."
+ ;;
+ [Nn]*)
+ echo "Reset cancelled. Exiting..."
+ exit 0
+ ;;
+ *)
+ echo "Aborting..."
+ exit 1
+ ;;
+ esac
+ ;;
+ help)
+ print_help
+ ;;
+ *)
+ echo "Unrecognized option: '$1'"
+ print_help
+ exit 1
+ ;;
+esac
\ No newline at end of file