#!/bin/sh

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Stop on first error: the shell exits as soon as a command returns non-zero
# (outside of conditions such as `if`, `||` and `&&`).
set -e

# Print out usage information for this script on standard output.
# Arguments: none
# Outputs:   the usage text, one line per entry below
print_help() {
  # A single printf emits every line; '%s\n' is reapplied to each argument.
  printf '%s\n' \
    "Usage: drat [crawl, index, map, reduce] in order to analyze a repository." \
    " Alternatively, call 'drat go' to run all four automatically." \
    " drat" \
    " go <path to repo> | start OODT and analyze the repository" \
    " crawl <path to repo> | crawl the repository files" \
    " index <path to repo> | index the crawled files" \
    " map | fire off the MapReduce mapper" \
    " reduce | fire off the MapReduce reducer" \
    " help | print this message" \
    " reset | prepare to analyze an entirely new repo" \
    " | CAUTION: will delete previous crawls!"
}

# Service endpoints used by the commands below.
# Passed as --filemgrUrl to the crawler and as --fmUrl to the Solr indexer.
FILEMGR_URL=http://localhost:9000
# Passed as --solrUrl to the Solr indexer.
SOLR_URL=http://localhost:8080/solr/drat
# Passed as --url to wmgr-client by the map and reduce commands.
CLIENT_URL=http://localhost:9001

# Crawl the given repository by running crawler_launcher from $DRAT_HOME.
# Globals:   DRAT_HOME (read), FILEMGR_URL (read)
# Arguments: $1 - file path of the repo to be crawled
# Returns:   the exit status of crawler_launcher
crawl() {
  # Abort with a clear message rather than resolving to /crawler/... when unset.
  : "${DRAT_HOME:?DRAT_HOME must be set}"
  # Every expansion is quoted so paths containing spaces stay a single argument.
  "$DRAT_HOME/crawler/bin/crawler_launcher" \
    --operation \
    --metPC \
    --metExtractorConfig "$DRAT_HOME/extractors/code/default.cpr.conf" \
    --metExtractor org.apache.oodt.cas.metadata.extractors.CopyAndRewriteExtractor \
    --filemgrUrl "$FILEMGR_URL" \
    --clientTransferer org.apache.oodt.cas.filemgr.datatransfer.InPlaceDataTransferFactory \
    --productPath "$1"
}

# Index the crawled files of the given repo by running the OODT SolrIndexer
# class with the file manager libraries under $DRAT_HOME.
# Globals:   DRAT_HOME (read), FILEMGR_URL (read), SOLR_URL (read)
# Arguments: $1 - file path of the repo to be indexed
# Returns:   the exit status of the java process
index() {
  # Abort with a clear message rather than resolving to /filemgr/... when unset.
  : "${DRAT_HOME:?DRAT_HOME must be set}"
  # Each -D option is quoted as a whole so a DRAT_HOME containing spaces
  # is not split into several arguments.
  java \
    "-Djava.ext.dirs=$DRAT_HOME/filemgr/lib" \
    "-DSOLR_INDEXER_CONFIG=$DRAT_HOME/filemgr/etc/indexer.properties" \
    org.apache.oodt.cas.filemgr.tools.SolrIndexer \
    --all \
    --fmUrl "$FILEMGR_URL" \
    --optimize \
    --solrUrl "$SOLR_URL" \
    "$1"
}

# Fire off the MapReduce mapper by asking wmgr-client to run the
# urn:drat:MimePartitioner task as a dynamic workflow.
# Globals:   DRAT_HOME (read), CLIENT_URL (read)
# Arguments: none
# Returns:   the exit status of wmgr-client
map() {
  # Abort with a clear message rather than resolving to /workflow/... when unset.
  : "${DRAT_HOME:?DRAT_HOME must be set}"
  "$DRAT_HOME/workflow/bin/wmgr-client" \
    --url "$CLIENT_URL" \
    --operation \
    --dynWorkflow \
    --taskIds urn:drat:MimePartitioner
}

# Fire off the MapReduce reducer by asking wmgr-client to run the
# urn:drat:RatAggregator task as a dynamic workflow.
# Globals:   DRAT_HOME (read), CLIENT_URL (read)
# Arguments: none
# Returns:   the exit status of wmgr-client
reduce() {
  # Abort with a clear message rather than resolving to /workflow/... when unset.
  : "${DRAT_HOME:?DRAT_HOME must be set}"
  "$DRAT_HOME/workflow/bin/wmgr-client" \
    --url "$CLIENT_URL" \
    --operation \
    --dynWorkflow \
    --taskIds urn:drat:RatAggregator
}

# Ensure the number of arguments matches the expected number. On a mismatch,
# report it, print the usage text and terminate the script with status 1.
# Arguments: $1 - the option name (used only in the message)
#            $2 - the actual number of arguments
#            $3 - the expected number of arguments
# Both counts include the option name itself, so the message reports each
# count minus one.
# Outputs:   the mismatch message followed by the usage text, on stdout
check_num_args() {
  # Nothing to do when the counts agree.
  [ "$2" != "$3" ] || return 0

  printf '%s\n' "Expected $(($3 - 1)) args for $1, but got $(($2 - 1))."
  print_help
  exit 1
}

# Run the whole pipeline (crawl, index, map, reduce) against one repository.
# Arguments: $1 - file path of the repo to analyze
run_all() {
  # Add in some sleep just to give commands time to finish up. Some issues with Solr, otherwise.
  printf '%s\n' "Crawling $1"
  crawl "$1"
  sleep 1
  echo
  printf '%s\n' "Indexing $1"
  index "$1"
  sleep 1
  echo
  echo "Firing off the MapReduce mapper"
  map
  echo
  echo "Firing off the MapReduce reducer"
  reduce
  echo "Navigate to http://localhost:8080/opsui/ to view the OODT browser and http://localhost:8080/solr to view the Solr catalog."
}

# Ask for confirmation, then delete the workflow data, file manager catalog,
# Solr data and archived products under $DRAT_HOME.
# Globals:   DRAT_HOME (read)
# Exits:     0 when the user declines, 1 on any other non-yes answer
reset_data() {
  # Without this guard an unset or empty DRAT_HOME would turn the deletions
  # below into `rm -rf /data/workflow`, `rm -rf /filemgr/catalog`, and so on.
  : "${DRAT_HOME:?DRAT_HOME must be set before running reset}"

  echo "Please stop OODT by running oodt stop before running reset."
  echo "This will remove any previous or current crawls."
  # `read -p` is not available in POSIX sh; print the prompt explicitly.
  printf '%s' "Do you wish to continue? [yN] " >&2
  # Treat end-of-input as an empty answer so it falls through to "Aborting...".
  read -r yn || yn=""
  case "$yn" in
    [Yy]*)
      echo
      for target in data/workflow filemgr/catalog solr/drat/data; do
        printf '%s\n' "rm -rf $DRAT_HOME/$target"
        rm -rf -- "$DRAT_HOME/$target"
      done
      # Only the contents of the archive directory are removed, so the glob
      # stays outside the quotes.
      printf '%s\n' "rm -rf $DRAT_HOME/data/archive/*"
      rm -rf -- "$DRAT_HOME"/data/archive/*
      echo "Please restart OODT with oodt start if you wish to run another crawl."
      ;;
    [Nn]*)
      echo "Reset cancelled. Exiting..."
      exit 0
      ;;
    *)
      echo "Aborting..."
      exit 1
      ;;
  esac
}

# Start parsing the arguments. The count handed to check_num_args includes
# the sub-command name itself.
case "$1" in
  crawl)
    check_num_args "$1" "$#" 2
    crawl "$2"
    ;;
  index)
    check_num_args "$1" "$#" 2
    index "$2"
    ;;
  map)
    check_num_args "$1" "$#" 1
    map
    ;;
  reduce)
    check_num_args "$1" "$#" 1
    reduce
    ;;
  go)
    check_num_args "$1" "$#" 2
    run_all "$2"
    ;;
  reset)
    check_num_args "$1" "$#" 1
    reset_data
    ;;
  help)
    print_help
    ;;
  *)
    printf '%s\n' "Unrecognized option: '$1'"
    print_help
    exit 1
    ;;
esac