|  | # Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | # contributor license agreements.  See the NOTICE file distributed with | 
|  | # this work for additional information regarding copyright ownership. | 
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | # (the "License"); you may not use this file except in compliance with | 
|  | # the License.  You may obtain a copy of the License at | 
|  | # | 
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  |  | 
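|  | # Script to create a file system job and run it, sending the files to the null output connector. | 
|  | # Arguments: the file path to crawl (required), optionally followed by the base path of the API | 
|  | # service (default "http://localhost:8345/mcf-api-service"), optionally followed by a user name | 
|  | # and password. | 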
|  |  | 
|  | # Validate the argument count: only 1, 2, or 4 arguments are accepted | 
|  | # (a user name without a password is rejected). | 
|  | if __args__.__size__ < 1 || __args__.__size__ == 3 || __args__.__size__ > 4 then | 
|  |   error "Usage: file-crawl-example <file_path> [ <url_path> [ <user_name> <password> ] ]"; | 
|  | ; | 
|  |  | 
|  | # Begin with the defaults, then override them with whatever was supplied. | 
|  | set basepath = "http://localhost:8345/mcf-api-service"; | 
|  | set username = ""; | 
|  | set password = ""; | 
|  |  | 
|  | # A second argument, when present, is the API service base path. | 
|  | if __args__.__size__ > 1 then | 
|  |   set basepath = __args__[1]; | 
|  | ; | 
|  |  | 
|  | # The third and fourth arguments, when present, are the login credentials. | 
|  | if __args__.__size__ == 4 then | 
|  |   set username = __args__[2]; | 
|  |   set password = __args__[3]; | 
|  | ; | 
|  |  | 
|  | # Every request below is issued relative to this URL. | 
|  | set baseurl = (new url basepath) + "json"; | 
|  |  | 
|  | # Names and descriptions used by the rest of the script. | 
|  | # The first script argument is the path to crawl; the job name is derived from it. | 
|  | set fileCrawlPath = __args__[0]; | 
|  | set fileCrawlJobName = "File system crawl of "+fileCrawlPath; | 
|  |  | 
|  | # Repository (source) connection | 
|  | set repositoryConnectionName = "File System"; | 
|  | set repositoryConnectionDescription = "File System Connection"; | 
|  |  | 
|  | # Output (target) connection | 
|  | set outputConnectionName = "Null Output"; | 
|  | set outputConnectionDescription = "Null Output Connection"; | 
|  |  | 
|  | # Log in to the API service before issuing any other request. | 
|  | # The user ID and password are posted as two separate nodes. | 
|  | POST result = { | 
|  |   << "userID" : username :  :  >>, | 
|  |   << "password" : password :  :  >> } | 
|  |   to baseurl + "LOGIN"; | 
|  |  | 
|  | # Anything other than an OK response stops the script. | 
|  | if !result.__OK__ then | 
|  |   error "Login failed"; | 
|  | else | 
|  |   print "Login successful"; | 
|  | ; | 
|  |  | 
|  | # PUT the null output connection definition under its own name. | 
|  | # Both a CREATED and an OK response are treated as success. | 
|  | PUT result = { | 
|  |   << "outputconnection" : "" :  : | 
|  |     << "description" : outputConnectionDescription :  :  >>, | 
|  |     << "configuration" : "" :  :  >>, | 
|  |     << "class_name" : "org.apache.manifoldcf.agents.output.nullconnector.NullConnector" :  :  >>, | 
|  |     << "name" : outputConnectionName :  :  >>, | 
|  |     << "max_connections" : "100" :  :  >> >> } | 
|  |   to baseurl + "outputconnections" + new connectionname outputConnectionName; | 
|  |  | 
|  | # Stop on any other response, including the response itself in the message. | 
|  | if !(result.__CREATED__ || result.__OK__) then | 
|  |   error "Unexpected result: "+result.__script__; | 
|  | else | 
|  |   print "Output connection created (or already exists)"; | 
|  | ; | 
|  |  | 
|  | # PUT the file system repository connection definition under its own name. | 
|  | # Both a CREATED and an OK response are treated as success. | 
|  | PUT result = { | 
|  |   << "repositoryconnection" : "" :  : | 
|  |     << "description" : repositoryConnectionDescription :  :  >>, | 
|  |     << "configuration" : "" :  :  >>, | 
|  |     << "class_name" : "org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector" :  :  >>, | 
|  |     << "name" : repositoryConnectionName :  :  >>, | 
|  |     << "max_connections" : "100" :  :  >> >> } | 
|  |   to baseurl + "repositoryconnections" + new connectionname repositoryConnectionName; | 
|  |  | 
|  | # Stop on any other response, including the response itself in the message. | 
|  | if !(result.__CREATED__ || result.__OK__) then | 
|  |   error "Unexpected result: "+result.__script__; | 
|  | else | 
|  |   print "Repository connection created (or already exists)"; | 
|  | ; | 
|  |  | 
|  | # Create the job. The POST is unconditional: a new job is requested on every run. | 
|  | # The repository and output connection names are taken from repositoryConnectionName and | 
|  | # outputConnectionName rather than repeated as literals, so the job always refers to the | 
|  | # same connections this script defines. | 
|  | POST result = { | 
|  |   << "job" : "" :  : | 
|  |     << "start_mode" : "manual" :  :  >>, | 
|  |     << "reseed_interval" : "3600000" :  :  >>, | 
|  |     << "recrawl_interval" : "86400000" :  :  >>, | 
|  |     << "run_mode" : "scan once" :  :  >>, | 
|  |     << "hopcount_mode" : "never delete" :  :  >>, | 
|  |     << "description" : fileCrawlJobName :  :  >>, | 
|  |     << "repository_connection" : repositoryConnectionName :  :  >>, | 
|  |     << "document_specification" : "" :  : | 
|  |       << "startpoint" : "" : "path"=fileCrawlPath : | 
|  |         << "include" : "" : "match"="*", "type"="file" :  >>, | 
|  |         << "include" : "" : "match"="*", "type"="directory" :  >> >> >>, | 
|  |     << "pipelinestage" : "" :  : | 
|  |       << "stage_id" : 0 :  :  >>, | 
|  |       << "stage_isoutput": "true" :  :  >>, | 
|  |       << "stage_specification" : "" :  :  >>, | 
|  |       << "stage_connectionname" : outputConnectionName :  :  >> >>, | 
|  |     << "priority" : "5" :  :  >>, | 
|  |     << "expiration_interval" : "infinite" :  :  >> >> } | 
|  |   to baseurl + "jobs"; | 
|  |  | 
|  | # Only a CREATED response is accepted; the job id is read from the first node of the response. | 
|  | if result.__CREATED__ then | 
|  |   print "Job created"; | 
|  |   set jobid = result.__value__[0].__value__; | 
|  | else | 
|  |   error "Unexpected result: "+result.__script__; | 
|  | ; | 
|  |  | 
|  | print "The job id is "+jobid; | 
|  |  | 
|  | # Start the job, and stop the script if the request is rejected. Without this check a | 
|  | # failed start would leave the polling loop below waiting on a job that never ran. | 
|  | # NOTE(review): only the response status is checked; confirm whether the API can also | 
|  | # report a start failure inside an otherwise successful response. | 
|  | PUT result = { } | 
|  |   to baseurl + "start" + jobid; | 
|  | if !(result.__CREATED__ || result.__OK__) then | 
|  |   error "Unexpected result: "+result.__script__; | 
|  | ; | 
|  |  | 
|  | # Poll the job status until it reports "done" or "error". | 
|  | # jobstatus and thestatus stay set after the loop for the final report. | 
|  | while true do | 
|  |   GET result = baseurl + "jobstatuses" + jobid; | 
|  |   if !result.__OK__ then | 
|  |     error "Couldn't get job status"; | 
|  |   ; | 
|  |  | 
|  |   # Pull the "jobstatus" node out of the response; its absence is fatal. | 
|  |   set jobstatus = result.__value__.__dict__["jobstatus"]; | 
|  |   if isnull jobstatus then | 
|  |     error "Couldn't find job status in response: " + result.__script__; | 
|  |   ; | 
|  |  | 
|  |   set thestatus = jobstatus.__dict__["status"].__value__; | 
|  |  | 
|  |   # Leave the loop on either terminal status; otherwise pause before asking again. | 
|  |   if thestatus == "error" || thestatus == "done" then | 
|  |     break; | 
|  |   else | 
|  |     wait 10000; | 
|  |   ; | 
|  | ; | 
|  |  | 
|  | # Report the outcome. On error, the text is read from the "error_text" node of the last | 
|  | # job status through its __value__ attribute, the same attribute used for the "status" node. | 
|  | if thestatus == "error" then | 
|  |   print "The job aborted, with error: " + jobstatus.__dict__["error_text"].__value__; | 
|  | else | 
|  |   print "The job completed"; | 
|  | ; | 
|  |  |