blob: eef1a8d2ee7fcb2a4eae47ec01094796005f9610 [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements. See the NOTICE.txt file distributed with this work for
additional information regarding copyright ownership. The ASF licenses this
file to you under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.
TODO(bfoster): Make this a working example.
-->
<pgeConfig>
<!-- pull in another pgeConfig file: the namespace is optional, and the file
     path may be absolute or relative to this file -->
<import namespace="optional-NS"
        file="absolute-or-relative-to-this-file-another-pgeConfig-import-file-path"/>
<!-- input files that need to be created for the science PGE -->
<dynInputFiles>
  <!-- one or more of these -->
  <file path="path-with-name-to-create"
        writerClass="class-path-to-SciPgeConfigFileWriter"
        args="zero-or-more-args-comma-segregated"/>
</dynInputFiles>
<!-- how to run the science PGE -->
<exe dir="set-working-dir-for-script" shellType="script-type">
  <!-- one or more of these; each cmd holds one line of the script file -->
  <cmd>line-in-script-file</cmd>
</exe>
<!-- files to ingest -->
<output>
  <!-- one or more of these -->
  <!--
    If a dir path is specified WITHOUT an associated set of files tags
    (along with the regExp, metFileWriterClass and args required to
    produce metadata), CAS-PGE will REQUIRE that you provide, as part of
    the tasks.xml definition for this task:

      PGETask/Ingest/MimeExtractorRepo - path to the
      AutoDetectProductCrawler's MimeExtractorRepo XML config

    An AutoDetectProductCrawler will then be created in lieu of the
    StdProductCrawler. In addition, the extracted metadata will not be
    serialized, so you will not see it produced as .met files in the job
    working directory for these CAS-PGE jobs.
  -->
  <dir path="path-to-an-output-dir" createBeforeExe="true-or-false"/>
  <dir path="path-to-an-output-dir" createBeforeExe="true-or-false">
    <!--
      Backward compatibility re-introduced in OODT-667: by specifying one
      or more files tags you cause CAS-PGE to generate a StdProductCrawler,
      which crawls the metadata files generated by calling the
      metFileWriterClass implementation on the files in
      path-to-an-output-dir that match the Java regex (regExp) or the
      exact file name (name). The met files are serialized and stored in
      the job directory where the CAS-PGE job ran, and are then ingested
      into the file manager along with the original data file.
    -->
    <files regExp="java-regex"
           name="file-name"
           metFileWriterClass="org.apache.oodt.cas.pge.writers.PcsMetFileWriter.subclass"
           args="zero-or-more-args-comma-segregated">
      <!--
        Convention for renaming the files as they are processed on output.
        The PathUtilsNamingConvention is used by default.
      -->
      <renamingConv namingExpr="/[SomePathUtils]/[Metadata]" envReplace="true">
        <metadata key="some-key" val="some-val"/>
        <metadata key="some-other-key" val="zero-or-more-vals-comma-segregated"/>
      </renamingConv>
    </files>
  </dir>
</output>
<!-- custom metadata keys (and their values) that you want to set -->
<customMetadata>
  <!-- one or more of these -->
  <metadata key="name-of-metadata-field" val="metadata-value"/>
</customMetadata>
</pgeConfig>