<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<cas:producttypes xmlns:cas="http://oodt.jpl.nasa.gov/1.0/cas">
  <!-- Product type policy: declares the single "GenericFile" product type. -->
  <type id="urn:oodt:GenericFile" name="GenericFile">
    <repository path="file://[HOME]/files"/>
    <versioner class="org.apache.oodt.cas.filemgr.versioning.BasicVersioner"/>
    <!--
      Newlines are trimmed from the product type description by default.
      To turn trimming off, include the following attribute on your
      description definition: trim="false"
    -->
    <description>The default product type for any kind of file.</description>
    <!--
      Global aggregate product type metadata: you can declare this in the
      same way you'd wire together a CAS met file, even using multiple values
      per key.
      This metadata can be looked up via the Product Type API, and can also be
      used in browsers and so forth to display global metadata about the
      collection of Products that are part of this product type.
    -->
    <metadata>
      <keyval>
        <key>ProductType</key>
        <val>GenericFile</val>
      </keyval>
      <!--
        If you are using the
        org.apache.oodt.cas.filemgr.versioning.ProductTypeMetVersioner,
        you can set a met key like the one below to define your filePathSpec:
        <keyval>
          <key>filePathSpec</key>
          <val>/[Filename]</val>
        </keyval>
      -->
    </metadata>
    <metExtractors>
      <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.CoreMetExtractor">
        <configuration>
          <!-- You can optionally include the envReplace tag to turn on/off environment variable replacement. -->
          <property name="nsAware" value="true"/>
          <property name="elementNs" value="CAS"/>
          <property name="elements"
                    value="ProductReceivedTime,ProductName,ProductId"/>
        </configuration>
      </extractor>
      <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.MimeTypeExtractor"/>
      <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.FinalFileLocationExtractor">
        <configuration>
          <!--
            This property specifies whether you want the FILE_LOCATION field
            computed by this extractor to replace any other FILE_LOCATION met
            attribute.
          -->
          <property name="replace" value="true"/>
        </configuration>
      </extractor>
      <!--
        The below enables the FilenameRegexMetExtractor.
        It allows a user to specify a filename pattern with Java Regex that
        identifies groups within a particular file name. Those groups are then
        mapped to Metadata key names, and extracted as metadata from a file's
        name.
        In the example below, there are two groups, Filename and ProductId, for
        files of the form:
          Filename_ProductId.txt
        NOTE(review): XML attribute values are not Java string literals, so the
        doubled backslashes in filenamePattern may need to be single
        backslashes; confirm against the extractor before enabling.
        <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.FilenameRegexMetExtractor">
          <configuration>
            <property name="filenamePattern" value="(\\w*)_(\\d*)\\.txt"/>
            <property name="metadataKeys" value="Filename,ProductId"/>
          </configuration>
        </extractor>
      -->
      <!--
        The below enables the TikaAutoDetectExtractor. It does not take any
        configuration parameters and simply runs Apache Tika on the reference
        for the Product, assuming it is a single file product
        (Product.STRUCTURE_FLAT).
        <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.TikaAutoDetectExtractor"/>
      -->
    </metExtractors>
  </type>
</cas:producttypes>