<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<pipeline>
<info>
<name>0015-apache-tika-parent</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
<pipeline_version/>
<pipeline_type>Normal</pipeline_type>
<parameters>
</parameters>
<capture_transform_performance>N</capture_transform_performance>
<transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
<transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
<created_user>-</created_user>
<created_date>2021/12/10 12:35:45.686</created_date>
<modified_user>-</modified_user>
<modified_date>2021/12/10 12:35:45.686</modified_date>
<key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
<is_key_private>N</is_key_private>
</info>
<notepads>
</notepads>
<order>
<hop>
<from>files</from>
<to>ETL metadata injection</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>metadata</from>
<to>ETL metadata injection</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>ETL metadata injection</from>
<to>validate</to>
<enabled>Y</enabled>
</hop>
</order>
<transform>
<name>ETL metadata injection</name>
<type>MetaInject</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<filename>${PROJECT_HOME}/0015-apache-tika-child.hpl</filename>
<run_configuration>local</run_configuration>
<source_transform>OUTPUT</source_transform>
<source_output_fields>
<source_output_field>
<source_output_field_name>fileSize</source_output_field_name>
<source_output_field_type>Integer</source_output_field_type>
<source_output_field_length>-1</source_output_field_length>
<source_output_field_precision>0</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>filename</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>250</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>rowNumber</source_output_field_name>
<source_output_field_type>Integer</source_output_field_type>
<source_output_field_length>10</source_output_field_length>
<source_output_field_precision>0</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>shortFilename</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>100</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>extension</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>100</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>path</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>100</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>hiddenFlag</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>-1</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>lastModificationDate</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>-1</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>uri</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>100</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>rootUri</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>100</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
<source_output_field>
<source_output_field_name>checksum</source_output_field_name>
<source_output_field_type>String</source_output_field_type>
<source_output_field_length>-1</source_output_field_length>
<source_output_field_precision>-1</source_output_field_precision>
</source_output_field>
</source_output_fields>
<target_file/>
<create_parent_folder>Y</create_parent_folder>
<no_execution>N</no_execution>
<stream_source_transform/>
<stream_target_transform/>
<mappings>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>file-size-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>fileSizeField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>metadata-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>metadataField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>mask</target_attribute_key>
<target_detail>Y</target_detail>
<source_transform>files</source_transform>
<source_field>fileMask</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>name</target_attribute_key>
<target_detail>Y</target_detail>
<source_transform>files</source_transform>
<source_field>fileName</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>root-uri-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>rootUriField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>include-filename-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>filenameField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>extension-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>extensionField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>hidden-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>hiddenFlagField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>include-row-number-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>rowNumberField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>content-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>contentField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>required</target_attribute_key>
<target_detail>Y</target_detail>
<source_transform>files</source_transform>
<source_field>fileRequired</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>include-sub-folders</target_attribute_key>
<target_detail>Y</target_detail>
<source_transform>files</source_transform>
<source_field>fileIncludeSubFolders</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>last-modification-time-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>lastModDateField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>output-format</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>outputFormat</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>path-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>pathField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>uri-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>uriField</source_field>
</mapping>
<mapping>
<target_transform_name>Apache Tika</target_transform_name>
<target_attribute_key>short-filename-field</target_attribute_key>
<target_detail>N</target_detail>
<source_transform>metadata</source_transform>
<source_field>shortFilenameField</source_field>
</mapping>
</mappings>
<attributes/>
<GUI>
<xloc>368</xloc>
<yloc>128</yloc>
</GUI>
</transform>
<transform>
<name>files</name>
<type>DataGrid</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<fields>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileName</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileMask</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileExcludeMask</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileRequired</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileIncludeSubFolders</name>
<precision>-1</precision>
<type>String</type>
</field>
</fields>
<data>
<line>
<item>zip:s3://apache-hop/tika-test-files.zip</item>
<item>.*</item>
<item/>
<item>Y</item>
<item>Y</item>
</line>
</data>
<attributes/>
<GUI>
<xloc>144</xloc>
<yloc>128</yloc>
</GUI>
</transform>
<transform>
<name>metadata</name>
<type>DataGrid</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<fields>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>outputFormat</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>contentField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>fileSizeField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>filenameField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>rowNumberField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>shortFilenameField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>extensionField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>pathField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>hiddenFlagField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>lastModDateField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>uriField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>rootUriField</name>
<precision>-1</precision>
<type>String</type>
</field>
<field>
<set_empty_string>N</set_empty_string>
<length>-1</length>
<name>metadataField</name>
<precision>-1</precision>
<type>String</type>
</field>
</fields>
<data>
<line>
<item>Plain text</item>
<item>content</item>
<item>fileSize</item>
<item>filename</item>
<item>rowNumber</item>
<item>shortFilename</item>
<item>extension</item>
<item>path</item>
<item>hiddenFlag</item>
<item>lastModificationDate</item>
<item>uri</item>
<item>rootUri</item>
<item>metadata</item>
</line>
</data>
<attributes/>
<GUI>
<xloc>144</xloc>
<yloc>272</yloc>
</GUI>
</transform>
<transform>
<name>validate</name>
<type>Dummy</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<attributes/>
<GUI>
<xloc>576</xloc>
<yloc>128</yloc>
</GUI>
</transform>
<transform_error_handling>
</transform_error_handling>
<attributes/>
</pipeline>