| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| --> |
| <pipeline> |
<!-- Pipeline header. Names the pipeline "0015-apache-tika-parent" (name kept in sync with the
     file name), sets the type to Normal, declares no parameters, and leaves transform
     performance capturing switched off (N). Created and modified timestamps are identical. -->
| <info> |
| <name>0015-apache-tika-parent</name> |
| <name_sync_with_filename>Y</name_sync_with_filename> |
| <description/> |
| <extended_description/> |
| <pipeline_version/> |
| <pipeline_type>Normal</pipeline_type> |
| <parameters> |
| </parameters> |
| <capture_transform_performance>N</capture_transform_performance> |
| <transform_performance_capturing_delay>1000</transform_performance_capturing_delay> |
| <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit> |
| <created_user>-</created_user> |
| <created_date>2021/12/10 12:35:45.686</created_date> |
| <modified_user>-</modified_user> |
| <modified_date>2021/12/10 12:35:45.686</modified_date> |
| <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key> |
| <is_key_private>N</is_key_private> |
| </info> |
| <notepads> |
| </notepads> |
<!-- Hop wiring. The "files" and "metadata" transforms both feed "ETL metadata injection",
     and the injection transform feeds "validate". All three hops are enabled. -->
| <order> |
| <hop> |
| <from>files</from> |
| <to>ETL metadata injection</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>metadata</from> |
| <to>ETL metadata injection</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>ETL metadata injection</from> |
| <to>validate</to> |
| <enabled>Y</enabled> |
| </hop> |
| </order> |
<!-- MetaInject transform. It references the template pipeline
     ${PROJECT_HOME}/0015-apache-tika-child.hpl with run configuration "local" and maps
     columns of the "files" and "metadata" transforms onto attributes of the template's
     "Apache Tika" transform. no_execution is N and target_file is empty.
     NOTE(review): source_transform is set to OUTPUT; presumably this names the template
     transform whose rows are read back into this pipeline. Confirm against the child pipeline. -->
| <transform> |
| <name>ETL metadata injection</name> |
| <type>MetaInject</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <filename>${PROJECT_HOME}/0015-apache-tika-child.hpl</filename> |
| <run_configuration>local</run_configuration> |
| <source_transform>OUTPUT</source_transform> |
<!-- Eleven declared output fields: fileSize and rowNumber are Integer, the rest are String.
     NOTE(review): "checksum" is declared here, while "content" and "metadata" (both supplied
     as field names by the "metadata" grid in this file) are not. Verify this list against
     what the child pipeline's OUTPUT transform really emits. -->
| <source_output_fields> |
| <source_output_field> |
| <source_output_field_name>fileSize</source_output_field_name> |
| <source_output_field_type>Integer</source_output_field_type> |
| <source_output_field_length>-1</source_output_field_length> |
| <source_output_field_precision>0</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>filename</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>250</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>rowNumber</source_output_field_name> |
| <source_output_field_type>Integer</source_output_field_type> |
| <source_output_field_length>10</source_output_field_length> |
| <source_output_field_precision>0</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>shortFilename</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>100</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>extension</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>100</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>path</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>100</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>hiddenFlag</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>-1</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>lastModificationDate</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>-1</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>uri</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>100</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>rootUri</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>100</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| <source_output_field> |
| <source_output_field_name>checksum</source_output_field_name> |
| <source_output_field_type>String</source_output_field_type> |
| <source_output_field_length>-1</source_output_field_length> |
| <source_output_field_precision>-1</source_output_field_precision> |
| </source_output_field> |
| </source_output_fields> |
| <target_file/> |
| <create_parent_folder>Y</create_parent_folder> |
| <no_execution>N</no_execution> |
| <stream_source_transform/> |
| <stream_target_transform/> |
<!-- Seventeen mappings, all targeting the "Apache Tika" transform. The four sourced from
     "files" (mask, name, required, include-sub-folders) have target_detail Y; the thirteen
     sourced from "metadata" have target_detail N.
     NOTE(review): presumably Y marks attributes of a repeating group filled once per input
     row; confirm against the Hop metadata injection documentation.
     NOTE(review): the "files" grid also has a fileExcludeMask column that no mapping uses. -->
| <mappings> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>file-size-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>fileSizeField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>metadata-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>metadataField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>mask</target_attribute_key> |
| <target_detail>Y</target_detail> |
| <source_transform>files</source_transform> |
| <source_field>fileMask</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>name</target_attribute_key> |
| <target_detail>Y</target_detail> |
| <source_transform>files</source_transform> |
| <source_field>fileName</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>root-uri-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>rootUriField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>include-filename-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>filenameField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>extension-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>extensionField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>hidden-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>hiddenFlagField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>include-row-number-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>rowNumberField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>content-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>contentField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>required</target_attribute_key> |
| <target_detail>Y</target_detail> |
| <source_transform>files</source_transform> |
| <source_field>fileRequired</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>include-sub-folders</target_attribute_key> |
| <target_detail>Y</target_detail> |
| <source_transform>files</source_transform> |
| <source_field>fileIncludeSubFolders</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>last-modification-time-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>lastModDateField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>output-format</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>outputFormat</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>path-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>pathField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>uri-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>uriField</source_field> |
| </mapping> |
| <mapping> |
| <target_transform_name>Apache Tika</target_transform_name> |
| <target_attribute_key>short-filename-field</target_attribute_key> |
| <target_detail>N</target_detail> |
| <source_transform>metadata</source_transform> |
| <source_field>shortFilenameField</source_field> |
| </mapping> |
| </mappings> |
| <attributes/> |
| <GUI> |
| <xloc>368</xloc> |
| <yloc>128</yloc> |
| </GUI> |
| </transform> |
<!-- DataGrid "files". Declares five String columns (fileName, fileMask, fileExcludeMask,
     fileRequired, fileIncludeSubFolders) and a single data row. This transform hops into
     "ETL metadata injection", which reads four of the columns; fileExcludeMask is not
     referenced by any mapping in this file. -->
| <transform> |
| <name>files</name> |
| <type>DataGrid</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <fields> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileName</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileMask</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileExcludeMask</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileRequired</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileIncludeSubFolders</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| </fields> |
<!-- One row, items listed in the same order as the fields above: a zip archive addressed
     through an s3 URI, the mask ".*", an empty exclude mask, then Y and Y. -->
| <data> |
| <line> |
| <item>zip:s3://apache-hop/tika-test-files.zip</item> |
| <item>.*</item> |
| <item/> |
| <item>Y</item> |
| <item>Y</item> |
| </line> |
| </data> |
| <attributes/> |
| <GUI> |
| <xloc>144</xloc> |
| <yloc>128</yloc> |
| </GUI> |
| </transform> |
<!-- DataGrid "metadata". Declares thirteen String columns and a single data row. Each column
     is read by one mapping of "ETL metadata injection" (source_transform "metadata") and
     injected into an attribute of the template's "Apache Tika" transform. -->
| <transform> |
| <name>metadata</name> |
| <type>DataGrid</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <fields> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>outputFormat</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>contentField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>fileSizeField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>filenameField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>rowNumberField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>shortFilenameField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>extensionField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>pathField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>hiddenFlagField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>lastModDateField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>uriField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>rootUriField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| <field> |
| <set_empty_string>N</set_empty_string> |
| <length>-1</length> |
| <name>metadataField</name> |
| <precision>-1</precision> |
| <type>String</type> |
| </field> |
| </fields> |
<!-- One row, items listed in the same order as the fields above: the output format
     "Plain text" followed by twelve field names (content, fileSize, filename, ...). -->
| <data> |
| <line> |
| <item>Plain text</item> |
| <item>content</item> |
| <item>fileSize</item> |
| <item>filename</item> |
| <item>rowNumber</item> |
| <item>shortFilename</item> |
| <item>extension</item> |
| <item>path</item> |
| <item>hiddenFlag</item> |
| <item>lastModificationDate</item> |
| <item>uri</item> |
| <item>rootUri</item> |
| <item>metadata</item> |
| </line> |
| </data> |
| <attributes/> |
| <GUI> |
| <xloc>144</xloc> |
| <yloc>272</yloc> |
| </GUI> |
| </transform> |
<!-- Transform "validate" of type Dummy. It is the target of the hop leaving
     "ETL metadata injection"; no hop in this file starts from it. -->
| <transform> |
| <name>validate</name> |
| <type>Dummy</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <attributes/> |
| <GUI> |
| <xloc>576</xloc> |
| <yloc>128</yloc> |
| </GUI> |
| </transform> |
| <transform_error_handling> |
| </transform_error_handling> |
| <attributes/> |
| </pipeline> |