| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| --> |
| <pipeline> |
| <!-- Pipeline-level metadata: the pipeline name, its type and status, the parameter list, performance capture settings, and creation/modification stamps. --> |
| <info> |
| <name>0037-apache-tika</name> |
| <name_sync_with_filename>Y</name_sync_with_filename> |
| <description/> |
| <extended_description/> |
| <pipeline_version/> |
| <pipeline_type>Normal</pipeline_type> |
| <pipeline_status>0</pipeline_status> |
| <!-- No pipeline parameters are declared. --> |
| <parameters> |
| </parameters> |
| <!-- Transform performance capturing is switched off (N); the delay and size limit below are still recorded. --> |
| <capture_transform_performance>N</capture_transform_performance> |
| <transform_performance_capturing_delay>1000</transform_performance_capturing_delay> |
| <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit> |
| <created_user>-</created_user> |
| <created_date>2021/12/10 11:50:34.956</created_date> |
| <modified_user>-</modified_user> |
| <modified_date>2021/12/10 11:50:34.956</modified_date> |
| <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key> |
| <is_key_private>N</is_key_private> |
| </info> |
| <!-- No notepads are defined for this pipeline. --> |
| <notepads> |
| </notepads> |
| <!-- Hops connecting the transforms in a single linear chain: "Apache Tika", then "contentChecksum", then "remove content", then "validate". All three hops are enabled. --> |
| <order> |
| <hop> |
| <from>Apache Tika</from> |
| <to>contentChecksum</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>contentChecksum</from> |
| <to>remove content</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>remove content</from> |
| <to>validate</to> |
| <enabled>Y</enabled> |
| </hop> |
| </order> |
| <!-- Transform "Apache Tika" (type Tika): first transform in the hop chain. It is configured with one required file source and names the output fields used by the downstream transforms (content, metadata, lastModificationDate, and others). --> |
| <transform> |
| <name>Apache Tika</name> |
| <type>Tika</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <add-result-file>N</add-result-file> |
| <content-field>content</content-field> |
| <extension-field>extension</extension-field> |
| <!-- NOTE(review): N presumably means file names come from the files list below rather than from an incoming field; confirm against the Tika transform documentation. --> |
| <file-in-field>N</file-in-field> |
| <file-size-field>fileSize</file-size-field> |
| <include-filename-field>filename</include-filename-field> |
| <!-- Single source: a zip archive addressed through an s3 URI, with mask .* and sub-folders included; the entry is marked as required. --> |
| <files> |
| <file> |
| <include-sub-folders>Y</include-sub-folders> |
| <mask>.*</mask> |
| <name>zip:s3://apache-hop/tika-test-files.zip</name> |
| <required>Y</required> |
| </file> |
| </files> |
| <hidden-field>hiddenFlag</hidden-field> |
| <ignore-empty-file>N</ignore-empty-file> |
| <last-modification-time-field>lastModificationDate</last-modification-time-field> |
| <metadata-field>metadata</metadata-field> |
| <output-format>Plain text</output-format> |
| <path-field>path</path-field> |
| <root-uri-field>rootUri</root-uri-field> |
| <!-- NOTE(review): a row limit of 0 presumably means "no limit"; confirm against the Tika transform documentation. --> |
| <row-limit>0</row-limit> |
| <include-row-number-field>rowNumber</include-row-number-field> |
| <short-filename-field>shortFilename</short-filename-field> |
| <uri-field>uri</uri-field> |
| <attributes/> |
| <GUI> |
| <xloc>128</xloc> |
| <yloc>96</yloc> |
| </GUI> |
| </transform> |
| <!-- Transform "contentChecksum" (type CheckSum): configured with checksum type SHA-512 over the single input field "content"; the result field is named "checksum" and has result type string. --> |
| <transform> |
| <name>contentChecksum</name> |
| <type>CheckSum</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <checksumtype>SHA-512</checksumtype> |
| <!-- Only the "content" field is listed as checksum input. --> |
| <fields> |
| <field> |
| <name>content</name> |
| </field> |
| </fields> |
| <resultfieldName>checksum</resultfieldName> |
| <resultType>string</resultType> |
| <attributes/> |
| <GUI> |
| <xloc>272</xloc> |
| <yloc>96</yloc> |
| </GUI> |
| </transform> |
| <!-- Transform "remove content" (type SelectValues): lists three fields under remove, namely content, metadata and lastModificationDate. The "checksum" field is not listed, so it is not among the removed fields. --> |
| <!-- NOTE(review): presumably these fields are dropped because they are large or vary between environments; confirm the intent with the test's expected output. --> |
| <transform> |
| <name>remove content</name> |
| <type>SelectValues</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <fields> |
| <select_unspecified>N</select_unspecified> |
| <remove> |
| <name>content</name> |
| </remove> |
| <remove> |
| <name>metadata</name> |
| </remove> |
| <remove> |
| <name>lastModificationDate</name> |
| </remove> |
| </fields> |
| <attributes/> |
| <GUI> |
| <xloc>416</xloc> |
| <yloc>96</yloc> |
| </GUI> |
| </transform> |
| <!-- Transform "validate" (type Dummy): last transform in the hop chain; no hop in the order section leaves it, and it carries no settings beyond the common transform options. --> |
| <transform> |
| <name>validate</name> |
| <type>Dummy</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <attributes/> |
| <GUI> |
| <xloc>560</xloc> |
| <yloc>96</yloc> |
| </GUI> |
| </transform> |
| <!-- No transform error handling is configured, and no pipeline-level attributes are set. --> |
| <transform_error_handling> |
| </transform_error_handling> |
| <attributes/> |
| </pipeline> |