<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<pipeline>
<!-- Pipeline-level metadata: name, type, performance-capture settings and audit stamps. -->
<info>
<name>00018-avro-file-input</name>
<!-- NOTE(review): Y presumably keeps the pipeline name tied to its file name; confirm against the Hop documentation. -->
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
<pipeline_version/>
<pipeline_type>Normal</pipeline_type>
<!-- No parameters are declared for this pipeline. -->
<parameters>
</parameters>
<!-- The performance capture flag is N; the delay and size-limit values below are stored alongside it. -->
<capture_transform_performance>N</capture_transform_performance>
<transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
<transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
<!-- Created and modified stamps carry the same timestamp, and both user fields hold "-". -->
<created_user>-</created_user>
<created_date>2021/05/28 10:59:00.237</created_date>
<modified_user>-</modified_user>
<modified_date>2021/05/28 10:59:00.237</modified_date>
<!-- NOTE(review): opaque, base64-looking value; its meaning is not visible from this file. -->
<key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
<is_key_private>N</is_key_private>
</info>
<!-- The notepads list is empty for this pipeline. -->
<notepads>
</notepads>
<!--
Hops between transforms. Each hop names a transform by its <name> value.
Together the six hops below form one linear chain, and every hop is enabled (Y):
  1. files/userdata1.avro  to  filename only
  2. filename only         to  Avro File Input
  3. Avro File Input       to  Avro Decode
  4. Avro Decode           to  Output
  5. Output                to  Reservoir sampling
  6. Reservoir sampling    to  Verify
-->
<order>
<hop>
<from>files/userdata1.avro</from>
<to>filename only</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>filename only</from>
<to>Avro File Input</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>Avro File Input</from>
<to>Avro Decode</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>Avro Decode</from>
<to>Output</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>Output</from>
<to>Reservoir sampling</to>
<enabled>Y</enabled>
</hop>
<hop>
<from>Reservoir sampling</from>
<to>Verify</to>
<enabled>Y</enabled>
</hop>
</order>
<!--
Transform "Avro Decode" (type AvroDecode).
Its source_field is "avro", the same field name that the "Avro File Input"
transform declares as its output_field. Thirteen target fields are mapped below,
each keeping the name of its Avro source field.
-->
<transform>
<name>Avro Decode</name>
<type>AvroDecode</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<!-- NOTE(review): presumably Y means a source field absent from a record is tolerated rather than treated as an error; confirm. -->
<ignore_missing>Y</ignore_missing>
<source_field>avro</source_field>
<fields>
<!-- birthdate: Avro String converted to Date using the mask M/d/yyyy. -->
<field>
<source_avro_type>String</source_avro_type>
<source_field>birthdate</source_field>
<target_field_name>birthdate</target_field_name>
<target_format>M/d/yyyy</target_format>
<target_length/>
<target_precision/>
<target_type>Date</target_type>
</field>
<!-- cc: Avro Union mapped to String. -->
<field>
<source_avro_type>Union</source_avro_type>
<source_field>cc</source_field>
<target_field_name>cc</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>comments</source_field>
<target_field_name>comments</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>country</source_field>
<target_field_name>country</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>email</source_field>
<target_field_name>email</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>first_name</source_field>
<target_field_name>first_name</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>gender</source_field>
<target_field_name>gender</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<!-- id: Avro Long mapped to the Integer target type. -->
<field>
<source_avro_type>Long</source_avro_type>
<source_field>id</source_field>
<target_field_name>id</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>Integer</target_type>
</field>
<!-- ip_address: Avro String mapped to the "Internet Address" target type. -->
<field>
<source_avro_type>String</source_avro_type>
<source_field>ip_address</source_field>
<target_field_name>ip_address</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>Internet Address</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>last_name</source_field>
<target_field_name>last_name</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<!-- registration_dttm: Avro String converted to Date using the mask yyyy-MM-dd'T'HH:mm:ssXXX. -->
<field>
<source_avro_type>String</source_avro_type>
<source_field>registration_dttm</source_field>
<target_field_name>registration_dttm</target_field_name>
<target_format>yyyy-MM-dd'T'HH:mm:ssXXX</target_format>
<target_length/>
<target_precision/>
<target_type>Date</target_type>
</field>
<!-- salary: Avro Union mapped to String (not to a numeric type). -->
<field>
<source_avro_type>Union</source_avro_type>
<source_field>salary</source_field>
<target_field_name>salary</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
<field>
<source_avro_type>String</source_avro_type>
<source_field>title</source_field>
<target_field_name>title</target_field_name>
<target_format/>
<target_length/>
<target_precision/>
<target_type>String</target_type>
</field>
</fields>
<attributes/>
<GUI>
<xloc>560</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "Avro File Input" (type AvroFileInput).
data_filename_field is "filename" and output_field is "avro".
NOTE(review): presumably the transform opens the file named in the incoming
"filename" field and emits each record in the "avro" field; confirm against the
Hop documentation.
-->
<transform>
<name>Avro File Input</name>
<type>AvroFileInput</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<data_filename_field>filename</data_filename_field>
<output_field>avro</output_field>
<attributes/>
<GUI>
<xloc>416</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "Output" (type SelectValues).
Lists thirteen fields by name only (no rename, type or format settings), with
select_unspecified set to N. The names and their order match the target fields
of the "Avro Decode" transform.
NOTE(review): presumably this drops the upstream "filename" and "avro" fields
because they are not listed; confirm.
-->
<transform>
<name>Output</name>
<type>SelectValues</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<fields>
<field>
<name>birthdate</name>
</field>
<field>
<name>cc</name>
</field>
<field>
<name>comments</name>
</field>
<field>
<name>country</name>
</field>
<field>
<name>email</name>
</field>
<field>
<name>first_name</name>
</field>
<field>
<name>gender</name>
</field>
<field>
<name>id</name>
</field>
<field>
<name>ip_address</name>
</field>
<field>
<name>last_name</name>
</field>
<field>
<name>registration_dttm</name>
</field>
<field>
<name>salary</name>
</field>
<field>
<name>title</name>
</field>
<select_unspecified>N</select_unspecified>
</fields>
<attributes/>
<GUI>
<xloc>688</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "filename only" (type SelectValues).
Lists a single field, "filename", with select_unspecified set to N. That field
name is the one the "Avro File Input" transform uses as its data_filename_field.
-->
<transform>
<name>filename only</name>
<type>SelectValues</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<fields>
<field>
<name>filename</name>
</field>
<select_unspecified>N</select_unspecified>
</fields>
<attributes/>
<GUI>
<xloc>272</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "files/userdata1.avro" (type GetFileNames): first transform in the hop
chain. It looks in ${PROJECT_HOME}/files/ (no subfolders) for the Avro test file.
The filter type is all_files and the file names are not taken from an input
field (filefield is N).
-->
<transform>
<name>files/userdata1.avro</name>
<type>GetFileNames</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<filter>
<filterfiletype>all_files</filterfiletype>
</filter>
<doNotFailIfNoFile>N</doNotFailIfNoFile>
<rownum>N</rownum>
<isaddresult>Y</isaddresult>
<filefield>N</filefield>
<rownum_field/>
<filename_Field/>
<wildcard_Field/>
<exclude_wildcard_Field/>
<dynamic_include_subfolders>N</dynamic_include_subfolders>
<!-- NOTE(review): 0 presumably means no row limit; confirm. -->
<limit>0</limit>
<file>
<name>${PROJECT_HOME}/files/</name>
<!-- The mask is a regular expression, so the dot is escaped to match only the literal name userdata1.avro. -->
<filemask>userdata1\.avro</filemask>
<exclude_filemask/>
<file_required>N</file_required>
<include_subfolders>N</include_subfolders>
</file>
<attributes/>
<GUI>
<xloc>128</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "Reservoir sampling" (type ReservoirSampling).
Configured with a sample size of 100 and a fixed seed of 54321.
NOTE(review): the fixed seed presumably makes the selected sample repeatable
between runs; confirm.
-->
<transform>
<name>Reservoir sampling</name>
<type>ReservoirSampling</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<reservoir_sampling>
<sample_size>100</sample_size>
<seed>54321</seed>
</reservoir_sampling>
<attributes/>
<GUI>
<xloc>816</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!--
Transform "Verify" (type Dummy): last transform in the hop chain; no hop in the
order section leaves it. It carries no settings beyond the common ones.
NOTE(review): presumably serves as the attachment point for test assertions on
the sampled rows; confirm against the test definitions.
-->
<transform>
<name>Verify</name>
<type>Dummy</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<attributes/>
<GUI>
<xloc>944</xloc>
<yloc>96</yloc>
</GUI>
</transform>
<!-- No transform error handling entries and no pipeline-level attributes are defined. -->
<transform_error_handling>
</transform_error_handling>
<attributes/>
</pipeline>