| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| --> |
| <pipeline> |
| <info> <!-- Pipeline metadata: name, type, performance-capture settings and audit timestamps --> |
| <name>0006-groupby-basics</name> |
| <name_sync_with_filename>Y</name_sync_with_filename> |
| <description/> |
| <extended_description/> |
| <pipeline_version/> |
| <pipeline_type>Normal</pipeline_type> |
| <parameters> <!-- no pipeline parameters are declared --> |
| </parameters> |
| <capture_transform_performance>N</capture_transform_performance> |
| <transform_performance_capturing_delay>1000</transform_performance_capturing_delay> |
| <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit> |
| <created_user>-</created_user> |
| <created_date>2021/04/19 12:06:20.350</created_date> |
| <modified_user>-</modified_user> |
| <modified_date>2021/04/19 12:06:20.350</modified_date> |
| <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key> |
| <is_key_private>N</is_key_private> |
| </info> |
| <notepads> <!-- One note; its text explains why floating point results are converted to strings --> |
| <notepad> |
| <note>We convert floating point numbers |
| to strings to avoid rounding issues |
| when comparing data.</note> |
| <xloc>624</xloc> |
| <yloc>128</yloc> |
| <width>190</width> |
| <heigth>58</heigth> <!-- NOTE(review): 'heigth' spelling is presumably the tag name the reader expects; confirm before renaming --> |
| <fontname>Noto Sans</fontname> |
| <fontsize>11</fontsize> |
| <fontbold>N</fontbold> |
| <fontitalic>N</fontitalic> |
| <fontcolorred>14</fontcolorred> |
| <fontcolorgreen>58</fontcolorgreen> |
| <fontcolorblue>90</fontcolorblue> |
| <backgroundcolorred>201</backgroundcolorred> |
| <backgroundcolorgreen>232</backgroundcolorgreen> |
| <backgroundcolorblue>251</backgroundcolorblue> |
| <bordercolorred>14</bordercolorred> |
| <bordercolorgreen>58</bordercolorgreen> |
| <bordercolorblue>90</bordercolorblue> |
| </notepad> |
| </notepads> |
| <order> <!-- Hops form one linear chain: files/customers-100.txt, sort by state, Group by state, numbers to string, Output; all are enabled --> |
| <hop> |
| <from>files/customers-100.txt</from> |
| <to>sort by state</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>sort by state</from> |
| <to>Group by state</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>Group by state</from> |
| <to>numbers to string</to> |
| <enabled>Y</enabled> |
| </hop> |
| <hop> |
| <from>numbers to string</from> |
| <to>Output</to> |
| <enabled>Y</enabled> |
| </hop> |
| </order> |
| <transform> <!-- GroupBy transform: groups on stateCode and state and computes the aggregates listed under fields --> |
| <name>Group by state</name> |
| <type>GroupBy</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <all_rows>N</all_rows> |
| <ignore_aggregate>N</ignore_aggregate> |
| <field_ignore/> |
| <directory>${java.io.tmpdir}</directory> |
| <prefix>grp</prefix> |
| <add_linenr>N</add_linenr> |
| <linenr_fieldname/> |
| <give_back_row>N</give_back_row> |
| <group> <!-- grouping keys; the upstream 'sort by state' transform sorts on these same two fields --> |
| <field> |
| <name>stateCode</name> |
| </field> |
| <field> |
| <name>state</name> |
| </field> |
| </group> |
| <fields> <!-- per entry: aggregate = output field name, subject = input field, type = aggregation kind --> |
| <field> |
| <aggregate>count</aggregate> |
| <subject>id</subject> |
| <type>COUNT_ALL</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>countDistinct</aggregate> |
| <subject>housenr</subject> |
| <type>COUNT_DISTINCT</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>min</aggregate> |
| <subject>firstname</subject> |
| <type>MIN</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>max</aggregate> |
| <subject>firstname</subject> |
| <type>MAX</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>average</aggregate> |
| <subject>id</subject> |
| <type>AVERAGE</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>median</aggregate> |
| <subject>id</subject> |
| <type>MEDIAN</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>percentile90</aggregate> |
| <subject>id</subject> |
| <type>PERCENTILE</type> |
| <valuefield>90</valuefield> <!-- presumably the percentile to compute; TODO confirm --> |
| </field> |
| <field> |
| <aggregate>firstNonNull</aggregate> |
| <subject>zip</subject> |
| <type>FIRST</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>lastNonNull</aggregate> |
| <subject>zip</subject> |
| <type>LAST</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>firstValue</aggregate> |
| <subject>city</subject> |
| <type>FIRST_INCL_NULL</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>lastValue</aggregate> |
| <subject>city</subject> |
| <type>LAST_INCL_NULL</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>stdDev</aggregate> |
| <subject>id</subject> |
| <type>STD_DEV</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>nrRows</aggregate> |
| <subject/> <!-- no subject field is given for COUNT_ANY --> |
| <type>COUNT_ANY</type> |
| <valuefield/> |
| </field> |
| <field> |
| <aggregate>percentile90NearestRank</aggregate> |
| <subject>id</subject> |
| <type>PERCENTILE_NEAREST_RANK</type> |
| <valuefield>90</valuefield> |
| </field> |
| <field> |
| <aggregate>stdDevSample</aggregate> |
| <subject>id</subject> |
| <type>STD_DEV_SAMPLE</type> |
| <valuefield>20</valuefield> <!-- NOTE(review): only the percentile entries otherwise set valuefield; confirm whether 20 is used here --> |
| </field> |
| </fields> |
| <attributes/> |
| <GUI> |
| <xloc>512</xloc> |
| <yloc>64</yloc> |
| </GUI> |
| </transform> |
| <transform> <!-- Last transform in the chain (type Dummy); the only hop into it comes from 'numbers to string' --> |
| <name>Output</name> |
| <type>Dummy</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <attributes/> |
| <GUI> |
| <xloc>960</xloc> |
| <yloc>64</yloc> |
| </GUI> |
| </transform> |
| <transform> <!-- CSVInput transform: reads ${PROJECT_HOME}/files/customers-100.txt; ';' separator, '"' enclosure, header flag Y --> |
| <name>files/customers-100.txt</name> |
| <type>CSVInput</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <filename>${PROJECT_HOME}/files/customers-100.txt</filename> |
| <filename_field/> |
| <rownum_field/> |
| <include_filename>N</include_filename> |
| <separator>;</separator> |
| <enclosure>"</enclosure> |
| <header>Y</header> |
| <buffer_size>50000</buffer_size> |
| <lazy_conversion>N</lazy_conversion> |
| <add_filename_result>N</add_filename_result> |
| <parallel>N</parallel> |
| <newline_possible>N</newline_possible> |
| <encoding/> |
| <fields> <!-- ten declared fields: id is Integer, birthdate is Date (yyyy/MM/dd), all others are String --> |
| <field> |
| <name>id</name> |
| <type>Integer</type> |
| <format> #</format> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>15</length> |
| <precision>0</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>name</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>50</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>firstname</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>50</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>zip</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>30</length> |
| <precision>0</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>city</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>8</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>birthdate</name> |
| <type>Date</type> |
| <format>yyyy/MM/dd</format> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>-1</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>street</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>11</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>housenr</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>50</length> |
| <precision>0</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>stateCode</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>10</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| <field> |
| <name>state</name> |
| <type>String</type> |
| <format/> |
| <currency>$</currency> |
| <decimal>.</decimal> |
| <group>,</group> |
| <length>50</length> |
| <precision>-1</precision> |
| <trim_type>none</trim_type> |
| </field> |
| </fields> |
| <attributes/> |
| <GUI> |
| <xloc>112</xloc> |
| <yloc>64</yloc> |
| </GUI> |
| </transform> |
| <transform> <!-- SelectValues transform: sets five aggregate fields to type String with conversion mask 0.000;-0.000 --> |
| <name>numbers to string</name> |
| <type>SelectValues</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <fields> <!-- NOTE(review): the 'average' aggregate from 'Group by state' is not listed here; confirm it does not need the same conversion --> |
| <select_unspecified>N</select_unspecified> |
| <meta> |
| <name>median</name> |
| <rename>median</rename> |
| <type>String</type> |
| <length>-2</length> |
| <precision>-2</precision> |
| <conversion_mask>0.000;-0.000</conversion_mask> |
| <date_format_lenient>false</date_format_lenient> |
| <date_format_locale/> |
| <date_format_timezone/> |
| <lenient_string_to_number>false</lenient_string_to_number> |
| <encoding/> |
| <decimal_symbol/> |
| <grouping_symbol/> |
| <currency_symbol/> |
| <storage_type/> |
| </meta> |
| <meta> |
| <name>percentile90</name> |
| <rename>percentile90</rename> |
| <type>String</type> |
| <length>-2</length> |
| <precision>-2</precision> |
| <conversion_mask>0.000;-0.000</conversion_mask> |
| <date_format_lenient>false</date_format_lenient> |
| <date_format_locale/> |
| <date_format_timezone/> |
| <lenient_string_to_number>false</lenient_string_to_number> |
| <encoding/> |
| <decimal_symbol/> |
| <grouping_symbol/> |
| <currency_symbol/> |
| <storage_type/> |
| </meta> |
| <meta> |
| <name>stdDev</name> |
| <rename>stdDev</rename> |
| <type>String</type> |
| <length>-2</length> |
| <precision>-2</precision> |
| <conversion_mask>0.000;-0.000</conversion_mask> |
| <date_format_lenient>false</date_format_lenient> |
| <date_format_locale/> |
| <date_format_timezone/> |
| <lenient_string_to_number>false</lenient_string_to_number> |
| <encoding/> |
| <decimal_symbol/> |
| <grouping_symbol/> |
| <currency_symbol/> |
| <storage_type/> |
| </meta> |
| <meta> |
| <name>percentile90NearestRank</name> |
| <rename>percentile90NearestRank</rename> |
| <type>String</type> |
| <length>-2</length> |
| <precision>-2</precision> |
| <conversion_mask>0.000;-0.000</conversion_mask> |
| <date_format_lenient>false</date_format_lenient> |
| <date_format_locale/> |
| <date_format_timezone/> |
| <lenient_string_to_number>false</lenient_string_to_number> |
| <encoding/> |
| <decimal_symbol/> |
| <grouping_symbol/> |
| <currency_symbol/> |
| <storage_type/> |
| </meta> |
| <meta> |
| <name>stdDevSample</name> |
| <rename>stdDevSample</rename> |
| <type>String</type> |
| <length>-2</length> |
| <precision>-2</precision> |
| <conversion_mask>0.000;-0.000</conversion_mask> |
| <date_format_lenient>false</date_format_lenient> |
| <date_format_locale/> |
| <date_format_timezone/> |
| <lenient_string_to_number>false</lenient_string_to_number> |
| <encoding/> |
| <decimal_symbol/> |
| <grouping_symbol/> |
| <currency_symbol/> |
| <storage_type/> |
| </meta> |
| </fields> |
| <attributes/> |
| <GUI> |
| <xloc>688</xloc> |
| <yloc>64</yloc> |
| </GUI> |
| </transform> |
| <transform> <!-- SortRows transform: ascending, case-sensitive sort on stateCode, then state --> |
| <name>sort by state</name> |
| <type>SortRows</type> |
| <description/> |
| <distribute>Y</distribute> |
| <custom_distribution/> |
| <copies>1</copies> |
| <partitioning> |
| <method>none</method> |
| <schema_name/> |
| </partitioning> |
| <directory>${java.io.tmpdir}</directory> <!-- presumably the location for temporary sort files; TODO confirm --> |
| <prefix>out</prefix> |
| <sort_size>1000000</sort_size> |
| <free_memory/> |
| <compress>N</compress> |
| <compress_variable/> |
| <unique_rows>N</unique_rows> |
| <fields> <!-- sort keys match the grouping keys of 'Group by state' --> |
| <field> |
| <name>stateCode</name> |
| <ascending>Y</ascending> |
| <case_sensitive>Y</case_sensitive> |
| <collator_enabled>N</collator_enabled> |
| <collator_strength>0</collator_strength> |
| <presorted>N</presorted> |
| </field> |
| <field> |
| <name>state</name> |
| <ascending>Y</ascending> |
| <case_sensitive>Y</case_sensitive> |
| <collator_enabled>N</collator_enabled> |
| <collator_strength>0</collator_strength> |
| <presorted>N</presorted> |
| </field> |
| </fields> |
| <attributes/> |
| <GUI> |
| <xloc>320</xloc> |
| <yloc>64</yloc> |
| </GUI> |
| </transform> |
| <transform_error_handling> <!-- empty: no error handling entries are defined for any transform --> |
| </transform_error_handling> |
| <attributes/> |
| </pipeline> |