/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.plan.nodes.dataset

import java.util

import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.`type`.RelDataType
import org.apache.calcite.rel.metadata.RelMetadataQuery
import org.apache.calcite.rel.{RelCollation, RelNode, RelWriter, SingleRel}
import org.apache.calcite.rex.{RexLiteral, RexNode}
import org.apache.flink.api.java.DataSet
import org.apache.flink.table.api.{BatchQueryConfig, BatchTableEnvironment, TableException}
import org.apache.flink.table.plan.nodes.CommonSort
import org.apache.flink.table.runtime.aggregate.SortUtil.directionToOrder
import org.apache.flink.table.runtime.{CountPartitionFunction, LimitFilterFunction}
import org.apache.flink.types.Row

import scala.collection.JavaConverters._
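
/**
  * DataSet relational node that sorts its input according to the given collation and,
  * optionally, restricts the output to the row range described by the OFFSET and FETCH
  * expressions.
  */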
class DataSetSort(
    cluster: RelOptCluster,
    traitSet: RelTraitSet,
    inp: RelNode,
    collations: RelCollation,
    rowRelDataType: RelDataType,
    offset: RexNode,
    fetch: RexNode)
  extends SingleRel(cluster, traitSet, inp)
  with CommonSort
  with DataSetRel {
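
  // Row range implied by the OFFSET and FETCH expressions.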
  private val limitStart: Long = getFetchLimitStart(offset)
  private val limitEnd: Long = getFetchLimitEnd(fetch, offset)

  override def deriveRowType(): RelDataType = rowRelDataType

  override def copy(traitSet: RelTraitSet, inputs: util.List[RelNode]): RelNode = {
    new DataSetSort(
      cluster,
      traitSet,
      inputs.get(0),
      collations,
      getRowType,
      offset,
      fetch
    )
  }
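
  // Output size estimate: the input row count minus the offset (never below 1.0),
  // additionally capped by the fetch limit when one is specified.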
  override def estimateRowCount(metadata: RelMetadataQuery): Double = {
    val inputRowCnt = metadata.getRowCount(this.getInput)

    if (inputRowCnt == null) {
      inputRowCnt
    } else {
      val rowCount = (inputRowCnt - limitStart).max(1.0)
      if (fetch != null) {
        val limit = RexLiteral.intValue(fetch)
        rowCount.min(limit)
      } else {
        rowCount
      }
    }
  }
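
  // Translates this node into a DataSet program: range-partition the input on the sort
  // keys (unless the parallelism is 1), sort every partition, and, if OFFSET/FETCH are
  // present, filter the rows using broadcast per-partition counts.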
  override def translateToPlan(
      tableEnv: BatchTableEnvironment,
      queryConfig: BatchQueryConfig): DataSet[Row] = {

    if (fieldCollations.isEmpty) {
      throw new TableException("Limiting the result without sorting is not allowed " +
        "as it could lead to arbitrary results.")
    }

    val config = tableEnv.getConfig

    val inputDs = inp.asInstanceOf[DataSetRel].translateToPlan(tableEnv, queryConfig)

    val currentParallelism = inputDs.getExecutionEnvironment.getParallelism
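
    // A single partition is already totally ordered once it is sorted; with higher
    // parallelism, range-partition on the sort keys to establish a global order.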
    var partitionedDs = if (currentParallelism == 1) {
      inputDs
    } else {
      inputDs.partitionByRange(fieldCollations.map(_._1): _*)
        .withOrders(fieldCollations.map(_._2): _*)
    }
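
    // Sort every partition on each of the sort fields.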
    fieldCollations.foreach { fieldCollation =>
      partitionedDs = partitionedDs.sortPartition(fieldCollation._1, fieldCollation._2)
    }
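
    // Without OFFSET/FETCH the sorted DataSet is the final result. Otherwise, count the
    // rows of each partition, broadcast the counts, and keep only the rows that fall
    // into the requested offset/fetch range.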
    if (offset == null && fetch == null) {
      partitionedDs
    } else {
      val countFunction = new CountPartitionFunction[Row]

      val partitionCountName = "prepare offset/fetch"

      val partitionCount = partitionedDs
        .mapPartition(countFunction)
        .name(partitionCountName)

      val broadcastName = "countPartition"

      val limitFunction = new LimitFilterFunction[Row](
        limitStart,
        limitEnd,
        broadcastName)
val limitName = s"offset: $$offsetToString(offset), fetch: $$fetchToString(fetch, offset))"

      partitionedDs
        .filter(limitFunction)
        .name(limitName)
        .withBroadcastSet(partitionCount, broadcastName)
    }
  }
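
  // Sort keys as (field index, order) pairs derived from the collation.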
  private val fieldCollations = collations.getFieldCollations.asScala
    .map(c => (c.getFieldIndex, directionToOrder(c.getDirection)))

  override def toString: String = {
    sortToString(getRowType, collations, offset, fetch)
  }

  override def explainTerms(pw: RelWriter): RelWriter = {
    sortExplainTerms(
      super.explainTerms(pw),
      getRowType,
      collations,
      offset,
      fetch)
  }
}