| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import { |
| DataTransformOption, ExternalDataTransform, ExternalSource, ExternalDimensionDefinition |
| } from '../../../../src/data/helper/transform'; |
| import { |
| DimensionName, DimensionLoose, OptionDataValue |
| } from '../../../../src/util/types'; |
| import { hasOwn, assert, map, each } from 'zrender/src/core/util'; |
| import { quantile } from '../../../../src/util/number'; |
| |
| |
| /** |
| * @usage |
| * |
| * ```js |
| * dataset: [{ |
| * source: [ |
| * ['aa', 'bb', 'cc', 'tag'], |
| * [12, 0.33, 5200, 'AA'], |
| * [21, 0.65, 7100, 'AA'], |
| * [51, 0.15, 1100, 'BB'], |
| * [71, 0.75, 9100, 'BB'], |
| * ... |
| * ] |
| * }, { |
| * transform: { |
| * type: 'my:aggregate', |
| * config: { |
| * resultDimensions: [ |
| * // by default, use the same name with `from`. |
| * { from: 'aa', method: 'sum' }, |
| * { from: 'bb', method: 'count' }, |
| * { from: 'cc' }, // method by default: use the first value. |
| * { from: 'dd', method: 'Q1' }, |
| * { from: 'tag' } |
| * ], |
| * groupBy: 'tag' |
| * } |
| * } |
| * // Then the result data will be: |
| * // [ |
| * // ['aa', 'bb', 'cc', 'tag'], |
| * // [12, 0.33, 5200, 'AA'], |
| * // [21, 0.65, 8100, 'BB'], |
| * // ... |
| * // ] |
| * }] |
| * ``` |
| * |
| * Current supported methods (case insensitive): |
| * 'sum' |
| * 'count' |
| * 'average' |
| * 'Q1' |
| * 'Q3' |
| * 'Q2' or 'median' |
| * 'min' |
| * 'max' |
| */ |
| |
| export interface AggregateTransformOption extends DataTransformOption { |
| type: 'myTransform:aggregate'; |
| config: { |
| // Mandatory |
| resultDimensions: { |
| // Optional. The name of the result dimensions. |
| // If not provided, inherit the name from `from`. |
| name: DimensionName; |
| // Mandatory. `from` is used to reference dimension from `source`. |
| from: DimensionLoose; |
| // Optional. Aggregate method. Currently only these method supported. |
| // If not provided, use `'first'`. |
| method: AggregateMethodLoose; |
| }[]; |
| // Optional |
| groupBy: DimensionLoose; |
| }; |
| } |
| |
| const METHOD_INTERNAL = { |
| 'SUM': true, |
| 'COUNT': true, |
| 'FIRST': true, |
| 'AVERAGE': true, |
| 'Q1': true, |
| 'Q2': true, |
| 'Q3': true, |
| 'MIN': true, |
| 'MAX': true |
| } as const; |
| const METHOD_NEEDS_COLLECT = { |
| AVERAGE: ['COUNT'] |
| } as const; |
| const METHOD_NEEDS_GATHER_VALUES = { |
| Q1: true, |
| Q2: true, |
| Q3: true |
| } as const; |
| const METHOD_ALIAS = { |
| MEDIAN: 'Q2' |
| } as const; |
| |
| type AggregateMethodLoose = |
| AggregateMethodInternal |
| | 'sum' | 'count' | 'first' | 'average' | 'Q1' | 'Q2' | 'Q3' | 'median' | 'min' | 'max'; |
| type AggregateMethodInternal = keyof typeof METHOD_INTERNAL; |
| |
| |
| class ResultDimInfoInternal { |
| |
| readonly method: AggregateMethodInternal; |
| readonly name: DimensionName; |
| readonly index: number; |
| readonly indexInUpstream: number; |
| |
| readonly collectionInfoList = [] as { |
| method: AggregateMethodInternal; |
| indexInLine: number; |
| }[]; |
| |
| // FIXME: refactor |
| readonly gatheredValuesByGroup: { [groupVal: string]: number[] } = {}; |
| readonly gatheredValuesNoGroup = [] as number[]; |
| readonly needGatherValues: boolean = false; |
| |
| __collectionResult: TravelResult<CollectionResultLine>; |
| |
| private _collectionInfoMap = {} as { |
| // number is the index of `list` |
| [method in AggregateMethodInternal]: number |
| }; |
| |
| constructor( |
| index: number, |
| indexInUpstream: number, |
| method: AggregateMethodInternal, |
| name: DimensionName, |
| needGatherValues: boolean |
| ) { |
| this.method = method; |
| this.name = name; |
| this.index = index; |
| this.indexInUpstream = indexInUpstream; |
| this.needGatherValues = needGatherValues; |
| } |
| |
| addCollectionInfo(item: ResultDimInfoInternal['collectionInfoList'][number]) { |
| this._collectionInfoMap[item.method] = this.collectionInfoList.length; |
| this.collectionInfoList.push(item); |
| } |
| |
| getCollectionInfo(method: AggregateMethodInternal) { |
| return this.collectionInfoList[this._collectionInfoMap[method]]; |
| } |
| |
| // FIXME: temp implementation. Need refactor. |
| gatherValue(groupByDimInfo: ExternalDimensionDefinition, groupVal: OptionDataValue, value: OptionDataValue) { |
| // FIXME: convert to number compulsorily temporarily. |
| value = +value; |
| if (groupByDimInfo) { |
| if (groupVal != null) { |
| const groupValStr = groupVal + ''; |
| const values = this.gatheredValuesByGroup[groupValStr] |
| || (this.gatheredValuesByGroup[groupValStr] = []); |
| values.push(value); |
| } |
| } |
| else { |
| this.gatheredValuesNoGroup.push(value); |
| } |
| } |
| } |
| |
| type CreateInTravel<LINE> = ( |
| upstream: ExternalSource, |
| dataIndex: number, |
| dimInfoList: ResultDimInfoInternal[], |
| groupByDimInfo?: ExternalDimensionDefinition, |
| groupByVal?: OptionDataValue |
| ) => LINE; |
| type UpdateInTravel<LINE> = ( |
| upstream: ExternalSource, |
| dataIndex: number, |
| targetLine: LINE, |
| dimInfoList: ResultDimInfoInternal[], |
| groupByDimInfo?: ExternalDimensionDefinition, |
| groupByVal?: OptionDataValue |
| ) => void; |
| |
| export const transform: ExternalDataTransform<AggregateTransformOption> = { |
| |
| type: 'myTransform:aggregate', |
| |
| transform: function (params) { |
| const upstream = params.upstream; |
| const config = params.config; |
| |
| const groupByDimInfo = prepareGroupByDimInfo(config, upstream); |
| const { finalResultDimInfoList, collectionDimInfoList } = prepareDimensions( |
| config, upstream, groupByDimInfo |
| ); |
| |
| // Collect |
| let collectionResult: TravelResult<CollectionResultLine>; |
| if (collectionDimInfoList.length) { |
| collectionResult = travel( |
| groupByDimInfo, |
| upstream, |
| collectionDimInfoList, |
| createCollectionResultLine, |
| updateCollectionResultLine |
| ); |
| } |
| |
| each(collectionDimInfoList, dimInfo => { |
| dimInfo.__collectionResult = collectionResult; |
| // FIXME: just for Q1, Q2, Q3: need asc. |
| asc(dimInfo.gatheredValuesNoGroup); |
| each(dimInfo.gatheredValuesByGroup, values => { |
| asc(values); |
| }); |
| }); |
| |
| // Calculate |
| const finalResult = travel( |
| groupByDimInfo, |
| upstream, |
| finalResultDimInfoList, |
| createFinalResultLine, |
| updateFinalResultLine |
| ); |
| |
| return { |
| dimensions: map(finalResultDimInfoList, item => item.name), |
| data: finalResult.outList |
| }; |
| } |
| }; |
| |
| function prepareDimensions( |
| config: AggregateTransformOption['config'], |
| upstream: ExternalSource, |
| groupByDimInfo: ExternalDimensionDefinition |
| ): { |
| finalResultDimInfoList: ResultDimInfoInternal[]; |
| collectionDimInfoList: ResultDimInfoInternal[]; |
| } { |
| const resultDimensionsConfig = config.resultDimensions; |
| const finalResultDimInfoList: ResultDimInfoInternal[] = []; |
| const collectionDimInfoList: ResultDimInfoInternal[] = []; |
| let gIndexInLine = 0; |
| |
| for (let i = 0; i < resultDimensionsConfig.length; i++) { |
| const resultDimInfoConfig = resultDimensionsConfig[i]; |
| |
| const dimInfoInUpstream = upstream.getDimensionInfo(resultDimInfoConfig.from); |
| assert(dimInfoInUpstream, 'Can not find dimension by `from`: ' + resultDimInfoConfig.from); |
| |
| const rawMethod = resultDimInfoConfig.method; |
| |
| assert( |
| groupByDimInfo.index !== dimInfoInUpstream.index || rawMethod == null, |
| `Dimension ${dimInfoInUpstream.name} is the "groupBy" dimension, must not have any "method".` |
| ); |
| |
| const method = normalizeMethod(rawMethod); |
| assert(method, 'method is required'); |
| |
| const name = resultDimInfoConfig.name != null ? resultDimInfoConfig.name : dimInfoInUpstream.name; |
| |
| const finalResultDimInfo = new ResultDimInfoInternal( |
| finalResultDimInfoList.length, |
| dimInfoInUpstream.index, |
| method, |
| name, |
| hasOwn(METHOD_NEEDS_GATHER_VALUES, method) |
| ); |
| finalResultDimInfoList.push(finalResultDimInfo); |
| |
| // For collection. |
| let needCollect = false; |
| if (hasOwn(METHOD_NEEDS_COLLECT, method)) { |
| needCollect = true; |
| const collectionTargetMethods = METHOD_NEEDS_COLLECT[method as keyof typeof METHOD_NEEDS_COLLECT]; |
| for (let j = 0; j < collectionTargetMethods.length; j++) { |
| finalResultDimInfo.addCollectionInfo({ |
| method: collectionTargetMethods[j], |
| indexInLine: gIndexInLine++ |
| }); |
| } |
| } |
| if (hasOwn(METHOD_NEEDS_GATHER_VALUES, method)) { |
| needCollect = true; |
| } |
| if (needCollect) { |
| collectionDimInfoList.push(finalResultDimInfo); |
| } |
| } |
| |
| return { collectionDimInfoList, finalResultDimInfoList }; |
| } |
| |
| function prepareGroupByDimInfo( |
| config: AggregateTransformOption['config'], |
| upstream: ExternalSource |
| ): ExternalDimensionDefinition { |
| const groupByConfig = config.groupBy; |
| let groupByDimInfo; |
| if (groupByConfig != null) { |
| groupByDimInfo = upstream.getDimensionInfo(groupByConfig); |
| assert(groupByDimInfo, 'Can not find dimension by `groupBy`: ' + groupByConfig); |
| } |
| return groupByDimInfo; |
| } |
| |
| interface TravelResult<LINE> { |
| mapByGroup: { [groupVal: string]: LINE }; |
| outList: LINE[]; |
| } |
| |
| function travel<LINE>( |
| groupByDimInfo: ExternalDimensionDefinition, |
| upstream: ExternalSource, |
| resultDimInfoList: ResultDimInfoInternal[], |
| doCreate: CreateInTravel<LINE>, |
| doUpdate: UpdateInTravel<LINE> |
| ): TravelResult<LINE> { |
| const outList: TravelResult<LINE>['outList'] = []; |
| let mapByGroup: TravelResult<LINE>['mapByGroup']; |
| |
| if (groupByDimInfo) { |
| mapByGroup = {}; |
| |
| for (let dataIndex = 0, len = upstream.count(); dataIndex < len; dataIndex++) { |
| const groupByVal = upstream.retrieveValue(dataIndex, groupByDimInfo.index); |
| |
| // PENDING: when value is null/undefined |
| if (groupByVal == null) { |
| continue; |
| } |
| |
| const groupByValStr = groupByVal + ''; |
| |
| if (!hasOwn(mapByGroup, groupByValStr)) { |
| const newLine = doCreate(upstream, dataIndex, resultDimInfoList, groupByDimInfo, groupByVal); |
| outList.push(newLine); |
| mapByGroup[groupByValStr] = newLine; |
| } |
| else { |
| const targetLine = mapByGroup[groupByValStr]; |
| doUpdate(upstream, dataIndex, targetLine, resultDimInfoList, groupByDimInfo, groupByVal); |
| } |
| } |
| } |
| else { |
| const targetLine = doCreate(upstream, 0, resultDimInfoList); |
| outList.push(targetLine); |
| for (let dataIndex = 1, len = upstream.count(); dataIndex < len; dataIndex++) { |
| doUpdate(upstream, dataIndex, targetLine, resultDimInfoList); |
| } |
| } |
| |
| return { mapByGroup, outList }; |
| } |
| |
| function normalizeMethod(method: AggregateMethodLoose): AggregateMethodInternal { |
| if (method == null) { |
| return 'FIRST'; |
| } |
| let methodInternal = method.toUpperCase() as AggregateMethodInternal; |
| methodInternal = hasOwn(METHOD_ALIAS, methodInternal) |
| ? METHOD_ALIAS[methodInternal as keyof typeof METHOD_ALIAS] |
| : methodInternal; |
| assert(hasOwn(METHOD_INTERNAL, methodInternal), `Illegal method ${method}.`); |
| return methodInternal; |
| } |
| |
| |
| |
| type CollectionResultLine = number[]; |
| |
| const createCollectionResultLine: CreateInTravel<CollectionResultLine> = ( |
| upstream, dataIndex, collectionDimInfoList, groupByDimInfo, groupByVal |
| ) => { |
| const newLine = [] as number[]; |
| for (let i = 0; i < collectionDimInfoList.length; i++) { |
| const dimInfo = collectionDimInfoList[i]; |
| const collectionInfoList = dimInfo.collectionInfoList; |
| for (let j = 0; j < collectionInfoList.length; j++) { |
| const collectionInfo = collectionInfoList[j]; |
| // FIXME: convert to number compulsorily temporarily. |
| newLine[collectionInfo.indexInLine] = +lineCreator[collectionInfo.method]( |
| upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal |
| ); |
| } |
| // FIXME: refactor |
| if (dimInfo.needGatherValues) { |
| const val = upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream); |
| dimInfo.gatherValue(groupByDimInfo, groupByVal, val); |
| } |
| } |
| return newLine; |
| }; |
| |
| const updateCollectionResultLine: UpdateInTravel<CollectionResultLine> = ( |
| upstream, dataIndex, targetLine: number[], collectionDimInfoList, groupByDimInfo, groupByVal |
| ) => { |
| for (let i = 0; i < collectionDimInfoList.length; i++) { |
| const dimInfo = collectionDimInfoList[i]; |
| const collectionInfoList = dimInfo.collectionInfoList; |
| for (let j = 0; j < collectionInfoList.length; j++) { |
| const collectionInfo = collectionInfoList[j]; |
| const indexInLine = collectionInfo.indexInLine; |
| // FIXME: convert to number compulsorily temporarily. |
| targetLine[indexInLine] = +lineUpdater[collectionInfo.method]( |
| targetLine[indexInLine], upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal |
| ); |
| } |
| // FIXME: refactor |
| if (dimInfo.needGatherValues) { |
| const val = upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream); |
| dimInfo.gatherValue(groupByDimInfo, groupByVal, val); |
| } |
| } |
| }; |
| |
| |
| |
| type FinalResultLine = OptionDataValue[]; |
| |
| const createFinalResultLine: CreateInTravel<FinalResultLine> = ( |
| upstream, dataIndex, finalResultDimInfoList, groupByDimInfo, groupByVal |
| ) => { |
| const newLine = []; |
| for (let i = 0; i < finalResultDimInfoList.length; i++) { |
| const dimInfo = finalResultDimInfoList[i]; |
| const method = dimInfo.method; |
| newLine[i] = isGroupByDimension(groupByDimInfo, dimInfo) |
| ? groupByVal |
| : lineCreator[method]( |
| upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal |
| ); |
| } |
| return newLine; |
| }; |
| |
| const updateFinalResultLine: UpdateInTravel<FinalResultLine> = ( |
| upstream, dataIndex, targetLine, finalResultDimInfoList, groupByDimInfo, groupByVal |
| ) => { |
| for (let i = 0; i < finalResultDimInfoList.length; i++) { |
| const dimInfo = finalResultDimInfoList[i]; |
| if (isGroupByDimension(groupByDimInfo, dimInfo)) { |
| continue; |
| } |
| const method = dimInfo.method; |
| targetLine[i] = lineUpdater[method]( |
| targetLine[i], upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal |
| ); |
| } |
| }; |
| |
| function isGroupByDimension( |
| groupByDimInfo: ExternalDimensionDefinition, |
| targetDimInfo: ResultDimInfoInternal |
| ): boolean { |
| return groupByDimInfo && targetDimInfo.indexInUpstream === groupByDimInfo.index; |
| } |
| |
| function asc(list: number[]) { |
| list.sort((a, b) => { |
| return a - b; |
| }); |
| } |
| |
| const lineCreator: { |
| [key in AggregateMethodInternal]: ( |
| upstream: ExternalSource, |
| dataIndex: number, |
| dimInfo: ResultDimInfoInternal, |
| groupByDimInfo: ExternalDimensionDefinition, |
| groupByVal: OptionDataValue |
| ) => OptionDataValue |
| } = { |
| 'SUM'() { |
| return 0; |
| }, |
| 'COUNT'() { |
| return 1; |
| }, |
| 'FIRST'(upstream, dataIndex, dimInfo) { |
| return upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream); |
| }, |
| 'MIN'(upstream, dataIndex, dimInfo) { |
| return upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream); |
| }, |
| 'MAX'(upstream, dataIndex, dimInfo) { |
| return upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream); |
| }, |
| 'AVERAGE'(upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal) { |
| // FIXME: refactor, bad implementation. |
| const collectLine = groupByDimInfo |
| ? dimInfo.__collectionResult.mapByGroup[groupByVal + ''] |
| : dimInfo.__collectionResult.outList[0]; |
| return (upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream) as number) |
| / collectLine[dimInfo.getCollectionInfo('COUNT').indexInLine]; |
| }, |
| // FIXME: refactor |
| 'Q1'(upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal) { |
| return lineCreatorForQ(0.25, dimInfo, groupByDimInfo, groupByVal); |
| }, |
| 'Q2'(upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal) { |
| return lineCreatorForQ(0.5, dimInfo, groupByDimInfo, groupByVal); |
| }, |
| 'Q3'(upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal) { |
| return lineCreatorForQ(0.75, dimInfo, groupByDimInfo, groupByVal); |
| } |
| }; |
| |
| const lineUpdater: { |
| [key in AggregateMethodInternal]: ( |
| val: OptionDataValue, |
| upstream: ExternalSource, |
| dataIndex: number, |
| dimInfo: ResultDimInfoInternal, |
| groupByDimInfo: ExternalDimensionDefinition, |
| groupByVal: OptionDataValue |
| ) => OptionDataValue |
| } = { |
| 'SUM'(val, upstream, dataIndex, dimInfo) { |
| // FIXME: handle other types |
| return (val as number) + (upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream) as number); |
| }, |
| 'COUNT'(val) { |
| return (val as number) + 1; |
| }, |
| 'FIRST'(val) { |
| return val; |
| }, |
| 'MIN'(val, upstream, dataIndex, dimInfo) { |
| return Math.min(val as number, upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream) as number); |
| }, |
| 'MAX'(val, upstream, dataIndex, dimInfo) { |
| return Math.max(val as number, upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream) as number); |
| }, |
| 'AVERAGE'(val, upstream, dataIndex, dimInfo, groupByDimInfo, groupByVal) { |
| // FIXME: refactor, bad implementation. |
| const collectLine = groupByDimInfo |
| ? dimInfo.__collectionResult.mapByGroup[groupByVal + ''] |
| : dimInfo.__collectionResult.outList[0]; |
| return (val as number) |
| + (upstream.retrieveValue(dataIndex, dimInfo.indexInUpstream) as number) |
| / collectLine[dimInfo.getCollectionInfo('COUNT').indexInLine]; |
| }, |
| 'Q1'(val, upstream, dataIndex, dimInfo) { |
| return val; |
| }, |
| 'Q2'(val, upstream, dataIndex, dimInfo) { |
| return val; |
| }, |
| 'Q3'(val, upstream, dataIndex, dimInfo) { |
| return val; |
| } |
| }; |
| |
| function lineCreatorForQ( |
| percent: number, |
| dimInfo: ResultDimInfoInternal, |
| groupByDimInfo: ExternalDimensionDefinition, |
| groupByVal: OptionDataValue |
| ) { |
| const gatheredValues = groupByDimInfo |
| ? dimInfo.gatheredValuesByGroup[groupByVal + ''] |
| : dimInfo.gatheredValuesNoGroup; |
| return quantile(gatheredValues, percent); |
| } |