blob: 35ae29d1b7ae4d11836f711c6058319f7356c192 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <arrow/status.h>
#include "udf/python/python_client.h"
namespace doris {
// Forward declaration so the pointer alias below can name the class before
// its full definition appears later in this header.
class PythonUDTFClient;
// std::shared_ptr handle to a PythonUDTFClient. PythonUDTFClient::create()
// takes a pointer to this type as its `client` parameter.
using PythonUDTFClientPtr = std::shared_ptr<PythonUDTFClient>;
/**
* Python UDTF Client
*
* Implements simplified UDTF (User-Defined Table Function):
*
* Handler Function:
* - evaluate_func(*args): Process input arguments and yield output rows
*
* UDTF Characteristics:
* - Takes scalar or table inputs
* - Returns table (multiple rows)
* - Simple yield pattern
*
* Example:
* ```python
* def evaluate_func(text, delimiter):
*     # Split string by delimiter and return multiple results
*     for item in text.split(delimiter):
*         # or yield (item, )
*         yield item
* ```
*
* Communication protocol with Python server:
* 1. Send input row batch to Python
* 2. Python calls evaluate_func() for each input row
* 3. Collect all output rows and return
*/
class PythonUDTFClient : public PythonClient {
public:
    PythonUDTFClient() = default;

    ~PythonUDTFClient() override = default;

    /**
     * Static factory declared for this client type.
     *
     * NOTE(review): the implementation is not visible in this header; the
     * parameter roles below are inferred from their names and types and
     * should be confirmed against the definition.
     *
     * @param func_meta Metadata of the Python function
     * @param process Handle of the Python server process
     * @param client Pointer through which the client is presumably handed back
     * @return Status
     */
    static Status create(const PythonUDFMeta& func_meta, ProcessPtr process,
                         PythonUDTFClientPtr* client);

    /**
     * Run the UDTF over a batch of input rows.
     *
     * Result protocol (ListArray-based):
     *   The Python server answers with a RecordBatch holding a single column:
     *   - Column 0: a ListArray with one list per input row; each list
     *     carries the output values produced for that input row.
     *
     * Illustration, for an input of 3 rows:
     *   Output ListArray:
     *     [0]: [val1, val2, val3]        (input row 0 produced 3 elements)
     *     [1]: []                        (input row 1 produced 0 elements)
     *     [2]: [val4, val5, val6, val7]  (input row 2 produced 4 elements)
     *
     * @param input Batch of input rows (columns = UDTF function parameters)
     * @param list_array Resulting ListArray (length = num_input_rows)
     * @return Status
     */
    Status evaluate(const arrow::RecordBatch& input,
                    std::shared_ptr<arrow::ListArray>* list_array);

private:
    // Going by the macro's name, this removes copy construction and copy
    // assignment; its definition is not visible in this header.
    DISALLOW_COPY_AND_ASSIGN(PythonUDTFClient);
};
} // namespace doris