blob: cddc3697f22c33d831f3ed801d7db568f5f82bb3 [file] [log] [blame]
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import ast
import datetime
import pickle
from pyflink.common import Row, RowKind
from pyflink.common.typeinfo import (RowTypeInfo, TupleTypeInfo, Types, BasicArrayTypeInfo,
PrimitiveArrayTypeInfo, MapTypeInfo, ListTypeInfo,
ObjectArrayTypeInfo, ExternalTypeInfo)
from pyflink.java_gateway import get_gateway
def convert_to_python_obj(data, type_info):
if type_info == Types.PICKLED_BYTE_ARRAY():
return pickle.loads(data)
elif isinstance(type_info, ExternalTypeInfo):
return convert_to_python_obj(data, type_info._type_info)
else:
gateway = get_gateway()
pickle_bytes = gateway.jvm.PythonBridgeUtils. \
getPickledBytesFromJavaObject(data, type_info.get_java_type_info())
if isinstance(type_info, RowTypeInfo) or isinstance(type_info, TupleTypeInfo):
field_data = zip(list(pickle_bytes[1:]), type_info.get_field_types())
fields = []
for data, field_type in field_data:
if len(data) == 0:
fields.append(None)
else:
fields.append(pickled_bytes_to_python_converter(data, field_type))
if isinstance(type_info, RowTypeInfo):
return Row.of_kind(RowKind(int.from_bytes(pickle_bytes[0], 'little')), *fields)
else:
return tuple(fields)
else:
return pickled_bytes_to_python_converter(pickle_bytes, type_info)
def pickled_bytes_to_python_converter(data, field_type):
if isinstance(field_type, RowTypeInfo):
row_kind = RowKind(int.from_bytes(data[0], 'little'))
data = zip(list(data[1:]), field_type.get_field_types())
fields = []
for d, d_type in data:
fields.append(pickled_bytes_to_python_converter(d, d_type))
row = Row.of_kind(row_kind, *fields)
return row
else:
data = pickle.loads(data)
if field_type == Types.SQL_TIME():
seconds, microseconds = divmod(data, 10 ** 6)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
return datetime.time(hours, minutes, seconds, microseconds)
elif field_type == Types.SQL_DATE():
return field_type.from_internal_type(data)
elif field_type == Types.SQL_TIMESTAMP():
return field_type.from_internal_type(int(data.timestamp() * 10 ** 6))
elif field_type == Types.FLOAT():
return field_type.from_internal_type(ast.literal_eval(data))
elif isinstance(field_type,
(BasicArrayTypeInfo, PrimitiveArrayTypeInfo, ObjectArrayTypeInfo)):
element_type = field_type._element_type
elements = []
for element_bytes in data:
elements.append(pickled_bytes_to_python_converter(element_bytes, element_type))
return elements
elif isinstance(field_type, MapTypeInfo):
key_type = field_type._key_type_info
value_type = field_type._value_type_info
zip_kv = zip(data[0], data[1])
return dict((pickled_bytes_to_python_converter(k, key_type),
pickled_bytes_to_python_converter(v, value_type))
for k, v in zip_kv)
elif isinstance(field_type, ListTypeInfo):
element_type = field_type.elem_type
elements = []
for element_bytes in data:
elements.append(pickled_bytes_to_python_converter(element_bytes, element_type))
return elements
else:
return field_type.from_internal_type(data)