blob: b7bc7aa7cdf751e7e04d09f6c0ed3146ed406f68 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class CarbonSchemaReader(object):
    """
    Thin Python wrapper around the Java SDK class
    ``org.apache.carbondata.sdk.file.CarbonSchemaReader``, loaded through jnius.

    How to use it:
    1. create CarbonSchemaReader: carbonSchemaReader = CarbonSchemaReader()
    2. read schema from path: schema = carbonSchemaReader.readSchema(path)
       path can be a folder, a carbonindex file or a carbondata file.
    """

    def __init__(self):
        """Load the Java ``CarbonSchemaReader`` and ``Schema`` classes via jnius."""
        # jnius is imported on instantiation, not at module import time.
        from jnius import autoclass

        self.carbonSchemaReader = autoclass('org.apache.carbondata.sdk.file.CarbonSchemaReader')
        self.Schema = autoclass('org.apache.carbondata.sdk.file.Schema')

    def readSchema(self, path, getAsBuffer=False, validateSchema=False, conf=None):
        """
        Read CarbonData schema from path.

        :param path: data path, path can be a folder, carbonindex file name and carbondata file name
        :param getAsBuffer: when truthy, return whatever the Java
            ``getArrowSchemaAsBytes(path)`` call returns instead of a schema
            object; ``validateSchema`` and ``conf`` are not used in that case.
        :param validateSchema: whether validate schema.
        :param conf: configuration for ak, sk, endpoint and so on. When None,
            the two-argument Java ``readSchema`` overload is called.
        :return: the Arrow schema bytes when ``getAsBuffer`` is truthy, otherwise
            the CarbonData schema after ``asOriginOrder()`` has been applied.
        """
        if getAsBuffer:
            return self.carbonSchemaReader.getArrowSchemaAsBytes(path)

        if conf is None:
            schema = self.carbonSchemaReader.readSchema(path, validateSchema)
        else:
            schema = self.carbonSchemaReader.readSchema(path, validateSchema, conf)
        return schema.asOriginOrder()

    def reorderSchemaBasedOnProjection(self, columns, schema):
        """
        Build a new schema whose fields follow the order of ``columns``.

        Column names are matched against field names case-insensitively
        (``str.casefold``). A column with no matching field is skipped silently;
        a column listed more than once contributes its field once per listing.

        :param columns: iterable of projection column names (str).
        :param schema: schema object exposing ``getFields()``, whose items
            expose ``getFieldName()``.
        :return: a new ``Schema`` built from the selected fields.
        """
        # Index fields by case-folded name once instead of rescanning the field
        # list for every column. setdefault keeps the FIRST field when several
        # names differ only by case, matching a first-match linear scan.
        fieldByName = {}
        for field in schema.getFields():
            fieldByName.setdefault(field.getFieldName().casefold(), field)

        foldedColumns = (column.casefold() for column in columns)
        updateFields = [fieldByName[name] for name in foldedColumns if name in fieldByName]
        return self.Schema(updateFields)

    def getProjectionBasedOnSchema(self, schema):
        """
        Return the field names of ``schema`` as a list, in schema order.

        :param schema: schema object exposing ``getFields()``, whose items
            expose ``getFieldName()``.
        :return: list of field names.
        """
        return [field.getFieldName() for field in schema.getFields()]