python/pycarbon/sdk/CarbonSchemaReader.py - carbondata - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.


 class CarbonSchemaReader(object):
   """
   Python-side wrapper that delegates to the Java class
   ``org.apache.carbondata.sdk.file.CarbonSchemaReader`` through pyjnius.

   How to use it:
   1. create CarbonSchemaReader: carbonSchemaReader = CarbonSchemaReader()
   2. read schema from path: schema = carbonSchemaReader.readSchema(path)
   path can be a folder, a carbonindex file or a carbondata file.
   """

   def __init__(self):
     """Resolve the Java classes this wrapper delegates to."""
     # The import is local, so jnius is only loaded when a reader is created,
     # not when this module is imported.
     from jnius import autoclass
     self.carbonSchemaReader = autoclass('org.apache.carbondata.sdk.file.CarbonSchemaReader')
     self.Schema = autoclass('org.apache.carbondata.sdk.file.Schema')

   def readSchema(self, path, getAsBuffer=False, validateSchema=False, conf=None):
     """
     Read CarbonData schema from path.

     :param path: data path; can be a folder, a carbonindex file name or a carbondata file name
     :param getAsBuffer: when truthy, return the result of ``getArrowSchemaAsBytes(path)``
       instead of a schema object; ``validateSchema`` and ``conf`` are not used on that path
     :param validateSchema: whether to validate the schema (forwarded to the Java reader)
     :param conf: configuration for ak, sk, endpoint and so on; left out of the Java call when None
     :return: the value of ``getArrowSchemaAsBytes(path)`` when ``getAsBuffer`` is truthy,
       otherwise the schema read from ``path`` after ``asOriginOrder()`` has been applied
     """
     if getAsBuffer:
       return self.carbonSchemaReader.getArrowSchemaAsBytes(path)
     # The Java method is called with two or three arguments depending on
     # whether a configuration was supplied.
     if conf is None:
       schema = self.carbonSchemaReader.readSchema(path, validateSchema)
     else:
       schema = self.carbonSchemaReader.readSchema(path, validateSchema, conf)
     return schema.asOriginOrder()

   def reorderSchemaBasedOnProjection(self, columns, schema):
     """
     Build a new schema whose fields follow the order of ``columns``.

     Column names are compared with field names case-insensitively (``casefold``).
     A column without a matching field is skipped. When several fields share the
     same case-folded name, the first one returned by ``schema.getFields()`` wins.

     :param columns: iterable of column names giving the desired field order
     :param schema: schema object exposing ``getFields()``
     :return: a new ``Schema`` constructed from the selected fields
     """
     # Index the fields once so getFieldName() is called a single time per field
     # instead of once per (column, field) pair; setdefault keeps the first match.
     fieldsByName = {}
     for field in schema.getFields():
       fieldsByName.setdefault(field.getFieldName().casefold(), field)

     foldedColumns = (column.casefold() for column in columns)
     updateFields = [fieldsByName[name] for name in foldedColumns if name in fieldsByName]
     return self.Schema(updateFields)

   def getProjectionBasedOnSchema(self, schema):
     """
     List the field names of ``schema`` in the order ``getFields()`` returns them.

     :param schema: schema object exposing ``getFields()``
     :return: list with one ``getFieldName()`` result per field
     """
     return [field.getFieldName() for field in schema.getFields()]
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	class CarbonSchemaReader(object):
	  """
	  Python-side wrapper that delegates to the Java class
	  ``org.apache.carbondata.sdk.file.CarbonSchemaReader`` through pyjnius.

	  How to use it:
	  1. create CarbonSchemaReader: carbonSchemaReader = CarbonSchemaReader()
	  2. read schema from path: schema = carbonSchemaReader.readSchema(path)
	  path can be a folder, a carbonindex file or a carbondata file.
	  """

	  def __init__(self):
	    """Resolve the Java classes this wrapper delegates to."""
	    # The import is local, so jnius is only loaded when a reader is created,
	    # not when this module is imported.
	    from jnius import autoclass
	    self.carbonSchemaReader = autoclass('org.apache.carbondata.sdk.file.CarbonSchemaReader')
	    self.Schema = autoclass('org.apache.carbondata.sdk.file.Schema')

	  def readSchema(self, path, getAsBuffer=False, validateSchema=False, conf=None):
	    """
	    Read CarbonData schema from path.

	    :param path: data path; can be a folder, a carbonindex file name or a carbondata file name
	    :param getAsBuffer: when truthy, return the result of ``getArrowSchemaAsBytes(path)``
	      instead of a schema object; ``validateSchema`` and ``conf`` are not used on that path
	    :param validateSchema: whether to validate the schema (forwarded to the Java reader)
	    :param conf: configuration for ak, sk, endpoint and so on; left out of the Java call when None
	    :return: the value of ``getArrowSchemaAsBytes(path)`` when ``getAsBuffer`` is truthy,
	      otherwise the schema read from ``path`` after ``asOriginOrder()`` has been applied
	    """
	    if getAsBuffer:
	      return self.carbonSchemaReader.getArrowSchemaAsBytes(path)
	    # The Java method is called with two or three arguments depending on
	    # whether a configuration was supplied.
	    if conf is None:
	      schema = self.carbonSchemaReader.readSchema(path, validateSchema)
	    else:
	      schema = self.carbonSchemaReader.readSchema(path, validateSchema, conf)
	    return schema.asOriginOrder()

	  def reorderSchemaBasedOnProjection(self, columns, schema):
	    """
	    Build a new schema whose fields follow the order of ``columns``.

	    Column names are compared with field names case-insensitively (``casefold``).
	    A column without a matching field is skipped. When several fields share the
	    same case-folded name, the first one returned by ``schema.getFields()`` wins.

	    :param columns: iterable of column names giving the desired field order
	    :param schema: schema object exposing ``getFields()``
	    :return: a new ``Schema`` constructed from the selected fields
	    """
	    # Index the fields once so getFieldName() is called a single time per field
	    # instead of once per (column, field) pair; setdefault keeps the first match.
	    fieldsByName = {}
	    for field in schema.getFields():
	      fieldsByName.setdefault(field.getFieldName().casefold(), field)

	    foldedColumns = (column.casefold() for column in columns)
	    updateFields = [fieldsByName[name] for name in foldedColumns if name in fieldsByName]
	    return self.Schema(updateFields)

	  def getProjectionBasedOnSchema(self, schema):
	    """
	    List the field names of ``schema`` in the order ``getFields()`` returns them.

	    :param schema: schema object exposing ``getFields()``
	    :return: list with one ``getFieldName()`` result per field
	    """
	    return [field.getFieldName() for field in schema.getFields()]