blob: 7b345929fc0979736b61e37f543b2ab539c42868 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "wkb_parse.h"
#include <cctype>
#include <cstddef>
#include <istream>
#include <sstream>
#include <vector>
#include "geo/ByteOrderDataInStream.h"
#include "geo/ByteOrderValues.h"
#include "geo/geo_types.h"
#include "geo/wkb_parse_ctx.h"
#include "geo_tobinary_type.h"
namespace doris {
// Converts a single hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f')
// into its numeric value in the range 0..15.
//
// Any other character yields GEO_PARSE_WKB_SYNTAX_ERROR implicitly converted
// to unsigned char.
// NOTE(review): that sentinel travels through the same return type as real
// digit values; if the enumerator's numeric value is below 16 a caller cannot
// tell it apart from a valid nibble -- confirm against the enum definition.
unsigned char ASCIIHexToUChar(char val) {
    if (val >= '0' && val <= '9') {
        return static_cast<unsigned char>(val - '0');
    }
    if (val >= 'A' && val <= 'F') {
        return static_cast<unsigned char>(val - 'A' + 10);
    }
    if (val >= 'a' && val <= 'f') {
        return static_cast<unsigned char>(val - 'a' + 10);
    }
    // Not a hexadecimal digit.
    return GEO_PARSE_WKB_SYNTAX_ERROR;
}
// Parses a hex-encoded WKB geometry from `is`.
//
// On success the parsed shape pointer is written to `*shape` and GEO_PARSE_OK
// is returned. On any failure `*shape` is left untouched and
// GEO_PARSE_WKT_SYNTAX_ERROR is returned, whatever status read_hex reported.
// NOTE(review): a WKT error code coming out of the WKB parser looks like a
// slip for GEO_PARSE_WKB_SYNTAX_ERROR -- confirm what callers expect before
// changing it.
// NOTE(review): the shape is handed over as a raw pointer; presumably the
// caller takes ownership -- verify against callers.
GeoParseStatus WkbParse::parse_wkb(std::istream& is, GeoShape** shape) {
    WkbParseContext ctx;
    // read_hex returns the context pointer it was given, so this assignment
    // copies the context onto itself.
    WkbParseContext* const filled = WkbParse::read_hex(is, &ctx);
    ctx = *filled;

    if (ctx.parse_status != GEO_PARSE_OK) {
        ctx.parse_status = GEO_PARSE_WKT_SYNTAX_ERROR;
        return ctx.parse_status;
    }

    *shape = ctx.shape;
    return ctx.parse_status;
}
// Decodes a hexadecimal text stream into raw bytes and parses those bytes as
// WKB via WkbParse::read.
//
// Input is consumed two characters at a time (high nibble first). Decoding
// fails, setting ctx->parse_status to GEO_PARSE_WKB_SYNTAX_ERROR, when:
//   * the stream ends in the middle of a pair (odd number of characters), or
//   * either character of a pair is not a hexadecimal digit.
// The second check is required because ASCIIHexToUChar reports a bad character
// through its unsigned char return value, which would otherwise be folded
// silently into the decoded byte.
//
// Returns `ctx` (the same pointer that was passed in).
WkbParseContext* WkbParse::read_hex(std::istream& is, WkbParseContext* ctx) {
    // Binary in/out buffer that collects the decoded bytes for read().
    std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out);
    while (true) {
        const int input_high = is.get();
        if (input_high == std::char_traits<char>::eof()) {
            // Clean end of input on a pair boundary.
            break;
        }
        const int input_low = is.get();
        if (input_low == std::char_traits<char>::eof()) {
            // Dangling high nibble without its low counterpart.
            ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
            return ctx;
        }
        // get() yields non-EOF characters as unsigned char values, which is
        // exactly the domain std::isxdigit requires.
        if (!std::isxdigit(input_high) || !std::isxdigit(input_low)) {
            ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
            return ctx;
        }
        const unsigned char result_high = ASCIIHexToUChar(static_cast<char>(input_high));
        const unsigned char result_low = ASCIIHexToUChar(static_cast<char>(input_low));
        const unsigned char value = static_cast<unsigned char>((result_high << 4) + result_low);
        // Unformatted write of the decoded byte.
        os.put(static_cast<char>(value));
    }
    return WkbParse::read(os, ctx);
}
// Parses a binary WKB geometry from `is` into `ctx`.
//
// The whole stream is copied into a local buffer, the first byte selects the
// byte order (wkbNDR -> little endian, wkbXDR -> big endian), and the geometry
// itself is decoded by readGeometry. On success the released shape pointer is
// stored in ctx->shape and ctx->parse_status is left unchanged. On any failure
// ctx->parse_status is set to GEO_PARSE_WKB_SYNTAX_ERROR.
//
// Returns `ctx` (the same pointer that was passed in).
// NOTE(review): ctx->dis is built from a buffer local to this function; if
// ByteOrderDataInStream keeps that pointer rather than copying the bytes,
// ctx->dis must not be used after this function returns -- confirm.
WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) {
    // Measure the stream by seeking to its end, then rewind for the real read.
    is.seekg(0, std::ios::end);
    auto stream_len = is.tellg();
    is.seekg(0, std::ios::beg);

    // Rejects an empty stream as well as a failed tellg() (negative position).
    if (stream_len <= 0) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // stream_len > 0 here, so the buffer always holds at least the order byte.
    std::vector<unsigned char> buf(static_cast<size_t>(stream_len));
    if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(stream_len))) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // The leading byte is the byte-order marker; anything other than the two
    // known markers is a syntax error.
    const unsigned char order_mark = buf[0];
    const bool is_little_endian = (order_mark == byteOrder::wkbNDR);
    if (!is_little_endian && !(order_mark == byteOrder::wkbXDR)) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
    if (is_little_endian) {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
    } else {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
    }

    std::unique_ptr<GeoShape> parsed = readGeometry(ctx);
    if (parsed == nullptr) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }
    ctx->shape = parsed.release();
    return ctx;
}
// Decodes one geometry from ctx->dis.
//
// Layout consumed: 1 byte-order byte (skipped, the caller already applied it),
// a 4-byte type word, an optional 4-byte SRID when WKB_SRID_FLAG is set in the
// type word (read and discarded), then the body of a point, line or polygon.
//
// Returns nullptr when the input is shorter than 5 bytes, the masked type is
// not point/line/polygon, the body reader fails, or any exception is thrown
// while reading.
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
    try {
        // Need at least the order byte plus the 4-byte type word.
        if (ctx->dis.size() < 5) {
            return nullptr;
        }

        // Byte order was already handled by the caller; just step over it.
        ctx->dis.readByte();

        const uint32_t type_word = ctx->dis.readUnsigned();
        if ((type_word & WKB_SRID_FLAG) != 0) {
            // The SRID is consumed to advance the stream; its value is not kept.
            ctx->dis.readUnsigned();
        }

        // Strip the flag bits to obtain the base geometry type.
        switch (type_word & WKB_TYPE_MASK) {
        case wkbType::wkbPoint:
            return readPoint(ctx);
        case wkbType::wkbLine:
            return readLine(ctx);
        case wkbType::wkbPolygon:
            return readPolygon(ctx);
        default:
            return nullptr;
        }
    } catch (...) {
        // Any exception raised by the read operations is reported as "no shape".
        return nullptr;
    }
}
// Reads a single coordinate from ctx->dis and builds a GeoPoint from it.
// Returns nullptr when no coordinate could be read, the point could not be
// created, or GeoPoint::from_coord does not return GEO_PARSE_OK.
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
    GeoCoordinateList single = WkbParse::readCoordinateList(1, ctx);
    if (single.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoPoint> result = GeoPoint::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coord(single.list[0]) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
// Reads a line string: a 4-byte point count followed by that many coordinates.
// The count is first checked with minMemSize against the input size so an
// implausible count is rejected before any coordinate is read.
// Returns nullptr when that check fails, no coordinates were read, the line
// could not be created, or GeoLine::from_coords does not return GEO_PARSE_OK.
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
    const uint32_t point_count = ctx->dis.readUnsigned();
    if (minMemSize(wkbLine, point_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateList points = WkbParse::readCoordinateList(point_count, ctx);
    if (points.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoLine> result = GeoLine::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coords(points) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
// Reads a polygon: a 4-byte loop count, then for every loop a 4-byte point
// count followed by that many coordinates.
// The loop count is checked with minMemSize against the input size first.
// Returns nullptr when that check fails, a loop declares fewer than 3 points,
// a loop yields no coordinates, the polygon could not be created, or
// GeoPolygon::from_coords does not return GEO_PARSE_OK.
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
    const uint32_t loop_count = ctx->dis.readUnsigned();
    if (minMemSize(wkbPolygon, loop_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateListList loops;
    for (uint32_t loop_idx = 0; loop_idx != loop_count; ++loop_idx) {
        const uint32_t point_count = ctx->dis.readUnsigned();
        // A polygon loop must have at least 3 points.
        if (point_count < 3) {
            return nullptr;
        }

        auto ring = std::make_unique<GeoCoordinateList>();
        *ring = WkbParse::readCoordinateList(point_count, ctx);
        if (ring->list.empty()) {
            return nullptr;
        }
        // The raw pointer is handed to the list-of-lists from here on.
        loops.add(ring.release());
    }

    std::unique_ptr<GeoPolygon> result = GeoPolygon::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coords(loops) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
// Reads `size` coordinates from ctx->dis and returns them as a list.
// Each coordinate is first loaded into ctx->ordValues by readCoordinate; only
// the first two ordinates (x, y) are copied into the result.
// Returns an empty list if readCoordinate reports failure for any coordinate.
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) {
    GeoCoordinateList result;
    for (uint32_t n = 0; n != size; ++n) {
        if (!readCoordinate(ctx)) {
            return GeoCoordinateList();
        }
        GeoCoordinate point;
        point.x = ctx->ordValues[0];
        point.y = ctx->ordValues[1];
        result.add(point);
    }
    return result;
}
// Sanity-checks an element count read from the input against the input size.
//
// For wkbLine, `size` is a point count and each point needs at least two
// doubles; for wkbPolygon, `size` is a loop count and each loop needs at least
// its 4-byte point count. Any other type requires nothing.
// Returns GEO_PARSE_WKB_SYNTAX_ERROR when ctx->dis.size() is smaller than the
// computed minimum, otherwise GEO_PARSE_OK.
GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext* ctx) {
    constexpr uint64_t kMinCoordBytes = 2 * sizeof(double);
    constexpr uint64_t kMinLoopBytes = 4; // an empty loop is just its count

    uint64_t required = 0;
    if (wkbType == wkbLine) {
        required = size * kMinCoordBytes;
    } else if (wkbType == wkbPolygon) {
        required = size * kMinLoopBytes;
    }

    if (ctx->dis.size() < required) {
        return GEO_PARSE_WKB_SYNTAX_ERROR;
    }
    return GEO_PARSE_OK;
}
// Reads ctx->inputDimension doubles from ctx->dis into ctx->ordValues,
// starting at index 0. Always returns true; a failing readDouble() is not
// handled in this function.
bool WkbParse::readCoordinate(WkbParseContext* ctx) {
    std::size_t axis = 0;
    while (axis < ctx->inputDimension) {
        ctx->ordValues[axis] = ctx->dis.readDouble();
        ++axis;
    }
    return true;
}
} // namespace doris