blob: 2ecd415436b2bcfae4367d8e3f142e0999e8cece [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Package adbc defines the interfaces for Arrow Database
// Connectivity.
//
// An Arrow-based interface between applications and database
// drivers. ADBC aims to provide a vendor-independent API for SQL
// and Substrait-based database access that is targeted at
// analytics/OLAP use cases.
//
// This API is intended to be implemented directly by drivers and
// used directly by client applications. To assist portability
// between different vendors, a "driver manager" library is also
// provided, which implements this same API, but dynamically loads
// drivers internally and forwards calls appropriately.
//
// In general, it's expected for objects to allow serialized access
// safely from multiple goroutines, but not necessarily concurrent
// access. Specific implementations may allow concurrent access.
//
// EXPERIMENTAL. Interface subject to change.
package adbc
import (
"context"
"fmt"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
)
//go:generate go run golang.org/x/tools/cmd/stringer -type Status -linecomment
//go:generate go run golang.org/x/tools/cmd/stringer -type InfoCode -linecomment
// Error is the detailed error for an operation.
type Error struct {
	// Msg is a string representing a human readable error message.
	Msg string
	// Code is the ADBC status representing this error.
	Code Status
	// VendorCode is a vendor-specific error code, if applicable.
	VendorCode int32
	// SqlState is a SQLSTATE error code, if provided, as defined
	// by the SQL:2003 standard. If not set, it will be "\0\0\0\0\0".
	SqlState [5]byte
}

// Error implements the error interface.
//
// The returned string has the form "<code>: SqlState: <state>, msg: <msg>"
// when a SQLSTATE is present and "<code>: <msg>" when it is not. SqlState is
// a fixed-size array that is NUL-filled when unset, so it is cut at the first
// NUL byte before formatting; this keeps "\x00" characters out of the message
// (they are invisible in logs and truncate the text when it is handed to
// anything that treats it as a C string).
func (e Error) Error() string {
	state := e.SqlState[:]
	for i, b := range state {
		if b == 0 {
			state = state[:i]
			break
		}
	}
	if len(state) == 0 {
		// No SQLSTATE was provided: leave the segment out entirely.
		return fmt.Sprintf("%s: %s", e.Code, e.Msg)
	}
	return fmt.Sprintf("%s: SqlState: %s, msg: %s", e.Code, string(state), e.Msg)
}
// Status represents an error code for operations that may fail.
//
// Every value is spelled out explicitly so that inserting or reordering
// constants cannot silently renumber the codes. The trailing line comment on
// each constant is consumed by the stringer -linecomment go:generate
// directive in this file and becomes the text of the generated String
// method, so those comments are part of the program's output.
type Status uint8

const (
	// StatusOK means no error occurred.
	StatusOK Status = 0 // OK

	// StatusUnknown means an unknown error occurred.
	//
	// May indicate a driver-side or database-side error.
	StatusUnknown Status = 1 // Unknown

	// StatusNotImplemented means the operation is not implemented or
	// supported.
	//
	// May indicate a driver-side or database-side error.
	StatusNotImplemented Status = 2 // Not Implemented

	// StatusNotFound means a requested resource was not found.
	//
	// May indicate a driver-side or database-side error.
	StatusNotFound Status = 3 // Not Found

	// StatusAlreadyExists means a requested resource already exists.
	//
	// May indicate a driver-side or database-side error.
	StatusAlreadyExists Status = 4 // Already Exists

	// StatusInvalidArgument means the arguments are invalid, likely a
	// programming error.
	//
	// For instance, they may be of the wrong format, or out of range.
	//
	// May indicate a driver-side or database-side error.
	StatusInvalidArgument Status = 5 // Invalid Argument

	// StatusInvalidState means the preconditions for the operation are not
	// met, likely a programming error.
	//
	// For instance, the object may be uninitialized, or may not
	// have been fully configured.
	//
	// May indicate a driver-side or database-side error.
	StatusInvalidState Status = 6 // Invalid State

	// StatusInvalidData means invalid data was processed (not a
	// programming error).
	//
	// For instance, a division by zero may have occurred during query
	// execution.
	//
	// May indicate a database-side error only.
	StatusInvalidData Status = 7 // Invalid Data

	// StatusIntegrity means the database's integrity was affected.
	//
	// For instance, a foreign key check may have failed, or a uniqueness
	// constraint may have been violated.
	//
	// May indicate a database-side error only.
	StatusIntegrity Status = 8 // Integrity Issue

	// StatusInternal means an error internal to the driver or database
	// occurred.
	//
	// May indicate a driver-side or database-side error.
	StatusInternal Status = 9 // Internal

	// StatusIO means an I/O error occurred.
	//
	// For instance a remote service may be unavailable.
	//
	// May indicate a driver-side or database-side error.
	StatusIO Status = 10 // I/O

	// StatusCancelled means the operation was cancelled, not due to a
	// timeout.
	//
	// May indicate a driver-side or database-side error.
	StatusCancelled Status = 11 // Cancelled

	// StatusTimeout means the operation was cancelled due to a timeout.
	//
	// May indicate a driver-side or database-side error.
	StatusTimeout Status = 12 // Timeout

	// StatusUnauthenticated means authentication failed.
	//
	// May indicate a database-side error only.
	StatusUnauthenticated Status = 13 // Unauthenticated

	// StatusUnauthorized means the client is not authorized to perform the
	// given operation.
	//
	// May indicate a database-side error only.
	StatusUnauthorized Status = 14 // Unauthorized
)
// Canonical values for options that are switched on or off.
const (
	OptionValueEnabled  = "true"
	OptionValueDisabled = "false"
)

// Canonical option keys in the "adbc.connection." namespace.
const (
	OptionKeyAutoCommit     = "adbc.connection.autocommit"
	OptionKeyReadOnly       = "adbc.connection.readonly"
	OptionKeyIsolationLevel = "adbc.connection.transaction.isolation_level"
)

// Canonical option keys and values in the "adbc.ingest." namespace.
const (
	OptionKeyIngestTargetTable  = "adbc.ingest.target_table"
	OptionKeyIngestMode         = "adbc.ingest.mode"
	OptionValueIngestModeCreate = "adbc.ingest.mode.create"
	OptionValueIngestModeAppend = "adbc.ingest.mode.append"
)

// Canonical un-namespaced option keys.
const (
	OptionKeyURI      = "uri"
	OptionKeyUsername = "username"
	OptionKeyPassword = "password"
)
// OptionIsolationLevel is the string type of the canonical transaction
// isolation level values.
//
// NOTE(review): presumably these are the values accepted for
// OptionKeyIsolationLevel; confirm against driver implementations.
type OptionIsolationLevel string

// Canonical transaction isolation levels, all in the
// "adbc.connection.transaction.isolation." namespace.
const (
	LevelDefault         = OptionIsolationLevel("adbc.connection.transaction.isolation.default")
	LevelReadUncommitted = OptionIsolationLevel("adbc.connection.transaction.isolation.read_uncommitted")
	LevelReadCommitted   = OptionIsolationLevel("adbc.connection.transaction.isolation.read_committed")
	LevelRepeatableRead  = OptionIsolationLevel("adbc.connection.transaction.isolation.repeatable_read")
	LevelSnapshot        = OptionIsolationLevel("adbc.connection.transaction.isolation.snapshot")
	LevelSerializable    = OptionIsolationLevel("adbc.connection.transaction.isolation.serializable")
	LevelLinearizable    = OptionIsolationLevel("adbc.connection.transaction.isolation.linearizable")
)
// Driver is the entry point for the interface. It is similar to
// database/sql.Driver taking a map of keys and values as options
// to initialize a Connection to the database. Any common connection
// state can live in the Driver itself, for example an in-memory database
// can place ownership of the actual database in this driver.
//
// Any connection specific options should be set using SetOptions before
// calling Open.
//
// The context.Context provided to Open is for dialing purposes only
// (see net.DialContext) and should not be stored or used for other purposes.
// A default timeout should still be used when dialing as a connection
// pool may call Open asynchronously to any query.
//
// A driver can also optionally implement io.Closer if there is a need
// or desire for it.
type Driver interface {
// NewDatabase creates a Database from the given map of option keys and
// values.
NewDatabase(opts map[string]string) (Database, error)
}
// Database is the object returned by Driver.NewDatabase. It carries
// options and is used to open Connections.
type Database interface {
// SetOptions sets options from a map of keys and values. Any connection
// specific options should be set here before calling Open.
SetOptions(map[string]string) error

// Open opens a Connection to the database. The provided context is for
// dialing purposes only (see net.DialContext) and should not be stored
// or used for other purposes.
Open(ctx context.Context) (Connection, error)
}
// InfoCode is an integer code identifying one piece of database/driver
// metadata. A list of codes is passed to Connection.GetInfo to select
// which metadata to return.
//
// The trailing line comment on each constant is consumed by the
// stringer -linecomment go:generate directive in this file and becomes the
// text of the generated String method; change it only deliberately.
type InfoCode uint32
const (
// The database vendor/product name (e.g. the server name)
// (type: utf8)
InfoVendorName InfoCode = 0 // VendorName
// The database vendor/product version (type: utf8)
InfoVendorVersion InfoCode = 1 // VendorVersion
// The database vendor/product Arrow library version (type: utf8)
InfoVendorArrowVersion InfoCode = 2 // VendorArrowVersion
// The driver name (type: utf8)
InfoDriverName InfoCode = 100 // DriverName
// The driver version (type: utf8)
InfoDriverVersion InfoCode = 101 // DriverVersion
// The driver Arrow library version (type: utf8)
InfoDriverArrowVersion InfoCode = 102 // DriverArrowVersion
)
// ObjectDepth is the type of the depth argument of Connection.GetObjects.
//
// NOTE(review): presumably it selects how far down the catalog, database
// schema, table, column hierarchy the result descends; confirm against
// driver implementations.
type ObjectDepth int

// Values are written out explicitly so that reordering the list cannot
// renumber them.
const (
	ObjectDepthAll       ObjectDepth = 0
	ObjectDepthCatalogs  ObjectDepth = 1
	ObjectDepthDBSchemas ObjectDepth = 2
	ObjectDepthTables    ObjectDepth = 3

	// ObjectDepthColumns has the same value as ObjectDepthAll.
	ObjectDepthColumns = ObjectDepthAll
)
// Connection is an active Database connection.
//
// It provides methods for creating statements, using transactions
// and so on.
//
// Connections are not required to be safely accessible by concurrent
// goroutines.
type Connection interface {
// Metadata methods
//
// Generally these methods return an array.RecordReader that
// can be consumed to retrieve metadata about the database as Arrow
// data. The returned metadata has an expected schema given in the
// doc strings of the specific methods. Schema fields are nullable
// unless otherwise marked. While no Statement is used in these
// methods, the result set may count as an active statement to the
// driver for the purposes of concurrency management (e.g. if the
// driver has a limit on concurrent active statements and it must
// execute a SQL query internally in order to implement the metadata
// method).
//
// Some methods accept "search pattern" arguments, which are strings
// that can contain the special character "%" to match zero or more
// characters, or "_" to match exactly one character. (See the
// documentation of DatabaseMetaData in JDBC or "Pattern Value Arguments"
// in the ODBC documentation.) Escaping is not currently supported.

// GetInfo returns metadata about the database/driver.
//
// The result is an Arrow dataset with the following schema:
//
//	Field Name                  | Field Type
//	----------------------------|-----------------------------
//	info_name                   | uint32 not null
//	info_value                  | INFO_SCHEMA
//
// INFO_SCHEMA is a dense union with members:
//
//	Field Name (Type Code)      | Field Type
//	----------------------------|-----------------------------
//	string_value (0)            | utf8
//	bool_value (1)              | bool
//	int64_value (2)             | int64
//	int32_bitmask (3)           | int32
//	string_list (4)             | list<utf8>
//	int32_to_int32_list_map (5) | map<int32, list<int32>>
//
// Each metadatum is identified by an integer code. The recognized
// codes are defined as constants. Codes [0, 10_000) are reserved
// for ADBC usage. Drivers/vendors will ignore requests for unrecognized
// codes (the row will be omitted from the result).
GetInfo(ctx context.Context, infoCodes []InfoCode) (array.RecordReader, error)

// GetObjects gets a hierarchical view of all catalogs, database schemas,
// tables, and columns.
//
// The result is an Arrow Dataset with the following schema:
//
//	Field Name                  | Field Type
//	----------------------------|----------------------------
//	catalog_name                | utf8
//	catalog_db_schemas          | list<DB_SCHEMA_SCHEMA>
//
// DB_SCHEMA_SCHEMA is a Struct with the fields:
//
//	Field Name                  | Field Type
//	----------------------------|----------------------------
//	db_schema_name              | utf8
//	db_schema_tables            | list<TABLE_SCHEMA>
//
// TABLE_SCHEMA is a Struct with the fields:
//
//	Field Name                  | Field Type
//	----------------------------|----------------------------
//	table_name                  | utf8 not null
//	table_type                  | utf8 not null
//	table_columns               | list<COLUMN_SCHEMA>
//	table_constraints           | list<CONSTRAINT_SCHEMA>
//
// COLUMN_SCHEMA is a Struct with the fields:
//
//	Field Name                  | Field Type          | Comments
//	----------------------------|---------------------|---------
//	column_name                 | utf8 not null       |
//	ordinal_position            | int32               | (1)
//	remarks                     | utf8                | (2)
//	xdbc_data_type              | int16               | (3)
//	xdbc_type_name              | utf8                | (3)
//	xdbc_column_size            | int32               | (3)
//	xdbc_decimal_digits         | int16               | (3)
//	xdbc_num_prec_radix         | int16               | (3)
//	xdbc_nullable               | int16               | (3)
//	xdbc_column_def             | utf8                | (3)
//	xdbc_sql_data_type          | int16               | (3)
//	xdbc_datetime_sub           | int16               | (3)
//	xdbc_char_octet_length      | int32               | (3)
//	xdbc_is_nullable            | utf8                | (3)
//	xdbc_scope_catalog          | utf8                | (3)
//	xdbc_scope_schema           | utf8                | (3)
//	xdbc_scope_table            | utf8                | (3)
//	xdbc_is_autoincrement       | bool                | (3)
//	xdbc_is_generatedcolumn     | bool                | (3)
//
//  1. The column's ordinal position in the table (starting from 1).
//  2. Database-specific description of the column.
//  3. Optional Value. Should be null if not supported by the driver.
//     xdbc_values are meant to provide JDBC/ODBC-compatible metadata
//     in an agnostic manner.
//
// CONSTRAINT_SCHEMA is a Struct with the fields:
//
//	Field Name                  | Field Type          | Comments
//	----------------------------|---------------------|---------
//	constraint_name             | utf8                |
//	constraint_type             | utf8 not null       | (1)
//	constraint_column_names     | list<utf8> not null | (2)
//	constraint_column_usage     | list<USAGE_SCHEMA>  | (3)
//
//  1. One of 'CHECK', 'FOREIGN KEY', 'PRIMARY KEY', or 'UNIQUE'.
//  2. The columns on the current table that are constrained, in order.
//  3. For FOREIGN KEY only, the referenced table and columns.
//
// USAGE_SCHEMA is a Struct with fields:
//
//	Field Name                  | Field Type
//	----------------------------|----------------------------
//	fk_catalog                  | utf8
//	fk_db_schema                | utf8
//	fk_table                    | utf8 not null
//	fk_column_name              | utf8 not null
//
// For the parameters: If nil is passed, then that parameter will not
// be filtered by at all. If an empty string, then only objects without
// that property (ie: catalog or db schema) will be returned.
//
// tableName and columnName must be either nil (do not filter by
// table name or column name) or non-empty.
//
// All non-empty, non-nil strings should be a search pattern (as described
// earlier).
//
// NOTE(review): depth and tableType are not described here. Presumably
// depth limits how many levels of the hierarchy are populated and
// tableType restricts the result to the listed table types; confirm
// against driver implementations.
GetObjects(ctx context.Context, depth ObjectDepth, catalog, dbSchema, tableName, columnName *string, tableType []string) (array.RecordReader, error)

// GetTableSchema returns the Arrow schema of the table named tableName.
//
// NOTE(review): catalog and dbSchema are optional (pointer) qualifiers;
// presumably nil means "not specified" - confirm against driver
// implementations.
GetTableSchema(ctx context.Context, catalog, dbSchema *string, tableName string) (*arrow.Schema, error)

// GetTableTypes returns a list of the table types in the database.
//
// The result is an arrow dataset with the following schema:
//
//	Field Name      | Field Type
//	----------------|--------------
//	table_type      | utf8 not null
//
GetTableTypes(context.Context) (array.RecordReader, error)

// Commit commits any pending transactions on this connection, it should
// only be used if autocommit is disabled.
//
// Behavior is undefined if this is mixed with SQL transaction statements.
Commit(context.Context) error

// Rollback rolls back any pending transactions. Only used if autocommit
// is disabled.
//
// Behavior is undefined if this is mixed with SQL transaction statements.
Rollback(context.Context) error

// NewStatement initializes a new statement object tied to this connection.
NewStatement() (Statement, error)

// Close closes this connection and releases any associated resources.
Close() error

// ReadPartition constructs a statement for a partition of a query. The
// results can then be read independently using the returned RecordReader.
//
// A partition can be retrieved by using ExecutePartitions on a statement.
ReadPartition(ctx context.Context, serializedPartition []byte) (array.RecordReader, error)
}
// PostInitOptions is an optional interface which can be implemented by
// drivers which allow modifying and setting options after initializing
// a connection or statement.
type PostInitOptions interface {
// SetOption sets the option identified by key to value.
SetOption(key, value string) error
}
// Partitions represent a partitioned result set.
//
// Some backends may internally partition the results. These partitions
// are exposed to clients who may wish to integrate them with a threaded
// or distributed execution model, where partitions can be divided among
// threads or machines and fetched in parallel.
//
// To use partitioning, execute the statement with ExecutePartitions to
// get the partition descriptors. Then call ReadPartition on a connection
// to turn individual descriptors into RecordReader instances. This may
// be done on a different connection than the one the partition was
// created with, or even in a different process on a different machine.
//
// Drivers are not required to support partitioning.
type Partitions struct {
// NumPartitions is the number of partitions in the result set.
// NOTE(review): presumably always equal to len(PartitionIDs) - confirm.
NumPartitions uint64
// PartitionIDs holds the partition descriptors, one byte slice per
// partition. Each descriptor can be handed to Connection.ReadPartition
// to read that partition's results.
PartitionIDs [][]byte
}
// Statement is a container for all state needed to execute a database
// query, such as the query itself, parameters for prepared statements,
// driver parameters, etc.
//
// Statements may represent a single query or a prepared statement.
//
// Statements may be used multiple times and can be reconfigured
// (e.g. they can be reused to execute multiple different queries).
// However, executing a statement (and changing certain other state)
// will invalidate result sets obtained prior to that execution.
//
// Multiple statements may be created from a single connection.
// However, the driver may block or error if they are used concurrently
// (whether from a single goroutine or from multiple simultaneous
// goroutines).
//
// Statements are not required to be goroutine-safe, but they can be
// used from multiple goroutines as long as clients serialize accesses
// to a statement.
type Statement interface {
// Close releases any relevant resources associated with this statement
// and closes it (particularly if it is a prepared statement).
//
// A statement instance should not be used after Close is called.
Close() error

// SetOption sets a string option on this statement.
SetOption(key, val string) error

// SetSqlQuery sets the query string to be executed.
//
// The query can then be executed with any of the Execute methods.
// For queries expected to be executed repeatedly, Prepare should be
// called before execution.
SetSqlQuery(query string) error

// ExecuteQuery executes the current query or prepared statement
// and returns a RecordReader for the results along with the number
// of rows affected if known, otherwise it will be -1.
//
// This invalidates any prior result sets on this statement.
ExecuteQuery(context.Context) (array.RecordReader, int64, error)

// ExecuteUpdate executes a statement that does not generate a result
// set. It returns the number of rows affected if known, otherwise -1.
ExecuteUpdate(context.Context) (int64, error)

// Prepare turns this statement into a prepared statement to be executed
// multiple times. This invalidates any prior result sets.
Prepare(context.Context) error

// SetSubstraitPlan allows setting a serialized Substrait execution
// plan into the query or for querying Substrait-related metadata.
//
// Drivers are not required to support both SQL and Substrait semantics.
// If they do, it may be via converting between representations internally.
//
// Like SetSqlQuery, after this is called the query can be executed
// using any of the Execute methods. If the query is expected to be
// executed repeatedly, Prepare should be called first on the statement.
SetSubstraitPlan(plan []byte) error

// Bind uses an arrow record batch to bind parameters to the query.
//
// This can be used for bulk inserts or for prepared statements.
// The driver will call Release on the passed in Record when it is done,
// but it may not do this until the statement is closed or another
// record is bound.
Bind(ctx context.Context, values arrow.Record) error

// BindStream uses a record batch stream to bind parameters for this
// query. This can be used for bulk inserts or prepared statements.
//
// The driver will call Release on the record reader, but may not do this
// until Close is called.
BindStream(ctx context.Context, stream array.RecordReader) error

// GetParameterSchema returns an Arrow schema representation of
// the expected parameters to be bound.
//
// This retrieves an Arrow Schema describing the number, names, and
// types of the parameters in a parameterized statement. The fields
// of the schema should be in order of the ordinal position of the
// parameters; named parameters should appear only once.
//
// If the parameter does not have a name, or a name cannot be determined,
// the name of the corresponding field in the schema will be an empty
// string. If the type cannot be determined, the type of the corresponding
// field will be NA (NullType).
//
// This should be called only after calling Prepare.
//
// This should return an error with StatusNotImplemented if the schema
// cannot be determined.
GetParameterSchema() (*arrow.Schema, error)

// ExecutePartitions executes the current statement and gets the results
// as a partitioned result set.
//
// It returns the Schema of the result set, the collection of partition
// descriptors and the number of rows affected, if known. If unknown,
// the number of rows affected will be -1.
//
// If the driver does not support partitioned results, this will return
// an error with a StatusNotImplemented code.
ExecutePartitions(context.Context) (*arrow.Schema, Partitions, int64, error)
}