| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| // Package adbc defines the interfaces for Arrow Database |
| // Connectivity. |
| // |
| // An Arrow-based interface between applications and database |
| // drivers. ADBC aims to provide a vendor-independent API for SQL |
| // and Substrait-based database access that is targeted at |
| // analytics/OLAP use cases. |
| // |
| // This API is intended to be implemented directly by drivers and |
| // used directly by client applications. To assist portability |
| // between different vendors, a "driver manager" library is also |
| // provided, which implements this same API, but dynamically loads |
| // drivers internally and forwards calls appropriately. |
| // |
| // In general, it's expected for objects to allow serialized access |
| // safely from multiple goroutines, but not necessarily concurrent |
| // access. Specific implementations may allow concurrent access. |
| // |
| // EXPERIMENTAL. Interface subject to change. |
| package adbc |
| |
| import ( |
| "context" |
| "fmt" |
| |
| "github.com/apache/arrow/go/v12/arrow" |
| "github.com/apache/arrow/go/v12/arrow/array" |
| ) |
| |
| //go:generate go run golang.org/x/tools/cmd/stringer -type Status -linecomment |
| //go:generate go run golang.org/x/tools/cmd/stringer -type InfoCode -linecomment |
| |
| // Error is the detailed error for an operation |
| type Error struct { |
| // Msg is a string representing a human readable error message |
| Msg string |
| // Code is the ADBC status representing this error |
| Code Status |
| // VendorCode is a vendor-specific error codee, if applicable |
| VendorCode int32 |
| // SqlState is a SQLSTATE error code, if provided, as defined |
| // by the SQL:2003 standard. If not set, it will be "\0\0\0\0\0" |
| SqlState [5]byte |
| } |
| |
| func (e Error) Error() string { |
| return fmt.Sprintf("%s: SqlState: %s, msg: %s", e.Code, string(e.SqlState[:]), e.Msg) |
| } |
| |
// Status is the error code reported by operations that may fail.
type Status uint8

// Status codes.
//
// The trailing line comment on each constant is consumed by the
// "stringer -linecomment" go:generate directive in this file, so those
// comments must be kept verbatim.
const (
	// StatusOK: no error.
	StatusOK Status = 0 // OK
	// StatusUnknown: an unknown error occurred.
	//
	// May indicate a driver-side or database-side error.
	StatusUnknown Status = 1 // Unknown
	// StatusNotImplemented: the operation is not implemented or supported.
	//
	// May indicate a driver-side or database-side error.
	StatusNotImplemented Status = 2 // Not Implemented
	// StatusNotFound: a requested resource was not found.
	//
	// May indicate a driver-side or database-side error.
	StatusNotFound Status = 3 // Not Found
	// StatusAlreadyExists: a requested resource already exists.
	//
	// May indicate a driver-side or database-side error.
	StatusAlreadyExists Status = 4 // Already Exists
	// StatusInvalidArgument: the arguments are invalid, likely a
	// programming error.
	//
	// For instance, they may be of the wrong format, or out of range.
	//
	// May indicate a driver-side or database-side error.
	StatusInvalidArgument Status = 5 // Invalid Argument
	// StatusInvalidState: the preconditions for the operation are not
	// met, likely a programming error.
	//
	// For instance, the object may be uninitialized, or may not have
	// been fully configured.
	//
	// May indicate a driver-side or database-side error.
	StatusInvalidState Status = 6 // Invalid State
	// StatusInvalidData: invalid data was processed (not a programming
	// error).
	//
	// For instance, a division by zero may have occurred during query
	// execution.
	//
	// May indicate a database-side error only.
	StatusInvalidData Status = 7 // Invalid Data
	// StatusIntegrity: the database's integrity was affected.
	//
	// For instance, a foreign key check may have failed, or a
	// uniqueness constraint may have been violated.
	//
	// May indicate a database-side error only.
	StatusIntegrity Status = 8 // Integrity Issue
	// StatusInternal: an error internal to the driver or database
	// occurred.
	//
	// May indicate a driver-side or database-side error.
	StatusInternal Status = 9 // Internal
	// StatusIO: an I/O error occurred.
	//
	// For instance, a remote service may be unavailable.
	//
	// May indicate a driver-side or database-side error.
	StatusIO Status = 10 // I/O
	// StatusCancelled: the operation was cancelled, not due to a timeout.
	//
	// May indicate a driver-side or database-side error.
	StatusCancelled Status = 11 // Cancelled
	// StatusTimeout: the operation was cancelled due to a timeout.
	//
	// May indicate a driver-side or database-side error.
	StatusTimeout Status = 12 // Timeout
	// StatusUnauthenticated: authentication failed.
	//
	// May indicate a database-side error only.
	StatusUnauthenticated Status = 13 // Unauthenticated
	// StatusUnauthorized: the client is not authorized to perform the
	// given operation.
	//
	// May indicate a database-side error only.
	StatusUnauthorized Status = 14 // Unauthorized
)
| |
// Canonical option keys and values.
const (
	// Canonical values for options that are switched on or off.
	OptionValueEnabled  = "true"
	OptionValueDisabled = "false"

	// Option keys in the "adbc.connection" namespace.
	OptionKeyAutoCommit     = "adbc.connection.autocommit"
	OptionKeyIsolationLevel = "adbc.connection.transaction.isolation_level"
	OptionKeyReadOnly       = "adbc.connection.readonly"

	// Option keys in the "adbc.ingest" namespace.
	OptionKeyIngestTargetTable = "adbc.ingest.target_table"
	OptionKeyIngestMode        = "adbc.ingest.mode"

	// Option values in the "adbc.ingest.mode" namespace.
	OptionValueIngestModeCreate = "adbc.ingest.mode.create"
	OptionValueIngestModeAppend = "adbc.ingest.mode.append"

	// Option keys without a namespace prefix.
	OptionKeyURI      = "uri"
	OptionKeyUsername = "username"
	OptionKeyPassword = "password"
)
| |
// OptionIsolationLevel is a string-typed option value that names a
// transaction isolation level.
//
// NOTE(review): presumably these values are what gets set for the
// "adbc.connection.transaction.isolation_level" option key — confirm
// against a driver.
type OptionIsolationLevel string

// Isolation level values, all under the
// "adbc.connection.transaction.isolation" prefix.
const (
	// LevelDefault names the "default" isolation level.
	LevelDefault OptionIsolationLevel = "adbc.connection.transaction.isolation.default"
	// LevelReadUncommitted names the "read_uncommitted" isolation level.
	LevelReadUncommitted OptionIsolationLevel = "adbc.connection.transaction.isolation.read_uncommitted"
	// LevelReadCommitted names the "read_committed" isolation level.
	LevelReadCommitted OptionIsolationLevel = "adbc.connection.transaction.isolation.read_committed"
	// LevelRepeatableRead names the "repeatable_read" isolation level.
	LevelRepeatableRead OptionIsolationLevel = "adbc.connection.transaction.isolation.repeatable_read"
	// LevelSnapshot names the "snapshot" isolation level.
	LevelSnapshot OptionIsolationLevel = "adbc.connection.transaction.isolation.snapshot"
	// LevelSerializable names the "serializable" isolation level.
	LevelSerializable OptionIsolationLevel = "adbc.connection.transaction.isolation.serializable"
	// LevelLinearizable names the "linearizable" isolation level.
	LevelLinearizable OptionIsolationLevel = "adbc.connection.transaction.isolation.linearizable"
)
| |
| // Driver is the entry point for the interface. It is similar to |
| // database/sql.Driver taking a map of keys and values as options |
| // to initialize a Connection to the database. Any common connection |
| // state can live in the Driver itself, for example an in-memory database |
| // can place ownership of the actual database in this driver. |
| // |
| // Any connection specific options should be set using SetOptions before |
| // calling Open. |
| // |
| // The provided context.Context is for dialing purposes only |
| // (see net.DialContext) and should not be stored or used for other purposes. |
| // A default timeout should still be used when dialing as a connection |
| // pool may call Connect asynchronously to any query. |
| // |
| // A driver can also optionally implement io.Closer if there is a need |
| // or desire for it. |
| type Driver interface { |
| NewDatabase(opts map[string]string) (Database, error) |
| } |
| |
| type Database interface { |
| SetOptions(map[string]string) error |
| Open(ctx context.Context) (Connection, error) |
| } |
| |
// InfoCode is the integer code identifying one piece of database/driver
// metadata; it is the element type of the infoCodes argument of
// Connection.GetInfo.
type InfoCode uint32

// Recognized info codes.
//
// The trailing line comment on each constant is consumed by the
// "stringer -linecomment" go:generate directive in this file, so those
// comments must be kept verbatim.
const (
	// InfoVendorName is the database vendor/product name (e.g. the
	// server name) (type: utf8).
	InfoVendorName InfoCode = 0 // VendorName
	// InfoVendorVersion is the database vendor/product version
	// (type: utf8).
	InfoVendorVersion InfoCode = 1 // VendorVersion
	// InfoVendorArrowVersion is the database vendor/product Arrow
	// library version (type: utf8).
	InfoVendorArrowVersion InfoCode = 2 // VendorArrowVersion

	// InfoDriverName is the driver name (type: utf8).
	InfoDriverName InfoCode = 100 // DriverName
	// InfoDriverVersion is the driver version (type: utf8).
	InfoDriverVersion InfoCode = 101 // DriverVersion
	// InfoDriverArrowVersion is the driver Arrow library version
	// (type: utf8).
	InfoDriverArrowVersion InfoCode = 102 // DriverArrowVersion
)
| |
// ObjectDepth is the type of the depth argument of
// Connection.GetObjects.
//
// NOTE(review): presumably each level limits how far down the
// catalog / db schema / table / column hierarchy the result goes —
// confirm against a driver.
type ObjectDepth int

// Object depth levels.
const (
	// ObjectDepthAll is the zero value of ObjectDepth.
	ObjectDepthAll ObjectDepth = 0
	// ObjectDepthCatalogs is the catalog level.
	ObjectDepthCatalogs ObjectDepth = 1
	// ObjectDepthDBSchemas is the database schema level.
	ObjectDepthDBSchemas ObjectDepth = 2
	// ObjectDepthTables is the table level.
	ObjectDepthTables ObjectDepth = 3
	// ObjectDepthColumns is an alias for ObjectDepthAll: both have the
	// same value.
	ObjectDepthColumns = ObjectDepthAll
)
| |
| // Connection is an active Database connection. |
| // |
| // It provides methods for creating statements, using transactions |
| // and so on. |
| // |
| // Connections are not required to be safely accessible by concurrent |
| // goroutines. |
| type Connection interface { |
| // Metadata methods |
| // |
| // Generally these methods return an array.RecordReader that |
| // can be consumed to retrieve metadata about the database as Arrow |
| // data. The returned metadata has an expected schema given in the |
| // doc strings of the specific methods. Schema fields are nullable |
| // unless otherwise marked. While no Statement is used in these |
| // methods, the result set may count as an active statement to the |
| // driver for the purposes of concurrency management (e.g. if the |
| // driver has a limit on concurrent active statements and it must |
| // execute a SQL query internally in order to implement the metadata |
| // method). |
| // |
| // Some methods accept "search pattern" arguments, which are strings |
| // that can contain the special character "%" to match zero or more |
| // characters, or "_" to match exactly one character. (See the |
| // documentation of DatabaseMetaData in JDBC or "Pattern Value Arguments" |
| // in the ODBC documentation.) Escaping is not currently supported. |
| |
| // GetInfo returns metadata about the database/driver. |
| // |
| // The result is an Arrow dataset with the following schema: |
| // |
| // Field Name | Field Type |
| // ----------------------------|----------------------------- |
| // info_name | uint32 not null |
| // info_value | INFO_SCHEMA |
| // |
| // INFO_SCHEMA is a dense union with members: |
| // |
| // Field Name (Type Code) | Field Type |
| // ----------------------------|----------------------------- |
| // string_value (0) | utf8 |
| // bool_value (1) | bool |
| // int64_value (2) | int64 |
| // int32_bitmask (3) | int32 |
| // string_list (4) | list<utf8> |
| // int32_to_int32_list_map (5) | map<int32, list<int32>> |
| // |
| // Each metadatum is identified by an integer code. The recognized |
| // codes are defined as constants. Codes [0, 10_000) are reserved |
| // for ADBC usage. Drivers/vendors will ignore requests for unrecognized |
| // codes (the row will be omitted from the result). |
| GetInfo(ctx context.Context, infoCodes []InfoCode) (array.RecordReader, error) |
| |
| // GetObjects gets a hierarchical view of all catalogs, database schemas, |
| // tables, and columns. |
| // |
| // The result is an Arrow Dataset with the following schema: |
| // |
| // Field Name | Field Type |
| // ----------------------------|---------------------------- |
| // catalog_name | utf8 |
| // catalog_db_schemas | list<DB_SCHEMA_SCHEMA> |
| // |
| // DB_SCHEMA_SCHEMA is a Struct with the fields: |
| // |
| // Field Name | Field Type |
| // ----------------------------|---------------------------- |
| // db_schema_name | utf8 |
| // db_schema_tables | list<TABLE_SCHEMA> |
| // |
| // TABLE_SCHEMA is a Struct with the fields: |
| // |
| // Field Name | Field Type |
| // ----------------------------|---------------------------- |
| // table_name | utf8 not null |
| // table_type | utf8 not null |
| // table_columns | list<COLUMN_SCHEMA> |
| // table_constraints | list<CONSTRAINT_SCHEMA> |
| // |
| // COLUMN_SCHEMA is a Struct with the fields: |
| // |
| // Field Name | Field Type | Comments |
| // ----------------------------|---------------------|--------- |
| // column_name | utf8 not null | |
| // ordinal_position | int32 | (1) |
| // remarks | utf8 | (2) |
| // xdbc_data_type | int16 | (3) |
| // xdbc_type_name | utf8 | (3) |
| // xdbc_column_size | int32 | (3) |
| // xdbc_decimal_digits | int16 | (3) |
| // xdbc_num_prec_radix | int16 | (3) |
| // xdbc_nullable | int16 | (3) |
| // xdbc_column_def | utf8 | (3) |
| // xdbc_sql_data_type | int16 | (3) |
| // xdbc_datetime_sub | int16 | (3) |
| // xdbc_char_octet_length | int32 | (3) |
| // xdbc_is_nullable | utf8 | (3) |
| // xdbc_scope_catalog | utf8 | (3) |
| // xdbc_scope_schema | utf8 | (3) |
| // xdbc_scope_table | utf8 | (3) |
| // xdbc_is_autoincrement | bool | (3) |
| // xdbc_is_generatedcolumn | bool | (3) |
| // |
| // 1. The column's ordinal position in the table (starting from 1). |
| // 2. Database-specific description of the column. |
| // 3. Optional Value. Should be null if not supported by the driver. |
| // xdbc_values are meant to provide JDBC/ODBC-compatible metadata |
| // in an agnostic manner. |
| // |
| // CONSTRAINT_SCHEMA is a Struct with the fields: |
| // |
| // Field Name | Field Type | Comments |
| // ----------------------------|---------------------|--------- |
| // constraint_name | utf8 | |
| // constraint_type | utf8 not null | (1) |
| // constraint_column_names | list<utf8> not null | (2) |
| // constraint_column_usage | list<USAGE_SCHEMA> | (3) |
| // |
| // 1. One of 'CHECK', 'FOREIGN KEY', 'PRIMARY KEY', or 'UNIQUE'. |
| // 2. The columns on the current table that are constrained, in order. |
| // 3. For FOREIGN KEY only, the referenced table and columns. |
| // |
| // USAGE_SCHEMA is a Struct with fields: |
| // |
| // Field Name | Field Type |
| // ----------------------------|---------------------------- |
| // fk_catalog | utf8 |
| // fk_db_schema | utf8 |
| // fk_table | utf8 not null |
| // fk_column_name | utf8 not null |
| // |
| // For the parameters: If nil is passed, then that parameter will not |
| // be filtered by at all. If an empty string, then only objects without |
| // that property (ie: catalog or db schema) will be returned. |
| // |
| // tableName and columnName must be either nil (do not filter by |
| // table name or column name) or non-empty. |
| // |
| // All non-empty, non-nil strings should be a search pattern (as described |
| // earlier). |
| GetObjects(ctx context.Context, depth ObjectDepth, catalog, dbSchema, tableName, columnName *string, tableType []string) (array.RecordReader, error) |
| |
| GetTableSchema(ctx context.Context, catalog, dbSchema *string, tableName string) (*arrow.Schema, error) |
| |
| // GetTableTypes returns a list of the table types in the database. |
| // |
| // The result is an arrow dataset with the following schema: |
| // |
| // Field Name | Field Type |
| // ----------------|-------------- |
| // table_type | utf8 not null |
| // |
| GetTableTypes(context.Context) (array.RecordReader, error) |
| |
| // Commit commits any pending transactions on this connection, it should |
| // only be used if autocommit is disabled. |
| // |
| // Behavior is undefined if this is mixed with SQL transaction statements. |
| Commit(context.Context) error |
| |
| // Rollback rolls back any pending transactions. Only used if autocommit |
| // is disabled. |
| // |
| // Behavior is undefined if this is mixed with SQL transaction statements. |
| Rollback(context.Context) error |
| |
| // NewStatement initializes a new statement object tied to this connection |
| NewStatement() (Statement, error) |
| |
| // Close closes this connection and releases any associated resources. |
| Close() error |
| |
| // ReadPartition constructs a statement for a partition of a query. The |
| // results can then be read independently using the returned RecordReader. |
| // |
| // A partition can be retrieved by using ExecutePartitions on a statement. |
| ReadPartition(ctx context.Context, serializedPartition []byte) (array.RecordReader, error) |
| } |
| |
// PostInitOptions is an optional interface. Drivers may implement it
// when they allow options to be modified and set after a connection or
// statement has been initialized.
type PostInitOptions interface {
	// SetOption takes a single option key and its value and reports an
	// error if the option cannot be applied.
	SetOption(key, value string) error
}
| |
// Partitions represent a partitioned result set.
//
// Some backends may internally partition the results. These partitions
// are exposed to clients who may wish to integrate them with a threaded
// or distributed execution model, where partitions can be divided among
// threads or machines and fetched in parallel.
//
// To use partitioning, execute the statement with ExecutePartitions to
// get the partition descriptors, then call ReadPartition on a
// connection to turn individual descriptors into RecordReader
// instances. This may be done on a different connection than the one
// the partition was created with, or even in a different process on a
// different machine.
//
// Drivers are not required to support partitioning.
type Partitions struct {
	// NumPartitions is the partition count.
	//
	// NOTE(review): presumably equal to len(PartitionIDs) — confirm.
	NumPartitions uint64
	// PartitionIDs holds the partition descriptors as byte slices, the
	// same type ReadPartition accepts as serializedPartition.
	PartitionIDs [][]byte
}
| |
| // Statement is a container for all state needed to execute a database |
| // query, such as the query itself, parameters for prepared statements, |
| // driver parameters, etc. |
| // |
| // Statements may represent a single query or a prepared statement. |
| // |
| // Statements may be used multiple times and can be reconfigured |
| // (e.g. they can be reused to execute multiple different queries). |
| // However, executing a statement (and changing certain other state) |
| // will invalidate result sets obtained prior to that execution. |
| // |
| // Multiple statements may be created from a single connection. |
| // However, the driver may block or error if they are used concurrently |
| // (whether from a single goroutine or from multiple simultaneous |
| // goroutines). |
| // |
| // Statements are not required to be goroutine-safe, but they can be |
| // used from multiple goroutines as long as clients serialize accesses |
| // to a statement. |
| type Statement interface { |
| // Close releases any relevant resources associated with this statement |
| // and closes it (particularly if it is a prepared statement). |
| // |
| // A statement instance should not be used after Close is called. |
| Close() error |
| |
| // SetOption sets a string option on this statement |
| SetOption(key, val string) error |
| |
| // SetSqlQuery sets the query string to be executed. |
| // |
| // The query can then be executed with any of the Execute methods. |
| // For queries expected to be executed repeatedly, Prepare should be |
| // called before execution. |
| SetSqlQuery(query string) error |
| |
| // ExecuteQuery executes the current query or prepared statement |
| // and returnes a RecordReader for the results along with the number |
| // of rows affected if known, otherwise it will be -1. |
| // |
| // This invalidates any prior result sets on this statement. |
| ExecuteQuery(context.Context) (array.RecordReader, int64, error) |
| |
| // ExecuteUpdate executes a statement that does not generate a result |
| // set. It returns the number of rows affected if known, otherwise -1. |
| ExecuteUpdate(context.Context) (int64, error) |
| |
| // Prepare turns this statement into a prepared statement to be executed |
| // multiple times. This invalidates any prior result sets. |
| Prepare(context.Context) error |
| |
| // SetSubstraitPlan allows setting a serialized Substrait execution |
| // plan into the query or for querying Substrait-related metadata. |
| // |
| // Drivers are not required to support both SQL and Substrait semantics. |
| // If they do, it may be via converting between representations internally. |
| // |
| // Like SetSqlQuery, after this is called the query can be executed |
| // using any of the Execute methods. If the query is expected to be |
| // executed repeatedly, Prepare should be called first on the statement. |
| SetSubstraitPlan(plan []byte) error |
| |
| // Bind uses an arrow record batch to bind parameters to the query. |
| // |
| // This can be used for bulk inserts or for prepared statements. |
| // The driver will call release on the passed in Record when it is done, |
| // but it may not do this until the statement is closed or another |
| // record is bound. |
| Bind(ctx context.Context, values arrow.Record) error |
| |
| // BindStream uses a record batch stream to bind parameters for this |
| // query. This can be used for bulk inserts or prepared statements. |
| // |
| // The driver will call Release on the record reader, but may not do this |
| // until Close is called. |
| BindStream(ctx context.Context, stream array.RecordReader) error |
| |
| // GetParameterSchema returns an Arrow schema representation of |
| // the expected parameters to be bound. |
| // |
| // This retrieves an Arrow Schema describing the number, names, and |
| // types of the parameters in a parameterized statement. The fields |
| // of the schema should be in order of the ordinal position of the |
| // parameters; named parameters should appear only once. |
| // |
| // If the parameter does not have a name, or a name cannot be determined, |
| // the name of the corresponding field in the schema will be an empty |
| // string. If the type cannot be determined, the type of the corresponding |
| // field will be NA (NullType). |
| // |
| // This should be called only after calling Prepare. |
| // |
| // This should return an error with StatusNotImplemented if the schema |
| // cannot be determined. |
| GetParameterSchema() (*arrow.Schema, error) |
| |
| // ExecutePartitions executes the current statement and gets the results |
| // as a partitioned result set. |
| // |
| // It returns the Schema of the result set, the collection of partition |
| // descriptors and the number of rows affected, if known. If unknown, |
| // the number of rows affected will be -1. |
| // |
| // If the driver does not support partitioned results, this will return |
| // an error with a StatusNotImplemented code. |
| ExecutePartitions(context.Context) (*arrow.Schema, Partitions, int64, error) |
| } |