| /*------------------------------------------------------------------------- |
| * |
| * copy.h |
| * Definitions for using the POSTGRES copy command. |
| * |
| * |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * $PostgreSQL: pgsql/src/include/commands/copy.h,v 1.28 2006/08/30 23:34:22 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #ifndef COPY_H |
| #define COPY_H |
| |
| #include "c.h" |
| #include "nodes/parsenodes.h" |
| #include "tcop/dest.h" |
| #include "executor/executor.h" |
| |
| |
/*
 * CopyDest - the source/destination cases that the bottom-level COPY I/O
 * code needs to tell apart.
 */
typedef enum CopyDest
{
	COPY_FILE,				/* to/from file */
	COPY_OLD_FE,			/* to/from frontend (2.0 protocol) */
	COPY_NEW_FE,			/* to/from frontend (3.0 protocol) */
	COPY_EXTERNAL_SOURCE	/* to/from external source (RET/WET) */
} CopyDest;
| |
/*
 * EolType - the end-of-line terminator type of the input.
 */
typedef enum EolType
{
	EOL_UNKNOWN,			/* terminator not determined */
	EOL_LF,					/* line feed */
	EOL_CR,					/* carriage return */
	EOL_CRLF				/* carriage return followed by line feed */
} EolType;
| |
/*
 * There are several ways to know the input row number where an error
 * happened, in order to report it reliably.
 *
 * ROWNUM_ORIGINAL - Used when the error happens in the same place that the
 * *whole* file or data is read, and input row numbers can be tracked
 * reliably. So, if an error happens in the n'th data row read, it's actually
 * the n'th row of the file (or data stream).
 * Using this type are - COPY in dispatch mode, COPY in utility mode, External
 * tables with 'file', and external web tables.
 *
 * ROWNUM_EMBEDDED - Used when the error happens in a place where a random
 * part of the file is read (given to us) and therefore the row numbers of the
 * original data are unknown. So, if an error happens in the n'th row read it
 * can actually be the (n+m)'th row of the original file.
 * Using this type are - COPY in execute mode, and External tables getting
 * data from a gpfdist process in a CSV format (gpfdist only parses CSV row by
 * row, while text format is parsed in chunks).
 * In this case we do some extra work to retrieve the original row number -
 * the distributor (COPY dispatcher, or gpfdist) embeds the original row
 * number in the beginning of each data row, and this number is extracted
 * later on.
 *
 * BYTENUM_EMBEDDED - Original row isn't even known to the distributor, only
 * the byte offset of each chunk it sends. We report errors in byte offset
 * number, not row number. We keep track of byte counts. This is currently
 * only used by external tables with gpfdist in 'text' format.
 */
typedef enum ErrLocType
{
	ROWNUM_ORIGINAL,		/* row number counted directly while reading */
	ROWNUM_EMBEDDED,		/* row number embedded in each row by the distributor */
	BYTENUM_EMBEDDED		/* only byte offsets are known; report those */
} ErrLocType;
| |
| |
/*
 * CopyErrMode - the error handling mode for this data load.
 *
 * "Sreh" refers to the single row error handler (see the cdbsreh field of
 * CopyStateData).
 */
typedef enum CopyErrMode
{
	ALL_OR_NOTHING,			/* either all rows or no rows get loaded (the
							 * default) */
	SREH_IGNORE,			/* Sreh - ignore errors (REJECT but no error
							 * table) */
	SREH_LOG				/* Sreh - log errors in an error table */
} CopyErrMode;
| |
| |
| /* |
| * This struct contains all the state variables used throughout a COPY |
| * operation. For simplicity, we use the same struct for all variants of COPY, |
| * even though some fields are used in only some cases. |
| * |
| * Multi-byte encodings: all supported client-side encodings encode multi-byte |
| * characters by having the first byte's high bit set. Subsequent bytes of the |
| * character can have the high bit not set. When scanning data in such an |
| * encoding to look for a match to a single-byte (ie ASCII) character, we must |
| * use the full pg_encoding_mblen() machinery to skip over multibyte |
| * characters, else we might find a false match to a trailing byte. In |
| * supported server encodings, there is no possibility of a false match, and |
| * it's faster to make useless comparisons to trailing bytes than it is to |
| * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE |
| * when we have to do it the hard way. |
| */ |
| typedef struct CopyStateData |
| { |
| /* low-level state data */ |
| CopyDest copy_dest; /* type of copy source/destination */ |
| FILE *copy_file; /* used if copy_dest == COPY_FILE */ |
| StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for |
| * dest == COPY_NEW_FE in COPY FROM */ |
| bool fe_copy; /* true for all FE copy dests */ |
| bool fe_eof; /* true if detected end of copy data */ |
| EolType eol_type; /* EOL type of input */ |
| char *eol_str; /* optional NEWLINE from command. before eol_type is defined */ |
| int client_encoding; /* remote side's character encoding */ |
| bool need_transcoding; /* client encoding diff from server? */ |
| bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ |
| FmgrInfo *enc_conversion_proc; /* conv proc from exttbl encoding to |
| server or the other way around */ |
| FmgrInfo *custom_formatter_func; /* function to convert to custom format */ |
| char *custom_formatter_name; /* name of function to convert to custom format */ |
| List *custom_formatter_params; /* list of defelems that hold user's format parameters */ |
| uint64 processed; /* # of tuples processed */ |
| size_t bytesread; |
| |
| /* parameters from the COPY command */ |
| Relation rel; /* relation to copy to or from */ |
| QueryDesc *queryDesc; /* executable query to copy from */ |
| List *attnumlist; /* integer list of attnums to copy */ |
| List *attnamelist; /* list of attributes by name */ |
| List *force_quote; /* the raw fc column name list */ |
| List *force_notnull; /* the raw fnn column name list */ |
| char *filename; /* filename, or NULL for STDIN/STDOUT */ |
| bool custom; /* custom format? */ |
| bool oids; /* include OIDs? */ |
| bool csv_mode; /* Comma Separated Value format? */ |
| bool header_line; /* CSV header line? */ |
| char *null_print; /* NULL marker string (server encoding!) */ |
| int null_print_len; /* length of same */ |
| char *null_print_client; /* same converted to client encoding */ |
| char *delim; /* column delimiter (1 byte or multibytes) */ |
| char *quote; /* CSV quote char (must be 1 byte) */ |
| char *escape; /* CSV escape char (must be 1 byte) */ |
| bool *force_quote_flags; /* per-column CSV FQ flags */ |
| bool *force_notnull_flags; /* per-column CSV FNN flags */ |
| bool fill_missing; /* missing attrs at end of line are NULL */ |
| |
| /* these are just for error messages, see copy_in_error_callback */ |
| const char *cur_relname; /* table name for error messages */ |
| int64 cur_lineno; /* line number for error messages. Negative means it isn't available. */ |
| int64 cur_byteno; /* number of bytes processed from input */ |
| const char *cur_attname; /* current att for error messages */ |
| //const char *cur_attval; /* current att value for error messages */ |
| |
| /* |
| * Working state for COPY TO |
| */ |
| FmgrInfo *out_functions; /* lookup info for output functions */ |
| MemoryContext rowcontext; /* per-row evaluation context */ |
| |
| /* |
| * These variables are used to reduce overhead in textual COPY FROM. |
| * |
| * attribute_buf holds the separated, de-escaped text for each field of |
| * the current line. The CopyReadAttributes functions return arrays of |
| * pointers into this buffer. We avoid palloc/pfree overhead by re-using |
| * the buffer on each cycle. |
| */ |
| StringInfoData attribute_buf; |
| |
| /* |
| * Similarly, line_buf holds the whole input line being processed. The |
| * input cycle is first to read the whole line into line_buf, convert it |
| * to server encoding there, and then extract the individual attribute |
| * fields into attribute_buf. line_buf is preserved unmodified so that we |
| * can display it in error messages if appropriate. |
| */ |
| StringInfoData line_buf; |
| |
| int *attr_offsets; |
| |
| bool line_buf_converted; /* converted to server encoding? */ |
| |
| /* |
| * Finally, raw_buf holds raw data read from the data source (file or |
| * client connection). CopyReadLine parses this data sufficiently to |
| * locate line boundaries, then transfers the data to line_buf and |
| * converts it. Note: we guarantee that there is a \0 at |
| * raw_buf[raw_buf_len]. |
| */ |
| |
| #define RAW_BUF_SIZE 65536 |
| |
| /* NOTE: raw_buf in Greenplum Database is used with different logic than postgres COPY */ |
| char raw_buf[RAW_BUF_SIZE + 1]; /* extra byte for '\0' */ |
| int raw_buf_index; /* next byte to process */ |
| bool raw_buf_done; /* finished processing the current buffer */ |
| int missing_bytes; /* see scanTextLine() for explanation */ |
| /* Greenplum Database specific variables */ |
| bool is_copy_in; /* copy in or out? */ |
| char eol_ch[2]; /* The byte values of the 1 or 2 eol bytes */ |
| bool escape_off; /* treat backslashes as non-special? */ |
| bool delimiter_off; /* no delimiter. 1-column external tabs only */ |
| bool delimiter_multibytes; |
| int last_hash_field; |
| bool line_done; /* finished processing the whole line or |
| * stopped in the middle */ |
| bool end_marker; |
| char *begloc; |
| char *endloc; |
| bool error_on_executor; /* true if errors arrived from the |
| * executors COPY (QEs) */ |
| StringInfoData executor_err_context; /* error context text from QE */ |
| |
| |
| /* for CSV format */ |
| bool in_quote; |
| bool last_was_esc; |
| |
| /* for TEXT format */ |
| bool esc_in_prevbuf; /* escape was last character of the data input |
| * buffer */ |
| bool cr_in_prevbuf; /* CR was last character of the data input |
| * buffer */ |
| |
| /* Original row number tracking variables */ |
| #define COPY_METADATA_DELIM '^' |
| ErrLocType err_loc_type; /* see enum def for description */ |
| bool md_error; |
| |
| /* Error handling options */ |
| CopyErrMode errMode; |
| struct CdbSreh *cdbsreh; /* single row error handler */ |
| int num_consec_csv_err; /* # of consecutive csv invalid format errs */ |
| |
| PartitionNode *partitions; /* partitioning meta data from dispatcher */ |
| List *ao_segnos; /* AO table meta data from dispatcher */ |
| List *ao_segfileinfos; /* AO segment file information from dispatcher */ |
| List *splits; /* table scan splits for this segment */ |
| /* end Greenplum Database specific variables */ |
| struct QueryResource *resource; |
| PlannedStmt *planstmt; /* ext table copy to */ |
| } CopyStateData; |
| |
| typedef CopyStateData *CopyState; |
| |
| |
/*
 * ISOCTAL(c)  - true when character c is one of '0'..'7'.
 *               Evaluates c twice; do not pass an expression with side
 *               effects.
 * OCTVALUE(c) - numeric value of character c relative to '0'; only
 *               meaningful when ISOCTAL(c) holds.
 */
#define ISOCTAL(c)	(((c) >= '0') && ((c) <= '7'))
#define OCTVALUE(c) ((c) - '0')
| |
| extern void ValidateControlChars(bool copy, bool load, bool csv_mode, char *delim, |
| char *null_print, char *quote, char *escape, |
| List *force_quote, List *force_notnull, |
| bool header_line, bool fill_missing, char *newline, |
| int numcols); |
| extern uint64 DoCopy(const CopyStmt *stmt, const char *queryString); |
| |
| extern DestReceiver *CreateCopyDestReceiver(void); |
| |
| extern List *CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist); |
| extern bool CopyReadLineText(CopyState cstate, size_t bytesread); |
| extern bool CopyReadLineCSV(CopyState cstate, size_t bytesread); |
| extern void CopyReadAttributesText(CopyState cstate, char * __restrict nulls, |
| int * __restrict attr_offsets, int num_phys_attrs, Form_pg_attribute * __restrict attr); |
| extern void CopyReadAttributesCSV(CopyState cstate, char *nulls, int *attr_offsets, |
| int num_phys_attrs, Form_pg_attribute *attr); |
| extern void CopyOneRowTo(CopyState cstate, Oid tupleOid, |
| Datum *values, bool *nulls); |
| extern void CopyOneCustomRowTo(CopyState cstate, bytea *value); |
| extern void CopySendEndOfRow(CopyState cstate); |
| extern void limit_printout_length(StringInfo buf); |
| extern void truncateEol(StringInfo buf, EolType eol_type); |
| extern void setEncodingConversionProc(CopyState cstate, int client_encoding, bool iswritable); |
| extern void CopyEolStrToType(CopyState cstate); |
| |
| #endif /* COPY_H */ |