blob: c6b8450332f57026aecd15746befdaa4d553859b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*-------------------------------------------------------------------------
*
* cdbsreh.h
* routines for single row error handling
*
*
*-------------------------------------------------------------------------
*/
#ifndef CDBSREH_H
#define CDBSREH_H
#include "c.h"
#include "cdb/cdbcopy.h"
#include "utils/memutils.h"
#include "port/pg_crc32c.h"
/*
 * Layout of the error table. It ALWAYS has the following columns:
 *
 *		cmdtime		timestamptz
 *		relname		text
 *		filename	text
 *		linenum		int
 *		bytenum		int
 *		errmsg		text
 *		rawdata		text
 *		rawbytes	bytea
 *
 * NUM_ERRORTABLE_ATTR is the number of columns; each errtable_* constant
 * numbers one of the columns listed above, counting from 1.
 */
#define NUM_ERRORTABLE_ATTR		8

#define errtable_cmdtime		1
#define errtable_relname		2
#define errtable_filename		3
#define errtable_linenum		4
#define errtable_bytenum		5
#define errtable_errmsg			6
#define errtable_rawdata		7
#define errtable_rawbytes		8
/*
 * Invalid CSV input can leave us unable to parse the data, resulting in
 * very large data rows. In copy.c we throw an error ("line too long") and
 * continue to try to parse. In some cases this is enough to recover and
 * continue parsing. In other cases, especially with input data that
 * includes a lot of valid embedded newlines, we may never recover from the
 * error and would keep parsing huge lines until we abort. This macro is
 * used to detect that case so the operation can be aborted.
 *
 * sreh: pointer to a struct with a consec_csv_err member (a CdbSreh).
 *
 * Evaluates to non-zero exactly when sreh->consec_csv_err equals 3, and to
 * zero otherwise. The argument is parenthesized so that any pointer-valued
 * expression (for example "&arr[i]" or "base + i") can be passed safely;
 * it is evaluated exactly once.
 */
#define CSV_IS_UNPARSABLE(sreh) ((sreh)->consec_csv_err == 3)
/*
* All the Single Row Error Handling state is kept here.
* When an error happens and we are in single row error handling
* mode this struct is updated and handed to the single row
* error handling manager (cdbsreh.c).
*/
typedef struct CdbSreh
{
/* bad row information */
char *errmsg; /* the error message for this bad data row */
char *rawdata; /* the bad data row */
char *relname; /* target relation */
int64 linenumber; /* line number of error in original file */
uint64 processed; /* num logical input rows processed so far */
bool is_server_enc; /* was bad row converted to server encoding? */
int consec_csv_err; /* # of consecutive invalid csv errors */
/* reject limit state */
int rejectlimit; /* SEGMENT REJECT LIMIT value */
int rejectcount; /* how many were rejected so far */
bool is_limit_in_rows; /* ROWS = true, PERCENT = false */
/* the error table */
Relation errtbl; /* the error table we use (if any) */
struct AppendOnlyInsertDescData *err_aoInsertDesc;
int err_aosegno;
ResultRelSegFileInfo *err_aosegfileinfo;
/* error table lifespan */
bool is_keep; /* if true error table should not get DROP'ed */
bool reusing_errtbl; /* true if we are using an existing table (did not auto generate a new one) */
bool should_drop; /* true if we decide to DROP errtbl at end of execution (depends on previous 2 vars) */
/* COPY only vars */
CdbCopy *cdbcopy; /* for QD COPY to send bad rows to random QE */
int lastsegid; /* last QE COPY segid that QD COPY sent bad row to */
MemoryContext badrowcontext; /* per-badrow evaluation context */
char filename[256]; /* "uri [filename]" */
} CdbSreh;
/*
 * Single row error handling routines.
 *
 * NOTE(review): only the prototypes are visible in this header. The
 * grouping notes below are inferred from the function names and should be
 * confirmed against the definitions.
 */

/* Creating and destroying the CdbSreh state (presumably; confirm). */
extern CdbSreh *makeCdbSreh(bool is_keep,
			bool reusing_existing_errtable,
			int rejectlimit,
			bool is_limit_in_rows,
			RangeVar *errtbl,
			ResultRelSegFileInfo *segfileinfo,
			char *filename,
			char *relname);
extern void destroyCdbSreh(CdbSreh *cdbsreh);

/* Handling a bad row and reporting results (presumably; confirm). */
extern void HandleSingleRowError(CdbSreh *cdbsreh);
extern void ReportSrehResults(CdbSreh *cdbsreh, int total_rejected);
extern void SendNumRowsRejected(int numrejected);

/* Error table and reject limit checks (presumably; confirm). */
extern void ValidateErrorTableMetaData(Relation rel);
extern void SetErrorTableVerdict(CdbSreh *cdbsreh, int total_rejected);
extern bool IsRejectLimitReached(CdbSreh *cdbsreh);
extern void emitSameTxnWarning(void);
extern void VerifyRejectLimit(char rejectlimittype, int rejectlimit);
#endif /* CDBSREH_H */