| /*------------------------------------------------------------------------- |
| * |
| * xml.c |
| * XML data type support. |
| * |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/backend/utils/adt/xml.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| /* |
| * Generally, XML type support is only available when libxml use was |
| * configured during the build. But even if that is not done, the |
| * type and all the functions are available, but most of them will |
| * fail. For one thing, this avoids having to manage variant catalog |
| * installations. But it also has nice effects such as that you can |
| * dump a database containing XML type data even if the server is not |
| * linked with libxml. Thus, make sure xml_out() works even if nothing |
| * else does. |
| */ |
| |
| /* |
| * Notes on memory management: |
| * |
| * Sometimes libxml allocates global structures in the hope that it can reuse |
| * them later on. This makes it impractical to change the xmlMemSetup |
| * functions on-the-fly; that is likely to lead to trying to pfree() chunks |
| * allocated with malloc() or vice versa. Since libxml might be used by |
| * loadable modules, eg libperl, our only safe choices are to change the |
| * functions at postmaster/backend launch or not at all. Since we'd rather |
| * not activate libxml in sessions that might never use it, the latter choice |
| * is the preferred one. However, for debugging purposes it can be awfully |
| * handy to constrain libxml's allocations to be done in a specific palloc |
| * context, where they're easy to track. Therefore there is code here that |
| * can be enabled in debug builds to redirect libxml's allocations into a |
| * special context LibxmlContext. It's not recommended to turn this on in |
| * a production build because of the possibility of bad interactions with |
| * external modules. |
| */ |
| /* #define USE_LIBXMLCONTEXT */ |
| |
| #include "postgres.h" |
| |
| #ifdef USE_LIBXML |
| #include <libxml/chvalid.h> |
| #include <libxml/parser.h> |
| #include <libxml/parserInternals.h> |
| #include <libxml/tree.h> |
| #include <libxml/uri.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/xmlversion.h> |
| #include <libxml/xmlwriter.h> |
| #include <libxml/xpath.h> |
| #include <libxml/xpathInternals.h> |
| |
| /* |
| * We used to check for xmlStructuredErrorContext via a configure test; but |
| * that doesn't work on Windows, so instead use this grottier method of |
| * testing the library version number. |
| */ |
| #if LIBXML_VERSION >= 20704 |
| #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1 |
| #endif |
| |
| /* |
| * libxml2 2.12 decided to insert "const" into the error handler API. |
| */ |
| #if LIBXML_VERSION >= 21200 |
| #define PgXmlErrorPtr const xmlError * |
| #else |
| #define PgXmlErrorPtr xmlErrorPtr |
| #endif |
| |
| #endif /* USE_LIBXML */ |
| |
| #include "access/htup_details.h" |
| #include "access/table.h" |
| #include "catalog/namespace.h" |
| #include "catalog/pg_class.h" |
| #include "catalog/pg_type.h" |
| #include "commands/dbcommands.h" |
| #include "executor/spi.h" |
| #include "executor/tablefunc.h" |
| #include "fmgr.h" |
| #include "lib/stringinfo.h" |
| #include "libpq/pqformat.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "nodes/execnodes.h" |
| #include "nodes/nodeFuncs.h" |
| #include "utils/array.h" |
| #include "utils/builtins.h" |
| #include "utils/date.h" |
| #include "utils/datetime.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/rel.h" |
| #include "utils/syscache.h" |
| #include "utils/xml.h" |
| |
| |
/*
 * GUC variables.
 *
 * xmloption is passed as the XmlOptionType argument to xml_parse()/xmlparse()
 * by xml_in, xml_recv and texttoxml.  xmlbinary is not referenced in the part
 * of the file shown here.
 */
int			xmlbinary;
int			xmloption;
| |
| #ifdef USE_LIBXML |
| |
| /* random number to identify PgXmlErrorContext */ |
| #define ERRCXT_MAGIC 68275028 |
| |
| static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, |
| xmlParserCtxtPtr ctxt); |
| struct PgXmlErrorContext |
| { |
| int magic; |
| /* strictness argument passed to pg_xml_init */ |
| PgXmlStrictness strictness; |
| /* current error status and accumulated message, if any */ |
| bool err_occurred; |
| StringInfoData err_buf; |
| /* previous libxml error handling state (saved by pg_xml_init) */ |
| xmlStructuredErrorFunc saved_errfunc; |
| void *saved_errcxt; |
| /* previous libxml entity handler (saved by pg_xml_init) */ |
| xmlExternalEntityLoader saved_entityfunc; |
| }; |
| |
| static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, |
| xmlParserCtxtPtr ctxt); |
| static void xml_errorHandler(void *data, PgXmlErrorPtr error); |
| static void xml_ereport_by_code(int level, int sqlcode, |
| const char *msg, int errcode); |
| static void chopStringInfoNewlines(StringInfo str); |
| static void appendStringInfoLineSeparator(StringInfo str); |
| |
| #ifdef USE_LIBXMLCONTEXT |
| |
| static MemoryContext LibxmlContext = NULL; |
| |
| static void xml_memory_init(void); |
| static void *xml_palloc(size_t size); |
| static void *xml_repalloc(void *ptr, size_t size); |
| static void xml_pfree(void *ptr); |
| static char *xml_pstrdup(const char *string); |
| #endif /* USE_LIBXMLCONTEXT */ |
| |
| static xmlChar *xml_text2xmlChar(text *in); |
| static int parse_xml_decl(const xmlChar *str, size_t *lenp, |
| xmlChar **version, xmlChar **encoding, int *standalone); |
| static bool print_xml_decl(StringInfo buf, const xmlChar *version, |
| pg_enc encoding, int standalone); |
| static bool xml_doctype_in_content(const xmlChar *str); |
| static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, |
| bool preserve_whitespace, int encoding); |
| static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); |
| static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, |
| ArrayBuildState *astate, |
| PgXmlErrorContext *xmlerrcxt); |
| static xmlChar *pg_xmlCharStrndup(const char *str, size_t len); |
| #endif /* USE_LIBXML */ |
| |
| static void xmldata_root_element_start(StringInfo result, const char *eltname, |
| const char *xmlschema, const char *targetns, |
| bool top_level); |
| static void xmldata_root_element_end(StringInfo result, const char *eltname); |
| static StringInfo query_to_xml_internal(const char *query, char *tablename, |
| const char *xmlschema, bool nulls, bool tableforest, |
| const char *targetns, bool top_level); |
| static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, |
| bool nulls, bool tableforest, const char *targetns); |
| static const char *map_sql_schema_to_xmlschema_types(Oid nspid, |
| List *relid_list, bool nulls, |
| bool tableforest, const char *targetns); |
| static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list, |
| bool nulls, bool tableforest, |
| const char *targetns); |
| static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod); |
| static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list); |
| static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod); |
| static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, |
| char *tablename, bool nulls, bool tableforest, |
| const char *targetns, bool top_level); |
| |
| /* XMLTABLE support */ |
| #ifdef USE_LIBXML |
| /* random number to identify XmlTableContext */ |
| #define XMLTABLE_CONTEXT_MAGIC 46922182 |
| typedef struct XmlTableBuilderData |
| { |
| int magic; |
| int natts; |
| long int row_count; |
| PgXmlErrorContext *xmlerrcxt; |
| xmlParserCtxtPtr ctxt; |
| xmlDocPtr doc; |
| xmlXPathContextPtr xpathcxt; |
| xmlXPathCompExprPtr xpathcomp; |
| xmlXPathObjectPtr xpathobj; |
| xmlXPathCompExprPtr *xpathscomp; |
| } XmlTableBuilderData; |
| #endif |
| |
| static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts); |
| static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value); |
| static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name, |
| const char *uri); |
| static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path); |
| static void XmlTableSetColumnFilter(struct TableFuncScanState *state, |
| const char *path, int colnum); |
| static bool XmlTableFetchRow(struct TableFuncScanState *state); |
| static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum, |
| Oid typid, int32 typmod, bool *isnull); |
| static void XmlTableDestroyOpaque(struct TableFuncScanState *state); |
| |
| const TableFuncRoutine XmlTableRoutine = |
| { |
| XmlTableInitOpaque, |
| XmlTableSetDocument, |
| XmlTableSetNamespace, |
| XmlTableSetRowFilter, |
| XmlTableSetColumnFilter, |
| XmlTableFetchRow, |
| XmlTableGetValue, |
| XmlTableDestroyOpaque |
| }; |
| |
/*
 * Raise the standard "feature not supported" ERROR used by every entry point
 * that cannot work when the server was built without libxml.  Callers follow
 * the macro with a dummy return statement.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using %s.", "--with-libxml")))
| |
| |
| /* from SQL/XML:2008 section 4.9 */ |
| #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema" |
| #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance" |
| #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml" |
| |
| |
| #ifdef USE_LIBXML |
| |
| static int |
| xmlChar_to_encoding(const xmlChar *encoding_name) |
| { |
| int encoding = pg_char_to_encoding((const char *) encoding_name); |
| |
| if (encoding < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid encoding name \"%s\"", |
| (const char *) encoding_name))); |
| return encoding; |
| } |
| #endif |
| |
| |
| /* |
| * xml_in uses a plain C string to VARDATA conversion, so for the time being |
| * we use the conversion function for the text datatype. |
| * |
| * This is only acceptable so long as xmltype and text use the same |
| * representation. |
| */ |
| Datum |
| xml_in(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| char *s = PG_GETARG_CSTRING(0); |
| xmltype *vardata; |
| xmlDocPtr doc; |
| |
| vardata = (xmltype *) cstring_to_text(s); |
| |
| /* |
| * Parse the data to check if it is well-formed XML data. Assume that |
| * ERROR occurred if parsing failed. |
| */ |
| doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding()); |
| xmlFreeDoc(doc); |
| |
| PG_RETURN_XML_P(vardata); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| |
| #define PG_XML_DEFAULT_VERSION "1.0" |
| |
| |
| /* |
| * xml_out_internal uses a plain VARDATA to C string conversion, so for the |
| * time being we use the conversion function for the text datatype. |
| * |
| * This is only acceptable so long as xmltype and text use the same |
| * representation. |
| */ |
| static char * |
| xml_out_internal(xmltype *x, pg_enc target_encoding) |
| { |
| char *str = text_to_cstring((text *) x); |
| |
| #ifdef USE_LIBXML |
| size_t len = strlen(str); |
| xmlChar *version; |
| int standalone; |
| int res_code; |
| |
| if ((res_code = parse_xml_decl((xmlChar *) str, |
| &len, &version, NULL, &standalone)) == 0) |
| { |
| StringInfoData buf; |
| |
| initStringInfo(&buf); |
| |
| if (!print_xml_decl(&buf, version, target_encoding, standalone)) |
| { |
| /* |
| * If we are not going to produce an XML declaration, eat a single |
| * newline in the original string to prevent empty first lines in |
| * the output. |
| */ |
| if (*(str + len) == '\n') |
| len += 1; |
| } |
| appendStringInfoString(&buf, str + len); |
| |
| pfree(str); |
| |
| return buf.data; |
| } |
| |
| xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR, |
| "could not parse XML declaration in stored value", |
| res_code); |
| #endif |
| return str; |
| } |
| |
| |
| Datum |
| xml_out(PG_FUNCTION_ARGS) |
| { |
| xmltype *x = PG_GETARG_XML_P(0); |
| |
| /* |
| * xml_out removes the encoding property in all cases. This is because we |
| * cannot control from here whether the datum will be converted to a |
| * different client encoding, so we'd do more harm than good by including |
| * it. |
| */ |
| PG_RETURN_CSTRING(xml_out_internal(x, 0)); |
| } |
| |
| |
| Datum |
| xml_recv(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
| xmltype *result; |
| char *str; |
| char *newstr; |
| int nbytes; |
| xmlDocPtr doc; |
| xmlChar *encodingStr = NULL; |
| int encoding; |
| |
| /* |
| * Read the data in raw format. We don't know yet what the encoding is, as |
| * that information is embedded in the xml declaration; so we have to |
| * parse that before converting to server encoding. |
| */ |
| nbytes = buf->len - buf->cursor; |
| str = (char *) pq_getmsgbytes(buf, nbytes); |
| |
| /* |
| * We need a null-terminated string to pass to parse_xml_decl(). Rather |
| * than make a separate copy, make the temporary result one byte bigger |
| * than it needs to be. |
| */ |
| result = palloc(nbytes + 1 + VARHDRSZ); |
| SET_VARSIZE(result, nbytes + VARHDRSZ); |
| memcpy(VARDATA(result), str, nbytes); |
| str = VARDATA(result); |
| str[nbytes] = '\0'; |
| |
| parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL); |
| |
| /* |
| * If encoding wasn't explicitly specified in the XML header, treat it as |
| * UTF-8, as that's the default in XML. This is different from xml_in(), |
| * where the input has to go through the normal client to server encoding |
| * conversion. |
| */ |
| encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8; |
| |
| /* |
| * Parse the data to check if it is well-formed XML data. Assume that |
| * xml_parse will throw ERROR if not. |
| */ |
| doc = xml_parse(result, xmloption, true, encoding); |
| xmlFreeDoc(doc); |
| |
| /* Now that we know what we're dealing with, convert to server encoding */ |
| newstr = pg_any_to_server(str, nbytes, encoding); |
| |
| if (newstr != str) |
| { |
| pfree(result); |
| result = (xmltype *) cstring_to_text(newstr); |
| pfree(newstr); |
| } |
| |
| PG_RETURN_XML_P(result); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| |
| Datum |
| xml_send(PG_FUNCTION_ARGS) |
| { |
| xmltype *x = PG_GETARG_XML_P(0); |
| char *outval; |
| StringInfoData buf; |
| |
| /* |
| * xml_out_internal doesn't convert the encoding, it just prints the right |
| * declaration. pq_sendtext will do the conversion. |
| */ |
| outval = xml_out_internal(x, pg_get_client_encoding()); |
| |
| pq_begintypsend(&buf); |
| pq_sendtext(&buf, outval, strlen(outval)); |
| pfree(outval); |
| PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); |
| } |
| |
| |
| #ifdef USE_LIBXML |
| static void |
| appendStringInfoText(StringInfo str, const text *t) |
| { |
| appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); |
| } |
| #endif |
| |
| |
| static xmltype * |
| stringinfo_to_xmltype(StringInfo buf) |
| { |
| return (xmltype *) cstring_to_text_with_len(buf->data, buf->len); |
| } |
| |
| |
| static xmltype * |
| cstring_to_xmltype(const char *string) |
| { |
| return (xmltype *) cstring_to_text(string); |
| } |
| |
| |
| #ifdef USE_LIBXML |
| static xmltype * |
| xmlBuffer_to_xmltype(xmlBufferPtr buf) |
| { |
| return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf), |
| xmlBufferLength(buf)); |
| } |
| #endif |
| |
| |
| Datum |
| xmlcomment(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *arg = PG_GETARG_TEXT_PP(0); |
| char *argdata = VARDATA_ANY(arg); |
| int len = VARSIZE_ANY_EXHDR(arg); |
| StringInfoData buf; |
| int i; |
| |
| /* check for "--" in string or "-" at the end */ |
| for (i = 1; i < len; i++) |
| { |
| if (argdata[i] == '-' && argdata[i - 1] == '-') |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_COMMENT), |
| errmsg("invalid XML comment"))); |
| } |
| if (len > 0 && argdata[len - 1] == '-') |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_COMMENT), |
| errmsg("invalid XML comment"))); |
| |
| initStringInfo(&buf); |
| appendStringInfoString(&buf, "<!--"); |
| appendStringInfoText(&buf, arg); |
| appendStringInfoString(&buf, "-->"); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| |
| |
| /* |
| * TODO: xmlconcat needs to merge the notations and unparsed entities |
| * of the argument values. Not very important in practice, though. |
| */ |
| xmltype * |
| xmlconcat(List *args) |
| { |
| #ifdef USE_LIBXML |
| int global_standalone = 1; |
| xmlChar *global_version = NULL; |
| bool global_version_no_value = false; |
| StringInfoData buf; |
| ListCell *v; |
| |
| initStringInfo(&buf); |
| foreach(v, args) |
| { |
| xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v))); |
| size_t len; |
| xmlChar *version; |
| int standalone; |
| char *str; |
| |
| len = VARSIZE(x) - VARHDRSZ; |
| str = text_to_cstring((text *) x); |
| |
| parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone); |
| |
| if (standalone == 0 && global_standalone == 1) |
| global_standalone = 0; |
| if (standalone < 0) |
| global_standalone = -1; |
| |
| if (!version) |
| global_version_no_value = true; |
| else if (!global_version) |
| global_version = version; |
| else if (xmlStrcmp(version, global_version) != 0) |
| global_version_no_value = true; |
| |
| appendStringInfoString(&buf, str + len); |
| pfree(str); |
| } |
| |
| if (!global_version_no_value || global_standalone >= 0) |
| { |
| StringInfoData buf2; |
| |
| initStringInfo(&buf2); |
| |
| print_xml_decl(&buf2, |
| (!global_version_no_value) ? global_version : NULL, |
| 0, |
| global_standalone); |
| |
| appendBinaryStringInfo(&buf2, buf.data, buf.len); |
| buf = buf2; |
| } |
| |
| return stringinfo_to_xmltype(&buf); |
| #else |
| NO_XML_SUPPORT(); |
| return NULL; |
| #endif |
| } |
| |
| |
| /* |
| * XMLAGG support |
| */ |
| Datum |
| xmlconcat2(PG_FUNCTION_ARGS) |
| { |
| if (PG_ARGISNULL(0)) |
| { |
| if (PG_ARGISNULL(1)) |
| PG_RETURN_NULL(); |
| else |
| PG_RETURN_XML_P(PG_GETARG_XML_P(1)); |
| } |
| else if (PG_ARGISNULL(1)) |
| PG_RETURN_XML_P(PG_GETARG_XML_P(0)); |
| else |
| PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0), |
| PG_GETARG_XML_P(1)))); |
| } |
| |
| |
| Datum |
| texttoxml(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_PP(0); |
| |
| PG_RETURN_XML_P(xmlparse(data, xmloption, true)); |
| } |
| |
| |
| Datum |
| xmltotext(PG_FUNCTION_ARGS) |
| { |
| xmltype *data = PG_GETARG_XML_P(0); |
| |
| /* It's actually binary compatible. */ |
| PG_RETURN_TEXT_P((text *) data); |
| } |
| |
| |
| text * |
| xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg) |
| { |
| if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data)) |
| ereport(ERROR, |
| (errcode(ERRCODE_NOT_AN_XML_DOCUMENT), |
| errmsg("not an XML document"))); |
| |
| /* It's actually binary compatible, save for the above check. */ |
| return (text *) data; |
| } |
| |
| |
| xmltype * |
| xmlelement(XmlExpr *xexpr, |
| Datum *named_argvalue, bool *named_argnull, |
| Datum *argvalue, bool *argnull) |
| { |
| #ifdef USE_LIBXML |
| xmltype *result; |
| List *named_arg_strings; |
| List *arg_strings; |
| int i; |
| ListCell *arg; |
| ListCell *narg; |
| PgXmlErrorContext *xmlerrcxt; |
| volatile xmlBufferPtr buf = NULL; |
| volatile xmlTextWriterPtr writer = NULL; |
| |
| /* |
| * All arguments are already evaluated, and their values are passed in the |
| * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids |
| * issues if one of the arguments involves a call to some other function |
| * or subsystem that wants to use libxml on its own terms. We examine the |
| * original XmlExpr to identify the numbers and types of the arguments. |
| */ |
| named_arg_strings = NIL; |
| i = 0; |
| foreach(arg, xexpr->named_args) |
| { |
| Expr *e = (Expr *) lfirst(arg); |
| char *str; |
| |
| if (named_argnull[i]) |
| str = NULL; |
| else |
| str = map_sql_value_to_xml_value(named_argvalue[i], |
| exprType((Node *) e), |
| false); |
| named_arg_strings = lappend(named_arg_strings, str); |
| i++; |
| } |
| |
| arg_strings = NIL; |
| i = 0; |
| foreach(arg, xexpr->args) |
| { |
| Expr *e = (Expr *) lfirst(arg); |
| char *str; |
| |
| /* here we can just forget NULL elements immediately */ |
| if (!argnull[i]) |
| { |
| str = map_sql_value_to_xml_value(argvalue[i], |
| exprType((Node *) e), |
| true); |
| arg_strings = lappend(arg_strings, str); |
| } |
| i++; |
| } |
| |
| xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); |
| |
| PG_TRY(); |
| { |
| buf = xmlBufferCreate(); |
| if (buf == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate xmlBuffer"); |
| writer = xmlNewTextWriterMemory(buf, 0); |
| if (writer == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate xmlTextWriter"); |
| |
| xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); |
| |
| forboth(arg, named_arg_strings, narg, xexpr->arg_names) |
| { |
| char *str = (char *) lfirst(arg); |
| char *argname = strVal(lfirst(narg)); |
| |
| if (str) |
| xmlTextWriterWriteAttribute(writer, |
| (xmlChar *) argname, |
| (xmlChar *) str); |
| } |
| |
| foreach(arg, arg_strings) |
| { |
| char *str = (char *) lfirst(arg); |
| |
| xmlTextWriterWriteRaw(writer, (xmlChar *) str); |
| } |
| |
| xmlTextWriterEndElement(writer); |
| |
| /* we MUST do this now to flush data out to the buffer ... */ |
| xmlFreeTextWriter(writer); |
| writer = NULL; |
| |
| result = xmlBuffer_to_xmltype(buf); |
| } |
| PG_CATCH(); |
| { |
| if (writer) |
| xmlFreeTextWriter(writer); |
| if (buf) |
| xmlBufferFree(buf); |
| |
| pg_xml_done(xmlerrcxt, true); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlBufferFree(buf); |
| |
| pg_xml_done(xmlerrcxt, false); |
| |
| return result; |
| #else |
| NO_XML_SUPPORT(); |
| return NULL; |
| #endif |
| } |
| |
| |
| xmltype * |
| xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) |
| { |
| #ifdef USE_LIBXML |
| xmlDocPtr doc; |
| |
| doc = xml_parse(data, xmloption_arg, preserve_whitespace, |
| GetDatabaseEncoding()); |
| xmlFreeDoc(doc); |
| |
| return (xmltype *) data; |
| #else |
| NO_XML_SUPPORT(); |
| return NULL; |
| #endif |
| } |
| |
| |
| xmltype * |
| xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null) |
| { |
| #ifdef USE_LIBXML |
| xmltype *result; |
| StringInfoData buf; |
| |
| if (pg_strcasecmp(target, "xml") == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), /* really */ |
| errmsg("invalid XML processing instruction"), |
| errdetail("XML processing instruction target name cannot be \"%s\".", target))); |
| |
| /* |
| * Following the SQL standard, the null check comes after the syntax check |
| * above. |
| */ |
| *result_is_null = arg_is_null; |
| if (*result_is_null) |
| return NULL; |
| |
| initStringInfo(&buf); |
| |
| appendStringInfo(&buf, "<?%s", target); |
| |
| if (arg != NULL) |
| { |
| char *string; |
| |
| string = text_to_cstring(arg); |
| if (strstr(string, "?>") != NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), |
| errmsg("invalid XML processing instruction"), |
| errdetail("XML processing instruction cannot contain \"?>\"."))); |
| |
| appendStringInfoChar(&buf, ' '); |
| appendStringInfoString(&buf, string + strspn(string, " ")); |
| pfree(string); |
| } |
| appendStringInfoString(&buf, "?>"); |
| |
| result = stringinfo_to_xmltype(&buf); |
| pfree(buf.data); |
| return result; |
| #else |
| NO_XML_SUPPORT(); |
| return NULL; |
| #endif |
| } |
| |
| |
| xmltype * |
| xmlroot(xmltype *data, text *version, int standalone) |
| { |
| #ifdef USE_LIBXML |
| char *str; |
| size_t len; |
| xmlChar *orig_version; |
| int orig_standalone; |
| StringInfoData buf; |
| |
| len = VARSIZE(data) - VARHDRSZ; |
| str = text_to_cstring((text *) data); |
| |
| parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone); |
| |
| if (version) |
| orig_version = xml_text2xmlChar(version); |
| else |
| orig_version = NULL; |
| |
| switch (standalone) |
| { |
| case XML_STANDALONE_YES: |
| orig_standalone = 1; |
| break; |
| case XML_STANDALONE_NO: |
| orig_standalone = 0; |
| break; |
| case XML_STANDALONE_NO_VALUE: |
| orig_standalone = -1; |
| break; |
| case XML_STANDALONE_OMITTED: |
| /* leave original value */ |
| break; |
| } |
| |
| initStringInfo(&buf); |
| print_xml_decl(&buf, orig_version, 0, orig_standalone); |
| appendStringInfoString(&buf, str + len); |
| |
| return stringinfo_to_xmltype(&buf); |
| #else |
| NO_XML_SUPPORT(); |
| return NULL; |
| #endif |
| } |
| |
| |
| /* |
| * Validate document (given as string) against DTD (given as external link) |
| * |
| * This has been removed because it is a security hole: unprivileged users |
| * should not be able to use Postgres to fetch arbitrary external files, |
| * which unfortunately is exactly what libxml is willing to do with the DTD |
| * parameter. |
| */ |
| Datum |
| xmlvalidate(PG_FUNCTION_ARGS) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("xmlvalidate is not implemented"))); |
| return 0; |
| } |
| |
| |
| bool |
| xml_is_document(xmltype *arg) |
| { |
| #ifdef USE_LIBXML |
| bool result; |
| volatile xmlDocPtr doc = NULL; |
| MemoryContext ccxt = CurrentMemoryContext; |
| |
| /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ |
| PG_TRY(); |
| { |
| doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, |
| GetDatabaseEncoding()); |
| result = true; |
| } |
| PG_CATCH(); |
| { |
| ErrorData *errdata; |
| MemoryContext ecxt; |
| |
| ecxt = MemoryContextSwitchTo(ccxt); |
| errdata = CopyErrorData(); |
| if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT) |
| { |
| FlushErrorState(); |
| result = false; |
| } |
| else |
| { |
| MemoryContextSwitchTo(ecxt); |
| PG_RE_THROW(); |
| } |
| } |
| PG_END_TRY(); |
| |
| if (doc) |
| xmlFreeDoc(doc); |
| |
| return result; |
| #else /* not USE_LIBXML */ |
| NO_XML_SUPPORT(); |
| return false; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| |
| #ifdef USE_LIBXML |
| |
| /* |
| * pg_xml_init_library --- set up for use of libxml |
| * |
| * This should be called by each function that is about to use libxml |
| * facilities but doesn't require error handling. It initializes libxml |
| * and verifies compatibility with the loaded libxml version. These are |
| * once-per-session activities. |
| * |
| * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and |
| * check) |
| */ |
| void |
| pg_xml_init_library(void) |
| { |
| static bool first_time = true; |
| |
| if (first_time) |
| { |
| /* Stuff we need do only once per session */ |
| |
| /* |
| * Currently, we have no pure UTF-8 support for internals -- check if |
| * we can work. |
| */ |
| if (sizeof(char) != sizeof(xmlChar)) |
| ereport(ERROR, |
| (errmsg("could not initialize XML library"), |
| errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", |
| (int) sizeof(char), (int) sizeof(xmlChar)))); |
| |
| #ifdef USE_LIBXMLCONTEXT |
| /* Set up libxml's memory allocation our way */ |
| xml_memory_init(); |
| #endif |
| |
| /* Check library compatibility */ |
| LIBXML_TEST_VERSION; |
| |
| first_time = false; |
| } |
| } |
| |
| /* |
| * pg_xml_init --- set up for use of libxml and register an error handler |
| * |
| * This should be called by each function that is about to use libxml |
| * facilities and requires error handling. It initializes libxml with |
| * pg_xml_init_library() and establishes our libxml error handler. |
| * |
| * strictness determines which errors are reported and which are ignored. |
| * |
| * Calls to this function MUST be followed by a PG_TRY block that guarantees |
| * that pg_xml_done() is called during either normal or error exit. |
| * |
| * This is exported for use by contrib/xml2, as well as other code that might |
| * wish to share use of this module's libxml error handler. |
| */ |
| PgXmlErrorContext * |
| pg_xml_init(PgXmlStrictness strictness) |
| { |
| PgXmlErrorContext *errcxt; |
| void *new_errcxt; |
| |
| /* Do one-time setup if needed */ |
| pg_xml_init_library(); |
| |
| /* Create error handling context structure */ |
| errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext)); |
| errcxt->magic = ERRCXT_MAGIC; |
| errcxt->strictness = strictness; |
| errcxt->err_occurred = false; |
| initStringInfo(&errcxt->err_buf); |
| |
| /* |
| * Save original error handler and install ours. libxml originally didn't |
| * distinguish between the contexts for generic and for structured error |
| * handlers. If we're using an old libxml version, we must thus save the |
| * generic error context, even though we're using a structured error |
| * handler. |
| */ |
| errcxt->saved_errfunc = xmlStructuredError; |
| |
| #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT |
| errcxt->saved_errcxt = xmlStructuredErrorContext; |
| #else |
| errcxt->saved_errcxt = xmlGenericErrorContext; |
| #endif |
| |
| xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler); |
| |
| /* set up our entity loader, too */ |
| xmlSetExternalEntityLoader(xmlPgEntityLoader); |
| |
| /* |
| * Verify that xmlSetStructuredErrorFunc set the context variable we |
| * expected it to. If not, the error context pointer we just saved is not |
| * the correct thing to restore, and since that leaves us without a way to |
| * restore the context in pg_xml_done, we must fail. |
| * |
| * The only known situation in which this test fails is if we compile with |
| * headers from a libxml2 that doesn't track the structured error context |
| * separately (< 2.7.4), but at runtime use a version that does, or vice |
| * versa. The libxml2 authors did not treat that change as constituting |
| * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library |
| * fails to protect us from this. |
| */ |
| |
| #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT |
| new_errcxt = xmlStructuredErrorContext; |
| #else |
| new_errcxt = xmlGenericErrorContext; |
| #endif |
| |
| if (new_errcxt != (void *) errcxt) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("could not set up XML error handler"), |
| errhint("This probably indicates that the version of libxml2" |
| " being used is not compatible with the libxml2" |
| " header files that PostgreSQL was built with."))); |
| |
| /* |
| * Also, install an entity loader to prevent unwanted fetches of external |
| * files and URLs. |
| */ |
| errcxt->saved_entityfunc = xmlGetExternalEntityLoader(); |
| xmlSetExternalEntityLoader(xmlPgEntityLoader); |
| |
| /* |
| * Also, install an entity loader to prevent unwanted fetches of external |
| * files and URLs. |
| */ |
| errcxt->saved_entityfunc = xmlGetExternalEntityLoader(); |
| xmlSetExternalEntityLoader(xmlPgEntityLoader); |
| |
| return errcxt; |
| } |
| |
| |
| /* |
| * pg_xml_done --- restore previous libxml error handling |
| * |
| * Resets libxml's global error-handling state to what it was before |
| * pg_xml_init() was called. |
| * |
| * This routine verifies that all pending errors have been dealt with |
| * (in assert-enabled builds, anyway). |
| */ |
| void |
| pg_xml_done(PgXmlErrorContext *errcxt, bool isError) |
| { |
| void *cur_errcxt; |
| |
| /* An assert seems like enough protection here */ |
| Assert(errcxt->magic == ERRCXT_MAGIC); |
| |
| /* |
| * In a normal exit, there should be no un-handled libxml errors. But we |
| * shouldn't try to enforce this during error recovery, since the longjmp |
| * could have been thrown before xml_ereport had a chance to run. |
| */ |
| Assert(!errcxt->err_occurred || isError); |
| |
| /* |
| * Check that libxml's global state is correct, warn if not. This is a |
| * real test and not an Assert because it has a higher probability of |
| * happening. |
| */ |
| #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT |
| cur_errcxt = xmlStructuredErrorContext; |
| #else |
| cur_errcxt = xmlGenericErrorContext; |
| #endif |
| |
| if (cur_errcxt != (void *) errcxt) |
| elog(WARNING, "libxml error handling state is out of sync with xml.c"); |
| |
| /* Restore the saved handlers */ |
| xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); |
| xmlSetExternalEntityLoader(errcxt->saved_entityfunc); |
| |
| /* |
| * Mark the struct as invalid, just in case somebody somehow manages to |
| * call xml_errorHandler or xml_ereport with it. |
| */ |
| errcxt->magic = 0; |
| |
| /* Release memory */ |
| pfree(errcxt->err_buf.data); |
| pfree(errcxt); |
| } |
| |
| |
| /* |
| * pg_xml_error_occurred() --- test the error flag |
| */ |
| bool |
| pg_xml_error_occurred(PgXmlErrorContext *errcxt) |
| { |
| return errcxt->err_occurred; |
| } |
| |
| |
| /* |
| * SQL/XML allows storing "XML documents" or "XML content". "XML |
| * documents" are specified by the XML specification and are parsed |
| * easily by libxml. "XML content" is specified by SQL/XML as the |
| * production "XMLDecl? content". But libxml can only parse the |
| * "content" part, so we have to parse the XML declaration ourselves |
| * to complete this. |
| */ |
| |
/*
 * Scanning helpers for the hand-written XML declaration parser.
 *
 * CHECK_XML_SPACE(p): unless *p is an XML blank, make the *enclosing
 * function* return XML_ERR_SPACE_REQUIRED.  Only usable inside functions
 * that return a libxml error code.
 *
 * SKIP_XML_SPACE(p): advance p past any run of XML blanks.  p must be a
 * modifiable lvalue.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement, e.g. as the unbraced body of an if/else.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/*
 * Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
 *
 * The argument is fully parenthesized so that an arbitrary expression can
 * be passed, but beware: it is still evaluated multiple times, so it must
 * not have side effects.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
| |
| /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */ |
| static xmlChar * |
| xml_pnstrdup(const xmlChar *str, size_t len) |
| { |
| xmlChar *result; |
| |
| result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); |
| memcpy(result, str, len * sizeof(xmlChar)); |
| result[len] = 0; |
| return result; |
| } |
| |
| /* Ditto, except input is char* */ |
| static xmlChar * |
| pg_xmlCharStrndup(const char *str, size_t len) |
| { |
| xmlChar *result; |
| |
| result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); |
| memcpy(result, str, len); |
| result[len] = '\0'; |
| |
| return result; |
| } |
| |
| /* |
| * Copy xmlChar string to PostgreSQL-owned memory, freeing the input. |
| * |
| * The input xmlChar is freed regardless of success of the copy. |
| */ |
| static char * |
| xml_pstrdup_and_free(xmlChar *str) |
| { |
| char *result; |
| |
| if (str) |
| { |
| PG_TRY(); |
| { |
| result = pstrdup((char *) str); |
| } |
| PG_FINALLY(); |
| { |
| xmlFree(str); |
| } |
| PG_END_TRY(); |
| } |
| else |
| result = NULL; |
| |
| return result; |
| } |
| |
| /* |
| * str is the null-terminated input string. Remaining arguments are |
| * output arguments; each can be NULL if value is not wanted. |
| * version and encoding are returned as locally-palloc'd strings. |
| * Result is 0 if OK, an error code if not. |
| */ |
| static int |
| parse_xml_decl(const xmlChar *str, size_t *lenp, |
| xmlChar **version, xmlChar **encoding, int *standalone) |
| { |
| const xmlChar *p; |
| const xmlChar *save_p; |
| size_t len; |
| int utf8char; |
| int utf8len; |
| |
| /* |
| * Only initialize libxml. We don't need error handling here, but we do |
| * need to make sure libxml is initialized before calling any of its |
| * functions. Note that this is safe (and a no-op) if caller has already |
| * done pg_xml_init(). |
| */ |
| pg_xml_init_library(); |
| |
| /* Initialize output arguments to "not present" */ |
| if (version) |
| *version = NULL; |
| if (encoding) |
| *encoding = NULL; |
| if (standalone) |
| *standalone = -1; |
| |
| p = str; |
| |
| if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0) |
| goto finished; |
| |
| /* |
| * If next char is a name char, it's a PI like <?xml-stylesheet ...?> |
| * rather than an XMLDecl, so we have done what we came to do and found no |
| * XMLDecl. |
| * |
| * We need an input length value for xmlGetUTF8Char, but there's no need |
| * to count the whole document size, so use strnlen not strlen. |
| */ |
| utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN); |
| utf8char = xmlGetUTF8Char(p + 5, &utf8len); |
| if (PG_XMLISNAMECHAR(utf8char)) |
| goto finished; |
| |
| p += 5; |
| |
| /* version */ |
| CHECK_XML_SPACE(p); |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0) |
| return XML_ERR_VERSION_MISSING; |
| p += 7; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_VERSION_MISSING; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| |
| if (*p == '\'' || *p == '"') |
| { |
| const xmlChar *q; |
| |
| q = xmlStrchr(p + 1, *p); |
| if (!q) |
| return XML_ERR_VERSION_MISSING; |
| |
| if (version) |
| *version = xml_pnstrdup(p + 1, q - p - 1); |
| p = q + 1; |
| } |
| else |
| return XML_ERR_VERSION_MISSING; |
| |
| /* encoding */ |
| save_p = p; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0) |
| { |
| CHECK_XML_SPACE(save_p); |
| p += 8; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_MISSING_ENCODING; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| |
| if (*p == '\'' || *p == '"') |
| { |
| const xmlChar *q; |
| |
| q = xmlStrchr(p + 1, *p); |
| if (!q) |
| return XML_ERR_MISSING_ENCODING; |
| |
| if (encoding) |
| *encoding = xml_pnstrdup(p + 1, q - p - 1); |
| p = q + 1; |
| } |
| else |
| return XML_ERR_MISSING_ENCODING; |
| } |
| else |
| { |
| p = save_p; |
| } |
| |
| /* standalone */ |
| save_p = p; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0) |
| { |
| CHECK_XML_SPACE(save_p); |
| p += 10; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_STANDALONE_VALUE; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 || |
| xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0) |
| { |
| if (standalone) |
| *standalone = 1; |
| p += 5; |
| } |
| else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 || |
| xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0) |
| { |
| if (standalone) |
| *standalone = 0; |
| p += 4; |
| } |
| else |
| return XML_ERR_STANDALONE_VALUE; |
| } |
| else |
| { |
| p = save_p; |
| } |
| |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0) |
| return XML_ERR_XMLDECL_NOT_FINISHED; |
| p += 2; |
| |
| finished: |
| len = p - str; |
| |
| for (p = str; p < str + len; p++) |
| if (*p > 127) |
| return XML_ERR_INVALID_CHAR; |
| |
| if (lenp) |
| *lenp = len; |
| |
| return XML_ERR_OK; |
| } |
| |
| |
| /* |
| * Write an XML declaration. On output, we adjust the XML declaration |
| * as follows. (These rules are the moral equivalent of the clause |
| * "Serialization of an XML value" in the SQL standard.) |
| * |
| * We try to avoid generating an XML declaration if possible. This is |
| * so that you don't get trivial things like xml '<foo/>' resulting in |
| * '<?xml version="1.0"?><foo/>', which would surely be annoying. We |
| * must provide a declaration if the standalone property is specified |
| * or if we include an encoding declaration. If we have a |
| * declaration, we must specify a version (XML requires this). |
| * Otherwise we only make a declaration if the version is not "1.0", |
| * which is the default version specified in SQL:2003. |
| */ |
| static bool |
| print_xml_decl(StringInfo buf, const xmlChar *version, |
| pg_enc encoding, int standalone) |
| { |
| if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0) |
| || (encoding && encoding != PG_UTF8) |
| || standalone != -1) |
| { |
| appendStringInfoString(buf, "<?xml"); |
| |
| if (version) |
| appendStringInfo(buf, " version=\"%s\"", version); |
| else |
| appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION); |
| |
| if (encoding && encoding != PG_UTF8) |
| { |
| /* |
| * XXX might be useful to convert this to IANA names (ISO-8859-1 |
| * instead of LATIN1 etc.); needs field experience |
| */ |
| appendStringInfo(buf, " encoding=\"%s\"", |
| pg_encoding_to_char(encoding)); |
| } |
| |
| if (standalone == 1) |
| appendStringInfoString(buf, " standalone=\"yes\""); |
| else if (standalone == 0) |
| appendStringInfoString(buf, " standalone=\"no\""); |
| appendStringInfoString(buf, "?>"); |
| |
| return true; |
| } |
| else |
| return false; |
| } |
| |
| /* |
| * Test whether an input that is to be parsed as CONTENT contains a DTD. |
| * |
| * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not |
| * satisfied by a document with a DTD, which is a bit of a wart, as it means |
| * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and |
| * later fix that, by redefining content with reference to the "more |
| * permissive" Document Node of the XQuery/XPath Data Model, such that any |
| * DOCUMENT value is indeed also a CONTENT value. That definition is more |
| * useful, as CONTENT becomes usable for parsing input of unknown form (think |
| * pg_restore). |
| * |
| * As used below in parse_xml when parsing for CONTENT, libxml does not give |
| * us the 2006+ behavior, but only the 2003; it will choke if the input has |
| * a DTD. But we can provide the 2006+ definition of CONTENT easily enough, |
| * by detecting this case first and simply doing the parse as DOCUMENT. |
| * |
| * A DTD can be found arbitrarily far in, but that would be a contrived case; |
| * it will ordinarily start within a few dozen characters. The only things |
| * that can precede it are an XMLDecl (here, the caller will have called |
| * parse_xml_decl already), whitespace, comments, and processing instructions. |
| * This function need only return true if it sees a valid sequence of such |
| * things leading to <!DOCTYPE. It can simply return false in any other |
| * cases, including malformed input; that will mean the input gets parsed as |
| * CONTENT as originally planned, with libxml reporting any errors. |
| * |
| * This is only to be called from xml_parse, when pg_xml_init has already |
| * been called. The input is already in UTF8 encoding. |
| */ |
| static bool |
| xml_doctype_in_content(const xmlChar *str) |
| { |
| const xmlChar *p = str; |
| |
| for (;;) |
| { |
| const xmlChar *e; |
| |
| SKIP_XML_SPACE(p); |
| if (*p != '<') |
| return false; |
| p++; |
| |
| if (*p == '!') |
| { |
| p++; |
| |
| /* if we see <!DOCTYPE, we can return true */ |
| if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0) |
| return true; |
| |
| /* otherwise, if it's not a comment, fail */ |
| if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0) |
| return false; |
| /* find end of comment: find -- and a > must follow */ |
| p = xmlStrstr(p + 2, (xmlChar *) "--"); |
| if (!p || p[2] != '>') |
| return false; |
| /* advance over comment, and keep scanning */ |
| p += 3; |
| continue; |
| } |
| |
| /* otherwise, if it's not a PI <?target something?>, fail */ |
| if (*p != '?') |
| return false; |
| p++; |
| |
| /* find end of PI (the string ?> is forbidden within a PI) */ |
| e = xmlStrstr(p, (xmlChar *) "?>"); |
| if (!e) |
| return false; |
| |
| /* advance over PI, keep scanning */ |
| p = e + 2; |
| } |
| } |
| |
| |
| /* |
| * Convert a C string to XML internal representation |
| * |
| * Note: it is caller's responsibility to xmlFreeDoc() the result, |
| * else a permanent memory leak will ensue! |
| * |
| * TODO maybe libxml2's xmlreader is better? (do not construct DOM, |
| * yet do not use SAX - see xmlreader.c) |
| */ |
| static xmlDocPtr |
| xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, |
| int encoding) |
| { |
| int32 len; |
| xmlChar *string; |
| xmlChar *utf8string; |
| PgXmlErrorContext *xmlerrcxt; |
| volatile xmlParserCtxtPtr ctxt = NULL; |
| volatile xmlDocPtr doc = NULL; |
| |
| len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ |
| string = xml_text2xmlChar(data); |
| |
| utf8string = pg_do_encoding_conversion(string, |
| len, |
| encoding, |
| PG_UTF8); |
| |
| /* Start up libxml and its parser */ |
| xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED); |
| |
| /* Use a TRY block to ensure we clean up correctly */ |
| PG_TRY(); |
| { |
| bool parse_as_document = false; |
| int res_code; |
| size_t count = 0; |
| xmlChar *version = NULL; |
| int standalone = 0; |
| |
| xmlInitParser(); |
| |
| ctxt = xmlNewParserCtxt(); |
| if (ctxt == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate parser context"); |
| |
| /* Decide whether to parse as document or content */ |
| if (xmloption_arg == XMLOPTION_DOCUMENT) |
| parse_as_document = true; |
| else |
| { |
| /* Parse and skip over the XML declaration, if any */ |
| res_code = parse_xml_decl(utf8string, |
| &count, &version, NULL, &standalone); |
| if (res_code != 0) |
| xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, |
| "invalid XML content: invalid XML declaration", |
| res_code); |
| |
| /* Is there a DOCTYPE element? */ |
| if (xml_doctype_in_content(utf8string + count)) |
| parse_as_document = true; |
| } |
| |
| if (parse_as_document) |
| { |
| /* |
| * Note, that here we try to apply DTD defaults |
| * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d: |
| * 'Default values defined by internal DTD are applied'. As for |
| * external DTDs, we try to support them too, (see SQL/XML:2008 GR |
| * 10.16.7.e) |
| */ |
| doc = xmlCtxtReadDoc(ctxt, utf8string, |
| NULL, |
| "UTF-8", |
| XML_PARSE_NOENT | XML_PARSE_DTDATTR |
| | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); |
| if (doc == NULL || xmlerrcxt->err_occurred) |
| { |
| /* Use original option to decide which error code to throw */ |
| if (xmloption_arg == XMLOPTION_DOCUMENT) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, |
| "invalid XML document"); |
| else |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, |
| "invalid XML content"); |
| } |
| } |
| else |
| { |
| doc = xmlNewDoc(version); |
| Assert(doc->encoding == NULL); |
| doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); |
| doc->standalone = standalone; |
| |
| /* allow empty content */ |
| if (*(utf8string + count)) |
| { |
| res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, |
| utf8string + count, NULL); |
| if (res_code != 0 || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, |
| "invalid XML content"); |
| } |
| } |
| } |
| PG_CATCH(); |
| { |
| if (doc != NULL) |
| xmlFreeDoc(doc); |
| if (ctxt != NULL) |
| xmlFreeParserCtxt(ctxt); |
| |
| pg_xml_done(xmlerrcxt, true); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlFreeParserCtxt(ctxt); |
| |
| pg_xml_done(xmlerrcxt, false); |
| |
| return doc; |
| } |
| |
| |
| /* |
| * xmlChar<->text conversions |
| */ |
| static xmlChar * |
| xml_text2xmlChar(text *in) |
| { |
| return (xmlChar *) text_to_cstring(in); |
| } |
| |
| |
| #ifdef USE_LIBXMLCONTEXT |
| |
| /* |
| * Manage the special context used for all libxml allocations (but only |
| * in special debug builds; see notes at top of file) |
| */ |
| static void |
| xml_memory_init(void) |
| { |
| /* Create memory context if not there already */ |
| if (LibxmlContext == NULL) |
| LibxmlContext = AllocSetContextCreate(TopMemoryContext, |
| "Libxml context", |
| ALLOCSET_DEFAULT_SIZES); |
| |
| /* Re-establish the callbacks even if already set */ |
| xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); |
| } |
| |
| /* |
| * Wrappers for memory management functions |
| */ |
| static void * |
| xml_palloc(size_t size) |
| { |
| return MemoryContextAlloc(LibxmlContext, size); |
| } |
| |
| |
/* xml_repalloc: resize a chunk by passing it straight to repalloc() */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
| |
| |
/* xml_pfree: pfree() a chunk, treating a NULL pointer as a no-op */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
| |
| |
| static char * |
| xml_pstrdup(const char *string) |
| { |
| return MemoryContextStrdup(LibxmlContext, string); |
| } |
| #endif /* USE_LIBXMLCONTEXT */ |
| |
| |
| /* |
| * xmlPgEntityLoader --- entity loader callback function |
| * |
| * Silently prevent any external entity URL from being loaded. We don't want |
| * to throw an error, so instead make the entity appear to expand to an empty |
| * string. |
| * |
| * We would prefer to allow loading entities that exist in the system's |
| * global XML catalog; but the available libxml2 APIs make that a complex |
| * and fragile task. For now, just shut down all external access. |
| */ |
| static xmlParserInputPtr |
| xmlPgEntityLoader(const char *URL, const char *ID, |
| xmlParserCtxtPtr ctxt) |
| { |
| return xmlNewStringInputStream(ctxt, (const xmlChar *) ""); |
| } |
| |
| |
| /* |
| * xml_ereport --- report an XML-related error |
| * |
| * The "msg" is the SQL-level message; some can be adopted from the SQL/XML |
| * standard. This function adds libxml's native error message, if any, as |
| * detail. |
| * |
| * This is exported for modules that want to share the core libxml error |
| * handler. Note that pg_xml_init() *must* have been called previously. |
| */ |
| void |
| xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) |
| { |
| char *detail; |
| |
| /* Defend against someone passing us a bogus context struct */ |
| if (errcxt->magic != ERRCXT_MAGIC) |
| elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext"); |
| |
| /* Flag that the current libxml error has been reported */ |
| errcxt->err_occurred = false; |
| |
| /* Include detail only if we have some text from libxml */ |
| if (errcxt->err_buf.len > 0) |
| detail = errcxt->err_buf.data; |
| else |
| detail = NULL; |
| |
| ereport(level, |
| (errcode(sqlcode), |
| errmsg_internal("%s", msg), |
| detail ? errdetail_internal("%s", detail) : 0)); |
| } |
| |
| |
| /* |
| * Error handler for libxml errors and warnings |
| */ |
| static void |
| xml_errorHandler(void *data, PgXmlErrorPtr error) |
| { |
| PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; |
| xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt; |
| xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL; |
| xmlNodePtr node = error->node; |
| const xmlChar *name = (node != NULL && |
| node->type == XML_ELEMENT_NODE) ? node->name : NULL; |
| int domain = error->domain; |
| int level = error->level; |
| StringInfo errorBuf; |
| |
| /* |
| * Defend against someone passing us a bogus context struct. |
| * |
| * We force a backend exit if this check fails because longjmp'ing out of |
| * libxml would likely render it unsafe to use further. |
| */ |
| if (xmlerrcxt->magic != ERRCXT_MAGIC) |
| elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext"); |
| |
| /*---------- |
| * Older libxml versions report some errors differently. |
| * First, some errors were previously reported as coming from the parser |
| * domain but are now reported as coming from the namespace domain. |
| * Second, some warnings were upgraded to errors. |
| * We attempt to compensate for that here. |
| *---------- |
| */ |
| switch (error->code) |
| { |
| case XML_WAR_NS_URI: |
| level = XML_ERR_ERROR; |
| domain = XML_FROM_NAMESPACE; |
| break; |
| |
| case XML_ERR_NS_DECL_ERROR: |
| case XML_WAR_NS_URI_RELATIVE: |
| case XML_WAR_NS_COLUMN: |
| case XML_NS_ERR_XML_NAMESPACE: |
| case XML_NS_ERR_UNDEFINED_NAMESPACE: |
| case XML_NS_ERR_QNAME: |
| case XML_NS_ERR_ATTRIBUTE_REDEFINED: |
| case XML_NS_ERR_EMPTY: |
| domain = XML_FROM_NAMESPACE; |
| break; |
| } |
| |
| /* Decide whether to act on the error or not */ |
| switch (domain) |
| { |
| case XML_FROM_PARSER: |
| case XML_FROM_NONE: |
| case XML_FROM_MEMORY: |
| case XML_FROM_IO: |
| |
| /* |
| * Suppress warnings about undeclared entities. We need to do |
| * this to avoid problems due to not loading DTD definitions. |
| */ |
| if (error->code == XML_WAR_UNDECLARED_ENTITY) |
| return; |
| |
| /* Otherwise, accept error regardless of the parsing purpose */ |
| break; |
| |
| default: |
| /* Ignore error if only doing well-formedness check */ |
| if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED) |
| return; |
| break; |
| } |
| |
| /* Prepare error message in errorBuf */ |
| errorBuf = makeStringInfo(); |
| |
| if (error->line > 0) |
| appendStringInfo(errorBuf, "line %d: ", error->line); |
| if (name != NULL) |
| appendStringInfo(errorBuf, "element %s: ", name); |
| if (error->message != NULL) |
| appendStringInfoString(errorBuf, error->message); |
| else |
| appendStringInfoString(errorBuf, "(no message provided)"); |
| |
| /* |
| * Append context information to errorBuf. |
| * |
| * xmlParserPrintFileContext() uses libxml's "generic" error handler to |
| * write the context. Since we don't want to duplicate libxml |
| * functionality here, we set up a generic error handler temporarily. |
| * |
| * We use appendStringInfo() directly as libxml's generic error handler. |
| * This should work because it has essentially the same signature as |
| * libxml expects, namely (void *ptr, const char *msg, ...). |
| */ |
| if (input != NULL) |
| { |
| xmlGenericErrorFunc errFuncSaved = xmlGenericError; |
| void *errCtxSaved = xmlGenericErrorContext; |
| |
| xmlSetGenericErrorFunc((void *) errorBuf, |
| (xmlGenericErrorFunc) appendStringInfo); |
| |
| /* Add context information to errorBuf */ |
| appendStringInfoLineSeparator(errorBuf); |
| |
| xmlParserPrintFileContext(input); |
| |
| /* Restore generic error func */ |
| xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved); |
| } |
| |
| /* Get rid of any trailing newlines in errorBuf */ |
| chopStringInfoNewlines(errorBuf); |
| |
| /* |
| * Legacy error handling mode. err_occurred is never set, we just add the |
| * message to err_buf. This mode exists because the xml2 contrib module |
| * uses our error-handling infrastructure, but we don't want to change its |
| * behaviour since it's deprecated anyway. This is also why we don't |
| * distinguish between notices, warnings and errors here --- the old-style |
| * generic error handler wouldn't have done that either. |
| */ |
| if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY) |
| { |
| appendStringInfoLineSeparator(&xmlerrcxt->err_buf); |
| appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, |
| errorBuf->len); |
| |
| pfree(errorBuf->data); |
| pfree(errorBuf); |
| return; |
| } |
| |
| /* |
| * We don't want to ereport() here because that'd probably leave libxml in |
| * an inconsistent state. Instead, we remember the error and ereport() |
| * from xml_ereport(). |
| * |
| * Warnings and notices can be reported immediately since they won't cause |
| * a longjmp() out of libxml. |
| */ |
| if (level >= XML_ERR_ERROR) |
| { |
| appendStringInfoLineSeparator(&xmlerrcxt->err_buf); |
| appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data, |
| errorBuf->len); |
| |
| xmlerrcxt->err_occurred = true; |
| } |
| else if (level >= XML_ERR_WARNING) |
| { |
| ereport(WARNING, |
| (errmsg_internal("%s", errorBuf->data))); |
| } |
| else |
| { |
| ereport(NOTICE, |
| (errmsg_internal("%s", errorBuf->data))); |
| } |
| |
| pfree(errorBuf->data); |
| pfree(errorBuf); |
| } |
| |
| |
| /* |
| * Wrapper for "ereport" function for XML-related errors. The "msg" |
| * is the SQL-level message; some can be adopted from the SQL/XML |
| * standard. This function uses "code" to create a textual detail |
| * message. At the moment, we only need to cover those codes that we |
| * may raise in this file. |
| */ |
| static void |
| xml_ereport_by_code(int level, int sqlcode, |
| const char *msg, int code) |
| { |
| const char *det; |
| |
| switch (code) |
| { |
| case XML_ERR_INVALID_CHAR: |
| det = gettext_noop("Invalid character value."); |
| break; |
| case XML_ERR_SPACE_REQUIRED: |
| det = gettext_noop("Space required."); |
| break; |
| case XML_ERR_STANDALONE_VALUE: |
| det = gettext_noop("standalone accepts only 'yes' or 'no'."); |
| break; |
| case XML_ERR_VERSION_MISSING: |
| det = gettext_noop("Malformed declaration: missing version."); |
| break; |
| case XML_ERR_MISSING_ENCODING: |
| det = gettext_noop("Missing encoding in text declaration."); |
| break; |
| case XML_ERR_XMLDECL_NOT_FINISHED: |
| det = gettext_noop("Parsing XML declaration: '?>' expected."); |
| break; |
| default: |
| det = gettext_noop("Unrecognized libxml error code: %d."); |
| break; |
| } |
| |
| ereport(level, |
| (errcode(sqlcode), |
| errmsg_internal("%s", msg), |
| errdetail(det, code))); |
| } |
| |
| |
| /* |
| * Remove all trailing newlines from a StringInfo string |
| */ |
| static void |
| chopStringInfoNewlines(StringInfo str) |
| { |
| while (str->len > 0 && str->data[str->len - 1] == '\n') |
| str->data[--str->len] = '\0'; |
| } |
| |
| |
| /* |
| * Append a newline after removing any existing trailing newlines |
| */ |
| static void |
| appendStringInfoLineSeparator(StringInfo str) |
| { |
| chopStringInfoNewlines(str); |
| if (str->len > 0) |
| appendStringInfoChar(str, '\n'); |
| } |
| |
| |
| /* |
| * Convert one char in the current server encoding to a Unicode codepoint. |
| */ |
| static pg_wchar |
| sqlchar_to_unicode(const char *s) |
| { |
| char *utf8string; |
| pg_wchar ret[2]; /* need space for trailing zero */ |
| |
| /* note we're not assuming s is null-terminated */ |
| utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8); |
| |
| pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, |
| pg_encoding_mblen(PG_UTF8, utf8string)); |
| |
| if (utf8string != s) |
| pfree(utf8string); |
| |
| return ret[0]; |
| } |
| |
| |
| static bool |
| is_valid_xml_namefirst(pg_wchar c) |
| { |
| /* (Letter | '_' | ':') */ |
| return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) |
| || c == '_' || c == ':'); |
| } |
| |
| |
| static bool |
| is_valid_xml_namechar(pg_wchar c) |
| { |
| /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ |
| return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) |
| || xmlIsDigitQ(c) |
| || c == '.' || c == '-' || c == '_' || c == ':' |
| || xmlIsCombiningQ(c) |
| || xmlIsExtenderQ(c)); |
| } |
| #endif /* USE_LIBXML */ |
| |
| |
/*
 * map_sql_identifier_to_xml_name --- SQL identifier to XML name mapping;
 * see SQL/XML:2008 section 9.1
 *
 * Returns a newly built string in which each character that cannot be used
 * as-is is replaced by an _xHHHH_ escape:
 *	- ':' at the start of the identifier, or anywhere if fully_escaped
 *	- '_' that is directly followed by 'x'
 *	- the first character of a leading "xml" (matched in any letter case),
 *	  if fully_escaped
 *	- '.', if escape_period
 *	- any character that is not valid at its position in an XML name
 *
 * Without libxml support this reports an error via NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cp;

	/*
	 * SQL/XML never asks for escape_period without fully_escaped, so that
	 * combination is probably a caller mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	for (cp = ident; *cp != '\0'; cp += pg_mblen(cp))
	{
		bool		at_start = (cp == ident);
		bool		name_ok;
		pg_wchar	u;

		if (*cp == ':' && (at_start || fully_escaped))
		{
			appendStringInfoString(&out, "_x003A_");
			continue;
		}

		if (*cp == '_' && cp[1] == 'x')
		{
			appendStringInfoString(&out, "_x005F_");
			continue;
		}

		if (fully_escaped && at_start &&
			pg_strncasecmp(cp, "xml", 3) == 0)
		{
			/* escape the leading letter, preserving its case */
			appendStringInfoString(&out,
								   (*cp == 'x') ? "_x0078_" : "_x0058_");
			continue;
		}

		if (escape_period && *cp == '.')
		{
			appendStringInfoString(&out, "_x002E_");
			continue;
		}

		/* Ordinary character: copy it if XML allows it here, else escape */
		u = sqlchar_to_unicode(cp);
		name_ok = at_start
			? is_valid_xml_namefirst(u)
			: is_valid_xml_namechar(u);

		if (name_ok)
			appendBinaryStringInfo(&out, cp, pg_mblen(cp));
		else
			appendStringInfo(&out, "_x%04X_", (unsigned int) u);
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
| |
| |
| /* |
| * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3. |
| */ |
| char * |
| map_xml_name_to_sql_identifier(const char *name) |
| { |
| StringInfoData buf; |
| const char *p; |
| |
| initStringInfo(&buf); |
| |
| for (p = name; *p; p += pg_mblen(p)) |
| { |
| if (*p == '_' && *(p + 1) == 'x' |
| && isxdigit((unsigned char) *(p + 2)) |
| && isxdigit((unsigned char) *(p + 3)) |
| && isxdigit((unsigned char) *(p + 4)) |
| && isxdigit((unsigned char) *(p + 5)) |
| && *(p + 6) == '_') |
| { |
| char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; |
| unsigned int u; |
| |
| sscanf(p + 2, "%X", &u); |
| pg_unicode_to_server(u, (unsigned char *) cbuf); |
| appendStringInfoString(&buf, cbuf); |
| p += 6; |
| } |
| else |
| appendBinaryStringInfo(&buf, p, pg_mblen(p)); |
| } |
| |
| return buf.data; |
| } |
| |
| /* |
| * Map SQL value to XML value; see SQL/XML:2008 section 9.8. |
| * |
| * When xml_escape_strings is true, then certain characters in string |
| * values are replaced by entity references (< etc.), as specified |
| * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is |
| * wanted. The false case is mainly useful when the resulting value |
| * is used with xmlTextWriterWriteAttribute() to write out an |
| * attribute, because that function does the escaping itself. |
| */ |
| char * |
| map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) |
| { |
| if (type_is_array_domain(type)) |
| { |
| ArrayType *array; |
| Oid elmtype; |
| int16 elmlen; |
| bool elmbyval; |
| char elmalign; |
| int num_elems; |
| Datum *elem_values; |
| bool *elem_nulls; |
| StringInfoData buf; |
| int i; |
| |
| array = DatumGetArrayTypeP(value); |
| elmtype = ARR_ELEMTYPE(array); |
| get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign); |
| |
| deconstruct_array(array, elmtype, |
| elmlen, elmbyval, elmalign, |
| &elem_values, &elem_nulls, |
| &num_elems); |
| |
| initStringInfo(&buf); |
| |
| for (i = 0; i < num_elems; i++) |
| { |
| if (elem_nulls[i]) |
| continue; |
| appendStringInfoString(&buf, "<element>"); |
| appendStringInfoString(&buf, |
| map_sql_value_to_xml_value(elem_values[i], |
| elmtype, true)); |
| appendStringInfoString(&buf, "</element>"); |
| } |
| |
| pfree(elem_values); |
| pfree(elem_nulls); |
| |
| return buf.data; |
| } |
| else |
| { |
| Oid typeOut; |
| bool isvarlena; |
| char *str; |
| |
| /* |
| * Flatten domains; the special-case treatments below should apply to, |
| * eg, domains over boolean not just boolean. |
| */ |
| type = getBaseType(type); |
| |
| /* |
| * Special XSD formatting for some data types |
| */ |
| switch (type) |
| { |
| case BOOLOID: |
| if (DatumGetBool(value)) |
| return "true"; |
| else |
| return "false"; |
| |
| case DATEOID: |
| { |
| DateADT date; |
| struct pg_tm tm; |
| char buf[MAXDATELEN + 1]; |
| |
| date = DatumGetDateADT(value); |
| /* XSD doesn't support infinite values */ |
| if (DATE_NOT_FINITE(date)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), |
| errmsg("date out of range"), |
| errdetail("XML does not support infinite date values."))); |
| j2date(date + POSTGRES_EPOCH_JDATE, |
| &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday)); |
| EncodeDateOnly(&tm, USE_XSD_DATES, buf); |
| |
| return pstrdup(buf); |
| } |
| |
| case TIMESTAMPOID: |
| { |
| Timestamp timestamp; |
| struct pg_tm tm; |
| fsec_t fsec; |
| char buf[MAXDATELEN + 1]; |
| |
| timestamp = DatumGetTimestamp(value); |
| |
| /* XSD doesn't support infinite values */ |
| if (TIMESTAMP_NOT_FINITE(timestamp)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), |
| errmsg("timestamp out of range"), |
| errdetail("XML does not support infinite timestamp values."))); |
| else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0) |
| EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), |
| errmsg("timestamp out of range"))); |
| |
| return pstrdup(buf); |
| } |
| |
| case TIMESTAMPTZOID: |
| { |
| TimestampTz timestamp; |
| struct pg_tm tm; |
| int tz; |
| fsec_t fsec; |
| const char *tzn = NULL; |
| char buf[MAXDATELEN + 1]; |
| |
| timestamp = DatumGetTimestamp(value); |
| |
| /* XSD doesn't support infinite values */ |
| if (TIMESTAMP_NOT_FINITE(timestamp)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), |
| errmsg("timestamp out of range"), |
| errdetail("XML does not support infinite timestamp values."))); |
| else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0) |
| EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), |
| errmsg("timestamp out of range"))); |
| |
| return pstrdup(buf); |
| } |
| |
| #ifdef USE_LIBXML |
| case BYTEAOID: |
| { |
| bytea *bstr = DatumGetByteaPP(value); |
| PgXmlErrorContext *xmlerrcxt; |
| volatile xmlBufferPtr buf = NULL; |
| volatile xmlTextWriterPtr writer = NULL; |
| char *result; |
| |
| xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); |
| |
| PG_TRY(); |
| { |
| buf = xmlBufferCreate(); |
| if (buf == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate xmlBuffer"); |
| writer = xmlNewTextWriterMemory(buf, 0); |
| if (writer == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate xmlTextWriter"); |
| |
| if (xmlbinary == XMLBINARY_BASE64) |
| xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr), |
| 0, VARSIZE_ANY_EXHDR(bstr)); |
| else |
| xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr), |
| 0, VARSIZE_ANY_EXHDR(bstr)); |
| |
| /* we MUST do this now to flush data out to the buffer */ |
| xmlFreeTextWriter(writer); |
| writer = NULL; |
| |
| result = pstrdup((const char *) xmlBufferContent(buf)); |
| } |
| PG_CATCH(); |
| { |
| if (writer) |
| xmlFreeTextWriter(writer); |
| if (buf) |
| xmlBufferFree(buf); |
| |
| pg_xml_done(xmlerrcxt, true); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlBufferFree(buf); |
| |
| pg_xml_done(xmlerrcxt, false); |
| |
| return result; |
| } |
| #endif /* USE_LIBXML */ |
| |
| } |
| |
| /* |
| * otherwise, just use the type's native text representation |
| */ |
| getTypeOutputInfo(type, &typeOut, &isvarlena); |
| str = OidOutputFunctionCall(typeOut, value); |
| |
| /* ... exactly as-is for XML, and when escaping is not wanted */ |
| if (type == XMLOID || !xml_escape_strings) |
| return str; |
| |
| /* otherwise, translate special characters as needed */ |
| return escape_xml(str); |
| } |
| } |
| |
| |
| /* |
| * Escape characters in text that have special meanings in XML. |
| * |
| * Returns a palloc'd string. |
| * |
| * NB: this is intentionally not dependent on libxml. |
| */ |
| char * |
| escape_xml(const char *str) |
| { |
| StringInfoData buf; |
| const char *p; |
| |
| initStringInfo(&buf); |
| for (p = str; *p; p++) |
| { |
| switch (*p) |
| { |
| case '&': |
| appendStringInfoString(&buf, "&"); |
| break; |
| case '<': |
| appendStringInfoString(&buf, "<"); |
| break; |
| case '>': |
| appendStringInfoString(&buf, ">"); |
| break; |
| case '\r': |
| appendStringInfoString(&buf, "
"); |
| break; |
| default: |
| appendStringInfoCharMacro(&buf, *p); |
| break; |
| } |
| } |
| return buf.data; |
| } |
| |
| |
/*
 * Duplicate a NUL-terminated string into memory obtained from SPI_palloc().
 *
 * The terminating NUL is copied along with the characters.
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	/* byte count includes the trailing NUL */
	nbytes = strlen(s) + 1;
	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
| |
| |
| /* |
| * SQL to XML mapping functions |
| * |
| * What follows below was at one point intentionally organized so that |
| * you can read along in the SQL/XML standard. The functions are |
| * mostly split up the way the clauses lay out in the standards |
| * document, and the identifiers are also aligned with the standard |
| * text. Unfortunately, SQL/XML:2006 reordered the clauses |
| * differently than SQL/XML:2003, so the order below doesn't make much |
| * sense anymore. |
| * |
| * There are many things going on there: |
| * |
| * There are two kinds of mappings: Mapping SQL data (table contents) |
| * to XML documents, and mapping SQL structure (the "schema") to XML |
| * Schema. And there are functions that do both at the same time. |
| * |
| * Then you can map a database, a schema, or a table, each in both |
| * ways. This breaks down recursively: Mapping a database invokes |
| * mapping schemas, which invokes mapping tables, which invokes |
| * mapping rows, which invokes mapping columns, although you can't |
| * call the last two from the outside. Because of this, there are a |
| * number of xyz_internal() functions which are to be called both from |
| * the function manager wrapper and from some upper layer in a |
| * recursive call. |
| * |
| * See the documentation about what the common function arguments |
| * nulls, tableforest, and targetns mean. |
| * |
| * Some style guidelines for XML output: Use double quotes for quoting |
| * XML attributes. Indent XML elements by two spaces, but remember |
| * that a lot of code is called recursively at different levels, so |
| * it's better not to indent rather than create output that indents |
| * and outdents weirdly. Add newlines to make the output look nice. |
| */ |
| |
| |
| /* |
| * Visibility of objects for XML mappings; see SQL/XML:2008 section |
| * 4.10.8. |
| */ |
| |
| /* |
| * Given a query, which must return type oid as first column, produce |
| * a list of Oids with the query results. |
| */ |
| static List * |
| query_to_oid_list(const char *query) |
| { |
| uint64 i; |
| List *list = NIL; |
| int spi_result; |
| |
| spi_result = SPI_execute(query, true, 0); |
| if (spi_result != SPI_OK_SELECT) |
| elog(ERROR, "SPI_execute returned %s for %s", |
| SPI_result_code_string(spi_result), query); |
| |
| for (i = 0; i < SPI_processed; i++) |
| { |
| Datum oid; |
| bool isnull; |
| |
| oid = SPI_getbinval(SPI_tuptable->vals[i], |
| SPI_tuptable->tupdesc, |
| 1, |
| &isnull); |
| if (!isnull) |
| list = lappend_oid(list, DatumGetObjectId(oid)); |
| } |
| |
| return list; |
| } |
| |
| |
| static List * |
| schema_get_xml_visible_tables(Oid nspid) |
| { |
| StringInfoData query; |
| |
| initStringInfo(&query); |
| appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class" |
| " WHERE relnamespace = %u AND relkind IN (" |
| CppAsString2(RELKIND_RELATION) "," |
| CppAsString2(RELKIND_DIRECTORY_TABLE) "," |
| CppAsString2(RELKIND_MATVIEW) "," |
| CppAsString2(RELKIND_VIEW) ")" |
| " AND pg_catalog.has_table_privilege (oid, 'SELECT')" |
| " ORDER BY relname;", nspid); |
| |
| return query_to_oid_list(query.data); |
| } |
| |
| |
/*
 * Query fragments selecting the schemas that take part in a database
 * mapping.  System schemas (names starting with "pg_", plus
 * information_schema) are left out, since including them is probably not
 * useful.  XML_VISIBLE_SCHEMAS has no trailing semicolon, so callers can
 * append further clauses or embed it as a subquery.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
| |
| |
| static List * |
| database_get_xml_visible_schemas(void) |
| { |
| return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;"); |
| } |
| |
| |
| static List * |
| database_get_xml_visible_tables(void) |
| { |
| /* At the moment there is no order required here. */ |
| return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class" |
| " WHERE relkind IN (" |
| CppAsString2(RELKIND_RELATION) "," |
| CppAsString2(RELKIND_DIRECTORY_TABLE) "," |
| CppAsString2(RELKIND_MATVIEW) "," |
| CppAsString2(RELKIND_VIEW) ")" |
| " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')" |
| " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");"); |
| } |
| |
| |
| /* |
| * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008 |
| * section 9.11. |
| */ |
| |
| static StringInfo |
| table_to_xml_internal(Oid relid, |
| const char *xmlschema, bool nulls, bool tableforest, |
| const char *targetns, bool top_level) |
| { |
| StringInfoData query; |
| |
| initStringInfo(&query); |
| appendStringInfo(&query, "SELECT * FROM %s", |
| DatumGetCString(DirectFunctionCall1(regclassout, |
| ObjectIdGetDatum(relid)))); |
| return query_to_xml_internal(query.data, get_rel_name(relid), |
| xmlschema, nulls, tableforest, |
| targetns, top_level); |
| } |
| |
| |
| Datum |
| table_to_xml(PG_FUNCTION_ARGS) |
| { |
| Oid relid = PG_GETARG_OID(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL, |
| nulls, tableforest, |
| targetns, true))); |
| } |
| |
| |
| Datum |
| query_to_xml(PG_FUNCTION_ARGS) |
| { |
| char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, |
| NULL, nulls, tableforest, |
| targetns, true))); |
| } |
| |
| |
| Datum |
| cursor_to_xml(PG_FUNCTION_ARGS) |
| { |
| char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); |
| int32 count = PG_GETARG_INT32(1); |
| bool nulls = PG_GETARG_BOOL(2); |
| bool tableforest = PG_GETARG_BOOL(3); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4)); |
| |
| StringInfoData result; |
| Portal portal; |
| uint64 i; |
| |
| initStringInfo(&result); |
| |
| if (!tableforest) |
| { |
| xmldata_root_element_start(&result, "table", NULL, targetns, true); |
| appendStringInfoChar(&result, '\n'); |
| } |
| |
| SPI_connect(); |
| portal = SPI_cursor_find(name); |
| if (portal == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_UNDEFINED_CURSOR), |
| errmsg("cursor \"%s\" does not exist", name))); |
| |
| SPI_cursor_fetch(portal, true, count); |
| for (i = 0; i < SPI_processed; i++) |
| SPI_sql_row_to_xmlelement(i, &result, NULL, nulls, |
| tableforest, targetns, true); |
| |
| SPI_finish(); |
| |
| if (!tableforest) |
| xmldata_root_element_end(&result, "table"); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(&result)); |
| } |
| |
| |
| /* |
| * Write the start tag of the root element of a data mapping. |
| * |
| * top_level means that this is the very top level of the eventual |
| * output. For example, when the user calls table_to_xml, then a call |
| * with a table name to this function is the top level. When the user |
| * calls database_to_xml, then a call with a schema name to this |
| * function is not the top level. If top_level is false, then the XML |
| * namespace declarations are omitted, because they supposedly already |
| * appeared earlier in the output. Repeating them is not wrong, but |
| * it looks ugly. |
| */ |
| static void |
| xmldata_root_element_start(StringInfo result, const char *eltname, |
| const char *xmlschema, const char *targetns, |
| bool top_level) |
| { |
| /* This isn't really wrong but currently makes no sense. */ |
| Assert(top_level || !xmlschema); |
| |
| appendStringInfo(result, "<%s", eltname); |
| if (top_level) |
| { |
| appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\""); |
| if (strlen(targetns) > 0) |
| appendStringInfo(result, " xmlns=\"%s\"", targetns); |
| } |
| if (xmlschema) |
| { |
| /* FIXME: better targets */ |
| if (strlen(targetns) > 0) |
| appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns); |
| else |
| appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\""); |
| } |
| appendStringInfoString(result, ">\n"); |
| } |
| |
| |
| static void |
| xmldata_root_element_end(StringInfo result, const char *eltname) |
| { |
| appendStringInfo(result, "</%s>\n", eltname); |
| } |
| |
| |
| static StringInfo |
| query_to_xml_internal(const char *query, char *tablename, |
| const char *xmlschema, bool nulls, bool tableforest, |
| const char *targetns, bool top_level) |
| { |
| StringInfo result; |
| char *xmltn; |
| uint64 i; |
| |
| if (tablename) |
| xmltn = map_sql_identifier_to_xml_name(tablename, true, false); |
| else |
| xmltn = "table"; |
| |
| result = makeStringInfo(); |
| |
| SPI_connect(); |
| if (SPI_execute(query, true, 0) != SPI_OK_SELECT) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("invalid query"))); |
| |
| if (!tableforest) |
| { |
| xmldata_root_element_start(result, xmltn, xmlschema, |
| targetns, top_level); |
| appendStringInfoChar(result, '\n'); |
| } |
| |
| if (xmlschema) |
| appendStringInfo(result, "%s\n\n", xmlschema); |
| |
| for (i = 0; i < SPI_processed; i++) |
| SPI_sql_row_to_xmlelement(i, result, tablename, nulls, |
| tableforest, targetns, top_level); |
| |
| if (!tableforest) |
| xmldata_root_element_end(result, xmltn); |
| |
| SPI_finish(); |
| |
| return result; |
| } |
| |
| |
| Datum |
| table_to_xmlschema(PG_FUNCTION_ARGS) |
| { |
| Oid relid = PG_GETARG_OID(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| const char *result; |
| Relation rel; |
| |
| rel = table_open(relid, AccessShareLock); |
| result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, |
| tableforest, targetns); |
| table_close(rel, NoLock); |
| |
| PG_RETURN_XML_P(cstring_to_xmltype(result)); |
| } |
| |
| |
| Datum |
| query_to_xmlschema(PG_FUNCTION_ARGS) |
| { |
| char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| const char *result; |
| SPIPlanPtr plan; |
| Portal portal; |
| |
| SPI_connect(); |
| |
| if ((plan = SPI_prepare(query, 0, NULL)) == NULL) |
| elog(ERROR, "SPI_prepare(\"%s\") failed", query); |
| |
| if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) |
| elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); |
| |
| result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, |
| InvalidOid, nulls, |
| tableforest, targetns)); |
| SPI_cursor_close(portal); |
| SPI_finish(); |
| |
| PG_RETURN_XML_P(cstring_to_xmltype(result)); |
| } |
| |
| |
| Datum |
| cursor_to_xmlschema(PG_FUNCTION_ARGS) |
| { |
| char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| const char *xmlschema; |
| Portal portal; |
| |
| SPI_connect(); |
| portal = SPI_cursor_find(name); |
| if (portal == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_UNDEFINED_CURSOR), |
| errmsg("cursor \"%s\" does not exist", name))); |
| |
| xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, |
| InvalidOid, nulls, |
| tableforest, targetns)); |
| SPI_finish(); |
| |
| PG_RETURN_XML_P(cstring_to_xmltype(xmlschema)); |
| } |
| |
| |
| Datum |
| table_to_xml_and_xmlschema(PG_FUNCTION_ARGS) |
| { |
| Oid relid = PG_GETARG_OID(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| Relation rel; |
| const char *xmlschema; |
| |
| rel = table_open(relid, AccessShareLock); |
| xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls, |
| tableforest, targetns); |
| table_close(rel, NoLock); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, |
| xmlschema, nulls, tableforest, |
| targetns, true))); |
| } |
| |
| |
| Datum |
| query_to_xml_and_xmlschema(PG_FUNCTION_ARGS) |
| { |
| char *query = text_to_cstring(PG_GETARG_TEXT_PP(0)); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| |
| const char *xmlschema; |
| SPIPlanPtr plan; |
| Portal portal; |
| |
| SPI_connect(); |
| |
| if ((plan = SPI_prepare(query, 0, NULL)) == NULL) |
| elog(ERROR, "SPI_prepare(\"%s\") failed", query); |
| |
| if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) |
| elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); |
| |
| xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc, |
| InvalidOid, nulls, tableforest, targetns)); |
| SPI_cursor_close(portal); |
| SPI_finish(); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL, |
| xmlschema, nulls, tableforest, |
| targetns, true))); |
| } |
| |
| |
| /* |
| * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008 |
| * sections 9.13, 9.14. |
| */ |
| |
| static StringInfo |
| schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls, |
| bool tableforest, const char *targetns, bool top_level) |
| { |
| StringInfo result; |
| char *xmlsn; |
| List *relid_list; |
| ListCell *cell; |
| |
| xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid), |
| true, false); |
| result = makeStringInfo(); |
| |
| xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level); |
| appendStringInfoChar(result, '\n'); |
| |
| if (xmlschema) |
| appendStringInfo(result, "%s\n\n", xmlschema); |
| |
| SPI_connect(); |
| |
| relid_list = schema_get_xml_visible_tables(nspid); |
| |
| foreach(cell, relid_list) |
| { |
| Oid relid = lfirst_oid(cell); |
| StringInfo subres; |
| |
| subres = table_to_xml_internal(relid, NULL, nulls, tableforest, |
| targetns, false); |
| |
| appendBinaryStringInfo(result, subres->data, subres->len); |
| appendStringInfoChar(result, '\n'); |
| } |
| |
| SPI_finish(); |
| |
| xmldata_root_element_end(result, xmlsn); |
| |
| return result; |
| } |
| |
| |
| Datum |
| schema_to_xml(PG_FUNCTION_ARGS) |
| { |
| Name name = PG_GETARG_NAME(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| |
| char *schemaname; |
| Oid nspid; |
| |
| schemaname = NameStr(*name); |
| nspid = LookupExplicitNamespace(schemaname, false); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL, |
| nulls, tableforest, targetns, true))); |
| } |
| |
| |
| /* |
| * Write the start element of the root element of an XML Schema mapping. |
| */ |
| static void |
| xsd_schema_element_start(StringInfo result, const char *targetns) |
| { |
| appendStringInfoString(result, |
| "<xsd:schema\n" |
| " xmlns:xsd=\"" NAMESPACE_XSD "\""); |
| if (strlen(targetns) > 0) |
| appendStringInfo(result, |
| "\n" |
| " targetNamespace=\"%s\"\n" |
| " elementFormDefault=\"qualified\"", |
| targetns); |
| appendStringInfoString(result, |
| ">\n\n"); |
| } |
| |
| |
| static void |
| xsd_schema_element_end(StringInfo result) |
| { |
| appendStringInfoString(result, "</xsd:schema>"); |
| } |
| |
| |
| static StringInfo |
| schema_to_xmlschema_internal(const char *schemaname, bool nulls, |
| bool tableforest, const char *targetns) |
| { |
| Oid nspid; |
| List *relid_list; |
| List *tupdesc_list; |
| ListCell *cell; |
| StringInfo result; |
| |
| result = makeStringInfo(); |
| |
| nspid = LookupExplicitNamespace(schemaname, false); |
| |
| xsd_schema_element_start(result, targetns); |
| |
| SPI_connect(); |
| |
| relid_list = schema_get_xml_visible_tables(nspid); |
| |
| tupdesc_list = NIL; |
| foreach(cell, relid_list) |
| { |
| Relation rel; |
| |
| rel = table_open(lfirst_oid(cell), AccessShareLock); |
| tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); |
| table_close(rel, NoLock); |
| } |
| |
| appendStringInfoString(result, |
| map_sql_typecoll_to_xmlschema_types(tupdesc_list)); |
| |
| appendStringInfoString(result, |
| map_sql_schema_to_xmlschema_types(nspid, relid_list, |
| nulls, tableforest, targetns)); |
| |
| xsd_schema_element_end(result); |
| |
| SPI_finish(); |
| |
| return result; |
| } |
| |
| |
| Datum |
| schema_to_xmlschema(PG_FUNCTION_ARGS) |
| { |
| Name name = PG_GETARG_NAME(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name), |
| nulls, tableforest, targetns))); |
| } |
| |
| |
| Datum |
| schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS) |
| { |
| Name name = PG_GETARG_NAME(0); |
| bool nulls = PG_GETARG_BOOL(1); |
| bool tableforest = PG_GETARG_BOOL(2); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3)); |
| char *schemaname; |
| Oid nspid; |
| StringInfo xmlschema; |
| |
| schemaname = NameStr(*name); |
| nspid = LookupExplicitNamespace(schemaname, false); |
| |
| xmlschema = schema_to_xmlschema_internal(schemaname, nulls, |
| tableforest, targetns); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, |
| xmlschema->data, nulls, |
| tableforest, targetns, true))); |
| } |
| |
| |
| /* |
| * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008 |
| * sections 9.16, 9.17. |
| */ |
| |
| static StringInfo |
| database_to_xml_internal(const char *xmlschema, bool nulls, |
| bool tableforest, const char *targetns) |
| { |
| StringInfo result; |
| List *nspid_list; |
| ListCell *cell; |
| char *xmlcn; |
| |
| xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId), |
| true, false); |
| result = makeStringInfo(); |
| |
| xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true); |
| appendStringInfoChar(result, '\n'); |
| |
| if (xmlschema) |
| appendStringInfo(result, "%s\n\n", xmlschema); |
| |
| SPI_connect(); |
| |
| nspid_list = database_get_xml_visible_schemas(); |
| |
| foreach(cell, nspid_list) |
| { |
| Oid nspid = lfirst_oid(cell); |
| StringInfo subres; |
| |
| subres = schema_to_xml_internal(nspid, NULL, nulls, |
| tableforest, targetns, false); |
| |
| appendBinaryStringInfo(result, subres->data, subres->len); |
| appendStringInfoChar(result, '\n'); |
| } |
| |
| SPI_finish(); |
| |
| xmldata_root_element_end(result, xmlcn); |
| |
| return result; |
| } |
| |
| |
| Datum |
| database_to_xml(PG_FUNCTION_ARGS) |
| { |
| bool nulls = PG_GETARG_BOOL(0); |
| bool tableforest = PG_GETARG_BOOL(1); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls, |
| tableforest, targetns))); |
| } |
| |
| |
| static StringInfo |
| database_to_xmlschema_internal(bool nulls, bool tableforest, |
| const char *targetns) |
| { |
| List *relid_list; |
| List *nspid_list; |
| List *tupdesc_list; |
| ListCell *cell; |
| StringInfo result; |
| |
| result = makeStringInfo(); |
| |
| xsd_schema_element_start(result, targetns); |
| |
| SPI_connect(); |
| |
| relid_list = database_get_xml_visible_tables(); |
| nspid_list = database_get_xml_visible_schemas(); |
| |
| tupdesc_list = NIL; |
| foreach(cell, relid_list) |
| { |
| Relation rel; |
| |
| rel = table_open(lfirst_oid(cell), AccessShareLock); |
| tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att)); |
| table_close(rel, NoLock); |
| } |
| |
| appendStringInfoString(result, |
| map_sql_typecoll_to_xmlschema_types(tupdesc_list)); |
| |
| appendStringInfoString(result, |
| map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns)); |
| |
| xsd_schema_element_end(result); |
| |
| SPI_finish(); |
| |
| return result; |
| } |
| |
| |
| Datum |
| database_to_xmlschema(PG_FUNCTION_ARGS) |
| { |
| bool nulls = PG_GETARG_BOOL(0); |
| bool tableforest = PG_GETARG_BOOL(1); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls, |
| tableforest, targetns))); |
| } |
| |
| |
| Datum |
| database_to_xml_and_xmlschema(PG_FUNCTION_ARGS) |
| { |
| bool nulls = PG_GETARG_BOOL(0); |
| bool tableforest = PG_GETARG_BOOL(1); |
| const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2)); |
| StringInfo xmlschema; |
| |
| xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data, |
| nulls, tableforest, targetns))); |
| } |
| |
| |
| /* |
| * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section |
| * 9.2. |
| */ |
| static char * |
| map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d) |
| { |
| StringInfoData result; |
| |
| initStringInfo(&result); |
| |
| if (a) |
| appendStringInfoString(&result, |
| map_sql_identifier_to_xml_name(a, true, true)); |
| if (b) |
| appendStringInfo(&result, ".%s", |
| map_sql_identifier_to_xml_name(b, true, true)); |
| if (c) |
| appendStringInfo(&result, ".%s", |
| map_sql_identifier_to_xml_name(c, true, true)); |
| if (d) |
| appendStringInfo(&result, ".%s", |
| map_sql_identifier_to_xml_name(d, true, true)); |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL table to an XML Schema document; see SQL/XML:2008 |
| * section 9.11. |
| * |
| * Map an SQL table to XML Schema data types; see SQL/XML:2008 section |
| * 9.9. |
| */ |
| static const char * |
| map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls, |
| bool tableforest, const char *targetns) |
| { |
| int i; |
| char *xmltn; |
| char *tabletypename; |
| char *rowtypename; |
| StringInfoData result; |
| |
| initStringInfo(&result); |
| |
| if (OidIsValid(relid)) |
| { |
| HeapTuple tuple; |
| Form_pg_class reltuple; |
| |
| tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for relation %u", relid); |
| reltuple = (Form_pg_class) GETSTRUCT(tuple); |
| |
| xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname), |
| true, false); |
| |
| tabletypename = map_multipart_sql_identifier_to_xml_name("TableType", |
| get_database_name(MyDatabaseId), |
| get_namespace_name(reltuple->relnamespace), |
| NameStr(reltuple->relname)); |
| |
| rowtypename = map_multipart_sql_identifier_to_xml_name("RowType", |
| get_database_name(MyDatabaseId), |
| get_namespace_name(reltuple->relnamespace), |
| NameStr(reltuple->relname)); |
| |
| ReleaseSysCache(tuple); |
| } |
| else |
| { |
| if (tableforest) |
| xmltn = "row"; |
| else |
| xmltn = "table"; |
| |
| tabletypename = "TableType"; |
| rowtypename = "RowType"; |
| } |
| |
| xsd_schema_element_start(&result, targetns); |
| |
| appendStringInfoString(&result, |
| map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc))); |
| |
| appendStringInfo(&result, |
| "<xsd:complexType name=\"%s\">\n" |
| " <xsd:sequence>\n", |
| rowtypename); |
| |
| for (i = 0; i < tupdesc->natts; i++) |
| { |
| Form_pg_attribute att = TupleDescAttr(tupdesc, i); |
| |
| if (att->attisdropped) |
| continue; |
| appendStringInfo(&result, |
| " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n", |
| map_sql_identifier_to_xml_name(NameStr(att->attname), |
| true, false), |
| map_sql_type_to_xml_name(att->atttypid, -1), |
| nulls ? " nillable=\"true\"" : " minOccurs=\"0\""); |
| } |
| |
| appendStringInfoString(&result, |
| " </xsd:sequence>\n" |
| "</xsd:complexType>\n\n"); |
| |
| if (!tableforest) |
| { |
| appendStringInfo(&result, |
| "<xsd:complexType name=\"%s\">\n" |
| " <xsd:sequence>\n" |
| " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n" |
| " </xsd:sequence>\n" |
| "</xsd:complexType>\n\n", |
| tabletypename, rowtypename); |
| |
| appendStringInfo(&result, |
| "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", |
| xmltn, tabletypename); |
| } |
| else |
| appendStringInfo(&result, |
| "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", |
| xmltn, rowtypename); |
| |
| xsd_schema_element_end(&result); |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL schema to XML Schema data types; see SQL/XML:2008 |
| * section 9.12. |
| */ |
| static const char * |
| map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls, |
| bool tableforest, const char *targetns) |
| { |
| char *dbname; |
| char *nspname; |
| char *xmlsn; |
| char *schematypename; |
| StringInfoData result; |
| ListCell *cell; |
| |
| dbname = get_database_name(MyDatabaseId); |
| nspname = get_namespace_name(nspid); |
| |
| initStringInfo(&result); |
| |
| xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); |
| |
| schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", |
| dbname, |
| nspname, |
| NULL); |
| |
| appendStringInfo(&result, |
| "<xsd:complexType name=\"%s\">\n", schematypename); |
| if (!tableforest) |
| appendStringInfoString(&result, |
| " <xsd:all>\n"); |
| else |
| appendStringInfoString(&result, |
| " <xsd:sequence>\n"); |
| |
| foreach(cell, relid_list) |
| { |
| Oid relid = lfirst_oid(cell); |
| char *relname = get_rel_name(relid); |
| char *xmltn = map_sql_identifier_to_xml_name(relname, true, false); |
| char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType", |
| dbname, |
| nspname, |
| relname); |
| |
| if (!tableforest) |
| appendStringInfo(&result, |
| " <xsd:element name=\"%s\" type=\"%s\"/>\n", |
| xmltn, tabletypename); |
| else |
| appendStringInfo(&result, |
| " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n", |
| xmltn, tabletypename); |
| } |
| |
| if (!tableforest) |
| appendStringInfoString(&result, |
| " </xsd:all>\n"); |
| else |
| appendStringInfoString(&result, |
| " </xsd:sequence>\n"); |
| appendStringInfoString(&result, |
| "</xsd:complexType>\n\n"); |
| |
| appendStringInfo(&result, |
| "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", |
| xmlsn, schematypename); |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL catalog to XML Schema data types; see SQL/XML:2008 |
| * section 9.15. |
| */ |
| static const char * |
| map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls, |
| bool tableforest, const char *targetns) |
| { |
| char *dbname; |
| char *xmlcn; |
| char *catalogtypename; |
| StringInfoData result; |
| ListCell *cell; |
| |
| dbname = get_database_name(MyDatabaseId); |
| |
| initStringInfo(&result); |
| |
| xmlcn = map_sql_identifier_to_xml_name(dbname, true, false); |
| |
| catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType", |
| dbname, |
| NULL, |
| NULL); |
| |
| appendStringInfo(&result, |
| "<xsd:complexType name=\"%s\">\n", catalogtypename); |
| appendStringInfoString(&result, |
| " <xsd:all>\n"); |
| |
| foreach(cell, nspid_list) |
| { |
| Oid nspid = lfirst_oid(cell); |
| char *nspname = get_namespace_name(nspid); |
| char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false); |
| char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType", |
| dbname, |
| nspname, |
| NULL); |
| |
| appendStringInfo(&result, |
| " <xsd:element name=\"%s\" type=\"%s\"/>\n", |
| xmlsn, schematypename); |
| } |
| |
| appendStringInfoString(&result, |
| " </xsd:all>\n"); |
| appendStringInfoString(&result, |
| "</xsd:complexType>\n\n"); |
| |
| appendStringInfo(&result, |
| "<xsd:element name=\"%s\" type=\"%s\"/>\n\n", |
| xmlcn, catalogtypename); |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4. |
| */ |
| static const char * |
| map_sql_type_to_xml_name(Oid typeoid, int typmod) |
| { |
| StringInfoData result; |
| |
| initStringInfo(&result); |
| |
| switch (typeoid) |
| { |
| case BPCHAROID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "CHAR"); |
| else |
| appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ); |
| break; |
| case VARCHAROID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "VARCHAR"); |
| else |
| appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ); |
| break; |
| case NUMERICOID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "NUMERIC"); |
| else |
| appendStringInfo(&result, "NUMERIC_%d_%d", |
| ((typmod - VARHDRSZ) >> 16) & 0xffff, |
| (typmod - VARHDRSZ) & 0xffff); |
| break; |
| case INT4OID: |
| appendStringInfoString(&result, "INTEGER"); |
| break; |
| case INT2OID: |
| appendStringInfoString(&result, "SMALLINT"); |
| break; |
| case INT8OID: |
| appendStringInfoString(&result, "BIGINT"); |
| break; |
| case FLOAT4OID: |
| appendStringInfoString(&result, "REAL"); |
| break; |
| case FLOAT8OID: |
| appendStringInfoString(&result, "DOUBLE"); |
| break; |
| case BOOLOID: |
| appendStringInfoString(&result, "BOOLEAN"); |
| break; |
| case TIMEOID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "TIME"); |
| else |
| appendStringInfo(&result, "TIME_%d", typmod); |
| break; |
| case TIMETZOID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "TIME_WTZ"); |
| else |
| appendStringInfo(&result, "TIME_WTZ_%d", typmod); |
| break; |
| case TIMESTAMPOID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "TIMESTAMP"); |
| else |
| appendStringInfo(&result, "TIMESTAMP_%d", typmod); |
| break; |
| case TIMESTAMPTZOID: |
| if (typmod == -1) |
| appendStringInfoString(&result, "TIMESTAMP_WTZ"); |
| else |
| appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod); |
| break; |
| case DATEOID: |
| appendStringInfoString(&result, "DATE"); |
| break; |
| case XMLOID: |
| appendStringInfoString(&result, "XML"); |
| break; |
| default: |
| { |
| HeapTuple tuple; |
| Form_pg_type typtuple; |
| |
| tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid)); |
| if (!HeapTupleIsValid(tuple)) |
| elog(ERROR, "cache lookup failed for type %u", typeoid); |
| typtuple = (Form_pg_type) GETSTRUCT(tuple); |
| |
| appendStringInfoString(&result, |
| map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT", |
| get_database_name(MyDatabaseId), |
| get_namespace_name(typtuple->typnamespace), |
| NameStr(typtuple->typname))); |
| |
| ReleaseSysCache(tuple); |
| } |
| } |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map a collection of SQL data types to XML Schema data types; see |
| * SQL/XML:2008 section 9.7. |
| */ |
| static const char * |
| map_sql_typecoll_to_xmlschema_types(List *tupdesc_list) |
| { |
| List *uniquetypes = NIL; |
| int i; |
| StringInfoData result; |
| ListCell *cell0; |
| |
| /* extract all column types used in the set of TupleDescs */ |
| foreach(cell0, tupdesc_list) |
| { |
| TupleDesc tupdesc = (TupleDesc) lfirst(cell0); |
| |
| for (i = 0; i < tupdesc->natts; i++) |
| { |
| Form_pg_attribute att = TupleDescAttr(tupdesc, i); |
| |
| if (att->attisdropped) |
| continue; |
| uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid); |
| } |
| } |
| |
| /* add base types of domains */ |
| foreach(cell0, uniquetypes) |
| { |
| Oid typid = lfirst_oid(cell0); |
| Oid basetypid = getBaseType(typid); |
| |
| if (basetypid != typid) |
| uniquetypes = list_append_unique_oid(uniquetypes, basetypid); |
| } |
| |
| /* Convert to textual form */ |
| initStringInfo(&result); |
| |
| foreach(cell0, uniquetypes) |
| { |
| appendStringInfo(&result, "%s\n", |
| map_sql_type_to_xmlschema_type(lfirst_oid(cell0), |
| -1)); |
| } |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL data type to a named XML Schema data type; see |
| * SQL/XML:2008 sections 9.5 and 9.6. |
| * |
| * (The distinction between 9.5 and 9.6 is basically that 9.6 adds |
| * a name attribute, which this function does. The name-less version |
| * 9.5 doesn't appear to be required anywhere.) |
| */ |
| static const char * |
| map_sql_type_to_xmlschema_type(Oid typeoid, int typmod) |
| { |
| StringInfoData result; |
| const char *typename = map_sql_type_to_xml_name(typeoid, typmod); |
| |
| initStringInfo(&result); |
| |
| if (typeoid == XMLOID) |
| { |
| appendStringInfoString(&result, |
| "<xsd:complexType mixed=\"true\">\n" |
| " <xsd:sequence>\n" |
| " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n" |
| " </xsd:sequence>\n" |
| "</xsd:complexType>\n"); |
| } |
| else |
| { |
| appendStringInfo(&result, |
| "<xsd:simpleType name=\"%s\">\n", typename); |
| |
| switch (typeoid) |
| { |
| case BPCHAROID: |
| case VARCHAROID: |
| case TEXTOID: |
| appendStringInfoString(&result, |
| " <xsd:restriction base=\"xsd:string\">\n"); |
| if (typmod != -1) |
| appendStringInfo(&result, |
| " <xsd:maxLength value=\"%d\"/>\n", |
| typmod - VARHDRSZ); |
| appendStringInfoString(&result, " </xsd:restriction>\n"); |
| break; |
| |
| case BYTEAOID: |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:%s\">\n" |
| " </xsd:restriction>\n", |
| xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary"); |
| break; |
| |
| case NUMERICOID: |
| if (typmod != -1) |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:decimal\">\n" |
| " <xsd:totalDigits value=\"%d\"/>\n" |
| " <xsd:fractionDigits value=\"%d\"/>\n" |
| " </xsd:restriction>\n", |
| ((typmod - VARHDRSZ) >> 16) & 0xffff, |
| (typmod - VARHDRSZ) & 0xffff); |
| break; |
| |
| case INT2OID: |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:short\">\n" |
| " <xsd:maxInclusive value=\"%d\"/>\n" |
| " <xsd:minInclusive value=\"%d\"/>\n" |
| " </xsd:restriction>\n", |
| SHRT_MAX, SHRT_MIN); |
| break; |
| |
| case INT4OID: |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:int\">\n" |
| " <xsd:maxInclusive value=\"%d\"/>\n" |
| " <xsd:minInclusive value=\"%d\"/>\n" |
| " </xsd:restriction>\n", |
| INT_MAX, INT_MIN); |
| break; |
| |
| case INT8OID: |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:long\">\n" |
| " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n" |
| " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n" |
| " </xsd:restriction>\n", |
| (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1, |
| (((uint64) 1) << (sizeof(int64) * 8 - 1))); |
| break; |
| |
| case FLOAT4OID: |
| appendStringInfoString(&result, |
| " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n"); |
| break; |
| |
| case FLOAT8OID: |
| appendStringInfoString(&result, |
| " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n"); |
| break; |
| |
| case BOOLOID: |
| appendStringInfoString(&result, |
| " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n"); |
| break; |
| |
| case TIMEOID: |
| case TIMETZOID: |
| { |
| const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); |
| |
| if (typmod == -1) |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:time\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" |
| " </xsd:restriction>\n", tz); |
| else if (typmod == 0) |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:time\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" |
| " </xsd:restriction>\n", tz); |
| else |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:time\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" |
| " </xsd:restriction>\n", typmod - VARHDRSZ, tz); |
| break; |
| } |
| |
| case TIMESTAMPOID: |
| case TIMESTAMPTZOID: |
| { |
| const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : ""); |
| |
| if (typmod == -1) |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:dateTime\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n" |
| " </xsd:restriction>\n", tz); |
| else if (typmod == 0) |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:dateTime\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n" |
| " </xsd:restriction>\n", tz); |
| else |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"xsd:dateTime\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n" |
| " </xsd:restriction>\n", typmod - VARHDRSZ, tz); |
| break; |
| } |
| |
| case DATEOID: |
| appendStringInfoString(&result, |
| " <xsd:restriction base=\"xsd:date\">\n" |
| " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n" |
| " </xsd:restriction>\n"); |
| break; |
| |
| default: |
| if (get_typtype(typeoid) == TYPTYPE_DOMAIN) |
| { |
| Oid base_typeoid; |
| int32 base_typmod = -1; |
| |
| base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod); |
| |
| appendStringInfo(&result, |
| " <xsd:restriction base=\"%s\"/>\n", |
| map_sql_type_to_xml_name(base_typeoid, base_typmod)); |
| } |
| break; |
| } |
| appendStringInfoString(&result, "</xsd:simpleType>\n"); |
| } |
| |
| return result.data; |
| } |
| |
| |
| /* |
| * Map an SQL row to an XML element, taking the row from the active |
| * SPI cursor. See also SQL/XML:2008 section 9.10. |
| */ |
| static void |
| SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename, |
| bool nulls, bool tableforest, |
| const char *targetns, bool top_level) |
| { |
| int i; |
| char *xmltn; |
| |
| if (tablename) |
| xmltn = map_sql_identifier_to_xml_name(tablename, true, false); |
| else |
| { |
| if (tableforest) |
| xmltn = "row"; |
| else |
| xmltn = "table"; |
| } |
| |
| if (tableforest) |
| xmldata_root_element_start(result, xmltn, NULL, targetns, top_level); |
| else |
| appendStringInfoString(result, "<row>\n"); |
| |
| for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++) |
| { |
| char *colname; |
| Datum colval; |
| bool isnull; |
| |
| colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i), |
| true, false); |
| colval = SPI_getbinval(SPI_tuptable->vals[rownum], |
| SPI_tuptable->tupdesc, |
| i, |
| &isnull); |
| if (isnull) |
| { |
| if (nulls) |
| appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname); |
| } |
| else |
| appendStringInfo(result, " <%s>%s</%s>\n", |
| colname, |
| map_sql_value_to_xml_value(colval, |
| SPI_gettypeid(SPI_tuptable->tupdesc, i), true), |
| colname); |
| } |
| |
| if (tableforest) |
| { |
| xmldata_root_element_end(result, xmltn); |
| appendStringInfoChar(result, '\n'); |
| } |
| else |
| appendStringInfoString(result, "</row>\n\n"); |
| } |
| |
| |
| /* |
| * XPath related functions |
| */ |
| |
| #ifdef USE_LIBXML |
| |
| /* |
| * Convert XML node to text. |
| * |
| * For attribute and text nodes, return the escaped text. For anything else, |
| * dump the whole subtree. |
| */ |
| static text * |
| xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) |
| { |
| xmltype *result = NULL; |
| |
| if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE) |
| { |
| void (*volatile nodefree) (xmlNodePtr) = NULL; |
| volatile xmlBufferPtr buf = NULL; |
| volatile xmlNodePtr cur_copy = NULL; |
| |
| PG_TRY(); |
| { |
| int bytes; |
| |
| buf = xmlBufferCreate(); |
| if (buf == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate xmlBuffer"); |
| |
| /* |
| * Produce a dump of the node that we can serialize. xmlNodeDump |
| * does that, but the result of that function won't contain |
| * namespace definitions from ancestor nodes, so we first do a |
| * xmlCopyNode() which duplicates the node along with its required |
| * namespace definitions. |
| * |
| * Some old libxml2 versions such as 2.7.6 produce partially |
| * broken XML_DOCUMENT_NODE nodes (unset content field) when |
| * copying them. xmlNodeDump of such a node works fine, but |
| * xmlFreeNode crashes; set us up to call xmlFreeDoc instead. |
| */ |
| cur_copy = xmlCopyNode(cur, 1); |
| if (cur_copy == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not copy node"); |
| nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ? |
| (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode; |
| |
| bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0); |
| if (bytes == -1 || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not dump node"); |
| |
| result = xmlBuffer_to_xmltype(buf); |
| } |
| PG_FINALLY(); |
| { |
| if (nodefree) |
| nodefree(cur_copy); |
| if (buf) |
| xmlBufferFree(buf); |
| } |
| PG_END_TRY(); |
| } |
| else |
| { |
| xmlChar *str; |
| |
| str = xmlXPathCastNodeToString(cur); |
| PG_TRY(); |
| { |
| /* Here we rely on XML having the same representation as TEXT */ |
| char *escaped = escape_xml((char *) str); |
| |
| result = (xmltype *) cstring_to_text(escaped); |
| pfree(escaped); |
| } |
| PG_FINALLY(); |
| { |
| xmlFree(str); |
| } |
| PG_END_TRY(); |
| } |
| |
| return result; |
| } |
| |
| /* |
| * Convert an XML XPath object (the result of evaluating an XPath expression) |
| * to an array of xml values, which are appended to astate. The function |
| * result value is the number of elements in the array. |
| * |
| * If "astate" is NULL then we don't generate the array value, but we still |
| * return the number of elements it would have had. |
| * |
| * Nodesets are converted to an array containing the nodes' textual |
| * representations. Primitive values (float, double, string) are converted |
| * to a single-element array containing the value's string representation. |
| */ |
| static int |
| xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, |
| ArrayBuildState *astate, |
| PgXmlErrorContext *xmlerrcxt) |
| { |
| int result = 0; |
| Datum datum; |
| Oid datumtype; |
| char *result_str; |
| |
| switch (xpathobj->type) |
| { |
| case XPATH_NODESET: |
| if (xpathobj->nodesetval != NULL) |
| { |
| result = xpathobj->nodesetval->nodeNr; |
| if (astate != NULL) |
| { |
| int i; |
| |
| for (i = 0; i < result; i++) |
| { |
| datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], |
| xmlerrcxt)); |
| (void) accumArrayResult(astate, datum, false, |
| XMLOID, CurrentMemoryContext); |
| } |
| } |
| } |
| return result; |
| |
| case XPATH_BOOLEAN: |
| if (astate == NULL) |
| return 1; |
| datum = BoolGetDatum(xpathobj->boolval); |
| datumtype = BOOLOID; |
| break; |
| |
| case XPATH_NUMBER: |
| if (astate == NULL) |
| return 1; |
| datum = Float8GetDatum(xpathobj->floatval); |
| datumtype = FLOAT8OID; |
| break; |
| |
| case XPATH_STRING: |
| if (astate == NULL) |
| return 1; |
| datum = CStringGetDatum((char *) xpathobj->stringval); |
| datumtype = CSTRINGOID; |
| break; |
| |
| default: |
| elog(ERROR, "xpath expression result type %d is unsupported", |
| xpathobj->type); |
| return 0; /* keep compiler quiet */ |
| } |
| |
| /* Common code for scalar-value cases */ |
| result_str = map_sql_value_to_xml_value(datum, datumtype, true); |
| datum = PointerGetDatum(cstring_to_xmltype(result_str)); |
| (void) accumArrayResult(astate, datum, false, |
| XMLOID, CurrentMemoryContext); |
| return 1; |
| } |
| |
| |
| /* |
| * Common code for xpath() and xmlexists() |
| * |
| * Evaluate XPath expression and return number of nodes in res_nitems |
| * and array of XML values in astate. Either of those pointers can be |
| * NULL if the corresponding result isn't wanted. |
| * |
| * It is up to the user to ensure that the XML passed is in fact |
| * an XML document - XPath doesn't work easily on fragments without |
| * a context node being known. |
| */ |
| static void |
| xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, |
| int *res_nitems, ArrayBuildState *astate) |
| { |
| PgXmlErrorContext *xmlerrcxt; |
| volatile xmlParserCtxtPtr ctxt = NULL; |
| volatile xmlDocPtr doc = NULL; |
| volatile xmlXPathContextPtr xpathctx = NULL; |
| volatile xmlXPathCompExprPtr xpathcomp = NULL; |
| volatile xmlXPathObjectPtr xpathobj = NULL; |
| char *datastr; |
| int32 len; |
| int32 xpath_len; |
| xmlChar *string; |
| xmlChar *xpath_expr; |
| size_t xmldecl_len = 0; |
| int i; |
| int ndim; |
| Datum *ns_names_uris; |
| bool *ns_names_uris_nulls; |
| int ns_count; |
| |
| /* |
| * Namespace mappings are passed as text[]. If an empty array is passed |
| * (ndim = 0, "0-dimensional"), then there are no namespace mappings. |
| * Else, a 2-dimensional array with length of the second axis being equal |
| * to 2 should be passed, i.e., every subarray contains 2 elements, the |
| * first element defining the name, the second one the URI. Example: |
| * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', |
| * 'http://example2.com']]. |
| */ |
| ndim = namespaces ? ARR_NDIM(namespaces) : 0; |
| if (ndim != 0) |
| { |
| int *dims; |
| |
| dims = ARR_DIMS(namespaces); |
| |
| if (ndim != 2 || dims[1] != 2) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("invalid array for XML namespace mapping"), |
| errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); |
| |
| Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); |
| |
| deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT, |
| &ns_names_uris, &ns_names_uris_nulls, |
| &ns_count); |
| |
| Assert((ns_count % 2) == 0); /* checked above */ |
| ns_count /= 2; /* count pairs only */ |
| } |
| else |
| { |
| ns_names_uris = NULL; |
| ns_names_uris_nulls = NULL; |
| ns_count = 0; |
| } |
| |
| datastr = VARDATA(data); |
| len = VARSIZE(data) - VARHDRSZ; |
| xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text); |
| if (xpath_len == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("empty XPath expression"))); |
| |
| string = pg_xmlCharStrndup(datastr, len); |
| xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len); |
| |
| /* |
| * In a UTF8 database, skip any xml declaration, which might assert |
| * another encoding. Ignore parse_xml_decl() failure, letting |
| * xmlCtxtReadMemory() report parse errors. Documentation disclaims |
| * xpath() support for non-ASCII data in non-UTF8 databases, so leave |
| * those scenarios bug-compatible with historical behavior. |
| */ |
| if (GetDatabaseEncoding() == PG_UTF8) |
| parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL); |
| |
| /* |
| * In a UTF8 database, skip any xml declaration, which might assert |
| * another encoding. Ignore parse_xml_decl() failure, letting |
| * xmlCtxtReadMemory() report parse errors. Documentation disclaims |
| * xpath() support for non-ASCII data in non-UTF8 databases, so leave |
| * those scenarios bug-compatible with historical behavior. |
| */ |
| if (GetDatabaseEncoding() == PG_UTF8) |
| parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL); |
| |
| xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); |
| |
| PG_TRY(); |
| { |
| xmlInitParser(); |
| |
| /* |
| * redundant XML parsing (two parsings for the same value during one |
| * command execution are possible) |
| */ |
| ctxt = xmlNewParserCtxt(); |
| if (ctxt == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate parser context"); |
| doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, |
| len - xmldecl_len, NULL, NULL, 0); |
| if (doc == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, |
| "could not parse XML document"); |
| xpathctx = xmlXPathNewContext(doc); |
| if (xpathctx == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate XPath context"); |
| xpathctx->node = (xmlNodePtr) doc; |
| |
| /* register namespaces, if any */ |
| if (ns_count > 0) |
| { |
| for (i = 0; i < ns_count; i++) |
| { |
| char *ns_name; |
| char *ns_uri; |
| |
| if (ns_names_uris_nulls[i * 2] || |
| ns_names_uris_nulls[i * 2 + 1]) |
| ereport(ERROR, |
| (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), |
| errmsg("neither namespace name nor URI may be null"))); |
| ns_name = TextDatumGetCString(ns_names_uris[i * 2]); |
| ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); |
| if (xmlXPathRegisterNs(xpathctx, |
| (xmlChar *) ns_name, |
| (xmlChar *) ns_uri) != 0) |
| ereport(ERROR, /* is this an internal error??? */ |
| (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", |
| ns_name, ns_uri))); |
| } |
| } |
| |
| xpathcomp = xmlXPathCompile(xpath_expr); |
| if (xpathcomp == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, |
| "invalid XPath expression"); |
| |
| /* |
| * Version 2.6.27 introduces a function named |
| * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists, |
| * but we can derive the existence by whether any nodes are returned, |
| * thereby preventing a library version upgrade and keeping the code |
| * the same. |
| */ |
| xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); |
| if (xpathobj == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, |
| "could not create XPath object"); |
| |
| /* |
| * Extract the results as requested. |
| */ |
| if (res_nitems != NULL) |
| *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); |
| else |
| (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt); |
| } |
| PG_CATCH(); |
| { |
| if (xpathobj) |
| xmlXPathFreeObject(xpathobj); |
| if (xpathcomp) |
| xmlXPathFreeCompExpr(xpathcomp); |
| if (xpathctx) |
| xmlXPathFreeContext(xpathctx); |
| if (doc) |
| xmlFreeDoc(doc); |
| if (ctxt) |
| xmlFreeParserCtxt(ctxt); |
| |
| pg_xml_done(xmlerrcxt, true); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlXPathFreeObject(xpathobj); |
| xmlXPathFreeCompExpr(xpathcomp); |
| xmlXPathFreeContext(xpathctx); |
| xmlFreeDoc(doc); |
| xmlFreeParserCtxt(ctxt); |
| |
| pg_xml_done(xmlerrcxt, false); |
| } |
| #endif /* USE_LIBXML */ |
| |
| /* |
| * Evaluate XPath expression and return array of XML values. |
| * |
| * As we have no support of XQuery sequences yet, this function seems |
| * to be the most useful one (array of XML functions plays a role of |
| * some kind of substitution for XQuery sequences). |
| */ |
| Datum |
| xpath(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *xpath_expr_text = PG_GETARG_TEXT_PP(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); |
| ArrayBuildState *astate; |
| |
| astate = initArrayResult(XMLOID, CurrentMemoryContext, true); |
| xpath_internal(xpath_expr_text, data, namespaces, |
| NULL, astate); |
| PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext)); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| /* |
| * Determines if the node specified by the supplied XPath exists |
| * in a given XML document, returning a boolean. |
| */ |
| Datum |
| xmlexists(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *xpath_expr_text = PG_GETARG_TEXT_PP(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| int res_nitems; |
| |
| xpath_internal(xpath_expr_text, data, NULL, |
| &res_nitems, NULL); |
| |
| PG_RETURN_BOOL(res_nitems > 0); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| /* |
| * Determines if the node specified by the supplied XPath exists |
| * in a given XML document, returning a boolean. Differs from |
| * xmlexists as it supports namespaces and is not defined in SQL/XML. |
| */ |
| Datum |
| xpath_exists(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *xpath_expr_text = PG_GETARG_TEXT_PP(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); |
| int res_nitems; |
| |
| xpath_internal(xpath_expr_text, data, namespaces, |
| &res_nitems, NULL); |
| |
| PG_RETURN_BOOL(res_nitems > 0); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif |
| } |
| |
| /* |
| * Functions for checking well-formed-ness |
| */ |
| |
| #ifdef USE_LIBXML |
| static bool |
| wellformed_xml(text *data, XmlOptionType xmloption_arg) |
| { |
| bool result; |
| volatile xmlDocPtr doc = NULL; |
| |
| /* We want to catch any exceptions and return false */ |
| PG_TRY(); |
| { |
| doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding()); |
| result = true; |
| } |
| PG_CATCH(); |
| { |
| FlushErrorState(); |
| result = false; |
| } |
| PG_END_TRY(); |
| |
| if (doc) |
| xmlFreeDoc(doc); |
| |
| return result; |
| } |
| #endif |
| |
| Datum |
| xml_is_well_formed(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *data = PG_GETARG_TEXT_PP(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, xmloption)); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| Datum |
| xml_is_well_formed_document(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *data = PG_GETARG_TEXT_PP(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT)); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| Datum |
| xml_is_well_formed_content(PG_FUNCTION_ARGS) |
| { |
| #ifdef USE_LIBXML |
| text *data = PG_GETARG_TEXT_PP(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT)); |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * support functions for XMLTABLE |
| * |
| */ |
| #ifdef USE_LIBXML |
| |
| /* |
| * Returns private data from executor state. Ensure validity by check with |
| * MAGIC number. |
| */ |
| static inline XmlTableBuilderData * |
| GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname) |
| { |
| XmlTableBuilderData *result; |
| |
| if (!IsA(state, TableFuncScanState)) |
| elog(ERROR, "%s called with invalid TableFuncScanState", fname); |
| result = (XmlTableBuilderData *) state->opaque; |
| if (result->magic != XMLTABLE_CONTEXT_MAGIC) |
| elog(ERROR, "%s called with invalid TableFuncScanState", fname); |
| |
| return result; |
| } |
| #endif |
| |
| /* |
| * XmlTableInitOpaque |
| * Fill in TableFuncScanState->opaque for XmlTable processor; initialize |
| * the XML parser. |
| * |
| * Note: Because we call pg_xml_init() here and pg_xml_done() in |
| * XmlTableDestroyOpaque, it is critical for robustness that no other |
| * executor nodes run until this node is processed to completion. Caller |
| * must execute this to completion (probably filling a tuplestore to exhaust |
| * this node in a single pass) instead of using row-per-call mode. |
| */ |
| static void |
| XmlTableInitOpaque(TableFuncScanState *state, int natts) |
| { |
| #ifdef USE_LIBXML |
| volatile xmlParserCtxtPtr ctxt = NULL; |
| XmlTableBuilderData *xtCxt; |
| PgXmlErrorContext *xmlerrcxt; |
| |
| xtCxt = palloc0(sizeof(XmlTableBuilderData)); |
| xtCxt->magic = XMLTABLE_CONTEXT_MAGIC; |
| xtCxt->natts = natts; |
| xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts); |
| |
| xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); |
| |
| PG_TRY(); |
| { |
| xmlInitParser(); |
| |
| ctxt = xmlNewParserCtxt(); |
| if (ctxt == NULL || xmlerrcxt->err_occurred) |
| xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate parser context"); |
| } |
| PG_CATCH(); |
| { |
| if (ctxt != NULL) |
| xmlFreeParserCtxt(ctxt); |
| |
| pg_xml_done(xmlerrcxt, true); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xtCxt->xmlerrcxt = xmlerrcxt; |
| xtCxt->ctxt = ctxt; |
| |
| state->opaque = xtCxt; |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableSetDocument |
| * Install the input document |
| */ |
| static void |
| XmlTableSetDocument(TableFuncScanState *state, Datum value) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| xmltype *xmlval = DatumGetXmlP(value); |
| char *str; |
| xmlChar *xstr; |
| int length; |
| volatile xmlDocPtr doc = NULL; |
| volatile xmlXPathContextPtr xpathcxt = NULL; |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument"); |
| |
| /* |
| * Use out function for casting to string (remove encoding property). See |
| * comment in xml_out. |
| */ |
| str = xml_out_internal(xmlval, 0); |
| |
| length = strlen(str); |
| xstr = pg_xmlCharStrndup(str, length); |
| |
| PG_TRY(); |
| { |
| doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0); |
| if (doc == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, |
| "could not parse XML document"); |
| xpathcxt = xmlXPathNewContext(doc); |
| if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate XPath context"); |
| xpathcxt->node = (xmlNodePtr) doc; |
| } |
| PG_CATCH(); |
| { |
| if (xpathcxt != NULL) |
| xmlXPathFreeContext(xpathcxt); |
| if (doc != NULL) |
| xmlFreeDoc(doc); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xtCxt->doc = doc; |
| xtCxt->xpathcxt = xpathcxt; |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableSetNamespace |
| * Add a namespace declaration |
| */ |
| static void |
| XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| |
| if (name == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("DEFAULT namespace is not supported"))); |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace"); |
| |
| if (xmlXPathRegisterNs(xtCxt->xpathcxt, |
| pg_xmlCharStrndup(name, strlen(name)), |
| pg_xmlCharStrndup(uri, strlen(uri)))) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, |
| "could not set XML namespace"); |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableSetRowFilter |
| * Install the row-filter Xpath expression. |
| */ |
| static void |
| XmlTableSetRowFilter(TableFuncScanState *state, const char *path) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| xmlChar *xstr; |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter"); |
| |
| if (*path == '\0') |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("row path filter must not be empty string"))); |
| |
| xstr = pg_xmlCharStrndup(path, strlen(path)); |
| |
| xtCxt->xpathcomp = xmlXPathCompile(xstr); |
| if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR, |
| "invalid XPath expression"); |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableSetColumnFilter |
| * Install the column-filter Xpath expression, for the given column. |
| */ |
| static void |
| XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| xmlChar *xstr; |
| |
| AssertArg(PointerIsValid(path)); |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter"); |
| |
| if (*path == '\0') |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("column path filter must not be empty string"))); |
| |
| xstr = pg_xmlCharStrndup(path, strlen(path)); |
| |
| xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr); |
| if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION, |
| "invalid XPath expression"); |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableFetchRow |
| * Prepare the next "current" tuple for upcoming GetValue calls. |
| * Returns false if the row-filter expression returned no more rows. |
| */ |
| static bool |
| XmlTableFetchRow(TableFuncScanState *state) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow"); |
| |
| /* Propagate our own error context to libxml2 */ |
| xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); |
| |
| if (xtCxt->xpathobj == NULL) |
| { |
| xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt); |
| if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, |
| "could not create XPath object"); |
| |
| xtCxt->row_count = 0; |
| } |
| |
| if (xtCxt->xpathobj->type == XPATH_NODESET) |
| { |
| if (xtCxt->xpathobj->nodesetval != NULL) |
| { |
| if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr) |
| return true; |
| } |
| } |
| |
| return false; |
| #else |
| NO_XML_SUPPORT(); |
| return false; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableGetValue |
| * Return the value for column number 'colnum' for the current row. If |
| * column -1 is requested, return representation of the whole row. |
| * |
| * This leaks memory, so be sure to reset often the context in which it's |
| * called. |
| */ |
| static Datum |
| XmlTableGetValue(TableFuncScanState *state, int colnum, |
| Oid typid, int32 typmod, bool *isnull) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| Datum result = (Datum) 0; |
| xmlNodePtr cur; |
| char *cstr = NULL; |
| volatile xmlXPathObjectPtr xpathobj = NULL; |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue"); |
| |
| Assert(xtCxt->xpathobj && |
| xtCxt->xpathobj->type == XPATH_NODESET && |
| xtCxt->xpathobj->nodesetval != NULL); |
| |
| /* Propagate our own error context to libxml2 */ |
| xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); |
| |
| *isnull = false; |
| |
| cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1]; |
| |
| Assert(xtCxt->xpathscomp[colnum] != NULL); |
| |
| PG_TRY(); |
| { |
| /* Set current node as entry point for XPath evaluation */ |
| xtCxt->xpathcxt->node = cur; |
| |
| /* Evaluate column path */ |
| xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt); |
| if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) |
| xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, |
| "could not create XPath object"); |
| |
| /* |
| * There are four possible cases, depending on the number of nodes |
| * returned by the XPath expression and the type of the target column: |
| * a) XPath returns no nodes. b) The target type is XML (return all |
| * as XML). For non-XML return types: c) One node (return content). |
| * d) Multiple nodes (error). |
| */ |
| if (xpathobj->type == XPATH_NODESET) |
| { |
| int count = 0; |
| |
| if (xpathobj->nodesetval != NULL) |
| count = xpathobj->nodesetval->nodeNr; |
| |
| if (xpathobj->nodesetval == NULL || count == 0) |
| { |
| *isnull = true; |
| } |
| else |
| { |
| if (typid == XMLOID) |
| { |
| text *textstr; |
| StringInfoData str; |
| |
| /* Concatenate serialized values */ |
| initStringInfo(&str); |
| for (int i = 0; i < count; i++) |
| { |
| textstr = |
| xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i], |
| xtCxt->xmlerrcxt); |
| |
| appendStringInfoText(&str, textstr); |
| } |
| cstr = str.data; |
| } |
| else |
| { |
| xmlChar *str; |
| |
| if (count > 1) |
| ereport(ERROR, |
| (errcode(ERRCODE_CARDINALITY_VIOLATION), |
| errmsg("more than one value returned by column XPath expression"))); |
| |
| str = xmlXPathCastNodeSetToString(xpathobj->nodesetval); |
| cstr = str ? xml_pstrdup_and_free(str) : ""; |
| } |
| } |
| } |
| else if (xpathobj->type == XPATH_STRING) |
| { |
| /* Content should be escaped when target will be XML */ |
| if (typid == XMLOID) |
| cstr = escape_xml((char *) xpathobj->stringval); |
| else |
| cstr = (char *) xpathobj->stringval; |
| } |
| else if (xpathobj->type == XPATH_BOOLEAN) |
| { |
| char typcategory; |
| bool typispreferred; |
| xmlChar *str; |
| |
| /* Allow implicit casting from boolean to numbers */ |
| get_type_category_preferred(typid, &typcategory, &typispreferred); |
| |
| if (typcategory != TYPCATEGORY_NUMERIC) |
| str = xmlXPathCastBooleanToString(xpathobj->boolval); |
| else |
| str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval)); |
| |
| cstr = xml_pstrdup_and_free(str); |
| } |
| else if (xpathobj->type == XPATH_NUMBER) |
| { |
| xmlChar *str; |
| |
| str = xmlXPathCastNumberToString(xpathobj->floatval); |
| cstr = xml_pstrdup_and_free(str); |
| } |
| else |
| elog(ERROR, "unexpected XPath object type %u", xpathobj->type); |
| |
| /* |
| * By here, either cstr contains the result value, or the isnull flag |
| * has been set. |
| */ |
| Assert(cstr || *isnull); |
| |
| if (!*isnull) |
| result = InputFunctionCall(&state->in_functions[colnum], |
| cstr, |
| state->typioparams[colnum], |
| typmod); |
| } |
| PG_FINALLY(); |
| { |
| if (xpathobj != NULL) |
| xmlXPathFreeObject(xpathobj); |
| } |
| PG_END_TRY(); |
| |
| return result; |
| #else |
| NO_XML_SUPPORT(); |
| return 0; |
| #endif /* not USE_LIBXML */ |
| } |
| |
| /* |
| * XmlTableDestroyOpaque |
| * Release all libxml2 resources |
| */ |
| static void |
| XmlTableDestroyOpaque(TableFuncScanState *state) |
| { |
| #ifdef USE_LIBXML |
| XmlTableBuilderData *xtCxt; |
| |
| xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque"); |
| |
| /* Propagate our own error context to libxml2 */ |
| xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler); |
| |
| if (xtCxt->xpathscomp != NULL) |
| { |
| int i; |
| |
| for (i = 0; i < xtCxt->natts; i++) |
| if (xtCxt->xpathscomp[i] != NULL) |
| xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]); |
| } |
| |
| if (xtCxt->xpathobj != NULL) |
| xmlXPathFreeObject(xtCxt->xpathobj); |
| if (xtCxt->xpathcomp != NULL) |
| xmlXPathFreeCompExpr(xtCxt->xpathcomp); |
| if (xtCxt->xpathcxt != NULL) |
| xmlXPathFreeContext(xtCxt->xpathcxt); |
| if (xtCxt->doc != NULL) |
| xmlFreeDoc(xtCxt->doc); |
| if (xtCxt->ctxt != NULL) |
| xmlFreeParserCtxt(xtCxt->ctxt); |
| |
| pg_xml_done(xtCxt->xmlerrcxt, true); |
| |
| /* not valid anymore */ |
| xtCxt->magic = 0; |
| state->opaque = NULL; |
| |
| #else |
| NO_XML_SUPPORT(); |
| #endif /* not USE_LIBXML */ |
| } |