| /* |
| * xml.c |
| * |
| * This file is based on backend/utils/adt/xml.c from the PostgreSQL 9.1 |
| * distribution whose original header is below. The primary differences |
| * between this code and the original are as follows: |
| * |
| * 1. As use of libxml is required, conditional #ifdef codepaths were removed. |
| * |
| * 2. Since these were not needed for XPath support, the following functions |
| * and macros (mainly for SQL/XML:2008) were removed. |
| * |
| * NAMESPACE_SQLXML |
| * NAMESPACE_XSD |
| * NAMESPACE_XSI |
| * NO_XML_SUPPORT |
| * SPI_sql_row_to_xmlelement |
| * XML_VISIBLE_SCHEMAS |
| * XML_VISIBLE_SCHEMAS_EXCLUDE |
| * _SPI_strdup |
| * cstring_to_xmltype |
| * cursor_to_xml |
| * cursor_to_xmlschema |
| * database_get_xml_visible_schemas |
| * database_get_xml_visible_tables |
| * database_to_xml |
| * database_to_xml_and_xmlschema |
| * database_to_xml_internal |
| * database_to_xmlschema |
| * database_to_xmlschema_internal |
| * escape_xml |
| * is_valid_xml_namechar |
| * is_valid_xml_namefirst |
| * map_multipart_sql_identifier_to_xml_name |
| * map_sql_catalog_to_xmlschema_types |
| * map_sql_schema_to_xmlschema_types |
| * map_sql_table_to_xmlschema |
| * map_sql_type_to_xml_name |
| * map_sql_type_to_xmlschema_type |
| * map_sql_typecoll_to_xmlschema_types |
| * map_sql_value_to_xml_value |
| * map_xml_name_to_sql_identifier |
| * query_to_oid_list |
| * query_to_xml |
| * query_to_xml_and_xmlschema |
| * query_to_xml_internal |
| * query_to_xmlschema |
| * schema_get_xml_visible_tables |
| * schema_to_xml |
| * schema_to_xml_and_xmlschema |
| * schema_to_xml_internal |
| * schema_to_xmlschema |
| * schema_to_xmlschema_internal |
| * sqlchar_to_unicode |
| * table_to_xml |
| * table_to_xml_and_xmlschema |
| * table_to_xml_internal |
| * table_to_xmlschema |
| * unicode_to_sqlchar |
| * xmldata_root_element_end |
| * xmldata_root_element_start |
| * xmlelement |
| * xsd_schema_element_end |
| * xsd_schema_element_start |
| * |
| * 3. the xpath() function was changed to call |
| * PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); |
| * instead of |
| * PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext)); |
| * since the correct return type of makeArrayResult() is Datum. |
| * |
| * 4. due to a 'xml.c:1085: error: 'count' may be used uninitialized in this function' |
| * compiler warning, |
| * size_t count; |
| * was changed to |
| * size_t count = 0; |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * xml.c |
| * XML data type support. |
| * |
| * |
| * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/backend/utils/adt/xml.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| /* |
| * Generally, XML type support is only available when libxml use was |
| * configured during the build. But even if that is not done, the |
| * type and all the functions are available, but most of them will |
| * fail. For one thing, this avoids having to manage variant catalog |
| * installations. But it also has nice effects such as that you can |
| * dump a database containing XML type data even if the server is not |
| * linked with libxml. Thus, make sure xml_out() works even if nothing |
| * else does. |
| */ |
| |
| /* |
| * Notes on memory management: |
| * |
| * Sometimes libxml allocates global structures in the hope that it can reuse |
| * them later on. This makes it impractical to change the xmlMemSetup |
| * functions on-the-fly; that is likely to lead to trying to pfree() chunks |
| * allocated with malloc() or vice versa. Since libxml might be used by |
| * loadable modules, eg libperl, our only safe choices are to change the |
| * functions at postmaster/backend launch or not at all. Since we'd rather |
| * not activate libxml in sessions that might never use it, the latter choice |
| * is the preferred one. However, for debugging purposes it can be awfully |
| * handy to constrain libxml's allocations to be done in a specific palloc |
| * context, where they're easy to track. Therefore there is code here that |
| * can be enabled in debug builds to redirect libxml's allocations into a |
| * special context LibxmlContext. It's not recommended to turn this on in |
| * a production build because of the possibility of bad interactions with |
| * external modules. |
| */ |
| /* #define USE_LIBXMLCONTEXT */ |
| |
| #include "postgres.h" |
| |
| #include <libxml/chvalid.h> |
| #include <libxml/parser.h> |
| #include <libxml/tree.h> |
| #include <libxml/uri.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/xmlwriter.h> |
| #include <libxml/xpath.h> |
| #include <libxml/xpathInternals.h> |
| |
| #include "catalog/namespace.h" |
| #include "catalog/pg_type.h" |
| #include "commands/dbcommands.h" |
| #include "executor/executor.h" |
| #include "executor/spi.h" |
| #include "fmgr.h" |
| #include "lib/stringinfo.h" |
| #include "libpq/pqformat.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "nodes/execnodes.h" |
| #include "nodes/nodeFuncs.h" |
| #include "utils/array.h" |
| #include "utils/builtins.h" |
| #include "utils/date.h" |
| #include "utils/datetime.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/syscache.h" |
| #include "utils/xml.h" |
| |
| |
/*
 * GUC variables.  xmloption supplies the default parse mode handed to
 * xml_parse()/xmlparse() by xml_in(), xml_recv() and texttoxml().
 */
int xmlbinary;
int xmloption;

/*
 * Buffer collecting libxml's error text.  pg_xml_init() creates it in
 * TopMemoryContext, xml_errorHandler() appends to it, and xml_ereport()
 * drains it into the errdetail of the message it raises.
 */
static StringInfo xml_err_buf = NULL;

/* Error-reporting helpers defined later in this file */
static void xml_errorHandler(void *ctxt, const char *msg,...);
static void xml_ereport_by_code(int level, int sqlcode,
					const char *msg, int errcode);

#ifdef USE_LIBXMLCONTEXT

/* Context that receives libxml's allocations in debug builds */
static MemoryContext LibxmlContext = NULL;

/* Allocator callbacks installed into libxml by xml_memory_init() */
static void xml_memory_init(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
#endif   /* USE_LIBXMLCONTEXT */

/* Conversion, declaration-handling and parsing helpers */
static xmlChar *xml_text2xmlChar(text *in);
static int parse_xml_decl(const xmlChar *str, size_t *lenp,
			   xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
			   pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
		  bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
| |
| |
| |
| static int |
| xmlChar_to_encoding(const xmlChar *encoding_name) |
| { |
| int encoding = pg_char_to_encoding((const char *) encoding_name); |
| |
| if (encoding < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid encoding name \"%s\"", |
| (const char *) encoding_name))); |
| return encoding; |
| } |
| |
| |
| /* |
| * xml_in uses a plain C string to VARDATA conversion, so for the time being |
| * we use the conversion function for the text datatype. |
| * |
| * This is only acceptable so long as xmltype and text use the same |
| * representation. |
| */ |
| Datum |
| xml_in(PG_FUNCTION_ARGS) |
| { |
| char *s = PG_GETARG_CSTRING(0); |
| xmltype *vardata; |
| xmlDocPtr doc; |
| |
| vardata = (xmltype *) cstring_to_text(s); |
| |
| /* |
| * Parse the data to check if it is well-formed XML data. Assume that |
| * ERROR occurred if parsing failed. |
| */ |
| doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding()); |
| xmlFreeDoc(doc); |
| |
| PG_RETURN_XML_P(vardata); |
| } |
| |
| |
/*
 * XML version assumed when none is given: print_xml_decl() writes this
 * value if it must emit a declaration without a version, and does not
 * treat a version equal to it as a reason to emit a declaration.
 */
#define PG_XML_DEFAULT_VERSION "1.0"
| |
| |
| /* |
| * xml_out_internal uses a plain VARDATA to C string conversion, so for the |
| * time being we use the conversion function for the text datatype. |
| * |
| * This is only acceptable so long as xmltype and text use the same |
| * representation. |
| */ |
| static char * |
| xml_out_internal(xmltype *x, pg_enc target_encoding) |
| { |
| char *str = text_to_cstring((text *) x); |
| |
| size_t len = strlen(str); |
| xmlChar *version; |
| int standalone; |
| int res_code; |
| |
| if ((res_code = parse_xml_decl((xmlChar *) str, |
| &len, &version, NULL, &standalone)) == 0) |
| { |
| StringInfoData buf; |
| |
| initStringInfo(&buf); |
| |
| if (!print_xml_decl(&buf, version, target_encoding, standalone)) |
| { |
| /* |
| * If we are not going to produce an XML declaration, eat a single |
| * newline in the original string to prevent empty first lines in |
| * the output. |
| */ |
| if (*(str + len) == '\n') |
| len += 1; |
| } |
| appendStringInfoString(&buf, str + len); |
| |
| pfree(str); |
| |
| return buf.data; |
| } |
| |
| xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR, |
| "could not parse XML declaration in stored value", |
| res_code); |
| return str; |
| } |
| |
| |
| Datum |
| xml_out(PG_FUNCTION_ARGS) |
| { |
| xmltype *x = PG_GETARG_XML_P(0); |
| |
| /* |
| * xml_out removes the encoding property in all cases. This is because we |
| * cannot control from here whether the datum will be converted to a |
| * different client encoding, so we'd do more harm than good by including |
| * it. |
| */ |
| PG_RETURN_CSTRING(xml_out_internal(x, 0)); |
| } |
| |
| |
| Datum |
| xml_recv(PG_FUNCTION_ARGS) |
| { |
| StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
| xmltype *result; |
| char *str; |
| char *newstr; |
| int nbytes; |
| xmlDocPtr doc; |
| xmlChar *encodingStr = NULL; |
| int encoding; |
| |
| /* |
| * Read the data in raw format. We don't know yet what the encoding is, as |
| * that information is embedded in the xml declaration; so we have to |
| * parse that before converting to server encoding. |
| */ |
| nbytes = buf->len - buf->cursor; |
| str = (char *) pq_getmsgbytes(buf, nbytes); |
| |
| /* |
| * We need a null-terminated string to pass to parse_xml_decl(). Rather |
| * than make a separate copy, make the temporary result one byte bigger |
| * than it needs to be. |
| */ |
| result = palloc(nbytes + 1 + VARHDRSZ); |
| SET_VARSIZE(result, nbytes + VARHDRSZ); |
| memcpy(VARDATA(result), str, nbytes); |
| str = VARDATA(result); |
| str[nbytes] = '\0'; |
| |
| parse_xml_decl((xmlChar *) str, NULL, NULL, &encodingStr, NULL); |
| |
| /* |
| * If encoding wasn't explicitly specified in the XML header, treat it as |
| * UTF-8, as that's the default in XML. This is different from xml_in(), |
| * where the input has to go through the normal client to server encoding |
| * conversion. |
| */ |
| encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8; |
| |
| /* |
| * Parse the data to check if it is well-formed XML data. Assume that |
| * xml_parse will throw ERROR if not. |
| */ |
| doc = xml_parse(result, xmloption, true, encoding); |
| xmlFreeDoc(doc); |
| |
| /* Now that we know what we're dealing with, convert to server encoding */ |
| newstr = (char *) pg_do_encoding_conversion((unsigned char *) str, |
| nbytes, |
| encoding, |
| GetDatabaseEncoding()); |
| |
| if (newstr != str) |
| { |
| pfree(result); |
| result = (xmltype *) cstring_to_text(newstr); |
| pfree(newstr); |
| } |
| |
| PG_RETURN_XML_P(result); |
| } |
| |
| |
| Datum |
| xml_send(PG_FUNCTION_ARGS) |
| { |
| xmltype *x = PG_GETARG_XML_P(0); |
| char *outval; |
| StringInfoData buf; |
| |
| /* |
| * xml_out_internal doesn't convert the encoding, it just prints the right |
| * declaration. pq_sendtext will do the conversion. |
| */ |
| outval = xml_out_internal(x, pg_get_client_encoding()); |
| |
| pq_begintypsend(&buf); |
| pq_sendtext(&buf, outval, strlen(outval)); |
| pfree(outval); |
| PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); |
| } |
| |
| |
| static void |
| appendStringInfoText(StringInfo str, const text *t) |
| { |
| appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ); |
| } |
| |
| |
| static xmltype * |
| stringinfo_to_xmltype(StringInfo buf) |
| { |
| return (xmltype *) cstring_to_text_with_len(buf->data, buf->len); |
| } |
| |
| |
| static xmltype * |
| xmlBuffer_to_xmltype(xmlBufferPtr buf) |
| { |
| return (xmltype *) cstring_to_text_with_len((char *) xmlBufferContent(buf), |
| xmlBufferLength(buf)); |
| } |
| |
| |
| Datum |
| xmlcomment(PG_FUNCTION_ARGS) |
| { |
| text *arg = PG_GETARG_TEXT_P(0); |
| char *argdata = VARDATA(arg); |
| int len = VARSIZE(arg) - VARHDRSZ; |
| StringInfoData buf; |
| int i; |
| |
| /* check for "--" in string or "-" at the end */ |
| for (i = 1; i < len; i++) |
| { |
| if (argdata[i] == '-' && argdata[i - 1] == '-') |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_COMMENT), |
| errmsg("invalid XML comment"))); |
| } |
| if (len > 0 && argdata[len - 1] == '-') |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_COMMENT), |
| errmsg("invalid XML comment"))); |
| |
| initStringInfo(&buf); |
| appendStringInfo(&buf, "<!--"); |
| appendStringInfoText(&buf, arg); |
| appendStringInfo(&buf, "-->"); |
| |
| PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); |
| } |
| |
| |
| |
| /* |
| * TODO: xmlconcat needs to merge the notations and unparsed entities |
| * of the argument values. Not very important in practice, though. |
| */ |
| xmltype * |
| xmlconcat(List *args) |
| { |
| int global_standalone = 1; |
| xmlChar *global_version = NULL; |
| bool global_version_no_value = false; |
| StringInfoData buf; |
| ListCell *v; |
| |
| initStringInfo(&buf); |
| foreach(v, args) |
| { |
| xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v))); |
| size_t len; |
| xmlChar *version; |
| int standalone; |
| char *str; |
| |
| len = VARSIZE(x) - VARHDRSZ; |
| str = text_to_cstring((text *) x); |
| |
| parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone); |
| |
| if (standalone == 0 && global_standalone == 1) |
| global_standalone = 0; |
| if (standalone < 0) |
| global_standalone = -1; |
| |
| if (!version) |
| global_version_no_value = true; |
| else if (!global_version) |
| global_version = version; |
| else if (xmlStrcmp(version, global_version) != 0) |
| global_version_no_value = true; |
| |
| appendStringInfoString(&buf, str + len); |
| pfree(str); |
| } |
| |
| if (!global_version_no_value || global_standalone >= 0) |
| { |
| StringInfoData buf2; |
| |
| initStringInfo(&buf2); |
| |
| print_xml_decl(&buf2, |
| (!global_version_no_value) ? global_version : NULL, |
| 0, |
| global_standalone); |
| |
| appendStringInfoString(&buf2, buf.data); |
| buf = buf2; |
| } |
| |
| return stringinfo_to_xmltype(&buf); |
| } |
| |
| |
| /* |
| * XMLAGG support |
| */ |
| Datum |
| xmlconcat2(PG_FUNCTION_ARGS) |
| { |
| if (PG_ARGISNULL(0)) |
| { |
| if (PG_ARGISNULL(1)) |
| PG_RETURN_NULL(); |
| else |
| PG_RETURN_XML_P(PG_GETARG_XML_P(1)); |
| } |
| else if (PG_ARGISNULL(1)) |
| PG_RETURN_XML_P(PG_GETARG_XML_P(0)); |
| else |
| PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0), |
| PG_GETARG_XML_P(1)))); |
| } |
| |
| |
| Datum |
| texttoxml(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_P(0); |
| |
| PG_RETURN_XML_P(xmlparse(data, xmloption, true)); |
| } |
| |
| |
| Datum |
| xmltotext(PG_FUNCTION_ARGS) |
| { |
| xmltype *data = PG_GETARG_XML_P(0); |
| |
| /* It's actually binary compatible. */ |
| PG_RETURN_TEXT_P((text *) data); |
| } |
| |
| |
| text * |
| xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg) |
| { |
| if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data)) |
| ereport(ERROR, |
| (errcode(ERRCODE_NOT_AN_XML_DOCUMENT), |
| errmsg("not an XML document"))); |
| |
| /* It's actually binary compatible, save for the above check. */ |
| return (text *) data; |
| } |
| |
| |
| |
| xmltype * |
| xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) |
| { |
| xmlDocPtr doc; |
| |
| doc = xml_parse(data, xmloption_arg, preserve_whitespace, |
| GetDatabaseEncoding()); |
| xmlFreeDoc(doc); |
| |
| return (xmltype *) data; |
| } |
| |
| |
| xmltype * |
| xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null) |
| { |
| xmltype *result; |
| StringInfoData buf; |
| |
| if (pg_strcasecmp(target, "xml") == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), /* really */ |
| errmsg("invalid XML processing instruction"), |
| errdetail("XML processing instruction target name cannot be \"%s\".", target))); |
| |
| /* |
| * Following the SQL standard, the null check comes after the syntax check |
| * above. |
| */ |
| *result_is_null = arg_is_null; |
| if (*result_is_null) |
| return NULL; |
| |
| initStringInfo(&buf); |
| |
| appendStringInfo(&buf, "<?%s", target); |
| |
| if (arg != NULL) |
| { |
| char *string; |
| |
| string = text_to_cstring(arg); |
| if (strstr(string, "?>") != NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), |
| errmsg("invalid XML processing instruction"), |
| errdetail("XML processing instruction cannot contain \"?>\"."))); |
| |
| appendStringInfoChar(&buf, ' '); |
| appendStringInfoString(&buf, string + strspn(string, " ")); |
| pfree(string); |
| } |
| appendStringInfoString(&buf, "?>"); |
| |
| result = stringinfo_to_xmltype(&buf); |
| pfree(buf.data); |
| return result; |
| } |
| |
| |
| xmltype * |
| xmlroot(xmltype *data, text *version, int standalone) |
| { |
| char *str; |
| size_t len; |
| xmlChar *orig_version; |
| int orig_standalone; |
| StringInfoData buf; |
| |
| len = VARSIZE(data) - VARHDRSZ; |
| str = text_to_cstring((text *) data); |
| |
| parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone); |
| |
| if (version) |
| orig_version = xml_text2xmlChar(version); |
| else |
| orig_version = NULL; |
| |
| switch (standalone) |
| { |
| case XML_STANDALONE_YES: |
| orig_standalone = 1; |
| break; |
| case XML_STANDALONE_NO: |
| orig_standalone = 0; |
| break; |
| case XML_STANDALONE_NO_VALUE: |
| orig_standalone = -1; |
| break; |
| case XML_STANDALONE_OMITTED: |
| /* leave original value */ |
| break; |
| } |
| |
| initStringInfo(&buf); |
| print_xml_decl(&buf, orig_version, 0, orig_standalone); |
| appendStringInfoString(&buf, str + len); |
| |
| return stringinfo_to_xmltype(&buf); |
| } |
| |
| |
| /* |
| * Validate document (given as string) against DTD (given as external link) |
| * |
| * This has been removed because it is a security hole: unprivileged users |
| * should not be able to use Postgres to fetch arbitrary external files, |
| * which unfortunately is exactly what libxml is willing to do with the DTD |
| * parameter. |
| */ |
| Datum |
| xmlvalidate(PG_FUNCTION_ARGS) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("xmlvalidate is not implemented"))); |
| return 0; |
| } |
| |
| |
| bool |
| xml_is_document(xmltype *arg) |
| { |
| bool result; |
| xmlDocPtr doc = NULL; |
| MemoryContext ccxt = CurrentMemoryContext; |
| |
| /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ |
| PG_TRY(); |
| { |
| doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, |
| GetDatabaseEncoding()); |
| result = true; |
| } |
| PG_CATCH(); |
| { |
| ErrorData *errdata; |
| MemoryContext ecxt; |
| |
| ecxt = MemoryContextSwitchTo(ccxt); |
| errdata = CopyErrorData(); |
| if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT) |
| { |
| FlushErrorState(); |
| result = false; |
| } |
| else |
| { |
| MemoryContextSwitchTo(ecxt); |
| PG_RE_THROW(); |
| } |
| } |
| PG_END_TRY(); |
| |
| if (doc) |
| xmlFreeDoc(doc); |
| |
| return result; |
| } |
| |
| |
| |
| /* |
| * pg_xml_init --- set up for use of libxml |
| * |
| * This should be called by each function that is about to use libxml |
| * facilities. It has two responsibilities: verify compatibility with the |
| * loaded libxml version (done on first call in a session) and establish |
| * or re-establish our libxml error handler. The latter needs to be done |
| * anytime we might have passed control to add-on modules (eg libperl) which |
| * might have set their own error handler for libxml. |
| * |
| * This is exported for use by contrib/xml2, as well as other code that might |
| * wish to share use of this module's libxml error handler. |
| * |
| * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and |
| * check) |
| */ |
| void |
| pg_xml_init(void) |
| { |
| static bool first_time = true; |
| |
| if (first_time) |
| { |
| /* Stuff we need do only once per session */ |
| MemoryContext oldcontext; |
| |
| /* |
| * Currently, we have no pure UTF-8 support for internals -- check if |
| * we can work. |
| */ |
| if (sizeof(char) != sizeof(xmlChar)) |
| ereport(ERROR, |
| (errmsg("could not initialize XML library"), |
| errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", |
| (int) sizeof(char), (int) sizeof(xmlChar)))); |
| |
| /* create error buffer in permanent context */ |
| oldcontext = MemoryContextSwitchTo(TopMemoryContext); |
| xml_err_buf = makeStringInfo(); |
| MemoryContextSwitchTo(oldcontext); |
| |
| /* Now that xml_err_buf exists, safe to call xml_errorHandler */ |
| xmlSetGenericErrorFunc(NULL, xml_errorHandler); |
| |
| #ifdef USE_LIBXMLCONTEXT |
| /* Set up memory allocation our way, too */ |
| xml_memory_init(); |
| #endif |
| |
| /* Check library compatibility */ |
| LIBXML_TEST_VERSION; |
| |
| first_time = false; |
| } |
| else |
| { |
| /* Reset pre-existing buffer to empty */ |
| Assert(xml_err_buf != NULL); |
| resetStringInfo(xml_err_buf); |
| |
| /* |
| * We re-establish the error callback function every time. This makes |
| * it safe for other subsystems (PL/Perl, say) to also use libxml with |
| * their own callbacks ... so long as they likewise set up the |
| * callbacks on every use. It's cheap enough to not be worth worrying |
| * about, anyway. |
| */ |
| xmlSetGenericErrorFunc(NULL, xml_errorHandler); |
| } |
| } |
| |
| |
| /* |
| * SQL/XML allows storing "XML documents" or "XML content". "XML |
| * documents" are specified by the XML specification and are parsed |
| * easily by libxml. "XML content" is specified by SQL/XML as the |
| * production "XMLDecl? content". But libxml can only parse the |
| * "content" part, so we have to parse the XML declaration ourselves |
| * to complete this. |
| */ |
| |
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is
 * an XML whitespace character.  Only usable inside functions returning an
 * error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML whitespace characters */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
| |
| /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */ |
| static xmlChar * |
| xml_pnstrdup(const xmlChar *str, size_t len) |
| { |
| xmlChar *result; |
| |
| result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); |
| memcpy(result, str, len * sizeof(xmlChar)); |
| result[len] = 0; |
| return result; |
| } |
| |
| /* |
| * str is the null-terminated input string. Remaining arguments are |
| * output arguments; each can be NULL if value is not wanted. |
| * version and encoding are returned as locally-palloc'd strings. |
| * Result is 0 if OK, an error code if not. |
| */ |
| static int |
| parse_xml_decl(const xmlChar *str, size_t *lenp, |
| xmlChar **version, xmlChar **encoding, int *standalone) |
| { |
| const xmlChar *p; |
| const xmlChar *save_p; |
| size_t len; |
| int utf8char; |
| int utf8len; |
| |
| pg_xml_init(); |
| |
| /* Initialize output arguments to "not present" */ |
| if (version) |
| *version = NULL; |
| if (encoding) |
| *encoding = NULL; |
| if (standalone) |
| *standalone = -1; |
| |
| p = str; |
| |
| if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0) |
| goto finished; |
| |
| /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */ |
| utf8len = strlen((const char *) (p + 5)); |
| utf8char = xmlGetUTF8Char(p + 5, &utf8len); |
| if (PG_XMLISNAMECHAR(utf8char)) |
| goto finished; |
| |
| p += 5; |
| |
| /* version */ |
| CHECK_XML_SPACE(p); |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0) |
| return XML_ERR_VERSION_MISSING; |
| p += 7; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_VERSION_MISSING; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| |
| if (*p == '\'' || *p == '"') |
| { |
| const xmlChar *q; |
| |
| q = xmlStrchr(p + 1, *p); |
| if (!q) |
| return XML_ERR_VERSION_MISSING; |
| |
| if (version) |
| *version = xml_pnstrdup(p + 1, q - p - 1); |
| p = q + 1; |
| } |
| else |
| return XML_ERR_VERSION_MISSING; |
| |
| /* encoding */ |
| save_p = p; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0) |
| { |
| CHECK_XML_SPACE(save_p); |
| p += 8; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_MISSING_ENCODING; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| |
| if (*p == '\'' || *p == '"') |
| { |
| const xmlChar *q; |
| |
| q = xmlStrchr(p + 1, *p); |
| if (!q) |
| return XML_ERR_MISSING_ENCODING; |
| |
| if (encoding) |
| *encoding = xml_pnstrdup(p + 1, q - p - 1); |
| p = q + 1; |
| } |
| else |
| return XML_ERR_MISSING_ENCODING; |
| } |
| else |
| { |
| p = save_p; |
| } |
| |
| /* standalone */ |
| save_p = p; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0) |
| { |
| CHECK_XML_SPACE(save_p); |
| p += 10; |
| SKIP_XML_SPACE(p); |
| if (*p != '=') |
| return XML_ERR_STANDALONE_VALUE; |
| p += 1; |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 || |
| xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0) |
| { |
| if (standalone) |
| *standalone = 1; |
| p += 5; |
| } |
| else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 || |
| xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0) |
| { |
| if (standalone) |
| *standalone = 0; |
| p += 4; |
| } |
| else |
| return XML_ERR_STANDALONE_VALUE; |
| } |
| else |
| { |
| p = save_p; |
| } |
| |
| SKIP_XML_SPACE(p); |
| if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0) |
| return XML_ERR_XMLDECL_NOT_FINISHED; |
| p += 2; |
| |
| finished: |
| len = p - str; |
| |
| for (p = str; p < str + len; p++) |
| if (*p > 127) |
| return XML_ERR_INVALID_CHAR; |
| |
| if (lenp) |
| *lenp = len; |
| |
| return XML_ERR_OK; |
| } |
| |
| |
| /* |
| * Write an XML declaration. On output, we adjust the XML declaration |
| * as follows. (These rules are the moral equivalent of the clause |
| * "Serialization of an XML value" in the SQL standard.) |
| * |
| * We try to avoid generating an XML declaration if possible. This is |
| * so that you don't get trivial things like xml '<foo/>' resulting in |
| * '<?xml version="1.0"?><foo/>', which would surely be annoying. We |
| * must provide a declaration if the standalone property is specified |
| * or if we include an encoding declaration. If we have a |
| * declaration, we must specify a version (XML requires this). |
| * Otherwise we only make a declaration if the version is not "1.0", |
| * which is the default version specified in SQL:2003. |
| */ |
| static bool |
| print_xml_decl(StringInfo buf, const xmlChar *version, |
| pg_enc encoding, int standalone) |
| { |
| pg_xml_init(); /* why is this here? */ |
| |
| if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0) |
| || (encoding && encoding != PG_UTF8) |
| || standalone != -1) |
| { |
| appendStringInfoString(buf, "<?xml"); |
| |
| if (version) |
| appendStringInfo(buf, " version=\"%s\"", version); |
| else |
| appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION); |
| |
| if (encoding && encoding != PG_UTF8) |
| { |
| /* |
| * XXX might be useful to convert this to IANA names (ISO-8859-1 |
| * instead of LATIN1 etc.); needs field experience |
| */ |
| appendStringInfo(buf, " encoding=\"%s\"", |
| pg_encoding_to_char(encoding)); |
| } |
| |
| if (standalone == 1) |
| appendStringInfoString(buf, " standalone=\"yes\""); |
| else if (standalone == 0) |
| appendStringInfoString(buf, " standalone=\"no\""); |
| appendStringInfoString(buf, "?>"); |
| |
| return true; |
| } |
| else |
| return false; |
| } |
| |
| |
| /* |
| * Convert a C string to XML internal representation |
| * |
| * Note: it is caller's responsibility to xmlFreeDoc() the result, |
| * else a permanent memory leak will ensue! |
| * |
| * TODO maybe libxml2's xmlreader is better? (do not construct DOM, |
| * yet do not use SAX - see xmlreader.c) |
| */ |
| static xmlDocPtr |
| xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, |
| int encoding) |
| { |
| int32 len; |
| xmlChar *string; |
| xmlChar *utf8string; |
| xmlParserCtxtPtr ctxt; |
| xmlDocPtr doc; |
| |
| len = VARSIZE(data) - VARHDRSZ; /* will be useful later */ |
| string = xml_text2xmlChar(data); |
| |
| utf8string = pg_do_encoding_conversion(string, |
| len, |
| encoding, |
| PG_UTF8); |
| |
| /* Start up libxml and its parser (no-ops if already done) */ |
| pg_xml_init(); |
| xmlInitParser(); |
| |
| ctxt = xmlNewParserCtxt(); |
| if (ctxt == NULL) |
| xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate parser context"); |
| |
| /* Use a TRY block to ensure the ctxt is released */ |
| PG_TRY(); |
| { |
| if (xmloption_arg == XMLOPTION_DOCUMENT) |
| { |
| /* |
| * Note, that here we try to apply DTD defaults |
| * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d: |
| * 'Default values defined by internal DTD are applied'. As for |
| * external DTDs, we try to support them too, (see SQL/XML:2008 GR |
| * 10.16.7.e) |
| */ |
| doc = xmlCtxtReadDoc(ctxt, utf8string, |
| NULL, |
| "UTF-8", |
| XML_PARSE_NOENT | XML_PARSE_DTDATTR |
| | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); |
| if (doc == NULL) |
| xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, |
| "invalid XML document"); |
| } |
| else |
| { |
| int res_code; |
| size_t count = 0; |
| xmlChar *version; |
| int standalone; |
| |
| res_code = parse_xml_decl(utf8string, |
| &count, &version, NULL, &standalone); |
| if (res_code != 0) |
| xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, |
| "invalid XML content: invalid XML declaration", |
| res_code); |
| |
| doc = xmlNewDoc(version); |
| Assert(doc->encoding == NULL); |
| doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); |
| doc->standalone = standalone; |
| |
| res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, |
| utf8string + count, NULL); |
| if (res_code != 0) |
| { |
| xmlFreeDoc(doc); |
| xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT, |
| "invalid XML content"); |
| } |
| } |
| } |
| PG_CATCH(); |
| { |
| xmlFreeParserCtxt(ctxt); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlFreeParserCtxt(ctxt); |
| |
| return doc; |
| } |
| |
| |
| /* |
| * xmlChar<->text conversions |
| */ |
| static xmlChar * |
| xml_text2xmlChar(text *in) |
| { |
| return (xmlChar *) text_to_cstring(in); |
| } |
| |
| |
| #ifdef USE_LIBXMLCONTEXT |
| |
| /* |
| * Manage the special context used for all libxml allocations (but only |
| * in special debug builds; see notes at top of file) |
| */ |
| static void |
| xml_memory_init(void) |
| { |
| /* Create memory context if not there already */ |
| if (LibxmlContext == NULL) |
| LibxmlContext = AllocSetContextCreate(TopMemoryContext, |
| "LibxmlContext", |
| ALLOCSET_DEFAULT_MINSIZE, |
| ALLOCSET_DEFAULT_INITSIZE, |
| ALLOCSET_DEFAULT_MAXSIZE); |
| |
| /* Re-establish the callbacks even if already set */ |
| xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); |
| } |
| |
| /* |
| * Wrappers for memory management functions |
| */ |
| static void * |
| xml_palloc(size_t size) |
| { |
| return MemoryContextAlloc(LibxmlContext, size); |
| } |
| |
| |
| static void * |
| xml_repalloc(void *ptr, size_t size) |
| { |
| return repalloc(ptr, size); |
| } |
| |
| |
/* free replacement for libxml; a NULL pointer is silently ignored. */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
| |
| |
| static char * |
| xml_pstrdup(const char *string) |
| { |
| return MemoryContextStrdup(LibxmlContext, string); |
| } |
| #endif /* USE_LIBXMLCONTEXT */ |
| |
| |
| /* |
| * xml_ereport --- report an XML-related error |
| * |
| * The "msg" is the SQL-level message; some can be adopted from the SQL/XML |
| * standard. This function adds libxml's native error message, if any, as |
| * detail. |
| * |
| * This is exported for modules that want to share the core libxml error |
| * handler. Note that pg_xml_init() *must* have been called previously. |
| */ |
| void |
| xml_ereport(int level, int sqlcode, const char *msg) |
| { |
| char *detail; |
| |
| /* |
| * It might seem that we should just pass xml_err_buf->data directly to |
| * errdetail. However, we want to clean out xml_err_buf before throwing |
| * error, in case there is another function using libxml further down the |
| * call stack. |
| */ |
| if (xml_err_buf->len > 0) |
| { |
| detail = pstrdup(xml_err_buf->data); |
| resetStringInfo(xml_err_buf); |
| } |
| else |
| detail = NULL; |
| |
| if (detail) |
| { |
| size_t len; |
| |
| /* libxml error messages end in '\n'; get rid of it */ |
| len = strlen(detail); |
| if (len > 0 && detail[len - 1] == '\n') |
| detail[len - 1] = '\0'; |
| |
| ereport(level, |
| (errcode(sqlcode), |
| errmsg("%s", msg), |
| errdetail("%s", detail))); |
| } |
| else |
| { |
| ereport(level, |
| (errcode(sqlcode), |
| errmsg("%s", msg))); |
| } |
| } |
| |
| |
| /* |
| * Error handler for libxml error messages |
| */ |
| static void |
| xml_errorHandler(void *ctxt, const char *msg,...) |
| { |
| /* Append the formatted text to xml_err_buf */ |
| for (;;) |
| { |
| va_list args; |
| bool success; |
| |
| /* Try to format the data. */ |
| va_start(args, msg); |
| success = appendStringInfoVA(xml_err_buf, msg, args); |
| va_end(args); |
| |
| if (success) |
| break; |
| |
| /* Double the buffer size and try again. */ |
| enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen); |
| } |
| } |
| |
| |
| /* |
| * Wrapper for "ereport" function for XML-related errors. The "msg" |
| * is the SQL-level message; some can be adopted from the SQL/XML |
| * standard. This function uses "code" to create a textual detail |
| * message. At the moment, we only need to cover those codes that we |
| * may raise in this file. |
| */ |
| static void |
| xml_ereport_by_code(int level, int sqlcode, |
| const char *msg, int code) |
| { |
| const char *det; |
| |
| switch (code) |
| { |
| case XML_ERR_INVALID_CHAR: |
| det = gettext_noop("Invalid character value."); |
| break; |
| case XML_ERR_SPACE_REQUIRED: |
| det = gettext_noop("Space required."); |
| break; |
| case XML_ERR_STANDALONE_VALUE: |
| det = gettext_noop("standalone accepts only 'yes' or 'no'."); |
| break; |
| case XML_ERR_VERSION_MISSING: |
| det = gettext_noop("Malformed declaration: missing version."); |
| break; |
| case XML_ERR_MISSING_ENCODING: |
| det = gettext_noop("Missing encoding in text declaration."); |
| break; |
| case XML_ERR_XMLDECL_NOT_FINISHED: |
| det = gettext_noop("Parsing XML declaration: '?>' expected."); |
| break; |
| default: |
| det = gettext_noop("Unrecognized libxml error code: %d."); |
| break; |
| } |
| |
| ereport(level, |
| (errcode(sqlcode), |
| errmsg("%s", msg), |
| errdetail(det, code))); |
| } |
| |
| |
| |
| /* |
| * XPath related functions |
| */ |
| |
| /* |
| * Convert XML node to text (dump subtree in case of element, |
| * return value otherwise) |
| */ |
| static text * |
| xml_xmlnodetoxmltype(xmlNodePtr cur) |
| { |
| xmltype *result; |
| |
| if (cur->type == XML_ELEMENT_NODE) |
| { |
| xmlBufferPtr buf; |
| |
| buf = xmlBufferCreate(); |
| PG_TRY(); |
| { |
| xmlNodeDump(buf, NULL, cur, 0, 1); |
| result = xmlBuffer_to_xmltype(buf); |
| } |
| PG_CATCH(); |
| { |
| xmlBufferFree(buf); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| xmlBufferFree(buf); |
| } |
| else |
| { |
| xmlChar *str; |
| |
| str = xmlXPathCastNodeToString(cur); |
| PG_TRY(); |
| { |
| result = (xmltype *) cstring_to_text((char *) str); |
| } |
| PG_CATCH(); |
| { |
| xmlFree(str); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| xmlFree(str); |
| } |
| |
| return result; |
| } |
| |
| |
| /* |
| * Common code for xpath() and xmlexists() |
| * |
| * Evaluate XPath expression and return number of nodes in res_items |
| * and array of XML values in astate. |
| * |
| * It is up to the user to ensure that the XML passed is in fact |
| * an XML document - XPath doesn't work easily on fragments without |
| * a context node being known. |
| */ |
| static void |
| xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, |
| int *res_nitems, ArrayBuildState **astate) |
| { |
| xmlParserCtxtPtr ctxt = NULL; |
| xmlDocPtr doc = NULL; |
| xmlXPathContextPtr xpathctx = NULL; |
| xmlXPathCompExprPtr xpathcomp = NULL; |
| xmlXPathObjectPtr xpathobj = NULL; |
| char *datastr; |
| int32 len; |
| int32 xpath_len; |
| xmlChar *string; |
| xmlChar *xpath_expr; |
| int i; |
| int ndim; |
| Datum *ns_names_uris; |
| bool *ns_names_uris_nulls; |
| int ns_count; |
| |
| /* |
| * Namespace mappings are passed as text[]. If an empty array is passed |
| * (ndim = 0, "0-dimensional"), then there are no namespace mappings. |
| * Else, a 2-dimensional array with length of the second axis being equal |
| * to 2 should be passed, i.e., every subarray contains 2 elements, the |
| * first element defining the name, the second one the URI. Example: |
| * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', |
| * 'http://example2.com']]. |
| */ |
| ndim = namespaces ? ARR_NDIM(namespaces) : 0; |
| if (ndim != 0) |
| { |
| int *dims; |
| |
| dims = ARR_DIMS(namespaces); |
| |
| if (ndim != 2 || dims[1] != 2) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("invalid array for XML namespace mapping"), |
| errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); |
| |
| Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); |
| |
| deconstruct_array(namespaces, TEXTOID, -1, false, 'i', |
| &ns_names_uris, &ns_names_uris_nulls, |
| &ns_count); |
| |
| Assert((ns_count % 2) == 0); /* checked above */ |
| ns_count /= 2; /* count pairs only */ |
| } |
| else |
| { |
| ns_names_uris = NULL; |
| ns_names_uris_nulls = NULL; |
| ns_count = 0; |
| } |
| |
| datastr = VARDATA(data); |
| len = VARSIZE(data) - VARHDRSZ; |
| xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ; |
| if (xpath_len == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("empty XPath expression"))); |
| |
| string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); |
| memcpy(string, datastr, len); |
| string[len] = '\0'; |
| |
| xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar)); |
| memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); |
| xpath_expr[xpath_len] = '\0'; |
| |
| pg_xml_init(); |
| xmlInitParser(); |
| |
| PG_TRY(); |
| { |
| /* |
| * redundant XML parsing (two parsings for the same value during one |
| * command execution are possible) |
| */ |
| ctxt = xmlNewParserCtxt(); |
| if (ctxt == NULL) |
| xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate parser context"); |
| doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); |
| if (doc == NULL) |
| xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, |
| "could not parse XML document"); |
| xpathctx = xmlXPathNewContext(doc); |
| if (xpathctx == NULL) |
| xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, |
| "could not allocate XPath context"); |
| xpathctx->node = xmlDocGetRootElement(doc); |
| if (xpathctx->node == NULL) |
| xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, |
| "could not find root XML element"); |
| |
| /* register namespaces, if any */ |
| if (ns_count > 0) |
| { |
| for (i = 0; i < ns_count; i++) |
| { |
| char *ns_name; |
| char *ns_uri; |
| |
| if (ns_names_uris_nulls[i * 2] || |
| ns_names_uris_nulls[i * 2 + 1]) |
| ereport(ERROR, |
| (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), |
| errmsg("neither namespace name nor URI may be null"))); |
| ns_name = TextDatumGetCString(ns_names_uris[i * 2]); |
| ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); |
| if (xmlXPathRegisterNs(xpathctx, |
| (xmlChar *) ns_name, |
| (xmlChar *) ns_uri) != 0) |
| ereport(ERROR, /* is this an internal error??? */ |
| (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", |
| ns_name, ns_uri))); |
| } |
| } |
| |
| xpathcomp = xmlXPathCompile(xpath_expr); |
| if (xpathcomp == NULL) /* TODO: show proper XPath error details */ |
| xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, |
| "invalid XPath expression"); |
| |
| /* |
| * Version 2.6.27 introduces a function named |
| * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists, |
| * but we can derive the existence by whether any nodes are returned, |
| * thereby preventing a library version upgrade and keeping the code |
| * the same. |
| */ |
| xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); |
| if (xpathobj == NULL) /* TODO: reason? */ |
| xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, |
| "could not create XPath object"); |
| |
| /* return empty array in cases when nothing is found */ |
| if (xpathobj->nodesetval == NULL) |
| *res_nitems = 0; |
| else |
| *res_nitems = xpathobj->nodesetval->nodeNr; |
| |
| if (*res_nitems && astate) |
| { |
| *astate = NULL; |
| for (i = 0; i < xpathobj->nodesetval->nodeNr; i++) |
| { |
| Datum elem; |
| bool elemisnull = false; |
| |
| elem = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i])); |
| *astate = accumArrayResult(*astate, elem, |
| elemisnull, XMLOID, |
| CurrentMemoryContext); |
| } |
| } |
| } |
| PG_CATCH(); |
| { |
| if (xpathobj) |
| xmlXPathFreeObject(xpathobj); |
| if (xpathcomp) |
| xmlXPathFreeCompExpr(xpathcomp); |
| if (xpathctx) |
| xmlXPathFreeContext(xpathctx); |
| if (doc) |
| xmlFreeDoc(doc); |
| if (ctxt) |
| xmlFreeParserCtxt(ctxt); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| xmlXPathFreeObject(xpathobj); |
| xmlXPathFreeCompExpr(xpathcomp); |
| xmlXPathFreeContext(xpathctx); |
| xmlFreeDoc(doc); |
| xmlFreeParserCtxt(ctxt); |
| } |
| |
| /* |
| * Evaluate XPath expression and return array of XML values. |
| * |
| * As we have no support of XQuery sequences yet, this function seems |
| * to be the most useful one (array of XML functions plays a role of |
| * some kind of substitution for XQuery sequences). |
| */ |
| Datum |
| xpath(PG_FUNCTION_ARGS) |
| { |
| text *xpath_expr_text = PG_GETARG_TEXT_P(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); |
| int res_nitems; |
| ArrayBuildState *astate; |
| |
| xpath_internal(xpath_expr_text, data, namespaces, |
| &res_nitems, &astate); |
| |
| if (res_nitems == 0) |
| PG_RETURN_ARRAYTYPE_P(construct_empty_array(XMLOID)); |
| else |
| PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); |
| } |
| |
| /* |
| * Determines if the node specified by the supplied XPath exists |
| * in a given XML document, returning a boolean. |
| */ |
| Datum |
| xmlexists(PG_FUNCTION_ARGS) |
| { |
| text *xpath_expr_text = PG_GETARG_TEXT_P(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| int res_nitems; |
| |
| xpath_internal(xpath_expr_text, data, NULL, |
| &res_nitems, NULL); |
| |
| PG_RETURN_BOOL(res_nitems > 0); |
| } |
| |
| /* |
| * Determines if the node specified by the supplied XPath exists |
| * in a given XML document, returning a boolean. Differs from |
| * xmlexists as it supports namespaces and is not defined in SQL/XML. |
| */ |
| Datum |
| xpath_exists(PG_FUNCTION_ARGS) |
| { |
| text *xpath_expr_text = PG_GETARG_TEXT_P(0); |
| xmltype *data = PG_GETARG_XML_P(1); |
| ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); |
| int res_nitems; |
| |
| xpath_internal(xpath_expr_text, data, namespaces, |
| &res_nitems, NULL); |
| |
| PG_RETURN_BOOL(res_nitems > 0); |
| } |
| |
| /* |
| * Functions for checking well-formed-ness |
| */ |
| |
| static bool |
| wellformed_xml(text *data, XmlOptionType xmloption_arg) |
| { |
| bool result; |
| xmlDocPtr doc = NULL; |
| |
| /* We want to catch any exceptions and return false */ |
| PG_TRY(); |
| { |
| doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding()); |
| result = true; |
| } |
| PG_CATCH(); |
| { |
| FlushErrorState(); |
| result = false; |
| } |
| PG_END_TRY(); |
| |
| if (doc) |
| xmlFreeDoc(doc); |
| |
| return result; |
| } |
| |
| Datum |
| xml_is_well_formed(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_P(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, xmloption)); |
| } |
| |
| Datum |
| xml_is_well_formed_document(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_P(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT)); |
| } |
| |
| Datum |
| xml_is_well_formed_content(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_P(0); |
| |
| PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT)); |
| } |