plugins/esi/lib/EsiParser.cc - trafficserver - Git at Google

 /** @file

   A brief file description

   @section license License

   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
  */

 #include "EsiParser.h"
 #include "Utils.h"

 #include <ts/ts.h>

 #include <cctype>

 using std::string;
 using namespace EsiLib;

 namespace
 {
 DbgCtl dbg_ctl{"plugin_esi_parser"};
 }

 const char *EsiParser::ESI_TAG_PREFIX     = "<esi:";
 const int   EsiParser::ESI_TAG_PREFIX_LEN = 5;

 const string EsiParser::SRC_ATTR_STR("src");
 const string EsiParser::TEST_ATTR_STR("test");
 const string EsiParser::HANDLER_ATTR_STR("handler");

 const EsiParser::EsiNodeInfo EsiParser::ESI_NODES[] = {
   EsiNodeInfo(DocNode::TYPE_INCLUDE, "include", 7, "/>", 2),
   EsiNodeInfo(DocNode::TYPE_REMOVE, "remove>", 7, "</esi:remove>", 13),
   EsiNodeInfo(DocNode::TYPE_COMMENT, "comment", 7, "/>", 2),
   EsiNodeInfo(DocNode::TYPE_VARS, "vars>", 5, "</esi:vars>", 11),
   EsiNodeInfo(DocNode::TYPE_CHOOSE, "choose>", 7, "</esi:choose>", 13),
   EsiNodeInfo(DocNode::TYPE_WHEN, "when", 4, "</esi:when>", 11),
   EsiNodeInfo(DocNode::TYPE_OTHERWISE, "otherwise>", 10, "</esi:otherwise>", 16),
   EsiNodeInfo(DocNode::TYPE_TRY, "try>", 4, "</esi:try>", 10),
   EsiNodeInfo(DocNode::TYPE_ATTEMPT, "attempt>", 8, "</esi:attempt>", 14),
   EsiNodeInfo(DocNode::TYPE_EXCEPT, "except>", 7, "</esi:except>", 13),
   EsiNodeInfo(DocNode::TYPE_SPECIAL_INCLUDE, "special-include", 15, "/>", 2),
   EsiNodeInfo(DocNode::TYPE_UNKNOWN, "", 0, "", 0) // serves as end marker
 };

 const EsiParser::EsiNodeInfo EsiParser::HTML_COMMENT_NODE_INFO(DocNode::TYPE_HTML_COMMENT, "<!--esi", 7, "-->", 3);

 EsiParser::EsiParser(unsigned max_doc_size, std::string_view request_url)
   : _max_doc_size(max_doc_size), _request_url{request_url}, _parse_start_pos(-1)
 {
   // do this so that object doesn't move around in memory;
   // (because we return pointers into this object)
   _data.reserve(_max_doc_size);
 }

 bool
 EsiParser::_setup(string &data, int &parse_start_pos, size_t &orig_output_list_size, DocNodeList &node_list, const char *data_ptr,
                   int &data_len) const
 {
   bool retval = true;
   if (!data_ptr || !data_len) {
     Dbg(dbg_ctl, "[%s] Returning true for empty data", __FUNCTION__);
   } else {
     if (data_len == -1) {
       data_len = strlen(data_ptr);
     }
     if ((data.size() + data_len) > _max_doc_size) {
       TSError("[%s] Cannot allow attempted doc of size %d; Max allowed size is %d for URL [%s]", __FUNCTION__,
               int(data.size() + data_len), _max_doc_size, _request_url.c_str());
       retval = false;
     } else {
       data.append(data_ptr, data_len);
     }
   }
   if (parse_start_pos == -1) { // first time this cycle that input is being provided
     parse_start_pos       = 0;
     orig_output_list_size = node_list.size();
   }
   return retval;
 }

 bool
 EsiParser::parseChunk(const char *data, DocNodeList &node_list, int data_len /* = -1 */)
 {
   if (!_setup(_data, _parse_start_pos, _orig_output_list_size, node_list, data, data_len)) {
     return false;
   }
   if (!_parse(_data, _parse_start_pos, node_list)) {
     TSError("[%s] Failed to parse chunk of size %d starting with [%.5s]... for URL [%s]", __FUNCTION__, data_len,
             (data_len ? data : "(null)"), _request_url.c_str());
     return false;
   }
   return true;
 }

 bool
 EsiParser::_completeParse(string &data, int &parse_start_pos, size_t &orig_output_list_size, DocNodeList &node_list,
                           const char *data_ptr /* = 0 */, int data_len /* = -1 */) const
 {
   if (!_setup(data, parse_start_pos, orig_output_list_size, node_list, data_ptr, data_len)) {
     return false;
   }
   if (!data.size()) {
     Dbg(dbg_ctl, "[%s] No data to parse!", __FUNCTION__);
     return true;
   }
   if (!_parse(data, parse_start_pos, node_list, true)) {
     TSError("[%s] Failed to complete parse of data of total size %d starting with [%.5s]...", __FUNCTION__, int(data.size()),
             (data.size() ? data.data() : "(null)"));
     node_list.resize(orig_output_list_size);
     return false;
   }
   return true;
 }

 EsiParser::MATCH_TYPE
 EsiParser::_searchData(const string &data, size_t start_pos, const char *str, int str_len, size_t &pos) const
 {
   const char *data_ptr = data.data() + start_pos;
   int         data_len = data.size() - start_pos;
   int         i_data = 0, i_str = 0;

   while (i_data < data_len) {
     if (data_ptr[i_data] == str[i_str]) {
       ++i_str;
       if (i_str == str_len) {
         break;
       }
     } else {
       i_data -= i_str;
       i_str   = 0;
     }
     ++i_data;
   }

   if (i_str == str_len) {
     pos = start_pos + i_data + 1 - i_str;
     Dbg(dbg_ctl, "[%s] Found full match of %.*s in [%.5s...] at position %d", __FUNCTION__, str_len, str, data_ptr, int(pos));
     return COMPLETE_MATCH;
   } else if (i_str) {
     pos = start_pos + i_data - i_str;
     Dbg(dbg_ctl, "[%s] Found partial match of %.*s in [%.5s...] at position %d", __FUNCTION__, str_len, str, data_ptr, int(pos));
     return PARTIAL_MATCH;
   } else {
     Dbg(dbg_ctl, "[%s] Found no match of %.*s in [%.5s...]", __FUNCTION__, str_len, str, data_ptr);
     return NO_MATCH;
   }
 }

 EsiParser::MATCH_TYPE
 EsiParser::_compareData(const string &data, size_t pos, const char *str, int str_len) const
 {
   int    i_str  = 0;
   size_t i_data = pos;
   for (; i_data < data.size(); ++i_data) {
     if (data[i_data] == str[i_str]) {
       ++i_str;
       if (i_str == str_len) {
         Dbg(dbg_ctl, "[%s] string [%.*s] is equal to data at position %d", __FUNCTION__, str_len, str, int(pos));
         return COMPLETE_MATCH;
       }
     } else {
       return NO_MATCH;
     }
   }
   Dbg(dbg_ctl, "[%s] string [%.*s] is partially equal to data at position %d", __FUNCTION__, str_len, str, int(pos));
   return PARTIAL_MATCH;
 }

 /** This implementation is optimized but not completely correct.  If
  * the opening tag were to have a repeating opening sequence ('<e<esi'
  * or something like that), this will break. However that is not the
  * case for the two opening tags we are looking for */
 EsiParser::MATCH_TYPE
 EsiParser::_findOpeningTag(const string &data, size_t start_pos, size_t &opening_tag_pos, bool &is_html_comment_node) const
 {
   size_t i_data = start_pos;
   int    i_esi = 0, i_html_comment = 0;

   while (i_data < data.size()) {
     if (data[i_data] == ESI_TAG_PREFIX[i_esi]) {
       if (++i_esi == ESI_TAG_PREFIX_LEN) {
         is_html_comment_node = false;
         opening_tag_pos      = i_data - i_esi + 1;
         return COMPLETE_MATCH;
       }
     } else {
       if (i_esi) {
         i_esi = 0;
         --i_data; // we do this to reexamine the current char as target string might start from here
         if (i_html_comment) {
           --i_html_comment; // in case other target string has started matching, adjust it's index
         }
       }
     }
     // doing the exact same thing for the other target string
     if (i_html_comment < HTML_COMMENT_NODE_INFO.tag_suffix_len &&
         data[i_data] == HTML_COMMENT_NODE_INFO.tag_suffix[i_html_comment]) {
       if (++i_html_comment == HTML_COMMENT_NODE_INFO.tag_suffix_len && i_data + 1 < data.size()) {
         char ch = data[i_data + 1]; //<!--esi must follow by a space char
         if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
           is_html_comment_node = true;
           opening_tag_pos      = i_data - i_html_comment + 1;
           return COMPLETE_MATCH;
         }
       }
     } else {
       if (i_html_comment) {
         i_html_comment = 0;
         --i_data; // same comments from above applies
         if (i_esi) {
           --i_esi;
         }
       }
     }
     ++i_data;
   }
   // partial matches; with the nature of our current opening tags, the
   // only way we can have a partial match for both target strings is
   // if the last char of the input string is '<' and that is not
   // enough information to differentiate the tags; Anyway, the parser
   // takes no action for a partial match
   if (i_esi) {
     is_html_comment_node = false;
     opening_tag_pos      = i_data - i_esi;
     return PARTIAL_MATCH;
   }
   if (i_html_comment) {
     is_html_comment_node = true;
     opening_tag_pos      = i_data - i_html_comment;
     return PARTIAL_MATCH;
   }
   return NO_MATCH;
 }

 inline bool
 EsiParser::_processSimpleContentTag(DocNode::TYPE node_type, const char *data, int data_len, DocNodeList &node_list) const
 {
   DocNode new_node(node_type);
   if (!parse(new_node.child_nodes, data, data_len)) {
     TSError("[%s] Could not parse simple content of [%s] node", __FUNCTION__, DocNode::type_names_[node_type]);
     return false;
   }
   node_list.push_back(new_node);
   return true;
 }

 bool
 EsiParser::_parse(const string &data, int &parse_start_pos, DocNodeList &node_list, bool last_chunk /* = false */) const
 {
   size_t             orig_list_size = node_list.size();
   size_t             curr_pos, end_pos;
   const char *const  data_start_ptr = data.data();
   size_t             data_size      = data.size();
   const EsiNodeInfo *node_info;
   MATCH_TYPE         search_result;
   bool               is_html_comment_node;
   bool               parse_result;

   while (parse_start_pos < static_cast<int>(data_size)) {
     search_result = _findOpeningTag(data, static_cast<int>(parse_start_pos), curr_pos, is_html_comment_node);
     if (search_result == NO_MATCH) {
       // we could add this chunk as a PRE node, but it might be
       // possible that the next chunk is also a PRE node, in which
       // case it is more correct to create one PRE node than two PRE
       // nodes even though processing would result in the same final
       // output in either case.  we are sacrificing a little
       // performance (we'll have to parse this chunk again next time)
       // for correctness
       break;
     }
     if (search_result == PARTIAL_MATCH) {
       goto lPartialMatch;
     }

     // we have a complete match of the opening tag
     if ((curr_pos - parse_start_pos) > 0) {
       // add text till here as a PRE node
       Dbg(dbg_ctl, "[%s], Adding data of size %d before (newly found) ESI tag as PRE node", __FUNCTION__,
           int(curr_pos - parse_start_pos));
       node_list.push_back(DocNode(DocNode::TYPE_PRE, data_start_ptr + parse_start_pos, curr_pos - parse_start_pos));
       parse_start_pos = curr_pos;
     }

     if (is_html_comment_node) {
       Dbg(dbg_ctl, "[%s] Found html comment tag at position %d", __FUNCTION__, int(curr_pos));
       node_info = &HTML_COMMENT_NODE_INFO;
       ++curr_pos;
     } else {
       curr_pos += ESI_TAG_PREFIX_LEN;

       for (node_info = ESI_NODES; node_info->type != DocNode::TYPE_UNKNOWN; ++node_info) {
         search_result = _compareData(data, curr_pos, node_info->tag_suffix, node_info->tag_suffix_len);
         if (search_result == COMPLETE_MATCH) {
           if (node_info->tag_suffix[node_info->tag_suffix_len - 1] == '>') {
             Dbg(dbg_ctl, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
                 int(curr_pos - ESI_TAG_PREFIX_LEN));
             break;
           } else {
             if (curr_pos + node_info->tag_suffix_len < data_size) {
               char ch = data_start_ptr[curr_pos + node_info->tag_suffix_len];
               if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
                 Dbg(dbg_ctl, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
                     int(curr_pos - ESI_TAG_PREFIX_LEN));
                 ++curr_pos; // skip the space char
                 break;
               } else if (ch == '/' || ch == '>') {
                 Dbg(dbg_ctl, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
                     int(curr_pos - ESI_TAG_PREFIX_LEN));
                 break;
               }
             } else {
               goto lPartialMatch;
             }
           }
         } else if (search_result == PARTIAL_MATCH) {
           goto lPartialMatch;
         }
       }
       if (node_info->type == DocNode::TYPE_UNKNOWN) {
         TSError("[%s] Unknown ESI tag starting with [%10s]...", __FUNCTION__, data.c_str());
         goto lFail;
       }
     }

     curr_pos      += node_info->tag_suffix_len;
     search_result  = _searchData(data, curr_pos, node_info->closing_tag, node_info->closing_tag_len, end_pos);

     if ((search_result == NO_MATCH) || (search_result == PARTIAL_MATCH)) {
       if (last_chunk) {
         TSError("[%s] ESI tag starting with [%10s]... has no matching closing tag [%.*s]", __FUNCTION__, data.c_str(),
                 node_info->closing_tag_len, node_info->closing_tag);
         goto lFail;
       } else {
         goto lPartialMatch;
       }
     }

     // now we process only complete nodes
     switch (node_info->type) {
     case DocNode::TYPE_INCLUDE:
       Dbg(dbg_ctl, "[%s] Handling include tag...", __FUNCTION__);
       parse_result = _processIncludeTag(data, curr_pos, end_pos, node_list);
       break;
     case DocNode::TYPE_COMMENT:
     case DocNode::TYPE_REMOVE:
       Dbg(dbg_ctl, "[%s] Adding node [%s]", __FUNCTION__, DocNode::type_names_[node_info->type]);
       node_list.push_back(DocNode(node_info->type)); // no data required
       parse_result = true;
       break;
     case DocNode::TYPE_WHEN:
       Dbg(dbg_ctl, "[%s] Handling when tag...", __FUNCTION__);
       parse_result = _processWhenTag(data, curr_pos, end_pos, node_list);
       break;
     case DocNode::TYPE_TRY:
       Dbg(dbg_ctl, "[%s] Handling try tag...", __FUNCTION__);
       parse_result = _processTryTag(data, curr_pos, end_pos, node_list);
       break;
     case DocNode::TYPE_CHOOSE:
       Dbg(dbg_ctl, "[%s] Handling choose tag...", __FUNCTION__);
       parse_result = _processChooseTag(data, curr_pos, end_pos, node_list);
       break;
     case DocNode::TYPE_OTHERWISE:
     case DocNode::TYPE_ATTEMPT:
     case DocNode::TYPE_EXCEPT:
       Dbg(dbg_ctl, "[%s] Handling %s tag...", __FUNCTION__, DocNode::type_names_[node_info->type]);
       parse_result = _processSimpleContentTag(node_info->type, data.data() + curr_pos, end_pos - curr_pos, node_list);
       break;
     case DocNode::TYPE_VARS:
     case DocNode::TYPE_HTML_COMMENT:
       Dbg(dbg_ctl, "[%s] added string of size %d starting with [%.5s] for node %s", __FUNCTION__, int(end_pos - curr_pos),
           data.data() + curr_pos, DocNode::type_names_[node_info->type]);
       node_list.push_back(DocNode(node_info->type, data.data() + curr_pos, end_pos - curr_pos));
       parse_result = true;
       break;
     case DocNode::TYPE_SPECIAL_INCLUDE:
       Dbg(dbg_ctl, "[%s] Handling special include tag...", __FUNCTION__);
       parse_result = _processSpecialIncludeTag(data, curr_pos, end_pos, node_list);
       break;
     default:
       parse_result = false;
       break;
     }

     if (!parse_result) {
       TSError("[%s] Cannot handle ESI tag [%.*s]", __FUNCTION__, node_info->tag_suffix_len, node_info->tag_suffix);
       goto lFail;
     }

     parse_start_pos = end_pos + node_info->closing_tag_len;
     continue;

   lPartialMatch:
     if (last_chunk) {
       Dbg(dbg_ctl, "[%s] Found a partial ESI tag - will be treated as PRE text", __FUNCTION__);
     } else {
       Dbg(dbg_ctl, "[%s] Deferring to next chunk to find complete tag", __FUNCTION__);
     }
     break;
   }
   if (last_chunk && (parse_start_pos < static_cast<int>(data_size))) {
     Dbg(dbg_ctl, "[%s] Adding trailing text of size %d starting at [%.5s] as a PRE node", __FUNCTION__,
         int(data_size - parse_start_pos), data_start_ptr + parse_start_pos);
     node_list.push_back(DocNode(DocNode::TYPE_PRE, data_start_ptr + parse_start_pos, data_size - parse_start_pos));
   }
   Dbg(dbg_ctl, "[%s] Added %d node(s) during parse", __FUNCTION__, int(node_list.size() - orig_list_size));
   return true;

 lFail:
   node_list.resize(orig_list_size); // delete whatever nodes we have added so far
   return false;
 }

 bool
 EsiParser::_processIncludeTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
 {
   Attribute src_info;
   if (!Utils::getAttribute(data, SRC_ATTR_STR, curr_pos, end_pos, src_info)) {
     TSError("[%s] Could not find src attribute", __FUNCTION__);
     return false;
   }
   node_list.push_back(DocNode(DocNode::TYPE_INCLUDE));
   node_list.back().attr_list.push_back(src_info);
   Dbg(dbg_ctl, "[%s] Added include tag with url [%.*s]", __FUNCTION__, src_info.value_len, src_info.value);
   return true;
 }

 bool
 EsiParser::_processSpecialIncludeTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
 {
   Attribute handler_info;
   if (!Utils::getAttribute(data, HANDLER_ATTR_STR, curr_pos, end_pos, handler_info)) {
     TSError("[%s] Could not find handler attribute", __FUNCTION__);
     return false;
   }
   node_list.push_back(DocNode(DocNode::TYPE_SPECIAL_INCLUDE));
   DocNode &node = node_list.back();
   node.attr_list.push_back(handler_info);
   node.data     = data.data() + curr_pos;
   node.data_len = end_pos - curr_pos;
   Dbg(dbg_ctl, "[%s] Added special include tag with handler [%.*s] and data [%.*s]", __FUNCTION__, handler_info.value_len,
       handler_info.value, node.data_len, node.data);
   return true;
 }

 inline bool
 EsiParser::_isWhitespace(const char *data, int data_len) const
 {
   for (int i = 0; i < data_len; ++i) {
     if (!isspace(data[i])) {
       return false;
     }
   }
   return true;
 }

 bool
 EsiParser::_processWhenTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
 {
   Attribute test_expr;
   size_t    term_pos;
   if (!Utils::getAttribute(data, TEST_ATTR_STR, curr_pos, end_pos, test_expr, &term_pos, '>')) {
     TSError("[%s] Could not find test attribute", __FUNCTION__);
     return false;
   }
   ++term_pos; // go past the terminator
   const char *data_start_ptr = data.data() + term_pos;
   int         data_size      = end_pos - term_pos;
   if (!_processSimpleContentTag(DocNode::TYPE_WHEN, data_start_ptr, data_size, node_list)) {
     TSError("[%s] Could not parse when node's content", __FUNCTION__);
     return false;
   }
   node_list.back().attr_list.push_back(test_expr);
   Dbg(dbg_ctl, "[%s] Added when tag with expression [%.*s] and data starting with [%.5s]", __FUNCTION__, test_expr.value_len,
       test_expr.value, data_start_ptr);
   return true;
 }

 bool
 EsiParser::_processTryTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
 {
   const char *data_start_ptr = data.data() + curr_pos;
   int         data_size      = end_pos - curr_pos;
   DocNode     try_node(DocNode::TYPE_TRY);
   if (!parse(try_node.child_nodes, data_start_ptr, data_size)) {
     TSError("[%s] Could not parse try node's content", __FUNCTION__);
     return false;
   }

   DocNodeList::iterator iter, end_node, attempt_node, except_node, temp_iter;
   end_node     = try_node.child_nodes.end();
   attempt_node = except_node = end_node;
   iter                       = try_node.child_nodes.begin();
   while (iter != end_node) {
     if (iter->type == DocNode::TYPE_ATTEMPT) {
       if (attempt_node != end_node) {
         TSError("[%s] Can have exactly one attempt node in try block", __FUNCTION__);
         return false;
       }
       attempt_node = iter;
     } else if (iter->type == DocNode::TYPE_EXCEPT) {
       if (except_node != end_node) {
         TSError("[%s] Can have exactly one except node in try block", __FUNCTION__);
         return false;
       }
       except_node = iter;
     } else if (iter->type == DocNode::TYPE_PRE) {
       if (!_isWhitespace(iter->data, iter->data_len)) {
         TSError("[%s] Cannot have non-whitespace raw text as top level node in try block", __FUNCTION__);
         return false;
       }
       Dbg(dbg_ctl, "[%s] Ignoring top-level whitespace raw text", __FUNCTION__);
       temp_iter = iter;
       ++temp_iter;
       try_node.child_nodes.erase(iter);
       iter = temp_iter;
       continue; // skip the increment
     } else {
       TSError("[%s] Only attempt/except/text nodes allowed in try block; [%s] node invalid", __FUNCTION__,
               DocNode::type_names_[iter->type]);
       return false;
     }
     ++iter;
   }
   if ((attempt_node == end_node) || (except_node == end_node)) {
     TSError("[%s] try block must contain one each of attempt and except nodes", __FUNCTION__);
     return false;
   }
   node_list.push_back(try_node);
   Dbg(dbg_ctl, "[%s] Added try node successfully", __FUNCTION__);
   return true;
 }

 bool
 EsiParser::_processChooseTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
 {
   const char *data_start_ptr = data.data() + curr_pos;
   size_t      data_size      = end_pos - curr_pos;
   DocNode     choose_node(DocNode::TYPE_CHOOSE);
   if (!parse(choose_node.child_nodes, data_start_ptr, data_size)) {
     TSError("[%s] Couldn't parse choose node content", __FUNCTION__);
     return false;
   }
   DocNodeList::iterator end_node       = choose_node.child_nodes.end();
   DocNodeList::iterator otherwise_node = end_node, iter, temp_iter;
   iter                                 = choose_node.child_nodes.begin();
   while (iter != end_node) {
     if (iter->type == DocNode::TYPE_OTHERWISE) {
       if (otherwise_node != end_node) {
         TSError("[%s] Cannot have more than one esi:otherwise node in an esi:choose node", __FUNCTION__);
         return false;
       }
       otherwise_node = iter;
     } else if (iter->type == DocNode::TYPE_PRE) {
       if (!_isWhitespace(iter->data, iter->data_len)) {
         TSError("[%s] Cannot have non-whitespace raw text (%s) as top-level node in choose data", __FUNCTION__,
                 DocNode::type_names_[iter->type]);
         return false;
       }
       Dbg(dbg_ctl, "[%s] Ignoring top-level whitespace raw text", __FUNCTION__);
       temp_iter = iter;
       ++temp_iter;
       choose_node.child_nodes.erase(iter);
       iter = temp_iter;
       continue; // skip the increment
     } else if (iter->type != DocNode::TYPE_WHEN) {
       TSError("[%s] Cannot have %s as top-level node in choose data; only when/otherwise/whitespace-text "
               "permitted",
               __FUNCTION__, DocNode::type_names_[iter->type]);
       return false;
     }
     ++iter;
   }
   node_list.push_back(choose_node);
   return true;
 }

 void
 EsiParser::clear()
 {
   _data.clear();
   _parse_start_pos = -1;
 }

 EsiParser::~EsiParser() = default;

 inline void
 EsiParser::_adjustPointers(DocNodeList::iterator node_iter, DocNodeList::iterator end, const char *ext_data_ptr,
                            const char *int_data_start) const
 {
   AttributeList::iterator attr_iter;
   for (; node_iter != end; ++node_iter) {
     if (node_iter->data_len) {
       node_iter->data = ext_data_ptr + (node_iter->data - int_data_start);
     }
     for (attr_iter = node_iter->attr_list.begin(); attr_iter != node_iter->attr_list.end(); ++attr_iter) {
       if (attr_iter->name_len) {
         attr_iter->name = ext_data_ptr + (attr_iter->name - int_data_start);
       }
       if (attr_iter->value_len) {
         attr_iter->value = ext_data_ptr + (attr_iter->value - int_data_start);
       }
     }
     if (node_iter->child_nodes.size()) {
       _adjustPointers(node_iter->child_nodes.begin(), node_iter->child_nodes.end(), ext_data_ptr, int_data_start);
     }
   }
 }

 bool
 EsiParser::parse(DocNodeList &node_list, const char *ext_data_ptr, int data_len /* = -1 */) const
 {
   string data;
   size_t orig_output_list_size;
   int    parse_start_pos = -1;
   bool   retval          = _completeParse(data, parse_start_pos, orig_output_list_size, node_list, ext_data_ptr, data_len);
   if (retval && (node_list.size() - orig_output_list_size)) {
     // adjust all pointers to addresses in input parameter
     const char           *int_data_start = data.data();
     DocNodeList::iterator node_iter      = node_list.begin();
     for (size_t i = 0; i < orig_output_list_size; ++i, ++node_iter) {
       ;
     }
     _adjustPointers(node_iter, node_list.end(), ext_data_ptr, int_data_start);
   }
   return retval;
 }