// blob: f1cd630975ff5054f9eba9daf3b978b662ad2aeb [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef PAGESPEED_KERNEL_HTML_HTML_PARSE_H_
#define PAGESPEED_KERNEL_HTML_HTML_PARSE_H_
#include <cstdarg>
#include <cstddef>
#include <list>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/arena.h"
#include "pagespeed/kernel/base/printf_format.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/symbol_table.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/google_url.h"
namespace net_instaweb {
class DocType;
class HtmlEvent;
class HtmlFilter;
class HtmlLexer;
class MessageHandler;
class Timer;
// A std::set of pointers to const HtmlEvent objects.
typedef std::set<const HtmlEvent*> ConstHtmlEventSet;
// Streaming Html Parser API. Callbacks defined in HtmlFilter are
// called on each parser token.
//
// Any number of filters can be added to the Html Parser; they are
// organized in a chain. Each filter processes a stream of SAX events
// (HtmlEvent), interspersed by Flushes. The filter operates on the
// sequence of events between flushes (a flush-window), and the system
// passes the (possibly mutated) event-stream to the next filter.
//
// An HTML Event is a lexical token provided by the parser, including:
// begin document
// end document
// begin element
// end element
// whitespace
// characters
// cdata
// comment
//
// The parser retains the sequence of events as a data structure:
// list<HtmlEvent>. HtmlEvents are sent to filters (HtmlFilter), as follows:
// foreach filter in filter-chain
// foreach event in flush-window
// apply filter to event
//
// Filters may mutate the event streams as they are being processed,
// and these mutations will be seen by downstream filters. The filters can
// mutate any event that has not been flushed. Supported mutations include:
// - Removing an HTML element whose begin/end tags are both within
// the flush window. This will also remove any nested elements.
// - Removing other HTML events
// - Inserting new elements (automatically inserts begin/end events)
// before or after "current" event
// - Inserting new events, before or after "current" event
class HtmlParse {
public:
explicit HtmlParse(MessageHandler* message_handler);
virtual ~HtmlParse();
// Application methods for parsing functions and adding filters
// Add a new html filter to the filter-chain, without taking ownership
// of it.
void AddFilter(HtmlFilter* filter);
// Initiate a chunked parsing session. Finish with FinishParse. The
// url is only used to resolve relative URLs; the contents are not
// directly fetched. The caller must supply the text and call ParseText.
//
// Returns whether the URL is valid.
bool StartParse(const StringPiece& url) {
  // Convenience entry point: delegates to StartParseWithType with
  // kContentTypeHtml as the content type.
  const bool url_is_valid = StartParseWithType(url, kContentTypeHtml);
  return url_is_valid;
}
bool StartParseWithType(const StringPiece& url,
const ContentType& content_type) {
return StartParseId(url, url, content_type);
}
// Returns whether the google_url() URL is valid.
bool is_url_valid() const {
  return url_valid_;
}
// Mostly useful for file-based rewriters so that messages can reference
// the HTML file and produce navigable errors.
//
// Returns whether the URL is valid.
virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
const ContentType& content_type);
// Sets url() for test purposes. Normally this is done by StartParseId,
// but sometimes tests need to set it without worrying about parse
// state.
void SetUrlForTesting(const StringPiece& url);
// Parses an arbitrary block of an html file, queuing up the events. Call
// Flush to send the events through the Filter.
//
// To parse an entire file, first call StartParse(), then call
// ParseText on the file contents (in whatever size chunks are convenient),
// then call FinishParse().
//
// It is invalid to call ParseText when the StartParse* routines returned
// false.
void ParseText(const char* content, int size) {
  // Raw-buffer overload: hands the pointer and byte count straight to the
  // virtual ParseTextInternal hook.
  ParseTextInternal(content, size);
}
void ParseText(const StringPiece& sp) {
  // StringPiece overload: unpacks the piece into pointer and length. The
  // length is converted to int to match ParseTextInternal's signature.
  const char* const text = sp.data();
  const int length = static_cast<int>(sp.size());
  ParseTextInternal(text, length);
}
// Flush the currently queued events through the filters. It is desirable
// for large web pages, particularly dynamically generated ones, to start
// getting delivered to the browser as soon as they are ready. On the
// other hand, rewriting is more powerful when more of the content can
// be considered for image/css/js spriting. This method should be called
// when the controlling network process wants to induce a new chunk of
// output. The less you call this function the better the rewriting will
// be.
//
// It is invalid to call Flush when the StartParse* routines returned
// false.
//
// If this is called from a Filter, the request will be deferred until after
// currently active filters are completed.
virtual void Flush();
// Finish a chunked parsing session. This also induces a Flush.
//
// It is invalid to call FinishParse when the StartParse* routines returned
// false.
virtual void FinishParse();
// Utility methods for implementing filters
// These "New*" functions do *not* append the new node to the parent; you
// must do that yourself. Also note that in the context of a filter, you
// must add parents to the DOM in some fashion, before appending children to
// parents.
HtmlCdataNode* NewCdataNode(HtmlElement* parent,
const StringPiece& contents);
HtmlCharactersNode* NewCharactersNode(HtmlElement* parent,
const StringPiece& literal);
HtmlCommentNode* NewCommentNode(HtmlElement* parent,
const StringPiece& contents);
HtmlDirectiveNode* NewDirectiveNode(HtmlElement* parent,
const StringPiece& contents);
HtmlIEDirectiveNode* NewIEDirectiveNode(HtmlElement* parent,
const StringPiece& contents);
void InsertScriptAfterCurrent(StringPiece text, bool external);
void InsertScriptBeforeCurrent(StringPiece text, bool external);
// Creates and appends an Anchor tag into the HTML, and then returns it.
// TODO(jmaessen): refactor and use this in the relevant places.
HtmlElement* AppendAnchor(StringPiece link, StringPiece text,
HtmlElement* parent);
// DOM-manipulation methods.
// TODO(sligocki): Find Javascript equivalents and list them or even change
// our names to be consistent.
// This and downstream filters will then see inserted elements but upstream
// filters will not.
// Note: In Javascript the first is called insertBefore and takes the arg
// in the opposite order.
// Note: new_node must not already be in the DOM.
void InsertNodeBeforeNode(const HtmlNode* existing_node, HtmlNode* new_node);
void InsertNodeAfterNode(const HtmlNode* existing_node, HtmlNode* new_node);
// These are backwards-compatibility wrappers for use by Pagespeed Insights.
// TODO(morlovich): Remove them after PSI is synced.
void InsertElementBeforeElement(const HtmlNode* existing_element,
                                HtmlNode* new_element) {
  // Legacy name; simply forwards to InsertNodeBeforeNode.
  InsertNodeBeforeNode(existing_element, new_element);
}
void InsertElementAfterElement(const HtmlNode* existing_element,
                               HtmlNode* new_element) {
  // Legacy name; simply forwards to InsertNodeAfterNode.
  InsertNodeAfterNode(existing_element, new_element);
}
// Add a new child element at the beginning or end of existing_parent's
// children. Named after Javascript's appendChild method.
// Note: new_child must not already be in the DOM.
void PrependChild(const HtmlElement* existing_parent, HtmlNode* new_child);
void AppendChild(const HtmlElement* existing_parent, HtmlNode* new_child);
// Insert a new element before the current one. current_ remains unchanged.
// Note: new_node must not already be in the DOM.
void InsertNodeBeforeCurrent(HtmlNode* new_node);
// Insert a new element after the current one, moving current_ to the new
// element. In a Filter, the flush-loop will advance past this on
// the next iteration.
// Note: new_node must not already be in the DOM.
void InsertNodeAfterCurrent(HtmlNode* new_node);
// Enclose element around two elements in a sequence. The first
// element must be the same as, or precede the last element in the
// event-stream, and this is not checked, but the two elements do
// not need to be adjacent. They must have the same parent to start
// with.
bool AddParentToSequence(HtmlNode* first, HtmlNode* last,
HtmlElement* new_parent);
// Moves current node (and all children) to an already-existing parent,
// where they will be placed as the last elements in that parent.
// Returns false if the operation could not be performed because either
// the node or its parent was partially or wholly flushed.
// Note: Will not work if called from StartElement() event.
//
// This differs from AppendChild() because it moves the current node,
// which is already in the DOM, rather than adding a new node.
bool MoveCurrentInto(HtmlElement* new_parent);
// Moves current node (and all children) directly before existing_node.
// Note: Will not work if called from StartElement() event.
//
// This differs from InsertNodeBeforeNode() because it moves the
// current node, which is already in the DOM, rather than adding a new node.
bool MoveCurrentBefore(HtmlNode* existing_node);
// If the given node is rewritable, delete it and all of its children (if
// any) and return true; otherwise, do nothing and return false.
// Note: Javascript appears to use removeChild for this.
bool DeleteNode(HtmlNode* node);
// Delete a parent element, retaining any children and moving them to
// reside under the parent's parent. Note that an element must be
// fully inside the flush-window for this to work. Returns false on
// failure.
//
// See also MakeElementInvisible
bool DeleteSavingChildren(HtmlElement* element);
// Similar in effect to DeleteSavingChildren, but this has no structural
// effect on the DOM. Instead it sets a bit in the HtmlElement that prevents
// it from being rendered by HtmlWriterFilter, though all its contents will
// be rendered.
//
// This fails, returning false, if the element's StartElement event has
// already been flushed.
bool MakeElementInvisible(HtmlElement* element);
// Determines whether the element, in the context of its flush
// window, has children. If the element is not rewritable, has not
// been closed yet, or has not been inserted into the DOM event stream,
// then 'false' is returned.
//
// Note that the concept of the Flush Window is important because the
// knowledge of an element's children is not limited to the current
// event being presented to a Filter. A Filter can call this method
// in the StartElement of an event to see if any children are going
// to be coming. Of course, if the StartElement is at the end of a
// Flush window, then we won't know about the children, but IsRewritable
// will also be false.
bool HasChildrenInFlushWindow(HtmlElement* element);
// If possible, replace the existing node with the new node and return true;
// otherwise, do nothing and return false.
bool ReplaceNode(HtmlNode* existing_node, HtmlNode* new_node);
// Creates another element with the same name and attributes as in_element.
// Does not duplicate the children or insert it anywhere.
HtmlElement* CloneElement(HtmlElement* in_element);
HtmlElement* NewElement(HtmlElement* parent, const StringPiece& str) {
  // Turn the tag string into an HtmlName, then defer to the HtmlName overload.
  const HtmlName& name = MakeName(str);
  return NewElement(parent, name);
}
HtmlElement* NewElement(HtmlElement* parent, HtmlName::Keyword keyword) {
  // Turn the keyword into an HtmlName, then defer to the HtmlName overload.
  const HtmlName& name = MakeName(keyword);
  return NewElement(parent, name);
}
HtmlElement* NewElement(HtmlElement* parent, const HtmlName& name);
// For both versions of AddAttribute
// Pass in NULL for value to add an attribute with no value at all
// ex: <script data-pagespeed-no-transform>
// Pass in "" for value if you want the value to be the empty string
// ex: <div style="">
void AddAttribute(HtmlElement* element, HtmlName::Keyword keyword,
                  const StringPiece& value) {
  // Keyword-named attribute; always passes HtmlElement::DOUBLE_QUOTE.
  const HtmlName& attr_name = MakeName(keyword);
  element->AddAttribute(attr_name, value, HtmlElement::DOUBLE_QUOTE);
}
void AddAttribute(HtmlElement* element, StringPiece name,
                  const StringPiece& value) {
  // String-named attribute; always passes HtmlElement::DOUBLE_QUOTE.
  const HtmlName& attr_name = MakeName(name);
  element->AddAttribute(attr_name, value, HtmlElement::DOUBLE_QUOTE);
}
void AddEscapedAttribute(HtmlElement* element, HtmlName::Keyword keyword,
                         const StringPiece& escaped_value) {
  // Forwards to HtmlElement::AddEscapedAttribute with the keyword converted
  // to an HtmlName; always passes HtmlElement::DOUBLE_QUOTE.
  const HtmlName& attr_name = MakeName(keyword);
  element->AddEscapedAttribute(attr_name, escaped_value,
                               HtmlElement::DOUBLE_QUOTE);
}
// Renames an attribute to the HtmlName built from the given keyword.
void SetAttributeName(HtmlElement::Attribute* attribute,
                      HtmlName::Keyword keyword) {
  const HtmlName& new_name = MakeName(keyword);
  attribute->set_name(new_name);
}
HtmlName MakeName(const StringPiece& str);
HtmlName MakeName(HtmlName::Keyword keyword);
bool IsRewritable(const HtmlNode* node) const;
// IsRewritable will return false for a node if either the open or close tag
// has been flushed, but this is too conservative if we only want to call
// AppendChild on that node, since we can append even if the open tag has
// already been flushed.
bool CanAppendChild(const HtmlNode* node) const;
void ClearElements();
// Log the HtmlEvent queue_ to the message_handler_ for debugging.
void DebugLogQueue();
// Print the HtmlEvent queue_ to stdout for debugging.
void DebugPrintQueue();
// Implementation helper with detailed knowledge of html parsing libraries
friend class HtmlLexer;
// Determines whether a tag should be terminated in HTML, e.g. <meta ..>.
// We do not expect to see a close-tag for meta and should never insert one.
bool IsImplicitlyClosedTag(HtmlName::Keyword keyword) const;
// Determines whether a tag should be interpreted as a 'literal'
// tag. That is, a tag whose contents are not parsed until a
// corresponding matching end tag is encountered.
static bool IsLiteralTag(HtmlName::Keyword keyword);
// Determines whether a tag is interpreted as a 'literal' tag in
// some user agents. Since some user agents will interpret the
// contents of these tags, our parser never treats them as literal
// tags. However, a filter that wants to insert new tags that should
// be processed by all user agents should not insert those tags into
// a tag that is sometimes parsed as a literal tag. Those filters
// can use this method to determine if they are within such a tag.
static bool IsSometimesLiteralTag(HtmlName::Keyword keyword);
// An optionally closed tag ranges from <p>, which is typically not closed,
// but we infer the closing from context. Also consider <html>, which usually
// is closed but not always. E.g. www.google.com does not close its html tag.
bool IsOptionallyClosedTag(HtmlName::Keyword keyword) const;
// Determines whether a tag allows brief termination in HTML, e.g. <tag/>
bool TagAllowsBriefTermination(HtmlName::Keyword keyword) const;
// Returns the message_handler_ pointer.
MessageHandler* message_handler() const {
  return message_handler_;
}
// Location accessors, typically used to help build error messages.
// url() returns the C-string form of url_.
const char* url() const {
  return url_.c_str();
}
// Returns the parsed GoogleUrl counterpart of url().
const GoogleUrl& google_url() const {
  return google_url_;
}
// Returns the C-string form of id_.
const char* id() const {
  return id_.c_str();
}
// Returns line_number_.
int line_number() const {
  return line_number_;
}
// Returns URL (or id) and line number as a string, to be used in messages.
GoogleString UrlLine() const {
  // Formatted as "<id>:<line number>".
  return StringPrintf("%s:%d", id_.c_str(), line_number_);
}
// Return the current assumed doctype of the document (based on the content
// type and any HTML directives encountered so far).
const DocType& doctype() const;
// Interface for any caller to report an error message via the message handler
void Info(const char* filename, int line, const char* msg, ...)
INSTAWEB_PRINTF_FORMAT(4, 5);
void Warning(const char* filename, int line, const char* msg, ...)
INSTAWEB_PRINTF_FORMAT(4, 5);
void Error(const char* filename, int line, const char* msg, ...)
INSTAWEB_PRINTF_FORMAT(4, 5);
void FatalError(const char* filename, int line, const char* msg, ...)
INSTAWEB_PRINTF_FORMAT(4, 5);
void InfoV(const char* file, int line, const char *msg, va_list args);
void WarningV(const char* file, int line, const char *msg, va_list args);
void ErrorV(const char* file, int line, const char *msg, va_list args);
void FatalErrorV(const char* file, int line, const char* msg, va_list args);
// Report error message with current parsing filename and linenumber.
void InfoHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
void WarningHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
void ErrorHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
void FatalErrorHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
// If set_log_rewrite_timing(true) has been called, logs the given message
// at info level with a time offset from the parsing start time.
void ShowProgress(const char* message);
void InfoHereV(const char *msg, va_list args) {
InfoV(id_.c_str(), line_number_, msg, args);
}
void WarningHereV(const char *msg, va_list args) {
WarningV(id_.c_str(), line_number_, msg, args);
}
void ErrorHereV(const char *msg, va_list args) {
ErrorV(id_.c_str(), line_number_, msg, args);
}
void FatalErrorHereV(const char* msg, va_list args) {
FatalErrorV(id_.c_str(), line_number_, msg, args);
}
void AddElement(HtmlElement* element, int line_number);
void CloseElement(HtmlElement* element, HtmlElement::Style style,
int line_number);
// Run a filter on the current queue of parse nodes.
void ApplyFilter(HtmlFilter* filter);
// Provide a timer to help report the timing of each filter. You must also
// call set_log_rewrite_timing(true) to turn on this reporting.
void set_timer(Timer* timer) {
  timer_ = timer;
}
// Returns the pointer last stored by set_timer().
Timer* timer() const {
  return timer_;
}
// Stores the flag in log_rewrite_timing_.
void set_log_rewrite_timing(bool x) {
  log_rewrite_timing_ = x;
}
// Adds a filter to be called during parsing as new events are added.
// Takes ownership of the HtmlFilter passed in.
void add_event_listener(HtmlFilter* listener);
// Inserts a comment before or after the current node. The function tries to
// pick an intelligent place depending on the document structure and
// whether the current node is a start-element, end-element, or a leaf.
// Returns true if it successfully added the comment, and false if it was not
// safe for the comment to be inserted. This can happen when a comment is
// inserted in a literal element (script or style) after the opening tag has
// been flushed, but the closing tag has not been seen yet. In this case, the
// caller can buffer the messages until EndElement is reached and call
// InsertComment at that point.
bool InsertComment(StringPiece sp);
// Sets the limit on the maximum number of bytes that should be parsed.
void set_size_limit(int64 x);
// Returns whether we have exceeded the size limit.
bool size_limit_exceeded() const;
// For debugging purposes. If this vector is supplied, DetermineEnabledFilters
// will populate it with the list of Filters that were disabled, plus the
// associated reason, if supplied by the Filter. Caller retains ownership
// of the pointer.
void SetDynamicallyDisabledFilterList(StringVector* list) {
  // Only the pointer is stored; nothing is copied here.
  dynamically_disabled_filter_list_ = list;
}
// Temporarily removes the current node from the parse tree. This must
// be run as part of a filter callback, and it is the responsibility of
// the filter to save the node and call RestoreNode on it later.
//
// If current node is an HtmlElement, this must be called on the
// StartElement event, not the EndElement event. When an element is
// deferred, all its children are deferred as well.
//
// It is fine to restore a node after a Flush. Note that while most
// HtmlNode objects are freed after a Flush window, a deferred one will
// be retained until it is Restored, or until the end of the document.
//
// If a node is not restored at end of document, a warning will be
// printed and the stored data cleaned up. Functionally it will be
// as if the filter called DeleteNode.
//
// Note that a filter that defers a node and never restores it will never
// see the EndElement for that node.
//
// Note that if you defer a Characters node and restore it next to
// another Characters node, they will be coalesced prior to the next
// filter, but this filter will not see the coalesced nodes.
// Similarly, if you defer a non-characters node that was previously
// separating two characters nodes, that will also result in a
// coalesce seen only by downstream filters.
void DeferCurrentNode();
// Restores a node, inserting it after the current event. If the node
// is an HtmlElement, the iteration will proceed with the first child node,
// or, if there were no children, then the EndElement method.
//
// Note: you cannot restore during Flush().
void RestoreDeferredNode(HtmlNode* deferred_node);
// Returns whether the filter pipeline can rewrite urls.
bool can_modify_urls() {
return can_modify_urls_;
}
protected:
// Filter container types. FilterVector is the type of event_listeners_;
// FilterList is the type of filters_ and the argument type of
// DetermineFilterListBehavior().
typedef std::vector<HtmlFilter*> FilterVector;
typedef std::list<HtmlFilter*> FilterList;
// A node paired with an event list; used as the mapped type of
// FilterElementMap.
typedef std::pair<HtmlNode*, HtmlEventList*> DeferredNode;
// Type of deferred_nodes_.
typedef std::map<const HtmlNode*, HtmlEventList*> NodeToEventListMap;
// Type of open_deferred_nodes_; keyed by filter.
typedef std::map<HtmlFilter*, DeferredNode> FilterElementMap;
// Type of deferred_deleted_nodes_.
typedef std::set<const HtmlNode*> NodeSet;
// HtmlParse::FinishParse() is equivalent to the sequence of
// BeginFinishParse(); Flush(); EndFinishParse().
// Split up to permit asynchronous versions.
void BeginFinishParse();
void EndFinishParse();
// Clears any cached state we have while this object is lying
// around for recycling.
void Clear();
// Returns the number of events on the event queue.
size_t GetEventQueueSize();
virtual void ParseTextInternal(const char* content, int size);
// Calls DetermineFiltersBehaviorImpl in an idempotent way.
void DetermineFiltersBehavior() {
  // Nothing to do while the "already called" flag is set.
  if (determine_filter_behavior_called_) {
    return;
  }
  determine_filter_behavior_called_ = true;
  // Start from "cannot modify URLs" before running the Impl hook.
  can_modify_urls_ = false;
  DetermineFiltersBehaviorImpl();
}
void DetermineFilterListBehavior(const FilterList& list) {
for (FilterList::const_iterator i = list.begin(); i != list.end(); ++i) {
CheckFilterBehavior(*i);
}
}
void CheckFilterBehavior(HtmlFilter* filter);
// Call DetermineEnabled() on each filter. Should be called after
// the property cache lookup has finished since some filters depend on
// pcache results in their DetermineEnabled implementation. If a subclass has
// filters that the base HtmlParse doesn't know about, it should override this
// function and call DetermineEnabled on each of its filters, along with
// calling the base DetermineFiltersBehaviorImpl.
// For all enabled filters the CanModifyUrl() flag will be aggregated (or'ed)
// and can be queried via can_modify_urls().
virtual void DetermineFiltersBehaviorImpl();
private:
void ApplyFilterHelper(HtmlFilter* filter);
HtmlEventListIterator Last(); // Last element in queue
bool IsInEventWindow(const HtmlEventListIterator& iter) const;
void InsertNodeBeforeEvent(const HtmlEventListIterator& event,
HtmlNode* new_node);
void InsertNodeAfterEvent(const HtmlEventListIterator& event,
HtmlNode* new_node);
bool MoveCurrentBeforeEvent(const HtmlEventListIterator& move_to);
bool IsDescendantOf(const HtmlNode* possible_child,
const HtmlNode* possible_parent);
void SanityCheck();
void CheckEventParent(HtmlEvent* event, HtmlElement* expect,
HtmlElement* actual);
void CheckParentFromAddEvent(HtmlEvent* event);
void FixParents(const HtmlEventListIterator& begin,
const HtmlEventListIterator& end_inclusive,
HtmlElement* new_parent);
void CoalesceAdjacentCharactersNodes();
void ClearEvents();
void EmitQueue(MessageHandler* handler);
inline void NextEvent();
void ClearDeferredNodes();
inline bool IsRewritableIgnoringDeferral(const HtmlNode* node) const;
inline bool IsRewritableIgnoringEnd(const HtmlNode* node) const;
void SetupScript(StringPiece text, bool external, HtmlElement* script);
// Visible for testing only, via HtmlTestingPeer
friend class HtmlTestingPeer;
void AddEvent(HtmlEvent* event);
void SetCurrent(HtmlNode* node);
void set_coalesce_characters(bool x) {
  coalesce_characters_ = x;
}
// Reports string_table_'s string_bytes_allocated() value.
size_t symbol_table_size() const {
  const size_t bytes_allocated = string_table_.string_bytes_allocated();
  return bytes_allocated;
}
// If a FLUSH occurs in the middle of a script, style, or other tag
// whose contents can only be a Characters block, then we will buffer
// up the start of the script tag and not emit it and the Characters block
// until after we see the close script tag. This function enforces that
// right before calling the Filters.
void DelayLiteralTag();
// NOTE(review): presumably holds the listeners passed to
// add_event_listener() -- confirm against the implementation file.
FilterVector event_listeners_;
// symbol_table_size() reports this table's string_bytes_allocated().
SymbolTableSensitive string_table_;
// NOTE(review): presumably the filter chain built by AddFilter() -- confirm
// against the implementation file.
FilterList filters_;
HtmlLexer* lexer_;
Arena<HtmlNode> nodes_;
HtmlEventList queue_;
HtmlEventListIterator current_;
// NOTE(review): the comment that follows looks orphaned -- no member declared
// in this class tracks whether current_ was deleted, and it does not describe
// message_handler_. Original text:
// Have we deleted current? Then we shouldn't do certain manipulations to it.
// message_handler_ is the pointer returned by message_handler().
MessageHandler* message_handler_;
// Returned (as a C string) by url().
GoogleString url_;
// Returned by google_url().
GoogleUrl google_url_;
GoogleString id_; // Per-request identifier string used in error messages.
// Returned by line_number(); passed as the line to the *V reporters by the
// *HereV helpers.
int line_number_;
bool skip_increment_;
// Set by DetermineFiltersBehavior(); while it is true that method does not
// call DetermineFiltersBehaviorImpl() again.
bool determine_filter_behavior_called_;
// Returned by can_modify_urls(). Reset to false by DetermineFiltersBehavior()
// right before it calls DetermineFiltersBehaviorImpl().
bool can_modify_urls_;
bool determine_enabled_filters_called_;
bool need_sanity_check_;
// Set by set_coalesce_characters().
bool coalesce_characters_;
bool need_coalesce_characters_;
// Returned by is_url_valid().
bool url_valid_;
bool log_rewrite_timing_; // Should we time the speed of parsing?
bool running_filters_;
int64 parse_start_time_us_;
scoped_ptr<HtmlEvent> delayed_start_literal_;
// Set by set_timer(); returned by timer().
Timer* timer_;
HtmlFilter* current_filter_; // Filter currently running in ApplyFilter
// When deferring a node that spans a flush window, we present upstream
// filters with a view of the event-stream that is not impacted by the
// deferral. To implement this, at the beginning of each flush window,
// we do the queue_ mutation for any outstanding deferrals right before
// running the filter that deferred them.
FilterElementMap open_deferred_nodes_;
// Keeps track of the deferred nodes that have not yet been restored.
NodeToEventListMap deferred_nodes_;
// We use the node-defer logic to implement DeleteNode for a node that
// hasn't been closed yet. The only difference is that you cannot
// restore a deleted node, and the parser will not print a warning if
// a deleted node is never restored.
NodeSet deferred_deleted_nodes_;
// Set by SetDynamicallyDisabledFilterList(); the caller retains ownership of
// the pointed-to vector.
StringVector* dynamically_disabled_filter_list_;
DISALLOW_COPY_AND_ASSIGN(HtmlParse);
};
} // namespace net_instaweb
#endif // PAGESPEED_KERNEL_HTML_HTML_PARSE_H_