// blob: 5b2b9a76d2830c6530fe761aaec6c25ccb57a9e9 [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mdsteele@google.com (Matthew D. Steele)
#ifndef PAGESPEED_KERNEL_HTML_HTML_NODE_H_
#define PAGESPEED_KERNEL_HTML_HTML_NODE_H_
#include <cstddef>
#include <list>
#include "base/logging.h"
#include "pagespeed/kernel/base/arena.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class HtmlElement;
class HtmlEvent;
// Queue of parse events, stored as pointers to HtmlEvent in a std::list.
typedef std::list<HtmlEvent*> HtmlEventList;
// Iterator into an HtmlEventList; nodes use it to refer to their event(s).
typedef HtmlEventList::iterator HtmlEventListIterator;
// Common base of HtmlElement and HtmlLeafNode.  A node generally stands for
// one lexical token of the HTML; the exception is the HtmlElement subclass,
// where a single node covers both the opening and the closing token.
//
// Storage is obtained from an Arena<HtmlNode> through the placement
// operator new below.  Both operator delete overloads only log a FATAL
// message.
class HtmlNode {
 public:
  virtual ~HtmlNode();

  friend class HtmlParse;

  // The element currently recorded as this node's parent.
  HtmlElement* parent() const {
    return parent_;
  }

  // Whether the node is still live; defined by subclasses.
  virtual bool live() const = 0;

  // String rendering of the node; defined by subclasses.
  virtual GoogleString ToString() const = 0;

  // Flags the node as dead.  Callers hand in the queue's end iterator so that
  // references to stale iterators are removed and IsRewritable is forced to
  // return false.
  virtual void MarkAsDead(const HtmlEventListIterator& end) = 0;

  // Placement allocation: the bytes for the node are requested from 'arena'.
  void* operator new(size_t size, Arena<HtmlNode>* arena) {
    return arena->Allocate(size);
  }

  // Placement deallocation counterpart of the operator new above.  It frees
  // nothing and just reports a FATAL error.
  void operator delete(void* ptr, Arena<HtmlNode>* arena) {
    LOG(FATAL) << "HtmlNode must not be deleted directly.";
  }

 protected:
  // TODO(jmarantz): jmaessen suggests instantiating the html nodes
  // without parents and computing them from context at the time they
  // are instantiated from the lexer.  This is a little more difficult
  // when synthesizing new nodes, however.  We assert sanity, however,
  // when calling HtmlParse::ApplyFilter.
  explicit HtmlNode(HtmlElement* parent)
      : parent_(parent) {
  }

  // Builds new event object(s) that represent this node and inserts them into
  // 'queue' immediately before 'iter'.  The node itself is also updated as
  // needed so that begin() and end() yield iterators to the new event(s).
  // The line number for each event should probably be -1.
  virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
                                HtmlEventList* queue) = 0;

  // Iterator to the first event associated with this node.
  virtual HtmlEventListIterator begin() const = 0;

  // Iterator to the last event associated with this node.
  virtual HtmlEventListIterator end() const = 0;

  // Non-placement form, declared here because it affects the visibility of
  // the destructor.  Like the placement form, it only logs a FATAL message.
  void operator delete(void* ptr) {
    LOG(FATAL) << "HtmlNode must not be deleted directly.";
  }

 private:
  friend class HtmlLexer;
  friend class HtmlTestingPeer;

  // Overwrites the stored parent pointer and nothing else -- the DOM is not
  // changed.  Intended to be called only from the DOM manipulation methods in
  // HtmlParse.
  void set_parent(HtmlElement* parent) {
    parent_ = parent;
  }

  HtmlElement* parent_;

  DISALLOW_COPY_AND_ASSIGN(HtmlNode);
};
// Shared base for the leaf node classes.  Its state (text contents, liveness
// flag and event iterator) lives in a separately allocated Data object that
// FreeData() can release.
//
// Apart from live(), the accessors below dereference data_ without checking
// it, so they must not be used once FreeData() has been called.
class HtmlLeafNode : public HtmlNode {
 public:
  virtual ~HtmlLeafNode();

  // False when the data has been freed; otherwise reports the stored
  // liveness flag.
  virtual bool live() const {
    if (data_.get() == NULL) {
      return false;
    }
    return data_->is_live_;
  }

  virtual void MarkAsDead(const HtmlEventListIterator& end);
  virtual GoogleString ToString() const;

  // Read-only view of the node's text.
  const GoogleString& contents() const {
    return data_->contents_;
  }

  // begin() and end() both return the one iterator stored for this node.
  virtual HtmlEventListIterator begin() const { return data_->iter_; }
  virtual HtmlEventListIterator end() const { return data_->iter_; }

  // Replaces the stored event iterator.
  void set_iter(const HtmlEventListIterator& iter) {
    data_->iter_ = iter;
  }

  // Releases the Data object; live() returns false afterwards.
  void FreeData() {
    data_.reset(NULL);
  }

 protected:
  HtmlLeafNode(HtmlElement* parent, const HtmlEventListIterator& iter,
               const StringPiece& contents);

  // Write-access to the contents is protected by default; subclasses that
  // need to offer it to their users re-expose this method.
  GoogleString* mutable_contents() {
    return &(data_->contents_);
  }

 private:
  // Per-node state, kept out of line so that it can be freed on its own.
  struct Data {
    // contents_ is built from the StringPiece's data() and size(); the node
    // starts out live.
    Data(const HtmlEventListIterator& iter, const StringPiece& contents)
        : contents_(contents.data(), contents.size()),
          is_live_(true),
          iter_(iter) {
    }

    GoogleString contents_;
    bool is_live_;
    HtmlEventListIterator iter_;
  };

  scoped_ptr<Data> data_;
};
// Leaf node for a CDATA section.  The constructor is private; HtmlParse is
// declared a friend.
class HtmlCdataNode : public HtmlLeafNode {
 public:
  virtual ~HtmlCdataNode();

  friend class HtmlParse;

 protected:
  virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
                                HtmlEventList* queue);

 private:
  // Passes everything through to HtmlLeafNode; note that the base class takes
  // the iterator before the contents.
  HtmlCdataNode(HtmlElement* parent, const StringPiece& contents,
                const HtmlEventListIterator& iter)
      : HtmlLeafNode(parent, iter, contents) {}

  DISALLOW_COPY_AND_ASSIGN(HtmlCdataNode);
};
// Leaf node representing raw characters in HTML
class HtmlCharactersNode : public HtmlLeafNode {
public:
virtual ~HtmlCharactersNode();
void Append(const StringPiece& str) {
mutable_contents()->append(str.data(), str.size());
}
friend class HtmlParse;
// Expose writable contents for Characters nodes.
using HtmlLeafNode::mutable_contents;
protected:
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
HtmlEventList* queue);
private:
HtmlCharactersNode(HtmlElement* parent,
const StringPiece& contents,
const HtmlEventListIterator& iter)
: HtmlLeafNode(parent, iter, contents) {
}
DISALLOW_COPY_AND_ASSIGN(HtmlCharactersNode);
};
// Leaf node for an HTML comment.  The constructor is private; HtmlParse is
// declared a friend.
class HtmlCommentNode : public HtmlLeafNode {
 public:
  virtual ~HtmlCommentNode();

  friend class HtmlParse;

 protected:
  virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
                                HtmlEventList* queue);

 private:
  // Passes everything through to HtmlLeafNode; note that the base class takes
  // the iterator before the contents.
  HtmlCommentNode(HtmlElement* parent, const StringPiece& contents,
                  const HtmlEventListIterator& iter)
      : HtmlLeafNode(parent, iter, contents) {}

  DISALLOW_COPY_AND_ASSIGN(HtmlCommentNode);
};
// Leaf node for an HTML IE directive.  The constructor is private; HtmlParse
// is declared a friend.
class HtmlIEDirectiveNode : public HtmlLeafNode {
 public:
  virtual ~HtmlIEDirectiveNode();

  friend class HtmlParse;

 protected:
  virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
                                HtmlEventList* queue);

 private:
  // Passes everything through to HtmlLeafNode; note that the base class takes
  // the iterator before the contents.
  HtmlIEDirectiveNode(HtmlElement* parent, const StringPiece& contents,
                      const HtmlEventListIterator& iter)
      : HtmlLeafNode(parent, iter, contents) {}

  DISALLOW_COPY_AND_ASSIGN(HtmlIEDirectiveNode);
};
// Leaf node for an HTML directive.  The constructor is private; HtmlParse is
// declared a friend.
class HtmlDirectiveNode : public HtmlLeafNode {
 public:
  virtual ~HtmlDirectiveNode();

  friend class HtmlParse;

 protected:
  virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
                                HtmlEventList* queue);

 private:
  // Passes everything through to HtmlLeafNode; note that the base class takes
  // the iterator before the contents.
  HtmlDirectiveNode(HtmlElement* parent, const StringPiece& contents,
                    const HtmlEventListIterator& iter)
      : HtmlLeafNode(parent, iter, contents) {}

  DISALLOW_COPY_AND_ASSIGN(HtmlDirectiveNode);
};
} // namespace net_instaweb
#endif // PAGESPEED_KERNEL_HTML_HTML_NODE_H_