// blob: b46268ceefdf7abd9d378bbcc048b84f277e94a2 [file] [log] [blame]
/**
* Copyright 2011 Rackspace
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
var sprintf = require('./sprintf').sprintf;
var utils = require('./utils');
var ElementPath = require('./elementpath');
var TreeBuilder = require('./treebuilder').TreeBuilder;
var get_parser = require('./parser').get_parser;
var constants = require('./constants');
var element_ids = 0;

/**
 * A single node of the element tree.
 *
 * Each instance takes the next value of the module-level `element_ids`
 * counter as its `_id`. `text` and `tail` start as null and the child list
 * starts empty.
 *
 * @param {*} tag Tag stored on the element as given.
 * @param {Object} [attrib] Initial attributes, merged into a fresh object
 *     with utils.merge.
 */
function Element(tag, attrib) {
  this._id = element_ids++;
  this.tag = tag;
  this.attrib = attrib ? utils.merge({}, attrib) : {};
  this.text = null;
  this.tail = null;
  this._children = [];
}
/**
 * Debug representation built from the element's tag and internal id.
 *
 * @return {String}
 */
Element.prototype.toString = function() {
  var tagName = this.tag;
  var ident = this._id;
  return sprintf("<Element %s at %s>", tagName, ident);
};
/**
 * Create a new, unattached Element; the receiver is not modified.
 *
 * @param {*} tag Tag for the new element.
 * @param {Object} [attrib] Attributes for the new element.
 * @return {Element}
 */
Element.prototype.makeelement = function(tag, attrib) {
  var fresh = new Element(tag, attrib);
  return fresh;
};
/**
 * Number of direct children of this element.
 *
 * @return {Number}
 */
Element.prototype.len = function() {
  var kids = this._children;
  return kids.length;
};
/**
 * Direct child stored at the given position.
 *
 * @param {Number} index Position in the child list.
 * @return {*} The child, or undefined when nothing is stored there.
 */
Element.prototype.getItem = function(index) {
  var kids = this._children;
  return kids[index];
};
/**
 * Replace the direct child stored at the given position.
 *
 * @param {Number} index Position in the child list.
 * @param {Element} element Child to store there.
 */
Element.prototype.setItem = function(index, element) {
  var kids = this._children;
  kids[index] = element;
};
/**
 * Remove the direct child at the given position; later children shift down.
 *
 * @param {Number} index Position in the child list.
 */
Element.prototype.delItem = function(index) {
  var kids = this._children;
  kids.splice(index, 1);
};
/**
 * Copy of the direct children in the half-open range [start, stop).
 *
 * @param {Number} start First position included.
 * @param {Number} stop First position excluded.
 * @return {Array} A new array; the child list itself is not modified.
 */
Element.prototype.getSlice = function(start, stop) {
  var kids = this._children;
  var picked = kids.slice(start, stop);
  return picked;
};
/**
 * Replace the direct children in the half-open range [start, stop) with the
 * given elements.
 *
 * The replacement may be shorter or longer than the range: the child list
 * shrinks or grows accordingly. The previous index-by-index copy stored
 * `undefined` children when too few elements were supplied and silently
 * dropped the surplus when too many were supplied.
 *
 * @param {Number} start First position replaced.
 * @param {Number} stop First position left untouched.
 * @param {Array} elements Replacement children.
 */
Element.prototype.setSlice = function(start, stop, elements) {
  var span = Math.max(stop - start, 0);
  // splice mutates in place, so references to the child list stay valid.
  var args = [start, span].concat(elements || []);
  Array.prototype.splice.apply(this._children, args);
};
/**
 * Remove the direct children in the half-open range [start, stop).
 *
 * @param {Number} start First position removed.
 * @param {Number} stop First position kept.
 */
Element.prototype.delSlice = function(start, stop) {
  var count = stop - start;
  var kids = this._children;
  kids.splice(start, count);
};
/**
 * Add an element as the last direct child.
 *
 * @param {Element} element Child to add.
 */
Element.prototype.append = function(element) {
  var kids = this._children;
  kids[kids.length] = element;
};
/**
 * Append every element of the given array as a direct child, in order.
 *
 * The previous implementation called `concat` and discarded its result, so
 * no child was ever added. Children are pushed onto the existing array so
 * that references to the child list (see getchildren) stay valid.
 *
 * @param {Array} elements Children to append.
 */
Element.prototype.extend = function(elements) {
  var i;
  for (i = 0; i < elements.length; i++) {
    this._children.push(elements[i]);
  }
};
/**
 * Insert an element as a direct child at the given position.
 *
 * Children at and after `index` shift up by one. The previous implementation
 * assigned to `_children[index]`, which overwrote the existing child instead
 * of inserting.
 *
 * @param {Number} index Position the new child will occupy.
 * @param {Element} element Child to insert.
 */
Element.prototype.insert = function(index, element) {
  this._children.splice(index, 0, element);
};
/**
 * Remove a direct child, matched by its internal `_id`.
 *
 * Supports both the historical `remove(index, element)` call (where `index`
 * is ignored) and the ElementTree-style `remove(element)` call. The
 * single-argument form used to throw a TypeError because `element` was
 * undefined.
 *
 * @param {*} index Ignored when `element` is given; otherwise the element to
 *     remove.
 * @param {Element} [element] Element to remove.
 */
Element.prototype.remove = function(index, element) {
  if (element === undefined) {
    element = index;
  }
  // filter builds a new array: every child sharing the id is dropped.
  this._children = this._children.filter(function(child) {
    return child._id !== element._id;
  });
};
/**
 * The list of direct children.
 *
 * @return {Array} The internal child array itself, not a copy.
 */
Element.prototype.getchildren = function() {
  var kids = this._children;
  return kids;
};
/**
 * Delegate to ElementPath.find with this element as the starting point.
 *
 * @param {String} path Path expression handed to ElementPath.
 * @return {*} Whatever ElementPath.find returns.
 */
Element.prototype.find = function(path) {
  var origin = this;
  return ElementPath.find(origin, path);
};
/**
 * Delegate to ElementPath.findtext with this element as the starting point.
 *
 * @param {String} path Path expression handed to ElementPath.
 * @param {*} [defvalue] Default value handed to ElementPath.
 * @return {*} Whatever ElementPath.findtext returns.
 */
Element.prototype.findtext = function(path, defvalue) {
  var origin = this;
  return ElementPath.findtext(origin, path, defvalue);
};
/**
 * Delegate to ElementPath.findall with this element as the starting point.
 *
 * @param {String} path Path expression handed to ElementPath.
 * @param {*} [defvalue] Passed through unchanged as the third argument.
 * @return {*} Whatever ElementPath.findall returns.
 */
Element.prototype.findall = function(path, defvalue) {
  var origin = this;
  return ElementPath.findall(origin, path, defvalue);
};
/**
 * Reset the element: drop all attributes and children and set both `text`
 * and `tail` back to null. The tag and id are kept.
 */
Element.prototype.clear = function() {
  var noAttributes = {};
  var noChildren = [];
  this.attrib = noAttributes;
  this._children = noChildren;
  this.text = this.tail = null;
};
/**
 * Look up an attribute value.
 *
 * Only the element's own attributes are consulted. The previous plain
 * property read also saw members inherited from Object.prototype, so a key
 * such as "constructor" or "toString" returned a function instead of the
 * default.
 *
 * @param {String} key Attribute name.
 * @param {*} [defvalue] Returned when the attribute is missing or undefined.
 * @return {*} The attribute value or `defvalue`.
 */
Element.prototype.get = function(key, defvalue) {
  var attrs = this.attrib;
  if (Object.prototype.hasOwnProperty.call(attrs, key) && attrs[key] !== undefined) {
    return attrs[key];
  }
  return defvalue;
};
/**
 * Set (or overwrite) an attribute.
 *
 * @param {String} key Attribute name.
 * @param {*} value Attribute value.
 */
Element.prototype.set = function(key, value) {
  var attrs = this.attrib;
  attrs[key] = value;
};
/**
 * Names of the element's attributes.
 *
 * @return {Array} Own enumerable keys of the attribute object.
 */
Element.prototype.keys = function() {
  var attrs = this.attrib;
  return Object.keys(attrs);
};
/**
 * The element's attributes as returned by utils.items. Callers in this
 * module treat each entry as a [name, value] pair.
 *
 * @return {*} Whatever utils.items returns for the attribute object.
 */
Element.prototype.items = function() {
  var attrs = this.attrib;
  return utils.items(attrs);
};
/*
 * Python's ElementTree exposes this traversal as a generator; this port
 * reports each visited element through a callback instead.
 *
 * The element itself is visited first, then every child subtree in child
 * order. A tag of null or "*" matches every element; any other tag only
 * matches elements whose tag is strictly equal to it.
 **/
Element.prototype.iter = function(tag, callback) {
  var wanted = (tag === "*") ? null : tag;
  var idx = 0;
  if (wanted === null || this.tag === wanted) {
    callback(this);
  }
  while (idx < this._children.length) {
    this._children[idx].iter(wanted, callback);
    idx += 1;
  }
};
/**
 * Report every non-empty text fragment of the subtree to a callback, in
 * document order.
 *
 * For each element the order is: its `text`, then all of its children
 * (recursively), then its `tail`. The previous implementation emitted an
 * element's tail right after its text, i.e. before the text of its
 * descendants, so fragments came out of order for nested content.
 *
 * As before, the tail of the element the call starts from is included; it
 * is now reported last.
 *
 * @param {Function} callback Called with each text fragment.
 */
Element.prototype.itertext = function(callback) {
  function walk(node) {
    var i;
    if (node.text) {
      callback(node.text);
    }
    for (i = 0; i < node._children.length; i++) {
      walk(node._children[i]);
    }
    // The tail follows the element's closing tag, hence after its subtree.
    if (node.tail) {
      callback(node.tail);
    }
  }
  walk(this);
};
/**
 * Create a new element through `parent.makeelement` and append it to
 * `parent`.
 *
 * @param {Element} parent Element receiving the new child.
 * @param {*} tag Tag for the new child.
 * @param {Object} [attrib] Attributes for the new child.
 * @return {Element} The newly created child.
 */
function SubElement(parent, tag, attrib) {
  var child = parent.makeelement(tag, attrib);
  parent.append(child);
  return child;
}
/**
 * Build a comment node: an Element whose tag is the `Comment` function
 * itself.
 *
 * @param {String} [text] Comment body; ignored when falsy, leaving the
 *     element's default text in place.
 * @return {Element}
 */
function Comment(text) {
  var node = new Element(Comment);
  if (!text) {
    return node;
  }
  node.text = text;
  return node;
}
/**
 * Build a processing-instruction node: an Element whose tag is the
 * `ProcessingInstruction` function itself.
 *
 * @param {String} target PI target; becomes the start of the node's text.
 * @param {String} [text] PI content; when truthy it is appended to the
 *     target after a single space.
 * @return {Element}
 */
function ProcessingInstruction(target, text) {
  var node = new Element(ProcessingInstruction);
  node.text = text ? target + " " + text : target;
  return node;
}
/**
 * Qualified-name wrapper.
 *
 * With one argument the value is stored as is. With a truthy `tag` the two
 * arguments are combined into the "{uri}tag" form.
 *
 * @param {String} text_or_uri Complete name, or the namespace URI when `tag`
 *     is given.
 * @param {String} [tag] Local name.
 */
function QName(text_or_uri, tag) {
  this.text = tag ? sprintf("{%s}%s", text_or_uri, tag) : text_or_uri;
}

/**
 * @return {String} The stored name.
 */
QName.prototype.toString = function() {
  var qualified = this.text;
  return qualified;
};
/**
 * Wrapper around a root element.
 *
 * @param {Element} [element] Root of the tree; may be omitted and supplied
 *     later through parse() or _setroot().
 */
function ElementTree(element) {
  var root = element;
  this._root = root;
}
/**
 * @return {Element} The root element currently held by the tree.
 */
ElementTree.prototype.getroot = function() {
  var root = this._root;
  return root;
};
/**
 * Replace the root element held by the tree.
 *
 * @param {Element} element New root.
 */
ElementTree.prototype._setroot = function(element) {
  var root = element;
  this._root = root;
};
/**
 * Feed `source` to a parser and adopt the result as the tree's root.
 *
 * When no parser is given, the module named by constants.DEFAULT_PARSER is
 * looked up with get_parser and its XMLParser is instantiated around a new
 * TreeBuilder.
 *
 * @param {*} source Data handed to `parser.feed`.
 * @param {Object} [parser] Object providing `feed(source)` and `close()`.
 * @return {*} The value returned by `parser.close()`, now the root.
 */
ElementTree.prototype.parse = function(source, parser) {
  var backend;
  if (!parser) {
    backend = get_parser(constants.DEFAULT_PARSER);
    parser = new backend.XMLParser(new TreeBuilder());
  }
  parser.feed(source);
  return (this._root = parser.close());
};
/**
 * Run the root element's iter() with the given tag and callback.
 *
 * @param {*} tag Tag filter forwarded to the root.
 * @param {Function} callback Callback forwarded to the root.
 */
ElementTree.prototype.iter = function(tag, callback) {
  var root = this._root;
  root.iter(tag, callback);
};
/**
 * Run the root element's find() with the given path.
 *
 * @param {String} path Path expression forwarded to the root.
 * @return {*} Whatever the root's find() returns.
 */
ElementTree.prototype.find = function(path) {
  var root = this._root;
  return root.find(path);
};
/**
 * Run the root element's findtext() with the given path and default.
 *
 * @param {String} path Path expression forwarded to the root.
 * @param {*} [defvalue] Default value forwarded to the root.
 * @return {*} Whatever the root's findtext() returns.
 */
ElementTree.prototype.findtext = function(path, defvalue) {
  var root = this._root;
  return root.findtext(path, defvalue);
};
/**
 * Run the root element's findall() with the given path (and nothing else).
 *
 * @param {String} path Path expression forwarded to the root.
 * @return {*} Whatever the root's findall() returns.
 */
ElementTree.prototype.findall = function(path) {
  var root = this._root;
  return root.findall(path);
};
/**
 * Write the plain text content of a subtree, in document order: an element's
 * text, then its children, then its tail. The text of comment and
 * processing-instruction nodes is skipped (their tails are still written).
 *
 * @param {Function} write Called with each text fragment.
 * @param {Element} elem Root of the subtree.
 */
function _serialize_text(write, elem) {
  var i;
  if (elem.text && elem.tag !== Comment && elem.tag !== ProcessingInstruction) {
    write(elem.text);
  }
  for (i = 0; i < elem._children.length; i++) {
    _serialize_text(write, elem._children[i]);
  }
  if (elem.tail) {
    write(elem.tail);
  }
}

/**
 * Unlike ElementTree, we don't write to a file, we return you a string.
 *
 * Recognised options (defaults in parentheses):
 *   - encoding ('utf-8'): name placed in the XML declaration and passed on
 *     to the serializer.
 *   - xml_declaration (null): the declaration is emitted unless this is
 *     exactly `false`.
 *   - default_namespace (null): forwarded to the namespace collection step.
 *   - method ('xml'): 'xml' or 'text'; anything else throws.
 *   - indent (absent): when present, output is pretty-printed using that
 *     many spaces per nesting level.
 *
 * The 'text' method previously always threw a ReferenceError (it referenced
 * the undefined names `self`, `encoding` and `_serialize_text`). It now
 * returns the concatenated text content, without an XML declaration.
 *
 * @param {Object} [options] See above.
 * @return {String} The serialized document.
 * @throws {Error} When `options.method` is neither 'xml' nor 'text'.
 */
ElementTree.prototype.write = function(options) {
  var sb = [];
  var qnames, namespaces, indent, indent_string, x;

  function emit(data) {
    sb.push(data);
  }

  options = utils.merge({
    encoding: 'utf-8',
    xml_declaration: null,
    default_namespace: null,
    method: 'xml'}, options);

  if (options.method === "text") {
    _serialize_text(emit, this._root);
    return sb.join("");
  }

  if (options.xml_declaration !== false) {
    sb.push("<?xml version='1.0' encoding='"+options.encoding +"'?>\n");
  }

  x = _namespaces(this._root, options.encoding, options.default_namespace);
  qnames = x[0];
  namespaces = x[1];

  if (options.hasOwnProperty('indent')) {
    // Nesting level starts at 0; indent_string is one level of padding.
    indent = 0;
    indent_string = new Array(options.indent + 1).join(' ');
  }
  else {
    indent = false;
  }

  if (options.method === "xml") {
    _serialize_xml(emit, this._root, options.encoding, qnames, namespaces, indent, indent_string);
  }
  else {
    /* TODO: html */
    throw new Error("unknown serialization method "+ options.method);
  }

  return sb.join("");
};
/*
 * Default prefixes, keyed by namespace URI. register_namespace() adds to
 * this table and _namespaces() consults it when picking a prefix.
 */
var _namespace_map = {
  /* Prefixes for "well-known" namespaces. */
  'http://www.w3.org/XML/1998/namespace': 'xml',
  'http://www.w3.org/1999/xhtml': 'html',
  'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf',
  'http://schemas.xmlsoap.org/wsdl/': 'wsdl',

  /* XML Schema */
  'http://www.w3.org/2001/XMLSchema': 'xs',
  'http://www.w3.org/2001/XMLSchema-instance': 'xsi',

  /* Dublin Core */
  'http://purl.org/dc/elements/1.1/': 'dc'
};
/**
 * Register a prefix for a namespace URI in the module-wide prefix table.
 *
 * Prefixes of the exact form "ns<digits>" are rejected because that form is
 * generated automatically during serialization. The check is anchored at
 * both ends; the previous unanchored pattern also rejected harmless prefixes
 * that merely end that way, such as "dns1".
 *
 * Any existing entry for the same URI or the same prefix is removed first,
 * so a prefix never ends up mapped from two different URIs. The previous
 * cleanup only deleted an entry identical to the one being added, which had
 * no effect.
 *
 * @param {String} prefix Prefix to use for the namespace.
 * @param {String} uri Namespace URI.
 * @throws {Error} When `prefix` has the reserved "ns<digits>" form.
 */
function register_namespace(prefix, uri) {
  if (/^ns\d+$/.test(prefix)) {
    throw new Error('Prefix format reserved for internal use');
  }

  Object.keys(_namespace_map).forEach(function(known_uri) {
    if (known_uri === uri || _namespace_map[known_uri] === prefix) {
      delete _namespace_map[known_uri];
    }
  });

  _namespace_map[uri] = prefix;
}
/**
 * Escape XML special characters in a value.
 *
 * Always replaced: & < >. Newline and carriage return become character
 * references unless `isText` is truthy. Double quotes are replaced only when
 * `isAttribute` is truthy. Falsy input (null, undefined, '', 0) is returned
 * unchanged; anything else is converted with toString() first.
 *
 * @param {*} text Value to escape.
 * @param {String} encoding Unused here.
 * @param {Boolean} [isAttribute] Also escape double quotes.
 * @param {Boolean} [isText] Leave line breaks as they are.
 * @return {*} The escaped string, or the falsy input itself.
 */
function _escape(text, encoding, isAttribute, isText) {
  var entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '\n': '&#xA;',
    '\r': '&#xD;',
    '"': '&quot;'
  };
  var targets = '&<>';

  if (!text) {
    return text;
  }
  if (!isText) {
    targets += '\\n\\r';
  }
  if (isAttribute) {
    targets += '"';
  }

  // One pass over the input: no replacement contains a character that a
  // later step would have touched, so this matches chained replaces.
  return text.toString().replace(new RegExp('[' + targets + ']', 'g'), function(ch) {
    return entities[ch];
  });
}
/* TODO: benchmark single regex */
/**
 * Escape a value for use inside a double-quoted attribute: calls _escape
 * with the attribute flag set and no text flag.
 *
 * @param {*} text Value to escape.
 * @param {String} encoding Forwarded to _escape.
 * @return {*} Result of _escape.
 */
function _escape_attrib(text, encoding) {
  var forAttribute = true;
  return _escape(text, encoding, forAttribute);
}
/**
 * Escape a value for comment / processing-instruction bodies: calls _escape
 * with the attribute flag cleared and no text flag.
 *
 * @param {*} text Value to escape.
 * @param {String} encoding Forwarded to _escape.
 * @return {*} Result of _escape.
 */
function _escape_cdata(text, encoding) {
  var forAttribute = false;
  return _escape(text, encoding, forAttribute);
}
/**
 * Escape a value for element text content: calls _escape with the attribute
 * flag cleared and the text flag set.
 *
 * @param {*} text Value to escape.
 * @param {String} encoding Forwarded to _escape.
 * @return {*} Result of _escape.
 */
function _escape_text(text, encoding) {
  var forAttribute = false;
  var asText = true;
  return _escape(text, encoding, forAttribute, asText);
}
/**
 * Walk a subtree and work out how every tag and attribute name is written.
 *
 * Returns a pair [qnames, namespaces]:
 *   - qnames maps each name as used in the tree ("{uri}local" or a plain
 *     name) to its serialized form ("prefix:local" or the plain name).
 *   - namespaces maps each namespace URI that needs a declaration to its
 *     prefix ("" for the default namespace). The "xml" prefix is never
 *     added to it.
 *
 * Prefixes come from, in order: namespaces already seen in this call, the
 * module-wide _namespace_map, or a generated "ns<N>" name.
 *
 * Fixes over the previous version:
 *   - A QName tag used by more than one element no longer throws "Invalid
 *     tag type": the second occurrence used to fall through to the
 *     type-error branch.
 *   - Lookups use own properties only, so names such as "constructor" are
 *     not mistaken for already-registered entries.
 *   - An unused helper and an unused local were removed.
 *
 * @param {Element} elem Root of the subtree to scan.
 * @param {String} encoding Unused here.
 * @param {String} [default_namespace] URI written without a prefix.
 * @return {Array} [qnames, namespaces].
 * @throws {Error} On an unqualified name while default_namespace is set, or
 *     on a tag that is not a string, QName, null, Comment or
 *     ProcessingInstruction.
 */
function _namespaces(elem, encoding, default_namespace) {
  var qnames = {};
  var namespaces = {};
  var has = Object.prototype.hasOwnProperty;

  if (default_namespace) {
    namespaces[default_namespace] = "";
  }

  function add_qname(qname) {
    var parts, uri, tag, prefix;

    if (qname[0] !== "{") {
      if (default_namespace) {
        throw new Error('cannot use non-qualified names with default_namespace option');
      }
      qnames[qname] = qname;
      return;
    }

    parts = qname.substring(1).split("}", 2);
    uri = parts[0];
    tag = parts[1];

    if (has.call(namespaces, uri)) {
      prefix = namespaces[uri];
    }
    else {
      prefix = has.call(_namespace_map, uri) ? _namespace_map[uri] : undefined;
      if (prefix === undefined) {
        prefix = "ns" + Object.keys(namespaces).length;
      }
      // The "xml" prefix is predefined and must not be declared.
      if (prefix !== "xml") {
        namespaces[uri] = prefix;
      }
    }

    // An empty prefix means the default namespace: write the bare tag.
    qnames[qname] = prefix ? sprintf("%s:%s", prefix, tag) : tag;
  }

  /* Register a name unless it has been registered already. */
  function note(name) {
    if (!has.call(qnames, name)) {
      add_qname(name);
    }
  }

  elem.iter(null, function(e) {
    var tag = e.tag;
    var text = e.text;

    if (tag instanceof QName) {
      note(tag.text);
    }
    else if (typeof(tag) === "string") {
      note(tag);
    }
    else if (tag !== null && tag !== Comment && tag !== ProcessingInstruction) {
      throw new Error('Invalid tag type for serialization: '+ tag);
    }

    if (text instanceof QName) {
      note(text.text);
    }

    e.items().forEach(function(item) {
      var key = item[0],
          value = item[1];
      if (key instanceof QName) {
        key = key.text;
      }
      note(key);
      if (value instanceof QName) {
        note(value.text);
      }
    });
  });

  return [qnames, namespaces];
}
/**
 * Serialize an element and its subtree as XML through a write callback.
 *
 * Fixes over the previous version:
 *   - An element whose tag has no entry in `qnames` (e.g. a null tag) now
 *     has its direct children serialized. The old code called
 *     `elem.iter(callback)`, which put the callback in the tag-filter slot,
 *     so the children were never written.
 *   - Namespace declarations are sorted by prefix with a comparator that
 *     returns a number. The old comparator returned a boolean, which does
 *     not give a reliable order.
 *
 * NOTE(review): `elem.tail` is not written anywhere in this function.
 *
 * @param {Function} write Called with each output fragment.
 * @param {Element} elem Element to serialize.
 * @param {String} encoding Forwarded to the escaping helpers.
 * @param {Object} qnames Map from names used in the tree to serialized names.
 * @param {Object} namespaces Map from URI to prefix to declare on this
 *     element, or null for none (child elements receive null).
 * @param {Number|Boolean} indent Current nesting level, or false to disable
 *     pretty-printing.
 * @param {String} indent_string Padding for one nesting level.
 */
function _serialize_xml(write, elem, encoding, qnames, namespaces, indent, indent_string) {
  var tag = elem.tag;
  var text = elem.text;
  var items;
  // Level 0 is falsy but still means "pretty-print".
  var newlines = indent || (indent === 0);
  var child_indent = newlines ? indent + 1 : false;

  function serialize_child(child) {
    _serialize_xml(write, child, encoding, qnames, null, child_indent, indent_string);
  }

  function by_prefix(a, b) {
    if (a[1] < b[1]) {
      return -1;
    }
    if (a[1] > b[1]) {
      return 1;
    }
    return 0;
  }

  // Leading padding; yields "" when indent is false or 0.
  write(Array(indent + 1).join(indent_string));

  if (tag === Comment) {
    write(sprintf("<!--%s-->", _escape_cdata(text, encoding)));
  }
  else if (tag === ProcessingInstruction) {
    write(sprintf("<?%s?>", _escape_cdata(text, encoding)));
  }
  else {
    tag = qnames[tag];

    if (tag === undefined) {
      // No serialized name: emit only the content, without a wrapping tag.
      if (text) {
        write(_escape_text(text, encoding));
      }
      elem._children.forEach(serialize_child);
    }
    else {
      write("<" + tag);
      items = elem.items();

      if (items || namespaces) {
        items.sort(); // lexical order

        items.forEach(function(item) {
          var k = item[0],
              v = item[1];

          if (k instanceof QName) {
            k = k.text;
          }

          if (v instanceof QName) {
            v = qnames[v.text];
          }
          else {
            v = _escape_attrib(v, encoding);
          }
          write(sprintf(" %s=\"%s\"", qnames[k], v));
        });

        if (namespaces) {
          items = utils.items(namespaces);
          items.sort(by_prefix);

          items.forEach(function(item) {
            var k = item[1],
                v = item[0];

            // An empty prefix declares the default namespace ("xmlns=").
            if (k) {
              k = ':' + k;
            }

            write(sprintf(" xmlns%s=\"%s\"", k, _escape_attrib(v, encoding)));
          });
        }
      }

      if (text || elem.len()) {
        // Whitespace-only text is dropped.
        if (text && text.toString().match(/^\s*$/)) {
          text = null;
        }

        write(">");
        if (!text && newlines) {
          write("\n");
        }

        if (text) {
          write(_escape_text(text, encoding));
        }
        elem._children.forEach(serialize_child);

        if (!text && indent) {
          write(Array(indent + 1).join(indent_string));
        }
        write("</" + tag + ">");
      }
      else {
        write(" />");
      }
    }
  }

  if (newlines) {
    write("\n");
  }
}
/**
 * Parse `source` into a new ElementTree.
 *
 * @param {*} source Data handed to ElementTree.prototype.parse.
 * @param {Object} [parser] Optional parser, forwarded unchanged.
 * @return {ElementTree} The tree (not its root element).
 */
function parse(source, parser) {
  var doc = new ElementTree();
  doc.parse(source, parser);
  return doc;
}
/**
 * Serialize an element by wrapping it in a temporary ElementTree and calling
 * its write() method.
 *
 * @param {Element} element Root of the subtree to serialize.
 * @param {Object} [options] Forwarded to ElementTree.prototype.write.
 * @return {String} Whatever write() returns.
 */
function tostring(element, options) {
  var wrapper = new ElementTree(element);
  return wrapper.write(options);
}
exports.PI = ProcessingInstruction;
exports.Comment = Comment;
exports.ProcessingInstruction = ProcessingInstruction;
exports.SubElement = SubElement;
exports.QName = QName;
exports.ElementTree = ElementTree;
exports.ElementPath = ElementPath;
exports.Element = function(tag, attrib) {
return new Element(tag, attrib);
};
exports.XML = function(data) {
var et = new ElementTree();
return et.parse(data);
};
exports.parse = parse;
exports.register_namespace = register_namespace;
exports.tostring = tostring;