blob: b6538eec40d0cbd3e84cfc45466604ebd6c6e346 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "DFPlatform.h"
#include "DFHTML.h"
#include "DFFilesystem.h"
#include "DFDOM.h"
#include "CSS.h"
#include "CSSProperties.h"
#include "CSSSelector.h"
#include "CSSLength.h"
#include "CSSSheet.h"
#include "DFHTDocument.h"
#include "DFTidyHelper.h"
#include "DFNameMap.h"
#include "DFXML.h"
#include "DFString.h"
#include "DFCharacterSet.h"
#include "DFCommon.h"
// Locate the <head> element of an HTML document.
//
// Scans the children of the document node for an <html> element and then searches that
// element's direct children for <head>. Top-level nodes that are not <html> are skipped
// instead of terminating the search, so a leading non-<html> sibling does not hide the head.
//
// Returns the <head> node, or NULL if no <html> child contains one.
static DFNode *HTML_findHead(DFDocument *doc)
{
    for (DFNode *child1 = doc->docNode->first; child1 != NULL; child1 = child1->next) {
        if (child1->tag != HTML_HTML)
            continue;
        for (DFNode *child2 = child1->first; child2 != NULL; child2 = child2->next) {
            if (child2->tag == HTML_HEAD)
                return child2;
        }
    }
    return NULL;
}
// Append a <link rel="stylesheet"> element pointing at href to the document's <head>.
// The document is left untouched when no <head> element can be found.
void HTMLAddExternalStyleSheet(DFDocument *doc, const char *href)
{
    DFNode *headNode = HTML_findHead(doc);
    if (headNode != NULL) {
        DFNode *linkNode = DFCreateElement(doc,HTML_LINK);
        DFSetAttribute(linkNode,HTML_REL,"stylesheet");
        DFSetAttribute(linkNode,HTML_HREF,href);
        DFAppendChild(headNode,linkNode);
    }
}
// Append a <style> element whose only child is a text node holding cssText to the
// document's <head>. The document is left untouched when no <head> element can be found.
void HTMLAddInternalStyleSheet(DFDocument *doc, const char *cssText)
{
    DFNode *headNode = HTML_findHead(doc);
    if (headNode != NULL) {
        DFNode *styleNode = DFCreateElement(doc,HTML_STYLE);
        DFNode *cssNode = DFCreateTextNode(doc,cssText);
        DFAppendChild(styleNode,cssNode);
        DFAppendChild(headNode,styleNode);
    }
}
// Return the text content of the first <style> child of the root element's <head>.
// When either the <head> or the <style> element is missing, a copy of the empty string is
// returned instead. The result comes from DFNodeTextToString or xstrdup; the caller is
// presumably expected to free it.
char *HTMLCopyCSSText(DFDocument *doc)
{
    DFNode *headNode = DFChildWithTag(doc->root,HTML_HEAD);
    DFNode *styleNode = (headNode != NULL) ? DFChildWithTag(headNode,HTML_STYLE) : NULL;
    return (styleNode != NULL) ? DFNodeTextToString(styleNode) : xstrdup("");
}
// Return 1 if tag is one of the container elements (article, section, nav, aside, hgroup,
// header, footer, address, div), otherwise 0.
int HTML_isContainerTag(Tag tag)
{
    if ((tag == HTML_ARTICLE) || (tag == HTML_SECTION) || (tag == HTML_NAV))
        return 1;
    if ((tag == HTML_ASIDE) || (tag == HTML_HGROUP) || (tag == HTML_HEADER))
        return 1;
    if ((tag == HTML_FOOTER) || (tag == HTML_ADDRESS) || (tag == HTML_DIV))
        return 1;
    return 0;
}
// Return 1 if tag is a heading, paragraph, list, list item, table part, figure or
// figcaption; for any other tag, return the result of HTML_isContainerTag.
int HTML_isBlockLevelTag(Tag tag)
{
    static const Tag blockTags[] = {
        HTML_H1, HTML_H2, HTML_H3, HTML_H4, HTML_H5, HTML_H6,
        HTML_P,
        HTML_UL, HTML_OL, HTML_LI,
        HTML_TABLE, HTML_CAPTION, HTML_TR, HTML_TD, HTML_TH,
        HTML_THEAD, HTML_TBODY, HTML_TFOOT,
        HTML_FIGURE, HTML_FIGCAPTION,
    };
    const size_t count = sizeof(blockTags)/sizeof(blockTags[0]);

    for (size_t i = 0; i < count; i++) {
        if (blockTags[i] == tag)
            return 1;
    }
    // Not in the explicit list: container elements also count as block level
    return HTML_isContainerTag(tag);
}
// Return 1 if tag is a heading (h1-h6) or a paragraph (p), otherwise 0.
int HTML_isParagraphTag(Tag tag)
{
    if ((tag == HTML_H1) || (tag == HTML_H2) || (tag == HTML_H3))
        return 1;
    if ((tag == HTML_H4) || (tag == HTML_H5) || (tag == HTML_H6))
        return 1;
    if (tag == HTML_P)
        return 1;
    return 0;
}
// Return 1 if tag is an unordered (ul) or ordered (ol) list element, otherwise 0.
int HTML_isListTag(Tag tag)
{
    return ((tag == HTML_UL) || (tag == HTML_OL)) ? 1 : 0;
}
// Report whether node is a <span> whose class attribute starts with "uxwrite-".
// Non-span nodes always yield 0; for spans the result is whatever DFStringHasPrefix returns.
int HTML_isSpecialSpan(DFNode *node)
{
    if (node->tag == HTML_SPAN) {
        const char *className = DFGetAttribute(node,HTML_CLASS);
        return DFStringHasPrefix(className,"uxwrite-");
    }
    return 0;
}
// Report whether node itself counts as content: text nodes, <img> and <br> always do,
// a <span> does only when HTML_isSpecialSpan says so, and nothing else does.
int HTML_isContentNode(DFNode *node)
{
    if (node->tag == HTML_SPAN)
        return HTML_isSpecialSpan(node);
    if ((node->tag == DOM_TEXT) || (node->tag == HTML_IMG) || (node->tag == HTML_BR))
        return 1;
    return 0;
}
// Recursively determine whether node or any of its descendants is a content node.
// A node for which DFIsWhitespaceNode is true never counts, and its children are not examined.
int HTML_nodeHasContent(DFNode *node)
{
    if (DFIsWhitespaceNode(node))
        return 0;
    if (HTML_isContentNode(node))
        return 1;

    DFNode *current = node->first;
    while (current != NULL) {
        if (HTML_nodeHasContent(current))
            return 1;
        current = current->next;
    }
    return 0;
}
// Return 1 if node is an <a> element with an href attribute that does not start with "#",
// otherwise 0.
int HTML_nodeIsHyperlink(DFNode *node)
{
    const char *target = DFGetAttribute(node,HTML_HREF);
    if (node->tag != HTML_A)
        return 0;
    if (target == NULL)
        return 0;
    return !DFStringHasPrefix(target,"#");
}
// Build an indentation string: a newline followed by two spaces per level of depth.
//
// A negative depth is treated as 0. Callers in this file pass depth-1, which would otherwise
// produce an undersized allocation and a huge memset length.
//
// Returns a buffer obtained from xmalloc; the caller releases it with free().
static char *indentString(int depth)
{
    if (depth < 0)
        depth = 0;
    // Convert before multiplying so the arithmetic is done in size_t rather than int
    size_t spaces = (size_t)depth*2;
    char *str = (char *)xmalloc(1+spaces+1);
    str[0] = '\n';
    memset(&str[1],' ',spaces);
    str[1+spaces] = '\0';
    return str;
}
// Create a text node in doc containing the indentation string for the given depth.
// The temporary string is freed once the node has been created.
static DFNode *createIndentNode(DFDocument *doc, int depth)
{
    char *indent = indentString(depth);
    DFNode *textNode = DFCreateTextNode(doc,indent);
    free(indent);
    return textNode;
}
// Insert whitespace text nodes into the subtree rooted at node so that it is indented
// according to nesting depth. How a node is treated depends on its tag (three groups below);
// tags not listed get no whitespace of their own. In every case the function then recurses
// into the node's children with depth+1.
//
// node:  root of the subtree to indent; modified in place
// depth: nesting level of node's children (two spaces per level, see indentString)
//
// NOTE(review): the non-document cases pass depth-1 to indentString; presumably callers start
// at depth 0 on the document node so that value is never negative - confirm.
void HTML_safeIndent(DFNode *node, int depth)
{
switch (node->tag) {
// Structural elements: put an indent text node in front of every child
case DOM_DOCUMENT:
case HTML_HTML:
case HTML_HEAD:
case HTML_BODY:
case HTML_DIV:
case HTML_UL:
case HTML_OL:
case HTML_LI:
case HTML_NAV:
case HTML_FIGURE:
case HTML_TABLE:
case HTML_COLGROUP:
case HTML_THEAD:
case HTML_TBODY:
case HTML_TFOOT:
case HTML_TR:
case HTML_TH:
case HTML_TD: {
DFNode *next;
// Remember the next sibling before inserting, so only the original children are visited
for (DFNode *child = node->first; child != NULL; child = next) {
next = child->next;
DFNode *text = createIndentNode(node->doc,depth);
DFInsertBefore(node,text,child);
}
// Append a trailing indent one level shallower (skipped for the document node itself)
if (node->tag != DOM_DOCUMENT) {
DFNode *text = createIndentNode(node->doc,depth-1);
DFAppendChild(node,text);
}
break;
}
// Script/style: replace all children with a single text node made of a newline, the
// whitespace-trimmed original text, and the indent string for depth-1
case HTML_SCRIPT:
case HTML_STYLE: {
char *nodeContent = DFNodeTextToString(node);
char *trimmedContent = DFStringTrimWhitespace(nodeContent);
while (node->first != NULL)
DFRemoveNode(node->first);
char *indentStr = indentString(depth-1);
char *revisedStr = DFFormatString("\n%s%s",trimmedContent,indentStr);
DFAppendChild(node,DFCreateTextNode(node->doc,revisedStr));
free(indentStr);
free(revisedStr);
free(nodeContent);
free(trimmedContent);
break;
}
// Headings, paragraphs and captions: if the element has children, wrap them with one indent
// before the first child and one (a level shallower) after the last; nothing in between
case HTML_H1:
case HTML_H2:
case HTML_H3:
case HTML_H4:
case HTML_H5:
case HTML_H6:
case HTML_P:
case HTML_FIGCAPTION:
case HTML_CAPTION: {
if (node->first != NULL) {
char *indentBefore = indentString(depth);
char *indentAfter = indentString(depth-1);
DFNode *before = DFCreateTextNode(node->doc,indentBefore);
DFNode *after = DFCreateTextNode(node->doc,indentAfter);
DFInsertBefore(node,before,node->first);
DFAppendChild(node,after);
free(indentBefore);
free(indentAfter);
}
break;
}
}
// Recurse into every child (including the text nodes inserted above) one level deeper.
// The next sibling is captured first because the recursive call may modify the child.
DFNode *next;
for (DFNode *child = node->first; child != NULL; child = next) {
next = child->next;
HTML_safeIndent(child,depth+1);
}
}
// Walk the subtree rooted at node and, for every element (tag >= MIN_ELEMENT_TAG) whose id
// attribute starts with idPrefix, replace that prefix with "x" so the id no longer matches.
void HTMLBreakBDTRefs(DFNode *node, const char *idPrefix)
{
    if (node->tag >= MIN_ELEMENT_TAG) {
        const char *currentId = DFGetAttribute(node,HTML_ID);
        if ((currentId != NULL) && DFStringHasPrefix(currentId,idPrefix)) {
            // Keep everything after the prefix and put "x" in front of it
            const char *remainder = currentId + strlen(idPrefix);
            char *replacementId = DFFormatString("x%s",remainder);
            DFSetAttribute(node,HTML_ID,replacementId);
            free(replacementId);
        }
    }

    DFNode *current = node->first;
    while (current != NULL) {
        HTMLBreakBDTRefs(current,idPrefix);
        current = current->next;
    }
}
// Determine the width and height of an <img> element.
//
// The "width" and "height" properties of the element's style attribute take precedence. For
// each dimension that the style did not supply a valid length for, the corresponding HTML
// width/height attribute is used instead: a value ending in "%" is read as a percentage,
// anything else as pixels.
//
// Returns a CSSSize that starts out zeroed; a dimension is only assigned when one of the
// sources above provides it.
CSSSize HTML_getImageDimensions(DFNode *img)
{
    CSSSize dimensions;
    bzero(&dimensions,sizeof(CSSSize));

    const char *styleAttr = DFGetAttribute(img,HTML_STYLE);
    if (styleAttr != NULL) {
        CSSProperties *properties = CSSPropertiesNewWithString(styleAttr);
        const char *cssWidth = CSSGet(properties,"width");
        const char *cssHeight = CSSGet(properties,"height");
        if (cssWidth != NULL)
            dimensions.width = CSSLengthFromString(cssWidth);
        if (cssHeight != NULL)
            dimensions.height = CSSLengthFromString(cssHeight);
        // The lengths are stored by value above, so the properties can be released now
        CSSPropertiesRelease(properties);
    }

    if (!CSSLengthIsValid(dimensions.width)) {
        const char *widthAttr = DFGetAttribute(img,HTML_WIDTH);
        if (widthAttr != NULL) {
            if (DFStringHasSuffix(widthAttr,"%"))
                dimensions.width = CSSLengthFromValue(atof(widthAttr),UnitsPct);
            else
                dimensions.width = CSSLengthFromValue(atof(widthAttr),UnitsPx);
        }
    }

    // Fall back to the height attribute only when the style gave no valid height, mirroring
    // the width handling above
    if (!CSSLengthIsValid(dimensions.height)) {
        const char *heightAttr = DFGetAttribute(img,HTML_HEIGHT);
        if (heightAttr != NULL) {
            if (DFStringHasSuffix(heightAttr,"%"))
                dimensions.height = CSSLengthFromValue(atof(heightAttr),UnitsPct);
            else
                dimensions.height = CSSLengthFromValue(atof(heightAttr),UnitsPx);
        }
    }

    return dimensions;
}
// Return 1 if str is exactly six characters long and every character satisfies DFCharIsHex;
// return 0 otherwise, including when str is NULL.
int isRRGGBB(const char *str)
{
    if ((str == NULL) || (strlen(str) != 6))
        return 0;

    for (const char *cursor = str; *cursor != '\0'; cursor++) {
        if (!DFCharIsHex(*cursor))
            return 0;
    }
    return 1;
}
// Return 1 if str is a '#' followed by exactly six characters accepted by isRRGGBB
// (e.g. "#1a2B3c"); return 0 otherwise, including when str is NULL.
int isHashRRGGBB(const char *str)
{
    if (str == NULL)
        return 0;
    // isRRGGBB already requires the remainder to be exactly six characters long, and an
    // empty string fails the '#' test before &str[1] is ever formed
    return (str[0] == '#') && isRRGGBB(&str[1]);
}
// Look up the content attribute of the first <meta> element in <head> whose name attribute
// equals name. Returns NULL when there is no <head> or no matching <meta>.
// The document's root must be the <html> element (asserted).
const char *HTMLMetaGet(DFDocument *htmlDoc, const char *name)
{
    assert(htmlDoc->root != NULL);
    assert(htmlDoc->root->tag == HTML_HTML);

    DFNode *headNode = DFChildWithTag(htmlDoc->root,HTML_HEAD);
    if (headNode == NULL)
        return NULL;

    for (DFNode *child = headNode->first; child != NULL; child = child->next) {
        if (child->tag != HTML_META)
            continue;
        if (DFStringEquals(DFGetAttribute(child,HTML_NAME),name))
            return DFGetAttribute(child,HTML_CONTENT);
    }
    return NULL;
}
// Set the content of the <meta> element named name, creating whatever is missing.
//
// If the root has no <head>, one is created and inserted before <body>. The first existing
// <meta> with a matching name attribute has its content attribute overwritten; if there is
// none, a new <meta> with the given name and content is added to <head>.
// The document's root must be the <html> element (asserted).
void HTMLMetaSet(DFDocument *htmlDoc, const char *name, const char *content)
{
    assert(htmlDoc->root != NULL);
    assert(htmlDoc->root->tag == HTML_HTML);

    DFNode *headNode = DFChildWithTag(htmlDoc->root,HTML_HEAD);
    if (headNode == NULL) {
        headNode = DFCreateElement(htmlDoc,HTML_HEAD);
        DFNode *bodyNode = DFChildWithTag(htmlDoc->root,HTML_BODY);
        DFInsertBefore(htmlDoc->root,headNode,bodyNode);
    }

    // Search for an existing entry with this name
    DFNode *target = NULL;
    for (DFNode *child = headNode->first; child != NULL; child = child->next) {
        if ((child->tag == HTML_META) && DFStringEquals(DFGetAttribute(child,HTML_NAME),name)) {
            target = child;
            break;
        }
    }

    if (target == NULL) {
        target = DFCreateChildElement(headNode,HTML_META);
        DFSetAttribute(target,HTML_NAME,name);
    }
    DFSetAttribute(target,HTML_CONTENT,content);
}
// Remove every <meta> element in <head> whose name attribute equals name.
// Does nothing when the root has no <head>.
// The document's root must be the <html> element (asserted).
void HTMLMetaRemove(DFDocument *htmlDoc, const char *name)
{
    assert(htmlDoc->root != NULL);
    assert(htmlDoc->root->tag == HTML_HTML);

    DFNode *headNode = DFChildWithTag(htmlDoc->root,HTML_HEAD);
    if (headNode == NULL)
        return;

    DFNode *current = headNode->first;
    while (current != NULL) {
        // Fetch the successor first, since current may be removed from the tree below
        DFNode *following = current->next;
        if ((current->tag == HTML_META) && DFStringEquals(DFGetAttribute(current,HTML_NAME),name))
            DFRemoveNode(current);
        current = following;
    }
}
// Parse HTML source text into a new DFDocument.
//
// str:           the HTML text to parse
// removeSpecial: when non-zero, DFHTDocumentRemoveUXWriteSpecial is applied to the parsed
//                document before conversion
// error:         passed to the parser and to DFErrorFormat to report failures
//
// Returns the new document, or NULL if parsing fails or no root element is produced.
DFDocument *DFParseHTMLString(const char *str, int removeSpecial, DFError **error)
{
    DFHTDocument *tidyDoc = DFHTDocumentNew();
    if (!DFHTDocumentParseCString(tidyDoc,str,error)) {
        DFHTDocumentFree(tidyDoc);
        return NULL;
    }

    if (removeSpecial)
        DFHTDocumentRemoveUXWriteSpecial(tidyDoc);

    DFDocument *result = DFDocumentNew();
    DFNode *htmlRoot = fromTidyNode(result,tidyDoc->doc,tidyGetHtml(tidyDoc->doc));
    if (htmlRoot == NULL) {
        DFErrorFormat(error,"No root element");
        DFDocumentRelease(result);
        DFHTDocumentFree(tidyDoc);
        return NULL;
    }
    DFAppendChild(result->docNode,htmlRoot);
    result->root = htmlRoot;

    // HTMLTidy's parser automatically adds an empty title element; drop it, because a lot of
    // tests currently depend on it not being present
    DFNode *headNode = DFChildWithTag(result->root,HTML_HEAD);
    DFNode *titleNode = (headNode != NULL) ? DFChildWithTag(headNode,HTML_TITLE) : NULL;
    if (titleNode != NULL) {
        char *titleText = DFNodeTextToString(titleNode);
        if (titleText[0] == '\0')
            DFRemoveNode(titleNode);
        free(titleText);
    }

    DFHTDocumentFree(tidyDoc);
    return result;
}
// Read the file at filename into a buffer and parse its contents with DFParseHTMLString.
// Returns NULL if the file cannot be read or parsing fails; error is passed through to both
// steps. The buffer is released before returning.
DFDocument *DFParseHTMLFile(const char *filename, int removeSpecial, DFError **error)
{
    DFDocument *parsed = NULL;
    DFBuffer *contents = DFBufferReadFromFile(filename,error);
    if (contents != NULL) {
        parsed = DFParseHTMLString(contents->data,removeSpecial,error);
        DFBufferRelease(contents);
    }
    return parsed;
}
// Collect the src of every <img> element in the subtree rooted at node.
// Each src has its percent-encoding removed and is added to images as a key (with an empty
// string as the value). Children of an <img> element are not visited.
static void DFHTMLGetImagesRecursive(DFNode *node, DFHashTable *images)
{
    if (node->tag != HTML_IMG) {
        DFNode *current = node->first;
        while (current != NULL) {
            DFHTMLGetImagesRecursive(current,images);
            current = current->next;
        }
        return;
    }

    const char *source = DFGetAttribute(node,HTML_SRC);
    if (source == NULL)
        return;

    char *decodedSource = DFRemovePercentEncoding(source);
    DFHashTableAdd(images,decodedSource,"");
    free(decodedSource);
}
// Gather the decoded src values of all <img> elements in the document.
// They are collected as hash table keys, copied out with DFHashTableCopyKeys, and sorted
// case-insensitively. Ownership of the returned array presumably passes to the caller - confirm
// against DFHashTableCopyKeys.
const char **DFHTMLGetImages(DFDocument *htmlDoc)
{
    DFHashTable *sources = DFHashTableNew(NULL,NULL);
    DFHTMLGetImagesRecursive(htmlDoc->root,sources);

    const char **sortedNames = DFHashTableCopyKeys(sources);
    DFSortStringsCaseInsensitive(sortedNames);

    DFHashTableRelease(sources);
    return sortedNames;
}