blob: 70aad45288f9da612909389e8c0761274f90c6b6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "DFPlatform.h"
#include "DFHTDocument.h"
#include "DFString.h"
#include "streamio.h"
#include "DFCommon.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
// //
// HTML document processing functions //
// //
////////////////////////////////////////////////////////////////////////////////////////////////////
// Looks up the attribute identified by attrId on node and returns its value string.
// Returns NULL when the node carries no such attribute.
static ctmbstr getAttrValue(TidyNode node, TidyAttrId attrId)
{
    TidyAttr found = tidyAttrGetById(node,attrId);
    return (found != NULL) ? tidyAttrValue(found) : NULL;
}
static void removeSpecial(TidyDoc doc, TidyNode node)
{
// We process the children first, so that if there are any nested removable elements (e.g.
// a selection span inside of an autocorrect span), all levels of nesting are taken care of
TidyNode next;
for (TidyNode child = tidyGetChild(node); child != NULL; child = next) {
next = tidyGetNext(child);
removeSpecial(doc,child);
}
if (!tidyNodeIsText(node)) {
ctmbstr cssClass = getAttrValue(node,TidyAttr_CLASS);
if (cssClass != NULL) {
if (!strcmp(cssClass,"uxwrite-heading-number") ||
!strcmp(cssClass,"uxwrite-figure-number") ||
!strcmp(cssClass,"uxwrite-table-number") ||
!strcmp(cssClass,"uxwrite-autocorrect") ||
!strcmp(cssClass,"uxwrite-selection") ||
!strcmp(cssClass,"uxwrite-selection-highlight") ||
!strcmp(cssClass,"uxwrite-spelling") ||
!strcmp(cssClass,"uxwrite-match")) {
tidyDiscardContainer(doc,node);
return;
}
}
switch (tidyNodeGetId(node)) {
case TidyTag_META: {
ctmbstr name = getAttrValue(node,TidyAttr_NAME);
if ((name != NULL) && !strcasecmp(name,"viewport")) {
tidyDiscardElement(doc,node);
return;
}
break;
}
case TidyTag_LINK: {
// This code removes the special "built-in" stylesheet that UX Write uses for displaying documents.
// FIXME: change the filename so it uniquely identifies it as being UX Write's own CSS file, not
// some other one that happens to have the same filename.
// This code really belongs in UX Write itself, rather than DocFormats
ctmbstr rel = getAttrValue(node,TidyAttr_REL);
ctmbstr href = getAttrValue(node,TidyAttr_HREF);
if ((rel != NULL) && (href != NULL)) {
if (DFStringEquals(rel,"stylesheet") && DFStringHasSuffix(href,"/builtin.css")) {
tidyDiscardElement(doc,node);
return;
}
}
break;
}
case TidyTag_HTML: {
TidyAttr attr = tidyAttrGetById(node,TidyAttr_STYLE);
if (attr != NULL)
tidyRemoveAttribute(doc,node,attr);
break;
}
default:
break;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// //
// DFHTDocument //
// //
////////////////////////////////////////////////////////////////////////////////////////////////////
// Allocates a zero-initialised DFHTDocument, creates its Tidy document, routes Tidy's diagnostics
// into the object's own errbuf, and applies the Tidy options used by this module.
// The returned object is to be released with DFHTDocumentFree().
DFHTDocument *DFHTDocumentNew()
{
    DFHTDocument *created = (DFHTDocument *)xcalloc(1,sizeof(*created));

    created->doc = tidyCreate();

    // Diagnostics emitted by Tidy are collected in errbuf
    tidyBufInit(&created->errbuf);
    tidySetErrorBuffer(created->doc,&created->errbuf);

    // Output layout: automatic indentation, wrapping at 80 columns
    tidyOptSetInt(created->doc,TidyIndentContent,TidyAutoState);
    tidyOptSetInt(created->doc,TidyWrapLen,80);

    // Switch off the TidyDropEmptyElems and TidyMark options
    tidyOptSetBool(created->doc,TidyDropEmptyElems,no);
    tidyOptSetBool(created->doc,TidyMark,no);

    // UTF-8 for both input and output
    tidyOptSetInt(created->doc,TidyInCharEncoding,UTF8);
    tidyOptSetInt(created->doc,TidyOutCharEncoding,UTF8);

    return created;
}
// Releases everything owned by htd: the Tidy document, the error buffer, and the DFHTDocument
// structure itself. htd must not be used after this call.
// Passing NULL is allowed and does nothing, mirroring free(NULL), so that cleanup paths can call
// this unconditionally.
void DFHTDocumentFree(DFHTDocument *htd)
{
    if (htd == NULL)
        return;
    tidyRelease(htd->doc);
    tidyBufFree(&htd->errbuf);
    free(htd);
}
// Parses the NUL-terminated string str into htd's Tidy document.
// Returns 1 when tidyParseBuffer reports a non-negative status. Otherwise stores a message
// containing the negated status code in *error (via DFErrorFormat) and returns 0.
int DFHTDocumentParseCString(DFHTDocument *htd, const char *str, DFError **error)
{
    TidyBuffer input;
    tidyBufInit(&input);

    // Wrap the caller's string in the buffer without copying it
    tidyBufAttach(&input,(byte*)str,(unsigned int)strlen(str));
    int status = tidyParseBuffer(htd->doc,&input);

    // Detach before freeing, so that the buffer does not take ownership of the caller's string
    tidyBufDetach(&input);
    tidyBufFree(&input);

    if (status < 0) {
        DFErrorFormat(error,"Operation failed: error %d\n",-status);
        return 0;
    }
    return 1;
}
// Runs removeSpecial() over the whole parsed document, starting at the root node of htd's Tidy
// document. The document is modified in place.
void DFHTDocumentRemoveUXWriteSpecial(DFHTDocument *htd)
{
    removeSpecial(htd->doc,
                  tidyGetRoot(htd->doc));
}