// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "DFPlatform.h"
#include "WordConverter.h"
#include "WordBookmark.h"
#include "WordField.h"
#include "WordStyles.h"
#include "WordSheet.h"
#include "WordNotes.h"
#include "WordNumbering.h"
#include "WordSection.h"
#include "WordSettings.h"
#include "WordObjects.h"
#include "WordLists.h"
#include "WordGC.h"
#include "WordLenses.h"
#include "WordCaption.h"
#include "WordWhitespace.h"
#include "WordTheme.h"
#include "OPC.h"
#include "DFDOM.h"
#include "DFHTML.h"
#include "DFHTMLNormalization.h"
#include "DFBDT.h"
#include "CSS.h"
#include "CSSProperties.h"
#include "CSSLength.h"
#include "CSSSelector.h"
#include "CSSClassNames.h"
#include "CSSSheet.h"
#include "CSSStyle.h"
#include "DFXML.h"
#include "DFString.h"
#include "DFCharacterSet.h"
#include "DFCommon.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>

// Returns 1 if every child of the run is either a run-properties element (WORD_RPR) or a text
// element (WORD_T) whose text DFStringIsWhitespace() accepts. Any other kind of child, or a
// text element with other content, makes the result 0. A run with no children yields 1.
static int isWhitespaceRun(DFNode *run)
{
    DFNode *item = run->first;
    while (item != NULL) {
        if (item->tag == WORD_T) {
            // The text is copied out of the node, so it must be freed after the check
            char *text = DFNodeTextToString(item);
            int blank = DFStringIsWhitespace(text);
            free(text);
            if (!blank)
                return 0;
        }
        else if (item->tag != WORD_RPR) {
            // Anything other than text or run properties counts as real content
            return 0;
        }
        item = item->next;
    }
    return 1;
}

// Decides whether a paragraph represents a figure.
//
// A paragraph qualifies when, ignoring whitespace-only runs, it has exactly one direct WORD_R
// child and that run has a WORD_DRAWING, WORD_OBJECT or WORD_PICT child. Returns 1 if so, and
// 0 otherwise (including when p is NULL or is not a WORD_P element).
int Word_isFigureParagraph(DFNode *p)
{
    if ((p == NULL) || (p->tag != WORD_P))
        return 0;

    // Count the runs that carry real content, remembering the most recent one
    DFNode *contentRun = NULL;
    int contentRuns = 0;
    for (DFNode *item = p->first; item != NULL; item = item->next) {
        if ((item->tag != WORD_R) || isWhitespaceRun(item))
            continue;
        contentRun = item;
        contentRuns++;
    }

    if (contentRuns != 1)
        return 0;

    // The single content run must hold some form of embedded graphic
    for (DFNode *item = contentRun->first; item != NULL; item = item->next) {
        if ((item->tag == WORD_DRAWING) || (item->tag == WORD_OBJECT) || (item->tag == WORD_PICT))
            return 1;
    }

    return 0;
}

// Returns 1 if p is a WORD_P element that has a MATH_OMATHPARA element among its direct
// children, and 0 otherwise (including when p is NULL or not a paragraph).
int Word_isEquationParagraph(DFNode *p)
{
    if ((p != NULL) && (p->tag == WORD_P)) {
        DFNode *item = p->first;
        while (item != NULL) {
            if (item->tag == MATH_OMATHPARA)
                return 1;
            item = item->next;
        }
    }
    return 0;
}

// Compares the attribute sets of two elements, independent of attribute order.
//
// Returns 1 when both elements have the same number of attributes and every attribute of elemA
// has a counterpart in elemB with the same tag and an identical string value; otherwise 0.
static int attributesEqual(DFNode *elemA, DFNode *elemB)
{
    if (elemA->attrsCount != elemB->attrsCount)
        return 0;

    int total = elemA->attrsCount;
    for (int i = 0; i < total; i++) {
        DFAttribute *wanted = &elemA->attrs[i];

        // Locate the first attribute of elemB carrying the same tag
        DFAttribute *match = NULL;
        for (int j = 0; (j < total) && (match == NULL); j++) {
            DFAttribute *candidate = &elemB->attrs[j];
            if (candidate->tag == wanted->tag)
                match = candidate;
        }

        if (match == NULL)
            return 0;
        if (strcmp(wanted->value,match->value) != 0)
            return 0;
    }

    return 1;
}

// Recursive structural comparison of two subtrees.
//
// Two NULL pointers are considered equal. Otherwise both nodes must be elements with the same
// tag, the same sequence of child tags, equal attributes (see attributesEqual) and pairwise
// equal children. Nodes whose tag is below MIN_ELEMENT_TAG are never reported as equal, so a
// subtree containing such a node never compares equal either.
static int nodesEqual(DFNode *a, DFNode *b)
{
    // Two absent nodes match; an absent node never matches a present one
    if ((a == NULL) || (b == NULL))
        return (a == b);

    if ((a->tag != b->tag) || (a->tag < MIN_ELEMENT_TAG))
        return 0;

    // Cheap pass: the children must line up one-to-one with identical tags
    DFNode *left = a->first;
    DFNode *right = b->first;
    for (; (left != NULL) || (right != NULL); left = left->next, right = right->next) {
        if ((left == NULL) || (right == NULL))
            return 0;
        if (left->tag != right->tag)
            return 0;
    }

    // Then the attributes of the two elements themselves
    if (!attributesEqual(a,b))
        return 0;

    // Expensive pass, deliberately last: compare the contents of each pair of children
    left = a->first;
    right = b->first;
    while ((left != NULL) || (right != NULL)) {
        if (!nodesEqual(left,right))
            return 0;
        left = left->next;
        right = right->next;
    }
    return 1;
}

// Merges adjacent WORD_R siblings under node (and, recursively, throughout its subtree) when
// their WORD_RPR children compare equal according to nodesEqual(); two runs that both lack a
// WORD_RPR child also count as equal. The content of the second run is appended to the first,
// the second run's own WORD_RPR is discarded, and the emptied run is removed.
static void Word_mergeRunsRecursive(DFNode *node)
{
    DFNode *run = node->first;
    while (run != NULL) {
        DFNode *follower = run->next;

        int mergeable = 0;
        if ((run->tag == WORD_R) && (follower != NULL) && (follower->tag == WORD_R)) {
            DFNode *runProps = DFChildWithTag(run,WORD_RPR);
            DFNode *followerProps = DFChildWithTag(follower,WORD_RPR);
            mergeable = nodesEqual(runProps,followerProps);
        }

        if (!mergeable) {
            run = follower;
            continue;
        }

        // Drain the follower: drop its properties element, move everything else across
        DFNode *moving;
        while ((moving = follower->first) != NULL) {
            if (moving->tag == WORD_RPR)
                DFRemoveNode(moving);
            else
                DFAppendChild(run,moving);
        }
        DFRemoveNode(follower);

        // Stay on the same run so that its new next sibling is examined as well
    }

    for (DFNode *item = node->first; item != NULL; item = item->next)
        Word_mergeRunsRecursive(item);
}

// Merges compatible adjacent runs throughout the package's main document. Does nothing when
// the package has no main document.
static void Word_mergeRuns(WordPackage *package)
{
    DFDocument *mainDoc = package->document;
    if (mainDoc == NULL)
        return;
    Word_mergeRunsRecursive(mainDoc->docNode);
}

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                                                                                //
//                                  HTML pre- and post-processing                                 //
//                                                                                                //
////////////////////////////////////////////////////////////////////////////////////////////////////

// Materialises a CSS "content" value as real nodes at the start of a caption element.
//
//   child    The caption element; generated nodes are inserted before what was its first child
//            on entry, in the order the parts appear in the content value.
//   content  The CSS content value, parsed with CSSParseContent(). NULL means nothing to do.
//   caption  Caption record whose `number` field is pointed at the generated field span.
//
// Non-empty string parts become <span> elements containing the text. counter(figure) and
// counter(table) parts become <span class=DFFieldClass> elements holding a SEQ field
// instruction. All other parts (including other counters and empty strings) are ignored.
static void Word_addContentParts(DFNode *child, const char *content, WordCaption *caption)
{
    if (content == NULL)
        return;
    DFNode *nextSibling = child->first;
    DFArray *parts = CSSParseContent(content);
    for (size_t i = 0; i < DFArrayCount(parts); i++) {
        ContentPart *part = DFArrayItemAt(parts,i);
        switch (part->type) {
            case ContentPartString: {
                // Only create nodes when there is something to show, so that no detached
                // text node is left behind for an empty string
                if (strlen(part->value) > 0) {
                    DFNode *span = DFCreateElement(child->doc,HTML_SPAN);
                    DFCreateChildTextNode(span,part->value);
                    DFInsertBefore(child,span,nextSibling);
                }
                break;
            }
            case ContentPartCounter: {
                // Map the CSS counter name to the equivalent Word SEQ field instruction
                const char *instr = NULL;
                if (DFStringEquals(part->value,"figure"))
                    instr = " SEQ Figure \\* ARABIC ";
                else if (DFStringEquals(part->value,"table"))
                    instr = " SEQ Table \\* ARABIC ";

                if (instr != NULL) {
                    DFNode *span = DFCreateElement(child->doc,HTML_SPAN);
                    DFSetAttribute(span,HTML_CLASS,DFFieldClass);
                    DFCreateChildTextNode(span,instr);
                    DFInsertBefore(child,span,nextSibling);
                    caption->number = span;
                }
                break;
            }
            default:
                break;
        }
    }
    DFArrayRelease(parts);
}

// Prepares the HTML tree rooted at node for conversion to Word, then recurses into its children.
//
//   <table>/<figure>  Each <caption>/<figcaption> child is registered with word->objects as the
//                     caption of the table/figure, gets any CSS ::before content materialised
//                     as real nodes, is turned into <p class="Caption">, and is moved so that it
//                     directly follows the table/figure. Remaining paragraph-level children are
//                     then unwrapped so that only their contents are left.
//   <nav>             When its class names a table of contents, list of figures or list of
//                     tables, it is replaced by a paragraph containing the corresponding TOC
//                     field instruction.
static void Word_preProcessHTML(WordConverter *word, DFNode *node)
{
    switch (node->tag) {
        case HTML_TABLE:
        case HTML_FIGURE: {
            DFNode *next;
            for (DFNode *child = node->first; child != NULL; child = next) {
                next = child->next;

                if ((child->tag != HTML_CAPTION) && (child->tag != HTML_FIGCAPTION))
                    continue;

                WordCaption *caption = WordCaptionNew(child);
                WordObjectsSetCaption(word->objects,caption,node);
                caption->contentStart = child->first;

                const char *className = DFGetAttribute(child,HTML_CLASS);
                CSSStyle *style;
                if (child->tag == HTML_CAPTION)
                    style = CSSSheetLookupElement(word->styleSheet,"caption",className,0,0);
                else
                    style = CSSSheetLookupElement(word->styleSheet,"figcaption",className,0,0);

                CSSProperties *before = CSSStyleBefore(style);
                const char *content = CSSGet(before,"content");
                if (content != NULL)
                    Word_addContentParts(child,content,caption);

                // Drop our reference only after the last use of the caption above, so this
                // function does not depend on WordObjectsSetCaption() keeping it alive
                WordCaptionRelease(caption);

                child->tag = HTML_P;
                DFSetAttribute(child,HTML_CLASS,"Caption");
                DFInsertBefore(node->parent,child,node->next);

                // The moved paragraph is not a child of node any more, so the traversal at the
                // end of this function will not reach it; process it explicitly
                Word_preProcessHTML(word,child);
            }

            // The HTML normalization process ensures that apart from the <figcaption> element,
            // all children of a <figure> are paragraphs or containers. Currently the editor only
            // lets you create figures that contain a single image, so it's always a single
            // paragraph. Since the HTML <figure> element gets mapped to a single <w:p> element
            // by WordParagraphLens, we want to make sure it only contains inline children.

            for (DFNode *child = node->first; child != NULL; child = next) {
                next = child->next;
                if (HTML_isParagraphTag(child->tag))
                    DFRemoveNodeButKeepChildren(child);
            }

            // FIXME: Handle <div>, <pre>, lists, tables etc which could also theoretically
            // exist inside the <figure> element

            break;
        }
        case HTML_NAV: {
            const char *className = DFGetAttribute(node,HTML_CLASS);
            const char *instr = NULL;
            if (DFStringEquals(className,DFTableOfContentsClass))
                instr = " TOC \\o \"1-3\" ";
            else if (DFStringEquals(className,DFListOfFiguresClass))
                instr = " TOC \\c \"Figure\" ";
            else if (DFStringEquals(className,DFListOfTablesClass))
                instr = " TOC \\c \"Table\" ";

            if (instr != NULL) {
                // Substitute a paragraph holding just the field instruction for the <nav>
                DFNode *p = DFCreateElement(word->html,HTML_P);
                DFNode *field = DFCreateChildElement(p,HTML_SPAN);
                DFSetAttribute(field,HTML_CLASS,DFFieldClass);
                DFCreateChildTextNode(field,instr);
                DFInsertBefore(node->parent,p,node);
                DFRemoveNode(node);
            }
            break;
        }
    }

    // The next pointer is captured before each recursive call, because processing a child may
    // move or remove it
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;
        Word_preProcessHTML(word,child);
    }
}

// Runs the HTML pre-processing passes: list pre-processing first, then the recursive
// table/figure/nav pass starting at the document node of doc.
static void Word_preProcessHTMLDoc(WordConverter *word, DFDocument *doc)
{
    WordPreProcessHTMLLists(word);

    DFNode *top = doc->docNode;
    Word_preProcessHTML(word,top);
}

// Returns 1 if node is a <span> whose class is DFFieldClass and whose text, parsed with
// Word_parseField(), has "SEQ" as its first argument. Returns 0 otherwise.
static int isSeqField(DFNode *node)
{
    if ((node->tag != HTML_SPAN) ||
        !DFStringEquals(DFGetAttribute(node,HTML_CLASS),DFFieldClass))
        return 0;

    char *instr = DFNodeTextToString(node);
    const char **args = Word_parseField(instr);

    int isSeq = 0;
    if (args[0] != NULL)
        isSeq = (strcmp(args[0],"SEQ") == 0);

    free(args);
    free(instr);
    return isSeq;
}

// Depth-first, document-order search below parent for the first node accepted by isSeqField().
// Returns that node, or NULL if the subtree contains none. parent itself is not tested.
static DFNode *findSeqChild(DFNode *parent)
{
    DFNode *found = NULL;
    for (DFNode *item = parent->first; (item != NULL) && (found == NULL); item = item->next) {
        if (isSeqField(item))
            found = item;
        else
            found = findSeqChild(item);
    }
    return found;
}

// Walks a caption in document order, moving its leading "prefix" out of the tree and into
// result as a CSS content value.
//
//   node          Node currently being visited
//   counterName   Name used in the counter(...) term emitted for the SEQ field
//   result        Buffer receiving the space-separated content terms
//   foundSeq      In/out flag: set to 1 once the SEQ field has been consumed
//   foundContent  In/out flag: set to 1 once text following the SEQ field has been seen,
//                 which ends the extraction
//
// Before the SEQ field, whole text nodes are consumed as quoted strings. The SEQ field itself
// becomes counter(<counterName>). After it, only leading whitespace/punctuation of the next
// text node is consumed. Consumed nodes are removed, and <span> elements that become empty
// as a result are removed too.
static void extractPrefixRecursive(DFNode *node, const char *counterName, DFBuffer *result,
                                   int *foundSeq, int *foundContent)
{
    if (isSeqField(node)) {
        if (result->len > 0)
            DFBufferFormat(result," ");
        DFBufferFormat(result,"counter(%s)",counterName);
        *foundSeq = 1;
        DFRemoveNode(node);
        return;
    }

    if (node->tag == DOM_TEXT) {
        size_t valueLen = strlen(node->value);
        size_t pos = 0;

        if (*foundSeq) {
            // Past the field: find the offset of the first character that is neither
            // whitespace nor punctuation (valueLen if there is no such character)
            size_t offset = 0;
            uint32_t ch;
            do {
                pos = offset;
                ch = DFNextChar(node->value,&offset);
            } while ((ch != 0) && (DFCharIsWhitespaceOrNewline(ch) || DFCharIsPunctuation(ch)));
        }
        else {
            // Before the field: the whole text node belongs to the prefix
            pos = valueLen;
        }

        if (pos == valueLen) {
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedValue = DFQuote(node->value);
            DFBufferFormat(result,"%s",quotedValue);
            free(quotedValue);
            DFRemoveNode(node);
            if (*foundSeq)
                *foundContent = 1;
            return;
        }
        else if (pos > 0) {
            // Split the node: the leading part joins the prefix, the rest stays in place
            char *first = DFSubstring(node->value,0,pos);
            char *rest = DFSubstring(node->value,pos,valueLen);
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedFirst = DFQuote(first);
            DFBufferFormat(result,"%s",quotedFirst);
            free(quotedFirst);
            DFSetNodeValue(node,rest);
            if (*foundSeq)
                *foundContent = 1;
            free(first);
            free(rest);
            return;
        }
        else {
            // pos == 0 with a non-empty value is only possible after the SEQ field: the text
            // starts directly with caption content. The prefix ends here; without this, the
            // scan would carry on and strip leading punctuation from later text nodes.
            *foundContent = 1;
            return;
        }
    }

    int wasEmpty = (node->first == NULL);
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;
        if (*foundContent)
            break;
        extractPrefixRecursive(child,counterName,result,foundSeq,foundContent);
    }
    int isEmpty = (node->first == NULL);
    if ((node->tag == HTML_SPAN) && isEmpty && !wasEmpty)
        DFRemoveNode(node);
}

// Strips the leading label/number portion from the caption rooted at node and returns it as a
// CSS content value, using counterName for the counter term.
//
// Returns NULL, leaving the tree untouched, when the caption contains no SEQ field. Otherwise
// returns a string copied with xstrdup(), which the caller is responsible for freeing.
static char *extractPrefix(DFNode *node, const char *counterName)
{
    char *prefix = NULL;

    if (findSeqChild(node) != NULL) {
        int seenSeq = 0;
        int seenContent = 0;
        DFBuffer *collected = DFBufferNew();
        extractPrefixRecursive(node,counterName,collected,&seenSeq,&seenContent);
        prefix = xstrdup(collected->data);
        DFBufferRelease(collected);
    }

    return prefix;
}

// Tidies up the HTML tree generated from the Word document. The children of node are handled
// first, then the function recurses into each remaining child.
//
//   <span class=DFBookmarkClass>  Unwrapped; its children take its place.
//   <caption>                     Moved into an adjacent <figure> or <table> that does not yet
//                                 have a caption (the preceding sibling is tried first, then the
//                                 following one). Its label/number prefix is then moved into the
//                                 ::before content of the caption's element style, unless that
//                                 style already specifies content.
//   <nav>                         When node is a paragraph-level element, the <nav> is hoisted
//                                 out so that it becomes a sibling placed before node.
static void Word_postProcessHTML(WordConverter *conv, DFNode *node)
{
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;

        switch (child->tag) {
            case HTML_SPAN: {
                const char *className = DFGetAttribute(child,HTML_CLASS);
                if (DFStringEquals(className,DFBookmarkClass)) {
                    // Continue with the span's own children, which are about to take its
                    // place in this list
                    if (child->first != NULL)
                        next = child->first;
                    DFRemoveNodeButKeepChildren(child);
                }
                break;
            }
            case HTML_CAPTION: {
                // Stays NULL if no suitable neighbouring figure/table was found
                const char *counterName = NULL;

                if ((child->prev != NULL) && (child->prev->tag == HTML_FIGURE) &&
                    (DFChildWithTag(child->prev,HTML_FIGCAPTION) == NULL)) {
                    // Caption after a figure: becomes the figure's last child
                    child->tag = HTML_FIGCAPTION;
                    counterName = "figure";
                    DFAppendChild(child->prev,child);
                }
                else if ((child->prev != NULL) && (child->prev->tag == HTML_TABLE) &&
                         (DFChildWithTag(child->prev,HTML_CAPTION) == NULL)) {
                    // Caption after a table: becomes the table's first child
                    counterName = "table";
                    DFInsertBefore(child->prev,child,child->prev->first);
                }
                else if ((child->next != NULL) && (child->next->tag == HTML_FIGURE) &&
                         (DFChildWithTag(child->next,HTML_FIGCAPTION) == NULL)) {
                    // Caption before a figure: becomes the figure's first child
                    child->tag = HTML_FIGCAPTION;
                    counterName = "figure";
                    DFInsertBefore(child->next,child,child->next->first);
                }
                else if ((child->next != NULL) && (child->next->tag == HTML_TABLE) &&
                         (DFChildWithTag(child->next,HTML_CAPTION) == NULL)) {
                    // Caption before a table: becomes the table's first child and is marked
                    // with an explicit caption-side
                    counterName = "table";
                    DFSetAttribute(child,HTML_STYLE,"caption-side: top");
                    DFInsertBefore(child->next,child,child->next->first);
                }

                if (counterName != NULL) {
                    // beforeText is NULL when the caption has no SEQ field
                    char *beforeText = extractPrefix(child,counterName);
                    if (beforeText != NULL) {
                        CSSStyle *style = CSSSheetLookupElement(conv->styleSheet,DFNodeName(child),NULL,1,0);
                        // Do not overwrite ::before content that is already defined
                        if (CSSGet(CSSStyleBefore(style),"content") == NULL) {
                            CSSPut(CSSStyleRule(style),"counter-increment",counterName);
                            CSSPut(CSSStyleBefore(style),"content",beforeText);
                        }
                    }
                    free(beforeText);
                }
                break;
            }
            case HTML_NAV: {
                if (HTML_isParagraphTag(node->tag)) {

                    // Everything preceding the <nav> moves into a new element of the same
                    // type as node, inserted before node
                    if (child->prev != NULL) {
                        DFNode *beforeP = DFCreateElement(conv->html,node->tag);
                        while (child->prev != NULL)
                            DFInsertBefore(beforeP,child->prev,beforeP->first);
                        DFInsertBefore(node->parent,beforeP,node);
                    }
                    // The <nav> itself goes between the new element and node
                    DFInsertBefore(node->parent,child,node);

                    // If node is now empty, or holds only a single <br>, remove it entirely
                    if ((node->first == NULL) ||
                        ((node->first->tag == HTML_BR) && (node->first->next == NULL))) {
                        DFRemoveNode(node);
                        return;
                    }
                    // End this loop; the second loop below restarts from node->first
                    next = NULL;
                }
                break;
            }
        }
    }

    // Recurse into whatever children node has after the changes made above
    for (DFNode *child = node->first; child != NULL; child = next) {
        next = child->next;
        Word_postProcessHTML(conv,child);
    }
}

// Runs the HTML post-processing passes over the converter's HTML document: list
// post-processing first, then the recursive span/caption/nav pass from the document node.
static void Word_postProcessHTMLDoc(WordConverter *conv)
{
    WordPostProcessHTMLLists(conv);

    DFNode *top = conv->html->docNode;
    Word_postProcessHTML(conv,top);
}

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                                                                                //
//                                          WordConverter                                         //
//                                                                                                //
////////////////////////////////////////////////////////////////////////////////////////////////////

// Allocates a converter and sets up the state shared by the get and put directions.
//
//   html             HTML document; retained by the converter
//   abstractStorage  Storage for the abstract document; retained, and asserted to be in
//                    DFFileFormatHTML format
//   package          Word package; retained
//   idPrefix         Prefix for generated id attributes; copied. "word" is used when NULL.
//
// The styleSheet field is left zeroed here. Release the result with WordConverterFree().
static WordConverter *WordConverterNew(DFDocument *html, DFStorage *abstractStorage, WordPackage *package, const char *idPrefix)
{
    // File extension -> MIME type of the supported image formats, in insertion order
    static const char *const imageTypes[][2] = {
        { "jpg",  "image/jpeg" },
        { "jpeg", "image/jpeg" },
        { "tif",  "image/tiff" },
        { "tiff", "image/tiff" },
        { "gif",  "image/gif"  },
        { "bmp",  "image/bmp"  },
        { "png",  "image/png"  },
    };

    WordConverter *converter = (WordConverter *)xcalloc(1,sizeof(WordConverter));

    converter->html = DFDocumentRetain(html);
    converter->abstractStorage = DFStorageRetain(abstractStorage);
    assert(DFStorageFormat(converter->abstractStorage) == DFFileFormatHTML);
    converter->idPrefix = xstrdup((idPrefix != NULL) ? idPrefix : "word");

    // Everything below is derived from the package
    converter->package = WordPackageRetain(package);
    converter->styles = WordSheetNew(converter->package->styles);
    converter->numbering = WordNumberingNew(converter->package);
    converter->theme = WordThemeNew(converter->package);
    converter->mainSection = WordSectionNew();
    converter->objects = WordObjectsNew(converter->package);
    converter->footnotes = WordNoteGroupNewFootnotes(converter->package->footnotes);
    converter->endnotes = WordNoteGroupNewEndnotes(converter->package->endnotes);

    converter->supportedContentTypes = DFHashTableNew((DFCopyFunction)xstrdup,free);
    for (size_t i = 0; i < sizeof(imageTypes)/sizeof(imageTypes[0]); i++)
        DFHashTableAdd(converter->supportedContentTypes,imageTypes[i][0],imageTypes[i][1]);

    converter->warnings = DFBufferNew();
    return converter;
}

// Releases everything held by a converter created with WordConverterNew(), including the
// style sheet assigned later by the get/put entry points, and then frees the converter itself.
// converter must not be NULL, as it is dereferenced unconditionally.
static void WordConverterFree(WordConverter *converter)
{
    DFDocumentRelease(converter->html);
    DFStorageRelease(converter->abstractStorage);
    free(converter->idPrefix);
    WordSheetFree(converter->styles);
    WordNumberingFree(converter->numbering);
    WordThemeFree(converter->theme);
    WordSectionFree(converter->mainSection);
    WordObjectsFree(converter->objects);
    WordNoteGroupRelease(converter->footnotes);
    WordNoteGroupRelease(converter->endnotes);
    DFHashTableRelease(converter->supportedContentTypes);
    DFBufferRelease(converter->warnings);
    CSSSheetRelease(converter->styleSheet);
    // NOTE(review): the package is released after the objects derived from it - presumably
    // deliberate; confirm before reordering
    WordPackageRelease(converter->package);
    free(converter);
}

// Creates an HTML element with the given tag in the converter's HTML document.
//
// When concrete is non-NULL, the new element gets an id attribute identifying the concrete
// node: "<prefix><seqNo>" for nodes of the package's main document, and
// "<prefix><seqNo>-<name of the owning document's root node>" for nodes of any other document.
// The element is returned without being inserted anywhere.
DFNode *WordConverterCreateAbstract(WordGetData *get, Tag tag, DFNode *concrete)
{
    WordConverter *conv = get->conv;
    DFNode *element = DFCreateElement(conv->html,tag);
    if (concrete == NULL)
        return element;

    char *idStr;
    if (concrete->doc != conv->package->document) {
        // Sequence numbers are per-document, so qualify the id with the document's name
        idStr = DFFormatString("%s%u-%s",conv->idPrefix,concrete->seqNo,DFNodeName(concrete->doc->root));
    }
    else {
        idStr = DFFormatString("%s%u",conv->idPrefix,concrete->seqNo);
    }
    DFSetAttribute(element,HTML_ID,idStr);
    free(idStr);

    return element;
}

// Finds the node of the Word package that an abstract (HTML) element was generated from.
//
// Returns the concrete element, or NULL when abstract is NULL or not an element, when its id
// attribute is missing or not of the expected form, when the id names a document that is
// unknown or not present in the package, or when no element has the given sequence number.
DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
{
    // Is the abstract node an element, and does it have an id that matches the prefix used for
    // conversion? That is, does it look like it has a corresponding node in the concrete document?
    if ((abstract == NULL) || (abstract->tag < MIN_ELEMENT_TAG))
        return NULL;
    const char *idStr = DFGetAttribute(abstract,HTML_ID);
    if ((idStr == NULL) || !DFStringHasPrefix(idStr,put->conv->idPrefix))
        return NULL;

    // Determine the node sequence number and the document based on the id attribute.
    // The format of the attribute is <prefix><seqno>(-<docname>)?, where
    //
    //     <prefix>  is the BDT prefix we use to identify nodes that match the original document
    //     <seqno>   is an integer uniquely identifying a node in a given document
    //     <docname> is the name of the document, either footnotes or endnotes. If absent, it is
    //               the main content document (that is, document.xml)
    //
    // Note that the sequence number only makes sense within the context of a specific document. It
    // is possible to have two different nodes in different documents that have the same sequence number.
    // It is for this reason that the id string identifies both the node and the document.

    const char *cursor = idStr + strlen(put->conv->idPrefix);

    // At least one digit is required. An id that merely happens to start with the prefix
    // (e.g. "words" when the prefix is "word") does not refer to a concrete node.
    if ((*cursor < '0') || (*cursor > '9'))
        return NULL;

    unsigned int seqNo = 0;
    while ((*cursor >= '0') && (*cursor <= '9'))
        seqNo = seqNo*10 + (unsigned int)(*cursor++ - '0');

    // After the digits, only the end of the string or "-<docname>" is acceptable. Anything
    // else (e.g. "word2vec") is an unrelated id rather than a reference to node 2.
    DFDocument *doc = NULL;
    if (*cursor == '\0')
        doc = put->conv->package->document;
    else if (*cursor != '-')
        return NULL;
    else if (!strcmp(cursor + 1,"footnotes"))
        doc = put->conv->package->footnotes;
    else if (!strcmp(cursor + 1,"endnotes"))
        doc = put->conv->package->endnotes;
    else
        return NULL;

    // The package may not contain the document the id refers to
    if (doc == NULL)
        return NULL;

    // Check to see if we have a node in the concrete document matching that sequence number
    DFNode *node = DFNodeForSeqNo(doc,seqNo);

    // Only return the node if it's actually an element
    if ((node == NULL) || (node->tag < MIN_ELEMENT_TAG))
        return NULL;
    return node;
}

// Generates the abstract HTML document from a Word package.
//
//   html             HTML document that receives the generated content
//   abstractStorage  Storage associated with the abstract document (passed to the converter)
//   package          Word package to read; its main document is simplified in place first
//   idPrefix         Prefix for generated id attributes; NULL selects the default
//   error            Receives a message on failure
//
// Returns 1 on success. Returns 0 if the package has no main document, if the main document
// has no WORD_DOCUMENT child, or if any warnings were recorded during conversion (in which
// case the warning text is reported through error).
int WordConverterGet(DFDocument *html, DFStorage *abstractStorage, WordPackage *package, const char *idPrefix, DFError **error)
{
    if (package->document == NULL) {
        DFErrorFormat(error,"document.xml not found");
        return 0;
    }

    DFNode *wordDocument = DFChildWithTag(package->document->docNode,WORD_DOCUMENT);
    if (wordDocument == NULL) {
        DFErrorFormat(error,"word:document not found");
        return 0;
    }

    // Normalise the concrete document before conversion: simplify fields, then merge
    // adjacent runs whose run properties are equal
    int haveFields = Word_simplifyFields(package);
    Word_mergeRuns(package);

    WordConverter *converter = WordConverterNew(html,abstractStorage,package,idPrefix);
    converter->haveFields = haveFields;
    WordAddNbsps(converter->package->document);
    WordFixLists(converter);

    // Replace the (initially unset) style sheet with one built by WordParseStyles()
    CSSSheetRelease(converter->styleSheet);
    converter->styleSheet = WordParseStyles(converter);
    WordObjectsCollapseBookmarks(converter->objects);
    WordObjectsScan(converter->objects);
    WordObjectsAnalyzeBookmarks(converter->objects,converter->styles);

    // Run the document lens to produce the abstract tree, and make it the root of the HTML
    // document
    WordGetData get;
    get.conv = converter;
    DFNode *abstract = WordDocumentLens.get(&get,wordDocument);
    DFAppendChild(converter->html->docNode,abstract);
    converter->html->root = abstract;
    Word_postProcessHTMLDoc(converter);

    // Attach the style sheets: a link to reset.css plus the generated CSS text inline
    HTMLAddExternalStyleSheet(converter->html,"reset.css");
    char *cssText = CSSSheetCopyCSSText(converter->styleSheet);
    HTMLAddInternalStyleSheet(converter->html,cssText);
    free(cssText);

    HTML_safeIndent(converter->html->docNode,0);

    // Any accumulated warnings turn the whole conversion into a failure
    int ok = 1;
    if (converter->warnings->len > 0) {
        DFErrorFormat(error,"%s",converter->warnings->data);
        ok = 0;
    }

    WordConverterFree(converter);
    return ok;
}

// Walks the HTML subtree rooted at node and records, for each <p> that has a CONV_LISTNUM
// attribute and a concrete counterpart with a numbering id (w:pPr/w:numPr/w:numId, WORD_VAL
// attribute), the pairing between the HTML list id and the Word numbering id.
//
// A pairing is only added when neither id is already present in put->htmlIdByNumId /
// put->numIdByHtmlId. When one is added and the numbering definition exists, its
// referenceCount is incremented.
static void buildListMapFromHTML(WordPutData *put, DFNode *node)
{
    if (node->tag == HTML_P) {
        const char *htmlId = DFGetAttribute(node,CONV_LISTNUM);

        // Follow the chain concrete paragraph -> pPr -> numPr -> numId, stopping at the
        // first missing link
        const char *numId = NULL;
        if (htmlId != NULL) {
            DFNode *step = WordConverterGetConcrete(put,node);
            if (step != NULL)
                step = DFChildWithTag(step,WORD_PPR);
            if (step != NULL)
                step = DFChildWithTag(step,WORD_NUMPR);
            if (step != NULL)
                step = DFChildWithTag(step,WORD_NUMID);
            if (step != NULL)
                numId = DFGetAttribute(step,WORD_VAL);
        }

        if (numId != NULL) {
            const char *knownHtmlId = DFHashTableLookup(put->htmlIdByNumId,numId);
            const char *knownNumId = DFHashTableLookup(put->numIdByHtmlId,htmlId);
            int unseen = (knownHtmlId == NULL) && (knownNumId == NULL);
            if (unseen) {
                DFHashTableAdd(put->htmlIdByNumId,numId,htmlId);
                DFHashTableAdd(put->numIdByHtmlId,htmlId,numId);

                WordConcreteNum *num = WordNumberingConcreteWithId(put->conv->numbering,numId);
                if (num != NULL)
                    num->referenceCount++;
            }
        }
    }

    DFNode *item = node->first;
    while (item != NULL) {
        buildListMapFromHTML(put,item);
        item = item->next;
    }
}

// Drops list mappings whose HTML list type no longer agrees with the Word numbering level.
//
// For every HTML list id in put->numIdByHtmlId, the list style type of the mapped numbering
// level is compared with the CONV_LISTTYPE attribute of the HTML list node. On a mismatch the
// entry is removed from both maps, and the numbering definition is removed as well when its
// referenceCount is exactly 1.
static void updateListTypes(WordPutData *put)
{
    // Iterate over a snapshot of the keys, since entries are removed inside the loop
    const char **htmlIds = DFHashTableCopyKeys(put->numIdByHtmlId);
    for (const char **cursor = htmlIds; *cursor != NULL; cursor++) {
        const char *htmlId = *cursor;
        const char *numId = DFHashTableLookup(put->numIdByHtmlId,htmlId);
        WordConcreteNum *num = WordNumberingConcreteWithId(put->conv->numbering,numId);
        if (num == NULL)
            continue; // FIXME: remove entry from both maps so it is re-created

        // The HTML list id is the sequence number of the list node in the HTML document
        DFNode *listNode = DFNodeForSeqNo(put->conv->html,(unsigned int)atoi(htmlId));
        assert(listNode != NULL);

        const char *htmlType = DFGetAttribute(listNode,CONV_LISTTYPE);
        const char *htmlIlvl = DFGetAttribute(listNode,CONV_ILVL);

        // NOTE(review): htmlIlvl is passed to atoi() unchecked - presumably the attribute is
        // always present on list nodes at this point; confirm
        WordNumLevel *level = WordConcreteNumGetLevel(num,atoi(htmlIlvl));
        if (level == NULL)
            continue; // FIXME: remove entry from both maps so it is re-created

        if (DFStringEquals(WordNumLevelToListStyleType(level),htmlType))
            continue;

        // Make a copy of numId, as it may be freed during the first call to DFHashTableRemove
        char *numIdCopy = xstrdup(numId);
        DFHashTableRemove(put->numIdByHtmlId,htmlId);
        DFHashTableRemove(put->htmlIdByNumId,numIdCopy);
        free(numIdCopy);
        if (num->referenceCount == 1)
            WordNumberingRemoveConcrete(put->conv->numbering,num);
    }
    free(htmlIds);
}

// Ensures the converter's style sheet has a default style for the paragraph, character and
// table families. For each family lacking one, a style is looked up (and created if needed)
// and installed as the default:
//
//   paragraph  p.Normal
//   character  span.DefaultParagraphFont, display name "Default Paragraph Font"
//   table      table.Normal_Table, display name "Normal Table", with cell padding of 5.4pt
//              left/right and 0pt top/bottom
static void addMissingDefaultStyles(WordConverter *converter)
{
    // Cell padding applied to a newly-created default table style, in application order
    static const char *const cellPadding[][2] = {
        { "padding-left",   "5.4pt" },
        { "padding-right",  "5.4pt" },
        { "padding-top",    "0pt"   },
        { "padding-bottom", "0pt"   },
    };

    if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyParagraph) == NULL) {
        CSSStyle *normal = CSSSheetLookupElement(converter->styleSheet,"p","Normal",1,0);
        CSSSheetSetDefaultStyle(converter->styleSheet,normal,StyleFamilyParagraph);
    }

    if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyCharacter) == NULL) {
        CSSStyle *font = CSSSheetLookupElement(converter->styleSheet,"span","DefaultParagraphFont",1,0);
        CSSStyleSetDisplayName(font,"Default Paragraph Font");
        CSSSheetSetDefaultStyle(converter->styleSheet,font,StyleFamilyCharacter);
    }

    if (CSSSheetDefaultStyleForFamily(converter->styleSheet,StyleFamilyTable) == NULL) {
        CSSStyle *table = CSSSheetLookupElement(converter->styleSheet,"table","Normal_Table",1,0);
        CSSStyleSetDisplayName(table,"Normal Table");
        for (size_t i = 0; i < sizeof(cellPadding)/sizeof(cellPadding[0]); i++)
            CSSPut(CSSStyleCell(table),cellPadding[i][0],cellPadding[i][1]);
        CSSSheetSetDefaultStyle(converter->styleSheet,table,StyleFamilyTable);
    }
}

// Update the contents of a Word package from an HTML document (the "put" direction of the
// conversion).
//
// html            - The HTML document to convert. It is passed to HTML_normalizeDocument() and
//                   HTML_pushDownInlineProperties() before anything else happens (judging by
//                   their names these modify the document in place - confirm before relying on
//                   html being unchanged).
// abstractStorage - Forwarded unchanged to WordConverterNew()
// package         - The Word package to update. Must have a document part containing a
//                   word:document element, and (enforced by assert) a styles part.
// idPrefix        - Forwarded unchanged to WordConverterNew()
// error           - Receives a description of the failure when 0 is returned
//
// Returns 1 on success. Returns 0 if the package has no document, if the word:document element
// is missing, or if any warnings were recorded on the converter during the conversion; in the
// last case the accumulated warning text is reported through error.
//
// The sequence of processing steps below is order-sensitive (see the comments in the body).
int WordConverterPut(DFDocument *html, DFStorage *abstractStorage, WordPackage *package, const char *idPrefix, DFError **error)
{
    // Validate the package before allocating anything, so the early returns need no cleanup
    if (package->document == NULL) {
        DFErrorFormat(error,"document.xml not found");
        return 0;
    }

    DFNode *wordDocument = DFChildWithTag(package->document->docNode,WORD_DOCUMENT);
    if (wordDocument == NULL) {
        DFErrorFormat(error,"word:document not found");
        return 0;
    }

    HTML_normalizeDocument(html);
    HTML_pushDownInlineProperties(html->docNode);

    // From here on, every path must reach WordConverterFree() at the end of the function
    WordConverter *converter = WordConverterNew(html,abstractStorage,package,idPrefix);

    // FIXME: Need a more reliable way of telling whether this is a new document or not - it could be that the
    // document already existed (with styles set up) but did not have any content
    // For now, a missing or empty word:body is treated as "creating a new document"
    DFNode *wordBody = DFChildWithTag(wordDocument,WORD_BODY);
    int creating = ((wordBody == NULL) || (wordBody->first == NULL));

    converter->haveFields = Word_simplifyFields(converter->package);
    Word_mergeRuns(converter->package);

    assert(converter->package->styles);

    // Discard the stylesheet the converter was created with, and rebuild it from the CSS text
    // of the HTML document
    CSSSheetRelease(converter->styleSheet);
    converter->styleSheet = CSSSheetNew();

    char *cssText = HTMLCopyCSSText(converter->html);
    CSSSheetUpdateFromCSSText(converter->styleSheet,cssText);
    free(cssText);

    addMissingDefaultStyles(converter);
    CSSEnsureReferencedStylesPresent(converter->html,converter->styleSheet);
    // HTML defaults are only applied when creating a new document
    if (creating)
        CSSSetHTMLDefaults(converter->styleSheet);
    CSSEnsureUnique(converter->styleSheet,converter->html,creating);

    // Obtain the @page and body properties. When the corresponding style exists its rule is
    // retained, otherwise a fresh property set is created; either way we hold a reference that
    // is released at the end of this function.
    CSSStyle *pageStyle = CSSSheetLookupElement(converter->styleSheet,"@page",NULL,0,0);
    CSSStyle *bodyStyle = CSSSheetLookupElement(converter->styleSheet,"body",NULL,1,0);
    CSSProperties *page = (pageStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(pageStyle)) : CSSPropertiesNew();
    CSSProperties *body = (bodyStyle != NULL) ? CSSPropertiesRetain(CSSStyleRule(bodyStyle)) : CSSPropertiesNew();

    // Any body margin that the CSS does not specify defaults to 10%
    if (CSSGet(body,"margin-left") == NULL)
        CSSPut(body,"margin-left","10%");
    if (CSSGet(body,"margin-right") == NULL)
        CSSPut(body,"margin-right","10%");
    if (CSSGet(body,"margin-top") == NULL)
        CSSPut(body,"margin-top","10%");
    if (CSSGet(body,"margin-bottom") == NULL)
        CSSPut(body,"margin-bottom","10%");

    WordSectionUpdateFromCSSPage(converter->mainSection,page,body);

    // State shared by the put operations. The two hash tables map between HTML list ids and
    // Word numIds in both directions; their entries are copied with xstrdup and released with
    // free.
    WordPutData put;
    put.conv = converter;
    put.contentDoc = converter->package->document;
    put.numIdByHtmlId = DFHashTableNew((DFCopyFunction)xstrdup,free);
    put.htmlIdByNumId = DFHashTableNew((DFCopyFunction)xstrdup,free);

    // Make sure we update styles.xml from the CSS stylesheet *before* doing any conversion of the content,
    // since the latter requires a full mapping of CSS selectors to styleIds to be in place.
    WordUpdateStyles(converter,converter->styleSheet);

    // Main conversion pipeline. The bookmark collapse/expand calls bracket the lens-based
    // update of the document content.
    Word_preProcessHTMLDoc(converter,converter->html);
    buildListMapFromHTML(&put,converter->html->docNode);
    updateListTypes(&put);
    WordBookmarks_removeCaptionBookmarks(converter->package->document);
    WordObjectsCollapseBookmarks(converter->objects);
    WordObjectsScan(converter->objects);
    Word_setupBookmarkLinks(&put);
    WordObjectsAnalyzeBookmarks(converter->objects,converter->styles);
    WordDocumentLens.put(&put,converter->html->root,wordDocument);
    WordObjectsExpandBookmarks(converter->objects);
    WordRemoveNbsps(converter->package->document);

    // Make sure the updateFields flag is set
    Word_updateSettings(converter->package,converter->haveFields);

    // Remove any abstract numbering definitions that are no longer referenced from concrete
    // numbering definitions
    WordNumberingRemoveUnusedAbstractNums(converter->numbering);

    // Remove any relationships and images that have been removed from the HTML file and no longer
    // have any other references pointing to them
    WordGarbageCollect(converter->package);

    // Release everything acquired above
    CSSPropertiesRelease(page);
    CSSPropertiesRelease(body);
    DFHashTableRelease(put.numIdByHtmlId);
    DFHashTableRelease(put.htmlIdByNumId);

    // Any warning recorded during the conversion turns the whole operation into a failure,
    // with the accumulated warning text reported as the error message
    int ok = 1;
    if (converter->warnings->len > 0) {
        DFErrorFormat(error,"%s",converter->warnings->data);
        ok = 0;
    }

    WordConverterFree(converter);
    return ok;
}

// Record a warning on the converter. The format string and the variadic arguments following it
// are handed to DFBufferVFormat(), which writes into converter->warnings. WordConverterPut()
// treats a non-empty warnings buffer as a failure and reports its contents as the error.
void WordConverterWarning(WordConverter *converter, const char *format, ...)
{
    va_list args;

    va_start(args,format);
    DFBufferVFormat(converter->warnings,format,args);
    va_end(args);
}

// Determine the Word styleId to use for a CSS style, based on the style's selector and class
// name. The rules are tried in this order:
//
//   1. A handful of selectors with fixed styleIds (e.g. table.Normal_Table -> TableNormal)
//   2. Heading selectors whose class name begins with "heading_<level>", which become
//      "Heading<level>" followed by the remainder of the class name
//   3. span.Heading1Char ... span.Heading9Char, which map to the class name itself
//   4. A per-element rule: figure -> "Figure", caption -> "Caption", h1-h6 -> the class name
//      (or "Heading<level>" when there is no class name), p/span/table -> the class name
//   5. If none of the above produced a result, the selector itself
//
// The result is a newly allocated string (produced by xstrdup, DFStrDup or DFFormatString);
// the caller is responsible for freeing it.
char *WordStyleIdForStyle(CSSStyle *style)
{
    struct FixedStyleId {
        const char *selector;
        const char *styleId;
    };

    // Selectors that are checked before the heading class-name rule
    static const struct FixedStyleId builtinStyles[] = {
        { "table.Normal_Table", "TableNormal" },
        { "table.Table_Grid", "TableGrid" },
        { "span.Default_Paragraph_Font", "DefaultParagraphFont" },
        { "p.List_Paragraph", "ListParagraph" },
    };

    // Selectors that are checked after the heading class-name rule
    static const struct FixedStyleId headingCharStyles[] = {
        { "span.Heading1Char", "Heading1Char" },
        { "span.Heading2Char", "Heading2Char" },
        { "span.Heading3Char", "Heading3Char" },
        { "span.Heading4Char", "Heading4Char" },
        { "span.Heading5Char", "Heading5Char" },
        { "span.Heading6Char", "Heading6Char" },
        { "span.Heading7Char", "Heading7Char" },
        { "span.Heading8Char", "Heading8Char" },
        { "span.Heading9Char", "Heading9Char" },
    };

    const char *selector = style->selector;

    for (size_t i = 0; i < sizeof(builtinStyles)/sizeof(builtinStyles[0]); i++) {
        if (strcmp(selector,builtinStyles[i].selector) == 0)
            return xstrdup(builtinStyles[i].styleId);
    }

    // Heading with a class name of the form heading_<level><suffix> -> Heading<level><suffix>
    int headingLevel = CSSSelectorHeadingLevel(selector);
    if (headingLevel != 0) {
        char *prefix = DFFormatString("heading_%d",headingLevel);
        int hasHeadingClass = (style->className != NULL) && DFStringHasPrefix(style->className,prefix);
        char *headingId = NULL;
        if (hasHeadingClass) {
            char *suffix = DFSubstring(style->className,strlen(prefix),strlen(style->className));
            headingId = DFFormatString("Heading%d%s",headingLevel,suffix);
            free(suffix);
        }
        free(prefix);
        if (hasHeadingClass)
            return headingId;
    }

    for (size_t i = 0; i < sizeof(headingCharStyles)/sizeof(headingCharStyles[0]); i++) {
        if (strcmp(selector,headingCharStyles[i].selector) == 0)
            return xstrdup(headingCharStyles[i].styleId);
    }

    // Per-element rules. styleId remains NULL if the element type is not handled here, or if
    // the rule for that element did not yield a value.
    char *styleId = NULL;
    char *className = CSSSelectorCopyClassName(selector);
    switch (CSSSelectorGetTag(selector)) {
        case HTML_FIGURE:
            styleId = DFStrDup("Figure");
            break;
        case HTML_CAPTION:
            styleId = DFStrDup("Caption");
            break;
        case HTML_H1:
        case HTML_H2:
        case HTML_H3:
        case HTML_H4:
        case HTML_H5:
        case HTML_H6:
            if ((className != NULL) && (strlen(className) != 0)) {
                styleId = DFStrDup(className);
            }
            else {
                int level = CSSSelectorHeadingLevel(selector);
                // FIXME: we shouldn't rely on the specific word "Heading" here - instead using the localised name
                // FIXME: not covered by tests
                if ((level >= 1) && (level <= 6))
                    styleId = DFFormatString("Heading%d",level);
            }
            break;
        case HTML_P:
        case HTML_SPAN:
        case HTML_TABLE:
            styleId = DFStrDup(className);
            break;
        default:
            break;
    }
    free(className);

    // Note: selector here may start with . (i.e. applies to all elements)
    // FIXME: not covered by tests
    if (styleId == NULL)
        styleId = xstrdup(selector);

    return styleId;
}

// Classify a CSS selector into a style family, based on the element it applies to:
//
//   figure, caption, h1-h6 -> StyleFamilyParagraph
//   p                      -> StyleFamilyParagraph if the selector has a class name,
//                             otherwise StyleFamilySpecial
//   span                   -> StyleFamilyCharacter
//   table                  -> StyleFamilyTable
//   anything else          -> StyleFamilySpecial
StyleFamily WordStyleFamilyForSelector(const char *selector)
{
    StyleFamily family = StyleFamilySpecial;

    switch (CSSSelectorGetTag(selector)) {
        case HTML_FIGURE:
        case HTML_CAPTION:
        case HTML_H1:
        case HTML_H2:
        case HTML_H3:
        case HTML_H4:
        case HTML_H5:
        case HTML_H6:
            family = StyleFamilyParagraph;
            break;
        case HTML_P: {
            // A bare "p" selector (no class name) is not a paragraph style
            char *className = CSSSelectorCopyClassName(selector);
            if (className != NULL)
                family = StyleFamilyParagraph;
            free(className);
            break;
        }
        case HTML_SPAN:
            family = StyleFamilyCharacter;
            break;
        case HTML_TABLE:
            family = StyleFamilyTable;
            break;
        default:
            break;
    }

    return family;
}

// Build a tag-indexed lookup table of a node's element children.
//
// node     - The node whose children are to be indexed
// children - Output array, which must have room for PREDEFINED_TAG_COUNT entries. All entries
//            are first cleared to NULL; children[tag] is then set to the child of node with
//            that tag.
//
// Only children whose tag lies in the range [MIN_ELEMENT_TAG, PREDEFINED_TAG_COUNT) are recorded;
// others are ignored. If several children share the same tag, the last one wins.
void childrenToArray(DFNode *node, DFNode **children)
{
    // memset (standard C, <string.h>) is used in preference to the legacy bzero, which was
    // removed from POSIX.1-2008 and is not available on all platforms
    memset(children,0,PREDEFINED_TAG_COUNT*sizeof(children[0]));
    for (DFNode *child = node->first; child != NULL; child = child->next) {
        if ((child->tag >= MIN_ELEMENT_TAG) && (child->tag < PREDEFINED_TAG_COUNT))
            children[child->tag] = child;
    }
}

// Replace the children of a node with a selection of nodes from a tag-indexed array.
//
// node     - The node whose children are to be replaced
// children - Array of nodes indexed by tag (e.g. as filled in by childrenToArray())
// tags     - Zero-terminated list of tags, giving the order in which children are re-added
//
// All existing children are first removed from node. Then, for each tag in the list, the
// corresponding entry of children (if non-NULL) is appended to node.
void replaceChildrenFromArray(DFNode *node, DFNode **children, Tag *tags)
{
    DFNode *existing;
    while ((existing = node->first) != NULL)
        DFRemoveNode(existing);

    for (Tag *tag = tags; *tag != 0; tag++) {
        DFNode *replacement = children[*tag];
        if (replacement)
            DFAppendChild(node,replacement);
    }
}
