| /* |
| tidy.c - HTML TidyLib command line driver |
| |
| Copyright (c) 1998-2008 World Wide Web Consortium |
| (Massachusetts Institute of Technology, European Research |
| Consortium for Informatics and Mathematics, Keio University). |
| All Rights Reserved. |
| |
| */ |
| |
| #include "tidy.h" |
| |
/* Stream that receives this driver's own diagnostics.  It starts out as
 * NULL; main() points it at stderr before anything is written to it. */
static FILE *errout = NULL;
| |
| static Bool samefile( ctmbstr filename1, ctmbstr filename2 ) |
| { |
| #if FILENAMES_CASE_SENSITIVE |
| return ( strcmp( filename1, filename2 ) == 0 ); |
| #else |
| return ( strcasecmp( filename1, filename2 ) == 0 ); |
| #endif |
| } |
| |
/* Report an allocation failure on stderr and terminate the process with
 * exit status 1.  Never returns. */
static void outOfMemory(void)
{
    fputs("Out of memory. Bailing out.", stderr);
    exit(1);
}
| |
| static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf) |
| { |
| if (!s) |
| { |
| sbuf[0] = '\0'; |
| return NULL; |
| } |
| else if (strlen(s) <= offset) |
| { |
| strcpy(sbuf,s); |
| sbuf[offset] = '\0'; |
| return NULL; |
| } |
| else |
| { |
| uint j, l, n; |
| j = offset; |
| while(j && s[j] != ' ') |
| --j; |
| l = j; |
| n = j+1; |
| /* no white space */ |
| if (j==0) |
| { |
| l = offset; |
| n = offset; |
| } |
| strncpy(sbuf,s,l); |
| sbuf[l] = '\0'; |
| return s+n; |
| } |
| } |
| |
| static void print2Columns( const char* fmt, uint l1, uint l2, |
| const char *c1, const char *c2 ) |
| { |
| const char *pc1=c1, *pc2=c2; |
| char *c1buf = (char *)malloc(l1+1); |
| char *c2buf = (char *)malloc(l2+1); |
| if (!c1buf) outOfMemory(); |
| if (!c2buf) outOfMemory(); |
| |
| do |
| { |
| pc1 = cutToWhiteSpace(pc1, l1, c1buf); |
| pc2 = cutToWhiteSpace(pc2, l2, c2buf); |
| printf(fmt, |
| c1buf[0]!='\0'?c1buf:"", |
| c2buf[0]!='\0'?c2buf:""); |
| } while (pc1 || pc2); |
| free(c1buf); |
| free(c2buf); |
| } |
| |
| static void print3Columns( const char* fmt, uint l1, uint l2, uint l3, |
| const char *c1, const char *c2, const char *c3 ) |
| { |
| const char *pc1=c1, *pc2=c2, *pc3=c3; |
| char *c1buf = (char *)malloc(l1+1); |
| char *c2buf = (char *)malloc(l2+1); |
| char *c3buf = (char *)malloc(l3+1); |
| if (!c1buf) outOfMemory(); |
| if (!c2buf) outOfMemory(); |
| if (!c3buf) outOfMemory(); |
| |
| do |
| { |
| pc1 = cutToWhiteSpace(pc1, l1, c1buf); |
| pc2 = cutToWhiteSpace(pc2, l2, c2buf); |
| pc3 = cutToWhiteSpace(pc3, l3, c3buf); |
| printf(fmt, |
| c1buf[0]!='\0'?c1buf:"", |
| c2buf[0]!='\0'?c2buf:"", |
| c3buf[0]!='\0'?c3buf:""); |
| } while (pc1 || pc2 || pc3); |
| free(c1buf); |
| free(c2buf); |
| free(c3buf); |
| } |
| |
/* Row layout of the command line help: option names, then description. */
static const char helpfmt[] = " %-19.19s %-58.58s\n";

/* Source of the underline printed below each help category title. */
static const char helpul[]
    = "-----------------------------------------------------------------";

/* Three column row (name, type, value) without a read-only marker.
 * Two blanks precede the last column so that it starts in the same
 * position as the value printed through valfmt, where the second blank is
 * taken by the one-character marker field. */
static const char fmt[] = "%-27.27s %-9.9s  %-40.40s\n";

/* Three column row with a one-character read-only marker directly in
 * front of the value. */
static const char valfmt[] = "%-27.27s %-9.9s %-1.1s%-39.39s\n";

/* Source of the underline printed below the configuration table headers. */
static const char ul[]
    = "=================================================================";
| |
/* Categories used to group the command line options in the help output.
 * CmdOptCatFIRST and CmdOptCatLAST delimit the half-open range that is
 * iterated when printing; the values also index cmdopt_catname[]. */
typedef enum
{
    CmdOptCatFIRST = 0,
    CmdOptFileManip = CmdOptCatFIRST,  /* file manipulation */
    CmdOptProcDir,                     /* processing directives */
    CmdOptCharEnc,                     /* character encodings */
    CmdOptMisc,                        /* miscellaneous */
    CmdOptCatLAST                      /* one past the last category */
} CmdOptCategory;
| |
| static const struct { |
| ctmbstr mnemonic; |
| ctmbstr name; |
| } cmdopt_catname[] = { |
| { "file-manip", "File manipulation" }, |
| { "process-directives", "Processing directives" }, |
| { "char-encoding", "Character encodings" }, |
| { "misc", "Miscellaneous" } |
| }; |
| |
| typedef struct { |
| ctmbstr name1; /**< Name */ |
| ctmbstr desc; /**< Description */ |
| ctmbstr eqconfig; /**< Equivalent configuration option */ |
| CmdOptCategory cat; /**< Category */ |
| ctmbstr name2; /**< Name */ |
| ctmbstr name3; /**< Name */ |
| } CmdOptDesc; |
| |
| static const CmdOptDesc cmdopt_defs[] = { |
| { "-output <file>", |
| "write output to the specified <file>", |
| "output-file: <file>", CmdOptFileManip, "-o <file>" }, |
| { "-config <file>", |
| "set configuration options from the specified <file>", |
| NULL, CmdOptFileManip }, |
| { "-file <file>", |
| "write errors and warnings to the specified <file>", |
| "error-file: <file>", CmdOptFileManip, "-f <file>" }, |
| { "-modify", |
| "modify the original input files", |
| "write-back: yes", CmdOptFileManip, "-m" }, |
| { "-indent", |
| "indent element content", |
| "indent: auto", CmdOptProcDir, "-i" }, |
| { "-wrap <column>", |
| "wrap text at the specified <column>" |
| ". 0 is assumed if <column> is missing. " |
| "When this option is omitted, the default of the configuration option " |
| "\"wrap\" applies.", |
| "wrap: <column>", CmdOptProcDir, "-w <column>" }, |
| { "-upper", |
| "force tags to upper case", |
| "uppercase-tags: yes", CmdOptProcDir, "-u" }, |
| { "-clean", |
| "replace FONT, NOBR and CENTER tags by CSS", |
| "clean: yes", CmdOptProcDir, "-c" }, |
| { "-bare", |
| "strip out smart quotes and em dashes, etc.", |
| "bare: yes", CmdOptProcDir, "-b" }, |
| { "-gdoc", |
| "produce clean version of html exported by google docs", |
| "gdoc: yes", CmdOptProcDir, "-g" }, |
| { "-numeric", |
| "output numeric rather than named entities", |
| "numeric-entities: yes", CmdOptProcDir, "-n" }, |
| { "-errors", |
| "show only errors and warnings", |
| "markup: no", CmdOptProcDir, "-e" }, |
| { "-quiet", |
| "suppress nonessential output", |
| "quiet: yes", CmdOptProcDir, "-q" }, |
| { "-omit", |
| "omit optional start tags and end tags", |
| "omit-optional-tags: yes", CmdOptProcDir }, |
| { "-xml", |
| "specify the input is well formed XML", |
| "input-xml: yes", CmdOptProcDir }, |
| { "-asxml", |
| "convert HTML to well formed XHTML", |
| "output-xhtml: yes", CmdOptProcDir, "-asxhtml" }, |
| { "-ashtml", |
| "force XHTML to well formed HTML", |
| "output-html: yes", CmdOptProcDir }, |
| #if SUPPORT_ACCESSIBILITY_CHECKS |
| { "-access <level>", |
| "do additional accessibility checks (<level> = 0, 1, 2, 3)" |
| ". 0 is assumed if <level> is missing.", |
| "accessibility-check: <level>", CmdOptProcDir }, |
| #endif |
| { "-raw", |
| "output values above 127 without conversion to entities", |
| NULL, CmdOptCharEnc }, |
| { "-ascii", |
| "use ISO-8859-1 for input, US-ASCII for output", |
| NULL, CmdOptCharEnc }, |
| { "-latin0", |
| "use ISO-8859-15 for input, US-ASCII for output", |
| NULL, CmdOptCharEnc }, |
| { "-latin1", |
| "use ISO-8859-1 for both input and output", |
| NULL, CmdOptCharEnc }, |
| #ifndef NO_NATIVE_ISO2022_SUPPORT |
| { "-iso2022", |
| "use ISO-2022 for both input and output", |
| NULL, CmdOptCharEnc }, |
| #endif |
| { "-utf8", |
| "use UTF-8 for both input and output", |
| NULL, CmdOptCharEnc }, |
| { "-mac", |
| "use MacRoman for input, US-ASCII for output", |
| NULL, CmdOptCharEnc }, |
| { "-win1252", |
| "use Windows-1252 for input, US-ASCII for output", |
| NULL, CmdOptCharEnc }, |
| { "-ibm858", |
| "use IBM-858 (CP850+Euro) for input, US-ASCII for output", |
| NULL, CmdOptCharEnc }, |
| #if SUPPORT_UTF16_ENCODINGS |
| { "-utf16le", |
| "use UTF-16LE for both input and output", |
| NULL, CmdOptCharEnc }, |
| { "-utf16be", |
| "use UTF-16BE for both input and output", |
| NULL, CmdOptCharEnc }, |
| { "-utf16", |
| "use UTF-16 for both input and output", |
| NULL, CmdOptCharEnc }, |
| #endif |
| #if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */ |
| { "-big5", |
| "use Big5 for both input and output", |
| NULL, CmdOptCharEnc }, |
| { "-shiftjis", |
| "use Shift_JIS for both input and output", |
| NULL, CmdOptCharEnc }, |
| { "-language <lang>", |
| "set the two-letter language code <lang> (for future use)", |
| "language: <lang>", CmdOptCharEnc }, |
| #endif |
| { "-version", |
| "show the version of Tidy", |
| NULL, CmdOptMisc, "-v" }, |
| { "-help", |
| "list the command line options", |
| NULL, CmdOptMisc, "-h", "-?" }, |
| { "-xml-help", |
| "list the command line options in XML format", |
| NULL, CmdOptMisc }, |
| { "-help-config", |
| "list all configuration options", |
| NULL, CmdOptMisc }, |
| { "-xml-config", |
| "list all configuration options in XML format", |
| NULL, CmdOptMisc }, |
| { "-show-config", |
| "list the current configuration settings", |
| NULL, CmdOptMisc }, |
| { NULL, NULL, NULL, CmdOptMisc } |
| }; |
| |
| static tmbstr get_option_names( const CmdOptDesc* pos ) |
| { |
| tmbstr name; |
| uint len = strlen(pos->name1); |
| if (pos->name2) |
| len += 2+strlen(pos->name2); |
| if (pos->name3) |
| len += 2+strlen(pos->name3); |
| |
| name = (tmbstr)malloc(len+1); |
| if (!name) outOfMemory(); |
| strcpy(name, pos->name1); |
| if (pos->name2) |
| { |
| strcat(name, ", "); |
| strcat(name, pos->name2); |
| } |
| if (pos->name3) |
| { |
| strcat(name, ", "); |
| strcat(name, pos->name3); |
| } |
| return name; |
| } |
| |
| static tmbstr get_escaped_name( ctmbstr name ) |
| { |
| tmbstr escpName; |
| char aux[2]; |
| uint len = 0; |
| ctmbstr c; |
| for(c=name; *c!='\0'; ++c) |
| switch(*c) |
| { |
| case '<': |
| case '>': |
| len += 4; |
| break; |
| case '"': |
| len += 6; |
| break; |
| default: |
| len += 1; |
| break; |
| } |
| |
| escpName = (tmbstr)malloc(len+1); |
| if (!escpName) outOfMemory(); |
| escpName[0] = '\0'; |
| |
| aux[1] = '\0'; |
| for(c=name; *c!='\0'; ++c) |
| switch(*c) |
| { |
| case '<': |
| strcat(escpName, "<"); |
| break; |
| case '>': |
| strcat(escpName, ">"); |
| break; |
| case '"': |
| strcat(escpName, """); |
| break; |
| default: |
| aux[0] = *c; |
| strcat(escpName, aux); |
| break; |
| } |
| |
| return escpName; |
| } |
| |
| static void print_help_option( void ) |
| { |
| CmdOptCategory cat = CmdOptCatFIRST; |
| const CmdOptDesc* pos = cmdopt_defs; |
| |
| for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat) |
| { |
| size_t len = strlen(cmdopt_catname[cat].name); |
| printf("%s\n", cmdopt_catname[cat].name ); |
| printf("%*.*s\n", (int)len, (int)len, helpul ); |
| for( pos=cmdopt_defs; pos->name1; ++pos) |
| { |
| tmbstr name; |
| if (pos->cat != cat) |
| continue; |
| name = get_option_names( pos ); |
| print2Columns( helpfmt, 19, 58, name, pos->desc ); |
| free(name); |
| } |
| printf("\n"); |
| } |
| } |
| |
| static void print_xml_help_option_element( ctmbstr element, ctmbstr name ) |
| { |
| tmbstr escpName; |
| if (!name) |
| return; |
| printf(" <%s>%s</%s>\n", element, escpName = get_escaped_name(name), |
| element); |
| free(escpName); |
| } |
| |
| static void print_xml_help_option( void ) |
| { |
| const CmdOptDesc* pos = cmdopt_defs; |
| |
| for( pos=cmdopt_defs; pos->name1; ++pos) |
| { |
| printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic ); |
| print_xml_help_option_element("name", pos->name1); |
| print_xml_help_option_element("name", pos->name2); |
| print_xml_help_option_element("name", pos->name3); |
| print_xml_help_option_element("description", pos->desc); |
| if (pos->eqconfig) |
| print_xml_help_option_element("eqconfig", pos->eqconfig); |
| else |
| printf(" <eqconfig />\n"); |
| printf(" </option>\n"); |
| } |
| } |
| |
/* Print the command line options as an XML document on stdout.  The root
 * element <cmdline> carries the Tidy release date as its version. */
static void xml_help( void )
{
    printf( "<?xml version=\"1.0\"?>\n" );
    printf( "<cmdline version=\"%s\">\n", tidyReleaseDate() );
    print_xml_help_option();
    printf( "</cmdline>\n" );
}
| |
| static void help( ctmbstr prog ) |
| { |
| printf( "%s [option...] [file...] [option...] [file...]\n", prog ); |
| printf( "Utility to clean up and pretty print HTML/XHTML/XML\n"); |
| printf( "\n"); |
| |
| printf( "This is an HTML5-aware experimental fork of HTML Tidy.\n"); |
| printf( "%s\n", tidyReleaseDate() ); |
| printf( "\n"); |
| |
| #ifdef PLATFORM_NAME |
| printf( "Options for HTML Tidy for %s:\n", PLATFORM_NAME ); |
| #else |
| printf( "Options for HTML Tidy:\n"); |
| #endif |
| printf( "\n"); |
| |
| print_help_option(); |
| |
| printf( "Use --optionX valueX for any configuration option \"optionX\" with argument\n" |
| "\"valueX\". For a list of the configuration options, use \"-help-config\" or refer\n" |
| "to the man page.\n\n"); |
| |
| printf( "Input/Output default to stdin/stdout respectively.\n"); |
| printf( "\n"); |
| printf( "Single letter options apart from -f may be combined\n"); |
| printf( "as in: tidy -f errs.txt -imu foo.html\n"); |
| printf( "\n"); |
| printf( "For more information on this HTML5-aware experimental fork of Tidy,\n" ); |
| printf( "see http://w3c.github.com/tidy-html5/\n" ); |
| printf( "\n"); |
| printf( "For more information on HTML, see the following:\n" ); |
| printf( "\n"); |
| printf( " HTML: Edition for Web Authors (the latest HTML specification)\n"); |
| printf( " http://dev.w3.org/html5/spec-author-view\n" ); |
| printf( "\n"); |
| printf( " HTML: The Markup Language (an HTML language reference)\n" ); |
| printf( " http://dev.w3.org/html5/markup/\n" ); |
| printf( "\n"); |
| printf( "File bug reports at https://github.com/w3c/tidy-html5/issues/\n" ); |
| printf( "or send questions and comments to html-tidy@w3.org\n" ); |
| printf( "\n"); |
| printf( "Validate your HTML documents using the W3C Nu Markup Validator:\n" ); |
| printf( "\n"); |
| printf( " http://validator.w3.org/nu/" ); |
| printf( "\n"); |
| } |
| |
| static Bool isAutoBool( TidyOption topt ) |
| { |
| TidyIterator pos; |
| ctmbstr def; |
| |
| if ( tidyOptGetType( topt ) != TidyInteger) |
| return no; |
| |
| pos = tidyOptGetPickList( topt ); |
| while ( pos ) |
| { |
| def = tidyOptGetNextPick( topt, &pos ); |
| if (0==strcmp(def,"yes")) |
| return yes; |
| } |
| return no; |
| } |
| |
| static |
| ctmbstr ConfigCategoryName( TidyConfigCategory id ) |
| { |
| switch( id ) |
| { |
| case TidyMarkup: |
| return "markup"; |
| case TidyDiagnostics: |
| return "diagnostics"; |
| case TidyPrettyPrint: |
| return "print"; |
| case TidyEncoding: |
| return "encoding"; |
| case TidyMiscellaneous: |
| return "misc"; |
| } |
| fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id); |
| assert(0); |
| abort(); |
| } |
| |
| /* Description of an option */ |
| typedef struct { |
| ctmbstr name; /**< Name */ |
| ctmbstr cat; /**< Category */ |
| ctmbstr type; /**< "String, ... */ |
| ctmbstr vals; /**< Potential values. If NULL, use an external function */ |
| ctmbstr def; /**< default */ |
| tmbchar tempdefs[80]; /**< storage for default such as integer */ |
| Bool haveVals; /**< if yes, vals is valid */ |
| } OptionDesc; |
| |
| typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * ); |
| |
| |
| /* Create description "d" related to "opt" */ |
| static |
| void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d ) |
| { |
| TidyOptionId optId = tidyOptGetId( topt ); |
| TidyOptionType optTyp = tidyOptGetType( topt ); |
| |
| d->name = tidyOptGetName( topt ); |
| d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) ); |
| d->vals = NULL; |
| d->def = NULL; |
| d->haveVals = yes; |
| |
| /* Handle special cases first. |
| */ |
| switch ( optId ) |
| { |
| case TidyDuplicateAttrs: |
| case TidySortAttributes: |
| case TidyNewline: |
| case TidyAccessibilityCheckLevel: |
| d->type = "enum"; |
| d->vals = NULL; |
| d->def = |
| optId==TidyNewline ? |
| "<em>Platform dependent</em>" |
| :tidyOptGetCurrPick( tdoc, optId ); |
| break; |
| |
| case TidyDoctype: |
| d->type = "DocType"; |
| d->vals = NULL; |
| { |
| ctmbstr sdef = NULL; |
| sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode ); |
| if ( !sdef || *sdef == '*' ) |
| sdef = tidyOptGetValue( tdoc, TidyDoctype ); |
| d->def = sdef; |
| } |
| break; |
| |
| case TidyInlineTags: |
| case TidyBlockTags: |
| case TidyEmptyTags: |
| case TidyPreTags: |
| d->type = "Tag names"; |
| d->vals = "tagX, tagY, ..."; |
| d->def = NULL; |
| break; |
| |
| case TidyCharEncoding: |
| case TidyInCharEncoding: |
| case TidyOutCharEncoding: |
| d->type = "Encoding"; |
| d->def = tidyOptGetEncName( tdoc, optId ); |
| if (!d->def) |
| d->def = "?"; |
| d->vals = NULL; |
| break; |
| |
| /* General case will handle remaining */ |
| default: |
| switch ( optTyp ) |
| { |
| case TidyBoolean: |
| d->type = "Boolean"; |
| d->vals = "y/n, yes/no, t/f, true/false, 1/0"; |
| d->def = tidyOptGetCurrPick( tdoc, optId ); |
| break; |
| |
| case TidyInteger: |
| if (isAutoBool(topt)) |
| { |
| d->type = "AutoBool"; |
| d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0"; |
| d->def = tidyOptGetCurrPick( tdoc, optId ); |
| } |
| else |
| { |
| uint idef; |
| d->type = "Integer"; |
| if ( optId == TidyWrapLen ) |
| d->vals = "0 (no wrapping), 1, 2, ..."; |
| else |
| d->vals = "0, 1, 2, ..."; |
| |
| idef = tidyOptGetInt( tdoc, optId ); |
| sprintf(d->tempdefs, "%u", idef); |
| d->def = d->tempdefs; |
| } |
| break; |
| |
| case TidyString: |
| d->type = "String"; |
| d->vals = NULL; |
| d->haveVals = no; |
| d->def = tidyOptGetValue( tdoc, optId ); |
| break; |
| } |
| } |
| } |
| |
| /* Array holding all options. Contains a trailing sentinel. */ |
| typedef struct { |
| TidyOption topt[N_TIDY_OPTIONS]; |
| } AllOption_t; |
| |
| static |
| int cmpOpt(const void* e1_, const void *e2_) |
| { |
| const TidyOption* e1 = (const TidyOption*)e1_; |
| const TidyOption* e2 = (const TidyOption*)e2_; |
| return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2)); |
| } |
| |
| static |
| void getSortedOption( TidyDoc tdoc, AllOption_t *tOption ) |
| { |
| TidyIterator pos = tidyGetOptionList( tdoc ); |
| uint i = 0; |
| |
| while ( pos ) |
| { |
| TidyOption topt = tidyGetNextOption( tdoc, &pos ); |
| tOption->topt[i] = topt; |
| ++i; |
| } |
| tOption->topt[i] = NULL; /* sentinel */ |
| |
| qsort(tOption->topt, |
| /* Do not sort the sentinel: hence `-1' */ |
| sizeof(tOption->topt)/sizeof(tOption->topt[0])-1, |
| sizeof(tOption->topt[0]), |
| cmpOpt); |
| } |
| |
| static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint ) |
| { |
| AllOption_t tOption; |
| const TidyOption *topt; |
| |
| getSortedOption( tdoc, &tOption ); |
| for( topt = tOption.topt; *topt; ++topt) |
| { |
| OptionDesc d; |
| |
| GetOption( tdoc, *topt, &d ); |
| (*OptionPrint)( tdoc, *topt, &d ); |
| } |
| } |
| |
| static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint ) |
| { |
| TidyIterator pos = tidyGetOptionList( tdoc ); |
| |
| while ( pos ) |
| { |
| TidyOption topt = tidyGetNextOption( tdoc, &pos ); |
| OptionDesc d; |
| |
| GetOption( tdoc, topt, &d ); |
| (*OptionPrint)( tdoc, topt, &d ); |
| } |
| } |
| |
| static |
| void PrintAllowedValuesFromPick( TidyOption topt ) |
| { |
| TidyIterator pos = tidyOptGetPickList( topt ); |
| Bool first = yes; |
| ctmbstr def; |
| while ( pos ) |
| { |
| if (first) |
| first = no; |
| else |
| printf(", "); |
| def = tidyOptGetNextPick( topt, &pos ); |
| printf("%s", def); |
| } |
| } |
| |
| static |
| void PrintAllowedValues( TidyOption topt, const OptionDesc *d ) |
| { |
| if (d->vals) |
| printf( "%s", d->vals ); |
| else |
| PrintAllowedValuesFromPick( topt ); |
| } |
| |
| static |
| void printXMLDescription( TidyDoc tdoc, TidyOption topt ) |
| { |
| ctmbstr doc = tidyOptGetDoc( tdoc, topt ); |
| |
| if (doc) |
| printf(" <description>%s</description>\n", doc); |
| else |
| { |
| printf(" <description />\n"); |
| fprintf(stderr, "Warning: option `%s' is not documented.\n", |
| tidyOptGetName( topt )); |
| } |
| } |
| |
| static |
| void printXMLCrossRef( TidyDoc tdoc, TidyOption topt ) |
| { |
| TidyOption optLinked; |
| TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt); |
| while( pos ) |
| { |
| optLinked = tidyOptGetNextDocLinks(tdoc, &pos ); |
| printf(" <seealso>%s</seealso>\n",tidyOptGetName(optLinked)); |
| } |
| } |
| |
| static |
| void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d ) |
| { |
| if ( tidyOptIsReadOnly(topt) ) |
| return; |
| |
| printf( " <option class=\"%s\">\n", d->cat ); |
| printf (" <name>%s</name>\n",d->name); |
| printf (" <type>%s</type>\n",d->type); |
| if (d->def) |
| printf(" <default>%s</default>\n",d->def); |
| else |
| printf(" <default />\n"); |
| if (d->haveVals) |
| { |
| printf(" <example>"); |
| PrintAllowedValues( topt, d ); |
| printf("</example>\n"); |
| } |
| else |
| { |
| printf(" <example />\n"); |
| } |
| printXMLDescription( tdoc, topt ); |
| printXMLCrossRef( tdoc, topt ); |
| printf( " </option>\n" ); |
| } |
| |
| static void XMLoptionhelp( TidyDoc tdoc ) |
| { |
| printf( "<?xml version=\"1.0\"?>\n" |
| "<config version=\"%s\">\n", tidyReleaseDate()); |
| ForEachOption( tdoc, printXMLOption ); |
| printf( "</config>\n" ); |
| } |
| |
| static |
| tmbstr GetAllowedValuesFromPick( TidyOption topt ) |
| { |
| TidyIterator pos; |
| Bool first; |
| ctmbstr def; |
| uint len = 0; |
| tmbstr val; |
| |
| pos = tidyOptGetPickList( topt ); |
| first = yes; |
| while ( pos ) |
| { |
| if (first) |
| first = no; |
| else |
| len += 2; |
| def = tidyOptGetNextPick( topt, &pos ); |
| len += strlen(def); |
| } |
| val = (tmbstr)malloc(len+1); |
| if (!val) outOfMemory(); |
| val[0] = '\0'; |
| pos = tidyOptGetPickList( topt ); |
| first = yes; |
| while ( pos ) |
| { |
| if (first) |
| first = no; |
| else |
| strcat(val, ", "); |
| def = tidyOptGetNextPick( topt, &pos ); |
| strcat(val, def); |
| } |
| return val; |
| } |
| |
| static |
| tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d ) |
| { |
| if (d->vals) |
| { |
| tmbstr val = (tmbstr)malloc(1+strlen(d->vals)); |
| if (!val) outOfMemory(); |
| strcpy(val, d->vals); |
| return val; |
| } |
| else |
| return GetAllowedValuesFromPick( topt ); |
| } |
| |
| static |
| void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt, |
| OptionDesc *d ) |
| { |
| if ( tidyOptIsReadOnly(topt) ) |
| return; |
| |
| if ( *d->name || *d->type ) |
| { |
| ctmbstr pval = d->vals; |
| tmbstr val = NULL; |
| if (!d->haveVals) |
| { |
| pval = "-"; |
| } |
| else if (pval == NULL) |
| { |
| val = GetAllowedValues( topt, d); |
| pval = val; |
| } |
| print3Columns( fmt, 27, 9, 40, d->name, d->type, pval ); |
| if (val) |
| free(val); |
| } |
| } |
| |
| static void optionhelp( TidyDoc tdoc ) |
| { |
| printf( "\nHTML Tidy Configuration Settings\n\n" ); |
| printf( "Within a file, use the form:\n\n" ); |
| printf( "wrap: 72\n" ); |
| printf( "indent: no\n\n" ); |
| printf( "When specified on the command line, use the form:\n\n" ); |
| printf( "--wrap 72 --indent no\n\n"); |
| |
| printf( fmt, "Name", "Type", "Allowable values" ); |
| printf( fmt, ul, ul, ul ); |
| |
| ForEachSortedOption( tdoc, printOption ); |
| } |
| |
| static |
| void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt, |
| OptionDesc *d ) |
| { |
| TidyOptionId optId = tidyOptGetId( topt ); |
| ctmbstr ro = tidyOptIsReadOnly( topt ) ? "*" : "" ; |
| |
| switch ( optId ) |
| { |
| case TidyInlineTags: |
| case TidyBlockTags: |
| case TidyEmptyTags: |
| case TidyPreTags: |
| { |
| TidyIterator pos = tidyOptGetDeclTagList( tdoc ); |
| while ( pos ) |
| { |
| d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos); |
| if ( pos ) |
| { |
| if ( *d->name ) |
| printf( valfmt, d->name, d->type, ro, d->def ); |
| else |
| printf( fmt, d->name, d->type, d->def ); |
| d->name = ""; |
| d->type = ""; |
| } |
| } |
| } |
| break; |
| case TidyNewline: |
| d->def = tidyOptGetCurrPick( tdoc, optId ); |
| break; |
| default: |
| break; |
| } |
| |
| /* fix for http://tidy.sf.net/bug/873921 */ |
| if ( *d->name || *d->type || (d->def && *d->def) ) |
| { |
| if ( ! d->def ) |
| d->def = ""; |
| if ( *d->name ) |
| printf( valfmt, d->name, d->type, ro, d->def ); |
| else |
| printf( fmt, d->name, d->type, d->def ); |
| } |
| } |
| |
| static void optionvalues( TidyDoc tdoc ) |
| { |
| printf( "\nConfiguration File Settings:\n\n" ); |
| printf( fmt, "Name", "Type", "Current Value" ); |
| printf( fmt, ul, ul, ul ); |
| |
| ForEachSortedOption( tdoc, printOptionValues ); |
| |
| printf( "\n\nValues marked with an *asterisk are calculated \n" |
| "internally by HTML Tidy\n\n" ); |
| } |
| |
| static void version( void ) |
| { |
| #ifdef PLATFORM_NAME |
| printf( "HTML Tidy for HTML5 (experimental) for %s %s\n", |
| PLATFORM_NAME, tidyReleaseDate() ); |
| #else |
| printf( "HTML Tidy for HTML5 (experimental) %s\n", tidyReleaseDate() ); |
| #endif |
| } |
| |
| static void unknownOption( uint c ) |
| { |
| fprintf( errout, "HTML Tidy: unknown option: %c\n", (char)c ); |
| } |
| |
| int main( int argc, char** argv ) |
| { |
| ctmbstr prog = argv[0]; |
| ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL; |
| TidyDoc tdoc = tidyCreate(); |
| int status = 0; |
| |
| uint contentErrors = 0; |
| uint contentWarnings = 0; |
| uint accessWarnings = 0; |
| |
| errout = stderr; /* initialize to stderr */ |
| status = 0; |
| |
| #ifdef TIDY_CONFIG_FILE |
| if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) ) |
| { |
| status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE ); |
| if ( status != 0 ) |
| fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_CONFIG_FILE, status); |
| } |
| #endif /* TIDY_CONFIG_FILE */ |
| |
| /* look for env var "HTML_TIDY" */ |
| /* then for ~/.tidyrc (on platforms defining $HOME) */ |
| |
| if ( (cfgfil = getenv("HTML_TIDY")) != NULL ) |
| { |
| status = tidyLoadConfig( tdoc, cfgfil ); |
| if ( status != 0 ) |
| fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status); |
| } |
| #ifdef TIDY_USER_CONFIG_FILE |
| else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) ) |
| { |
| status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE ); |
| if ( status != 0 ) |
| fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", TIDY_USER_CONFIG_FILE, status); |
| } |
| #endif /* TIDY_USER_CONFIG_FILE */ |
| |
| /* read command line */ |
| while ( argc > 0 ) |
| { |
| if (argc > 1 && argv[1][0] == '-') |
| { |
| /* support -foo and --foo */ |
| ctmbstr arg = argv[1] + 1; |
| |
| if ( strcasecmp(arg, "xml") == 0) |
| tidyOptSetBool( tdoc, TidyXmlTags, yes ); |
| |
| else if ( strcasecmp(arg, "asxml") == 0 || |
| strcasecmp(arg, "asxhtml") == 0 ) |
| { |
| tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); |
| } |
| else if ( strcasecmp(arg, "ashtml") == 0 ) |
| tidyOptSetBool( tdoc, TidyHtmlOut, yes ); |
| |
| else if ( strcasecmp(arg, "indent") == 0 ) |
| { |
| tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState ); |
| if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 ) |
| tidyOptResetToDefault( tdoc, TidyIndentSpaces ); |
| } |
| else if ( strcasecmp(arg, "omit") == 0 ) |
| tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes ); |
| |
| else if ( strcasecmp(arg, "upper") == 0 ) |
| tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); |
| |
| else if ( strcasecmp(arg, "clean") == 0 ) |
| tidyOptSetBool( tdoc, TidyMakeClean, yes ); |
| |
| else if ( strcasecmp(arg, "gdoc") == 0 ) |
| tidyOptSetBool( tdoc, TidyGDocClean, yes ); |
| |
| else if ( strcasecmp(arg, "bare") == 0 ) |
| tidyOptSetBool( tdoc, TidyMakeBare, yes ); |
| |
| else if ( strcasecmp(arg, "raw") == 0 || |
| strcasecmp(arg, "ascii") == 0 || |
| strcasecmp(arg, "latin0") == 0 || |
| strcasecmp(arg, "latin1") == 0 || |
| strcasecmp(arg, "utf8") == 0 || |
| #ifndef NO_NATIVE_ISO2022_SUPPORT |
| strcasecmp(arg, "iso2022") == 0 || |
| #endif |
| #if SUPPORT_UTF16_ENCODINGS |
| strcasecmp(arg, "utf16le") == 0 || |
| strcasecmp(arg, "utf16be") == 0 || |
| strcasecmp(arg, "utf16") == 0 || |
| #endif |
| #if SUPPORT_ASIAN_ENCODINGS |
| strcasecmp(arg, "shiftjis") == 0 || |
| strcasecmp(arg, "big5") == 0 || |
| #endif |
| strcasecmp(arg, "mac") == 0 || |
| strcasecmp(arg, "win1252") == 0 || |
| strcasecmp(arg, "ibm858") == 0 ) |
| { |
| tidySetCharEncoding( tdoc, arg ); |
| } |
| else if ( strcasecmp(arg, "numeric") == 0 ) |
| tidyOptSetBool( tdoc, TidyNumEntities, yes ); |
| |
| else if ( strcasecmp(arg, "modify") == 0 || |
| strcasecmp(arg, "change") == 0 || /* obsolete */ |
| strcasecmp(arg, "update") == 0 ) /* obsolete */ |
| { |
| tidyOptSetBool( tdoc, TidyWriteBack, yes ); |
| } |
| else if ( strcasecmp(arg, "errors") == 0 ) |
| tidyOptSetBool( tdoc, TidyShowMarkup, no ); |
| |
| else if ( strcasecmp(arg, "quiet") == 0 ) |
| tidyOptSetBool( tdoc, TidyQuiet, yes ); |
| |
| else if ( strcasecmp(arg, "help") == 0 || |
| strcasecmp(arg, "h") == 0 || *arg == '?' ) |
| { |
| help( prog ); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| } |
| else if ( strcasecmp(arg, "xml-help") == 0) |
| { |
| xml_help( ); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| } |
| else if ( strcasecmp(arg, "help-config") == 0 ) |
| { |
| optionhelp( tdoc ); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| } |
| else if ( strcasecmp(arg, "xml-config") == 0 ) |
| { |
| XMLoptionhelp( tdoc ); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| } |
| else if ( strcasecmp(arg, "show-config") == 0 ) |
| { |
| optionvalues( tdoc ); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| } |
| else if ( strcasecmp(arg, "config") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| ctmbstr post; |
| |
| tidyLoadConfig( tdoc, argv[2] ); |
| |
| /* Set new error output stream if setting changed */ |
| post = tidyOptGetValue( tdoc, TidyErrFile ); |
| if ( post && (!errfil || !samefile(errfil, post)) ) |
| { |
| errfil = post; |
| errout = tidySetErrorFile( tdoc, post ); |
| } |
| |
| --argc; |
| ++argv; |
| } |
| } |
| |
| #if SUPPORT_ASIAN_ENCODINGS |
| else if ( strcasecmp(arg, "language") == 0 || |
| strcasecmp(arg, "lang") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| tidyOptSetValue( tdoc, TidyLanguage, argv[2] ); |
| --argc; |
| ++argv; |
| } |
| } |
| #endif |
| |
| else if ( strcasecmp(arg, "output") == 0 || |
| strcasecmp(arg, "-output-file") == 0 || |
| strcasecmp(arg, "o") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| tidyOptSetValue( tdoc, TidyOutFile, argv[2] ); |
| --argc; |
| ++argv; |
| } |
| } |
| else if ( strcasecmp(arg, "file") == 0 || |
| strcasecmp(arg, "-file") == 0 || |
| strcasecmp(arg, "f") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| errfil = argv[2]; |
| errout = tidySetErrorFile( tdoc, errfil ); |
| --argc; |
| ++argv; |
| } |
| } |
| else if ( strcasecmp(arg, "wrap") == 0 || |
| strcasecmp(arg, "-wrap") == 0 || |
| strcasecmp(arg, "w") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| uint wraplen = 0; |
| int nfields = sscanf( argv[2], "%u", &wraplen ); |
| tidyOptSetInt( tdoc, TidyWrapLen, wraplen ); |
| if (nfields > 0) |
| { |
| --argc; |
| ++argv; |
| } |
| } |
| } |
| else if ( strcasecmp(arg, "version") == 0 || |
| strcasecmp(arg, "-version") == 0 || |
| strcasecmp(arg, "v") == 0 ) |
| { |
| version(); |
| tidyRelease( tdoc ); |
| return 0; /* success */ |
| |
| } |
| else if ( strncmp(argv[1], "--", 2 ) == 0) |
| { |
| if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) ) |
| { |
| /* Set new error output stream if setting changed */ |
| ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile ); |
| if ( post && (!errfil || !samefile(errfil, post)) ) |
| { |
| errfil = post; |
| errout = tidySetErrorFile( tdoc, post ); |
| } |
| |
| ++argv; |
| --argc; |
| } |
| } |
| |
| #if SUPPORT_ACCESSIBILITY_CHECKS |
| else if ( strcasecmp(arg, "access") == 0 ) |
| { |
| if ( argc >= 3 ) |
| { |
| uint acclvl = 0; |
| int nfields = sscanf( argv[2], "%u", &acclvl ); |
| tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl ); |
| if (nfields > 0) |
| { |
| --argc; |
| ++argv; |
| } |
| } |
| } |
| #endif |
| |
| else |
| { |
| uint c; |
| ctmbstr s = argv[1]; |
| |
| while ( (c = *++s) != '\0' ) |
| { |
| switch ( c ) |
| { |
| case 'i': |
| tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState ); |
| if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 ) |
| tidyOptResetToDefault( tdoc, TidyIndentSpaces ); |
| break; |
| |
| /* Usurp -o for output file. Anyone hiding end tags? |
| case 'o': |
| tidyOptSetBool( tdoc, TidyHideEndTags, yes ); |
| break; |
| */ |
| |
| case 'u': |
| tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); |
| break; |
| |
| case 'c': |
| tidyOptSetBool( tdoc, TidyMakeClean, yes ); |
| break; |
| |
| case 'g': |
| tidyOptSetBool( tdoc, TidyGDocClean, yes ); |
| break; |
| |
| case 'b': |
| tidyOptSetBool( tdoc, TidyMakeBare, yes ); |
| break; |
| |
| case 'n': |
| tidyOptSetBool( tdoc, TidyNumEntities, yes ); |
| break; |
| |
| case 'm': |
| tidyOptSetBool( tdoc, TidyWriteBack, yes ); |
| break; |
| |
| case 'e': |
| tidyOptSetBool( tdoc, TidyShowMarkup, no ); |
| break; |
| |
| case 'q': |
| tidyOptSetBool( tdoc, TidyQuiet, yes ); |
| break; |
| |
| default: |
| unknownOption( c ); |
| break; |
| } |
| } |
| } |
| |
| --argc; |
| ++argv; |
| continue; |
| } |
| |
| if ( argc > 1 ) |
| { |
| htmlfil = argv[1]; |
| if ( tidyOptGetBool(tdoc, TidyEmacs) ) |
| tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil ); |
| status = tidyParseFile( tdoc, htmlfil ); |
| } |
| else |
| { |
| htmlfil = "stdin"; |
| status = tidyParseStdin( tdoc ); |
| } |
| |
| if ( status >= 0 ) |
| status = tidyCleanAndRepair( tdoc ); |
| |
| if ( status >= 0 ) |
| status = tidyRunDiagnostics( tdoc ); |
| |
| if ( status > 1 ) /* If errors, do we want to force output? */ |
| status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 ); |
| |
| if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) ) |
| { |
| if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 ) |
| status = tidySaveFile( tdoc, htmlfil ); |
| else |
| { |
| ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile ); |
| if ( outfil ) |
| status = tidySaveFile( tdoc, outfil ); |
| else |
| status = tidySaveStdout( tdoc ); |
| } |
| } |
| |
| contentErrors += tidyErrorCount( tdoc ); |
| contentWarnings += tidyWarningCount( tdoc ); |
| accessWarnings += tidyAccessWarningCount( tdoc ); |
| |
| --argc; |
| ++argv; |
| |
| if ( argc <= 1 ) |
| break; |
| } |
| |
| if (!tidyOptGetBool(tdoc, TidyQuiet) && |
| errout == stderr && !contentErrors) |
| fprintf(errout, "\n"); |
| |
| if (contentErrors + contentWarnings > 0 && |
| !tidyOptGetBool(tdoc, TidyQuiet)) |
| tidyErrorSummary(tdoc); |
| |
| if (!tidyOptGetBool(tdoc, TidyQuiet)) |
| tidyGeneralInfo(tdoc); |
| |
| /* called to free hash tables etc. */ |
| tidyRelease( tdoc ); |
| |
| /* return status can be used by scripts */ |
| if ( contentErrors > 0 ) |
| return 2; |
| |
| if ( contentWarnings > 0 ) |
| return 1; |
| |
| /* 0 signifies all is ok */ |
| return 0; |
| } |
| |
| /* |
| * local variables: |
| * mode: c |
| * indent-tabs-mode: nil |
| * c-basic-offset: 4 |
| * eval: (c-set-offset 'substatement-open 0) |
| * end: |
| */ |