ANY23-247 FIX Attribute name itemscope associated with an element type html must be followed by the ' = ' character. this closes #17
diff --git a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
index 009a604..e05c6b7 100644
--- a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
+++ b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
@@ -17,7 +17,6 @@
package org.apache.any23.extractor;
-import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.encoding.EncodingDetector;
@@ -251,6 +250,7 @@
try {
final String documentLanguage = extractDocumentLanguage(extractionParameters);
for (ExtractorFactory<?> factory : matchingExtractors) {
+ @SuppressWarnings("rawtypes")
final Extractor extractor = factory.createExtractor();
final SingleExtractionReport er = runExtractor(
extractionParameters,
@@ -343,6 +343,7 @@
/**
* @return the list of all the activated extractors for the given {@link org.apache.any23.source.DocumentSource}.
*/
+ @SuppressWarnings("rawtypes")
public List<Extractor> getMatchingExtractors() {
final List<Extractor> extractorsList = new ArrayList<Extractor>();
for(ExtractorFactory extractorFactory : matchingExtractors) {
@@ -444,7 +445,7 @@
final Extractor<?> extractor
) throws ExtractionException, IOException, ValidatorException {
if(log.isDebugEnabled()) {
- log.debug("Running " + extractor.getDescription().getExtractorName() + " on " + documentURI);
+ log.debug("Running {} on {}", extractor.getDescription().getExtractorName(), documentURI);
}
long startTime = System.currentTimeMillis();
final ExtractionContext extractionContext = new ExtractionContext(
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index e32ec51..be01d3f 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -94,11 +94,11 @@
parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
// Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
- parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
+ parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
+ parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
- parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
+ parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
//ByteBuffer seems to represent incorrect content. Need to make sure it is the content
//of the <script> node and not anything else!
diff --git a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
index 828ef1a..0094e54 100644
--- a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
+++ b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
@@ -20,6 +20,8 @@
import org.apache.any23.validator.rule.AboutNotURIRule;
import org.apache.any23.validator.rule.MetaNameMisuseFix;
import org.apache.any23.validator.rule.MetaNameMisuseRule;
+import org.apache.any23.validator.rule.MissingItemscopeAttributeValueFix;
+import org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule;
import org.apache.any23.validator.rule.MissingOpenGraphNamespaceRule;
import org.apache.any23.validator.rule.OpenGraphNamespaceFix;
import org.w3c.dom.Document;
@@ -54,7 +56,8 @@
final ValidationReportBuilder validationReportBuilder = new DefaultValidationReportBuilder();
for(Class<? extends Rule> cRule : rulesOrder) {
Rule rule = newRuleInstance(cRule);
- final RuleContext ruleContext = new DefaultRuleContext();
+ @SuppressWarnings("rawtypes")
+ final RuleContext ruleContext = new DefaultRuleContext();
boolean applyOn;
try {
applyOn = rule.applyOn(document, ruleContext, validationReportBuilder);
@@ -121,6 +124,7 @@
addRule(MetaNameMisuseRule.class, MetaNameMisuseFix.class);
addRule(MissingOpenGraphNamespaceRule.class, OpenGraphNamespaceFix.class);
addRule(AboutNotURIRule.class);
+ addRule(MissingItemscopeAttributeValueRule.class, MissingItemscopeAttributeValueFix.class);
}
private Fix newFixInstance(Class<? extends Fix> cFix) throws ValidatorException {
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
index dacde1b..5a0bfae 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
@@ -39,7 +39,7 @@
}
@SuppressWarnings("unchecked")
- public void execute(Rule rule, RuleContext context, DOMDocument document) {
+ public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) {
List<Node> nodes = (List<Node>) context.getData(MetaNameMisuseRule.ERRORED_META_NODES);
for(Node node : nodes) {
final String nameValue = node.getAttributes().getNamedItem("name").getTextContent();
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
index cc9c886..a803107 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
@@ -44,7 +44,7 @@
public boolean applyOn(
DOMDocument document,
- RuleContext context,
+ @SuppressWarnings("rawtypes") RuleContext context,
ValidationReportBuilder validationReportBuilder
) {
List<Node> metaNodes = document.getNodes("/HTML/HEAD/META");
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
new file mode 100644
index 0000000..909a33a
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.validator.rule;
+
+import java.util.List;
+
+import org.apache.any23.validator.DOMDocument;
+import org.apache.any23.validator.Fix;
+import org.apache.any23.validator.Rule;
+import org.apache.any23.validator.RuleContext;
+import org.w3c.dom.Node;
+
+/**
+ * Fix for the issue detected by {@link org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule}:
+ * every <code>itemscope</code> attribute whose value is null or empty is given the value "itemscope".
+ */
+public class MissingItemscopeAttributeValueFix implements Fix {
+  /** Replacement value: the bare attribute value only, with no <code>=</code> sign or quotes. */
+  public static final String EMPTY_ITEMSCOPE_VALUE = "itemscope";
+
+  /** Default constructor */
+  public MissingItemscopeAttributeValueFix() {
+  }
+
+  @Override
+  public String getHRName() {
+    return "missing-itemscope-value-fix";
+  }
+
+  @Override
+  public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) {
+    final List<Node> itemNodes = document.getNodesWithAttribute("itemscope");
+    for (Node itemNode : itemNodes) {
+      final Node itemScopeAttribute = itemNode.getAttributes().getNamedItem("itemscope");
+      final String value = itemScopeAttribute.getNodeValue();
+      // Treat null and "" alike as "no value", so a null node value cannot cause an NPE here.
+      if (value == null || value.isEmpty()) {
+        itemScopeAttribute.setNodeValue(EMPTY_ITEMSCOPE_VALUE);
+      }
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
new file mode 100644
index 0000000..b0ecd9b
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.validator.rule;
+
+import java.util.List;
+
+import org.apache.any23.validator.DOMDocument;
+import org.apache.any23.validator.Rule;
+import org.apache.any23.validator.RuleContext;
+import org.apache.any23.validator.ValidationReport;
+import org.apache.any23.validator.ValidationReportBuilder;
+import org.w3c.dom.Node;
+
+/**
+ * Detects <code>itemscope</code> attributes that carry no value.
+ * Typically when such a snippet of XHTML is fed through the
+ * {@link org.apache.any23.extractor.rdfa.RDFa11Extractor}, and
+ * subsequently to Sesame's {@link org.semarglproject.sesame.rdf.rdfa.SesameRDFaParser},
+ * it will result in the following behavior.
+ * <pre>
+ * {@code
+ * [Fatal Error] :23:15: Attribute name "itemscope" associated with an element type "div" must be followed by the ' = ' character.
+ * }
+ * </pre>
+ * This Rule only identifies that condition; it does not modify the document itself.
+ */
+public class MissingItemscopeAttributeValueRule implements Rule {
+
+  /**
+   * Default constructor
+   */
+  public MissingItemscopeAttributeValueRule() {
+  }
+
+  @Override
+  public String getHRName() {
+    return "missing-itemscope-value-rule";
+  }
+
+  /**
+   * Reports an error for the first element whose <code>itemscope</code> attribute value is
+   * null or empty; elements after that one are not inspected.
+   * @param document the document scanned for elements carrying an <code>itemscope</code> attribute.
+   * @param context the rule context; not used by this rule.
+   * @param validationReportBuilder receives the issue, attached to the offending element.
+   * @return <code>true</code> if such an element was found and reported, <code>false</code> otherwise.
+   * @see org.apache.any23.validator.Rule#applyOn(org.apache.any23.validator.DOMDocument, org.apache.any23.validator.RuleContext, org.apache.any23.validator.ValidationReportBuilder)
+   */
+  @Override
+  public boolean applyOn(DOMDocument document, @SuppressWarnings("rawtypes") RuleContext context,
+      ValidationReportBuilder validationReportBuilder) {
+    final List<Node> scopedElements = document.getNodesWithAttribute("itemscope");
+    for (Node element : scopedElements) {
+      final String scopeValue = element.getAttributes().getNamedItem("itemscope").getNodeValue();
+      final boolean hasValue = scopeValue != null && !scopeValue.contentEquals("");
+      if (hasValue) {
+        continue;
+      }
+      // First valueless attribute found: report it and stop scanning.
+      validationReportBuilder.reportIssue(
+          ValidationReport.IssueLevel.error,
+          "Located absence of an accompanying value for the the 'itemscope' attribute of element with hashcode: " + element.hashCode(),
+          element
+      );
+      return true;
+    }
+    return false;
+  }
+
+}
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
index f814016..8229525 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
@@ -41,7 +41,7 @@
public boolean applyOn(
DOMDocument document,
- RuleContext context,
+ @SuppressWarnings("rawtypes") RuleContext context,
ValidationReportBuilder validationReportBuilder
) {
List<Node> metas = document.getNodes("/HTML/HEAD/META");
diff --git a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
index c0b394b..6975991 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
@@ -37,7 +37,7 @@
return "opengraph-namespace-fix";
}
- public void execute(Rule rule, RuleContext context, DOMDocument document) {
+ public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) {
document.addAttribute("/HTML", "xmlns:og", OPENGRAPH_PROTOCOL_NS);
}
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java
index c487ee8..4e0d9c2 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -18,6 +18,7 @@
package org.apache.any23;
import org.junit.Assert;
+import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.extractor.ExtractionException;
@@ -53,7 +54,6 @@
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFParseException;
-import org.openrdf.sail.Sail;
import org.openrdf.sail.SailException;
import org.openrdf.sail.memory.MemoryStore;
import org.slf4j.Logger;
@@ -552,11 +552,13 @@
*/
private ExtractionReport detectAndExtract(String in) throws Exception {
Any23 any23 = new Any23();
+ Configuration conf = DefaultConfiguration.copy();
ByteArrayOutputStream out = new ByteArrayOutputStream();
ReportingTripleHandler outputHandler = new ReportingTripleHandler(
new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(
new NTriplesWriter(out))));
- return any23.extract(in, "http://host.com/path", outputHandler);
+ return any23.extract(new ExtractionParameters(conf, ValidationMode.ValidateAndFix, null, null),
+ new StringDocumentSource(in, "http://host.com/path"), outputHandler, "UTF-8");
}
/**
@@ -586,9 +588,9 @@
* @throws ExtractionException
*/
private void assertExtractorActivation(String in,
- Class<? extends Extractor>... expectedExtractors) throws Exception {
+ @SuppressWarnings("rawtypes") Class<? extends Extractor>... expectedExtractors) throws Exception {
final ExtractionReport extractionReport = detectAndExtract(in);
- for (Class<? extends Extractor> expectedExtractorClass : expectedExtractors) {
+ for (@SuppressWarnings("rawtypes") Class<? extends Extractor> expectedExtractorClass : expectedExtractors) {
Assert.assertTrue(
String.format(
"Detection and extraction failed, expected extractor [%s] not found.",
diff --git a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
index efef2f2..f31b846 100644
--- a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
+++ b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
@@ -76,6 +76,27 @@
logger.debug( validationReport.toString() );
}
}
+
+  /** After validate(document, true), every previously empty itemscope value must read "itemscope". */
+  @Test
+  public void testMissingItemscopeAttributeValue() throws IOException, URISyntaxException, ValidatorException {
+    DOMDocument document = loadDocument("microdata-basic.html");
+    List<Node> brokenItemScopeNodes = document.getNodesWithAttribute("itemscope");
+    // Guard: without at least one node, both loops below would pass without checking anything.
+    Assert.assertFalse("fixture must contain itemscope attributes", brokenItemScopeNodes.isEmpty());
+    for (Node node : brokenItemScopeNodes) {
+      // all nodes with itemscope have an empty string value
+      Assert.assertEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue());
+    }
+    ValidationReport validationReport = validator.validate(document, true);
+    for (Node node : document.getNodesWithAttribute("itemscope")) {
+      // all nodes with itemscope now have a default value of "itemscope" (implies non-null, non-empty)
+      Assert.assertEquals("itemscope", node.getAttributes().getNamedItem("itemscope").getNodeValue());
+    }
+    if (logger.isDebugEnabled()) {
+      logger.debug(validationReport.toString());
+    }
+  }
@Test
public void testMetaNameMisuse() throws Exception {
@@ -133,7 +154,7 @@
public boolean applyOn(
DOMDocument document,
- RuleContext context,
+ @SuppressWarnings("rawtypes") RuleContext context,
ValidationReportBuilder validationReportBuilder
) {
throw new UnsupportedOperationException();
@@ -145,7 +166,7 @@
return "fake-fix";
}
- public void execute(Rule rule, RuleContext context, DOMDocument document) {
+ public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) {
throw new UnsupportedOperationException();
}
}
diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties
index 4aa0d92..4634d6b 100644
--- a/core/src/test/resources/log4j.properties
+++ b/core/src/test/resources/log4j.properties
@@ -1,20 +1,35 @@
-log4j.rootCategory=INFO, O
-
-# Stdout
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=DEBUG, O
+
+# Stdout
log4j.appender.O=org.apache.log4j.ConsoleAppender
-
-# File
-#log4j.appender.R=org.apache.log4j.RollingFileAppender
-#log4j.appender.R.File=log4j.log
-
-# Control the maximum log file size
-#log4j.appender.R.MaxFileSize=100KB
-
-# Archive log files (one backup file here)
-log4j.appender.R.MaxBackupIndex=1
-
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.O.layout=org.apache.log4j.PatternLayout
-
-log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
-log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
+
+# File
+#log4j.appender.R=org.apache.log4j.RollingFileAppender
+#log4j.appender.R.File=log4j.log
+
+# Control the maximum log file size
+#log4j.appender.R.MaxFileSize=100KB
+
+# Archive log files (one backup file here)
+log4j.appender.R.MaxBackupIndex=1
+
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.O.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
+log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt
index 5769466..f81da17 100644
--- a/src/site/apt/index.apt
+++ b/src/site/apt/index.apt
@@ -3,7 +3,6 @@
------
The Apache Software Foundation
------
- 2011-2012
~~ Licensed to the Apache Software Foundation (ASF) under one or more
~~ contributor license agreements. See the NOTICE file distributed with
@@ -31,12 +30,18 @@
* {{{http://www.w3.org/TR/xhtml-rdfa-primer/}RDFa}} with {{{http://www.w3.org/TR/2010/WD-rdfa-core-20100422/#scoping-of-prefix-mappings}RDFa1.1 prefix mechanism}}
- * {{{http://microformats.org/}Microformats}}: Adr, Geo, hCalendar, hCard, hListing, hResume, hReview, License, XFN and Species
+ * {{{http://microformats.org/}Microformats1}} and {{{http://microformats.org/wiki/microformats-2}Microformats2}}: hAdr, hCard, hCalendar, hEntry, hEvent, hGeo, hItem, hListing, hProduct, hRecipe, hResume, hReview, License, Species, XFN, etc.
+
+ * {{{http://json-ld.org/}JSON-LD}}: JSON for Linking Data, a lightweight Linked Data format that is based on the already successful JSON format and provides a way to help JSON data interoperate at Web scale.
* {{{http://dev.w3.org/html5/md/}HTML5 Microdata}}: (such as {{{http://schema.org}Schema.org}})
* {{{http://www.ietf.org/rfc/rfc4180.txt}CSV}}: Comma Separated Values with separator autodetection.
+ * Vocabularies: Extraction support for {{{http://dublincore.org/}Dublin Core Terms}}, {{{http://www.w3.org/wiki/DescriptionOfACareerVocabulary}Description of a Career}}, {{{https://github.com/edumbill/doap/wiki}Description Of A Project}}, {{{http://xmlns.com/foaf/spec/}Friend Of A Friend}}, {{{http://www.geonames.org/ontology/}GEO Names}}, {{{http://www.w3.org/2002/12/cal/icaltzd#}ICAL}}, {{{https://github.com/RinkeHoekstra/lkif-core}lkif-core}}, {{{http://ogp.me/}Open Graph Protocol}}, {{{http://purl.org/ontology/po/}BBC Programmes Ontology}}, {{{http://vocab.org/review/terms.html}RDF Review Vocabulary}}, {{{http://schema.org/}schema.org}}, {{{http://www.w3.org/2006/vcard/ns}VCard}}, {{{http://purl.org/ontology/wo/}BBC Wildlife Ontology}} and {{{http://www.w3.org/1999/xhtml/vocab/}XHTML}}... and more!
+
+
+
A detailed description of available extractors is {{{./extractors.html}here}}.
<<Apache Any23>> is used in major Web of Data applications such as {{{http://sindice.com/}sindice.com}} and {{{http://sig.ma/}sig.ma}}. It is written in Java and licensed under the {{{http://any23.googlecode.com/svn/trunk/LICENSE.txt}Apache License}}.
@@ -45,9 +50,9 @@
* As a command-line tool for extracting and converting between the supported formats.
* As online service API available at {{{http://any23.org/}any23.org}}.
- You can <<download>> the latest release from {{{./download.html}Apache Mirrors}}.
+ You can <<download>> the latest release from our {{{./download.html}Apache Mirrors}}.
- Previous versions are available from the {{{http://code.google.com/p/any23/downloads/list}download site at Google Code}}.
+ Previous versions are available from the {{{http://archive.apache.org/dist/any23/}Apache Archives site}}.
* Documentation Content
diff --git a/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
new file mode 100644
index 0000000..3ffca84
--- /dev/null
+++ b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
@@ -0,0 +1,107 @@
+<!DOCTYPE html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+<body>
+
+<!-- result0 -->
+<div itemscope>
+ <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+</div>
+<!-- result1 -->
+<div itemscope>
+ <p>My name is <span itemprop="name">Daniel</span>.</p>
+</div>
+
+<!-- result2 -->
+<div itemscope>
+ <p>My name is <span itemprop="name">Neil</span>.</p>
+ <p>My band is called <span itemprop="band">Four Parts Water</span>.</p>
+ <p>I am <span itemprop="nationality">British</span>.</p>
+</div>
+
+<!-- result3 -->
+<div itemscope>
+ <img itemprop="image" src="google-logo.png" alt="Google">
+</div>
+
+<!-- result4 -->
+<div itemscope>
+ I was born on <time itemprop="birthday" datetime="2009-05-10">May 10th 2009</time>.
+</div>
+
+<!-- result5 -->
+<div itemscope>
+ <p>Flavors in my favorite ice cream:</p>
+ <ul>
+ <li itemprop="flavor">Lemon sorbet</li>
+ <li itemprop="flavor">Apricot sorbet</li>
+ </ul>
+</div>
+
+<!-- result6 -->
+<div itemscope>
+ <span itemprop="favorite-color favorite-fruit">orange</span>
+</div>
+
+<!-- result7 -->
+<figure>
+ <img src="castle.jpeg">
+ <figcaption><span itemscope><span itemprop="name">The Castle</span></span> (1986)</figcaption>
+</figure>
+
+<!-- result8 -->
+<span itemscope><meta itemprop="name" content="The Castle"></span>
+<figure>
+ <img src="castle.jpeg">
+ <figcaption>The Castle (1986)</figcaption>
+</figure>
+
+<!-- result9 -->
+<section itemscope itemtype="http://example.org/animals#cat">
+ <h1 itemprop="name">Hedral</h1>
+ <p itemprop="desc">Hedral is a male american domestic shorthair,
+ with a fluffy black fur with white paws and belly.</p>
+ <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months">
+</section>
+
+<!-- result10 -->
+<dl itemscope
+ itemtype="http://vocab.example.net/book"
+ itemid="urn:isbn:0-330-34032-8">
+ <dt>Title
+ <dd itemprop="title">The Reality Dysfunction
+ <dt>Author
+ <dd itemprop="author">Peter F. Hamilton
+ <dt>Publication date
+ <dd>
+ <time itemprop="pubdate" datetime="1996-01-26">26 January 1996</time>
+</dl>
+
+<!-- result11 -->
+<section itemscope itemtype="http://example.org/animals#cat">
+ <h1 itemprop="name http://example.com/fn">Hedral</h1>
+ <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy
+ <span itemprop="http://example.com/color">black</span> fur with
+ <span itemprop="http://example.com/color">white</span> paws and belly.</p>
+ <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months">
+</section>
+
+</body>
+</head>
+</html>
\ No newline at end of file