blob: f050db6f5d99a13a8a5ba03c0a8b70ebfb12ef43 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.any23.extractor.html;
import java.util.List;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.HTMLDocument.TextField;
import org.apache.any23.vocab.Review;
import org.apache.any23.vocab.ReviewAggregate;
import org.apache.any23.vocab.VCard;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
* Extractor for the <a href="http://microformats.org/wiki/hreview-aggregate">hReview-aggregate</a> microformat.
*
* @author Sebastien Richard
*/
public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {

    /** Vocabulary supplying the {@code rating}, {@code type}, {@code title} and {@code hasReview} terms. */
    private static final Review vREVIEW = Review.getInstance();

    /**
     * Vocabulary supplying the {@code ReviewAggregate} class and the {@code worst}, {@code best},
     * {@code average}, {@code count} and {@code votes} terms.
     */
    private static final ReviewAggregate vREVIEWAGG = ReviewAggregate.getInstance();

    /** Vocabulary supplying the {@code fn}, {@code url} and {@code photo} terms used for the reviewed item. */
    private static final VCard vVCARD = VCard.getInstance();

    /**
     * Returns the description instance published by {@link HReviewAggregateExtractorFactory}.
     *
     * @return the extractor description
     */
    @Override
    public ExtractorDescription getDescription() {
        return HReviewAggregateExtractorFactory.getDescriptionInstance();
    }

    /**
     * Returns the class name that marks the root element of this microformat.
     *
     * @return the literal {@code "hreview-aggregate"}
     */
    @Override
    protected String getBaseClassName() {
        return "hreview-aggregate";
    }

    /**
     * Intentionally does nothing: this class declares no instance fields of its own to reset.
     */
    @Override
    protected void resetExtractor() {
        // Empty.
    }

    /**
     * Extracts a single hReview-aggregate entity rooted at {@code node}.
     *
     * <p>The entity is represented by the blank node obtained for {@code node}. It is typed as
     * {@code ReviewAggregate}, then each supported field is processed in a fixed order (rating,
     * worst, best, average, summary, type, item, count, votes), and finally the blank node is
     * registered as a resource root on the extraction result.</p>
     *
     * @param node the DOM node carrying the {@code hreview-aggregate} class
     * @param out the extraction result to write to; it is cast to {@link TagSoupExtractionResult}
     *            when the resource root is registered
     * @return always {@code true}
     * @throws ExtractionException propagated from the item extraction step
     */
    @Override
    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
        final BNode aggregate = getBlankNodeFor(node);
        out.writeTriple(aggregate, RDF.TYPE, vREVIEWAGG.ReviewAggregate);

        // All field lookups are scoped to the sub-tree of this entity.
        final HTMLDocument fragment = new HTMLDocument(node);
        addRating(fragment, aggregate);
        addWorst(fragment, aggregate);
        addBest(fragment, aggregate);
        addAverage(fragment, aggregate);
        addSummary(fragment, aggregate);
        addType(fragment, aggregate);
        addItem(fragment, aggregate);
        addCount(fragment, aggregate);
        addVotes(fragment, aggregate);

        ((TagSoupExtractionResult) out).addResourceRoot(
                DomUtils.getXPathListForNode(node), aggregate, this.getClass());
        return true;
    }

    /**
     * Passes the singular {@code rating} field of the fragment to
     * {@code conditionallyAddStringProperty} with the review {@code rating} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addRating(HTMLDocument fragment, Resource aggregate) {
        final TextField rating = fragment.getSingularTextField("rating");
        conditionallyAddStringProperty(rating.source(), aggregate, vREVIEW.rating, rating.value());
    }

    /**
     * Passes the singular {@code worst} field of the fragment to
     * {@code conditionallyAddStringProperty} with the aggregate {@code worst} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addWorst(HTMLDocument fragment, Resource aggregate) {
        final TextField worst = fragment.getSingularTextField("worst");
        conditionallyAddStringProperty(worst.source(), aggregate, vREVIEWAGG.worst, worst.value());
    }

    /**
     * Passes the singular {@code best} field of the fragment to
     * {@code conditionallyAddStringProperty} with the aggregate {@code best} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addBest(HTMLDocument fragment, Resource aggregate) {
        final TextField best = fragment.getSingularTextField("best");
        conditionallyAddStringProperty(best.source(), aggregate, vREVIEWAGG.best, best.value());
    }

    /**
     * Passes the singular {@code average} field of the fragment to
     * {@code conditionallyAddStringProperty} with the aggregate {@code average} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addAverage(HTMLDocument fragment, Resource aggregate) {
        final TextField average = fragment.getSingularTextField("average");
        conditionallyAddStringProperty(average.source(), aggregate, vREVIEWAGG.average, average.value());
    }

    /**
     * Passes the singular {@code summary} field of the fragment to
     * {@code conditionallyAddStringProperty} with the review {@code title} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addSummary(HTMLDocument fragment, Resource aggregate) {
        final TextField summary = fragment.getSingularTextField("summary");
        conditionallyAddStringProperty(summary.source(), aggregate, vREVIEW.title, summary.value());
    }

    /**
     * Passes the singular {@code type} field of the fragment to
     * {@code conditionallyAddStringProperty} with the review {@code type} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addType(HTMLDocument fragment, Resource aggregate) {
        final TextField type = fragment.getSingularTextField("type");
        conditionallyAddStringProperty(type.source(), aggregate, vREVIEW.type, type.value());
    }

    /**
     * Processes every element of the fragment carrying the {@code item} class: each one is
     * described as its own resource and then linked to the aggregate through the
     * {@code hasReview} property, with the item resource as subject.
     *
     * @param fragment the entity sub-document
     * @param aggregate the blank node of the aggregate, used as the object of the link
     * @throws ExtractionException propagated from describing an item
     */
    private void addItem(HTMLDocument fragment, BNode aggregate) throws ExtractionException {
        final List<Node> itemNodes = fragment.findAllByClassName("item");
        for (Node itemNode : itemNodes) {
            final Resource itemResource = describeItem(new HTMLDocument(itemNode));
            addBNodeProperty(itemNode, itemResource, vREVIEW.hasReview, aggregate);
        }
    }

    /**
     * Builds the resource describing a reviewed item from its {@code fn}, {@code url} and
     * {@code photo} fields. URL values are resolved against the document returned by
     * {@code getHTMLDocument()}, not against the item sub-document.
     *
     * @param itemDoc the sub-document rooted at the {@code item} element
     * @return the blank node obtained for the root of {@code itemDoc}
     * @throws ExtractionException declared by the original helper; presumably raised while
     *                             resolving IRIs (confirm against the superclass and HTMLDocument)
     */
    private Resource describeItem(HTMLDocument itemDoc) throws ExtractionException {
        final Resource itemResource = getBlankNodeFor(itemDoc.getDocument());

        final TextField name = itemDoc.getSingularTextField("fn");
        conditionallyAddStringProperty(name.source(), itemResource, vVCARD.fn, name.value());

        final TextField link = itemDoc.getSingularUrlField("url");
        conditionallyAddResourceProperty(
                itemResource, vVCARD.url, getHTMLDocument().resolveIRI(link.value()));

        final TextField[] photos = itemDoc.getPluralUrlField("photo");
        for (int i = 0; i < photos.length; i++) {
            addIRIProperty(
                    itemResource, vVCARD.photo, getHTMLDocument().resolveIRI(photos[i].value()));
        }
        return itemResource;
    }

    /**
     * Passes the singular {@code count} field of the fragment to
     * {@code conditionallyAddStringProperty} with the aggregate {@code count} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addCount(HTMLDocument fragment, Resource aggregate) {
        final TextField count = fragment.getSingularTextField("count");
        conditionallyAddStringProperty(count.source(), aggregate, vREVIEWAGG.count, count.value());
    }

    /**
     * Passes the singular {@code votes} field of the fragment to
     * {@code conditionallyAddStringProperty} with the aggregate {@code votes} property.
     *
     * @param fragment the entity sub-document
     * @param aggregate the subject resource
     */
    private void addVotes(HTMLDocument fragment, Resource aggregate) {
        final TextField votes = fragment.getSingularTextField("votes");
        conditionallyAddStringProperty(votes.source(), aggregate, vREVIEWAGG.votes, votes.value());
    }
}