blob: 73b8188b2ce974c49f574c02c54caea5317b4f98 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.components.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.helpers.AttributesImpl;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
/**
* SAX content handler that parses XML and generates Lucene document(s).
*
* Can be configured via SimpleLuceneXMLIndexerImpl
* to store the text of specific tags in Lucene, so that it can
* be displayed with search hits.
*
* @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
* @author <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
* @version CVS $Id$
*/
public class LuceneIndexContentHandler implements ContentHandler
{
    /** Namespace URI of the attributes that control this handler. */
    public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";

    /**
     * If this attribute is specified on element, values of all attributes
     * are added to the text of the element, and to the document
     * body text
     */
    public static final String LUCENE_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";

    /** Accumulated text of the whole document; becomes the body field in {@link #endDocument()}. */
    StringBuffer bodyText;

    /** All documents produced by this handler; always contains {@link #bodyDocument}. */
    private final List documents;

    /** The Lucene document that receives every field created by this handler. */
    private final Document bodyDocument;

    /** Stack of {@link IndexHelperField}s, one per currently open element. */
    private final Stack elementStack;

    /** Local names of the tags whose text is additionally added as an unindexed field; may be null. */
    private HashSet fieldTags;

    /**
     * Constructor for the LuceneIndexContentHandler object
     */
    public LuceneIndexContentHandler() {
        this.bodyText = new StringBuffer();
        this.bodyDocument = new Document();
        this.documents = new ArrayList();
        this.documents.add(this.bodyDocument);
        this.elementStack = new Stack();
        this.fieldTags = new HashSet();
    }

    /**
     * Sets the fieldTags attribute of the LuceneIndexContentHandler object
     *
     * @param fieldTags The new fieldTags value; <code>null</code> means that
     *                  no tag is treated as a field tag
     */
    public void setFieldTags(HashSet fieldTags) {
        this.fieldTags = fieldTags;
    }

    /**
     * Sets the documentLocator attribute of the LuceneIndexContentHandler object.
     * The locator is not used by this handler.
     *
     * @param locator The new documentLocator value
     */
    public void setDocumentLocator(Locator locator) { }

    /**
     * Returns the list of documents built by this handler.
     *
     * @return the internal (live) list of documents
     */
    public List allDocuments() {
        return documents;
    }

    /**
     * Returns an iterator over the documents built by this handler.
     *
     * @return iterator over the internal document list
     */
    public Iterator iterator() {
        return documents.iterator();
    }

    /**
     * Appends a chunk of character data to the innermost open element
     * (if any) and, preceded by a single space, to the document body text.
     *
     * @param ch     buffer holding the characters
     * @param start  offset of the first character in <code>ch</code>
     * @param length number of characters to use
     */
    public void characters(char[] ch, int start, int length) {
        // Accept every non-empty chunk. The former "length > 1" test silently
        // dropped one-character content such as <grade>A</grade>.
        if (ch.length > 0 && start >= 0 && length > 0) {
            if (!elementStack.isEmpty()) {
                IndexHelperField tos = (IndexHelperField) elementStack.peek();
                tos.appendText(ch, start, length);
            }
            bodyText.append(' ');
            bodyText.append(ch, start, length);
        }
    }

    /**
     * Adds the accumulated body text to the body document under
     * {@link LuceneXMLIndexer#BODY_FIELD}.
     */
    public void endDocument() {
        bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, bodyText.toString()));
    }

    /**
     * Closes the innermost open element and turns it into Lucene fields.
     * <p>
     * Every attribute outside the {@link #LUCENE_URI} namespace is added as
     * field <code>element@attribute</code>. If the element carries the
     * {@link #LUCENE_ATTR_TO_TEXT_ATTRIBUTE} attribute, those attribute values
     * are also appended to the element text and to the body text. Non-empty
     * element text is added as a field named after the element, and
     * additionally as an unindexed field when the element is a configured
     * field tag.
     *
     * @param namespaceURI namespace of the element (unused)
     * @param localName    local name of the element (unused, the name
     *                     recorded in startElement is used instead)
     * @param qName        qualified name of the element (unused)
     */
    public void endElement(String namespaceURI, String localName, String qName) {
        IndexHelperField tos = (IndexHelperField) elementStack.pop();
        String lname = tos.getLocalFieldName();
        StringBuffer text = tos.getText();

        // (VG): Atts are never null, see startElement
        Attributes atts = tos.getAttributes();
        boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ATTR_TO_TEXT_ATTRIBUTE) != -1;

        for (int i = 0; i < atts.getLength(); i++) {
            // Control attributes are instructions for this handler, not content.
            if (LUCENE_URI.equals(atts.getURI(i))) {
                continue;
            }

            String attName = atts.getLocalName(i);
            String attValue = atts.getValue(i);
            bodyDocument.add(Field.UnStored(lname + "@" + attName, attValue));

            if (attributesToText) {
                // text is null-checked further down, so guard it here as well
                // instead of risking a NullPointerException.
                if (text != null) {
                    text.append(' ');
                    text.append(attValue);
                }
                bodyText.append(' ');
                bodyText.append(attValue);
            }
        }

        if (text != null && text.length() > 0) {
            String value = text.toString();
            if (isFieldTag(lname)) {
                bodyDocument.add(Field.UnIndexed(lname, value));
            }
            bodyDocument.add(Field.UnStored(lname, value));
        }
    }

    /** Not used by this handler. */
    public void endPrefixMapping(String prefix) { }

    /** Ignorable whitespace is not indexed. */
    public void ignorableWhitespace(char[] ch, int start, int length) { }

    /** Processing instructions are not indexed. */
    public void processingInstruction(String target, String data) { }

    /** Skipped entities are not indexed. */
    public void skippedEntity(String name) { }

    /** Not used by this handler. */
    public void startDocument() { }

    /**
     * Opens a new element: records its names and a copy of its attributes
     * on the element stack until the matching endElement call.
     *
     * @param namespaceURI namespace of the element (unused)
     * @param localName    local name of the element
     * @param qName        qualified name of the element
     * @param atts         attributes of the element; copied because the
     *                     instance is only needed later, in endElement
     */
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
        IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl(atts));
        elementStack.push(ihf);
    }

    /** Not used by this handler. */
    public void startPrefixMapping(String prefix, String uri) { }

    /**
     * check if tag is a candidate for making into a Field
     *
     * @param tag local name of the tag we are processing
     * @return <code>true</code> if the tag is one of the configured field
     *         tags; <code>false</code> otherwise, or if no field tags are set
     */
    private boolean isFieldTag(String tag) {
        // by default do not make field
        return fieldTags != null && fieldTags.contains(tag);
    }
}