| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cocoon.components.search; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.xml.sax.Attributes; |
| import org.xml.sax.ContentHandler; |
| import org.xml.sax.Locator; |
| import org.xml.sax.helpers.AttributesImpl; |
| |
| import java.util.ArrayList; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Stack; |
| |
| /** |
| * Parse XML and generate lucene document(s) |
| * |
| * can now be configured via SimpleLuceneXMLIndexerImpl |
| * to store specific tags in Lucene, so that you can |
| * display them with hits. |
| * |
| * @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a> |
| * @author <a href="mailto:jeremy@apache.org">Jeremy Quinn</a> |
| * @version CVS $Id$ |
| */ |
| public class LuceneIndexContentHandler implements ContentHandler |
| { |
| public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0"; |
| |
| /** |
| * If this attribute is specified on element, values of all attributes |
| * are added to the text of the element, and to the document |
| * body text |
| */ |
| public static final String LUCENE_ATTR_TO_TEXT_ATTRIBUTE = "text-attr"; |
| |
| StringBuffer bodyText; |
| private List documents; |
| private Document bodyDocument; |
| private Stack elementStack; |
| private HashSet fieldTags; |
| |
| /** |
| * Constructor for the LuceneIndexContentHandler object |
| */ |
| public LuceneIndexContentHandler() { |
| this.bodyText = new StringBuffer(); |
| this.bodyDocument = new Document(); |
| this.documents = new ArrayList(); |
| this.documents.add(this.bodyDocument); |
| this.elementStack = new Stack(); |
| this.fieldTags = new HashSet(); |
| } |
| |
| /** |
| * Sets the fieldTags attribute of the LuceneIndexContentHandler object |
| * |
| * @param fieldTags The new fieldTags value |
| */ |
| public void setFieldTags(HashSet fieldTags) { |
| this.fieldTags = fieldTags; |
| } |
| |
| /** |
| * Sets the documentLocator attribute of the LuceneIndexContentHandler object |
| * |
| * @param locator The new documentLocator value |
| */ |
| public void setDocumentLocator(Locator locator) { } |
| |
| public List allDocuments() { |
| return documents; |
| } |
| |
| public Iterator iterator() { |
| return documents.iterator(); |
| } |
| |
| public void characters(char[] ch, int start, int length) { |
| if (ch.length > 0 && start >= 0 && length > 1) { |
| if (elementStack.size() > 0) { |
| IndexHelperField tos = (IndexHelperField) elementStack.peek(); |
| tos.appendText(ch, start, length); |
| } |
| bodyText.append(' '); |
| bodyText.append(ch, start, length); |
| } |
| } |
| |
| public void endDocument() { |
| bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, bodyText.toString())); |
| } |
| |
| public void endElement(String namespaceURI, String localName, String qName) { |
| IndexHelperField tos = (IndexHelperField) elementStack.pop(); |
| String lname = tos.getLocalFieldName(); |
| StringBuffer text = tos.getText(); |
| |
| // (VG): Atts are never null, see startElement |
| Attributes atts = tos.getAttributes(); |
| boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ATTR_TO_TEXT_ATTRIBUTE) != -1; |
| for (int i = 0; i < atts.getLength(); i++) { |
| if (LUCENE_URI.equals(atts.getURI(i))) continue; |
| |
| String atts_lname = atts.getLocalName(i); |
| String atts_value = atts.getValue(i); |
| bodyDocument.add(Field.UnStored(lname + "@" + atts_lname, atts_value)); |
| if (attributesToText) { |
| text.append(' '); |
| text.append(atts_value); |
| bodyText.append(' '); |
| bodyText.append(atts_value); |
| } |
| } |
| |
| if (text != null && text.length() > 0) { |
| if (isFieldTag(lname)) { |
| bodyDocument.add(Field.UnIndexed(lname, text.toString())); |
| } |
| bodyDocument.add(Field.UnStored(lname, text.toString())); |
| } |
| } |
| |
| public void endPrefixMapping(String prefix) { } |
| |
| public void ignorableWhitespace(char[] ch, int start, int length) { } |
| |
| public void processingInstruction(String target, String data) { } |
| |
| public void skippedEntity(String name) { } |
| |
| public void startDocument() { } |
| |
| public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { |
| IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl(atts)); |
| elementStack.push(ihf); |
| } |
| |
| public void startPrefixMapping(String prefix, String uri) { } |
| |
| /** |
| * check if tag is a candidate for making into a Field |
| * |
| * @param tag local name of the tag we are processing |
| * @return boolean |
| */ |
| private boolean isFieldTag(String tag) { |
| // by default do not make field |
| if (fieldTags == null) { |
| return false; |
| } |
| Iterator i = fieldTags.iterator(); |
| while (i.hasNext()) { |
| if (tag.equals(i.next())) { |
| return true; |
| } |
| } |
| return false; |
| } |
| } |