blob: 0b55ca353b84aa50a649e9c4d7baaa8dff275ce1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.impl;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.InputStream;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.common.EmptyEntityResolver;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.XMLErrorLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
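/**
* A {@link ResponseParser} for Solr's XML response format. The response is read with a
* StAX {@link XMLStreamReader} and converted into a {@link NamedList}.
*
* @since solr 1.3
*/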
public class XMLResponseParser extends ResponseParser
{
public static final String XML_CONTENT_TYPE = "application/xml; charset=UTF-8";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
// reuse the factory among all parser instances so things like string caches
// won't be duplicated
static final XMLInputFactory factory;
static {
factory = XMLInputFactory.newInstance();
EmptyEntityResolver.configureXMLInputFactory(factory);
try {
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
// XMLInputFactory, as that implementation tries to cache and reuse the
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
// prevents this.
// All other known open-source stax parsers (and the bea ref impl)
// have thread-safe factories.
factory.setProperty("reuse-instance", Boolean.FALSE);
}
catch( IllegalArgumentException ex ) {
// Other implementations will likely throw this exception since "reuse-instance"
// isimplementation specific.
log.debug( "Unable to set the 'reuse-instance' property for the input factory: {}", factory );
}
factory.setXMLReporter(xmllog);
}
/**
 * Creates a parser. The constructor body is empty; the StAX input factory used for
 * parsing is a static field shared by all instances of this class.
 */
public XMLResponseParser() {}
/**
 * Returns the writer type handled by this parser.
 *
 * @return always the string {@code "xml"}
 */
@Override
public String getWriterType()
{
return "xml";
}
/**
 * Returns the content type associated with this parser.
 *
 * @return {@link #XML_CONTENT_TYPE}, i.e. {@code "application/xml; charset=UTF-8"}
 */
@Override
public String getContentType() {
return XML_CONTENT_TYPE;
}
/**
 * Parses an XML response supplied as a character stream.
 *
 * @param in character stream containing the XML response
 * @return the parsed response
 * @throws SolrException with {@code SERVER_ERROR} if the stream reader cannot be
 *     created or the content cannot be parsed
 */
@Override
public NamedList<Object> processResponse(Reader in) {
  final XMLStreamReader xmlReader;
  try {
    xmlReader = factory.createXMLStreamReader(in);
  } catch (XMLStreamException cause) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "parsing error", cause);
  }
  return processResponse(xmlReader);
}
/**
 * Parses an XML response supplied as a byte stream.
 *
 * @param in byte stream containing the XML response
 * @param encoding character encoding handed to the StAX factory for decoding {@code in}
 * @return the parsed response
 * @throws SolrException with {@code SERVER_ERROR} if the stream reader cannot be
 *     created or the content cannot be parsed
 */
@Override
public NamedList<Object> processResponse(InputStream in, String encoding) {
  final XMLStreamReader xmlReader;
  try {
    xmlReader = factory.createXMLStreamReader(in, encoding);
  } catch (XMLStreamException cause) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "parsing error", cause);
  }
  return processResponse(xmlReader);
}
/**
 * Reads the whole document from {@code parser} and converts its root element into a
 * named list. The parser is always closed before this method returns.
 *
 * <p>A root element named {@code response} or {@code result} is read with
 * {@link #readNamedList(XMLStreamReader)}; a root element named {@code solr} yields an
 * empty map; any other root element, or a second top-level element, is an error.
 *
 * @param parser stream reader positioned before the root element
 * @return the parsed response, or {@code null} if no start element was seen
 * @throws SolrException with {@code SERVER_ERROR} wrapping any failure raised while parsing
 */
private NamedList<Object> processResponse(XMLStreamReader parser) {
  try {
    NamedList<Object> response = null;
    for (int event = parser.next();
        event != XMLStreamConstants.END_DOCUMENT;
        event = parser.next()) {
      if (event != XMLStreamConstants.START_ELEMENT) {
        continue; // only start elements matter at the top level
      }
      if (response != null) {
        throw new Exception( "already read the response!" );
      }
      // the only top-level element is "response" (or "result")
      String name = parser.getLocalName();
      if (name.equals("response") || name.equals("result")) {
        response = readNamedList(parser);
      } else if (name.equals("solr")) {
        return new SimpleOrderedMap<>();
      } else {
        throw new Exception( "really needs to be response or result. " +
            "not:"+parser.getLocalName() );
      }
    }
    return response;
  } catch (Exception ex) {
    // Everything, including the plain Exceptions thrown above, is reported uniformly.
    throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "parsing error", ex );
  } finally {
    try {
      parser.close();
    } catch (Exception ex) {
      // Closing is best effort: a failure here must not mask the parse result or the
      // parse error, but it should not disappear without a trace either.
      log.debug("Unable to close the XML stream reader", ex);
    }
  }
}
/**
 * The element names this parser understands. Leaf types convert the element's text
 * with {@link #read(String)}; for the non-leaf types {@code read} always returns
 * {@code null}.
 */
protected enum KnownType {
  STR(true) {
    @Override
    public String read(String txt) {
      return txt;
    }
  },
  INT(true) {
    @Override
    public Integer read(String txt) {
      return Integer.valueOf(txt);
    }
  },
  FLOAT(true) {
    @Override
    public Float read(String txt) {
      return Float.valueOf(txt);
    }
  },
  DOUBLE(true) {
    @Override
    public Double read(String txt) {
      return Double.valueOf(txt);
    }
  },
  LONG(true) {
    @Override
    public Long read(String txt) {
      return Long.valueOf(txt);
    }
  },
  BOOL(true) {
    @Override
    public Boolean read(String txt) {
      return Boolean.valueOf(txt);
    }
  },
  NULL(true) {
    @Override
    public Object read(String txt) {
      return null;
    }
  },
  DATE(true) {
    /** Parses an ISO-8601 instant; logs and returns {@code null} if parsing fails. */
    @Override
    public Date read(String txt) {
      Date parsed = null;
      try {
        parsed = new Date(Instant.parse(txt).toEpochMilli());
      } catch (Exception ex) {
        if (log.isInfoEnabled()) {
          log.info("Error reading date: {}", txt, ex);
        }
      }
      return parsed;
    }
  },
  ARR(false) {
    @Override
    public Object read(String txt) {
      return null;
    }
  },
  LST(false) {
    @Override
    public Object read(String txt) {
      return null;
    }
  },
  RESULT(false) {
    @Override
    public Object read(String txt) {
      return null;
    }
  },
  DOC(false) {
    @Override
    public Object read(String txt) {
      return null;
    }
  };

  /** Whether elements of this type carry a text value rather than child elements. */
  final boolean isLeaf;

  KnownType(boolean isLeaf) {
    this.isLeaf = isLeaf;
  }

  /**
   * Converts the text content of an element of this type.
   *
   * @param txt text content of the element
   * @return the converted value
   */
  public abstract Object read(String txt);

  /**
   * Looks up a type by element name, ignoring case.
   *
   * @param v element name, may be {@code null}
   * @return the matching type, or {@code null} if {@code v} is {@code null} or unknown
   */
  public static KnownType get(String v) {
    if (v == null) {
      return null;
    }
    try {
      return KnownType.valueOf(v.toUpperCase(Locale.ROOT));
    } catch (Exception ignored) {
      // not one of the known element names; callers treat null as "unknown"
      return null;
    }
  }
}
/**
 * Reads the children of the element the parser is positioned on into a named list and
 * consumes that element's end tag.
 *
 * <p>Leaf children are converted from their trimmed text with {@link KnownType#read};
 * {@code lst}, {@code arr}, {@code result} and {@code doc} children are delegated to the
 * corresponding {@code read*} method. Each entry is keyed by the child's {@code name}
 * attribute, which may be absent, in which case the key is {@code null}.
 *
 * @param parser stream reader positioned on a start element
 * @return the entries in document order
 * @throws XMLStreamException if the underlying reader fails or a non-leaf type has no handler
 * @throws RuntimeException if the parser is not on a start element or a child element
 *     name is not a {@link KnownType}
 */
protected NamedList<Object> readNamedList( XMLStreamReader parser ) throws XMLStreamException {
  final int startEvent = parser.getEventType();
  if (startEvent != XMLStreamConstants.START_ELEMENT) {
    throw new RuntimeException( "must be start element, not: "+startEvent );
  }

  final NamedList<Object> entries = new SimpleOrderedMap<>();
  final StringBuilder text = new StringBuilder();
  KnownType currentType = null;
  String currentName = null;
  // Number of leaf elements currently open; drops below zero on our own end tag.
  int openLeaves = 0;

  for (;;) {
    final int event = parser.next();

    if (event == XMLStreamConstants.START_ELEMENT) {
      openLeaves++;
      text.setLength(0); // forget text collected before this element
      currentType = KnownType.get(parser.getLocalName());
      if (currentType == null) {
        throw new RuntimeException( "this must be known type! not: "+parser.getLocalName() );
      }

      // The name in a NamedList can legitimately be null, so a missing attribute is fine.
      currentName = null;
      final int attrCount = parser.getAttributeCount();
      for (int a = 0; a < attrCount; a++) {
        if ("name".equals(parser.getAttributeLocalName(a))) {
          currentName = parser.getAttributeValue(a);
          break;
        }
      }

      if (!currentType.isLeaf) {
        final Object child;
        switch (currentType) {
          case LST:
            child = readNamedList(parser);
            break;
          case ARR:
            child = readArray(parser);
            break;
          case RESULT:
            child = readDocuments(parser);
            break;
          case DOC:
            child = readDocument(parser);
            break;
          default:
            throw new XMLStreamException( "branch element not handled!", parser.getLocation() );
        }
        entries.add(currentName, child);
        openLeaves--; // the delegate already consumed the child's end tag
      }
    } else if (event == XMLStreamConstants.END_ELEMENT) {
      if (--openLeaves < 0) {
        return entries;
      }
      entries.add(currentName, currentType.read(text.toString().trim()));
    } else if (event == XMLStreamConstants.SPACE
        || event == XMLStreamConstants.CDATA
        || event == XMLStreamConstants.CHARACTERS) {
      text.append(parser.getText());
    }
  }
}
/**
 * Reads the children of an {@code arr} element into a list and consumes the element's
 * end tag.
 *
 * <p>Elements need not share a type. Leaf children are converted from their trimmed
 * text with {@link KnownType#read}; a {@code null} result is only accepted for
 * {@link KnownType#NULL}. Container children are delegated to the corresponding
 * {@code read*} method.
 *
 * @param parser stream reader positioned on the {@code arr} start element
 * @return the values in document order
 * @throws XMLStreamException if the underlying reader fails, a value cannot be read, or
 *     a non-leaf type has no handler
 * @throws RuntimeException if the parser is not on an {@code arr} start element or a
 *     child element name is not a {@link KnownType}
 */
protected List<Object> readArray( XMLStreamReader parser ) throws XMLStreamException {
  final int startEvent = parser.getEventType();
  if (startEvent != XMLStreamConstants.START_ELEMENT) {
    throw new RuntimeException( "must be start element, not: "+startEvent );
  }
  if (!"arr".equals(parser.getLocalName().toLowerCase(Locale.ROOT))) {
    throw new RuntimeException( "must be 'arr', not: "+parser.getLocalName() );
  }

  final List<Object> values = new ArrayList<>();
  final StringBuilder text = new StringBuilder();
  KnownType currentType = null;
  // Number of leaf elements currently open; drops below zero on our own end tag.
  int openLeaves = 0;

  for (;;) {
    final int event = parser.next();

    if (event == XMLStreamConstants.START_ELEMENT) {
      openLeaves++;
      final KnownType elementType = KnownType.get(parser.getLocalName());
      if (elementType == null) {
        throw new RuntimeException( "this must be known type! not: "+parser.getLocalName() );
      }
      // There is no rule that all array elements have the same type, so simply track
      // the type of the element being read.
      currentType = elementType;
      text.setLength(0); // forget text collected before this element

      if (!currentType.isLeaf) {
        final Object child;
        switch (currentType) {
          case LST:
            child = readNamedList(parser);
            break;
          case ARR:
            child = readArray(parser);
            break;
          case RESULT:
            child = readDocuments(parser);
            break;
          case DOC:
            child = readDocument(parser);
            break;
          default:
            throw new XMLStreamException( "branch element not handled!", parser.getLocation() );
        }
        values.add(child);
        openLeaves--; // the delegate already consumed the child's end tag
      }
    } else if (event == XMLStreamConstants.END_ELEMENT) {
      if (--openLeaves < 0) {
        return values; // this is the end tag of the array itself
      }
      final Object value = currentType.read(text.toString().trim());
      if (value == null && currentType != KnownType.NULL) {
        throw new XMLStreamException( "error reading value:"+currentType, parser.getLocation() );
      }
      values.add(value);
    } else if (event == XMLStreamConstants.SPACE
        || event == XMLStreamConstants.CDATA
        || event == XMLStreamConstants.CHARACTERS) {
      text.append(parser.getText());
    }
  }
}
/**
 * Reads a result element: its {@code numFound}, {@code start} and {@code maxScore}
 * attributes and every nested {@code doc} element, up to and including the first end
 * tag seen at this level.
 *
 * @param parser stream reader positioned on the start element whose attributes and
 *     children are to be read
 * @return the document list with the attribute values applied
 * @throws XMLStreamException if the underlying reader fails
 * @throws RuntimeException if a child element is not named {@code doc}
 */
protected SolrDocumentList readDocuments( XMLStreamReader parser ) throws XMLStreamException {
  final SolrDocumentList result = new SolrDocumentList();

  // Header values are carried as attributes of the current element.
  final int attrCount = parser.getAttributeCount();
  for (int a = 0; a < attrCount; a++) {
    final String attrName = parser.getAttributeLocalName(a);
    final String attrValue = parser.getAttributeValue(a);
    if ("numFound".equals(attrName)) {
      result.setNumFound(Long.parseLong(attrValue));
    } else if ("start".equals(attrName)) {
      result.setStart(Long.parseLong(attrValue));
    } else if ("maxScore".equals(attrName)) {
      result.setMaxScore(Float.parseFloat(attrValue));
    }
  }

  // Each start element must be a document; readDocument consumes its end tag, so the
  // first end tag seen here closes the result element itself.
  for (;;) {
    final int event = parser.next();
    if (event == XMLStreamConstants.END_ELEMENT) {
      return result;
    }
    if (event == XMLStreamConstants.START_ELEMENT) {
      if (!"doc".equals(parser.getLocalName())) {
        throw new RuntimeException( "should be doc! "+parser.getLocalName() + " :: " + parser.getLocation() );
      }
      result.add(readDocument(parser));
    }
  }
}
/**
 * Reads a {@code doc} element into a {@link SolrDocument} and consumes the element's
 * end tag.
 *
 * <p>Nested {@code doc} elements become child documents. Every other child must have a
 * {@code name} attribute: {@code arr} children add one field value per array element,
 * {@code lst} children add a named list, and leaf children add the value converted from
 * their trimmed text.
 *
 * @param parser stream reader positioned on the {@code doc} start element
 * @return the populated document
 * @throws XMLStreamException if the underlying reader fails, a field has no
 *     {@code name} attribute, a field is of an unsupported non-leaf type, or a leaf
 *     value reads as {@code null}
 * @throws RuntimeException if the parser is not on a {@code doc} start element or a
 *     child element name is not a {@link KnownType}
 */
protected SolrDocument readDocument( XMLStreamReader parser ) throws XMLStreamException {
  if (XMLStreamConstants.START_ELEMENT != parser.getEventType()) {
    throw new RuntimeException( "must be start element, not: "+parser.getEventType() );
  }
  if (!"doc".equals(parser.getLocalName().toLowerCase(Locale.ROOT))) {
    throw new RuntimeException( "must be 'doc', not: "+parser.getLocalName() );
  }

  SolrDocument doc = new SolrDocument();
  StringBuilder builder = new StringBuilder();
  KnownType type = null;
  String name = null;

  // Number of leaf elements currently open; drops below zero on our own end tag.
  int depth = 0;
  while (true) {
    switch (parser.next()) {
      case XMLStreamConstants.START_ELEMENT:
        depth++;
        builder.setLength(0); // reset the text
        type = KnownType.get(parser.getLocalName());
        if (type == null) {
          throw new RuntimeException( "this must be known type! not: "+parser.getLocalName() );
        }
        if (type == KnownType.DOC) {
          doc.addChildDocument(readDocument(parser));
          depth--; // the nested readDocument consumed the nested end tag
          continue; // there may be more child docs, or other fields
        }

        // Other than nested documents, every nested element requires a name.
        name = null;
        int cnt = parser.getAttributeCount();
        for (int i = 0; i < cnt; i++) {
          if ("name".equals(parser.getAttributeLocalName(i))) {
            name = parser.getAttributeValue(i);
            break;
          }
        }
        if (name == null) {
          throw new XMLStreamException( "requires 'name' attribute: "+parser.getLocalName(), parser.getLocation() );
        }

        if (type == KnownType.ARR) {
          // Multi-valued field: add each array element as a separate value.
          for (Object element : readArray(parser)) {
            doc.addField(name, element);
          }
          depth--; // readArray consumed the array's end tag
        } else if (type == KnownType.LST) {
          doc.addField(name, readNamedList(parser));
          depth--; // readNamedList consumed the list's end tag
        } else if (!type.isLeaf) {
          throw new XMLStreamException( "must be value or array", parser.getLocation() );
        }
        break;

      case XMLStreamConstants.END_ELEMENT:
        if (--depth < 0) {
          return doc;
        }
        Object val = type.read(builder.toString().trim());
        // Note: this also rejects KnownType.NULL fields, whose read() always returns null.
        if (val == null) {
          throw new XMLStreamException( "error reading value:"+type, parser.getLocation() );
        }
        doc.addField(name, val);
        break;

      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        builder.append(parser.getText());
        break;
    }
  }
}
}