blob: 4f88ab9769d4d8963511a2d3e4c1a5c55aa5f781 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.easy.json.parser;
import org.apache.drill.exec.store.easy.json.parser.MessageParser.MessageContextException;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonToken;
/**
* The root parsers are special: they must detect EOF. Drill supports
* top-level objects either enclosed in an array (which forms legal
* JSON), or as the
* <a href="http://jsonlines.org/">jsonlines</a> format, restricted
* to a list of objects (but not scalars or arrays.) Although jsonlines
* requires newline separators between objects, this parser allows
* any amount of whitespace, including none.
*/
public abstract class RootParser {
protected static final Logger logger = LoggerFactory.getLogger(RootParser.class);
private final JsonStructureParser structParser;
protected final ObjectParser rootObject;
public RootParser(JsonStructureParser structParser) {
this.structParser = structParser;
this.rootObject = structParser.fieldFactory().rootParser();
}
/**
* Parse one data object. This is the "root" object which may contain
* nested objects. Overridden to handle different end-of-data indicators
* for different contexts.
*
* @return {@code true} if an object was found, {@code false} if the
* end of data was reached.
*/
public abstract boolean parseRoot(TokenIterator tokenizer);
/**
* Parse one data object. This is the "root" object which may contain
* nested objects. Called when the outer parser detects a start
* object token for a data object.
*
* @return {@code true} if an object was found, {@code false} if the
* end of data was reached.
*/
protected boolean parseRootObject(JsonToken token, TokenIterator tokenizer) {
// Position: ^ ?
switch (token) {
case NOT_AVAILABLE:
return false; // Should never occur
case START_OBJECT:
// Position: { ^
rootObject.parse(tokenizer);
break;
default:
// Position ~{ ^
// Not a valid object.
// Won't actually get here: the Jackson parser prevents it.
throw errorFactory().syntaxError(token); // Nothing else is valid
}
return true;
}
protected ErrorFactory errorFactory() {
return structParser.errorFactory();
}
/**
* Parser for a <a href="http://jsonlines.org/">jsonlines</a>-style
* data set which consists of a series of objects. EOF from the parser
* indicates the end of the data set.
*/
public static class RootObjectParser extends RootParser {
public RootObjectParser(JsonStructureParser structParser) {
super(structParser);
}
@Override
public boolean parseRoot(TokenIterator tokenizer) {
JsonToken token = tokenizer.next();
if (token == null) {
// Position: EOF ^
return false;
} else if (token == JsonToken.START_OBJECT) {
return parseRootObject(token, tokenizer);
} else {
throw errorFactory().syntaxError(token); // Nothing else is valid
}
}
}
/**
* Parser for a compliant JSON data set which consists of an
* array at the top level, where each element of the array is a
* JSON object that represents a data record. A closing array
* bracket indicates end of data.
*/
public static class RootArrayParser extends RootParser {
public RootArrayParser(JsonStructureParser structParser) {
super(structParser);
}
@Override
public boolean parseRoot(TokenIterator tokenizer) {
JsonToken token = tokenizer.next();
if (token == null) {
// Position: { ... EOF ^
// Saw EOF, but no closing ]. Warn and ignore.
// Note that the Jackson parser won't let us get here;
// it will have already thrown a syntax error.
logger.warn("Failed to close outer array. {}",
tokenizer.context());
return false;
}
switch (token) {
case END_ARRAY:
return false;
case START_OBJECT:
return parseRootObject(token, tokenizer);
default:
throw errorFactory().syntaxError(token); // Nothing else is valid
}
}
}
/**
* Parser for data embedded within a message structure which is
* encoded as an array of objects. Example:
* <pre><code>
* { status: "ok", results: [ { ... }, { ... } ] }</code></pre>
* <p>
* The closing array bracket indicates the end of data; the
* message parser will parse any content after the closing
* bracket.
*/
public static class EmbeddedArrayParser extends RootParser {
private final MessageParser messageParser;
public EmbeddedArrayParser(JsonStructureParser structParser, MessageParser messageParser) {
super(structParser);
this.messageParser = messageParser;
}
@Override
public boolean parseRoot(TokenIterator tokenizer) {
JsonToken token = tokenizer.requireNext();
switch (token) {
case END_ARRAY:
break;
case START_OBJECT:
return parseRootObject(token, tokenizer);
default:
throw errorFactory().syntaxError(token); // Nothing else is valid
}
// Parse the trailing message content.
try {
messageParser.parseSuffix(tokenizer);
return false;
} catch (MessageContextException e) {
throw errorFactory().messageParseError(e);
}
}
}
/**
* Parser for data embedded within a message structure which is encoded
* as a single JSON object. Example:
* <pre><code>
* { status: "ok", results: { ... } }</code></pre>
* <p>
* The parser counts over the single object. The
* message parser will parse any content after the closing
* bracket.
*/
public static class EmbeddedObjectParser extends RootParser {
private final MessageParser messageParser;
private int objectCount;
public EmbeddedObjectParser(JsonStructureParser structParser, MessageParser messageParser) {
super(structParser);
this.messageParser = messageParser;
}
@Override
public boolean parseRoot(TokenIterator tokenizer) {
Preconditions.checkState(objectCount <= 1);
if (objectCount == 0) {
objectCount++;
JsonToken token = tokenizer.requireNext();
if (token == JsonToken.START_OBJECT) {
return parseRootObject(token, tokenizer);
} else {
throw errorFactory().syntaxError(token); // Nothing else is valid
}
}
try {
messageParser.parseSuffix(tokenizer);
return false;
} catch (MessageContextException e) {
throw errorFactory().messageParseError(e);
}
}
}
}