blob: f6a15d075e679cd660ddf2956ea489ff1af7834d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.geo;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
/*
We accept either a whole type: Feature, like this:
{ "type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
[100.0, 1.0], [100.0, 0.0] ]
]
},
"properties": {
"prop0": "value0",
"prop1": {"this": "that"}
}
}
Or the inner object with type: Multi/Polygon.
Or a type: FeatureCollection, if it has only one Feature which is a Polygon or MultiPolygon.
type: MultiPolygon (union of polygons) is also accepted.
*/
/**
 * Does minimal parsing of a GeoJSON object, to extract either Polygon or MultiPolygon, either
 * directly as the top-level type, or if the top-level type is Feature, as the geometry of that
 * feature.
 *
 * <p>This is a small hand-written recursive descent parser over the input string. It is
 * deliberately lenient in a few places (for example a trailing comma before the closing brace of
 * an object is tolerated) and deliberately strict in others (only the {@code \\} and {@code \}{@code
 * uXXXX} string escapes are understood). Every syntax or structure problem is reported as a
 * {@link ParseException} whose error offset is the current parse position.
 *
 * <p>An instance holds parse state (position, discovered type and coordinates), so it is intended
 * for a single call to {@link #parse()}.
 */
@SuppressWarnings("unchecked")
class SimpleGeoJSONPolygonParser {
  /** The complete GeoJSON text being parsed. */
  final String input;

  /** Index into {@link #input} of the next character to consume. */
  private int upto;

  /** "Polygon" or "MultiPolygon" once a matching {@code type} key has been seen, else null. */
  private String polyType;

  /** The raw, still untyped {@code coordinates} array of the single accepted geometry, or null. */
  private List<Object> coordinates;

  /**
   * Creates a parser over the given GeoJSON text.
   *
   * @param input the GeoJSON text
   */
  public SimpleGeoJSONPolygonParser(String input) {
    this.input = input;
  }

  /**
   * Parses the whole input and converts the single Polygon or MultiPolygon found in it.
   *
   * @return one polygon for type Polygon, or one polygon per member for type MultiPolygon
   * @throws ParseException if the input is malformed, contains no supported geometry, or contains
   *     more than one geometry
   */
  public Polygon[] parse() throws ParseException {
    // parse entire object
    parseObject("");

    // make sure there's nothing left:
    readEnd();

    // The order of JSON object keys (type, geometry, coordinates in our case) can be arbitrary, so
    // we wait until we are done parsing to put the pieces together here:
    if (coordinates == null) {
      throw newParseException("did not see any polygon coordinates");
    }

    if (polyType == null) {
      throw newParseException("did not see type: Polygon or MultiPolygon");
    }

    if (polyType.equals("Polygon")) {
      return new Polygon[] {parsePolygon(coordinates)};
    } else {
      List<Polygon> polygons = new ArrayList<>();
      for (int i = 0; i < coordinates.size(); i++) {
        Object o = coordinates.get(i);
        if (o instanceof List == false) {
          // o may be null (a nested JSON object is recorded as null), so describe it null-safely
          throw newParseException(
              "elements of coordinates array should be an array, but got: " + describeClass(o));
        }
        polygons.add(parsePolygon((List<Object>) o));
      }
      return polygons.toArray(new Polygon[polygons.size()]);
    }
  }

  /**
   * Parses one JSON object starting at the current position, recording the polygon type and
   * coordinates when they appear at an accepted location, and validating any {@code crs} member.
   *
   * @param path the "address" by keys of where we are, e.g. geometry.coordinates; the empty string
   *     for the top-level object
   * @throws ParseException on malformed JSON or unsupported GeoJSON content
   */
  private void parseObject(String path) throws ParseException {
    scan('{');
    boolean first = true;
    while (true) {
      char ch = peek();
      if (ch == '}') {
        break;
      } else if (first == false) {
        if (ch == ',') {
          // ok
          upto++;
          ch = peek();
          if (ch == '}') {
            // a trailing comma before the closing brace is tolerated
            break;
          }
        } else {
          throw newParseException("expected , but got " + ch);
        }
      }

      first = false;

      int uptoStart = upto;
      String key = parseString();

      if (path.equals("crs.properties") && key.equals("href")) {
        // point the error at the start of the offending key
        upto = uptoStart;
        throw newParseException("cannot handle linked crs");
      }

      scan(':');

      Object o;

      ch = peek();

      // remember where the value starts so that errors about the value point at it
      uptoStart = upto;

      if (ch == '[') {
        o = parseArray(childPath(path, key));
      } else if (ch == '{') {
        parseObject(childPath(path, key));
        // nested objects are consumed for their side effects only
        o = null;
      } else if (ch == '"') {
        o = parseString();
      } else if (ch == 't') {
        scan("true");
        o = Boolean.TRUE;
      } else if (ch == 'f') {
        scan("false");
        o = Boolean.FALSE;
      } else if (ch == 'n') {
        scan("null");
        o = null;
      } else if (ch == '-' || ch == '.' || (ch >= '0' && ch <= '9')) {
        o = parseNumber();
      } else if (ch == '}') {
        // a key with no value directly before the closing brace ends the object
        break;
      } else {
        throw newParseException(
            "expected array, object, string or literal value, but got: " + ch);
      }

      if (path.equals("crs.properties") && key.equals("name")) {
        if (o instanceof String == false) {
          upto = uptoStart;
          throw newParseException("crs.properties.name should be a string, but saw: " + o);
        }
        String crs = (String) o;
        if (crs.startsWith("urn:ogc:def:crs:OGC") == false || crs.endsWith(":CRS84") == false) {
          upto = uptoStart;
          throw newParseException("crs must be CRS84 from OGC, but saw: " + o);
        }
      }

      if (key.equals("type") && path.startsWith("crs") == false) {
        if (o instanceof String == false) {
          upto = uptoStart;
          throw newParseException("type should be a string, but got: " + o);
        }
        String type = (String) o;
        if (type.equals("Polygon") && isValidGeometryPath(path)) {
          polyType = "Polygon";
        } else if (type.equals("MultiPolygon") && isValidGeometryPath(path)) {
          polyType = "MultiPolygon";
        } else if ((type.equals("FeatureCollection") || type.equals("Feature"))
            && (path.equals("features.[]") || path.equals(""))) {
          // OK, we recurse
        } else {
          upto = uptoStart;
          throw newParseException(
              "can only handle type FeatureCollection (if it has a single polygon geometry), Feature, Polygon or MultiPolygon, but got "
                  + type);
        }
      } else if (key.equals("coordinates") && isValidGeometryPath(path)) {
        if (o instanceof List == false) {
          upto = uptoStart;
          // o is null for a null literal or a nested object, so describe it null-safely
          throw newParseException("coordinates should be an array, but got: " + describeClass(o));
        }
        if (coordinates != null) {
          upto = uptoStart;
          throw newParseException("only one Polygon or MultiPolygon is supported");
        }
        coordinates = (List<Object>) o;
      }
    }

    scan('}');
  }

  /** Returns the path of the member {@code key} inside the object addressed by {@code path}. */
  private static String childPath(String path, String key) {
    if (path.length() == 0) {
      return key;
    } else {
      return path + "." + key;
    }
  }

  /** Returns the same text string concatenation of {@code o.getClass()} gives, or "null". */
  private static String describeClass(Object o) {
    return o == null ? "null" : o.getClass().toString();
  }

  /** Returns true if the object path is a valid location to see a Multi/Polygon geometry */
  private boolean isValidGeometryPath(String path) {
    return path.equals("") || path.equals("geometry") || path.equals("features.[].geometry");
  }

  /**
   * Converts one polygon's coordinates array: the first element is the outer boundary and every
   * further element is a hole.
   *
   * @param coordinates array of point arrays
   * @return the polygon with its holes
   * @throws ParseException if the array is empty or any element has the wrong shape
   */
  private Polygon parsePolygon(List<Object> coordinates) throws ParseException {
    if (coordinates.isEmpty()) {
      // without this check get(0) below would fail with IndexOutOfBoundsException
      throw newParseException("polygon coordinates array must not be empty");
    }

    List<Polygon> holes = new ArrayList<>();
    Object o = coordinates.get(0);
    if (o instanceof List == false) {
      throw newParseException(
          "first element of polygon array must be an array [[lat, lon], [lat, lon] ...] but got: "
              + o);
    }
    double[][] polyPoints = parsePoints((List<Object>) o);
    for (int i = 1; i < coordinates.size(); i++) {
      o = coordinates.get(i);
      if (o instanceof List == false) {
        throw newParseException(
            "elements of coordinates array must be an array [[lat, lon], [lat, lon] ...] but got: "
                + o);
      }
      double[][] holePoints = parsePoints((List<Object>) o);
      holes.add(new Polygon(holePoints[0], holePoints[1]));
    }
    return new Polygon(polyPoints[0], polyPoints[1], holes.toArray(new Polygon[holes.size()]));
  }

  /**
   * Parses an array of two-element points into a 2d double array.
   *
   * @param o array of points; each point is read as [lon, lat] (GeoJSON ordering)
   * @return a two-element array: index 0 holds the latitudes, index 1 the longitudes
   * @throws ParseException if any point is not a two-element array of numbers
   */
  private double[][] parsePoints(List<Object> o) throws ParseException {
    double[] lats = new double[o.size()];
    double[] lons = new double[o.size()];
    for (int i = 0; i < o.size(); i++) {
      Object point = o.get(i);
      if (point instanceof List == false) {
        throw newParseException(
            "elements of coordinates array must [lat, lon] array, but got: " + point);
      }
      List<Object> pointList = (List<Object>) point;
      if (pointList.size() != 2) {
        throw newParseException(
            "elements of coordinates array must [lat, lon] array, but got wrong element count: "
                + pointList);
      }
      if (pointList.get(0) instanceof Double == false) {
        throw newParseException(
            "elements of coordinates array must [lat, lon] array, but first element is not a Double: "
                + pointList.get(0));
      }
      if (pointList.get(1) instanceof Double == false) {
        throw newParseException(
            "elements of coordinates array must [lat, lon] array, but second element is not a Double: "
                + pointList.get(1));
      }

      // lon, lat ordering in GeoJSON!
      lons[i] = ((Double) pointList.get(0)).doubleValue();
      lats[i] = ((Double) pointList.get(1)).doubleValue();
    }

    return new double[][] {lats, lons};
  }

  /**
   * Parses one JSON array starting at the current position. Nested arrays become nested lists,
   * numbers become {@code Double}, strings become {@code String}, and nested objects are parsed
   * for their side effects and recorded as {@code null}.
   *
   * @param path the path of this array; elements are addressed as {@code path + ".[]"}
   * @return the parsed elements
   * @throws ParseException on malformed input or an unsupported element kind
   */
  private List<Object> parseArray(String path) throws ParseException {
    List<Object> result = new ArrayList<>();
    scan('[');
    while (upto < input.length()) {
      char ch = peek();
      if (ch == ']') {
        scan(']');
        return result;
      }

      if (result.size() > 0) {
        if (ch != ',') {
          throw newParseException("expected ',' separating list items, but got '" + ch + "'");
        }

        // skip the ,
        upto++;

        if (upto == input.length()) {
          throw newParseException("hit EOF while parsing array");
        }
        ch = peek();
      }

      Object o;
      if (ch == '[') {
        o = parseArray(path + ".[]");
      } else if (ch == '{') {
        // This is only used when parsing the "features" in type: FeatureCollection
        parseObject(path + ".[]");
        o = null;
      } else if (ch == '-' || ch == '.' || (ch >= '0' && ch <= '9')) {
        o = parseNumber();
      } else if (ch == '"') {
        o = parseString();
      } else {
        throw newParseException(
            "expected another array or number while parsing array, not '" + ch + "'");
      }

      result.add(o);
    }

    throw newParseException("hit EOF while reading array");
  }

  /**
   * Consumes the longest run of number characters at the current position and parses it as a
   * double.
   *
   * @return the value, always as a {@code Double}
   * @throws ParseException if the consumed characters are not a valid double; the position is
   *     reset to the start of the number first
   */
  private Number parseNumber() throws ParseException {
    StringBuilder b = new StringBuilder();
    int uptoStart = upto;
    while (upto < input.length()) {
      char ch = input.charAt(upto);
      // '+' is included so that an explicitly signed exponent such as 1e+2 is accepted
      if (ch == '-'
          || ch == '+'
          || ch == '.'
          || (ch >= '0' && ch <= '9')
          || ch == 'e'
          || ch == 'E') {
        upto++;
        b.append(ch);
      } else {
        break;
      }
    }

    // we only handle doubles
    try {
      return Double.parseDouble(b.toString());
    } catch (NumberFormatException nfe) {
      upto = uptoStart;
      ParseException pe = newParseException("could not parse number as double");
      // ParseException has no cause-taking constructor, so attach the cause afterwards
      pe.initCause(nfe);
      throw pe;
    }
  }

  /**
   * Parses one double-quoted JSON string starting at the current position (leading whitespace is
   * skipped). Only the {@code \\} and four-hex-digit unicode escapes are supported.
   *
   * @return the decoded string contents, without the surrounding quotes
   * @throws ParseException on an unsupported or malformed escape, or if the input ends before the
   *     closing quote
   */
  private String parseString() throws ParseException {
    scan('"');
    StringBuilder b = new StringBuilder();
    while (upto < input.length()) {
      char ch = input.charAt(upto);
      if (ch == '"') {
        upto++;
        return b.toString();
      }
      if (ch == '\\') {
        // an escaped character
        upto++;
        if (upto == input.length()) {
          throw newParseException("hit EOF inside string literal");
        }
        ch = input.charAt(upto);
        if (ch == 'u') {
          // 4 hex digit unicode BMP escape
          upto++;
          if (upto + 4 > input.length()) {
            throw newParseException("hit EOF inside string literal");
          }
          int codeUnit = 0;
          for (int i = 0; i < 4; i++) {
            int digit = hexValue(input.charAt(upto + i));
            if (digit == -1) {
              // point the error at the offending character
              upto += i;
              throw newParseException("invalid hex digit in unicode escape");
            }
            codeUnit = (codeUnit << 4) | digit;
          }
          // append as a char: append(int) would add the decimal digits of the value instead
          b.append((char) codeUnit);
          // step over the four hex digits so they are not read again as literal text
          upto += 4;
        } else if (ch == '\\') {
          b.append('\\');
          upto++;
        } else {
          // TODO: allow \n, \t, etc.???
          throw newParseException("unsupported string escape character \\" + ch);
        }
      } else {
        b.append(ch);
        upto++;
      }
    }

    throw newParseException("hit EOF inside string literal");
  }

  /** Returns the value of an ASCII hex digit, or -1 if {@code ch} is not one. */
  private static int hexValue(char ch) {
    if (ch >= '0' && ch <= '9') {
      return ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      return ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
      return ch - 'A' + 10;
    } else {
      return -1;
    }
  }

  /**
   * Skips whitespace and returns the next character without consuming it.
   *
   * @throws ParseException if only whitespace remains
   */
  private char peek() throws ParseException {
    while (upto < input.length()) {
      char ch = input.charAt(upto);
      if (isJSONWhitespace(ch)) {
        upto++;
        continue;
      }
      return ch;
    }

    throw newParseException("unexpected EOF");
  }

  /**
   * Scans across whitespace and consumes the expected character, or throws {@code ParseException}
   * if the character is wrong
   */
  private void scan(char expected) throws ParseException {
    while (upto < input.length()) {
      char ch = input.charAt(upto);
      if (isJSONWhitespace(ch)) {
        upto++;
        continue;
      }
      if (ch != expected) {
        throw newParseException("expected '" + expected + "' but got '" + ch + "'");
      }
      upto++;
      return;
    }
    throw newParseException("expected '" + expected + "' but got EOF");
  }

  /** Verifies that only whitespace remains, or throws {@code ParseException} */
  private void readEnd() throws ParseException {
    while (upto < input.length()) {
      char ch = input.charAt(upto);
      if (isJSONWhitespace(ch) == false) {
        throw newParseException("unexpected character '" + ch + "' after end of GeoJSON object");
      }
      upto++;
    }
  }

  /**
   * Scans the expected string, or throws {@code ParseException}. Unlike {@link #scan(char)} this
   * does not skip leading whitespace.
   */
  private void scan(String expected) throws ParseException {
    if (upto + expected.length() > input.length()) {
      throw newParseException("expected \"" + expected + "\" but hit EOF");
    }
    String subString = input.substring(upto, upto + expected.length());
    if (subString.equals(expected) == false) {
      throw newParseException("expected \"" + expected + "\" but got \"" + subString + "\"");
    }
    upto += expected.length();
  }

  /** Returns true for the four characters treated as insignificant whitespace between tokens. */
  private static boolean isJSONWhitespace(char ch) {
    // JSON doesn't accept allow unicode whitespace?
    return ch == 0x20 // space
        || ch == 0x09 // tab
        || ch == 0x0a // line feed
        || ch == 0x0d; // carriage return
  }

  /**
   * Builds (but does not throw) a {@code ParseException} whose message includes the current
   * offset and up to the preceding 50 characters of input.
   *
   * <p>When calling this, upto should be at the position of the incorrect character!
   */
  private ParseException newParseException(String details) throws ParseException {
    String fragment;
    int end = Math.min(input.length(), upto + 1);
    if (upto < 50) {
      fragment = input.substring(0, end);
    } else {
      fragment = "..." + input.substring(upto - 50, end);
    }
    return new ParseException(
        details + " at character offset " + upto + "; fragment leading to this:\n" + fragment,
        upto);
  }
}