| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.spatial.prefix.tree; |
| |
| import java.text.ParseException; |
| import java.time.ZonedDateTime; |
| import java.util.Calendar; |
| import java.util.Date; |
| import java.util.GregorianCalendar; |
| import java.util.Locale; |
| import java.util.TimeZone; |
| |
| import org.locationtech.spatial4j.shape.Shape; |
| |
| /** |
| * A PrefixTree for date ranges in which the levels of the tree occur at natural periods of time (e.g. years, |
| * months, ...). You pass in {@link Calendar} objects with the desired fields set and the unspecified |
| * fields unset, which conveys the precision. The implementation makes some optimization assumptions about a |
| * {@link java.util.GregorianCalendar}; others could probably be supported easily. |
| * <p> |
| * Warning: If you construct a Calendar and then get something from the object like a field (e.g. year) or |
| * milliseconds, then every field is fully set by side-effect. So after setting the fields, pass it to this |
| * API first. |
| * @lucene.experimental |
| */ |
| public class DateRangePrefixTree extends NumberRangePrefixTree { |
| |
| /* |
| WARNING java.util.Calendar is tricky to work with: |
| * If you "get" any field value, every field becomes "set". This can introduce a Heisenbug effect, |
| when in a debugger in some cases. Fortunately, Calendar.toString() doesn't apply. |
| * Beware Calendar underflow of the underlying long. If you create a Calendar from LONG.MIN_VALUE, and clear |
| a field, it will underflow and appear close to LONG.MAX_VALUE (BC to AD). |
| |
| There are no doubt other reasons but those two were hard fought lessons here. |
| |
| TODO Improvements: |
| * Make max precision configurable (i.e. to SECOND). |
| * Make min & max year span configurable. Use that to remove pointless top levels of the SPT. |
| If year span is > 10k, then add 1k year level. If year span is > 10k of 1k levels, add 1M level. |
| * NumberRangePrefixTree: override getTreeCellIterator for optimized case where the shape isn't a date span; use |
| FilterCellIterator of the cell stack. |
| |
| */ |
| |
| private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); |
| |
| /** |
| * The Java platform default {@link Calendar} with UTC & ROOT Locale. Generally a {@link GregorianCalendar}. |
| * Do <em>not</em> modify this! |
| */ |
| public static final Calendar DEFAULT_CAL;//template |
| static { |
| DEFAULT_CAL = Calendar.getInstance(UTC, Locale.ROOT); |
| DEFAULT_CAL.clear(); |
| } |
| |
| /** |
| * A Calendar instance compatible with {@link java.time.ZonedDateTime} as seen from |
| * {@link GregorianCalendar#from(ZonedDateTime)}. |
| * Do <em>not</em> modify this! |
| */ |
| public static final Calendar JAVA_UTIL_TIME_COMPAT_CAL; |
| static { |
| // see source of GregorianCalendar.from(ZonedDateTime) |
| GregorianCalendar cal = new GregorianCalendar(UTC, Locale.ROOT); |
| cal.setGregorianChange(new Date(Long.MIN_VALUE)); |
| cal.setFirstDayOfWeek(Calendar.MONDAY);// might not matter? |
| cal.setMinimalDaysInFirstWeek(4);// might not matter |
| cal.clear(); |
| JAVA_UTIL_TIME_COMPAT_CAL = cal; |
| } |
| |
| private static final int[] FIELD_BY_LEVEL = { |
| -1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH, |
| Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND}; |
| |
| private static final int YEAR_LEVEL = 3; |
| |
| //how many million years are there? |
| private static final int NUM_MYEARS = 586;// we assert how this was computed in the constructor |
| |
| /** An instanced based on {@link Calendar#getInstance(TimeZone, Locale)} with UTC and Locale.Root. This |
| * will (always?) be a {@link GregorianCalendar} with a so-called "Gregorian Change Date" of 1582. |
| */ |
| @Deprecated |
| public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree(DEFAULT_CAL); |
| |
| // Instance fields: (all are final) |
| |
| private final Calendar CAL_TMP;//template |
| |
| private final Calendar MINCAL; |
| private final Calendar MAXCAL; |
| |
| private final int BC_FIRSTYEAR; |
| private final int BC_LASTYEAR; |
| private final int BC_YEARS; |
| private final int AD_FIRSTYEAR; |
| private final int AD_LASTYEAR; |
| private final int AD_YEAR_BASE; |
| |
| private final UnitNRShape minLV, maxLV; |
| private final UnitNRShape gregorianChangeDateLV; |
| |
| /** Constructs with the specified calendar used as a template to be cloned whenever a new |
| * Calendar needs to be created. See {@link #DEFAULT_CAL} and {@link #JAVA_UTIL_TIME_COMPAT_CAL}. */ |
| public DateRangePrefixTree(Calendar templateCal) { |
| super(new int[]{//sublevels by level |
| NUM_MYEARS, |
| 1000,//1 thousand thousand-years in a million years |
| 1000,//1 thousand years in a thousand-year |
| calFieldLen(templateCal, Calendar.MONTH), |
| calFieldLen(templateCal, Calendar.DAY_OF_MONTH), |
| calFieldLen(templateCal, Calendar.HOUR_OF_DAY), |
| calFieldLen(templateCal, Calendar.MINUTE), |
| calFieldLen(templateCal, Calendar.SECOND), |
| calFieldLen(templateCal, Calendar.MILLISECOND), |
| }); |
| CAL_TMP = (Calendar) templateCal.clone();// defensive copy |
| MINCAL = (Calendar) CAL_TMP.clone(); |
| MINCAL.setTimeInMillis(Long.MIN_VALUE); |
| MAXCAL = (Calendar) CAL_TMP.clone(); |
| MAXCAL.setTimeInMillis(Long.MAX_VALUE); |
| //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1. |
| BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR); |
| BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR); // 1 |
| BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1; |
| AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR); // 1 |
| AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR); |
| AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000; // align year 0 at an even # of million years |
| assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; |
| assert NUM_MYEARS == (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000 + 1; |
| |
| maxLV = toShape((Calendar)MAXCAL.clone()); |
| minLV = toShape((Calendar)MINCAL.clone()); |
| if (MAXCAL instanceof GregorianCalendar) { |
| GregorianCalendar gCal = (GregorianCalendar)MAXCAL; |
| gregorianChangeDateLV = toUnitShape(gCal.getGregorianChange()); |
| } else { |
| gregorianChangeDateLV = null; |
| } |
| } |
| |
| private static int calFieldLen(Calendar cal, int field) { |
| return cal.getMaximum(field) - cal.getMinimum(field) + 1; |
| } |
| |
| @Override |
| public int getNumSubCells(UnitNRShape lv) { |
| int cmp = comparePrefix(lv, maxLV); |
| assert cmp <= 0; |
| if (cmp == 0)//edge case (literally!) |
| return maxLV.getValAtLevel(lv.getLevel()+1) + 1; |
| |
| // if using GregorianCalendar and we're after the "Gregorian change date" then we'll compute |
| // the sub-cells ourselves more efficiently without the need to construct a Calendar. |
| cmp = gregorianChangeDateLV != null ? comparePrefix(lv, gregorianChangeDateLV) : -1; |
| //TODO consider also doing fast-path if field is <= hours even if before greg change date |
| if (cmp >= 0) { |
| int result = fastSubCells(lv); |
| assert result == slowSubCells(lv) : "fast/slow numSubCells inconsistency"; |
| return result; |
| } else { |
| return slowSubCells(lv); |
| } |
| } |
| |
| private int fastSubCells(UnitNRShape lv) { |
| if (lv.getLevel() == YEAR_LEVEL + 1) {//month |
| switch (lv.getValAtLevel(lv.getLevel())) { |
| case Calendar.SEPTEMBER: |
| case Calendar.APRIL: |
| case Calendar.JUNE: |
| case Calendar.NOVEMBER: |
| return 30; |
| case Calendar.FEBRUARY: |
| //get the year (negative numbers for BC) |
| int yearAdj = lv.getValAtLevel(1) * 1_000_000; |
| yearAdj += lv.getValAtLevel(2) * 1000; |
| yearAdj += lv.getValAtLevel(3); |
| int year = yearAdj - AD_YEAR_BASE; |
| if (year % 4 == 0 && !(year % 100 == 0 && year % 400 != 0) )//leap year |
| return 29; |
| else |
| return 28; |
| default: |
| return 31; |
| } |
| } else {//typical: |
| return super.getNumSubCells(lv); |
| } |
| } |
| |
| private int slowSubCells(UnitNRShape lv) { |
| int field = FIELD_BY_LEVEL[lv.getLevel()+1]; |
| //short-circuit optimization (GregorianCalendar assumptions) |
| if (field == -1 || field == Calendar.YEAR || field >= Calendar.HOUR_OF_DAY)//TODO make configurable |
| return super.getNumSubCells(lv); |
| Calendar cal = toCalendar(lv);//somewhat heavyweight op; ideally should be stored on UnitNRShape somehow |
| return cal.getActualMaximum(field) - cal.getActualMinimum(field) + 1; |
| } |
| |
| /** Calendar utility method: |
| * Returns a clone of the {@link Calendar} passed to the constructor with all fields cleared. */ |
| public Calendar newCal() { |
| return (Calendar) CAL_TMP.clone(); |
| } |
| |
| /** Calendar utility method: |
| * Returns the spatial prefix tree level for the corresponding {@link java.util.Calendar} field, such as |
| * {@link java.util.Calendar#YEAR}. If there's no match, the next greatest level is returned as a negative value. |
| */ |
| public int getTreeLevelForCalendarField(int calField) { |
| for (int i = YEAR_LEVEL; i < FIELD_BY_LEVEL.length; i++) { |
| if (FIELD_BY_LEVEL[i] == calField) { |
| return i; |
| } else if (FIELD_BY_LEVEL[i] > calField) { |
| return -1 * i; |
| } |
| } |
| throw new IllegalArgumentException("Bad calendar field?: " + calField); |
| } |
| |
| /** Calendar utility method: |
| * Gets the Calendar field code of the last field that is set prior to an unset field. It only |
| * examines fields relevant to the prefix tree. If no fields are set, it returns -1. */ |
| public int getCalPrecisionField(Calendar cal) { |
| int lastField = -1; |
| for (int level = YEAR_LEVEL; level < FIELD_BY_LEVEL.length; level++) { |
| int field = FIELD_BY_LEVEL[level]; |
| if (!cal.isSet(field)) |
| break; |
| lastField = field; |
| } |
| return lastField; |
| } |
| |
| /** Calendar utility method: |
| * Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */ |
| public void clearFieldsAfter(Calendar cal, int field) { |
| int assertEra = -1; |
| assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled |
| //note: Calendar.ERA == 0; |
| for (int f = field + 1; f <= Calendar.MILLISECOND; f++) { |
| cal.clear(f); |
| } |
| assert field + 1 == Calendar.ERA || ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow"; |
| } |
| |
| /** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments |
| * result in a {@link java.lang.IllegalArgumentException}. |
| * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}. |
| */ |
| @Override |
| public UnitNRShape toUnitShape(Object value) { |
| if (value instanceof Calendar) { |
| return toShape((Calendar) value); |
| } else if (value instanceof Date) { |
| Calendar cal = newCal(); |
| cal.setTime((Date)value); |
| return toShape(cal); |
| } |
| throw new IllegalArgumentException("Expecting Calendar or Date but got: "+value.getClass()); |
| } |
| |
| /** Converts the Calendar into a Shape. |
| * The isSet() state of the Calendar is re-instated when done. |
| * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}. |
| */ |
| public UnitNRShape toShape(Calendar cal) { |
| // Convert a Calendar into a stack of cell numbers |
| final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields |
| try { |
| int[] valStack = new int[maxLevels];//starts at level 1, not 0 |
| int len = 0; |
| if (calPrecField >= Calendar.YEAR) {//year or better precision |
| int year = cal.get(Calendar.YEAR); |
| int yearAdj = cal.get(Calendar.ERA) == 0 ? AD_YEAR_BASE - (year - 1) : AD_YEAR_BASE + year; |
| |
| valStack[len++] = yearAdj / 1000_000; |
| yearAdj -= valStack[len-1] * 1000_000; |
| valStack[len++] = yearAdj / 1000; |
| yearAdj -= valStack[len-1] * 1000; |
| valStack[len++] = yearAdj; |
| for (int level = YEAR_LEVEL +1; level < FIELD_BY_LEVEL.length; level++) { |
| int field = FIELD_BY_LEVEL[level]; |
| if (field > calPrecField) |
| break; |
| valStack[len++] = cal.get(field) - cal.getActualMinimum(field); |
| } |
| } |
| |
| return toShape(valStack, len); |
| } finally { |
| clearFieldsAfter(cal, calPrecField);//restore precision state modified by get() |
| } |
| } |
| |
| /** Calls {@link #toCalendar(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */ |
| @Override |
| public Object toObject(UnitNRShape shape) { |
| return toCalendar(shape); |
| } |
| |
| /** Converts the {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape} shape to a |
| * corresponding Calendar that is cleared below its level. */ |
| public Calendar toCalendar(UnitNRShape lv) { |
| if (lv.getLevel() == 0) |
| return newCal(); |
| if (comparePrefix(lv, minLV) <= 0) {//shouldn't typically happen; sometimes in a debugger |
| return (Calendar) MINCAL.clone();//full precision; truncation would cause underflow |
| } |
| assert comparePrefix(lv, maxLV) <= 0; |
| Calendar cal = newCal(); |
| |
| int yearAdj = lv.getValAtLevel(1) * 1_000_000; |
| if (lv.getLevel() > 1) { |
| yearAdj += lv.getValAtLevel(2) * 1000; |
| if (lv.getLevel() > 2) { |
| yearAdj += lv.getValAtLevel(3); |
| } |
| } |
| if (yearAdj > AD_YEAR_BASE) { |
| cal.set(Calendar.ERA, 1); |
| cal.set(Calendar.YEAR, yearAdj - AD_YEAR_BASE);//setting the year resets the era |
| } else { |
| cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end |
| cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1); |
| } |
| for (int level = YEAR_LEVEL + 1; level <= lv.getLevel(); level++) { |
| int field = FIELD_BY_LEVEL[level]; |
| cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field)); |
| } |
| assert yearAdj > AD_YEAR_BASE || ((Calendar)cal.clone()).get(Calendar.ERA) == 0 : "ERA / YEAR underflow"; |
| return cal; |
| } |
| |
| @Override |
| protected String toString(UnitNRShape lv) { |
| return toString(toCalendar(lv)); |
| } |
| |
| /** Calendar utility method consistent with {@link java.time.format.DateTimeFormatter#ISO_INSTANT} except |
| * has no trailing 'Z', and will be truncated to the units given according to |
| * {@link Calendar#isSet(int)}. |
| * A fully cleared calendar will yield the string "*". |
| * The isSet() state of the Calendar is re-instated when done. */ |
| public String toString(Calendar cal) { |
| final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields |
| if (calPrecField == -1) |
| return "*"; |
| try { |
| StringBuilder builder = new StringBuilder("yyyy-MM-dd'T'HH:mm:ss.SSS".length());//typical |
| int year = cal.get(Calendar.YEAR); // within the era (thus always positve). >= 1. |
| if (cal.get(Calendar.ERA) == 0) { // BC |
| year -= 1; // 1BC should be "0000", so shift by one |
| if (year > 0) { |
| builder.append('-'); |
| } |
| } else if (year > 9999) { |
| builder.append('+'); |
| } |
| appendPadded(builder, year, (short) 4); |
| if (calPrecField >= Calendar.MONTH) { |
| builder.append('-'); |
| appendPadded(builder, cal.get(Calendar.MONTH) + 1, (short) 2); // +1 since first is 0 |
| } |
| if (calPrecField >= Calendar.DAY_OF_MONTH) { |
| builder.append('-'); |
| appendPadded(builder, cal.get(Calendar.DAY_OF_MONTH), (short) 2); |
| } |
| if (calPrecField >= Calendar.HOUR_OF_DAY) { |
| builder.append('T'); |
| appendPadded(builder, cal.get(Calendar.HOUR_OF_DAY), (short) 2); |
| } |
| if (calPrecField >= Calendar.MINUTE) { |
| builder.append(':'); |
| appendPadded(builder, cal.get(Calendar.MINUTE), (short) 2); |
| } |
| if (calPrecField >= Calendar.SECOND) { |
| builder.append(':'); |
| appendPadded(builder, cal.get(Calendar.SECOND), (short) 2); |
| } |
| if (calPrecField >= Calendar.MILLISECOND && cal.get(Calendar.MILLISECOND) > 0) { // only if non-zero |
| builder.append('.'); |
| appendPadded(builder, cal.get(Calendar.MILLISECOND), (short) 3); |
| } |
| |
| return builder.toString(); |
| } finally { |
| clearFieldsAfter(cal, calPrecField);//restore precision state modified by get() |
| } |
| } |
| |
| private void appendPadded(StringBuilder builder, int integer, short positions) { |
| assert integer >= 0 && positions >= 1 && positions <= 4; |
| int preBuilderLen = builder.length(); |
| int intStrLen; |
| if (integer > 999) { |
| intStrLen = 4; |
| } else if (integer > 99) { |
| intStrLen = 3; |
| } else if (integer > 9) { |
| intStrLen = 2; |
| } else { |
| intStrLen = 1; |
| } |
| for (int i = 0; i < positions - intStrLen; i++) { |
| builder.append('0'); |
| } |
| builder.append(integer); |
| } |
| |
| @Override |
| protected UnitNRShape parseUnitShape(String str) throws ParseException { |
| return toShape(parseCalendar(str)); |
| } |
| |
| /** Calendar utility method: |
| * The reverse of {@link #toString(java.util.Calendar)}. It will only set the fields found, leaving |
| * the remainder in an un-set state. A leading '-' or '+' is optional (positive assumed), and a |
| * trailing 'Z' is also optional. |
| * @param str not null and not empty |
| * @return not null |
| */ |
| public Calendar parseCalendar(String str) throws ParseException { |
| // example: +2014-10-23T21:22:33.159Z |
| if (str == null || str.isEmpty()) |
| throw new IllegalArgumentException("str is null or blank"); |
| Calendar cal = newCal(); |
| if (str.equals("*")) |
| return cal; |
| int offset = 0;//a pointer |
| int parsedVal = 0; |
| try { |
| //year & era: |
| int lastOffset = str.charAt(str.length()-1) == 'Z' ? str.length() - 1 : str.length(); |
| int hyphenIdx = str.indexOf('-', 1);//look past possible leading hyphen |
| if (hyphenIdx < 0) |
| hyphenIdx = lastOffset; |
| int year = Integer.parseInt(str.substring(offset, hyphenIdx)); |
| cal.set(Calendar.ERA, year <= 0 ? 0 : 1); |
| cal.set(Calendar.YEAR, year <= 0 ? -1*year + 1 : year); |
| offset = hyphenIdx + 1; |
| if (lastOffset < offset) |
| return cal; |
| |
| //NOTE: We aren't validating separator chars, and we unintentionally accept leading +/-. |
| // The str.substring()'s hopefully get optimized to be stack-allocated. |
| |
| //month: |
| parsedVal = parseAndCheck( str, offset, 1, 12); |
| cal.set(Calendar.MONTH, parsedVal - 1);//starts at 0 |
| offset += 3; |
| if (lastOffset < offset) |
| return cal; |
| //day: |
| checkDelimeter(str, offset-1, '-'); |
| |
| parsedVal = parseAndCheck( str, offset, 1, 31); |
| cal.set(Calendar.DAY_OF_MONTH, parsedVal); |
| offset += 3; |
| if (lastOffset < offset) |
| return cal; |
| checkDelimeter(str, offset-1, 'T'); |
| //hour: |
| |
| parsedVal = parseAndCheck( str, offset, 0, 24); |
| cal.set(Calendar.HOUR_OF_DAY, parsedVal); |
| offset += 3; |
| if (lastOffset < offset) |
| return cal; |
| checkDelimeter(str, offset-1, ':'); |
| //minute: |
| |
| parsedVal = parseAndCheck( str, offset, 0, 59); |
| cal.set(Calendar.MINUTE, parsedVal); |
| offset += 3; |
| if (lastOffset < offset) |
| return cal; |
| checkDelimeter(str, offset-1, ':'); |
| //second: |
| |
| parsedVal = parseAndCheck( str, offset, 0, 59); |
| cal.set(Calendar.SECOND, parsedVal); |
| offset += 3; |
| if (lastOffset < offset) |
| return cal; |
| checkDelimeter(str, offset-1, '.'); |
| //ms: |
| |
| int maxOffset = lastOffset - offset; // assume remaining is all digits to compute milliseconds |
| // we truncate off > millisecond precision (3 digits only) |
| int millis = (int) (Integer.parseInt(str.substring(offset, offset + maxOffset)) / Math.pow(10, maxOffset - 3)); |
| cal.set(Calendar.MILLISECOND, millis); |
| return cal; |
| } catch (Exception e) { |
| ParseException pe = new ParseException("Improperly formatted datetime: "+str, offset); |
| pe.initCause(e); |
| throw pe; |
| } |
| } |
| |
| private void checkDelimeter(String str, int offset, char delim) { |
| if (str.charAt(offset) != delim) { |
| throw new IllegalArgumentException("Invalid delimeter: '"+str.charAt(offset)+ |
| "', expecting '"+delim+"'"); |
| } |
| } |
| |
| private int parseAndCheck(String str, int offset, int min, int max) { |
| int val = Integer.parseInt(str.substring(offset, offset+2)); |
| if (val < min || val > max) { |
| throw new IllegalArgumentException("Invalid value: "+val+"," + |
| " expecting from "+min+" to "+max+"]"); |
| } |
| return val; |
| } |
| |
| } |