| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package freemarker.adhoc; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Optional; |
| import java.util.function.Predicate; |
| |
| /** |
| * This was used for generating the JavaCC pattern and the Java method to check if a character |
| * can appear in an FTL identifier. |
| */ |
| public class IdentifierCharGenerator { |
| |
| public static void main(String[] args) { |
| new IdentifierCharGenerator().run(); |
| } |
| |
| private void run() { |
| List<Range> ranges = generateRanges(this::generatorPredicate); |
| List<Range> ranges2 = generateRanges(IdentifierCharGenerator::isFTLIdentifierStart); |
| |
| if (!ranges.equals(ranges2)) { |
| throw new AssertionError(); |
| } |
| |
| ranges.forEach(r -> p(toJavaCC(r) + ", ")); |
| |
| int firstSplit = 0; |
| while (firstSplit < ranges.size() && ranges.get(firstSplit).getEnd() < 0x80) { |
| firstSplit++; |
| } |
| printJava(ranges, firstSplit, ""); |
| } |
| |
| private List<Range> generateRanges(Predicate<Integer> charCodePredicate) { |
| List<Range> ranges = new ArrayList<Range>(); |
| |
| int startedRange = -1; |
| int i; |
| for (i = 0; i < 0x10000; i++) { |
| if (charCodePredicate.test(i)) { |
| if (startedRange == -1) { |
| startedRange = i; |
| } |
| } else { |
| if (startedRange != -1) { |
| ranges.add(new Range(startedRange, i)); |
| } |
| startedRange = -1; |
| } |
| } |
| if (startedRange != -1) { |
| ranges.add(new Range(startedRange, i)); |
| } |
| |
| return ranges; |
| } |
| |
| private static void printJava(List<Range> ranges, int splitUntil, String indentation) { |
| final int rangeCount = ranges.size(); |
| if (rangeCount > 2) { |
| Range secondHalfStart = ranges.get(splitUntil); |
| pp(indentation); |
| pp("if (c < "); pp(toJavaCharCode(secondHalfStart.getStart())); p(") {"); |
| { |
| List<Range> firstHalf = ranges.subList(0, splitUntil); |
| printJava(firstHalf, (firstHalf.size() + 1) / 2, indentation + " "); |
| } |
| pp(indentation); |
| pp("} else { // c >= "); p(toJavaCharCode(secondHalfStart.start)); |
| { |
| List<Range> secondHalf = ranges.subList(splitUntil, ranges.size()); |
| printJava(secondHalf, (secondHalf.size() + 1) / 2, indentation + " "); |
| } |
| pp(indentation); |
| p("}"); |
| } else if (rangeCount == 2) { |
| pp(indentation); |
| pp("return "); |
| printJavaCondition(ranges.get(0)); |
| pp(" || "); |
| printJavaCondition(ranges.get(1)); |
| p(";"); |
| } else if (rangeCount == 1) { |
| pp(indentation); |
| pp("return "); |
| printJavaCondition(ranges.get(0)); |
| p(";"); |
| } else { |
| throw new IllegalArgumentException("Empty range list"); |
| } |
| } |
| |
| private static void printJavaCondition(Range range) { |
| if (range.size() > 1) { |
| pp("c >= "); pp(toJavaCharCode(range.getStart())); |
| pp(" && c <= "); pp(toJavaCharCode(range.getEnd() - 1)); |
| } else { |
| pp("c == "); pp(toJavaCharCode(range.getStart())); |
| } |
| } |
| |
| private boolean generatorPredicate(int c) { |
| return isLegacyFTLIdStartChar(c) |
| || (Character.isJavaIdentifierPart(c) && Character.isLetterOrDigit(c) && !(c >= '0' && c <= '9')); |
| } |
| |
| private static boolean isLegacyFTLIdStartChar(int i) { |
| return i == '$' || i == '_' |
| || (i >= 'a' && i <= 'z') |
| || (i >= '@' && i <= 'Z') |
| || (i >= '\u00c0' && i <= '\u00d6') |
| || (i >= '\u00d8' && i <= '\u00f6') |
| || (i >= '\u00f8' && i <= '\u1fff') |
| || (i >= '\u3040' && i <= '\u318f') |
| || (i >= '\u3300' && i <= '\u337f') |
| || (i >= '\u3400' && i <= '\u3d2d') |
| || (i >= '\u4e00' && i <= '\u9fff') |
| || (i >= '\uf900' && i <= '\ufaff'); |
| } |
| |
| private static boolean isXML11NameChar(int i) { |
| return isXML11NameStartChar(i) |
| || i == '-' || i == '.' || (i >= '0' && i <= '9') || i == 0xB7 |
| || (i >= 0x0300 && i <= 0x036F) || (i >= 0x203F && i <= 0x2040); |
| } |
| |
| private static boolean isXML11NameStartChar(int i) { |
| return i == ':' || (i >= 'A' && i <= 'Z') || i == '_' || (i >= 'a' && i <= 'z') |
| || i >= 0xC0 && i <= 0xD6 |
| || i >= 0xD8 && i <= 0xF6 |
| || i >= 0xF8 && i <= 0x2FF |
| || i >= 0x370 && i <= 0x37D |
| || i >= 0x37F && i <= 0x1FFF |
| || i >= 0x200C && i <= 0x200D |
| || i >= 0x2070 && i <= 0x218F |
| || i >= 0x2C00 && i <= 0x2FEF |
| || i >= 0x3001 && i <= 0xD7FF |
| || i >= 0xF900 && i <= 0xFDCF |
| || i >= 0xFDF0 && i <= 0xFFFD; |
| } |
| |
| private static String toJavaCC(Range range) { |
| final int start = range.getStart(), end = range.getEnd(); |
| if (start == end) { |
| throw new IllegalArgumentException("Empty range"); |
| } |
| if (end - start == 1) { |
| return toJavaCCCharString(start); |
| } else { |
| return toJavaCCCharString(start) + " - " + toJavaCCCharString(end - 1); |
| } |
| } |
| |
| private static String toJavaCCCharString(int cc) { |
| StringBuilder sb = new StringBuilder(); |
| |
| sb.append('"'); |
| if (cc < 0x7F && cc >= 0x20) { |
| sb.append((char) cc); |
| } else { |
| sb.append("\\u"); |
| sb.append(toHexDigit((cc >> 12) & 0xF)); |
| sb.append(toHexDigit((cc >> 8) & 0xF)); |
| sb.append(toHexDigit((cc >> 4) & 0xF)); |
| sb.append(toHexDigit(cc & 0xF)); |
| } |
| sb.append('"'); |
| |
| return sb.toString(); |
| } |
| |
| private static String toJavaCharCode(int cc) { |
| StringBuilder sb = new StringBuilder(); |
| |
| if (cc < 0x7F && cc >= 0x20) { |
| sb.append('\''); |
| sb.append((char) cc); |
| sb.append('\''); |
| } else { |
| sb.append("0x"); |
| if (cc > 0xFF) { |
| sb.append(toHexDigit((cc >> 12) & 0xF)); |
| sb.append(toHexDigit((cc >> 8) & 0xF)); |
| } |
| sb.append(toHexDigit((cc >> 4) & 0xF)); |
| sb.append(toHexDigit(cc & 0xF)); |
| } |
| |
| return sb.toString(); |
| } |
| |
| private static char toHexDigit(int d) { |
| return (char) (d < 0xA ? d + '0' : d - 0xA + 'A'); |
| } |
| |
| private static class Range { |
| |
| final int start; |
| final int end; |
| |
| public Range(int start, int end) { |
| this.start = start; |
| this.end = end; |
| } |
| |
| public int getStart() { |
| return start; |
| } |
| |
| public int getEnd() { |
| return end; |
| } |
| |
| public int size() { |
| return end - start; |
| } |
| |
| @Override |
| public int hashCode() { |
| final int prime = 31; |
| int result = 1; |
| result = prime * result + end; |
| result = prime * result + start; |
| return result; |
| } |
| |
| @Override |
| public boolean equals(Object obj) { |
| if (this == obj) return true; |
| if (obj == null) return false; |
| if (getClass() != obj.getClass()) return false; |
| Range other = (Range) obj; |
| if (end != other.end) return false; |
| if (start != other.start) return false; |
| return true; |
| } |
| |
| } |
| |
| static void p(final Object o) { |
| System.out.println(o); |
| } |
| |
| static void pp(final Object o) { |
| System.out.print(o); |
| } |
| |
| static void p(final Object[] o) { |
| System.out.println("["); |
| for (final Object i : o) { |
| System.out.println(" " + i); |
| } |
| System.out.println("]"); |
| } |
| |
| static void p() { |
| System.out.println(); |
| } |
| |
| // This is a copy of the generated code (somewhat modified), so that we can compare it with the generatorPredicate. |
| public static boolean isFTLIdentifierStart(int cc) { |
| char c = (char) cc; |
| |
| if (c < 0xAA) { |
| if (c >= 'a' && c <= 'z' || c >= '@' && c <= 'Z') { |
| return true; |
| } else { |
| return c == '$' || c == '_'; |
| } |
| } else { // c >= 0xAA |
| if (c < 0xA7F8) { |
| if (c < 0x2D6F) { |
| if (c < 0x2128) { |
| if (c < 0x2090) { |
| if (c < 0xD8) { |
| if (c < 0xBA) { |
| return c == 0xAA || c == 0xB5; |
| } else { // c >= 0xBA |
| return c == 0xBA || c >= 0xC0 && c <= 0xD6; |
| } |
| } else { // c >= 0xD8 |
| if (c < 0x2071) { |
| return c >= 0xD8 && c <= 0xF6 || c >= 0xF8 && c <= 0x1FFF; |
| } else { // c >= 0x2071 |
| return c == 0x2071 || c == 0x207F; |
| } |
| } |
| } else { // c >= 0x2090 |
| if (c < 0x2115) { |
| if (c < 0x2107) { |
| return c >= 0x2090 && c <= 0x209C || c == 0x2102; |
| } else { // c >= 0x2107 |
| return c == 0x2107 || c >= 0x210A && c <= 0x2113; |
| } |
| } else { // c >= 0x2115 |
| if (c < 0x2124) { |
| return c == 0x2115 || c >= 0x2119 && c <= 0x211D; |
| } else { // c >= 0x2124 |
| return c == 0x2124 || c == 0x2126; |
| } |
| } |
| } |
| } else { // c >= 0x2128 |
| if (c < 0x2C30) { |
| if (c < 0x2145) { |
| if (c < 0x212F) { |
| return c == 0x2128 || c >= 0x212A && c <= 0x212D; |
| } else { // c >= 0x212F |
| return c >= 0x212F && c <= 0x2139 || c >= 0x213C && c <= 0x213F; |
| } |
| } else { // c >= 0x2145 |
| if (c < 0x2183) { |
| return c >= 0x2145 && c <= 0x2149 || c == 0x214E; |
| } else { // c >= 0x2183 |
| return c >= 0x2183 && c <= 0x2184 || c >= 0x2C00 && c <= 0x2C2E; |
| } |
| } |
| } else { // c >= 0x2C30 |
| if (c < 0x2D00) { |
| if (c < 0x2CEB) { |
| return c >= 0x2C30 && c <= 0x2C5E || c >= 0x2C60 && c <= 0x2CE4; |
| } else { // c >= 0x2CEB |
| return c >= 0x2CEB && c <= 0x2CEE || c >= 0x2CF2 && c <= 0x2CF3; |
| } |
| } else { // c >= 0x2D00 |
| if (c < 0x2D2D) { |
| return c >= 0x2D00 && c <= 0x2D25 || c == 0x2D27; |
| } else { // c >= 0x2D2D |
| return c == 0x2D2D || c >= 0x2D30 && c <= 0x2D67; |
| } |
| } |
| } |
| } |
| } else { // c >= 0x2D6F |
| if (c < 0x31F0) { |
| if (c < 0x2DD0) { |
| if (c < 0x2DB0) { |
| if (c < 0x2DA0) { |
| return c == 0x2D6F || c >= 0x2D80 && c <= 0x2D96; |
| } else { // c >= 0x2DA0 |
| return c >= 0x2DA0 && c <= 0x2DA6 || c >= 0x2DA8 && c <= 0x2DAE; |
| } |
| } else { // c >= 0x2DB0 |
| if (c < 0x2DC0) { |
| return c >= 0x2DB0 && c <= 0x2DB6 || c >= 0x2DB8 && c <= 0x2DBE; |
| } else { // c >= 0x2DC0 |
| return c >= 0x2DC0 && c <= 0x2DC6 || c >= 0x2DC8 && c <= 0x2DCE; |
| } |
| } |
| } else { // c >= 0x2DD0 |
| if (c < 0x3031) { |
| if (c < 0x2E2F) { |
| return c >= 0x2DD0 && c <= 0x2DD6 || c >= 0x2DD8 && c <= 0x2DDE; |
| } else { // c >= 0x2E2F |
| return c == 0x2E2F || c >= 0x3005 && c <= 0x3006; |
| } |
| } else { // c >= 0x3031 |
| if (c < 0x3040) { |
| return c >= 0x3031 && c <= 0x3035 || c >= 0x303B && c <= 0x303C; |
| } else { // c >= 0x3040 |
| return c >= 0x3040 && c <= 0x318F || c >= 0x31A0 && c <= 0x31BA; |
| } |
| } |
| } |
| } else { // c >= 0x31F0 |
| if (c < 0xA67F) { |
| if (c < 0xA4D0) { |
| if (c < 0x3400) { |
| return c >= 0x31F0 && c <= 0x31FF || c >= 0x3300 && c <= 0x337F; |
| } else { // c >= 0x3400 |
| return c >= 0x3400 && c <= 0x4DB5 || c >= 0x4E00 && c <= 0xA48C; |
| } |
| } else { // c >= 0xA4D0 |
| if (c < 0xA610) { |
| return c >= 0xA4D0 && c <= 0xA4FD || c >= 0xA500 && c <= 0xA60C; |
| } else { // c >= 0xA610 |
| return c >= 0xA610 && c <= 0xA62B || c >= 0xA640 && c <= 0xA66E; |
| } |
| } |
| } else { // c >= 0xA67F |
| if (c < 0xA78B) { |
| if (c < 0xA717) { |
| return c >= 0xA67F && c <= 0xA697 || c >= 0xA6A0 && c <= 0xA6E5; |
| } else { // c >= 0xA717 |
| return c >= 0xA717 && c <= 0xA71F || c >= 0xA722 && c <= 0xA788; |
| } |
| } else { // c >= 0xA78B |
| if (c < 0xA7A0) { |
| return c >= 0xA78B && c <= 0xA78E || c >= 0xA790 && c <= 0xA793; |
| } else { // c >= 0xA7A0 |
| return c >= 0xA7A0 && c <= 0xA7AA; |
| } |
| } |
| } |
| } |
| } |
| } else { // c >= 0xA7F8 |
| if (c < 0xAB20) { |
| if (c < 0xAA44) { |
| if (c < 0xA8FB) { |
| if (c < 0xA840) { |
| if (c < 0xA807) { |
| return c >= 0xA7F8 && c <= 0xA801 || c >= 0xA803 && c <= 0xA805; |
| } else { // c >= 0xA807 |
| return c >= 0xA807 && c <= 0xA80A || c >= 0xA80C && c <= 0xA822; |
| } |
| } else { // c >= 0xA840 |
| if (c < 0xA8D0) { |
| return c >= 0xA840 && c <= 0xA873 || c >= 0xA882 && c <= 0xA8B3; |
| } else { // c >= 0xA8D0 |
| return c >= 0xA8D0 && c <= 0xA8D9 || c >= 0xA8F2 && c <= 0xA8F7; |
| } |
| } |
| } else { // c >= 0xA8FB |
| if (c < 0xA984) { |
| if (c < 0xA930) { |
| return c == 0xA8FB || c >= 0xA900 && c <= 0xA925; |
| } else { // c >= 0xA930 |
| return c >= 0xA930 && c <= 0xA946 || c >= 0xA960 && c <= 0xA97C; |
| } |
| } else { // c >= 0xA984 |
| if (c < 0xAA00) { |
| return c >= 0xA984 && c <= 0xA9B2 || c >= 0xA9CF && c <= 0xA9D9; |
| } else { // c >= 0xAA00 |
| return c >= 0xAA00 && c <= 0xAA28 || c >= 0xAA40 && c <= 0xAA42; |
| } |
| } |
| } |
| } else { // c >= 0xAA44 |
| if (c < 0xAAC0) { |
| if (c < 0xAA80) { |
| if (c < 0xAA60) { |
| return c >= 0xAA44 && c <= 0xAA4B || c >= 0xAA50 && c <= 0xAA59; |
| } else { // c >= 0xAA60 |
| return c >= 0xAA60 && c <= 0xAA76 || c == 0xAA7A; |
| } |
| } else { // c >= 0xAA80 |
| if (c < 0xAAB5) { |
| return c >= 0xAA80 && c <= 0xAAAF || c == 0xAAB1; |
| } else { // c >= 0xAAB5 |
| return c >= 0xAAB5 && c <= 0xAAB6 || c >= 0xAAB9 && c <= 0xAABD; |
| } |
| } |
| } else { // c >= 0xAAC0 |
| if (c < 0xAAF2) { |
| if (c < 0xAADB) { |
| return c == 0xAAC0 || c == 0xAAC2; |
| } else { // c >= 0xAADB |
| return c >= 0xAADB && c <= 0xAADD || c >= 0xAAE0 && c <= 0xAAEA; |
| } |
| } else { // c >= 0xAAF2 |
| if (c < 0xAB09) { |
| return c >= 0xAAF2 && c <= 0xAAF4 || c >= 0xAB01 && c <= 0xAB06; |
| } else { // c >= 0xAB09 |
| return c >= 0xAB09 && c <= 0xAB0E || c >= 0xAB11 && c <= 0xAB16; |
| } |
| } |
| } |
| } |
| } else { // c >= 0xAB20 |
| if (c < 0xFB46) { |
| if (c < 0xFB13) { |
| if (c < 0xAC00) { |
| if (c < 0xABC0) { |
| return c >= 0xAB20 && c <= 0xAB26 || c >= 0xAB28 && c <= 0xAB2E; |
| } else { // c >= 0xABC0 |
| return c >= 0xABC0 && c <= 0xABE2 || c >= 0xABF0 && c <= 0xABF9; |
| } |
| } else { // c >= 0xAC00 |
| if (c < 0xD7CB) { |
| return c >= 0xAC00 && c <= 0xD7A3 || c >= 0xD7B0 && c <= 0xD7C6; |
| } else { // c >= 0xD7CB |
| return c >= 0xD7CB && c <= 0xD7FB || c >= 0xF900 && c <= 0xFB06; |
| } |
| } |
| } else { // c >= 0xFB13 |
| if (c < 0xFB38) { |
| if (c < 0xFB1F) { |
| return c >= 0xFB13 && c <= 0xFB17 || c == 0xFB1D; |
| } else { // c >= 0xFB1F |
| return c >= 0xFB1F && c <= 0xFB28 || c >= 0xFB2A && c <= 0xFB36; |
| } |
| } else { // c >= 0xFB38 |
| if (c < 0xFB40) { |
| return c >= 0xFB38 && c <= 0xFB3C || c == 0xFB3E; |
| } else { // c >= 0xFB40 |
| return c >= 0xFB40 && c <= 0xFB41 || c >= 0xFB43 && c <= 0xFB44; |
| } |
| } |
| } |
| } else { // c >= 0xFB46 |
| if (c < 0xFF21) { |
| if (c < 0xFDF0) { |
| if (c < 0xFD50) { |
| return c >= 0xFB46 && c <= 0xFBB1 || c >= 0xFBD3 && c <= 0xFD3D; |
| } else { // c >= 0xFD50 |
| return c >= 0xFD50 && c <= 0xFD8F || c >= 0xFD92 && c <= 0xFDC7; |
| } |
| } else { // c >= 0xFDF0 |
| if (c < 0xFE76) { |
| return c >= 0xFDF0 && c <= 0xFDFB || c >= 0xFE70 && c <= 0xFE74; |
| } else { // c >= 0xFE76 |
| return c >= 0xFE76 && c <= 0xFEFC || c >= 0xFF10 && c <= 0xFF19; |
| } |
| } |
| } else { // c >= 0xFF21 |
| if (c < 0xFFCA) { |
| if (c < 0xFF66) { |
| return c >= 0xFF21 && c <= 0xFF3A || c >= 0xFF41 && c <= 0xFF5A; |
| } else { // c >= 0xFF66 |
| return c >= 0xFF66 && c <= 0xFFBE || c >= 0xFFC2 && c <= 0xFFC7; |
| } |
| } else { // c >= 0xFFCA |
| if (c < 0xFFDA) { |
| return c >= 0xFFCA && c <= 0xFFCF || c >= 0xFFD2 && c <= 0xFFD7; |
| } else { // c >= 0xFFDA |
| return c >= 0xFFDA && c <= 0xFFDC; |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| } |