// blob: 3c4a726e081d3403ca89f272f89800bd7c0325e9 (header line that appears to come from a repository web viewer; not part of the original source)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package freemarker.adhoc;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
/**
 * Ad-hoc generator that was used for producing the JavaCC character pattern and the Java method
 * that check if a character can start an FTL identifier.
 *
 * <p>When run, it scans all 16-bit char codes with {@link #generatorPredicate(int)}, collapses the
 * accepted codes into contiguous ranges, verifies that the hard-coded
 * {@link #isFTLIdentifierStart(int)} accepts exactly the same ranges, and then prints the ranges as
 * a JavaCC pattern followed by a Java {@code if}/{@code else} decision tree.
 */
public class IdentifierCharGenerator {

    public static void main(String[] args) {
        new IdentifierCharGenerator().run();
    }

    /**
     * Generates the ranges, cross-checks them against {@link #isFTLIdentifierStart(int)}, then
     * prints the JavaCC pattern and the Java decision tree to the standard output.
     *
     * @throws AssertionError if the freshly computed ranges differ from those accepted by
     *         {@link #isFTLIdentifierStart(int)}
     */
    private void run() {
        List<Range> ranges = generateRanges(this::generatorPredicate);
        List<Range> ranges2 = generateRanges(IdentifierCharGenerator::isFTLIdentifierStart);
        if (!ranges.equals(ranges2)) {
            throw new AssertionError(
                    "generatorPredicate and isFTLIdentifierStart accept different characters; "
                    + describeFirstDifference(ranges, ranges2));
        }

        ranges.forEach(r -> p(toJavaCC(r) + ", "));

        // The top-level split separates the ranges that lie entirely below 0x80 from the rest.
        // Range.getEnd() is exclusive, hence "<= 0x80" (and not "< 0x80").
        int firstSplit = 0;
        while (firstSplit < ranges.size() && ranges.get(firstSplit).getEnd() <= 0x80) {
            firstSplit++;
        }
        printJava(ranges, firstSplit, "");
    }

    /** Describes the first position at which the two range lists differ (for error messages). */
    private static String describeFirstDifference(List<Range> generated, List<Range> hardCoded) {
        int i = 0;
        while (i < generated.size() && i < hardCoded.size() && generated.get(i).equals(hardCoded.get(i))) {
            i++;
        }
        return "first difference at range index " + i
                + ": generated=" + (i < generated.size() ? generated.get(i) : "<none>")
                + ", hard-coded=" + (i < hardCoded.size() ? hardCoded.get(i) : "<none>");
    }

    /**
     * Tests every char code from 0 to 0xFFFF (inclusive) and collects the maximal contiguous runs
     * of accepted codes.
     *
     * @param charCodePredicate tells if a char code belongs to the set
     * @return the runs in ascending order, each with inclusive start and exclusive end
     */
    private List<Range> generateRanges(Predicate<Integer> charCodePredicate) {
        List<Range> ranges = new ArrayList<>();
        int startedRange = -1; // -1 means that we aren't inside a run
        int i;
        for (i = 0; i < 0x10000; i++) {
            if (charCodePredicate.test(i)) {
                if (startedRange == -1) {
                    startedRange = i;
                }
            } else {
                if (startedRange != -1) {
                    ranges.add(new Range(startedRange, i));
                }
                startedRange = -1;
            }
        }
        // Close the run that lasts until the very last char code, if there's one
        if (startedRange != -1) {
            ranges.add(new Range(startedRange, i));
        }
        return ranges;
    }

    /**
     * Prints a Java {@code if}/{@code else} decision tree that returns {@code true} exactly when
     * the variable {@code c} falls into one of the ranges.
     *
     * @param ranges the ranges in ascending order; mustn't be empty
     * @param splitUntil index of the first range that goes into the {@code else} branch; only used
     *        when there are more than 2 ranges. A value that would leave one of the branches empty
     *        is replaced with the midpoint.
     * @param indentation the string printed before each line
     * @throws IllegalArgumentException if {@code ranges} is empty
     */
    private static void printJava(List<Range> ranges, int splitUntil, String indentation) {
        final int rangeCount = ranges.size();
        if (rangeCount > 2) {
            if (splitUntil <= 0 || splitUntil >= rangeCount) {
                // Such a split would leave one branch empty (and fail in the recursion or in
                // ranges.get), so fall back to splitting in the middle.
                splitUntil = (rangeCount + 1) / 2;
            }
            Range secondHalfStart = ranges.get(splitUntil);
            pp(indentation);
            pp("if (c < "); pp(toJavaCharCode(secondHalfStart.getStart())); p(") {");
            {
                List<Range> firstHalf = ranges.subList(0, splitUntil);
                printJava(firstHalf, (firstHalf.size() + 1) / 2, indentation + " ");
            }
            pp(indentation);
            pp("} else { // c >= "); p(toJavaCharCode(secondHalfStart.getStart()));
            {
                List<Range> secondHalf = ranges.subList(splitUntil, ranges.size());
                printJava(secondHalf, (secondHalf.size() + 1) / 2, indentation + " ");
            }
            pp(indentation);
            p("}");
        } else if (rangeCount == 2) {
            pp(indentation);
            pp("return ");
            printJavaCondition(ranges.get(0));
            pp(" || ");
            printJavaCondition(ranges.get(1));
            p(";");
        } else if (rangeCount == 1) {
            pp(indentation);
            pp("return ");
            printJavaCondition(ranges.get(0));
            p(";");
        } else {
            throw new IllegalArgumentException("Empty range list");
        }
    }

    /** Prints the Java boolean expression that tells if {@code c} is inside the range. */
    private static void printJavaCondition(Range range) {
        if (range.size() > 1) {
            pp("c >= "); pp(toJavaCharCode(range.getStart()));
            // The range end is exclusive, the printed upper bound is inclusive
            pp(" && c <= "); pp(toJavaCharCode(range.getEnd() - 1));
        } else {
            pp("c == "); pp(toJavaCharCode(range.getStart()));
        }
    }

    /**
     * The rule that the generated code is derived from: the legacy FTL identifier start
     * characters, plus everything that is a Java identifier part and a letter or digit, except
     * the ASCII digits.
     */
    private boolean generatorPredicate(int c) {
        return isLegacyFTLIdStartChar(c)
                || (Character.isJavaIdentifierPart(c) && Character.isLetterOrDigit(c) && !(c >= '0' && c <= '9'));
    }

    /** Tells if the char code is in the hard-coded "legacy" set of identifier start characters. */
    private static boolean isLegacyFTLIdStartChar(int i) {
        return i == '$' || i == '_'
                || (i >= 'a' && i <= 'z')
                || (i >= '@' && i <= 'Z')
                || (i >= '\u00c0' && i <= '\u00d6')
                || (i >= '\u00d8' && i <= '\u00f6')
                || (i >= '\u00f8' && i <= '\u1fff')
                || (i >= '\u3040' && i <= '\u318f')
                || (i >= '\u3300' && i <= '\u337f')
                || (i >= '\u3400' && i <= '\u3d2d')
                || (i >= '\u4e00' && i <= '\u9fff')
                || (i >= '\uf900' && i <= '\ufaff');
    }

    /**
     * Name character check with hard-coded ranges; judging from its name it follows the XML 1.1
     * "NameChar" production (not verified here). Not used by {@link #run()}.
     */
    private static boolean isXML11NameChar(int i) {
        return isXML11NameStartChar(i)
                || i == '-' || i == '.' || (i >= '0' && i <= '9') || i == 0xB7
                || (i >= 0x0300 && i <= 0x036F) || (i >= 0x203F && i <= 0x2040);
    }

    /**
     * Name start character check with hard-coded ranges; judging from its name it follows the
     * XML 1.1 "NameStartChar" production (not verified here), restricted to 16-bit codes.
     */
    private static boolean isXML11NameStartChar(int i) {
        return i == ':' || (i >= 'A' && i <= 'Z') || i == '_' || (i >= 'a' && i <= 'z')
                || i >= 0xC0 && i <= 0xD6
                || i >= 0xD8 && i <= 0xF6
                || i >= 0xF8 && i <= 0x2FF
                || i >= 0x370 && i <= 0x37D
                || i >= 0x37F && i <= 0x1FFF
                || i >= 0x200C && i <= 0x200D
                || i >= 0x2070 && i <= 0x218F
                || i >= 0x2C00 && i <= 0x2FEF
                || i >= 0x3001 && i <= 0xD7FF
                || i >= 0xF900 && i <= 0xFDCF
                || i >= 0xFDF0 && i <= 0xFFFD;
    }

    /**
     * Converts a range to a JavaCC character list item: a single quoted character, or two quoted
     * characters with {@code " - "} between them.
     *
     * @throws IllegalArgumentException if the range is empty
     */
    private static String toJavaCC(Range range) {
        final int start = range.getStart(), end = range.getEnd();
        if (start == end) {
            throw new IllegalArgumentException("Empty range");
        }
        if (end - start == 1) {
            return toJavaCCCharString(start);
        } else {
            return toJavaCCCharString(start) + " - " + toJavaCCCharString(end - 1);
        }
    }

    /**
     * Converts a char code to a double-quoted, one character long string literal. The double
     * quote and the backslash are backslash-escaped, other codes from 0x20 to 0x7E are written
     * as is, and everything else as a four-digit Unicode escape.
     */
    private static String toJavaCCCharString(int cc) {
        StringBuilder sb = new StringBuilder();
        sb.append('"');
        if (cc == '"' || cc == '\\') {
            // Written as is, these would end the literal early, or escape its closing quote
            sb.append('\\');
            sb.append((char) cc);
        } else if (cc < 0x7F && cc >= 0x20) {
            sb.append((char) cc);
        } else {
            sb.append("\\u");
            sb.append(toHexDigit((cc >> 12) & 0xF));
            sb.append(toHexDigit((cc >> 8) & 0xF));
            sb.append(toHexDigit((cc >> 4) & 0xF));
            sb.append(toHexDigit(cc & 0xF));
        }
        sb.append('"');
        return sb.toString();
    }

    /**
     * Converts a char code to a Java expression: a character literal for codes from 0x20 to 0x7E
     * (the apostrophe and the backslash are backslash-escaped), otherwise a hexadecimal integer
     * literal with 2 digits for codes up to 0xFF, and with 4 digits above that.
     */
    private static String toJavaCharCode(int cc) {
        StringBuilder sb = new StringBuilder();
        if (cc < 0x7F && cc >= 0x20) {
            sb.append('\'');
            if (cc == '\'' || cc == '\\') {
                // Unescaped, these wouldn't give a valid character literal
                sb.append('\\');
            }
            sb.append((char) cc);
            sb.append('\'');
        } else {
            sb.append("0x");
            if (cc > 0xFF) {
                sb.append(toHexDigit((cc >> 12) & 0xF));
                sb.append(toHexDigit((cc >> 8) & 0xF));
            }
            sb.append(toHexDigit((cc >> 4) & 0xF));
            sb.append(toHexDigit(cc & 0xF));
        }
        return sb.toString();
    }

    /** Converts a value from 0 to 15 to the corresponding upper-case hexadecimal digit. */
    private static char toHexDigit(int d) {
        return (char) (d < 0xA ? d + '0' : d - 0xA + 'A');
    }

    /** A range of char codes; the start is inclusive, the end is exclusive. */
    private static class Range {

        final int start;
        final int end;

        public Range(int start, int end) {
            this.start = start;
            this.end = end;
        }

        public int getStart() {
            return start;
        }

        public int getEnd() {
            return end;
        }

        /** The number of char codes in the range. */
        public int size() {
            return end - start;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + end;
            result = prime * result + start;
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) return true;
            if (obj == null) return false;
            if (getClass() != obj.getClass()) return false;
            Range other = (Range) obj;
            if (end != other.end) return false;
            if (start != other.start) return false;
            return true;
        }

        /** Half-open interval notation with hexadecimal bounds; meant for diagnostic messages. */
        @Override
        public String toString() {
            return "[0x" + Integer.toHexString(start).toUpperCase()
                    + ", 0x" + Integer.toHexString(end).toUpperCase() + ")";
        }

    }

    /** Prints the object and a line-break to the standard output. */
    static void p(final Object o) {
        System.out.println(o);
    }

    /** Prints the object to the standard output, without line-break. */
    static void pp(final Object o) {
        System.out.print(o);
    }

    /** Prints the array items to the standard output, one per line, between square brackets. */
    static void p(final Object[] o) {
        System.out.println("[");
        for (final Object i : o) {
            System.out.println(" " + i);
        }
        System.out.println("]");
    }

    /** Prints a line-break to the standard output. */
    static void p() {
        System.out.println();
    }

    /**
     * Tells if the char code can start an FTL identifier.
     *
     * <p>This is a copy of the generated code (somewhat modified), so that we can compare it with
     * the generatorPredicate.
     *
     * @param cc the char code; values outside the 16-bit {@code char} range are never accepted
     *        (they used to be silently truncated to 16 bits, and so could be falsely accepted)
     */
    public static boolean isFTLIdentifierStart(int cc) {
        if (cc < Character.MIN_VALUE || cc > Character.MAX_VALUE) {
            // The decision tree below only covers 16-bit char codes; without this check the cast
            // would map, for example, 0x1F441 to 'A'.
            return false;
        }
        char c = (char) cc;
        if (c < 0xAA) {
            if (c >= 'a' && c <= 'z' || c >= '@' && c <= 'Z') {
                return true;
            } else {
                return c == '$' || c == '_';
            }
        } else { // c >= 0xAA
            if (c < 0xA7F8) {
                if (c < 0x2D6F) {
                    if (c < 0x2128) {
                        if (c < 0x2090) {
                            if (c < 0xD8) {
                                if (c < 0xBA) {
                                    return c == 0xAA || c == 0xB5;
                                } else { // c >= 0xBA
                                    return c == 0xBA || c >= 0xC0 && c <= 0xD6;
                                }
                            } else { // c >= 0xD8
                                if (c < 0x2071) {
                                    return c >= 0xD8 && c <= 0xF6 || c >= 0xF8 && c <= 0x1FFF;
                                } else { // c >= 0x2071
                                    return c == 0x2071 || c == 0x207F;
                                }
                            }
                        } else { // c >= 0x2090
                            if (c < 0x2115) {
                                if (c < 0x2107) {
                                    return c >= 0x2090 && c <= 0x209C || c == 0x2102;
                                } else { // c >= 0x2107
                                    return c == 0x2107 || c >= 0x210A && c <= 0x2113;
                                }
                            } else { // c >= 0x2115
                                if (c < 0x2124) {
                                    return c == 0x2115 || c >= 0x2119 && c <= 0x211D;
                                } else { // c >= 0x2124
                                    return c == 0x2124 || c == 0x2126;
                                }
                            }
                        }
                    } else { // c >= 0x2128
                        if (c < 0x2C30) {
                            if (c < 0x2145) {
                                if (c < 0x212F) {
                                    return c == 0x2128 || c >= 0x212A && c <= 0x212D;
                                } else { // c >= 0x212F
                                    return c >= 0x212F && c <= 0x2139 || c >= 0x213C && c <= 0x213F;
                                }
                            } else { // c >= 0x2145
                                if (c < 0x2183) {
                                    return c >= 0x2145 && c <= 0x2149 || c == 0x214E;
                                } else { // c >= 0x2183
                                    return c >= 0x2183 && c <= 0x2184 || c >= 0x2C00 && c <= 0x2C2E;
                                }
                            }
                        } else { // c >= 0x2C30
                            if (c < 0x2D00) {
                                if (c < 0x2CEB) {
                                    return c >= 0x2C30 && c <= 0x2C5E || c >= 0x2C60 && c <= 0x2CE4;
                                } else { // c >= 0x2CEB
                                    return c >= 0x2CEB && c <= 0x2CEE || c >= 0x2CF2 && c <= 0x2CF3;
                                }
                            } else { // c >= 0x2D00
                                if (c < 0x2D2D) {
                                    return c >= 0x2D00 && c <= 0x2D25 || c == 0x2D27;
                                } else { // c >= 0x2D2D
                                    return c == 0x2D2D || c >= 0x2D30 && c <= 0x2D67;
                                }
                            }
                        }
                    }
                } else { // c >= 0x2D6F
                    if (c < 0x31F0) {
                        if (c < 0x2DD0) {
                            if (c < 0x2DB0) {
                                if (c < 0x2DA0) {
                                    return c == 0x2D6F || c >= 0x2D80 && c <= 0x2D96;
                                } else { // c >= 0x2DA0
                                    return c >= 0x2DA0 && c <= 0x2DA6 || c >= 0x2DA8 && c <= 0x2DAE;
                                }
                            } else { // c >= 0x2DB0
                                if (c < 0x2DC0) {
                                    return c >= 0x2DB0 && c <= 0x2DB6 || c >= 0x2DB8 && c <= 0x2DBE;
                                } else { // c >= 0x2DC0
                                    return c >= 0x2DC0 && c <= 0x2DC6 || c >= 0x2DC8 && c <= 0x2DCE;
                                }
                            }
                        } else { // c >= 0x2DD0
                            if (c < 0x3031) {
                                if (c < 0x2E2F) {
                                    return c >= 0x2DD0 && c <= 0x2DD6 || c >= 0x2DD8 && c <= 0x2DDE;
                                } else { // c >= 0x2E2F
                                    return c == 0x2E2F || c >= 0x3005 && c <= 0x3006;
                                }
                            } else { // c >= 0x3031
                                if (c < 0x3040) {
                                    return c >= 0x3031 && c <= 0x3035 || c >= 0x303B && c <= 0x303C;
                                } else { // c >= 0x3040
                                    return c >= 0x3040 && c <= 0x318F || c >= 0x31A0 && c <= 0x31BA;
                                }
                            }
                        }
                    } else { // c >= 0x31F0
                        if (c < 0xA67F) {
                            if (c < 0xA4D0) {
                                if (c < 0x3400) {
                                    return c >= 0x31F0 && c <= 0x31FF || c >= 0x3300 && c <= 0x337F;
                                } else { // c >= 0x3400
                                    return c >= 0x3400 && c <= 0x4DB5 || c >= 0x4E00 && c <= 0xA48C;
                                }
                            } else { // c >= 0xA4D0
                                if (c < 0xA610) {
                                    return c >= 0xA4D0 && c <= 0xA4FD || c >= 0xA500 && c <= 0xA60C;
                                } else { // c >= 0xA610
                                    return c >= 0xA610 && c <= 0xA62B || c >= 0xA640 && c <= 0xA66E;
                                }
                            }
                        } else { // c >= 0xA67F
                            if (c < 0xA78B) {
                                if (c < 0xA717) {
                                    return c >= 0xA67F && c <= 0xA697 || c >= 0xA6A0 && c <= 0xA6E5;
                                } else { // c >= 0xA717
                                    return c >= 0xA717 && c <= 0xA71F || c >= 0xA722 && c <= 0xA788;
                                }
                            } else { // c >= 0xA78B
                                if (c < 0xA7A0) {
                                    return c >= 0xA78B && c <= 0xA78E || c >= 0xA790 && c <= 0xA793;
                                } else { // c >= 0xA7A0
                                    return c >= 0xA7A0 && c <= 0xA7AA;
                                }
                            }
                        }
                    }
                }
            } else { // c >= 0xA7F8
                if (c < 0xAB20) {
                    if (c < 0xAA44) {
                        if (c < 0xA8FB) {
                            if (c < 0xA840) {
                                if (c < 0xA807) {
                                    return c >= 0xA7F8 && c <= 0xA801 || c >= 0xA803 && c <= 0xA805;
                                } else { // c >= 0xA807
                                    return c >= 0xA807 && c <= 0xA80A || c >= 0xA80C && c <= 0xA822;
                                }
                            } else { // c >= 0xA840
                                if (c < 0xA8D0) {
                                    return c >= 0xA840 && c <= 0xA873 || c >= 0xA882 && c <= 0xA8B3;
                                } else { // c >= 0xA8D0
                                    return c >= 0xA8D0 && c <= 0xA8D9 || c >= 0xA8F2 && c <= 0xA8F7;
                                }
                            }
                        } else { // c >= 0xA8FB
                            if (c < 0xA984) {
                                if (c < 0xA930) {
                                    return c == 0xA8FB || c >= 0xA900 && c <= 0xA925;
                                } else { // c >= 0xA930
                                    return c >= 0xA930 && c <= 0xA946 || c >= 0xA960 && c <= 0xA97C;
                                }
                            } else { // c >= 0xA984
                                if (c < 0xAA00) {
                                    return c >= 0xA984 && c <= 0xA9B2 || c >= 0xA9CF && c <= 0xA9D9;
                                } else { // c >= 0xAA00
                                    return c >= 0xAA00 && c <= 0xAA28 || c >= 0xAA40 && c <= 0xAA42;
                                }
                            }
                        }
                    } else { // c >= 0xAA44
                        if (c < 0xAAC0) {
                            if (c < 0xAA80) {
                                if (c < 0xAA60) {
                                    return c >= 0xAA44 && c <= 0xAA4B || c >= 0xAA50 && c <= 0xAA59;
                                } else { // c >= 0xAA60
                                    return c >= 0xAA60 && c <= 0xAA76 || c == 0xAA7A;
                                }
                            } else { // c >= 0xAA80
                                if (c < 0xAAB5) {
                                    return c >= 0xAA80 && c <= 0xAAAF || c == 0xAAB1;
                                } else { // c >= 0xAAB5
                                    return c >= 0xAAB5 && c <= 0xAAB6 || c >= 0xAAB9 && c <= 0xAABD;
                                }
                            }
                        } else { // c >= 0xAAC0
                            if (c < 0xAAF2) {
                                if (c < 0xAADB) {
                                    return c == 0xAAC0 || c == 0xAAC2;
                                } else { // c >= 0xAADB
                                    return c >= 0xAADB && c <= 0xAADD || c >= 0xAAE0 && c <= 0xAAEA;
                                }
                            } else { // c >= 0xAAF2
                                if (c < 0xAB09) {
                                    return c >= 0xAAF2 && c <= 0xAAF4 || c >= 0xAB01 && c <= 0xAB06;
                                } else { // c >= 0xAB09
                                    return c >= 0xAB09 && c <= 0xAB0E || c >= 0xAB11 && c <= 0xAB16;
                                }
                            }
                        }
                    }
                } else { // c >= 0xAB20
                    if (c < 0xFB46) {
                        if (c < 0xFB13) {
                            if (c < 0xAC00) {
                                if (c < 0xABC0) {
                                    return c >= 0xAB20 && c <= 0xAB26 || c >= 0xAB28 && c <= 0xAB2E;
                                } else { // c >= 0xABC0
                                    return c >= 0xABC0 && c <= 0xABE2 || c >= 0xABF0 && c <= 0xABF9;
                                }
                            } else { // c >= 0xAC00
                                if (c < 0xD7CB) {
                                    return c >= 0xAC00 && c <= 0xD7A3 || c >= 0xD7B0 && c <= 0xD7C6;
                                } else { // c >= 0xD7CB
                                    return c >= 0xD7CB && c <= 0xD7FB || c >= 0xF900 && c <= 0xFB06;
                                }
                            }
                        } else { // c >= 0xFB13
                            if (c < 0xFB38) {
                                if (c < 0xFB1F) {
                                    return c >= 0xFB13 && c <= 0xFB17 || c == 0xFB1D;
                                } else { // c >= 0xFB1F
                                    return c >= 0xFB1F && c <= 0xFB28 || c >= 0xFB2A && c <= 0xFB36;
                                }
                            } else { // c >= 0xFB38
                                if (c < 0xFB40) {
                                    return c >= 0xFB38 && c <= 0xFB3C || c == 0xFB3E;
                                } else { // c >= 0xFB40
                                    return c >= 0xFB40 && c <= 0xFB41 || c >= 0xFB43 && c <= 0xFB44;
                                }
                            }
                        }
                    } else { // c >= 0xFB46
                        if (c < 0xFF21) {
                            if (c < 0xFDF0) {
                                if (c < 0xFD50) {
                                    return c >= 0xFB46 && c <= 0xFBB1 || c >= 0xFBD3 && c <= 0xFD3D;
                                } else { // c >= 0xFD50
                                    return c >= 0xFD50 && c <= 0xFD8F || c >= 0xFD92 && c <= 0xFDC7;
                                }
                            } else { // c >= 0xFDF0
                                if (c < 0xFE76) {
                                    return c >= 0xFDF0 && c <= 0xFDFB || c >= 0xFE70 && c <= 0xFE74;
                                } else { // c >= 0xFE76
                                    return c >= 0xFE76 && c <= 0xFEFC || c >= 0xFF10 && c <= 0xFF19;
                                }
                            }
                        } else { // c >= 0xFF21
                            if (c < 0xFFCA) {
                                if (c < 0xFF66) {
                                    return c >= 0xFF21 && c <= 0xFF3A || c >= 0xFF41 && c <= 0xFF5A;
                                } else { // c >= 0xFF66
                                    return c >= 0xFF66 && c <= 0xFFBE || c >= 0xFFC2 && c <= 0xFFC7;
                                }
                            } else { // c >= 0xFFCA
                                if (c < 0xFFDA) {
                                    return c >= 0xFFCA && c <= 0xFFCF || c >= 0xFFD2 && c <= 0xFFD7;
                                } else { // c >= 0xFFDA
                                    return c >= 0xFFDA && c <= 0xFFDC;
                                }
                            }
                        }
                    }
                }
            }
        }
    }

}