| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import com.ibm.icu.lang.UCharacter; |
| import com.ibm.icu.util.VersionInfo; |
| |
| // Line separator taken from the build's 'line.separator' property; used to wrap the generated literal list. |
| def linesep = properties.get('line.separator'); |
| |
| // Appends code point c to sb as a "0x%04X" literal. Entries are separated by ', '; |
| // a new line (linesep plus one space) is started for the first entry and whenever |
| // more than 100 characters precede the entry since the last line separator. |
| def appendChar = { StringBuilder sb, int c -> |
|   final int lengthBefore = sb.length(); |
|   final boolean firstEntry = (lengthBefore == 0); |
|   if (!firstEntry) { |
|     sb.append(', '); |
|   } |
|   // The separator search runs after ', ' was appended, but is measured from the earlier length. |
|   final boolean startNewLine = firstEntry || (lengthBefore - sb.lastIndexOf(linesep)) > 100; |
|   if (startNewLine) { |
|     sb.append(linesep).append(' '); |
|   } |
|   final String literal = String.format(Locale.ROOT, "0x%04X", c); |
|   sb.append(literal); |
| } |
| |
| // Collect every code point for which UCharacter.isUWhiteSpace is true, in ascending order, |
| // formatted as a comma-separated list of hex literals. |
| def whitespace = new StringBuilder(); |
| for (int codePoint in (UCharacter.MIN_CODE_POINT..UCharacter.MAX_CODE_POINT)) { |
|   if (!UCharacter.isUWhiteSpace(codePoint)) { |
|     continue; |
|   } |
|   appendChar(whitespace, codePoint); |
| } |
| |
| // Version strings of the ICU4J library and of its Unicode data; both are interpolated into the template. |
| VersionInfo icuVersionInfo = VersionInfo.ICU_VERSION; |
| VersionInfo unicodeVersionInfo = UCharacter.getUnicodeVersion(); |
| String icuVersion = icuVersionInfo.toString(); |
| String unicodeVersion = unicodeVersionInfo.toString(); |
| |
| // Template of the generated Java source for the UnicodeProps class. |
| // Groovy interpolates ${icuVersion}, ${unicodeVersion}, ${whitespace} and the hex-formatted |
| // UCharacter.MAX_CODE_POINT into this text; everything else is emitted verbatim. |
| // The string is trimmed before being written, so the blank first and last lines are not output. |
| def code = """ |
| // DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate. |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.lucene.analysis.util; |
| |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.SparseFixedBitSet; |
| |
| /** |
| * This file contains unicode properties used by various {@link CharTokenizer}s. |
| * The data was created using ICU4J v${icuVersion} |
| * <p> |
| * Unicode version: ${unicodeVersion} |
| */ |
| public final class UnicodeProps { |
| private UnicodeProps() {} |
| |
| /** Unicode version that was used to generate this file: {@value} */ |
| public static final String UNICODE_VERSION = "${unicodeVersion}"; |
| |
| /** Bitset with Unicode WHITESPACE code points. */ |
| public static final Bits WHITESPACE = createBits(${whitespace}); |
| |
| private static Bits createBits(final int... codepoints) { |
| final int len = codepoints[codepoints.length - 1] + 1; |
| final SparseFixedBitSet bitset = new SparseFixedBitSet(len); |
| for (int i : codepoints) bitset.set(i); |
| return new Bits() { |
| @Override |
| public boolean get(int index) { |
| return index < len && bitset.get(index); |
| } |
| |
| @Override |
| public int length() { |
| return ${String.format(Locale.ROOT, "0x%X", UCharacter.MAX_CODE_POINT)} + 1; |
| } |
| }; |
| } |
| } |
| """; |
| |
| // Write the trimmed template as UTF-8 to the path named by the 'unicode-props-file' property, |
| // then report the destination through the task's log. |
| File f = new File(properties['unicode-props-file']); |
| String generatedSource = code.trim(); |
| f.write(generatedSource, 'UTF-8'); |
| |
| String logMessage = "Unicode data written to: " + f; |
| task.log(logMessage); |