blob: 42553843a60c194d0dbf297d4f980a97cbeff72d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.system;
public class RiotChars
{
// ---- Character classes
public static boolean isAlpha(int codepoint) {
return Character.isLetter(codepoint);
}
public static boolean isAlphaNumeric(int codepoint) {
return Character.isLetterOrDigit(codepoint);
}
/** ASCII A-Z */
public static boolean isA2Z(int ch) {
return range(ch, 'a', 'z') || range(ch, 'A', 'Z');
}
/** ASCII A-Z or 0-9 */
public static boolean isA2ZN(int ch) {
return range(ch, 'a', 'z') || range(ch, 'A', 'Z') || range(ch, '0', '9');
}
/** ASCII 0-9 */
public static boolean isDigit(int ch) {
return range(ch, '0', '9');
}
public static boolean isWhitespace(int ch) {
return isHorizontalWhitespace(ch) || isNewlineChar(ch) || ch == '\f';
}
public static boolean isHorizontalWhitespace(int ch) {
return ch == ' ' || ch == '\t';
}
public static boolean isNewlineChar(int ch) {
return ch == '\r' || ch == '\n';
}
/*
The token rules from SPARQL and Turtle.
PNAME_NS ::= PN_PREFIX? ':'
PNAME_LN ::= PNAME_NS PN_LOCAL[131] BLANK_NODE_LABEL ::= '_:' PN_LOCAL
PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
PN_CHARS_U ::= PN_CHARS_BASE | '_'
VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
// "high surrogates" (D800–DBFF) "low surrogates" (DC00–DFFF).
Notes: PN_CHARS_BASE has a hole above #xD800 -- these are the surrogate pairs
*/
public static boolean isPNCharsBase(int ch) {
// PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] |
// [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
// [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
// [#x10000-#xEFFFF]
return
r(ch, 'a', 'z') || r(ch, 'A', 'Z') || r(ch, 0x00C0, 0x00D6) || r(ch, 0x00D8, 0x00F6) || r(ch, 0x00F8, 0x02FF) ||
r(ch, 0x0370, 0x037D) || r(ch, 0x037F, 0x1FFF) || r(ch, 0x200C, 0x200D) || r(ch, 0x2070, 0x218F) ||
r(ch, 0x2C00, 0x2FEF) || r(ch, 0x3001, 0xD7FF) ||
// Surrogate pairs
r(ch, 0xD800, 0xDFFF) ||
r(ch, 0xF900, 0xFDCF) || r(ch, 0xFDF0, 0xFFFD) ||
r(ch, 0x10000, 0xEFFFF); // Outside the basic plain.
}
public static boolean isPNChars_U(int ch) {
//PN_CHARS_BASE | '_'
return isPNCharsBase(ch) || ( ch == '_' );
}
public static boolean isPNChars_U_N(int ch) {
// PN_CHARS_U | [0-9]
return isPNCharsBase(ch) || ( ch == '_' ) || isDigit(ch);
}
public static boolean isPNChars(int ch) {
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
return isPNChars_U(ch) || isDigit(ch) || ( ch == '-' ) || ch == 0x00B7 || r(ch, 0x300, 0x036F) || r(ch, 0x203F, 0x2040);
}
public static boolean isPN_LOCAL_ESC(char ch) {
//[172s] PN_LOCAL_ESC ::= '\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')
switch (ch) {
case '\\': case '_': case '~': case '.': case '-': case '!': case '$':
case '&': case '\'': case '(': case ')': case '*': case '+': case ',':
case ';': case '=': case '/': case '?': case '#': case '@': case '%':
return true;
default:
return false;
}
}
/** Hexadecimal character */
public static boolean isHexChar(int ch) {
return range(ch, '0', '9') || range(ch, 'a', 'f') || range(ch, 'A', 'F');
}
/** Hexadecimal character, only lower case a-f */
public static boolean isHexCharLC(int ch) {
return range(ch, '0', '9') || range(ch, 'a', 'f');
}
/** Hexadecimal character, only upper case A-F */
public static boolean isHexCharUC(int ch) {
return range(ch, '0', '9') || range(ch, 'A', 'F');
}
public static int valHexChar(int ch) {
if ( range(ch, '0', '9') )
return ch - '0';
if ( range(ch, 'a', 'f') )
return ch - 'a' + 10;
if ( range(ch, 'A', 'F') )
return ch - 'A' + 10;
return -1;
}
private static boolean r(int ch, int a, int b) { return ( ch >= a && ch <= b ); }
public static boolean range(int ch, char a, char b) {
return (ch >= a && ch <= b);
}
}