| #!/usr/bin/env python |
| # |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # |
"""genctype.py - Generate the svn_ctype character classification table.

Running this script prints a "DO NOT EDIT" comment followed by one C
initializer line for every byte value 0..255.  Each line carries the
byte's display name inside a C comment and the SVN_CTYPE_* flags that
apply to the byte, joined with ' | ' (or a bare 0 when no flag applies).
"""

# Table of ASCII character names, indexed by character code (0..127).
names = ('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
         'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
         'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
         'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
         'sp',  '!',   '"',   '#',   '$',   '%',   '&',   '\'',
         '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
         '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
         '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
         '@',   'A',   'B',   'C',   'D',   'E',   'F',   'G',
         'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
         'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
         'X',   'Y',   'Z',   '[',   '\\',  ']',   '^',   '_',
         '`',   'a',   'b',   'c',   'd',   'e',   'f',   'g',
         'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
         'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
         'x',   'y',   'z',   '{',   '|',   '}',   '~',   'del')

# All whitespace characters, in code order:
#   horizontal tab, new line, vertical tab, form feed, carriage return, space
whitespace = (9, 10, 11, 12, 13, 32)

# Bytes in the 0xC1..0xFD window that must NOT be flagged as UTF-8 lead
# bytes.  0xFE and 0xFF are listed as well, but the lead-byte test below
# already stops at c < 0xFE, so those two entries never change the result.
# NOTE(review): this set does not line up with RFC 3629 -- 0xE0 and 0xF0
# are legal lead bytes there, while 0xC1 and several bytes above 0xF4 that
# this script does flag as lead bytes are not.  It is kept unchanged here
# because altering it would alter the generated table; confirm with the
# consumers of the table before revising it.
utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)


def _ascii_bits(c):
    """Return the list of SVN_CTYPE_* flag names for an ASCII code C."""
    bits = ['SVN_CTYPE_ASCII']

    # Control characters
    if c < 32 or c == 127:
        bits.append('SVN_CTYPE_CNTRL')

    # Whitespace characters (may overlap with the control characters)
    if c in whitespace:
        bits.append('SVN_CTYPE_SPACE')

    # The remaining classes are mutually exclusive.
    if 33 <= c < 48 or 58 <= c < 65 or 91 <= c < 97 or 123 <= c < 127:
        # Punctuation marks
        bits.append('SVN_CTYPE_PUNCT')
    elif 48 <= c < 58:
        # Decimal digits
        bits.append('SVN_CTYPE_DIGIT')
    elif 65 <= c < 91:
        # Uppercase letters; 'A'..'F' are also hexadecimal digits
        bits.append('SVN_CTYPE_UPPER')
        if c <= 70:
            bits.append('SVN_CTYPE_XALPHA')
    elif 97 <= c < 123:
        # Lowercase letters; 'a'..'f' are also hexadecimal digits
        bits.append('SVN_CTYPE_LOWER')
        if c <= 102:
            bits.append('SVN_CTYPE_XALPHA')

    return bits


def _utf8_bits(c):
    """Return the list of SVN_CTYPE_* flag names for a byte C >= 128."""
    # Lead bytes (start of sequence)
    if 0xC0 < c < 0xFE and c not in utf8_invalid:
        return ['SVN_CTYPE_UTF8LEAD']

    # Continuation bytes (bit pattern 10xxxxxx)
    if (c & 0xC0) == 0x80:
        return ['SVN_CTYPE_UTF8CONT']

    return []


def ctype_entry(c):
    """Return the (name, flags) pair printed for byte value C.

    NAME is the three-column label used in the C comment: the entry from
    `names` for ASCII codes (single characters centred, longer names
    left-justified) or 'xNN' (lowercase hex) for bytes 128..255.
    FLAGS is the ' | '-joined list of SVN_CTYPE_* names, or '0' when the
    byte belongs to no class.

    Raises ValueError if C is outside 0..255; without the check a negative
    value would silently index `names` from the end.
    """
    if not 0 <= c <= 255:
        raise ValueError('byte value out of range: %r' % (c,))

    if c < 128:
        label = names[c]
        if len(label) == 1:
            name = label.center(3)
        else:
            name = label.ljust(3)
        bits = _ascii_bits(c)
    else:
        # Same text as hex(c)[1:] for every value in 128..255.
        name = 'x%02x' % c
        bits = _utf8_bits(c)

    if bits:
        flags = ' | '.join(bits)
    else:
        flags = '0'
    return name, flags


print(' /* **** DO NOT EDIT! ****')
print(' This table was generated by genctype.py, make changes there. */')

for c in range(256):
    print(' /* %s */ %s,' % ctype_entry(c))