subversion/libsvn_subr/genctype.py - subversion - Git at Google

 #!/usr/bin/env python
 #
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #
 """genctype.py - Generate the svn_ctype character classification table.
 """

 # Printable labels for the 128 ASCII code points, indexed by character code.
 # Control characters, space and DEL get mnemonic names; every character
 # in the range 33..126 is simply its own label.
 names = ('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
          'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
          'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
          'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
          'sp') \
     + tuple(chr(code) for code in range(33, 127)) \
     + ('del',)

 # Codes flagged as whitespace:
 #   horizontal tab, new line, vertical tab, form feed, carriage return, space
 whitespace = (9, 10, 11, 12, 13, 32)

 # Byte values that are never given the UTF8LEAD flag in the loop below.
 # NOTE(review): RFC 3629 treats 0xE0 and 0xF0 as legal lead bytes and 0xC1
 # as illegal; this set is reproduced unchanged -- confirm against the
 # consumers of the generated table before altering it.
 utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)

 # Banner that opens the C comment placed in front of the generated table.
 print('    /* **** DO NOT EDIT! ****')
 print('       This table was generated by genctype.py, make changes there. */')

 # Emit one table row per byte value: a label comment plus its flag set.
 for code in range(256):
     if code >= 128:
         # High half: label is the hex value without the leading '0'
         # (e.g. 'x80'); only the UTF-8 lead/continuation flags apply.
         label = hex(code)[1:]
         classes = []
         if 0xC0 < code < 0xFE and code not in utf8_invalid:
             classes.append('SVN_CTYPE_UTF8LEAD')
         elif (code & 0xC0) == 0x80:
             classes.append('SVN_CTYPE_UTF8CONT')
     else:
         # ASCII half: pad the label to three columns, centring
         # single-character labels and left-justifying mnemonics.
         label = names[code]
         label = label.center(3) if len(label) == 1 else label.ljust(3)

         classes = ['SVN_CTYPE_ASCII']
         if code < 32 or code == 127:
             classes.append('SVN_CTYPE_CNTRL')
         if code in whitespace:
             classes.append('SVN_CTYPE_SPACE')

         # Digits, letters and punctuation are mutually exclusive and
         # together cover exactly 33..126, so punctuation is whatever
         # remains of that range once digits and letters are taken out.
         if 48 <= code < 58:
             classes.append('SVN_CTYPE_DIGIT')
         elif 65 <= code < 91:
             classes.append('SVN_CTYPE_UPPER')
             if code <= 70:
                 # 'A'..'F' double as hexadecimal digits
                 classes.append('SVN_CTYPE_XALPHA')
         elif 97 <= code < 123:
             classes.append('SVN_CTYPE_LOWER')
             if code <= 102:
                 # 'a'..'f' double as hexadecimal digits
                 classes.append('SVN_CTYPE_XALPHA')
         elif 33 <= code < 127:
             classes.append('SVN_CTYPE_PUNCT')

     # A byte with no flags at all is written as a literal 0.
     print('    /* %s */ %s,' % (label, ' | '.join(classes) or '0'))
	#!/usr/bin/env python
	#
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#
	"""genctype.py - Generate the svn_ctype character classification table.
	"""

	# Table of ASCII character names, indexed by character code.
	# Control characters, space and DEL use mnemonics; printable characters
	# are their own (single-character) name.
	names = ('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
	         'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
	         'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
	         'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
	         'sp',  '!',   '"',   '#',   '$',   '%',   '&',   '\'',
	         '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
	         '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
	         '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
	         '@',   'A',   'B',   'C',   'D',   'E',   'F',   'G',
	         'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
	         'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
	         'X',   'Y',   'Z',   '[',   '\\',  ']',   '^',   '_',
	         '`',   'a',   'b',   'c',   'd',   'e',   'f',   'g',
	         'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
	         'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
	         'x',   'y',   'z',   '{',   '|',   '}',   '~',   'del')

	# All whitespace characters:
	#   horizontal tab, new line, vertical tab, form feed, carriage return, space
	whitespace = (9, 10, 11, 12, 13, 32)

	# Byte values that are never given the UTF8LEAD flag in the loop below.
	# NOTE(review): RFC 3629 treats 0xE0 and 0xF0 as legal lead bytes and 0xC1
	# as illegal; the set is kept as-is -- confirm against the consumers of the
	# generated table before altering it.
	utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)

	# Banner that opens the C comment placed in front of the generated table.
	print('    /* **** DO NOT EDIT! ****')
	print('       This table was generated by genctype.py, make changes there. */')

	# Emit one table row per byte value: a label comment plus its flag set.
	for c in range(256):
	    bits = []

	    if c < 128:
	        # ASCII subrange
	        bits.append('SVN_CTYPE_ASCII')

	        # Pad the label to three columns: single characters are centred,
	        # mnemonics are left-justified.
	        if len(names[c]) == 1:
	            name = names[c].center(3)
	        else:
	            name = names[c].ljust(3)

	        # Control characters
	        if c < 32 or c == 127:
	            bits.append('SVN_CTYPE_CNTRL')

	        # Whitespace characters
	        if c in whitespace:
	            bits.append('SVN_CTYPE_SPACE')

	        # Punctuation marks
	        if (33 <= c < 48 or 58 <= c < 65
	                or 91 <= c < 97 or 123 <= c < 127):
	            bits.append('SVN_CTYPE_PUNCT')

	        # Decimal digits
	        elif 48 <= c < 58:
	            bits.append('SVN_CTYPE_DIGIT')

	        # Uppercase letters
	        elif 65 <= c < 91:
	            bits.append('SVN_CTYPE_UPPER')
	            # 'A'..'F' are also hexadecimal digits
	            if c <= 70:
	                bits.append('SVN_CTYPE_XALPHA')

	        # Lowercase letters
	        elif 97 <= c < 123:
	            bits.append('SVN_CTYPE_LOWER')
	            # 'a'..'f' are also hexadecimal digits
	            if c <= 102:
	                bits.append('SVN_CTYPE_XALPHA')

	    else:
	        # UTF-8 multibyte sequences; the label is the hex value without
	        # its leading '0' (e.g. 'x80').
	        name = hex(c)[1:]

	        # Lead bytes (start of sequence)
	        if 0xC0 < c < 0xFE and c not in utf8_invalid:
	            bits.append('SVN_CTYPE_UTF8LEAD')

	        # Continuation bytes (bit pattern 10xxxxxx)
	        elif (c & 0xC0) == 0x80:
	            bits.append('SVN_CTYPE_UTF8CONT')

	    # A byte with no flags at all is written as a literal 0.
	    flags = ' | '.join(bits) if bits else '0'
	    print('    /* %s */ %s,' % (name, flags))