#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
#
# transform_sql.py -- create a header file with the appropriate SQL variables
# from an SQL file
#
import operator
import os
import re
import sys


# operator.methodcaller doesn't exist in Python 2.5.
if not hasattr(operator, 'methodcaller'):
  def methodcaller(method, *args, **kwargs):
    return lambda x: getattr(x, method)(*args, **kwargs)
  operator.methodcaller = methodcaller
  del methodcaller


DEFINE_END = '  ""\n\n'


def usage_and_exit(msg):
  if msg:
    sys.stderr.write('%s\n\n' % msg)
  sys.stderr.write(
    'USAGE: %s SQLITE_FILE [OUTPUT_FILE]\n'
    '  stdout will be used if OUTPUT_FILE is not provided.\n'
    % os.path.basename(sys.argv[0]))
  sys.stderr.flush()
  sys.exit(1)


class Processor(object):
  re_comments = re.compile(r'/\*.*?\*/', re.MULTILINE | re.DOTALL)

  # a few SQL comments that act as directives for this transform system
  re_format = re.compile(r'-- *format: *([0-9]+)')
  re_statement = re.compile(r'-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?')
  re_include = re.compile(r'-- *include: *([-a-z]+)')
  re_define = re.compile(r'-- *define: *([A-Z_0-9]+)')
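  # e.g. (illustrative; the names below are made up) directive comments as
  # they might appear in the .sql source:
  #   -- format: 1
  #   -- STMT_SELECT_SOMETHING (some-info)
  #   -- include: some-file
  #   -- define: SOME_DEFINE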

  def _sub_format(self, match):
    vsn = match.group(1)

    self.close_define()
    self.output.write('#define %s_%s \\\n' % (self.var_name, match.group(1)))
    self.var_printed = True

  def _sub_statement(self, match):
    name = match.group(1)

    self.close_define()
    self.output.write('#define STMT_%s %d\n' % (match.group(1),
                                                self.stmt_count))
    if match.group(3) is None:
      info = 'NULL'
    else:
      info = '"' + match.group(3) + '"'
    self.output.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' %
                      (self.stmt_count, match.group(1), info))
    self.output.write('#define STMT_%d \\\n' % (self.stmt_count,))
    self.var_printed = True

    self.stmt_count += 1

  def _sub_include(self, match):
    filepath = os.path.join(self.dirpath, match.group(1) + '.sql')

    self.close_define()
    self.process_file(open(filepath).read())

  def _sub_define(self, match):
    define = match.group(1)

    self.output.write('  APR_STRINGIFY(%s) \\\n' % define)

  def __init__(self, dirpath, output, var_name, token_map):
    self.dirpath = dirpath
    self.output = output
    self.var_name = var_name
    self.token_map = token_map

    self.stmt_count = 0
    self.var_printed = False

    self._directives = {
        self.re_format : self._sub_format,
        self.re_statement : self._sub_statement,
        self.re_include : self._sub_include,
        self.re_define : self._sub_define,
        }

  def process_file(self, input):
    input = self.re_comments.sub('', input)

    for line in input.split('\n'):
      line = line.replace('"', '\\"')

      # IS_STRICT_DESCENDANT_OF()
      #
      # A common operation in the working copy is determining descendants of
      # a node. To allow SQLite to use its indexes to provide the answer we
      # must provide simple less-than and greater-than operations.
      #
      # For relative paths that consist of one or more components like
      # 'subdir' we can accomplish this by comparing local_relpath with
      # 'subdir/' and 'subdir0' ('/'+1 = '0').
      #
      # For the working copy root this case is less simple and not strictly
      # valid utf-8/16 (but luckily SQLite validates neither utf-8 nor
      # utf-16). The binary blob x'FFFF' is higher than any valid utf-8 and
      # utf-16 sequence.
      #
      # So for the root we can compare with > '' and < x'FFFF'. (This skips
      # the root itself and selects all descendants.)
      #
      # '/'+1 == '0'
      line = re.sub(
        r'IS_STRICT_DESCENDANT_OF[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]',
        r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
        r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
        line)
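      # e.g. (illustrative): IS_STRICT_DESCENDANT_OF(local_relpath, ?1)
      # expands to:
      #   (((local_relpath) > (CASE (?1) WHEN '' THEN '' ELSE (?1) || '/' END))
      #    AND ((local_relpath) < CASE (?1) WHEN '' THEN X'FFFF'
      #                                ELSE (?1) || '0' END))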

      # RELPATH_SKIP_JOIN(x, y, z) skips the x prefix from z and then joins
      # the result after y. In other words it replaces x with y, but follows
      # the relpath rules.
      #
      # This matches the C version of:
      #   svn_relpath_join(y, svn_relpath_skip_ancestor(x, z), pool)
      # but returns an SQL NULL in case z is not below x.
      #
      line = re.sub(
        r'RELPATH_SKIP_JOIN[(]([?]?[A-Za-z0-9_.]+), ' +
        r'([?]?[A-Za-z0-9_.]+), ' +
        r'([?]?[A-Za-z0-9_.]+)[)]',
        r"(CASE WHEN (\1) = '' THEN RELPATH_JOIN(\2, \3) " +
        r"WHEN (\2) = '' THEN RELPATH_SKIP_ANCESTOR(\1, \3) " +
        r"WHEN SUBSTR((\3), 1, LENGTH(\1)) = (\1) " +
        r"THEN " +
        r"CASE WHEN LENGTH(\1) = LENGTH(\3) THEN (\2) " +
        r"WHEN SUBSTR((\3), LENGTH(\1)+1, 1) = '/' " +
        r"THEN (\2) || SUBSTR((\3), LENGTH(\1)+1) " +
        r"END " +
        r"END)",
        line)
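      # e.g. (illustrative): for RELPATH_SKIP_JOIN(?1, ?2, ?3) with ?1='A',
      # ?2='X' and ?3='A/B', the generated CASE yields 'X/B'; it yields NULL
      # when ?3 is not below ?1.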

      # RELPATH_JOIN(x, y) joins x to y following the svn_relpath_join()
      # rules.
      line = re.sub(
        r'RELPATH_JOIN[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]',
        r"(CASE WHEN (\1) = '' THEN (\2) " +
        r"WHEN (\2) = '' THEN (\1) " +
        r"ELSE (\1) || '/' || (\2) " +
        r"END)",
        line)
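      # e.g. (illustrative): with ?1='A/B' and ?2='C', RELPATH_JOIN(?1, ?2)
      # yields 'A/B/C'; if either argument is '', the other is returned
      # unchanged.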

      # RELPATH_SKIP_ANCESTOR(x, y) skips the x prefix from y following the
      # svn_relpath_skip_ancestor() rules. Returns NULL when y is not below x.
      line = re.sub(
        r'RELPATH_SKIP_ANCESTOR[(]([?]?[A-Za-z0-9_.]+), ' +
        r'([?]?[A-Za-z0-9_.]+)[)]',
        r"(CASE WHEN (\1) = '' THEN (\2) " +
        r" WHEN SUBSTR((\2), 1, LENGTH(\1)) = (\1) " +
        r" THEN " +
        r"CASE WHEN LENGTH(\1) = LENGTH(\2) THEN '' " +
        r"WHEN SUBSTR((\2), LENGTH(\1)+1, 1) = '/' " +
        r"THEN SUBSTR((\2), LENGTH(\1)+2) " +
        r"END" +
        r" END)",
        line)
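      # e.g. (illustrative): with ?1='A' and ?2='A/B/C',
      # RELPATH_SKIP_ANCESTOR(?1, ?2) yields 'B/C'; equal paths yield ''
      # and unrelated paths fall through to NULL.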

      # Another preprocessing step: substitute tokens from the token map.
      for symbol, string in self.token_map.items():
        # ### This doesn't sql-escape 'string'
        line = re.sub(r'\b%s\b' % re.escape(symbol), "'%s'" % string, line)
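      # The loop above, e.g. (illustrative): if the token map pairs MAP_WORD
      # with 'word', a bare MAP_WORD in the SQL text is replaced by the
      # quoted literal 'word'.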

      if line.strip():
        handled = False
        for regex, handler in self._directives.items():
          match = regex.match(line)
          if match:
            handler(match)
            handled = True
            break

        # we've handled the line, so skip it
        if handled:
          continue

        if not self.var_printed:
          self.output.write('#define %s \\\n' % self.var_name)
          self.var_printed = True

        # got something besides whitespace. write it out. include some
        # whitespace to separate the SQL commands. and a backslash to
        # continue the string onto the next line.
        self.output.write('  "%s " \\\n' % line.rstrip())

    # previous line had a continuation. end the madness.
    self.close_define()

  def close_define(self):
    if self.var_printed:
      self.output.write(DEFINE_END)
      self.var_printed = False


class NonRewritableDict(dict):
  """A dictionary that does not allow self[k]=v when k in self
  (unless v is equal to the stored value).
  (An entry would have to be explicitly deleted before a new value
  may be entered.)
  """

  def __setitem__(self, key, val):
    if self.__contains__(key) and self.__getitem__(key) != val:
      raise Exception("Can't re-insert key %r with value %r "
                      "(already present with value %r)"
                      % (key, val, self.__getitem__(key)))
    super(NonRewritableDict, self).__setitem__(key, val)
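
# For illustration (hypothetical values): with d = NonRewritableDict(),
# d['k'] = 'v' may be repeated with the same value, but a subsequent
# d['k'] = 'other' raises an Exception.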


def hotspots(fd):
  hotspot = False
  for line in fd:
    # hotspot is True within definitions of static const svn_token_map_t[]
    # arrays: seeing 'svn_token_map_t' toggles it on, and the closing '};'
    # (written as '\x7d;') toggles it off again.
    hotspot ^= int(('svn_token_map_t', '\x7d;')[hotspot] in line)
    if hotspot:
      yield line
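
# For illustration (hypothetical input): given a token-map header containing
#   static const svn_token_map_t map_x[] = {
#     { "word", MAP_WORD },
#   };
# hotspots() yields the lines from the array declaration down to, but not
# including, the closing '};'.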


def extract_token_map(filename):
  try:
    fd = open(filename)
  except IOError:
    return {}

  pattern = re.compile(r'"(.*?)".*?(MAP_\w*)')
  return \
    NonRewritableDict(
      map(operator.itemgetter(1, 0),
          map(operator.methodcaller('groups'),
              filter(None,
                     map(pattern.search,
                         hotspots(fd))))))
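
# For illustration (continuing the hypothetical header above): the result
# would be {'MAP_WORD': 'word'} -- operator.itemgetter(1, 0) swaps the regex
# groups so the MAP_* symbol becomes the key.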


def main(input_filepath, output):
  filename = os.path.basename(input_filepath)
  input = open(input_filepath, 'r').read()

  token_map_filename = os.path.dirname(input_filepath) + '/token-map.h'
  token_map = extract_token_map(token_map_filename)

  var_name = re.sub('[-.]', '_', filename).upper()

  output.write(
    '/* This file is automatically generated from %s and %s.\n'
    ' * Do not edit this file -- edit the source and rerun gen-make.py */\n'
    '\n'
    % (filename, token_map_filename))

  proc = Processor(os.path.dirname(input_filepath), output, var_name,
                   token_map)
  proc.process_file(input)

  ### the STMT_%d naming precludes *multiple* transform_sql headers from
  ### being used within the same .c file. for now, that's more than fine.
  ### in the future, we can always add a var_name discriminator or use
  ### the statement name itself (which should hopefully be unique across
  ### all names in use; or can easily be made so)
  if proc.stmt_count > 0:
    output.write(
      '#define %s_DECLARE_STATEMENTS(varname) \\\n' % (var_name,)
      + '  static const char * const varname[] = { \\\n'
      + ', \\\n'.join('    STMT_%d' % (i,) for i in range(proc.stmt_count))
      + ', \\\n    NULL \\\n  }\n')

    output.write('\n')

    output.write(
      '#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,)
      + '  static const char * const varname[][2] = { \\\n'
      + ', \\\n'.join('    STMT_%d_INFO' % (i,)
                      for i in range(proc.stmt_count))
      + ', \\\n    {NULL, NULL} \\\n  }\n')
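
# For illustration (hypothetical output): with var_name 'FOO_SQL' and two
# statements, the generated header would contain roughly:
#   #define FOO_SQL_DECLARE_STATEMENTS(varname) \
#     static const char * const varname[] = { \
#       STMT_0, \
#       STMT_1, \
#       NULL \
#     }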


if __name__ == '__main__':
  if len(sys.argv) < 2 or len(sys.argv) > 3:
    usage_and_exit('Incorrect number of arguments')

  # Note: we could use stdin, but then we'd have no var_name
  input_filepath = sys.argv[1]

  if len(sys.argv) > 2:
    output_file = open(sys.argv[2], 'w')
  else:
    output_file = sys.stdout

  main(input_filepath, output_file)