| # ************************************************************* |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # ************************************************************* |
| |
import sys
import os.path
| from globals import * |
| import macroparser |
| |
class EOF(Exception):
    """Raised when the lexer moves past the end of the buffer."""

    def __str__ (self):
        return "end of file"

class BOF(Exception):
    """Raised when the lexer moves past the beginning of the buffer."""

    def __str__ (self):
        return "beginning of file"
| |
| |
def removeHeaderQuotes (orig):
    """Strip surrounding double quotes or angle brackets from an include
    target: '"foo.hrc"' or '<foo.hrc>' becomes 'foo.hrc'."""
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig
| |
| |
| def dumpTokens (tokens, toError=False): |
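    """Pretty-print a list of tokens as pseudo source code, indenting on
    scope tokens, and write the result to stdout (or stderr when toError
    is True)."""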
| |
| scope = 0 |
| indent = " " |
| line = '' |
| chars = '' |
| |
| for token in tokens: |
        if token in ('{', '<'):
| if len(line) > 0: |
| chars += indent*scope + line + "\n" |
| line = '' |
| chars += indent*scope + token + "\n" |
| scope += 1 |
| |
        elif token in ('}', '>'):
| if len(line) > 0: |
| chars += indent*scope + line + "\n" |
| line = '' |
| scope -= 1 |
| chars += indent*scope + token |
| |
| elif token == ';': |
| if len(line) > 0: |
| chars += indent*scope + line + ";\n" |
| line = '' |
| else: |
| chars += ";\n" |
| elif len(token) > 0: |
| line += token + ' ' |
| |
| if len(line) > 0: |
| chars += line |
| chars += "\n" |
| if toError: |
| sys.stderr.write(chars) |
| else: |
| sys.stdout.write(chars) |
| |
| |
| class HeaderData(object): |
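    """Tokens and macro defines collected from a single header file."""
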
| def __init__ (self): |
| self.defines = {} |
| self.tokens = [] |
| |
| |
| class SrcLexer(object): |
| """Lexicographical analyzer for .src format. |
| |
| The role of a lexer is to parse the source file and break it into |
| appropriate tokens. Such tokens are later passed to a parser to |
| build the syntax tree. |
| """ |
| headerCache = {} |
| |
| VISIBLE = 0 |
| INVISIBLE_PRE = 1 |
| INVISIBLE_POST = 2 |
| |
    def __init__ (self, chars, filepath=None):
| self.filepath = filepath |
| self.parentLexer = None |
| self.chars = chars |
| self.bufsize = len(self.chars) |
| |
| # TODO: use parameters for this |
| # Properties that can be copied. |
| self.headerDict = dict () |
| self.debug = False |
| self.debugMacro = False |
| self.includeDirs = list () |
| self.expandHeaders = True |
| self.inMacroDefine = False |
| self.stopOnHeader = False |
| |
| def copyProperties (self, other): |
| """Copy properties from another instance of SrcLexer.""" |
| |
| # TODO: use parameters for this |
| self.headerDict = other.headerDict |
| self.debug = other.debug |
| self.debugMacro = other.debugMacro |
| self.includeDirs = other.includeDirs[:] |
| self.expandHeaders = other.expandHeaders |
| self.inMacroDefine = other.inMacroDefine |
| self.stopOnHeader = other.stopOnHeader |
| |
| def init (self): |
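        """Reset the per-run tokenizer state; called by tokenize()."""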
| self.firstNonBlank = '' |
| self.token = '' |
| self.tokens = [] |
| self.defines = {} |
| self.visibilityStack = [] |
| |
| def getTokens (self): |
| return self.tokens |
| |
| def getDefines (self): |
| return self.defines |
| |
    def nextPos (self, i):
        """Move to the next character position, skipping carriage returns;
        raise EOF at the end of the buffer."""
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if c == "\r":
                continue
            break
        return i

    def prevPos (self, i):
        """Move to the previous character position, skipping carriage
        returns; raise BOF at the beginning of the buffer."""
        while True:
            i -= 1
            if i < 0:
                # A negative index would silently wrap around to the end
                # of the buffer.
                raise BOF
            c = self.chars[i]

            if c == "\r":
                continue
            break
        return i
| |
| def isCodeVisible (self): |
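        """Return True only when no enclosing conditional block hides the
        current code."""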
| if len(self.visibilityStack) == 0: |
| return True |
| for item in self.visibilityStack: |
| if item != SrcLexer.VISIBLE: |
| return False |
| return True |
| |
| def tokenize (self): |
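        """Tokenize the entire character buffer; the results are stored in
        self.tokens and self.defines."""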
| self.init() |
| |
        i = 0
        while i < self.bufsize:
            c = self.chars[i]

            if self.firstNonBlank == '' and c not in [' ', "\n", "\t"]:
                # Remember the first non-blank character of the line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''
| |
| if c == '#': |
| i = self.pound(i) |
| elif c == '/': |
| i = self.slash(i) |
| elif c == "\n": |
| i = self.lineBreak(i) |
| elif c == '"': |
| i = self.doubleQuote(i) |
| elif c in [' ', "\t"]: |
| i = self.blank(i) |
| elif c in ";()[]{}<>,=+-*": |
| # Any outstanding single-character token. |
| i = self.anyToken(i, c) |
| elif self.isCodeVisible(): |
| self.token += c |
| |
| try: |
| i = self.nextPos(i) |
| except EOF: |
| break |
| |
| if len(self.token): |
| self.tokens.append(self.token) |
| |
| if not self.parentLexer and self.debug: |
| progress ("-"*68 + "\n") |
| progress ("All defines found in this translation unit:\n") |
| keys = sorted(self.defines.keys()) |
| for key in keys: |
| progress ("@ %s\n"%key) |
| |
| def dumpTokens (self, toError=False): |
| dumpTokens(self.tokens, toError) |
| |
| |
| def maybeAddToken (self): |
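        """Append the pending token, if any, to the token list and reset it."""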
| if len(self.token) > 0: |
| self.tokens.append(self.token) |
| self.token = '' |
| |
| |
| #-------------------------------------------------------------------- |
| # character handlers |
| |
| def blank (self, i): |
| if not self.isCodeVisible(): |
| return i |
| |
| self.maybeAddToken() |
| return i |
| |
| |
| def pound (self, i): |
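        """Handle a '#' that starts a preprocessor directive: read the
        command name and its argument, then update the defines or the
        visibility stack accordingly."""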
| |
| if self.inMacroDefine: |
| return i |
| |
        if self.firstNonBlank != '#':
            return i
| |
| self.maybeAddToken() |
| # We are in preprocessing mode. |
| |
| # Get the macro command name '#<command> .....' |
| |
| command, define, buf = '', '', '' |
| firstNonBlank = False |
| while True: |
| try: |
| i = self.nextPos(i) |
| c = self.chars[i] |
| if c == '\\' and self.chars[self.nextPos(i)] == "\n": |
| i = self.nextPos(i) |
| continue |
| except EOF: |
| break |
| |
| if c == "\n": |
| if len(buf) > 0 and len(command) == 0: |
| command = buf |
| i = self.prevPos(i) |
| break |
| elif c in [' ', "\t"]: |
| if not firstNonBlank: |
| # Ignore any leading blanks after the '#'. |
| continue |
| |
| if len(command) == 0: |
| command = buf |
| buf = '' |
| else: |
| buf += ' ' |
            elif c == '(':
                # Either the start of a function-like macro's parameter
                # list, or the end of the command name itself.
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
| else: |
| if not firstNonBlank: |
| firstNonBlank = True |
| buf += c |
| |
| if command == 'define': |
| self.handleMacroDefine(buf) |
| elif command == 'include': |
| self.handleMacroInclude(buf) |
| elif command == 'ifdef': |
| defineName = buf.strip() |
| if defineName in self.defines: |
| self.visibilityStack.append(SrcLexer.VISIBLE) |
| else: |
| self.visibilityStack.append(SrcLexer.INVISIBLE_PRE) |
| |
| elif command == 'ifndef': |
| defineName = buf.strip() |
| if defineName in self.defines: |
| self.visibilityStack.append(SrcLexer.INVISIBLE_PRE) |
| else: |
| self.visibilityStack.append(SrcLexer.VISIBLE) |
| |
| elif command == 'if': |
| if self.evalCodeVisibility(buf): |
| self.visibilityStack.append(SrcLexer.VISIBLE) |
| else: |
| self.visibilityStack.append(SrcLexer.INVISIBLE_PRE) |
| |
        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ("'#elif' without a matching '#if'")
| |
| if self.visibilityStack[-1] == SrcLexer.VISIBLE: |
| self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST |
| elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE: |
| # Evaluate only if the current visibility is false. |
| if self.evalCodeVisibility(buf): |
| self.visibilityStack[-1] = SrcLexer.VISIBLE |
| |
        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ("'#else' without a matching '#if'")

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE
| |
        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ("'#endif' without a matching '#if'")
| self.visibilityStack.pop() |
| |
        elif command == 'undef':
            # Not supported; silently ignored.
            pass
        elif command in ['error', 'pragma']:
            # Not relevant for tokenizing; silently ignored.
            pass
        else:
            # Unknown preprocessor command; report it and bail out.
            sys.stderr.write("unknown preprocessor command '%s' '%s' in %s\n"%(command, buf, self.filepath))
            sys.exit(1)
| |
| return i |
| |
| |
    def evalCodeVisibility (self, buf):
        """Evaluate an '#if' or '#elif' expression; default to visible when
        the expression cannot be evaluated as Python."""
        try:
            return eval(buf)
        except Exception:
            return True
| |
| def handleMacroDefine (self, buf): |
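        """Parse the body of a '#define' and register the resulting macro."""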
| |
| mparser = macroparser.MacroParser(buf) |
| mparser.debug = self.debugMacro |
| mparser.parse() |
| macro = mparser.getMacro() |
| if macro: |
| self.defines[macro.name] = macro |
| |
| def handleMacroInclude (self, buf): |
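        """Resolve a '#include' target, tokenize the header with a child
        lexer, and absorb the macro defines it contributes."""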
| |
        # Strip anything that follows the header name.
| pos = buf.find(' ') |
| if pos >= 0: |
| buf = buf[:pos] |
| headerSub = removeHeaderQuotes(buf) |
| |
| if not self.expandHeaders: |
| # We don't want to expand headers. Bail out. |
| if self.debug: |
| progress ("%s ignored\n"%headerSub) |
| return |
| |
| defines = {} |
| headerPath = None |
| for includeDir in self.includeDirs: |
| hpath = includeDir + '/' + headerSub |
| if os.path.isfile(hpath) and hpath != self.filepath: |
| headerPath = hpath |
| break |
| |
| if not headerPath: |
| error("included header file " + headerSub + " not found\n", self.stopOnHeader) |
| return |
| |
| if self.debug: |
| progress ("%s found\n"%headerPath) |
| |
| if headerPath in self.headerDict: |
| if self.debug: |
| progress ("%s already included\n"%headerPath) |
| return |
| |
| if headerPath in SrcLexer.headerCache: |
| if self.debug: |
| progress ("%s in cache\n"%headerPath) |
| for key in list(SrcLexer.headerCache[headerPath].defines.keys()): |
| self.defines[key] = SrcLexer.headerCache[headerPath].defines[key] |
| return |
| |
        with open(headerPath, 'r') as f:
            chars = f.read()
        mclexer = SrcLexer(chars, headerPath)
| mclexer.copyProperties(self) |
| mclexer.parentLexer = self |
| mclexer.tokenize() |
| hdrData = HeaderData() |
| hdrData.tokens = mclexer.getTokens() |
| headerDefines = mclexer.getDefines() |
| for key in list(headerDefines.keys()): |
| defines[key] = headerDefines[key] |
| hdrData.defines[key] = headerDefines[key] |
| |
| self.headerDict[headerPath] = True |
| SrcLexer.headerCache[headerPath] = hdrData |
| |
        # Update the list of headers that have already been expanded.
| for key in list(mclexer.headerDict.keys()): |
| self.headerDict[key] = True |
| |
| if self.debug: |
| progress ("defines found in header %s:\n"%headerSub) |
| for key in list(defines.keys()): |
| progress (" '%s'\n"%key) |
| |
| for key in list(defines.keys()): |
| self.defines[key] = defines[key] |
| |
| |
| def slash (self, i): |
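        """Handle '/': skip line (//) and block (/* */) comments, or emit
        '/' as an ordinary token."""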
| if not self.isCodeVisible(): |
| return i |
| |
        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Line comment: skip everything up to the line break.
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c in ["\n", "\r"]:
                    # Return the position just before the break so the
                    # caller still sees it.
                    return i - 1
                i += 1
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Block comment: skip to the closing '*/'. Track the previous
            # character so that the opener's '*' cannot close the comment.
            i += 2
            prev = ''
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and prev == '*':
                    return i
                prev = c
                i += 1
| else: |
| return self.anyToken(i, '/') |
| |
| return i |
| |
| |
| def lineBreak (self, i): |
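        """Handle a line break by flushing any pending token."""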
| if not self.isCodeVisible(): |
| return i |
| |
| self.maybeAddToken() |
| |
| return i |
| |
| |
| def doubleQuote (self, i): |
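        """Handle a double quote: consume the whole string literal and emit
        it, quotes included, as a single token."""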
| if not self.isCodeVisible(): |
| return i |
| |
| literal = '' |
| i += 1 |
| while i < self.bufsize: |
| c = self.chars[i] |
| if c == '"': |
| self.tokens.append('"'+literal+'"') |
| break |
| literal += c |
| i += 1 |
| |
| return i |
| |
| |
| def anyToken (self, i, token): |
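        """Flush any pending token, then emit the given single-character
        token."""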
| if not self.isCodeVisible(): |
| return i |
| |
| self.maybeAddToken() |
| self.token = token |
| self.maybeAddToken() |
| return i |
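

if __name__ == '__main__':
    # Minimal smoke test, as a sketch: the sample below is a hypothetical
    # .src snippet, not taken from any real resource file.
    sample = 'String STR_SAMPLE\n{\n    Text = "Hello";\n};\n'
    lexer = SrcLexer(sample)
    lexer.tokenize()
    lexer.dumpTokens()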