Diffstat (limited to 'libc/kernel/tools/cpp.py')
-rw-r--r-- | libc/kernel/tools/cpp.py | 2161
1 file changed, 1062 insertions(+), 1099 deletions(-)
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
index 2be9532..10ce290 100644
--- a/libc/kernel/tools/cpp.py
+++ b/libc/kernel/tools/cpp.py
@@ -1,560 +1,403 @@
-# a glorified C pre-processor parser
-
-import sys, re, string
-from utils import *
-from defaults import *
-
-debugTokens             = False
-debugDirectiveTokenizer = False
-debugLineParsing        = False
-debugCppExpr            = False
-debugOptimIf01          = False
-
-#####################################################################################
-#####################################################################################
-#####                                                                           #####
-#####           C P P   T O K E N S                                             #####
-#####                                                                           #####
-#####################################################################################
-#####################################################################################
+#!/usr/bin/python
+"""A glorified C pre-processor parser."""
+
+import ctypes
+import logging
+import os
+import re
+import site
+import utils
+
+top = os.getenv('ANDROID_BUILD_TOP')
+if top is None:
+    utils.panic('ANDROID_BUILD_TOP not set.\n')
+
+# Set up the env vars for libclang.
+site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
+
+import clang.cindex
+from clang.cindex import conf
+from clang.cindex import Cursor
+from clang.cindex import CursorKind
+from clang.cindex import SourceLocation
+from clang.cindex import SourceRange
+from clang.cindex import TokenGroup
+from clang.cindex import TokenKind
+from clang.cindex import TranslationUnit
+
+# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
+# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
+clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))
+
+from defaults import kCppUndefinedMacro
+from defaults import kernel_remove_config_macros
+from defaults import kernel_token_replacements
+
+
+debugBlockParser = False
+debugCppExpr = False
+debugOptimIf01 = False
+
+###############################################################################
+###############################################################################
+#####                                                                     #####
+#####           C P P   T O K E N S                                       #####
+#####                                                                     #####
+###############################################################################
+###############################################################################
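
The rewrite above drops the hand-rolled tokenizer and leans on libclang's Python bindings for all parsing. Outside of this tool, the same setup looks roughly like the sketch below; the library path and source string are illustrative assumptions, not values used by cpp.py:

    import clang.cindex
    from clang.cindex import TranslationUnit

    # Assumed install location; point this at wherever libclang.so lives.
    clang.cindex.Config.set_library_path('/usr/lib/llvm/lib64')

    index = clang.cindex.Index.create()
    tu = index.parse('dummy.c', args=['-E', '-x', 'c'],
                     unsaved_files=[('dummy.c', '#define FOO 1\nint x = FOO;\n')],
                     options=TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD)
    # Tokens come straight from clang rather than a hand-written scanner.
    for token in tu.get_tokens(extent=tu.cursor.extent):
        print token.kind, token.spelling
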
 
 # the list of supported C-preprocessor tokens
 # plus a couple of C tokens as well
-tokEOF       = "\0"
-tokLN        = "\n"
+tokEOF = "\0"
+tokLN = "\n"
 tokSTRINGIFY = "#"
-tokCONCAT    = "##"
-tokLOGICAND  = "&&"
-tokLOGICOR   = "||"
-tokSHL       = "<<"
-tokSHR       = ">>"
-tokEQUAL     = "=="
-tokNEQUAL    = "!="
-tokLT        = "<"
-tokLTE       = "<="
-tokGT        = ">"
-tokGTE       = ">="
-tokELLIPSIS  = "..."
-tokSPACE     = " "
-tokDEFINED   = "defined"
-tokLPAREN    = "("
-tokRPAREN    = ")"
-tokNOT       = "!"
-tokPLUS      = "+"
-tokMINUS     = "-"
-tokMULTIPLY  = "*"
-tokDIVIDE    = "/"
-tokMODULUS   = "%"
-tokBINAND    = "&"
-tokBINOR     = "|"
-tokBINXOR    = "^"
-tokCOMMA     = ","
-tokLBRACE    = "{"
-tokRBRACE    = "}"
-tokARROW     = "->"
+tokCONCAT = "##"
+tokLOGICAND = "&&"
+tokLOGICOR = "||"
+tokSHL = "<<"
+tokSHR = ">>"
+tokEQUAL = "=="
+tokNEQUAL = "!="
+tokLT = "<"
+tokLTE = "<="
+tokGT = ">"
+tokGTE = ">="
+tokELLIPSIS = "..."
+tokSPACE = " "
+tokDEFINED = "defined"
+tokLPAREN = "("
+tokRPAREN = ")"
+tokNOT = "!"
+tokPLUS = "+"
+tokMINUS = "-"
+tokMULTIPLY = "*"
+tokDIVIDE = "/"
+tokMODULUS = "%"
+tokBINAND = "&"
+tokBINOR = "|"
+tokBINXOR = "^"
+tokCOMMA = ","
+tokLBRACE = "{"
+tokRBRACE = "}"
+tokARROW = "->"
 tokINCREMENT = "++"
 tokDECREMENT = "--"
-tokNUMBER    = "<number>"
-tokIDENT     = "<ident>"
-tokSTRING    = "<string>"
+tokNUMBER = "<number>"
+tokIDENT = "<ident>"
+tokSTRING = "<string>"
+
+
+class Token(clang.cindex.Token):
+    """A class that represents one token after parsing.
+
+    It inherits the class in libclang, with an extra id property to hold the
+    new spelling of the token. The spelling property in the base class is
+    defined as read-only. New names after macro instantiation are saved in
+    their ids now. It also facilitates the renaming of directive optimizations
+    like replacing 'ifndef X' with 'if !defined(X)'.
+
+    It also overrides the cursor property of the base class. Because the one
+    in libclang always queries based on a single token, which usually doesn't
+    hold useful information. The cursor in this class can be set by calling
+    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
+    base class.
+    """
+
+    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
+                 cursor=None):
+        clang.cindex.Token.__init__(self)
+        self._id = None
+        self._tu = tu
+        self._group = group
+        self._cursor = cursor
+        # self.int_data and self.ptr_data are from the base class. But
+        # self.int_data doesn't accept a None value.
+        if int_data is not None:
+            self.int_data = int_data
+        self.ptr_data = ptr_data
+
+    @property
+    def id(self):
+        """Name of the token."""
+        if self._id is None:
+            return self.spelling
+        else:
+            return self._id
 
-class Token:
-    """a simple class to hold information about a given token.
-       each token has a position in the source code, as well as
-       an 'id' and a 'value'. the id is a string that identifies
-       the token's class, while the value is the string of the
-       original token itself.
+    @id.setter
+    def id(self, new_id):
+        """Setting name of the token."""
+        self._id = new_id
 
-       for example, the tokenizer concatenates a series of spaces
-       and tabs as a single tokSPACE id, whose value if the original
-       spaces+tabs sequence."""
+    @property
+    def cursor(self):
+        if self._cursor is None:
+            self._cursor = clang.cindex.Token.cursor
+        return self._cursor
 
-    def __init__(self):
-        self.id = None
-        self.value = None
-        self.lineno = 0
-        self.colno = 0
-
-    def set(self,id,val=None):
-        self.id = id
-        if val:
-            self.value = val
-        else:
-            self.value = id
-        return None
-
-    def copyFrom(self,src):
-        self.id = src.id
-        self.value = src.value
-        self.lineno = src.lineno
-        self.colno = src.colno
+    @cursor.setter
+    def cursor(self, new_cursor):
+        self._cursor = new_cursor
 
     def __repr__(self):
-        if self.id == tokIDENT:
-            return "(ident %s)" % self.value
-        if self.id == tokNUMBER:
-            return "(number %s)" % self.value
-        if self.id == tokSTRING:
-            return "(string '%s')" % self.value
-        if self.id == tokLN:
-            return "<LN>"
-        if self.id == tokEOF:
-            return "<EOF>"
-        if self.id == tokSPACE and self.value == "\\":
-            # this corresponds to a trailing \ that was transformed into a tokSPACE
-            return "<\\>"
+        if self.id == 'defined':
+            return self.id
+        elif self.kind == TokenKind.IDENTIFIER:
+            return "(ident %s)" % self.id
 
         return self.id
 
     def __str__(self):
-        if self.id == tokIDENT:
-            return self.value
-        if self.id == tokNUMBER:
-            return self.value
-        if self.id == tokSTRING:
-            return self.value
-        if self.id == tokEOF:
-            return "<EOF>"
-        if self.id == tokSPACE:
-            if self.value == "\\":  # trailing \
-                return "\\\n"
-            else:
-                return self.value
-
         return self.id
 
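
The new Token subclass exists because clang.cindex.Token.spelling is read-only, while this tool needs to rename identifiers in place (token replacement, #ifndef rewriting). The pattern in isolation, using a stand-in base class rather than the real binding:

    class LibclangToken(object):      # stand-in for clang.cindex.Token
        @property
        def spelling(self):
            return "ORIGINAL"         # read-only in the real bindings

    class RenamableToken(LibclangToken):
        def __init__(self):
            self._id = None

        @property
        def id(self):
            # Fall back to the immutable spelling until a rename happens.
            if self._id is None:
                return self.spelling
            return self._id

        @id.setter
        def id(self, new_id):
            self._id = new_id

    tok = RenamableToken()
    assert tok.id == "ORIGINAL"
    tok.id = "RENAMED"                # e.g. a kernel_token_replacements hit
    assert tok.id == "RENAMED"
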
-class BadExpectedToken(Exception):
-    def __init__(self,msg):
-        print msg
-
-#####################################################################################
-#####################################################################################
-#####                                                                           #####
-#####           C P P   T O K E N I Z E R                                       #####
-#####                                                                           #####
-#####################################################################################
-#####################################################################################
+class BadExpectedToken(Exception):
+    """An exception that will be raised for unexpected tokens."""
+    pass
+
+
+# The __contains__ function in libclang SourceRange class contains a bug. It
+# gives wrong result when dealing with single line range.
+# Bug filed with upstream:
+# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
+def SourceRange__contains__(self, other):
+    """Determine if a given location is inside the range."""
+    if not isinstance(other, SourceLocation):
+        return False
+    if other.file is None and self.start.file is None:
+        pass
+    elif (self.start.file.name != other.file.name or
+          other.file.name != self.end.file.name):
+        # same file name
+        return False
+    # same file, in between lines
+    if self.start.line < other.line < self.end.line:
+        return True
+    # same file, same line
+    elif self.start.line == other.line == self.end.line:
+        if self.start.column <= other.column <= self.end.column:
+            return True
+    elif self.start.line == other.line:
+        # same file first line
+        if self.start.column <= other.column:
+            return True
+    elif other.line == self.end.line:
+        # same file last line
+        if other.column <= self.end.column:
+            return True
+    return False
+
+
+SourceRange.__contains__ = SourceRange__contains__
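
Assigning the function onto the class is plain monkey-patching: every later `location in extent` test in the process picks up the fixed logic without forking the bindings. A quick single-line-range check, assuming the libclang setup above works (File/SourceLocation/SourceRange factory methods are standard cindex API):

    index = clang.cindex.Index.create()
    tu = index.parse('t.c', unsaved_files=[('t.c', 'int abc = 42;\n')])
    f = clang.cindex.File.from_name(tu, 't.c')
    start = SourceLocation.from_position(tu, f, 1, 1)
    end = SourceLocation.from_position(tu, f, 1, 13)
    extent = SourceRange.from_locations(start, end)
    # Start line == end line: exactly the case the stock __contains__ got wrong.
    assert SourceLocation.from_position(tu, f, 1, 5) in extent
    assert SourceLocation.from_position(tu, f, 2, 1) not in extent
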
 
 
+################################################################################
+################################################################################
+#####                                                                      #####
+#####           C P P   T O K E N I Z E R                                  #####
+#####                                                                      #####
+################################################################################
+################################################################################
+
+
+class CppTokenizer(object):
+    """A tokenizer that converts some input text into a list of tokens.
+
+    It calls libclang's tokenizer to get the parsed tokens. In addition, it
+    updates the cursor property in each token after parsing, by calling
+    getTokensWithCursors().
+    """
+
+    clang_flags = ['-E', '-x', 'c']
+    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
 
-# list of long symbols, i.e. those that take more than one characters
-cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
-                   tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
+    def __init__(self):
+        """Initialize a new CppTokenizer object."""
+        self._indexer = clang.cindex.Index.create()
+        self._tu = None
+        self._index = 0
+        self.tokens = None
+
+    def _getTokensWithCursors(self):
+        """Helper method to return all tokens with their cursors.
+
+        The cursor property in a clang Token doesn't provide enough
+        information. Because it is queried based on single token each time
+        without any context, i.e. via calling conf.lib.clang_annotateTokens()
+        with only one token given. So we often see 'INVALID_FILE' in one
+        token's cursor. In this function it passes all the available tokens
+        to get more informative cursors.
+        """
+
+        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
+        tokens_count = ctypes.c_uint()
+
+        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
+                                ctypes.byref(tokens_memory),
+                                ctypes.byref(tokens_count))
+
+        count = int(tokens_count.value)
+
+        # If we get no tokens, no memory was allocated. Be sure not to return
+        # anything and potentially call a destructor on nothing.
+        if count < 1:
+            return
 
-class CppTokenizer:
-    """an abstract class used to convert some input text into a list
-       of tokens. real implementations follow and differ in the format
-       of the input text only"""
+        cursors = (Cursor * count)()
+        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))
 
-    def __init__(self):
-        """initialize a new CppTokenizer object"""
-        self.eof  = False  # end of file reached ?
-        self.text = None   # content of current line, with final \n stripped
-        self.line = 0      # number of current line
-        self.pos  = 0      # current character position in current line
-        self.len  = 0      # length of current line text
-        self.held = Token()
-
-    def setLineText(self,line):
-        """set the content of the (next) current line. should be called
-           by fillLineText() in derived classes"""
-        self.text = line
-        self.len  = len(line)
-        self.pos  = 0
-
-    def fillLineText(self):
-        """refresh the content of 'line' with a new line of input"""
-        # to be overriden
-        self.eof = True
-
-    def markPos(self,tok):
-        """mark the position of the current token in the source file"""
-        if self.eof or self.pos > self.len:
-            tok.lineno = self.line + 1
-            tok.colno  = 0
-        else:
-            tok.lineno = self.line
-            tok.colno  = self.pos
-
-    def peekChar(self):
-        """return the current token under the cursor without moving it"""
-        if self.eof:
-            return tokEOF
-
-        if self.pos > self.len:
-            self.pos   = 0
-            self.line += 1
-            self.fillLineText()
-            if self.eof:
-                return tokEOF
-
-        if self.pos == self.len:
-            return tokLN
-        else:
-            return self.text[self.pos]
+        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
+                                      cursors_memory)
 
-    def peekNChar(self,n):
-        """try to peek the next n chars on the same line"""
-        if self.pos + n > self.len:
-            return None
-        return self.text[self.pos:self.pos+n]
+        tokens_array = ctypes.cast(
+            tokens_memory,
+            ctypes.POINTER(clang.cindex.Token * count)).contents
+        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)
 
-    def skipChar(self):
-        """increment the token cursor position"""
-        if not self.eof:
-            self.pos += 1
+        tokens = []
+        for i in xrange(0, count):
+            token = Token(self._tu, token_group,
+                          int_data=tokens_array[i].int_data,
+                          ptr_data=tokens_array[i].ptr_data,
+                          cursor=cursors[i])
+            # We only want non-comment tokens.
+            if token.kind != TokenKind.COMMENT:
+                tokens.append(token)
+
+        return tokens
+
+    def parseString(self, lines):
+        """Parse a list of text lines into a BlockList object."""
+        file_ = 'dummy.c'
+        self._tu = self._indexer.parse(file_, self.clang_flags,
+                                       unsaved_files=[(file_, lines)],
+                                       options=self.options)
+        self.tokens = self._getTokensWithCursors()
+
+    def parseFile(self, file_):
+        """Parse a file into a BlockList object."""
+        self._tu = self._indexer.parse(file_, self.clang_flags,
+                                       options=self.options)
+        self.tokens = self._getTokensWithCursors()
 
-    def skipNChars(self,n):
-        if self.pos + n <= self.len:
-            self.pos += n
+    def nextToken(self):
+        """Return next token from the list."""
+        if self._index < len(self.tokens):
+            t = self.tokens[self._index]
+            self._index += 1
+            return t
         else:
-            while n > 0:
-                self.skipChar()
-                n -= 1
-
-    def nextChar(self):
-        """retrieve the token at the current cursor position, then skip it"""
-        result = self.peekChar()
-        self.skipChar()
-        return result
-
-    def getEscape(self):
-        # try to get all characters after a backslash (\)
-        result = self.nextChar()
-        if result == "0":
-            # octal number ?
-            num = self.peekNChar(3)
-            if num != None:
-                isOctal = True
-                for d in num:
-                    if not d in "01234567":
-                        isOctal = False
-                        break
-                if isOctal:
-                    result += num
-                    self.skipNChars(3)
-        elif result == "x" or result == "X":
-            # hex number ?
-            num = self.peekNChar(2)
-            if num != None:
-                isHex = True
-                for d in num:
-                    if not d in "012345678abcdefABCDEF":
-                        isHex = False
-                        break
-                if isHex:
-                    result += num
-                    self.skipNChars(2)
-        elif result == "u" or result == "U":
-            # unicode char ?
-            num = self.peekNChar(4)
-            if num != None:
-                isHex = True
-                for d in num:
-                    if not d in "012345678abcdefABCDEF":
-                        isHex = False
-                        break
-                if isHex:
-                    result += num
-                    self.skipNChars(4)
-
-        return result
+            return None
 
-    def nextRealToken(self,tok):
-        """return next CPP token, used internally by nextToken()"""
-        c = self.nextChar()
-        if c == tokEOF or c == tokLN:
-            return tok.set(c)
-
-        if c == '/':
-            c = self.peekChar()
-            if c == '/':  # C++ comment line
-                self.skipChar()
-                while 1:
-                    c = self.nextChar()
-                    if c == tokEOF or c == tokLN:
-                        break
-                return tok.set(tokLN)
-            if c == '*':  # C comment start
-                self.skipChar()
-                value = "/*"
-                prev_c = None
-                while 1:
-                    c = self.nextChar()
-                    if c == tokEOF:
-                        return tok.set(tokEOF,value)
-                    if c == '/' and prev_c == '*':
-                        break
-                    prev_c = c
-                    value += c
-
-                value += "/"
-                return tok.set(tokSPACE,value)
-            c = '/'
 
+class CppStringTokenizer(CppTokenizer):
+    """A CppTokenizer derived class that accepts a string of text as input."""
 
-        if c.isspace():
-            while 1:
-                c2 = self.peekChar()
-                if c2 == tokLN or not c2.isspace():
-                    break
-                c += c2
-                self.skipChar()
-            return tok.set(tokSPACE,c)
-
-        if c == '\\':
-            if debugTokens:
-                print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
-            if self.peekChar() == tokLN:  # trailing \
-                # eat the tokLN
-                self.skipChar()
-                # we replace a trailing \ by a tokSPACE whose value is
-                # simply "\\". this allows us to detect them later when
-                # needed.
-                return tok.set(tokSPACE,"\\")
-            else:
-                # treat as a single token here ?
-                c +=self.getEscape()
-                return tok.set(c)
-
-        if c == "'":  # chars
-            c2 = self.nextChar()
-            c += c2
-            if c2 == '\\':
-                c += self.getEscape()
-
-            while 1:
-                c2 = self.nextChar()
-                if c2 == tokEOF:
-                    break
-                c += c2
-                if c2 == "'":
-                    break
+    def __init__(self, line):
+        CppTokenizer.__init__(self)
+        self.parseString(line)
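
Driving the tokenizer is now a matter of pulling Token objects from the list libclang produced (a sketch; it assumes ANDROID_BUILD_TOP and the libclang paths configured at the top of the file are valid):

    tokenizer = CppStringTokenizer("#if defined(FOO)\nint x;\n#endif\n")
    token = tokenizer.nextToken()
    while token is not None:
        # token.id is the (possibly renamed) spelling; kind comes from libclang.
        print token.location.line, token.kind, token.id
        token = tokenizer.nextToken()
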
 
-        return tok.set(tokSTRING, c)
-
-        if c == '"':  # strings
-            quote = 0
-            while 1:
-                c2 = self.nextChar()
-                if c2 == tokEOF:
-                    return tok.set(tokSTRING,c)
-
-                c += c2
-                if not quote:
-                    if c2 == '"':
-                        return tok.set(tokSTRING,c)
-                    if c2 == "\\":
-                        quote = 1
-                else:
-                    quote = 0
-
-        if c >= "0" and c <= "9":  # integers ?
-            while 1:
-                c2 = self.peekChar()
-                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
-                    break
-                c += c2
-                self.skipChar()
-            return tok.set(tokNUMBER,c)
-
-        if c.isalnum() or c == "_":  # identifiers ?
-            while 1:
-                c2 = self.peekChar()
-                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
-                    break
-                c += c2
-                self.skipChar()
-            if c == tokDEFINED:
-                return tok.set(tokDEFINED)
-            else:
-                return tok.set(tokIDENT,c)
-
-        # check special symbols
-        for sk in cppLongSymbols:
-            if c == sk[0]:
-                sklen = len(sk[1:])
-                if self.pos + sklen <= self.len and \
-                   self.text[self.pos:self.pos+sklen] == sk[1:]:
-                    self.pos += sklen
-                    return tok.set(sk)
-
-        return tok.set(c)
-
-    def nextToken(self,tok):
-        """return the next token from the input text. this function
-           really updates 'tok', and does not return a new one"""
-        self.markPos(tok)
-        self.nextRealToken(tok)
-
-    def getToken(self):
-        tok = Token()
-        self.nextToken(tok)
-        if debugTokens:
-            print "getTokens: %s" % repr(tok)
-        return tok
-
-    def toTokenList(self):
-        """convert the input text of a CppTokenizer into a direct
-           list of token objects. tokEOF is stripped from the result"""
-        result = []
-        while 1:
-            tok = Token()
-            self.nextToken(tok)
-            if tok.id == tokEOF:
-                break
-            result.append(tok)
-        return result
 
+class CppFileTokenizer(CppTokenizer):
+    """A CppTokenizer derived class that accepts a file as input."""
 
-class CppLineTokenizer(CppTokenizer):
-    """a CppTokenizer derived class that accepts a single line of text as input"""
-    def __init__(self,line,lineno=1):
+    def __init__(self, file_):
         CppTokenizer.__init__(self)
-        self.line = lineno
-        self.setLineText(line)
+        self.parseFile(file_)
 
-
-class CppLinesTokenizer(CppTokenizer):
-    """a CppTokenizer derived class that accepts a list of texdt lines as input.
-       the lines must not have a trailing \n"""
-    def __init__(self,lines=[],lineno=1):
-        """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
-        CppTokenizer.__init__(self)
-        self.line  = lineno
-        self.lines = lines
-        self.index = 0
-        self.count = len(lines)
-
-        if self.count > 0:
-            self.fillLineText()
-        else:
-            self.eof = True
-
-    def addLine(self,line):
-        """add a line to a CppLinesTokenizer. this can be done after tokenization
-           happens"""
-        if self.count == 0:
-            self.setLineText(line)
-            self.index = 1
-        self.lines.append(line)
-        self.count += 1
-        self.eof    = False
-
-    def fillLineText(self):
-        if self.index < self.count:
-            self.setLineText(self.lines[self.index])
-            self.index += 1
-        else:
-            self.eof = True
-
-
-class CppFileTokenizer(CppTokenizer):
-    def __init__(self,file,lineno=1):
-        CppTokenizer.__init__(self)
-        self.file = file
-        self.line = lineno
-
-    def fillLineText(self):
-        line = self.file.readline()
-        if len(line) > 0:
-            if line[-1] == '\n':
-                line = line[:-1]
-                if len(line) > 0 and line[-1] == "\r":
-                    line = line[:-1]
-            self.setLineText(line)
-        else:
-            self.eof = True
 
 # Unit testing
 #
-class CppTokenizerTester:
-    """a class used to test CppTokenizer classes"""
-    def __init__(self,tokenizer=None):
-        self.tokenizer = tokenizer
-        self.token     = Token()
+class CppTokenizerTester(object):
+    """A class used to test CppTokenizer classes."""
 
-    def setTokenizer(self,tokenizer):
-        self.tokenizer = tokenizer
+    def __init__(self, tokenizer=None):
+        self._tokenizer = tokenizer
+        self._token = None
 
-    def expect(self,id):
-        self.tokenizer.nextToken(self.token)
-        tokid = self.token.id
+    def setTokenizer(self, tokenizer):
+        self._tokenizer = tokenizer
+
+    def expect(self, id):
+        self._token = self._tokenizer.nextToken()
+        if self._token is None:
+            tokid = ''
+        else:
+            tokid = self._token.id
         if tokid == id:
             return
-        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
-            return
-        raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
+        raise BadExpectedToken("### BAD TOKEN: '%s' expecting '%s'" % (
+            tokid, id))
 
-    def expectToken(self,id,line,col):
+    def expectToken(self, id, line, col):
         self.expect(id)
-        if self.token.lineno != line:
-            raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
-        if self.token.colno != col:
-            raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
-
-    def expectTokenVal(self,id,value,line,col):
-        self.expectToken(id,line,col)
-        if self.token.value != value:
-            raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
-
-    def expectList(self,list):
-        for item in list:
+        if self._token.location.line != line:
+            raise BadExpectedToken(
+                "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (
+                    id, self._token.lineno, line))
+        if self._token.location.column != col:
+            raise BadExpectedToken("### BAD COLNO: '%d' expecting '%d'" % (
+                self._token.colno, col))
+
+    def expectTokens(self, tokens):
+        for id, line, col in tokens:
+            self.expectToken(id, line, col)
+
+    def expectList(self, list_):
+        for item in list_:
             self.expect(item)
 
+
 def test_CppTokenizer():
     tester = CppTokenizerTester()
 
-    tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
-    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
-                       tokRPAREN, tokLN, tokEOF] )
-
-    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
-    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
-                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
-
-    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
-    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
-
-    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
-    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
-
-    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
-    tester.expectToken( "first", 1, 0 )
-    tester.expectToken( tokSPACE, 1, 5 )
-    tester.expectToken( "second", 1, 6 )
-    tester.expectToken( tokLN, 1, 12 )
-    tester.expectToken( tokSPACE, 2, 0 )
-    tester.expectToken( "third", 2, 2 )
-
-    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
-    tester.expectList( [ "boo", tokSPACE ] )
-    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
-    tester.expectList( [ tokLN, tokEOF ] )
-
-    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
-    tester.expectToken( "an", 1, 0 )
-    tester.expectToken( tokSPACE, 1, 2 )
-    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
-    tester.expectToken( tokSPACE, 2, 0 )
-    tester.expectToken( "example", 2, 1 )
-    tester.expectToken( tokLN, 2, 8 )
+    tester.setTokenizer(CppStringTokenizer("#an/example && (01923_xy)"))
+    tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
+                       "01923_xy", tokRPAREN])
+
+    tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
+    tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
+                       "defined", tokLPAREN, "BAZ", tokRPAREN])
+
+    tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
+    tester.expectList([])
+
+    tester.setTokenizer(CppStringTokenizer("first\nsecond"))
+    tester.expectList(["first", "second"])
+
+    tester.setTokenizer(CppStringTokenizer("first second\n  third"))
+    tester.expectTokens([("first", 1, 1),
+                         ("second", 1, 7),
+                         ("third", 2, 3)])
+
+    tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
+    tester.expectTokens([("boo", 1, 1)])
+
+    tester.setTokenizer(CppStringTokenizer("an \\\n example"))
+    tester.expectTokens([("an", 1, 1),
+                         ("example", 2, 2)])
 
     return True
 
 
-#####################################################################################
-#####################################################################################
-#####                                                                           #####
-#####           C P P   E X P R E S S I O N S                                   #####
-#####                                                                           #####
-#####################################################################################
-#####################################################################################
+################################################################################
+################################################################################
+#####                                                                      #####
+#####           C P P   E X P R E S S I O N S                              #####
+#####                                                                      #####
+################################################################################
+################################################################################
 
 
-class CppExpr:
-    """a class that models the condition of #if directives into
-        an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
-        where "op" is a string describing the operation"""
+class CppExpr(object):
+    """A class that models the condition of #if directives into an expr tree.
+
+    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
+    "op" is a string describing the operation
+    """
 
-    unaries  = [ "!", "~" ]
-    binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=", "?", ":" ]
+    unaries = ["!", "~"]
+    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
+                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
     precedences = {
         "?": 1, ":": 1,
         "||": 2,
@@ -570,197 +413,191 @@ class CppExpr:
        "!": 12, "~": 12 }
 
-    re_cpp_constant = re.compile(r"((\d|\w|_)+)")
-
     def __init__(self, tokens):
-        """initialize a CppExpr. 'tokens' must be a CppToken list"""
-        self.tok = tokens
-        self.n   = len(tokens)
-        self.i   = 0
+        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
+        self.tokens = tokens
+        self._num_tokens = len(tokens)
+        self._index = 0
+
         if debugCppExpr:
             print "CppExpr: trying to parse %s" % repr(tokens)
         self.expr = self.parseExpression(0)
         if debugCppExpr:
             print "CppExpr: got " + repr(self.expr)
-        if self.i != self.n:
-            print 'crap at end of input (%d != %d): %s' % (self.i, self.n, repr(tokens))
-            raise
-
+        if self._index != self._num_tokens:
+            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
+                       % (self._index, self._num_tokens, repr(tokens)))
 
     def throw(self, exception, msg):
-        if self.i < self.n:
-            tok = self.tok[self.i]
-            print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
+        if self._index < self._num_tokens:
+            tok = self.tokens[self._index]
+            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
         else:
             print "EOF: %s" % msg
         raise exception(msg)
 
-    def skip_spaces(self):
-        """skip spaces in input token list"""
-        while self.i < self.n:
-            t = self.tok[self.i]
-            if t.id != tokSPACE and t.id != tokLN:
-                break
-            self.i += 1
-
-    def expectId(self, id):
-        """check that a given token id is at the current position, then skip over it"""
-        self.skip_spaces()
-        if self.i >= self.n or self.tok[self.i].id != id:
-            self.throw(BadExpectedToken,self.i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[self.i].id))
-        self.i += 1
-
-    def expectIdent(self):
-        self.skip_spaces()
-        if self.i >= self.n or self.tok[self.i].id != tokIDENT:
-            self.throw(BadExpectedToken, self.i,"### expecting identifier in expression, got '%s'" % (id, self.tok[self.i].id))
-        self.i += 1
-
+    def expectId(self, id):
+        """Check that a given token id is at the current position."""
+        token = self.tokens[self._index]
+        if self._index >= self._num_tokens or token.id != id:
+            self.throw(BadExpectedToken,
+                       "### expecting '%s' in expression, got '%s'" % (
+                           id, token.id))
+        self._index += 1
 
     def is_decimal(self):
-        v = self.tok[self.i].value[:]
-        while len(v) > 0 and v[-1] in "ULul":
-            v = v[:-1]
-        for digit in v:
-            if not digit.isdigit():
-                return None
-
-        self.i += 1
-        return ("int", string.atoi(v))
+        token = self.tokens[self._index].id
+        if token[-1] in "ULul":
+            token = token[:-1]
+        try:
+            val = int(token, 10)
+            self._index += 1
+            return ('int', val)
+        except ValueError:
+            return None
 
+    def is_octal(self):
+        token = self.tokens[self._index].id
+        if token[-1] in "ULul":
+            token = token[:-1]
+        if len(token) < 2 or token[0] != '0':
+            return None
+        try:
+            val = int(token, 8)
+            self._index += 1
+            return ('oct', val)
+        except ValueError:
+            return None
 
     def is_hexadecimal(self):
-        v = self.tok[self.i].value[:]
-        while len(v) > 0 and v[-1] in "ULul":
-            v = v[:-1]
-        if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
-            for digit in v[2:]:
-                if not digit in "0123456789abcdefABCDEF":
-                    return None
-
-            # for a hex expression tuple, the argument
-            # is the value as an integer
-            self.i += 1
-            return ("hex", int(v[2:], 16))
-
-        return None
-
+        token = self.tokens[self._index].id
+        if token[-1] in "ULul":
+            token = token[:-1]
+        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
+            return None
+        try:
+            val = int(token, 16)
+            self._index += 1
+            return ('hex', val)
+        except ValueError:
+            return None
 
     def is_integer(self):
-        if self.tok[self.i].id != tokNUMBER:
+        if self.tokens[self._index].kind != TokenKind.LITERAL:
             return None
 
-        c = self.is_decimal()
-        if c: return c
-
         c = self.is_hexadecimal()
-        if c: return c
+        if c:
+            return c
 
-        return None
+        c = self.is_octal()
+        if c:
+            return c
+
+        c = self.is_decimal()
+        if c:
+            return c
 
+        return None
 
     def is_number(self):
-        t = self.tok[self.i]
-        if t.id == tokMINUS and self.i+1 < self.n:
-            self.i += 1
+        t = self.tokens[self._index]
+        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
+            self._index += 1
             c = self.is_integer()
             if c:
-                op, val  = c
+                op, val = c
                 return (op, -val)
-        if t.id == tokPLUS and self.i+1 < self.n:
+        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
+            self._index += 1
             c = self.is_integer()
-            if c: return c
+            if c:
+                return c
 
         return self.is_integer()
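
The three literal helpers classify a constant by base before any folding happens; Python's int() with an explicit base does the actual conversion. The same decision procedure, condensed (suffix handling mirrors the single-character strip above):

    def classify(literal):
        if literal[-1] in "ULul":
            literal = literal[:-1]
        if len(literal) >= 3 and literal[:2] in ("0x", "0X"):
            return ("hex", int(literal, 16))
        if len(literal) >= 2 and literal[0] == "0":
            return ("oct", int(literal, 8))
        return ("int", int(literal, 10))

    assert classify("0x3e") == ("hex", 62)
    assert classify("015l") == ("oct", 13)
    assert classify("42U") == ("int", 42)
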
 
-    def is_defined(self):
-        t = self.tok[self.i]
+    def is_defined(self):
+        t = self.tokens[self._index]
         if t.id != tokDEFINED:
             return None
 
-        # we have the defined keyword, check the rest
-        self.i += 1
-        self.skip_spaces()
-        used_parens = 0
-        if self.i < self.n and self.tok[self.i].id == tokLPAREN:
-            used_parens = 1
-            self.i += 1
-            self.skip_spaces()
-
-        if self.i >= self.n:
-            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren")
-
-        t = self.tok[self.i]
-        if t.id != tokIDENT:
-            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
-
-        self.i += 1
+        # We have the defined keyword, check the rest.
+        self._index += 1
+        used_parens = False
+        if (self._index < self._num_tokens and
+                self.tokens[self._index].id == tokLPAREN):
+            used_parens = True
+            self._index += 1
+
+        if self._index >= self._num_tokens:
+            self.throw(BadExpectedToken,
+                       "### 'defined' must be followed by macro name or left "
+                       "paren")
+
+        t = self.tokens[self._index]
+        if t.kind != TokenKind.IDENTIFIER:
+            self.throw(BadExpectedToken,
+                       "### 'defined' must be followed by macro name")
+
+        self._index += 1
         if used_parens:
             self.expectId(tokRPAREN)
 
-        return ("defined", t.value)
-
+        return ("defined", t.id)
 
     def is_call_or_ident(self):
-        self.skip_spaces()
-        if self.i >= self.n:
+        if self._index >= self._num_tokens:
             return None
 
-        t = self.tok[self.i]
-        if t.id != tokIDENT:
+        t = self.tokens[self._index]
+        if t.kind != TokenKind.IDENTIFIER:
             return None
 
-        name = t.value
+        name = t.id
 
-        self.i += 1
-        self.skip_spaces()
-        if self.i >= self.n or self.tok[self.i].id != tokLPAREN:
+        self._index += 1
+        if (self._index >= self._num_tokens or
+                self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)
 
-        params = []
-        depth = 1
-        self.i += 1
-        j = self.i
-        while self.i < self.n:
-            id = self.tok[self.i].id
+        params = []
+        depth = 1
+        self._index += 1
+        j = self._index
+        while self._index < self._num_tokens:
+            id = self.tokens[self._index].id
             if id == tokLPAREN:
                 depth += 1
             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
-                while j < self.i and self.tok[j].id == tokSPACE:
-                    j += 1
-                k = self.i
-                while k > j and self.tok[k-1].id == tokSPACE:
-                    k -= 1
-                param = self.tok[j:k]
+                k = self._index
+                param = self.tokens[j:k]
                 params.append(param)
                 if id == tokRPAREN:
                     break
-                j = self.i+1
+                j = self._index + 1
             elif id == tokRPAREN:
                 depth -= 1
-            self.i += 1
+            self._index += 1
 
-        if self.i >= self.n:
+        if self._index >= self._num_tokens:
             return None
 
-        self.i += 1
+        self._index += 1
         return ("call", (name, params))
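
Each helper returns a small (op, arg) tuple, so a whole condition parses into a nested tree whose repr() is easy to assert on. For example, matching the unit tests further down:

    e = CppExpr(CppStringTokenizer("defined(ABC) || defined(BINGO)").tokens)
    print repr(e)   # (|| (defined ABC) (defined BINGO))

    e = CppExpr(CppStringTokenizer("FOO(BAR,5)").tokens)
    print repr(e)   # (call FOO [BAR,5])
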
 
+    # Implements the "precedence climbing" algorithm from
+    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
+    # The "classic" algorithm would be fine if we were using a tool to
+    # generate the parser, but we're not. Dijkstra's "shunting yard"
+    # algorithm hasn't been necessary yet.
-
-    # Implements the "precedence climbing" algorithm from http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
-    # The "classic" algorithm would be fine if we were using a tool to generate the parser, but we're not.
-    # Dijkstra's "shunting yard" algorithm hasn't been necessary yet.
     def parseExpression(self, minPrecedence):
-        self.skip_spaces()
-        if self.i >= self.n:
+        if self._index >= self._num_tokens:
             return None
 
         node = self.parsePrimary()
-        while self.token() != None and self.isBinary(self.token()) and self.precedence(self.token()) >= minPrecedence:
+        while (self.token() and self.isBinary(self.token()) and
+               self.precedence(self.token()) >= minPrecedence):
             op = self.token()
             self.nextToken()
             rhs = self.parseExpression(self.precedence(op) + 1)
@@ -768,7 +605,6 @@ class CppExpr:
 
         return node
 
-
     def parsePrimary(self):
         op = self.token()
         if self.isUnary(op):
@@ -784,51 +620,47 @@ class CppExpr:
             self.nextToken()
             primary = self.parseExpression(0)
             self.expectId(":")
-        elif op.id == tokNUMBER:
+        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
             primary = self.is_number()
-        elif op.id == tokIDENT:
-            primary = self.is_call_or_ident()
+        # Checking for 'defined' needs to come first now because 'defined' is
+        # recognized as IDENTIFIER.
         elif op.id == tokDEFINED:
             primary = self.is_defined()
+        elif op.kind == TokenKind.IDENTIFIER:
+            primary = self.is_call_or_ident()
         else:
-            self.throw(BadExpectedToken, "didn't expect to see a %s in factor" % (self.tok[self.i].id))
-
-        self.skip_spaces()
-
-        return primary;
-
+            self.throw(BadExpectedToken,
+                       "didn't expect to see a %s in factor" % (
+                           self.tokens[self._index].id))
+        return primary
 
     def isBinary(self, token):
         return token.id in self.binaries
 
-
     def isUnary(self, token):
         return token.id in self.unaries
 
-
     def precedence(self, token):
         return self.precedences.get(token.id)
 
-
     def token(self):
-        if self.i >= self.n:
+        if self._index >= self._num_tokens:
             return None
-        return self.tok[self.i]
-
+        return self.tokens[self._index]
 
     def nextToken(self):
-        self.i += 1
-        self.skip_spaces()
-        if self.i >= self.n:
+        self._index += 1
+        if self._index >= self._num_tokens:
             return None
-        return self.tok[self.i]
-
+        return self.tokens[self._index]
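
parseExpression() is the loop half of precedence climbing and parsePrimary() supplies the operands. The algorithm in miniature over plain string tokens (an illustrative sketch, independent of the class above):

    PREC = {"||": 2, "&&": 3, "+": 10, "*": 11}

    def parse(tokens, pos=0, min_prec=0):
        node, pos = tokens[pos], pos + 1   # primary: a bare atom
        while (pos < len(tokens) and tokens[pos] in PREC and
               PREC[tokens[pos]] >= min_prec):
            op = tokens[pos]
            # PREC[op] + 1 makes equal-precedence operators left-associative.
            rhs, pos = parse(tokens, pos + 1, PREC[op] + 1)
            node = (op, node, rhs)
        return node, pos

    tree, _ = parse(["1", "+", "2", "*", "3", "+", "4"])
    print tree   # ('+', ('+', '1', ('*', '2', '3')), '4')
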
 
     def dump_node(self, e):
         op = e[0]
         line = "(" + op
         if op == "int":
             line += " %d)" % e[1]
+        elif op == "oct":
+            line += " 0%o)" % e[1]
         elif op == "hex":
             line += " 0x%x)" % e[1]
         elif op == "ident":
@@ -864,31 +696,33 @@ class CppExpr:
             return "%d" % e[1]
         if op == "hex":
             return "0x%x" % e[1]
+        if op == "oct":
+            return "0%o" % e[1]
         if op == "ident":
             # XXX: should try to expand
             return e[1]
         if op == "defined":
             return "defined(%s)" % e[1]
 
-        prec = CppExpr.precedences.get(op,1000)
-        arg  = e[1]
+        prec = CppExpr.precedences.get(op, 1000)
+        arg = e[1]
         if op in CppExpr.unaries:
             arg_src = self.source_node(arg)
-            arg_op  = arg[0]
-            arg_prec = CppExpr.precedences.get(arg[0],1000)
+            arg_op = arg[0]
+            arg_prec = CppExpr.precedences.get(arg_op, 1000)
             if arg_prec < prec:
                 return "!(" + arg_src + ")"
             else:
                 return "!" + arg_src
         if op in CppExpr.binaries:
-            arg2     = e[2]
-            arg1_op  = arg[0]
-            arg2_op  = arg2[0]
+            arg2 = e[2]
+            arg1_op = arg[0]
+            arg2_op = arg2[0]
             arg1_src = self.source_node(arg)
             arg2_src = self.source_node(arg2)
-            if CppExpr.precedences.get(arg1_op,1000) < prec:
+            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                 arg1_src = "(%s)" % arg1_src
-            if CppExpr.precedences.get(arg2_op,1000) < prec:
+            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                 arg2_src = "(%s)" % arg2_src
             return "%s %s %s" % (arg1_src, op, arg2_src)
@@ -897,19 +731,21 @@ class CppExpr:
     def __str__(self):
         return self.source_node(self.expr)
 
-    def int_node(self,e):
-        if e[0] == "int":
+    @staticmethod
+    def int_node(e):
+        if e[0] in ["int", "oct", "hex"]:
             return e[1]
-        elif e[1] == "hex":
-            return int(e[1],16)
         else:
             return None
 
     def toInt(self):
         return self.int_node(self.expr)
 
-    def optimize_node(self, e, macros={}):
+    def optimize_node(self, e, macros=None):
+        if macros is None:
+            macros = {}
         op = e[0]
+
         if op == "defined":
             op, name = e
             if macros.has_key(name):
@@ -919,7 +755,7 @@ class CppExpr:
                 try:
                     value = int(macros[name])
                     return ("int", value)
-                except:
+                except ValueError:
                     return ("defined", macros[name])
 
             if kernel_remove_config_macros and name.startswith("CONFIG_"):
@@ -933,7 +769,7 @@ class CppExpr:
             try:
                 value = int(macros[name])
                 expanded = ("int", value)
-            except:
+            except ValueError:
                 expanded = ("ident", macros[name])
             return self.optimize_node(expanded, macros)
         return e
@@ -950,16 +786,16 @@ class CppExpr:
 
         elif op == "&&":
             op, l, r = e
-            l  = self.optimize_node(l, macros)
-            r  = self.optimize_node(r, macros)
+            l = self.optimize_node(l, macros)
+            r = self.optimize_node(r, macros)
             li = self.int_node(l)
             ri = self.int_node(r)
-            if li != None:
+            if li is not None:
                 if li == 0:
                     return ("int", 0)
                 else:
                     return r
-            elif ri != None:
+            elif ri is not None:
                 if ri == 0:
                     return ("int", 0)
                 else:
@@ -968,16 +804,16 @@ class CppExpr:
 
         elif op == "||":
             op, l, r = e
-            l  = self.optimize_node(l, macros)
-            r  = self.optimize_node(r, macros)
+            l = self.optimize_node(l, macros)
+            r = self.optimize_node(r, macros)
             li = self.int_node(l)
             ri = self.int_node(r)
-            if li != None:
+            if li is not None:
                 if li == 0:
                     return r
                 else:
                     return ("int", 1)
-            elif ri != None:
+            elif ri is not None:
                 if ri == 0:
                     return l
                 else:
@@ -987,50 +823,54 @@ class CppExpr:
         else:
             return e
 
-    def optimize(self,macros={}):
+    def optimize(self, macros=None):
+        if macros is None:
+            macros = {}
         self.expr = self.optimize_node(self.expr, macros)
 
-    def is_equal_node(self,e1,e2):
-        if e1[0] != e2[0] or len(e1) != len(e2):
-            return False
-
-        op = e1[0]
-        if op == "int" or op == "hex" or op == "!" or op == "defined":
-            return e1[0] == e2[0]
-
-        return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
-
-    def is_equal(self,other):
-        return self.is_equal_node(self.expr,other.expr)
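
optimize_node() is what makes whole conditionals disappear: once either side of && or || folds to a constant, the node collapses. The tests below pin the behavior down; two representative cases:

    e = CppExpr(CppStringTokenizer("defined(A) && defined(B)").tokens)
    e.optimize({"A": "1"})
    print repr(e)   # (defined B)

    e = CppExpr(CppStringTokenizer("defined(A) || defined(B)").tokens)
    e.optimize({"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    print repr(e)   # (int 0)
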
(defined EXAMPLE))") - test_cpp_expr("defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))") - test_cpp_expr("FOO(BAR)", "(call FOO [BAR])") - test_cpp_expr("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))") + test_cpp_expr("defined(ABC) || defined(BINGO)", + "(|| (defined ABC) (defined BINGO))") + test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])") + test_cpp_expr("A == 1 || defined(B)", + "(|| (== (ident A) (int 1)) (defined B))") test_cpp_expr_optim("0", "(int 0)") test_cpp_expr_optim("1", "(int 1)") test_cpp_expr_optim("1 && 1", "(int 1)") + test_cpp_expr_optim("1 && +1", "(int 1)") + test_cpp_expr_optim("0x1 && 01", "(oct 01)") test_cpp_expr_optim("1 && 0", "(int 0)") test_cpp_expr_optim("0 && 1", "(int 0)") test_cpp_expr_optim("0 && 0", "(int 0)") @@ -1054,32 +898,48 @@ def test_CppExpr(): test_cpp_expr_optim("0 || 1", "(int 1)") test_cpp_expr_optim("0 || 0", "(int 0)") test_cpp_expr_optim("A", "(ident A)") - test_cpp_expr_optim("A", "(int 1)", { "A": 1 }) - test_cpp_expr_optim("A || B", "(int 1)", { "A": 1 }) - test_cpp_expr_optim("A || B", "(int 1)", { "B": 1 }) - test_cpp_expr_optim("A && B", "(ident B)", { "A": 1 }) - test_cpp_expr_optim("A && B", "(ident A)", { "B": 1 }) + test_cpp_expr_optim("A", "(int 1)", {"A": 1}) + test_cpp_expr_optim("A || B", "(int 1)", {"A": 1}) + test_cpp_expr_optim("A || B", "(int 1)", {"B": 1}) + test_cpp_expr_optim("A && B", "(ident B)", {"A": 1}) + test_cpp_expr_optim("A && B", "(ident A)", {"B": 1}) test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))") test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)") test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)") - test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", { "EXAMPLE": "XOWOE" }) - test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro}) + test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", + {"EXAMPLE": "XOWOE"}) + test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", + {"EXAMPLE": kCppUndefinedMacro}) test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))") - test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))", { "EXAMPLE" : "XOWOE" }) - test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro }) - test_cpp_expr_optim("defined(A) || defined(B)", "(|| (defined A) (defined B))") - test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "A" : "1" }) - test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "B" : "1" }) - test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", { "B" : kCppUndefinedMacro }) - test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", { "A" : kCppUndefinedMacro, "B" : kCppUndefinedMacro }) - test_cpp_expr_optim("defined(A) && defined(B)", "(&& (defined A) (defined B))") - test_cpp_expr_optim("defined(A) && defined(B)", "(defined B)", { "A" : "1" }) - test_cpp_expr_optim("defined(A) && defined(B)", "(defined A)", { "B" : "1" }) - test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "B" : kCppUndefinedMacro }) - test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "A" : kCppUndefinedMacro }) - test_cpp_expr_optim("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))" ) - test_cpp_expr_optim("defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", { "__KERNEL__": kCppUndefinedMacro }) + test_cpp_expr_optim("!defined(EXAMPLE)", "(! 
(defined XOWOE))", + {"EXAMPLE": "XOWOE"}) + test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", + {"EXAMPLE": kCppUndefinedMacro}) + test_cpp_expr_optim("defined(A) || defined(B)", + "(|| (defined A) (defined B))") + test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"}) + test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"}) + test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", + {"B": kCppUndefinedMacro}) + test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", + {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro}) + test_cpp_expr_optim("defined(A) && defined(B)", + "(&& (defined A) (defined B))") + test_cpp_expr_optim("defined(A) && defined(B)", + "(defined B)", {"A": "1"}) + test_cpp_expr_optim("defined(A) && defined(B)", + "(defined A)", {"B": "1"}) + test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", + {"B": kCppUndefinedMacro}) + test_cpp_expr_optim("defined(A) && defined(B)", + "(int 0)", {"A": kCppUndefinedMacro}) + test_cpp_expr_optim("A == 1 || defined(B)", + "(|| (== (ident A) (int 1)) (defined B))") + test_cpp_expr_optim( + "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", + "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", + {"__KERNEL__": kCppUndefinedMacro}) test_cpp_expr_source("0", "0") test_cpp_expr_source("1", "1") @@ -1098,179 +958,176 @@ def test_CppExpr(): test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)") -##################################################################################### -##################################################################################### -##### ##### -##### C P P B L O C K ##### -##### ##### -##################################################################################### -##################################################################################### +################################################################################ +################################################################################ +##### ##### +##### C P P B L O C K ##### +##### ##### +################################################################################ +################################################################################ -class Block: - """a class used to model a block of input source text. there are two block types: - - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if) - - text blocks, contain the tokens of non-directive blocks - the cpp parser class below will transform an input source file into a list of Block - objects (grouped in a BlockList object for convenience)""" +class Block(object): + """A class used to model a block of input source text. + + There are two block types: + - directive blocks: contain the tokens of a single pre-processor + directive (e.g. #if) + - text blocks, contain the tokens of non-directive blocks + + The cpp parser class below will transform an input source file into a list + of Block objects (grouped in a BlockList object for convenience) + """ + + def __init__(self, tokens, directive=None, lineno=0, identifier=None): + """Initialize a new block, if 'directive' is None, it is a text block. + + NOTE: This automatically converts '#ifdef MACRO' into + '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'. 
+ """ - def __init__(self,tokens,directive=None,lineno=0): - """initialize a new block, if 'directive' is None, this is a text block - NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)' - and '#ifndef MACRO' into '#if !defined(MACRO)'""" if directive == "ifdef": tok = Token() - tok.set(tokDEFINED) - tokens = [ tok ] + tokens + tok.id = tokDEFINED + tokens = [tok] + tokens directive = "if" elif directive == "ifndef": tok1 = Token() tok2 = Token() - tok1.set(tokNOT) - tok2.set(tokDEFINED) - tokens = [ tok1, tok2 ] + tokens + tok1.id = tokNOT + tok2.id = tokDEFINED + tokens = [tok1, tok2] + tokens directive = "if" - self.tokens = tokens + self.tokens = tokens self.directive = directive + self.define_id = identifier if lineno > 0: self.lineno = lineno else: - self.lineno = self.tokens[0].lineno + self.lineno = self.tokens[0].location.line if self.isIf(): - self.expr = CppExpr( self.tokens ) + self.expr = CppExpr(self.tokens) def isDirective(self): - """returns True iff this is a directive block""" - return self.directive != None + """Return True iff this is a directive block.""" + return self.directive is not None def isConditional(self): - """returns True iff this is a conditional directive block""" - return self.directive in ["if","ifdef","ifndef","else","elif","endif"] + """Return True iff this is a conditional directive block.""" + return self.directive in ["if", "ifdef", "ifndef", "else", "elif", + "endif"] def isDefine(self): - """returns the macro name in a #define directive, or None otherwise""" + """Return the macro name in a #define directive, or None otherwise.""" if self.directive != "define": return None - - return self.tokens[0].value + return self.define_id def isIf(self): - """returns True iff this is an #if-like directive block""" - return self.directive in ["if","ifdef","ifndef","elif"] + """Return True iff this is an #if-like directive block.""" + return self.directive in ["if", "ifdef", "ifndef", "elif"] - def isInclude(self): - """checks whether this is a #include directive. if true, then returns the - corresponding file name (with brackets or double-qoutes). None otherwise""" - if self.directive != "include": - return None - - if self.tokens[0].id == tokSTRING: - # a double-quote include, that's easy - return self.tokens[0].value + def isEndif(self): + """Return True iff this is an #endif directive block.""" + return self.directive == "endif" - # we only want the bracket part, not any comments or junk after it - if self.tokens[0].id == "<": - i = 0 - tok = self.tokens - n = len(tok) - while i < n and tok[i].id != ">": - i += 1 - - if i >= n: - return None + def isInclude(self): + """Check whether this is a #include directive. - return string.join([ str(x) for x in tok[:i+1] ],"") + If true, returns the corresponding file name (with brackets or + double-qoutes). None otherwise. 
+ """ - else: + if self.directive != "include": return None + return ''.join([str(x) for x in self.tokens]) - def removeWhiteSpace(self): - # Remove trailing whitespace and empty lines - # All whitespace is also contracted to a single space - if self.directive != None: - return + @staticmethod + def format_blocks(tokens, indent=0): + """Return the formatted lines of strings with proper indentation.""" + newline = True + result = [] + buf = '' + i = 0 + while i < len(tokens): + t = tokens[i] + if t.id == '{': + buf += ' {' + result.append(strip_space(buf)) + indent += 2 + buf = '' + newline = True + elif t.id == '}': + indent -= 2 + if not newline: + result.append(strip_space(buf)) + # Look ahead to determine if it's the end of line. + if (i + 1 < len(tokens) and + (tokens[i+1].id == ';' or + tokens[i+1].id in ['else', '__attribute__', + '__attribute', '__packed'] or + tokens[i+1].kind == TokenKind.IDENTIFIER)): + buf = ' ' * indent + '}' + newline = False + else: + result.append(' ' * indent + '}') + buf = '' + newline = True + elif t.id == ';': + result.append(strip_space(buf) + ';') + buf = '' + newline = True + # We prefer a new line for each constant in enum. + elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL: + result.append(strip_space(buf) + ',') + buf = '' + newline = True + else: + if newline: + buf += ' ' * indent + str(t) + else: + buf += ' ' + str(t) + newline = False + i += 1 - tokens = [] - line = 0 # index of line start - space = -1 # index of first space, or -1 - ii = 0 - nn = len(self.tokens) - while ii < nn: - tok = self.tokens[ii] - - # If we find a space, record its position if this is the first - # one the line start or the previous character. Don't append - # anything to tokens array yet though. - if tok.id == tokSPACE: - if space < 0: - space = ii - ii += 1 - continue - - # If this is a line space, ignore the spaces we found previously - # on the line, and remove empty lines. - if tok.id == tokLN: - old_line = line - old_space = space - ii += 1 - line = ii - space = -1 - if old_space == old_line: # line only contains spaces - continue - if ii-1 == old_line: # line is empty - continue - tokens.append(tok) - continue - - # Other token, append any space range if any, converting each - # one to a single space character, then append the token. - if space >= 0: - jj = space - space = -1 - while jj < ii: - tok2 = self.tokens[jj] - tok2.value = " " - tokens.append(tok2) - jj += 1 - - tokens.append(tok) - ii += 1 + if buf: + result.append(strip_space(buf)) - self.tokens = tokens + return result, indent - def writeWithWarning(self,out,warning,left_count,repeat_count): + def writeWithWarning(self, out, warning, left_count, repeat_count, indent): + """Dump the current block with warnings.""" # removeWhiteSpace() will sometimes creates non-directive blocks # without any tokens. These come from blocks that only contained # empty lines and spaces. They should not be printed in the final # output, and then should not be counted for this operation. 
 
-    def writeWithWarning(self,out,warning,left_count,repeat_count):
+    def writeWithWarning(self, out, warning, left_count, repeat_count, indent):
+        """Dump the current block with warnings."""
         # removeWhiteSpace() will sometimes creates non-directive blocks
         # without any tokens. These come from blocks that only contained
         # empty lines and spaces. They should not be printed in the final
         # output, and then should not be counted for this operation.
         #
-        if not self.directive and self.tokens == []:
-            return left_count
+        if self.directive is None and not self.tokens:
+            return left_count, indent
 
         if self.directive:
-            out.write(str(self).rstrip() + "\n")
+            out.write(str(self) + '\n')
             left_count -= 1
             if left_count == 0:
                 out.write(warning)
                 left_count = repeat_count
         else:
-            for tok in self.tokens:
-                out.write(str(tok))
-                if tok.id == tokLN:
-                    left_count -= 1
-                    if left_count == 0:
-                        out.write(warning)
-                        left_count = repeat_count
-
-        return left_count
+            lines, indent = self.format_blocks(self.tokens, indent)
+            for line in lines:
+                out.write(line + '\n')
+                left_count -= 1
+                if left_count == 0:
+                    out.write(warning)
+                    left_count = repeat_count
 
+        return left_count, indent
 
     def __repr__(self):
-        """generate the representation of a given block"""
+        """Generate the representation of a given block."""
         if self.directive:
             result = "#%s " % self.directive
             if self.isIf():
@@ -1286,8 +1143,9 @@ class Block:
         return result
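
writeWithWarning() threads both left_count and indent through consecutive blocks, so the repeated warning lands every repeat_count emitted lines no matter how the blocks are split. The calling shape (a sketch; blocks stands for any list of Block objects):

    import sys

    warning = '/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE */\n'
    left, indent = 12, 0
    for b in blocks:   # hypothetical list of Block objects
        left, indent = b.writeWithWarning(sys.stdout, warning, left, 12, indent)
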
#endif in a BlockList.""" self.blocks = optimize_if01(self.blocks) def optimizeMacros(self, macros): - """remove known defined and undefined macros from a BlockList""" + """Remove known defined and undefined macros from a BlockList.""" for b in self.blocks: if b.isIf(): b.expr.optimize(macros) - def removeMacroDefines(self,macros): - """remove known macro definitions from a BlockList""" - self.blocks = remove_macro_defines(self.blocks,macros) - - def removeWhiteSpace(self): - for b in self.blocks: - b.removeWhiteSpace() + def removeMacroDefines(self, macros): + """Remove known macro definitions from a BlockList.""" + self.blocks = remove_macro_defines(self.blocks, macros) - def optimizeAll(self,macros): + def optimizeAll(self, macros): self.optimizeMacros(macros) self.optimizeIf01() return def findIncludes(self): - """return the list of included files in a BlockList""" + """Return the list of included files in a BlockList.""" result = [] for b in self.blocks: i = b.isInclude() if i: result.append(i) - return result - - def write(self,out): + def write(self, out): out.write(str(self)) - def writeWithWarning(self,out,warning,repeat_count): + def writeWithWarning(self, out, warning, repeat_count): left_count = repeat_count + indent = 0 for b in self.blocks: - left_count = b.writeWithWarning(out,warning,left_count,repeat_count) + left_count, indent = b.writeWithWarning(out, warning, left_count, + repeat_count, indent) + + def removeVarsAndFuncs(self, knownStatics=None): + """Remove variable and function declarations. + + All extern and static declarations corresponding to variable and + function declarations are removed. We only accept typedefs and + enum/structs/union declarations. + + However, we keep the definitions corresponding to the set of known + static inline functions in the set 'knownStatics', which is useful + for optimized byteorder swap functions and stuff like that. + """ + + # NOTE: It's also removing function-like macros, such as __SYSCALL(...) + # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h. + # It could be problematic when we have function-like macros but without + # '}' following them. It will skip all the tokens/blocks until seeing a + # '}' as the function end. Fortunately we don't have such cases in the + # current kernel headers. - def removeComments(self): - for b in self.blocks: - for tok in b.tokens: - if tok.id == tokSPACE: - tok.value = " " - - def removeVarsAndFuncs(self,knownStatics=set()): - """remove all extern and static declarations corresponding - to variable and function declarations. we only accept typedefs - and enum/structs/union declarations. - - however, we keep the definitions corresponding to the set - of known static inline functions in the set 'knownStatics', - which is useful for optimized byteorder swap functions and - stuff like that. - """ # state = 0 => normal (i.e. 
LN + spaces) # state = 1 => typedef/struct encountered, ends with ";" # state = 2 => var declaration encountered, ends with ";" # state = 3 => func declaration encountered, ends with "}" - state = 0 - depth = 0 - blocks2 = [] + + if knownStatics is None: + knownStatics = set() + state = 0 + depth = 0 + blocks2 = [] skipTokens = False for b in self.blocks: if b.isDirective(): blocks2.append(b) else: - n = len(b.tokens) - i = 0 + n = len(b.tokens) + i = 0 if skipTokens: first = n else: @@ -1434,21 +1316,16 @@ class BlockList: state = 0 if skipTokens: skipTokens = False - first = i+1 + first = i + 1 - i = i+1 - continue - - # We are looking for the start of a new type/func/var - # ignore whitespace - if tokid in [tokLN, tokSPACE]: - i = i+1 + i += 1 continue # Is it a new type definition, then start recording it - if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]: + if tok.id in ['struct', 'typedef', 'enum', 'union', + '__extension__']: state = 1 - i = i+1 + i += 1 continue # Is it a variable or function definition. If so, first @@ -1464,18 +1341,18 @@ class BlockList: # We also assume that the var/func name is the last # identifier before the terminator. # - j = i+1 + j = i + 1 ident = "" while j < n: tokid = b.tokens[j].id if tokid == '(': # a function declaration state = 3 break - elif tokid == ';': # a variable declaration + elif tokid == ';': # a variable declaration state = 2 break - if tokid == tokIDENT: - ident = b.tokens[j].value + if b.tokens[j].kind == TokenKind.IDENTIFIER: + ident = b.tokens[j].id j += 1 if j >= n: @@ -1488,221 +1365,309 @@ class BlockList: # without making our parser much more # complex. # - #print "### skip unterminated static '%s'" % ident + logging.debug("### skip unterminated static '%s'", + ident) break if ident in knownStatics: - #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j])) - pass + logging.debug("### keep var/func '%s': %s", ident, + repr(b.tokens[i:j])) else: # We're going to skip the tokens for this declaration - #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j])) + logging.debug("### skip var/func '%s': %s", ident, + repr(b.tokens[i:j])) if i > first: - blocks2.append( Block(b.tokens[first:i])) + blocks2.append(Block(b.tokens[first:i])) skipTokens = True - first = n + first = n - i = i+1 + i += 1 if i > first: - #print "### final '%s'" % repr(b.tokens[first:i]) - blocks2.append( Block(b.tokens[first:i]) ) + # print "### final '%s'" % repr(b.tokens[first:i]) + blocks2.append(Block(b.tokens[first:i])) self.blocks = blocks2 - def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"): - """insert your standard issue disclaimer that this is an - auto-generated file, etc..""" - tokens = CppLineTokenizer( disclaimer ).toTokenList() - tokens = tokens[:-1] # remove trailing tokLN - self.blocks = [ Block(tokens) ] + self.blocks - - def replaceTokens(self,replacements): - """replace tokens according to the given dict""" + def replaceTokens(self, replacements): + """Replace tokens according to the given dict.""" for b in self.blocks: made_change = False - if b.isInclude() == None: + if b.isInclude() is None: for tok in b.tokens: - if tok.id == tokIDENT: - if tok.value in replacements: - tok.value = replacements[tok.value] + if tok.kind == TokenKind.IDENTIFIER: + if tok.id in replacements: + tok.id = replacements[tok.id] made_change = True + if b.isDefine() and b.define_id in replacements: + b.define_id = replacements[b.define_id] + made_change = True + if made_change and b.isIf(): 
# Keep 'expr' in sync with 'tokens'. b.expr = CppExpr(b.tokens) -class BlockParser: - """a class used to convert an input source file into a BlockList object""" - - def __init__(self,tokzer=None): - """initialize a block parser. the input source is provided through a Tokenizer - object""" - self.reset(tokzer) - - def reset(self,tokzer): - self.state = 1 - self.tokzer = tokzer - - def getBlocks(self,tokzer=None): - """tokenize and parse the input source, return a BlockList object - NOTE: empty and line-numbering directives are ignored and removed - from the result. as a consequence, it is possible to have - two successive text blocks in the result""" - # state 0 => in source code - # state 1 => in source code, after a LN - # state 2 => in source code, after LN then some space - state = 1 - lastLN = 0 - current = [] - blocks = [] - - if tokzer == None: - tokzer = self.tokzer - - while 1: - tok = tokzer.getToken() - if tok.id == tokEOF: - break - if tok.id == tokLN: - state = 1 - current.append(tok) - lastLN = len(current) +def strip_space(s): + """Strip out redundant space in a given string.""" + + # NOTE: It ought to be more clever to not destroy spaces in string tokens. + replacements = {' . ': '.', + ' [': '[', + '[ ': '[', + ' ]': ']', + '( ': '(', + ' )': ')', + ' ,': ',', + '# ': '#', + ' ;': ';', + '~ ': '~', + ' -> ': '->'} + result = s + for r in replacements: + result = result.replace(r, replacements[r]) + + # Remove the space between function name and the parenthesis. + result = re.sub(r'(\w+) \(', r'\1(', result) + return result - elif tok.id == tokSPACE: - if state == 1: - state = 2 - current.append(tok) - elif tok.id == "#": - if state > 0: - # this is the start of a directive +class BlockParser(object): + """A class that converts an input source file into a BlockList object.""" - if lastLN > 0: - # record previous tokens as text block - block = Block(current[:lastLN]) - blocks.append(block) - lastLN = 0 + def __init__(self, tokzer=None): + """Initialize a block parser. - current = [] + The input source is provided through a Tokenizer object. + """ + self._tokzer = tokzer + self._parsed = False - # skip spaces after the # - while 1: - tok = tokzer.getToken() - if tok.id != tokSPACE: - break + @property + def parsed(self): + return self._parsed - if tok.id != tokIDENT: - # empty or line-numbering, ignore it - if tok.id != tokLN and tok.id != tokEOF: - while 1: - tok = tokzer.getToken() - if tok.id == tokLN or tok.id == tokEOF: - break - continue + @staticmethod + def _short_extent(extent): + return '%d:%d - %d:%d' % (extent.start.line, extent.start.column, + extent.end.line, extent.end.column) + + def getBlocks(self, tokzer=None): + """Return all the blocks parsed.""" + + def consume_extent(i, tokens, extent=None, detect_change=False): + """Return tokens that belong to the given extent. + + It parses all the tokens that follow tokens[i], until getting out + of the extent. When detect_change is True, it may terminate early + when detecting preprocessing directives inside the extent. 
+            """
+
+            result = []
+            if extent is None:
+                extent = tokens[i].cursor.extent
+
+            while i < len(tokens) and tokens[i].location in extent:
+                t = tokens[i]
+                if debugBlockParser:
+                    print ' ' * 2, t.id, t.kind, t.cursor.kind
+                if (detect_change and t.cursor.extent != extent and
+                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
+                    break
+                result.append(t)
+                i += 1
+            return (i, result)
+
+        def consume_line(i, tokens):
+            """Return tokens that follow tokens[i] in the same line."""
+            result = []
+            line = tokens[i].location.line
+            while i < len(tokens) and tokens[i].location.line == line:
+                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
+                    break
+                result.append(tokens[i])
+                i += 1
+            return (i, result)
+
+        if tokzer is None:
+            tokzer = self._tokzer
+        tokens = tokzer.tokens
+
+        blocks = []
+        buf = []
+        i = 0
+
+        while i < len(tokens):
+            t = tokens[i]
+            cursor = t.cursor
+
+            if debugBlockParser:
+                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
+                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
+                                        cursor.kind,
+                                        self._short_extent(cursor.extent)))
+
+            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
+                if buf:
+                    blocks.append(Block(buf))
+                    buf = []
+
+                j = i
+                if j + 1 >= len(tokens):
+                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
+                directive = tokens[j+1].id
+
+                if directive == 'define':
+                    if i+2 >= len(tokens):
+                        raise BadExpectedToken("### BAD TOKEN at %s" %
+                                               (tokens[i].location))
+
+                    # Skip '#' and 'define'.
+                    extent = tokens[i].cursor.extent
+                    i += 2
+                    id = ''
+                    # We need to separate the id from the rest of the line,
+                    # especially for a function-like macro.
+                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
+                        (tokens[i].location.column + len(tokens[i].spelling) ==
+                         tokens[i+1].location.column)):
+                        while i < len(tokens):
+                            id += tokens[i].id
+                            if tokens[i].spelling == ')':
+                                i += 1
+                                break
+                            i += 1
+                    else:
+                        id += tokens[i].id
+                        # Advance to the next token that follows the macro id.
+                        i += 1
+
+                    (i, ret) = consume_extent(i, tokens, extent=extent)
+                    blocks.append(Block(ret, directive=directive,
+                                        lineno=t.location.line, identifier=id))
+
+                else:
+                    (i, ret) = consume_extent(i, tokens)
+                    blocks.append(Block(ret[2:], directive=directive,
+                                        lineno=t.location.line))
+
+            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
+                if buf:
+                    blocks.append(Block(buf))
+                    buf = []
+                directive = tokens[i+1].id
+                (i, ret) = consume_extent(i, tokens)
 
-            directive = tok.value
-            lineno    = tok.lineno
+                blocks.append(Block(ret[2:], directive=directive,
+                                    lineno=t.location.line))
 
-            # skip spaces
-            tok = tokzer.getToken()
-            while tok.id == tokSPACE:
-                tok = tokzer.getToken()
+            elif cursor.kind == CursorKind.VAR_DECL:
+                if buf:
+                    blocks.append(Block(buf))
+                    buf = []
 
-            # then record tokens until LN
-            dirtokens = []
-            while tok.id != tokLN and tok.id != tokEOF:
-                dirtokens.append(tok)
-                tok = tokzer.getToken()
+                (i, ret) = consume_extent(i, tokens, detect_change=True)
+                buf += ret
 
-            block = Block(dirtokens,directive,lineno)
-            blocks.append(block)
-            state = 1
+            elif cursor.kind == CursorKind.FUNCTION_DECL:
+                if buf:
+                    blocks.append(Block(buf))
+                    buf = []
+
+                (i, ret) = consume_extent(i, tokens, detect_change=True)
+                buf += ret
 
             else:
-                state = 0
-                current.append(tok)
+                (i, ret) = consume_line(i, tokens)
+                buf += ret
 
-    if len(current) > 0:
-        block = Block(current)
-        blocks.append(block)
+        if buf:
+            blocks.append(Block(buf))
 
-    return BlockList(blocks)
+        # _parsed=True indicates a successful parse, although it may result
+        # in an empty BlockList.
+ self._parsed = True - def parse(self,tokzer): - return self.getBlocks( tokzer ) + return BlockList(blocks) - def parseLines(self,lines): - """parse a list of text lines into a BlockList object""" - return self.getBlocks( CppLinesTokenizer(lines) ) + def parse(self, tokzer): + return self.getBlocks(tokzer) - def parseFile(self,path): - """parse a file into a BlockList object""" - file = open(path, "rt") - result = self.getBlocks( CppFileTokenizer(file) ) - file.close() - return result + def parseFile(self, path): + return self.getBlocks(CppFileTokenizer(path)) -def test_block_parsing(lines,expected): - blocks = BlockParser().parse( CppLinesTokenizer(lines) ) +def test_block_parsing(lines, expected): + """Helper method to test the correctness of BlockParser.parse.""" + blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines))) if len(blocks) != len(expected): - raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \ - % (str(blocks), repr(expected)) + raise BadExpectedToken("BlockParser.parse() returned '%s' expecting " + "'%s'" % (str(blocks), repr(expected))) for n in range(len(blocks)): if str(blocks[n]) != expected[n]: - raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \ - % (n, str(blocks[n]), expected[n]) - #for block in blocks: - # print block + raise BadExpectedToken("BlockParser.parse()[%d] is '%s', " + "expecting '%s'" % (n, str(blocks[n]), + expected[n])) + def test_BlockParser(): - test_block_parsing(["#error hello"],["#error hello"]) - test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ]) - test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ]) - test_block_parsing(\ - [ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ], - [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] ) - - -##################################################################################### -##################################################################################### -##### ##### -##### B L O C K L I S T O P T I M I Z A T I O N ##### -##### ##### -##################################################################################### -##################################################################################### - -def remove_macro_defines( blocks, excludedMacros=set() ): - """remove macro definitions like #define <macroName> ....""" + test_block_parsing(["#error hello"], ["#error hello"]) + test_block_parsing(["foo", "", "bar"], ["foo bar"]) + + # We currently cannot handle the following case with libclang properly. + # Fortunately it doesn't appear in current headers. 
+ # test_block_parsing(["foo", " # ", "bar"], ["foo", "bar"]) + + test_block_parsing(["foo", + " # /* ahah */ if defined(__KERNEL__) /* more */", + "bar", "#endif"], + ["foo", "#ifdef __KERNEL__", "bar", "#endif"]) + + +################################################################################ +################################################################################ +##### ##### +##### B L O C K L I S T O P T I M I Z A T I O N ##### +##### ##### +################################################################################ +################################################################################ + + +def remove_macro_defines(blocks, excludedMacros=None): + """Remove macro definitions like #define <macroName> ....""" + if excludedMacros is None: + excludedMacros = set() result = [] for b in blocks: macroName = b.isDefine() - if macroName == None or not macroName in excludedMacros: + if macroName is None or macroName not in excludedMacros: result.append(b) return result -def find_matching_endif( blocks, i ): - n = len(blocks) + +def find_matching_endif(blocks, i): + """Traverse the blocks to find out the matching #endif.""" + n = len(blocks) depth = 1 while i < n: if blocks[i].isDirective(): - dir = blocks[i].directive - if dir in [ "if", "ifndef", "ifdef" ]: + dir_ = blocks[i].directive + if dir_ in ["if", "ifndef", "ifdef"]: depth += 1 - elif depth == 1 and dir in [ "else", "elif" ]: + elif depth == 1 and dir_ in ["else", "elif"]: return i - elif dir == "endif": + elif dir_ == "endif": depth -= 1 if depth == 0: return i i += 1 return i -def optimize_if01( blocks ): - """remove the code between #if 0 .. #endif in a list of CppBlocks""" + +def optimize_if01(blocks): + """Remove the code between #if 0 .. #endif in a list of CppBlocks.""" i = 0 n = len(blocks) result = [] @@ -1711,69 +1676,76 @@ def optimize_if01( blocks ): while j < n and not blocks[j].isIf(): j += 1 if j > i: - D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno)) + logging.debug("appending lines %d to %d", blocks[i].lineno, + blocks[j-1].lineno) result += blocks[i:j] if j >= n: break expr = blocks[j].expr - r = expr.toInt() - if r == None: + r = expr.toInt() + if r is None: result.append(blocks[j]) i = j + 1 continue if r == 0: # if 0 => skip everything until the corresponding #endif - j = find_matching_endif( blocks, j+1 ) + j = find_matching_endif(blocks, j + 1) if j >= n: # unterminated #if 0, finish here break - dir = blocks[j].directive - if dir == "endif": - D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno)) + dir_ = blocks[j].directive + if dir_ == "endif": + logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)", + blocks[i].lineno, blocks[j].lineno) i = j + 1 - elif dir == "else": + elif dir_ == "else": # convert 'else' into 'if 1' - D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno)) + logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d " + "to %d)", blocks[i].lineno, blocks[j-1].lineno) blocks[j].directive = "if" - blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() ) + blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens) i = j - elif dir == "elif": + elif dir_ == "elif": # convert 'elif' into 'if' - D2("convert 'if 0' .. 'elif' into 'if'") + logging.debug("convert 'if 0' .. 
'elif' into 'if'")
                 blocks[j].directive = "if"
                 i = j
             continue
 
         # if 1 => find corresponding endif and remove/transform them
-        k = find_matching_endif( blocks, j+1 )
+        k = find_matching_endif(blocks, j + 1)
         if k >= n:
             # unterminated #if 1, finish here
-            D2("unterminated 'if 1'")
+            logging.debug("unterminated 'if 1'")
             result += blocks[j+1:k]
             break
 
-        dir = blocks[k].directive
-        if dir == "endif":
-            D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+        dir_ = blocks[k].directive
+        if dir_ == "endif":
+            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
+                          blocks[j].lineno, blocks[k].lineno)
             result += optimize_if01(blocks[j+1:k])
-            i = k+1
-        elif dir == "else":
+            i = k + 1
+        elif dir_ == "else":
             # convert 'else' into 'if 0'
-            D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
+                          blocks[j].lineno, blocks[k].lineno)
             result += optimize_if01(blocks[j+1:k])
             blocks[k].directive = "if"
-            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
+            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
             i = k
-        elif dir == "elif":
+        elif dir_ == "elif":
             # convert 'elif' into 'if 0'
-            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
+                          blocks[j].lineno, blocks[k].lineno)
             result += optimize_if01(blocks[j+1:k])
-            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
+            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
             i = k
     return result
 
-def test_optimizeAll():
+
+def test_optimizeAll():
     text = """\
 #if 1
 #define GOOD_1
@@ -1816,50 +1788,41 @@ def test_optimizeAll():
 
     expected = """\
 #define GOOD_1
-
 #define GOOD_2
-
 #define GOOD_3
-
-
 #if !defined(__GLIBC__) || __GLIBC__ < 2
 #define X
 #endif
-
 #ifndef __SIGRTMAX
 #define __SIGRTMAX 123
-#endif
-
+#endif\
 """
 
-    out = StringOutput()
-    lines = string.split(text, '\n')
-    list = BlockParser().parse( CppLinesTokenizer(lines) )
-    #D_setlevel(2)
-    list.replaceTokens( kernel_token_replacements )
-    list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
-    list.write(out)
+    out = utils.StringOutput()
+    blocks = BlockParser().parse(CppStringTokenizer(text))
+    blocks.replaceTokens(kernel_token_replacements)
+    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
+    blocks.write(out)
     if out.get() != expected:
         print "[FAIL]: macro optimization failed\n"
         print "<<<< expecting '",
         print expected,
-        print "'\n>>>> result '"
+        print "'\n>>>> result '",
         print out.get(),
         print "'\n----"
         global failure_count
         failure_count += 1
 
-# -- Always run the unit tests.
-
 def runUnitTests():
-    """run all unit tests for this program"""
+    """Always run all unit tests for this program."""
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()
 
+
 failure_count = 0
 runUnitTests()
 if failure_count != 0:
-    sys.exit(1)
+    utils.panic("Unit tests failed in cpp.py.\n")
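
For reference, the effect of the new strip_space() helper is easiest to see on
a concrete input/output pair. A minimal sketch (the declaration string below
is invented for illustration, and it assumes it runs where strip_space() is in
scope, i.e. inside cpp.py):

    # strip_space() first collapses the single spaces left over from joining
    # tokens (around '(', ')', ',', ';', ...), then re-attaches '(' to the
    # preceding identifier via re.sub(r'(\w+) \(', r'\1(', ...).
    s = 'static int foo ( int x , int y ) ;'
    print strip_space(s)  # -> static int foo(int x, int y);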
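
End to end, the new pipeline is the one exercised by test_optimizeAll() above.
A minimal driver sketch, assuming it runs inside cpp.py itself (so
CppStringTokenizer, BlockParser, utils and the defaults imports are already
available); the six header lines are invented for illustration:

    # Parse an invented header, run the same passes the kernel-header cleaner
    # uses, and print the cleaned text.
    text = '\n'.join(['#if 1',
                      '#define GOOD',
                      '#endif',
                      '#if 0',
                      '#define BAD',
                      '#endif'])

    out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    blocks.replaceTokens(kernel_token_replacements)
    blocks.optimizeAll({'__KERNEL__': kCppUndefinedMacro})  # fold #if 0 / #if 1
    blocks.write(out)
    print out.get()  # -> '#define GOOD' ('#if 1' unwrapped, '#if 0' arm dropped)

    # The same BlockList can instead be emitted with a periodic banner:
    # blocks.writeWithWarning(out, '/* WARNING: auto-generated */\n', 20)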