summaryrefslogtreecommitdiffstats
path: root/libc/kernel/tools/cpp.py
diff options
context:
space:
mode:
Diffstat (limited to 'libc/kernel/tools/cpp.py')
-rw-r--r--libc/kernel/tools/cpp.py2180
1 files changed, 2180 insertions, 0 deletions
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
new file mode 100644
index 0000000..4b4bd38
--- /dev/null
+++ b/libc/kernel/tools/cpp.py
@@ -0,0 +1,2180 @@
+# a glorified C pre-processor parser
+
+import sys, re, string
+from utils import *
+from defaults import *
+
+debugTokens = False
+debugDirectiveTokenizer = False
+debugLineParsing = False
+debugCppExpr = False
+debugOptimIf01 = False
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P T O K E N S #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+# the list of supported C-preprocessor tokens
+# plus a couple of C tokens as well
+tokEOF = "\0"
+tokLN = "\n"
+tokSTRINGIFY = "#"
+tokCONCAT = "##"
+tokLOGICAND = "&&"
+tokLOGICOR = "||"
+tokSHL = "<<"
+tokSHR = ">>"
+tokEQUAL = "=="
+tokNEQUAL = "!="
+tokLT = "<"
+tokLTE = "<="
+tokGT = ">"
+tokGTE = ">="
+tokELLIPSIS = "..."
+tokSPACE = " "
+tokDEFINED = "defined"
+tokLPAREN = "("
+tokRPAREN = ")"
+tokNOT = "!"
+tokPLUS = "+"
+tokMINUS = "-"
+tokMULTIPLY = "*"
+tokDIVIDE = "/"
+tokMODULUS = "%"
+tokBINAND = "&"
+tokBINOR = "|"
+tokBINXOR = "^"
+tokCOMMA = ","
+tokLBRACE = "{"
+tokRBRACE = "}"
+tokARROW = "->"
+tokINCREMENT = "++"
+tokDECREMENT = "--"
+tokNUMBER = "<number>"
+tokIDENT = "<ident>"
+tokSTRING = "<string>"
+
+class Token:
+ """a simple class to hold information about a given token.
+ each token has a position in the source code, as well as
+ an 'id' and a 'value'. the id is a string that identifies
+ the token's class, while the value is the string of the
+ original token itself.
+
+ for example, the tokenizer concatenates a series of spaces
+ and tabs as a single tokSPACE id, whose value if the original
+ spaces+tabs sequence."""
+
+ def __init__(self):
+ self.id = None
+ self.value = None
+ self.lineno = 0
+ self.colno = 0
+
+ def set(self,id,val=None):
+ self.id = id
+ if val:
+ self.value = val
+ else:
+ self.value = id
+ return None
+
+ def copyFrom(self,src):
+ self.id = src.id
+ self.value = src.value
+ self.lineno = src.lineno
+ self.colno = src.colno
+
+ def __repr__(self):
+ if self.id == tokIDENT:
+ return "(ident %s)" % self.value
+ if self.id == tokNUMBER:
+ return "(number %s)" % self.value
+ if self.id == tokSTRING:
+ return "(string '%s')" % self.value
+ if self.id == tokLN:
+ return "<LN>"
+ if self.id == tokEOF:
+ return "<EOF>"
+ if self.id == tokSPACE and self.value == "\\":
+ # this corresponds to a trailing \ that was transformed into a tokSPACE
+ return "<\\>"
+
+ return self.id
+
+ def __str__(self):
+ if self.id == tokIDENT:
+ return self.value
+ if self.id == tokNUMBER:
+ return self.value
+ if self.id == tokSTRING:
+ return self.value
+ if self.id == tokEOF:
+ return "<EOF>"
+ if self.id == tokSPACE:
+ if self.value == "\\": # trailing \
+ return "\\\n"
+ else:
+ return self.value
+
+ return self.id
+
+class BadExpectedToken(Exception):
+ def __init__(self,msg):
+ print msg
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P T O K E N C U R S O R #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+class TokenCursor:
+ """a small class to iterate over a list of Token objects"""
+ def __init__(self,tokens):
+ self.tokens = tokens
+ self.n = 0
+ self.count = len(tokens)
+
+ def set(self,n):
+ """set the current position"""
+ if n < 0:
+ n = 0
+ if n > self.count:
+ n = self.count
+ self.n = n
+
+ def peekId(self):
+ """retrieve the id of the current token"""
+ if (self.n >= self.count):
+ return None
+ return self.tokens[self.n].id
+
+ def peek(self):
+ """retrieve the current token. does not change position"""
+ if (self.n >= self.count):
+ return None
+ return self.tokens[self.n]
+
+ def skip(self):
+ """increase current token position"""
+ if (self.n < self.count):
+ self.n += 1
+
+ def skipSpaces(self):
+ """skip over all space tokens, this includes tokSPACE and tokLN"""
+ while 1:
+ tok = self.peekId()
+ if tok != tokSPACE and tok != tokLN:
+ break
+ self.skip()
+
+ def skipIfId(self,id):
+ """skip an optional token"""
+ if self.peekId() == id:
+ self.skip()
+
+ def expectId(self,id):
+ """raise an exception if the current token hasn't a given id.
+ otherwise skip over it"""
+ tok = self.peek()
+ if tok.id != id:
+ raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
+ self.skip()
+
+ def remain(self):
+ """return the list of remaining tokens"""
+ return self.tokens[self.n:]
+
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P T O K E N I Z E R #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+# list of long symbols, i.e. those that take more than one characters
+cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
+ tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
+
+class CppTokenizer:
+ """an abstract class used to convert some input text into a list
+ of tokens. real implementations follow and differ in the format
+ of the input text only"""
+
+ def __init__(self):
+ """initialize a new CppTokenizer object"""
+ self.eof = False # end of file reached ?
+ self.text = None # content of current line, with final \n stripped
+ self.line = 0 # number of current line
+ self.pos = 0 # current character position in current line
+ self.len = 0 # length of current line text
+ self.held = Token()
+
+ def setLineText(self,line):
+ """set the content of the (next) current line. should be called
+ by fillLineText() in derived classes"""
+ self.text = line
+ self.len = len(line)
+ self.pos = 0
+
+ def fillLineText(self):
+ """refresh the content of 'line' with a new line of input"""
+ # to be overriden
+ self.eof = True
+
+ def markPos(self,tok):
+ """mark the position of the current token in the source file"""
+ if self.eof or self.pos > self.len:
+ tok.lineno = self.line + 1
+ tok.colno = 0
+ else:
+ tok.lineno = self.line
+ tok.colno = self.pos
+
+ def peekChar(self):
+ """return the current token under the cursor without moving it"""
+ if self.eof:
+ return tokEOF
+
+ if self.pos > self.len:
+ self.pos = 0
+ self.line += 1
+ self.fillLineText()
+ if self.eof:
+ return tokEOF
+
+ if self.pos == self.len:
+ return tokLN
+ else:
+ return self.text[self.pos]
+
+ def peekNChar(self,n):
+ """try to peek the next n chars on the same line"""
+ if self.pos + n > self.len:
+ return None
+ return self.text[self.pos:self.pos+n]
+
+ def skipChar(self):
+ """increment the token cursor position"""
+ if not self.eof:
+ self.pos += 1
+
+ def skipNChars(self,n):
+ if self.pos + n <= self.len:
+ self.pos += n
+ else:
+ while n > 0:
+ self.skipChar()
+ n -= 1
+
+ def nextChar(self):
+ """retrieve the token at the current cursor position, then skip it"""
+ result = self.peekChar()
+ self.skipChar()
+ return result
+
+ def getEscape(self):
+ # try to get all characters after a backslash (\)
+ result = self.nextChar()
+ if result == "0":
+ # octal number ?
+ num = self.peekNChar(3)
+ if num != None:
+ isOctal = True
+ for d in num:
+ if not d in "01234567":
+ isOctal = False
+ break
+ if isOctal:
+ result += num
+ self.skipNChars(3)
+ elif result == "x" or result == "X":
+ # hex number ?
+ num = self.peekNChar(2)
+ if num != None:
+ isHex = True
+ for d in num:
+ if not d in "012345678abcdefABCDEF":
+ isHex = False
+ break
+ if isHex:
+ result += num
+ self.skipNChars(2)
+ elif result == "u" or result == "U":
+ # unicode char ?
+ num = self.peekNChar(4)
+ if num != None:
+ isHex = True
+ for d in num:
+ if not d in "012345678abcdefABCDEF":
+ isHex = False
+ break
+ if isHex:
+ result += num
+ self.skipNChars(4)
+
+ return result
+
+ def nextRealToken(self,tok):
+ """return next CPP token, used internally by nextToken()"""
+ c = self.nextChar()
+ if c == tokEOF or c == tokLN:
+ return tok.set(c)
+
+ if c == '/':
+ c = self.peekChar()
+ if c == '/': # C++ comment line
+ self.skipChar()
+ while 1:
+ c = self.nextChar()
+ if c == tokEOF or c == tokLN:
+ break
+ return tok.set(tokLN)
+ if c == '*': # C comment start
+ self.skipChar()
+ value = "/*"
+ prev_c = None
+ while 1:
+ c = self.nextChar()
+ if c == tokEOF:
+ #print "## EOF after '%s'" % value
+ return tok.set(tokEOF,value)
+ if c == '/' and prev_c == '*':
+ break
+ prev_c = c
+ value += c
+
+ value += "/"
+ #print "## COMMENT: '%s'" % value
+ return tok.set(tokSPACE,value)
+ c = '/'
+
+ if c.isspace():
+ while 1:
+ c2 = self.peekChar()
+ if c2 == tokLN or not c2.isspace():
+ break
+ c += c2
+ self.skipChar()
+ return tok.set(tokSPACE,c)
+
+ if c == '\\':
+ if debugTokens:
+ print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
+ if self.peekChar() == tokLN: # trailing \
+ # eat the tokLN
+ self.skipChar()
+ # we replace a trailing \ by a tokSPACE whose value is
+ # simply "\\". this allows us to detect them later when
+ # needed.
+ return tok.set(tokSPACE,"\\")
+ else:
+ # treat as a single token here ?
+ c +=self.getEscape()
+ return tok.set(c)
+
+ if c == "'": # chars
+ c2 = self.nextChar()
+ c += c2
+ if c2 == '\\':
+ c += self.getEscape()
+
+ while 1:
+ c2 = self.nextChar()
+ if c2 == tokEOF:
+ break
+ c += c2
+ if c2 == "'":
+ break
+
+ return tok.set(tokSTRING, c)
+
+ if c == '"': # strings
+ quote = 0
+ while 1:
+ c2 = self.nextChar()
+ if c2 == tokEOF:
+ return tok.set(tokSTRING,c)
+
+ c += c2
+ if not quote:
+ if c2 == '"':
+ return tok.set(tokSTRING,c)
+ if c2 == "\\":
+ quote = 1
+ else:
+ quote = 0
+
+ if c >= "0" and c <= "9": # integers ?
+ while 1:
+ c2 = self.peekChar()
+ if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
+ break
+ c += c2
+ self.skipChar()
+ return tok.set(tokNUMBER,c)
+
+ if c.isalnum() or c == "_": # identifiers ?
+ while 1:
+ c2 = self.peekChar()
+ if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
+ break
+ c += c2
+ self.skipChar()
+ if c == tokDEFINED:
+ return tok.set(tokDEFINED)
+ else:
+ return tok.set(tokIDENT,c)
+
+ # check special symbols
+ for sk in cppLongSymbols:
+ if c == sk[0]:
+ sklen = len(sk[1:])
+ if self.pos + sklen <= self.len and \
+ self.text[self.pos:self.pos+sklen] == sk[1:]:
+ self.pos += sklen
+ return tok.set(sk)
+
+ return tok.set(c)
+
+ def nextToken(self,tok):
+ """return the next token from the input text. this function
+ really updates 'tok', and does not return a new one"""
+ self.markPos(tok)
+ self.nextRealToken(tok)
+
+ def getToken(self):
+ tok = Token()
+ self.nextToken(tok)
+ if debugTokens:
+ print "getTokens: %s" % repr(tok)
+ return tok
+
+ def toTokenList(self):
+ """convert the input text of a CppTokenizer into a direct
+ list of token objects. tokEOF is stripped from the result"""
+ result = []
+ while 1:
+ tok = Token()
+ self.nextToken(tok)
+ if tok.id == tokEOF:
+ break
+ result.append(tok)
+ return result
+
+class CppLineTokenizer(CppTokenizer):
+ """a CppTokenizer derived class that accepts a single line of text as input"""
+ def __init__(self,line,lineno=1):
+ CppTokenizer.__init__(self)
+ self.line = lineno
+ self.setLineText(line)
+
+
+class CppLinesTokenizer(CppTokenizer):
+ """a CppTokenizer derived class that accepts a list of texdt lines as input.
+ the lines must not have a trailing \n"""
+ def __init__(self,lines=[],lineno=1):
+ """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
+ CppTokenizer.__init__(self)
+ self.line = lineno
+ self.lines = lines
+ self.index = 0
+ self.count = len(lines)
+
+ if self.count > 0:
+ self.fillLineText()
+ else:
+ self.eof = True
+
+ def addLine(self,line):
+ """add a line to a CppLinesTokenizer. this can be done after tokenization
+ happens"""
+ if self.count == 0:
+ self.setLineText(line)
+ self.index = 1
+ self.lines.append(line)
+ self.count += 1
+ self.eof = False
+
+ def fillLineText(self):
+ if self.index < self.count:
+ self.setLineText(self.lines[self.index])
+ self.index += 1
+ else:
+ self.eof = True
+
+
+class CppFileTokenizer(CppTokenizer):
+ def __init__(self,file,lineno=1):
+ CppTokenizer.__init__(self)
+ self.file = file
+ self.line = lineno
+
+ def fillLineText(self):
+ line = self.file.readline()
+ if len(line) > 0:
+ if line[-1] == '\n':
+ line = line[:-1]
+ if len(line) > 0 and line[-1] == "\r":
+ line = line[:-1]
+ self.setLineText(line)
+ else:
+ self.eof = True
+
+# Unit testing
+#
+class CppTokenizerTester:
+ """a class used to test CppTokenizer classes"""
+ def __init__(self,tokenizer=None):
+ self.tokenizer = tokenizer
+ self.token = Token()
+
+ def setTokenizer(self,tokenizer):
+ self.tokenizer = tokenizer
+
+ def expect(self,id):
+ self.tokenizer.nextToken(self.token)
+ tokid = self.token.id
+ if tokid == id:
+ return
+ if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
+ return
+ raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
+
+ def expectToken(self,id,line,col):
+ self.expect(id)
+ if self.token.lineno != line:
+ raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
+ if self.token.colno != col:
+ raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
+
+ def expectTokenVal(self,id,value,line,col):
+ self.expectToken(id,line,col)
+ if self.token.value != value:
+ raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
+
+ def expectList(self,list):
+ for item in list:
+ self.expect(item)
+
+def test_CppTokenizer():
+ print "running CppTokenizer tests"
+ tester = CppTokenizerTester()
+
+ tester.setTokenizer( CppLineTokenizer("#an/example && (01923_xy)") )
+ tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
+ tokRPAREN, tokLN, tokEOF] )
+
+ tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
+ tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
+ tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
+
+ tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
+ tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
+
+ tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
+ tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
+
+ tester.setTokenizer( CppLinesTokenizer( ["first second", " third"] ) )
+ tester.expectToken( "first", 1, 0 )
+ tester.expectToken( tokSPACE, 1, 5 )
+ tester.expectToken( "second", 1, 6 )
+ tester.expectToken( tokLN, 1, 12 )
+ tester.expectToken( tokSPACE, 2, 0 )
+ tester.expectToken( "third", 2, 2 )
+
+ tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
+ tester.expectList( [ "boo", tokSPACE ] )
+ tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
+ tester.expectList( [ tokLN, tokEOF ] )
+
+ tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
+ tester.expectToken( "an", 1, 0 )
+ tester.expectToken( tokSPACE, 1, 2 )
+ tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
+ tester.expectToken( tokSPACE, 2, 0 )
+ tester.expectToken( "example", 2, 1 )
+ tester.expectToken( tokLN, 2, 8 )
+
+ return True
+
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P E X P R E S S I O N S #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
+# op is an "operator" string
+
+class Expr:
+ """a class used to model a CPP expression"""
+ opInteger = "int"
+ opIdent = "ident"
+ opCall = "call"
+ opDefined = "defined"
+ opTest = "?"
+ opLogicNot = "!"
+ opNot = "~"
+ opNeg = "[-]"
+ opUnaryPlus = "[+]"
+ opAdd = "+"
+ opSub = "-"
+ opMul = "*"
+ opDiv = "/"
+ opMod = "%"
+ opAnd = "&"
+ opOr = "|"
+ opXor = "^"
+ opLogicAnd = "&&"
+ opLogicOr = "||"
+ opEqual = "=="
+ opNotEqual = "!="
+ opLess = "<"
+ opLessEq = "<="
+ opGreater = ">"
+ opGreaterEq = ">="
+ opShl = "<<"
+ opShr = ">>"
+
+ unaries = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
+ binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
+ opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ]
+
+ precedences = {
+ opTest: 0,
+ opLogicOr: 1,
+ opLogicNot: 2,
+ opOr : 3,
+ opXor: 4,
+ opAnd: 5,
+ opEqual: 6, opNotEqual: 6,
+ opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
+ opShl:8, opShr:8,
+ opAdd:9, opSub:9,
+ opMul:10, opDiv:10, opMod:10,
+ opLogicNot:11,
+ opNot: 12,
+ }
+
+ def __init__(self,op):
+ self.op = op
+
+ def __repr__(self):
+ return "(%s)" % self.op
+
+ def __str__(self):
+ return "operator(%s)" % self.op
+
+ def precedence(self):
+ """return the precedence of a given operator"""
+ return Expr.precedences.get(self.op, 1000)
+
+ def isUnary(self):
+ return self.op in Expr.unaries
+
+ def isBinary(self):
+ return self.op in Expr.binaries
+
+ def isDefined(self):
+ return self.op is opDefined
+
+ def toInt(self):
+ """return the integer value of a given expression. only valid for integer expressions
+ will return None otherwise"""
+ return None
+
+class IntExpr(Expr):
+ def __init__(self,value):
+ Expr.__init__(self,opInteger)
+ self.arg = value
+
+ def __repr__(self):
+ return "(int %s)" % self.arg
+
+ def __str__(self):
+ return self.arg
+
+ def toInt(self):
+ s = self.arg # string value
+ # get rid of U or L suffixes
+ while len(s) > 0 and s[-1] in "LUlu":
+ s = s[:-1]
+ return string.atoi(s)
+
+class IdentExpr(Expr):
+ def __init__(self,name):
+ Expr.__init__(self,opIdent)
+ self.name = name
+
+ def __repr__(self):
+ return "(ident %s)" % self.name
+
+ def __str__(self):
+ return self.name
+
+class CallExpr(Expr):
+ def __init__(self,funcname,params):
+ Expr.__init__(self,opCall)
+ self.funcname = funcname
+ self.params = params
+
+ def __repr__(self):
+ result = "(call %s [" % self.funcname
+ comma = ""
+ for param in self.params:
+ result += "%s%s" % (comma, repr(param))
+ comma = ","
+ result += "])"
+ return result
+
+ def __str__(self):
+ result = "%s(" % self.funcname
+ comma = ""
+ for param in self.params:
+ result += "%s%s" % (comma, str(param))
+ comma = ","
+
+ result += ")"
+ return result
+
+class TestExpr(Expr):
+ def __init__(self,cond,iftrue,iffalse):
+ Expr.__init__(self,opTest)
+ self.cond = cond
+ self.iftrue = iftrue
+ self.iffalse = iffalse
+
+ def __repr__(self):
+ return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))
+
+ def __str__(self):
+ return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
+
+class SingleArgExpr(Expr):
+ def __init__(self,op,arg):
+ Expr.__init__(self,op)
+ self.arg = arg
+
+ def __repr__(self):
+ return "(%s %s)" % (self.op, repr(self.arg))
+
+class DefinedExpr(SingleArgExpr):
+ def __init__(self,op,macroname):
+ SingleArgExpr.__init__(self.opDefined,macroname)
+
+ def __str__(self):
+ return "defined(%s)" % self.arg
+
+
+class UnaryExpr(SingleArgExpr):
+ def __init__(self,op,arg,opstr=None):
+ SingleArgExpr.__init__(self,op,arg)
+ if not opstr:
+ opstr = op
+ self.opstr = opstr
+
+ def __str__(self):
+ arg_s = str(self.arg)
+ arg_prec = self.arg.precedence()
+ self_prec = self.precedence()
+ if arg_prec < self_prec:
+ return "%s(%s)" % (self.opstr,arg_s)
+ else:
+ return "%s%s" % (self.opstr, arg_s)
+
+class TwoArgExpr(Expr):
+ def __init__(self,op,arg1,arg2):
+ Expr.__init__(self,op)
+ self.arg1 = arg1
+ self.arg2 = arg2
+
+ def __repr__(self):
+ return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2))
+
+class BinaryExpr(TwoArgExpr):
+ def __init__(self,op,arg1,arg2,opstr=None):
+ TwoArgExpr.__init__(self,op,arg1,arg2)
+ if not opstr:
+ opstr = op
+ self.opstr = opstr
+
+ def __str__(self):
+ arg1_s = str(self.arg1)
+ arg2_s = str(self.arg2)
+ arg1_prec = self.arg1.precedence()
+ arg2_prec = self.arg2.precedence()
+ self_prec = self.precedence()
+
+ result = ""
+ if arg1_prec < self_prec:
+ result += "(%s)" % arg1_s
+ else:
+ result += arg1_s
+
+ result += " %s " % self.opstr
+
+ if arg2_prec < self_prec:
+ result += "(%s)" % arg2_s
+ else:
+ result += arg2_s
+
+ return result
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P E X P R E S S I O N P A R S E R #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+
+class ExprParser:
+ """a class used to convert a list of tokens into a cpp Expr object"""
+
+ re_octal = re.compile(r"\s*\(0[0-7]+\).*")
+ re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
+ re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")
+
+ def __init__(self,tokens):
+ self.tok = tokens
+ self.n = len(self.tok)
+ self.i = 0
+
+ def mark(self):
+ return self.i
+
+ def release(self,pos):
+ self.i = pos
+
+ def peekId(self):
+ if self.i < self.n:
+ return self.tok[self.i].id
+ return None
+
+ def peek(self):
+ if self.i < self.n:
+ return self.tok[self.i]
+ return None
+
+ def skip(self):
+ if self.i < self.n:
+ self.i += 1
+
+ def skipOptional(self,id):
+ if self.i < self.n and self.tok[self.i].id == id:
+ self.i += 1
+
+ def skipSpaces(self):
+ i = self.i
+ n = self.n
+ tok = self.tok
+ while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
+ i += 1
+ self.i = i
+
+ # all the isXXX functions returns a (expr,nextpos) pair if a match is found
+ # or None if not
+
+ def is_integer(self):
+ id = self.tok[self.i].id
+ c = id[0]
+ if c < '0' or c > '9':
+ return None
+
+ m = ExprParser.re_octal.match(id)
+ if m:
+ return (IntExpr(id), m.end(1))
+
+ m = ExprParser.re_decimal.match(id)
+ if m:
+ return (IntExpr(id), m.end(1))
+
+ m = ExprParser.re_hexadecimal(id)
+ if m:
+ return (IntExpr(id), m.end(1))
+
+ return None
+
+ def is_defined(self):
+ id = self.tok[self.i].id
+ if id != "defined":
+ return None
+
+ pos = self.mark()
+
+ use_paren = 0
+ if self.peekId() == tokLPAREN:
+ self.skip()
+ use_paren = 1
+
+ if self.peekId() != tokIDENT:
+ self.throw( BadExpectedToken, "identifier expected")
+
+ macroname = self.peek().value
+ self.skip()
+ if use_paren:
+ self.skipSpaces()
+ if self.peekId() != tokRPAREN:
+ self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
+ self.skip()
+
+ i = self.i
+ return (DefinedExpr(macroname),i+1)
+
+ def is_call_or_ident(self):
+ pass
+
+ def parse(self, i):
+ return None
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P E X P R E S S I O N S #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+class CppInvalidExpression(Exception):
+ """an exception raised when an invalid/unsupported cpp expression is detected"""
+ pass
+
+class CppExpr:
+ """a class that models the condition of #if directives into
+ an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
+ where "op" is a string describing the operation"""
+
+ unaries = [ "!", "~" ]
+ binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
+ precedences = { "||": 1,
+ "&&": 2,
+ "|": 3,
+ "^": 4,
+ "&": 5,
+ "==":6, "!=":6,
+ "<":7, "<=":7, ">":7, ">=":7,
+ "<<":8, ">>":8,
+ "+":9, "-":9,
+ "*":10, "/":10, "%":10,
+ "!":11, "~":12
+ }
+
+ def __init__(self, tokens):
+ """initialize a CppExpr. 'tokens' must be a CppToken list"""
+ self.tok = tokens
+ self.n = len(tokens)
+ if debugCppExpr:
+ print "CppExpr: trying to parse %s" % repr(tokens)
+ expr = self.is_expr(0)
+ if debugCppExpr:
+ print "CppExpr: got " + repr(expr)
+ self.expr = expr[0]
+
+ re_cpp_constant = re.compile(r"((\d|\w|_)+)")
+
+ def throw(self,exception,i,msg):
+ if i < self.n:
+ tok = self.tok[i]
+ print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
+ else:
+ print "EOF: %s" % msg
+ raise exception
+
+ def skip_spaces(self,i):
+ """skip spaces in input token list"""
+ while i < self.n:
+ t = self.tok[i]
+ if t.id != tokSPACE and t.id != tokLN:
+ break
+ i += 1
+ return i
+
+ def expectId(self,i,id):
+ """check that a given token id is at the current position, then skip over it"""
+ i = self.skip_spaces(i)
+ if i >= self.n or self.tok[i].id != id:
+ self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
+ return i+1
+
+ def expectIdent(self,i):
+ i = self.skip_spaces(i)
+ if i >= self.n or self.tok[i].id != tokIDENT:
+ self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
+ return i+1
+
+ # the is_xxxxx function returns either None or a pair (e,nextpos)
+ # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
+ # the corresponding next position in the input token list
+ #
+
+ def is_decimal(self,i):
+ v = self.tok[i].value[:]
+ while len(v) > 0 and v[-1] in "ULul":
+ v = v[:-1]
+ for digit in v:
+ if not digit.isdigit():
+ return None
+
+ # for an integer expression tuple, the argument
+ # is simply the value as an integer
+ val = string.atoi(v)
+ return ("int", val), i+1
+
+ def is_hexadecimal(self,i):
+ v = self.tok[i].value[:]
+ while len(v) > 0 and v[-1] in "ULul":
+ v = v[:-1]
+ if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
+ for digit in v[2:]:
+ if not digit in "0123456789abcdefABCDEF":
+ return None
+
+ # for an hex expression tuple, the argument
+ # is the value as an integer
+ val = int(v[2:], 16)
+ return ("hex", val), i+1
+
+ return None
+
+ def is_integer(self,i):
+ if self.tok[i].id != tokNUMBER:
+ return None
+
+ c = self.is_decimal(i)
+ if c: return c
+
+ c = self.is_hexadecimal(i)
+ if c: return c
+
+ return None
+
+ def is_number(self,i):
+ t = self.tok[i]
+ if t.id == tokMINUS and i+1 < self.n:
+ c = self.is_integer(i+1)
+ if c:
+ e, i2 = c
+ op, val = e
+ return (op, -val), i2
+ if t.id == tokPLUS and i+1 < self.n:
+ c = self.is_integer(i+1)
+ if c: return c
+
+ return self.is_integer(i)
+
+
+ def is_alnum(self,i):
+ """test wether a given token is alpha-numeric"""
+ i = self.skip_spaces(i)
+ if i >= self.n:
+ return None
+ t = self.tok[i]
+ m = CppExpr.re_cpp_constant.match(t.id)
+ if m:
+ #print "... alnum '%s'" % m.group(1)
+ r = m.group(1)
+ return ("ident", r), i+1
+ return None
+
+ def is_defined(self,i):
+ t = self.tok[i]
+ if t.id != tokDEFINED:
+ return None
+
+ # we have the defined keyword, check the rest
+ i = self.skip_spaces(i+1)
+ use_parens = 0
+ if i < self.n and self.tok[i].id == tokLPAREN:
+ use_parens = 1
+ i = self.skip_spaces(i+1)
+
+ if i >= self.n:
+ self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren")
+
+ t = self.tok[i]
+ if t.id != tokIDENT:
+ self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
+
+ i += 1
+ if use_parens:
+ i = self.expectId(i,tokRPAREN)
+
+ return ("defined",t.value), i
+
+
+ def is_call_or_ident(self,i):
+ i = self.skip_spaces(i)
+ if i >= self.n:
+ return None
+
+ t = self.tok[i]
+ if t.id != tokIDENT:
+ return None
+
+ name = t.value
+
+ i = self.skip_spaces(i+1)
+ if i >= self.n or self.tok[i].id != tokLPAREN:
+ return ("ident", name), i
+
+ params = []
+ depth = 1
+ i += 1
+ j = i
+ while i < self.n:
+ id = self.tok[i].id
+ if id == tokLPAREN:
+ depth += 1
+ elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
+ while j < i and self.tok[j].id == tokSPACE:
+ j += 1
+ k = i
+ while k > j and self.tok[k-1].id == tokSPACE:
+ k -= 1
+ param = self.tok[j:k]
+ params.append( param )
+ if id == tokRPAREN:
+ break
+ j = i+1
+ elif id == tokRPAREN:
+ depth -= 1
+ i += 1
+
+ if i >= self.n:
+ return None
+
+ return ("call", (name, params)), i+1
+
+ def is_token(self,i,token):
+ i = self.skip_spaces(i)
+ if i >= self.n or self.tok[i].id != token:
+ return None
+ return token, i+1
+
+
+ def is_value(self,i):
+ t = self.tok[i]
+ if t.id == tokSTRING:
+ return ("string", t.value), i+1
+
+ c = self.is_number(i)
+ if c: return c
+
+ c = self.is_defined(i)
+ if c: return c
+
+ c = self.is_call_or_ident(i)
+ if c: return c
+
+ i = self.skip_spaces(i)
+ if i >= self.n or self.tok[i].id != tokLPAREN:
+ return None
+
+ popcount = 1
+ i2 = i+1
+ while i2 < self.n:
+ t = self.tok[i2]
+ if t.id == tokLPAREN:
+ popcount += 1
+ elif t.id == tokRPAREN:
+ popcount -= 1
+ if popcount == 0:
+ break
+ i2 += 1
+
+ if popcount != 0:
+ self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
+
+ if debugCppExpr:
+ print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
+ oldcount = self.n
+ self.n = i2
+ c = self.is_expr(i+1)
+ self.n = oldcount
+ if not c:
+ self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
+
+ e, i = c
+ return e, i2+1
+
+ def is_unary(self,i):
+ i = self.skip_spaces(i)
+ if i >= self.n:
+ return None
+
+ t = self.tok[i]
+ if t.id in CppExpr.unaries:
+ c = self.is_unary(i+1)
+ if not c:
+ self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
+ e, i = c
+ return (t.id, e), i
+
+ return self.is_value(i)
+
+ def is_binary(self,i):
+ i = self.skip_spaces(i)
+ if i >= self.n:
+ return None
+
+ c = self.is_unary(i)
+ if not c:
+ return None
+
+ e1, i2 = c
+ i2 = self.skip_spaces(i2)
+ if i2 >= self.n:
+ return c
+
+ t = self.tok[i2]
+ if t.id in CppExpr.binaries:
+ c = self.is_binary(i2+1)
+ if not c:
+ self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
+ e2, i3 = c
+ return (t.id, e1, e2), i3
+
+ return None
+
+ def is_expr(self,i):
+ return self.is_binary(i)
+
+ def dump_node(self,e):
+ op = e[0]
+ line = "(" + op
+ if op == "int":
+ line += " %d)" % e[1]
+ elif op == "hex":
+ line += " 0x%x)" % e[1]
+ elif op == "ident":
+ line += " %s)" % e[1]
+ elif op == "defined":
+ line += " %s)" % e[1]
+ elif op == "call":
+ arg = e[1]
+ line += " %s [" % arg[0]
+ prefix = ""
+ for param in arg[1]:
+ par = ""
+ for tok in param:
+ par += str(tok)
+ line += "%s%s" % (prefix, par)
+ prefix = ","
+ line += "])"
+ elif op in CppExpr.unaries:
+ line += " %s)" % self.dump_node(e[1])
+ elif op in CppExpr.binaries:
+ line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
+ else:
+ line += " ?%s)" % repr(e[1])
+
+ return line
+
+ def __repr__(self):
+ return self.dump_node(self.expr)
+
+ def source_node(self,e):
+ op = e[0]
+ if op == "int":
+ return "%d" % e[1]
+ if op == "hex":
+ return "0x%x" % e[1]
+ if op == "ident":
+ # XXX: should try to expand
+ return e[1]
+ if op == "defined":
+ return "defined(%s)" % e[1]
+
+ prec = CppExpr.precedences.get(op,1000)
+ arg = e[1]
+ if op in CppExpr.unaries:
+ arg_src = self.source_node(arg)
+ arg_op = arg[0]
+ arg_prec = CppExpr.precedences.get(arg[0],1000)
+ if arg_prec < prec:
+ return "!(" + arg_src + ")"
+ else:
+ return "!" + arg_src
+ if op in CppExpr.binaries:
+ arg2 = e[2]
+ arg1_op = arg[0]
+ arg2_op = arg2[0]
+ arg1_src = self.source_node(arg)
+ arg2_src = self.source_node(arg2)
+ if CppExpr.precedences.get(arg1_op,1000) < prec:
+ arg1_src = "(%s)" % arg1_src
+ if CppExpr.precedences.get(arg2_op,1000) < prec:
+ arg2_src = "(%s)" % arg2_src
+
+ return "%s %s %s" % (arg1_src, op, arg2_src)
+ return "???"
+
+ def __str__(self):
+ return self.source_node(self.expr)
+
+ def int_node(self,e):
+ if e[0] == "int":
+ return e[1]
+ elif e[1] == "hex":
+ return int(e[1],16)
+ else:
+ return None
+
+ def toInt(self):
+ return self.int_node(self.expr)
+
+ def optimize_node(self,e,macros={}):
+ op = e[0]
+ if op == "defined":
+ name = e[1]
+ if macros.has_key(name):
+ if macros[name] == kCppUndefinedMacro:
+ return ("int", 0)
+ else:
+ return ("int", 1)
+
+ if kernel_remove_config_macros and name.startswith("CONFIG_"):
+ return ("int", 0)
+
+ elif op == "!":
+ op, v = e
+ v = self.optimize_node(v, macros)
+ if v[0] == "int":
+ if v[1] == 0:
+ return ("int", 1)
+ else:
+ return ("int", 0)
+
+ elif op == "&&":
+ op, l, r = e
+ l = self.optimize_node(l, macros)
+ r = self.optimize_node(r, macros)
+ li = self.int_node(l)
+ ri = self.int_node(r)
+ if li != None:
+ if li == 0:
+ return ("int", 0)
+ else:
+ return r
+
+ elif op == "||":
+ op, l, r = e
+ l = self.optimize_node(l, macros)
+ r = self.optimize_node(r, macros)
+ li = self.int_node(l)
+ ri = self.int_node(r)
+ if li != None:
+ if li == 0:
+ return r
+ else:
+ return ("int", 1)
+ elif ri != None:
+ if ri == 0:
+ return l
+ else:
+ return ("int", 1)
+ return e
+
+ def optimize(self,macros={}):
+ self.expr = self.optimize_node(self.expr,macros)
+
+ def removePrefixedNode(self,e,prefix,names):
+ op = e[0]
+ if op == "defined":
+ name = e[1]
+ if name.startswith(prefix):
+ if names.has_key[name] and names[name] == "y":
+ return ("int", 1)
+ else:
+ return ("int", 0)
+
+ elif op in CppExpr.unaries:
+ op, v = e
+ v = self.removePrefixedNode(v,prefix,names)
+ return (op, v)
+ elif op in CppExpr.binaries:
+ op, v1, v2 = e
+ v1 = self.removePrefixedNode(v1,prefix,names)
+ v2 = self.removePrefixedNode(v2,prefix,names)
+ return (op, v1, v2)
+ elif op == "call":
+ func, params = e[1]
+ params2 = []
+ for param in params:
+ params2.append( self.removePrefixedNode(param,prefix,names) )
+ return (op, (func, params2))
+
+ return e
+
+ def removePrefixed(self,prefix,names={}):
+ self.expr = self.removePrefixedNode(self.expr,prefix,names)
+
+ def is_equal_node(self,e1,e2):
+ if e1[0] != e2[0] or len(e1) != len(e2):
+ return False
+
+ op = e1[0]
+ if op == "int" or op == "hex" or op == "!" or op == "defined":
+ return e1[0] == e2[0]
+
+ return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
+
+ def is_equal(self,other):
+ return self.is_equal_node(self.expr,other.expr)
+
+def test_cpp_expr(expr, expected):
+ e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+ #print repr(e.expr)
+ s1 = repr(e)
+ if s1 != expected:
+ print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+ else:
+ #print "OK: expression '%s'" % expr
+ pass
+
+def test_cpp_expr_optim(expr, expected, macros={}):
+ e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+ e.optimize(macros)
+
+ s1 = repr(e)
+ if s1 != expected:
+ print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+ else:
+ #print "OK: optmized expression '%s'" % expr
+ pass
+
+def test_cpp_expr_source(expr, expected):
+ e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
+ s1 = str(e)
+ if s1 != expected:
+ print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
+ else:
+ #print "OK: source expression '%s'" % expr
+ pass
+
+def test_CppExpr():
+ print "testing CppExpr"
+ test_cpp_expr( "0", "(int 0)" )
+ test_cpp_expr( "1", "(int 1)" )
+ test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
+ test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
+ test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
+ test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
+ test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
+ test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
+ test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
+ test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )
+
+ test_cpp_expr_optim( "0", "(int 0)" )
+ test_cpp_expr_optim( "1", "(int 1)" )
+ test_cpp_expr_optim( "1 && 1", "(int 1)" )
+ test_cpp_expr_optim( "1 && 0", "(int 0)" )
+ test_cpp_expr_optim( "0 && 1", "(int 0)" )
+ test_cpp_expr_optim( "0 && 0", "(int 0)" )
+ test_cpp_expr_optim( "1 || 1", "(int 1)" )
+ test_cpp_expr_optim( "1 || 0", "(int 1)" )
+ test_cpp_expr_optim( "0 || 1", "(int 1)" )
+ test_cpp_expr_optim( "0 || 0", "(int 0)" )
+ test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
+ test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
+ test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
+ test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
+ test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
+ test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
+ test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
+ test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
+ test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
+ test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
+ test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
+ test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
+ test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )
+
+ test_cpp_expr_source( "0", "0" )
+ test_cpp_expr_source( "1", "1" )
+ test_cpp_expr_source( "1 && 1", "1 && 1" )
+ test_cpp_expr_source( "1 && 0", "1 && 0" )
+ test_cpp_expr_source( "0 && 1", "0 && 1" )
+ test_cpp_expr_source( "0 && 0", "0 && 0" )
+ test_cpp_expr_source( "1 || 1", "1 || 1" )
+ test_cpp_expr_source( "1 || 0", "1 || 0" )
+ test_cpp_expr_source( "0 || 1", "0 || 1" )
+ test_cpp_expr_source( "0 || 0", "0 || 0" )
+ test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
+ test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
+ test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
+ test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
+
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### C P P B L O C K #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+class Block:
+ """a class used to model a block of input source text. there are two block types:
+ - direcive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
+ - text blocks, contain the tokens of non-directive blocks
+
+ the cpp parser class below will transform an input source file into a list of Block
+ objects (grouped in a BlockList object for convenience)"""
+
+ def __init__(self,tokens,directive=None,lineno=0):
+ """initialize a new block, if 'directive' is None, this is a text block
+ NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
+ and '#ifndef MACRO' into '#if !defined(MACRO)'"""
+ if directive == "ifdef":
+ tok = Token()
+ tok.set(tokDEFINED)
+ tokens = [ tok ] + tokens
+ directive = "if"
+
+ elif directive == "ifndef":
+ tok1 = Token()
+ tok2 = Token()
+ tok1.set(tokNOT)
+ tok2.set(tokDEFINED)
+ tokens = [ tok1, tok2 ] + tokens
+ directive = "if"
+
+ self.tokens = tokens
+ self.directive = directive
+ if lineno > 0:
+ self.lineno = lineno
+ else:
+ self.lineno = self.tokens[0].lineno
+
+ if self.isIf():
+ self.expr = CppExpr( self.tokens )
+
+ def isDirective(self):
+ """returns True iff this is a directive block"""
+ return self.directive != None
+
+ def isConditional(self):
+ """returns True iff this is a conditional directive block"""
+ return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
+
+ def isDefine(self):
+ """returns the macro name in a #define directive, or None otherwise"""
+ if self.directive != "define":
+ return None
+
+ return self.tokens[0].value
+
+ def isIf(self):
+ """returns True iff this is an #if-like directive block"""
+ return self.directive in ["if","ifdef","ifndef","elif"]
+
+ def isInclude(self):
+ """checks wether this is a #include directive. if true, then returns the
+ corresponding file name (with brackets or double-qoutes). None otherwise"""
+ if self.directive != "include":
+ return None
+
+ #print "iii " + repr(self.tokens)
+ if self.tokens[0].id == tokSTRING:
+ # a double-quote include, that's easy
+ return self.tokens[0].value
+
+ # we only want the bracket part, not any comments or junk after it
+ if self.tokens[0].id == "<":
+ i = 0
+ tok = self.tokens
+ n = len(tok)
+ while i < n and tok[i].id != ">":
+ i += 1
+
+ if i >= n:
+ return None
+
+ return string.join([ str(x) for x in tok[:i+1] ],"")
+
+ else:
+ return None
+
+ def __repr__(self):
+ """generate the representation of a given block"""
+ if self.directive:
+ result = "#%s " % self.directive
+ if self.isIf():
+ result += repr(self.expr)
+ else:
+ for tok in self.tokens:
+ result += repr(tok)
+ else:
+ result = ""
+ for tok in self.tokens:
+ result += repr(tok)
+
+ return result
+
+ def __str__(self):
+ """generate the string representation of a given block"""
+ if self.directive:
+ if self.directive == "if":
+ # small optimization to re-generate #ifdef and #ifndef
+ e = self.expr.expr
+ op = e[0]
+ if op == "defined":
+ result = "#ifdef %s" % e[1]
+ elif op == "!" and e[1][0] == "defined":
+ result = "#ifndef %s" % e[1][1]
+ else:
+ result = "#if " + str(self.expr)
+ else:
+ result = "#%s" % self.directive
+ if len(self.tokens):
+ result += " "
+ for tok in self.tokens:
+ result += str(tok)
+ else:
+ result = ""
+ for tok in self.tokens:
+ result += str(tok)
+
+ return result
+
+
+class BlockList:
+ """a convenience class used to hold and process a list of blocks returned by
+ the cpp parser"""
+ def __init__(self,blocks):
+ self.blocks = blocks
+
+ def __len__(self):
+ return len(self.blocks)
+
+ def __getitem__(self,n):
+ return self.blocks[n]
+
+ def __repr__(self):
+ return repr(self.blocks)
+
+ def __str__(self):
+ result = ""
+ for b in self.blocks:
+ result += str(b)
+ if b.isDirective():
+ result += '\n'
+ return result
+
+ def optimizeIf01(self):
+ """remove the code between #if 0 .. #endif in a BlockList"""
+ self.blocks = optimize_if01(self.blocks)
+
+ def optimizeMacros(self, macros):
+ """remove known defined and undefined macros from a BlockList"""
+ for b in self.blocks:
+ if b.isIf():
+ b.expr.optimize(macros)
+
+ def removeMacroDefines(self,macros):
+ """remove known macro definitions from a BlockList"""
+ self.blocks = remove_macro_defines(self.blocks,macros)
+
+ def removePrefixed(self,prefix,names):
+ for b in self.blocks:
+ if b.isIf():
+ b.expr.removePrefixed(prefix,names)
+
+ def optimizeAll(self,macros):
+ self.optimizeMacros(macros)
+ self.optimizeIf01()
+ return
+
+ def findIncludes(self):
+ """return the list of included files in a BlockList"""
+ result = []
+ for b in self.blocks:
+ i = b.isInclude()
+ if i:
+ result.append(i)
+
+ return result
+
+
+ def write(self,out):
+ out.write(str(self))
+
+ def removeComments(self):
+ for b in self.blocks:
+ for tok in b.tokens:
+ if tok.id == tokSPACE:
+ tok.value = " "
+
+ def removeEmptyLines(self):
+ # state = 1 => previous line was tokLN
+ # state = 0 => previous line was directive
+ state = 1
+ for b in self.blocks:
+ if b.isDirective():
+ #print "$$$ directive %s" % str(b)
+ state = 0
+ else:
+ # a tokLN followed by spaces is replaced by a single tokLN
+ # several successive tokLN are replaced by a single one
+ #
+ dst = []
+ src = b.tokens
+ n = len(src)
+ i = 0
+ #print "$$$ parsing %s" % repr(src)
+ while i < n:
+ # find final tokLN
+ j = i
+ while j < n and src[j].id != tokLN:
+ j += 1
+
+ if j >= n:
+ # uhhh
+ dst += src[i:]
+ break
+
+ if src[i].id == tokSPACE:
+ k = i+1
+ while src[k].id == tokSPACE:
+ k += 1
+
+ if k == j: # empty lines with spaces in it
+ i = j # remove the spaces
+
+ if i == j:
+ # an empty line
+ if state == 1:
+ i += 1 # remove it
+ else:
+ state = 1
+ dst.append(src[i])
+ i += 1
+ else:
+ # this line is not empty, remove trailing spaces
+ k = j
+ while k > i and src[k-1].id == tokSPACE:
+ k -= 1
+
+ nn = i
+ while nn < k:
+ dst.append(src[nn])
+ nn += 1
+ dst.append(src[j])
+ state = 0
+ i = j+1
+
+ b.tokens = dst
+
+ def removeVarsAndFuncs(self,knownStatics=set()):
+ """remove all extern and static declarations corresponding
+ to variable and function declarations. we only accept typedefs
+ and enum/structs/union declarations.
+
+ however, we keep the definitions corresponding to the set
+ of known static inline functions in the set 'knownStatics',
+ which is useful for optimized byteorder swap functions and
+ stuff like that.
+ """
+ # state = 1 => typedef/struct encountered
+ # state = 2 => vars or func declaration encountered, skipping until ";"
+ # state = 0 => normal (i.e. LN + spaces)
+ state = 0
+ depth = 0
+ blocks2 = []
+ for b in self.blocks:
+ if b.isDirective():
+ blocks2.append(b)
+ else:
+ n = len(b.tokens)
+ i = 0
+ first = 0
+ if state == 2:
+ first = n
+ while i < n:
+ tok = b.tokens[i]
+ if state == 0:
+ bad = 0
+ if tok.id in [tokLN, tokSPACE]:
+ pass
+ elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
+ state = 1
+ else:
+ if tok.value in [ 'static', 'extern', '__KINLINE' ]:
+ j = i+1
+ ident = ""
+ while j < n and not (b.tokens[j].id in [ '(', ';' ]):
+ if b.tokens[j].id == tokIDENT:
+ ident = b.tokens[j].value
+ j += 1
+ if j < n and ident in knownStatics:
+ # this is a known static, we're going to keep its
+ # definition in the final output
+ state = 1
+ else:
+ #print "### skip static '%s'" % ident
+ pass
+
+ if state == 0:
+ if i > first:
+ #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i]))
+ blocks2.append( Block(b.tokens[first:i]) )
+ state = 2
+ first = n
+
+ else: # state > 0
+ if tok.id == '{':
+ depth += 1
+
+ elif tok.id == '}':
+ if depth > 0:
+ depth -= 1
+
+ elif depth == 0 and tok.id == ';':
+ if state == 2:
+ first = i+1
+ state = 0
+
+ i += 1
+
+ if i > first:
+ #print "### final '%s'" % repr(b.tokens[first:i])
+ blocks2.append( Block(b.tokens[first:i]) )
+
+ self.blocks = blocks2
+
+ def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
+ """insert your standard issue disclaimer that this is an
+ auto-generated file, etc.."""
+ tokens = CppLineTokenizer( disclaimer ).toTokenList()
+ tokens = tokens[:-1] # remove trailing tokLN
+ self.blocks = [ Block(tokens) ] + self.blocks
+
+class BlockParser:
+ """a class used to convert an input source file into a BlockList object"""
+
+ def __init__(self,tokzer=None):
+ """initialize a block parser. the input source is provided through a Tokenizer
+ object"""
+ self.reset(tokzer)
+
+ def reset(self,tokzer):
+ self.state = 1
+ self.tokzer = tokzer
+
+ def getBlocks(self,tokzer=None):
+ """tokenize and parse the input source, return a BlockList object
+ NOTE: empty and line-numbering directives are ignored and removed
+ from the result. as a consequence, it is possible to have
+ two successive text blocks in the result"""
+ # state 0 => in source code
+ # state 1 => in source code, after a LN
+ # state 2 => in source code, after LN then some space
+ state = 1
+ lastLN = 0
+ current = []
+ blocks = []
+
+ if tokzer == None:
+ tokzer = self.tokzer
+
+ while 1:
+ tok = tokzer.getToken()
+ if tok.id == tokEOF:
+ break
+
+ if tok.id == tokLN:
+ state = 1
+ current.append(tok)
+ lastLN = len(current)
+
+ elif tok.id == tokSPACE:
+ if state == 1:
+ state = 2
+ current.append(tok)
+
+ elif tok.id == "#":
+ if state > 0:
+ # this is the start of a directive
+
+ if lastLN > 0:
+ # record previous tokens as text block
+ block = Block(current[:lastLN])
+ blocks.append(block)
+ lastLN = 0
+
+ current = []
+
+ # skip spaces after the #
+ while 1:
+ tok = tokzer.getToken()
+ if tok.id != tokSPACE:
+ break
+
+ if tok.id != tokIDENT:
+ # empty or line-numbering, ignore it
+ if tok.id != tokLN and tok.id != tokEOF:
+ while 1:
+ tok = tokzer.getToken()
+ if tok.id == tokLN or tok.id == tokEOF:
+ break
+ continue
+
+ directive = tok.value
+ lineno = tok.lineno
+
+ # skip spaces
+ tok = tokzer.getToken()
+ while tok.id == tokSPACE:
+ tok = tokzer.getToken()
+
+ # then record tokens until LN
+ dirtokens = []
+ while tok.id != tokLN and tok.id != tokEOF:
+ dirtokens.append(tok)
+ tok = tokzer.getToken()
+
+ block = Block(dirtokens,directive,lineno)
+ blocks.append(block)
+ state = 1
+
+ else:
+ state = 0
+ current.append(tok)
+
+ if len(current) > 0:
+ block = Block(current)
+ blocks.append(block)
+
+ return BlockList(blocks)
+
+ def parse(self,tokzer):
+ return self.getBlocks( tokzer )
+
+ def parseLines(self,lines):
+ """parse a list of text lines into a BlockList object"""
+ return self.getBlocks( CppLinesTokenizer(lines) )
+
+ def parseFile(self,path):
+ """parse a file into a BlockList object"""
+ file = open(path, "rt")
+ result = self.getBlocks( CppFileTokenizer(file) )
+ file.close()
+ return result
+
+
+def test_block_parsing(lines,expected):
+ blocks = BlockParser().parse( CppLinesTokenizer(lines) )
+ if len(blocks) != len(expected):
+ raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
+ % (str(blocks), repr(expected))
+ for n in range(len(blocks)):
+ if str(blocks[n]) != expected[n]:
+ raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
+ % (n, str(blocks[n]), expected[n])
+ #for block in blocks:
+ # print block
+
+def test_BlockParser():
+ test_block_parsing(["#error hello"],["#error hello"])
+ test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
+ test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ])
+ test_block_parsing(\
+ [ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
+ [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
+
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### B L O C K L I S T O P T I M I Z A T I O N #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+def remove_macro_defines( blocks, excludedMacros=set() ):
+ """remove macro definitions like #define <macroName> ...."""
+ result = []
+ for b in blocks:
+ macroName = b.isDefine()
+ if macroName == None or not macroName in excludedMacros:
+ result.append(b)
+
+ return result
+
+def find_matching_endif( blocks, i ):
+ n = len(blocks)
+ depth = 1
+ while i < n:
+ if blocks[i].isDirective():
+ dir = blocks[i].directive
+ if dir in [ "if", "ifndef", "ifdef" ]:
+ depth += 1
+ elif depth == 1 and dir in [ "else", "elif" ]:
+ return i
+ elif dir == "endif":
+ depth -= 1
+ if depth == 0:
+ return i
+ i += 1
+ return i
+
+def optimize_if01( blocks ):
+ """remove the code between #if 0 .. #endif in a list of CppBlocks"""
+ i = 0
+ n = len(blocks)
+ result = []
+ while i < n:
+ j = i
+ while j < n and not blocks[j].isIf():
+ j += 1
+ if j > i:
+ D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
+ result += blocks[i:j]
+ if j >= n:
+ break
+ expr = blocks[j].expr
+ r = expr.toInt()
+ if r == None:
+ result.append(blocks[j])
+ i = j + 1
+ continue
+
+ if r == 0:
+ # if 0 => skip everything until the corresponding #endif
+ j = find_matching_endif( blocks, j+1 )
+ if j >= n:
+ # unterminated #if 0, finish here
+ break
+ dir = blocks[j].directive
+ if dir == "endif":
+ D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
+ i = j + 1
+ elif dir == "else":
+ # convert 'else' into 'if 1'
+ D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
+ blocks[j].directive = "if"
+ blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() )
+ i = j
+ elif dir == "elif":
+ # convert 'elif' into 'if'
+ D2("convert 'if 0' .. 'elif' into 'if'")
+ blocks[j].directive = "if"
+ i = j
+ continue
+
+ # if 1 => find corresponding endif and remove/transform them
+ k = find_matching_endif( blocks, j+1 )
+ if k >= n:
+ # unterminated #if 1, finish here
+ D2("unterminated 'if 1'")
+ result += blocks[j+1:k]
+ break
+
+ dir = blocks[k].directive
+ if dir == "endif":
+ D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+ result += optimize_if01(blocks[j+1:k])
+ i = k+1
+ elif dir == "else":
+ # convert 'else' into 'if 0'
+ D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+ result += optimize_if01(blocks[j+1:k])
+ blocks[k].directive = "if"
+ blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
+ i = k
+ elif dir == "elif":
+ # convert 'elif' into 'if 0'
+ D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
+ result += optimize_if01(blocks[j+1:k])
+ blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
+ i = k
+ return result
+
+def test_optimizeAll():
+ text = """\
+#if 1
+#define GOOD_1
+#endif
+#if 0
+#define BAD_2
+#define BAD_3
+#endif
+
+#if 1
+#define GOOD_2
+#else
+#define BAD_4
+#endif
+
+#if 0
+#define BAD_5
+#else
+#define GOOD_3
+#endif
+
+#if 0
+#if 1
+#define BAD_6
+#endif
+#endif\
+"""
+
+ expected = """\
+#define GOOD_1
+
+#define GOOD_2
+
+#define GOOD_3
+
+"""
+
+ print "running test_BlockList.optimizeAll"
+ out = StringOutput()
+ lines = string.split(text, '\n')
+ list = BlockParser().parse( CppLinesTokenizer(lines) )
+ #D_setlevel(2)
+ list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
+ #print repr(list)
+ list.write(out)
+ if out.get() != expected:
+ print "KO: macro optimization failed\n"
+ print "<<<< expecting '",
+ print expected,
+ print "'\n>>>> result '"
+ print out.get(),
+ print "'\n----"
+
+
+#####################################################################################
+#####################################################################################
+##### #####
+##### #####
+##### #####
+#####################################################################################
+#####################################################################################
+
+def runUnitTests():
+ """run all unit tests for this program"""
+ print "running unit tests"
+ test_CppTokenizer()
+ test_CppExpr()
+ test_optimizeAll()
+ test_BlockParser()
+ print "OK"
+
+if __name__ == "__main__":
+ runUnitTests()