author    | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:28:35 -0800
committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:28:35 -0800
commit    | 1dc9e472e19acfe6dc7f41e429236e7eef7ceda1
tree      | 3be0c520fae17689bbf5584e1136fb820caef26f /libc/kernel/tools
parent    | 1767f908af327fa388b1c66883760ad851267013
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libc/kernel/tools')
-rwxr-xr-x | libc/kernel/tools/clean_header.py |  148
-rw-r--r-- | libc/kernel/tools/cpp.py          | 2180
-rw-r--r-- | libc/kernel/tools/defaults.py     |  101
-rwxr-xr-x | libc/kernel/tools/find_headers.py |  175
-rwxr-xr-x | libc/kernel/tools/find_users.py   |   63
-rw-r--r-- | libc/kernel/tools/kernel.py       |  338
-rwxr-xr-x | libc/kernel/tools/update_all.py   |   83
-rw-r--r-- | libc/kernel/tools/utils.py        |  397
8 files changed, 3485 insertions, 0 deletions
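For orientation before the patch itself: clean_header.py drives the block
parser from cpp.py together with the macro tables from defaults.py. Below is
a minimal sketch of that flow, assuming only the modules and names introduced
by this patch (the input path is illustrative, and StringOutput is assumed to
be exported by utils.py, which clean_header.py pulls in via 'from utils import *'):

    import cpp, kernel
    from defaults import *
    from utils import StringOutput

    # parse the original kernel header into directive/text blocks
    blocks = cpp.BlockParser().parseFile("original/linux/capability.h")

    blocks.optimizeMacros(kernel_known_macros)    # e.g. __KERNEL__ is known-undefined
    blocks.optimizeIf01()                         # drop '#if 0' ... '#endif' regions
    blocks.removeVarsAndFuncs(kernel_known_generic_statics)
    blocks.removeComments()
    blocks.removeEmptyLines()
    blocks.removeMacroDefines(kernel_ignored_macros)
    blocks.insertDisclaimer(kernel.kernel_disclaimer)

    out = StringOutput()
    blocks.write(out)
    print out.get()                               # the cleaned header text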
diff --git a/libc/kernel/tools/clean_header.py b/libc/kernel/tools/clean_header.py
new file mode 100755
index 0000000..de4bf85
--- /dev/null
+++ b/libc/kernel/tools/clean_header.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+#
+
+import sys, cpp, kernel, glob, os, re, getopt
+from defaults import *
+from utils import *
+
+noUpdate = 1
+
+def cleanupFile( path ):
+    """reads an original header and performs the cleanup operation on it.
+       this function returns the destination path and the clean header
+       as a single string"""
+    # check the header path
+    src_path = path
+
+    if not os.path.exists(src_path):
+        if noUpdate:
+            panic( "file does not exist: '%s'\n" % path )
+        sys.stderr.write( "warning: file does not exist: %s\n" % path )
+        return None, None
+
+    if not os.path.isfile(src_path):
+        if noUpdate:
+            panic( "path is not a file: '%s'\n" % path )
+        sys.stderr.write( "warning: not a file: %s\n" % path )
+        return None, None
+
+    original_path = kernel_original_path
+    if os.path.commonprefix( [ src_path, original_path ] ) != original_path:
+        if noUpdate:
+            panic( "file is not in 'original' directory: %s\n" % path )
+        sys.stderr.write( "warning: file not in 'original' ignored: %s\n" % path )
+        return None, None
+
+    src_path = src_path[len(original_path):]
+    if len(src_path) > 0 and src_path[0] == '/':
+        src_path = src_path[1:]
+
+    if len(src_path) == 0:
+        panic( "oops, internal error, can't extract correct relative path" )
+
+    # convert into destination path, extracting architecture if needed
+    # and the corresponding list of known static functions
+    #
+    arch = None
+    re_asm_arch = re.compile( r"asm-([\w\d_\+\.\-]+)(/.*)" )
+    m = re_asm_arch.match(src_path)
+    statics = kernel_known_generic_statics
+    if m and m.group(1) != 'generic':
+        dst_path = "arch-%s/asm/%s" % m.groups()
+        arch     = m.group(1)
+        statics  = statics.union( kernel_known_statics.get( arch, set() ) )
+    else:
+        dst_path = "common/" + src_path
+
+    dst_path = os.path.normpath( original_path + "/../" + dst_path )
+
+    # now, let's parse the file
+    #
+    list = cpp.BlockParser().parseFile(path)
+    if not list:
+        sys.stderr.write( "error: can't parse '%s'" % path )
+        sys.exit(1)
+
+
+    list.optimizeMacros( kernel_known_macros )
+    list.optimizeIf01()
+    list.removeVarsAndFuncs( statics )
+    list.removeComments()
+    list.removeEmptyLines()
+    list.removeMacroDefines( kernel_ignored_macros )
+    list.insertDisclaimer( kernel.kernel_disclaimer )
+
+    out = StringOutput()
+    list.write(out)
+    return dst_path, out.get()
+
+
+if __name__ == "__main__":
+
+    def usage():
+        print """\
+    usage:  %s [options] <header_path>
+
+        options:
+            -v    enable verbose mode
+
+            -u    enable update mode
+                  this will try to update the corresponding 'clean header'
+                  if the content has changed. with this, you can pass more
+                  than one file on the command-line
+
+        <header_path> must be in a subdirectory of 'original'
+    """ % os.path.basename(sys.argv[0])
+        sys.exit(1)
+
+    try:
+        optlist, args = getopt.getopt( sys.argv[1:], 'uv' )
+    except:
+        # unrecognized option
+        sys.stderr.write( "error: unrecognized option\n" )
+        usage()
+
+    for opt, arg in optlist:
+        if opt == '-u':
+            noUpdate = 0
+        elif opt == '-v':
+            verbose = 1
+            D_setlevel(1)
+
+    if len(args) == 0:
+        usage()
+
+    if noUpdate:
+        for path in args:
+            dst_path, newdata = cleanupFile(path)
+            print newdata
+
+        sys.exit(0)
+
+    # now let's update our files.
+
+    b = BatchFileUpdater()
+
+    for path in args:
+        dst_path, newdata = cleanupFile(path)
+        if not dst_path:
+            continue
+
+        b.readFile( dst_path )
+        r = b.editFile( dst_path, newdata )
+        if r == 0:
+            r = "unchanged"
+        elif r == 1:
+            r = "edited"
+        else:
+            r = "added"
+
+        print "cleaning: %-*s -> %-*s (%s)" % ( 35, path, 35, dst_path, r )
+
+
+    if os.environ.has_key("ANDROID_PRODUCT_OUT"):
+        b.updateP4Files()
+    else:
+        b.updateFiles()
+
+    sys.exit(0)
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
new file mode 100644
index 0000000..4b4bd38
--- /dev/null
+++ b/libc/kernel/tools/cpp.py
@@ -0,0 +1,2180 @@
+# a glorified C pre-processor parser
+
+import sys, re, string
+from utils import *
+from defaults import *
+
+debugTokens             = False
+debugDirectiveTokenizer = False
+debugLineParsing        = False
+debugCppExpr            = False
+debugOptimIf01          = False
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   T O K E N S                                             #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+# the list of supported C-preprocessor tokens
+# plus a couple of C tokens as well
+tokEOF       = "\0"
+tokLN        = "\n"
+tokSTRINGIFY = "#"
+tokCONCAT    = "##"
+tokLOGICAND  = "&&"
+tokLOGICOR   = "||"
+tokSHL       = "<<"
+tokSHR       = ">>"
+tokEQUAL     = "=="
+tokNEQUAL    = "!="
+tokLT        = "<"
+tokLTE       = "<="
+tokGT        = ">"
+tokGTE       = ">="
+tokELLIPSIS  = "..."
+tokSPACE     = " "
+tokDEFINED   = "defined"
+tokLPAREN    = "("
+tokRPAREN    = ")"
+tokNOT       = "!"
+tokPLUS      = "+"
+tokMINUS     = "-"
+tokMULTIPLY  = "*"
+tokDIVIDE    = "/"
+tokMODULUS   = "%"
+tokBINAND    = "&"
+tokBINOR     = "|"
+tokBINXOR    = "^"
+tokCOMMA     = ","
+tokLBRACE    = "{"
+tokRBRACE    = "}"
+tokARROW     = "->"
+tokINCREMENT = "++"
+tokDECREMENT = "--"
+tokNUMBER    = "<number>"
+tokIDENT     = "<ident>"
+tokSTRING    = "<string>"
+
+class Token:
+    """a simple class to hold information about a given token.
+       each token has a position in the source code, as well as
+       an 'id' and a 'value'. the id is a string that identifies
+       the token's class, while the value is the string of the
+       original token itself.
+
+       for example, the tokenizer concatenates a series of spaces
+       and tabs as a single tokSPACE id, whose value is the original
+       spaces+tabs sequence."""
+
+    def __init__(self):
+        self.id     = None
+        self.value  = None
+        self.lineno = 0
+        self.colno  = 0
+
+    def set(self,id,val=None):
+        self.id = id
+        if val:
+            self.value = val
+        else:
+            self.value = id
+        return None
+
+    def copyFrom(self,src):
+        self.id     = src.id
+        self.value  = src.value
+        self.lineno = src.lineno
+        self.colno  = src.colno
+
+    def __repr__(self):
+        if self.id == tokIDENT:
+            return "(ident %s)" % self.value
+        if self.id == tokNUMBER:
+            return "(number %s)" % self.value
+        if self.id == tokSTRING:
+            return "(string '%s')" % self.value
+        if self.id == tokLN:
+            return "<LN>"
+        if self.id == tokEOF:
+            return "<EOF>"
+        if self.id == tokSPACE and self.value == "\\":
+            # this corresponds to a trailing \ that was transformed into a tokSPACE
+            return "<\\>"
+
+        return self.id
+
+    def __str__(self):
+        if self.id == tokIDENT:
+            return self.value
+        if self.id == tokNUMBER:
+            return self.value
+        if self.id == tokSTRING:
+            return self.value
+        if self.id == tokEOF:
+            return "<EOF>"
+        if self.id == tokSPACE:
+            if self.value == "\\":  # trailing \
+                return "\\\n"
+            else:
+                return self.value
+
+        return self.id
+
+class BadExpectedToken(Exception):
+    def __init__(self,msg):
+        print msg
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C P P   T O K E N   C U R S O R                                 #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+class TokenCursor:
+    """a small class to iterate over a list of Token objects"""
+    def __init__(self,tokens):
+        self.tokens = tokens
+        self.n      = 0
+        self.count  = len(tokens)
+
+    def set(self,n):
+        """set the current position"""
+        if n < 0:
+            n = 0
+        if n > self.count:
+            n = self.count
+        self.n = n
+
+    def peekId(self):
+        """retrieve the id of the current token"""
+        if (self.n >= self.count):
+            return None
+        return self.tokens[self.n].id
+
+    def peek(self):
+        """retrieve the current token. does not change position"""
+        if (self.n >= self.count):
+            return None
+        return self.tokens[self.n]
+
+    def skip(self):
+        """increase current token position"""
+        if (self.n < self.count):
+            self.n += 1
+
+    def skipSpaces(self):
+        """skip over all space tokens, this includes tokSPACE and tokLN"""
+        while 1:
+            tok = self.peekId()
+            if tok != tokSPACE and tok != tokLN:
+                break
+            self.skip()
+
+    def skipIfId(self,id):
+        """skip an optional token"""
+        if self.peekId() == id:
+            self.skip()
+
+    def expectId(self,id):
+        """raise an exception if the current token hasn't a given id.
+ otherwise skip over it""" + tok = self.peek() + if tok.id != id: + raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id) + self.skip() + + def remain(self): + """return the list of remaining tokens""" + return self.tokens[self.n:] + + +##################################################################################### +##################################################################################### +##### ##### +##### C P P T O K E N I Z E R ##### +##### ##### +##################################################################################### +##################################################################################### + +# list of long symbols, i.e. those that take more than one characters +cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\ + tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ] + +class CppTokenizer: + """an abstract class used to convert some input text into a list + of tokens. real implementations follow and differ in the format + of the input text only""" + + def __init__(self): + """initialize a new CppTokenizer object""" + self.eof = False # end of file reached ? + self.text = None # content of current line, with final \n stripped + self.line = 0 # number of current line + self.pos = 0 # current character position in current line + self.len = 0 # length of current line text + self.held = Token() + + def setLineText(self,line): + """set the content of the (next) current line. should be called + by fillLineText() in derived classes""" + self.text = line + self.len = len(line) + self.pos = 0 + + def fillLineText(self): + """refresh the content of 'line' with a new line of input""" + # to be overriden + self.eof = True + + def markPos(self,tok): + """mark the position of the current token in the source file""" + if self.eof or self.pos > self.len: + tok.lineno = self.line + 1 + tok.colno = 0 + else: + tok.lineno = self.line + tok.colno = self.pos + + def peekChar(self): + """return the current token under the cursor without moving it""" + if self.eof: + return tokEOF + + if self.pos > self.len: + self.pos = 0 + self.line += 1 + self.fillLineText() + if self.eof: + return tokEOF + + if self.pos == self.len: + return tokLN + else: + return self.text[self.pos] + + def peekNChar(self,n): + """try to peek the next n chars on the same line""" + if self.pos + n > self.len: + return None + return self.text[self.pos:self.pos+n] + + def skipChar(self): + """increment the token cursor position""" + if not self.eof: + self.pos += 1 + + def skipNChars(self,n): + if self.pos + n <= self.len: + self.pos += n + else: + while n > 0: + self.skipChar() + n -= 1 + + def nextChar(self): + """retrieve the token at the current cursor position, then skip it""" + result = self.peekChar() + self.skipChar() + return result + + def getEscape(self): + # try to get all characters after a backslash (\) + result = self.nextChar() + if result == "0": + # octal number ? + num = self.peekNChar(3) + if num != None: + isOctal = True + for d in num: + if not d in "01234567": + isOctal = False + break + if isOctal: + result += num + self.skipNChars(3) + elif result == "x" or result == "X": + # hex number ? + num = self.peekNChar(2) + if num != None: + isHex = True + for d in num: + if not d in "012345678abcdefABCDEF": + isHex = False + break + if isHex: + result += num + self.skipNChars(2) + elif result == "u" or result == "U": + # unicode char ? 
+ num = self.peekNChar(4) + if num != None: + isHex = True + for d in num: + if not d in "012345678abcdefABCDEF": + isHex = False + break + if isHex: + result += num + self.skipNChars(4) + + return result + + def nextRealToken(self,tok): + """return next CPP token, used internally by nextToken()""" + c = self.nextChar() + if c == tokEOF or c == tokLN: + return tok.set(c) + + if c == '/': + c = self.peekChar() + if c == '/': # C++ comment line + self.skipChar() + while 1: + c = self.nextChar() + if c == tokEOF or c == tokLN: + break + return tok.set(tokLN) + if c == '*': # C comment start + self.skipChar() + value = "/*" + prev_c = None + while 1: + c = self.nextChar() + if c == tokEOF: + #print "## EOF after '%s'" % value + return tok.set(tokEOF,value) + if c == '/' and prev_c == '*': + break + prev_c = c + value += c + + value += "/" + #print "## COMMENT: '%s'" % value + return tok.set(tokSPACE,value) + c = '/' + + if c.isspace(): + while 1: + c2 = self.peekChar() + if c2 == tokLN or not c2.isspace(): + break + c += c2 + self.skipChar() + return tok.set(tokSPACE,c) + + if c == '\\': + if debugTokens: + print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar()) + if self.peekChar() == tokLN: # trailing \ + # eat the tokLN + self.skipChar() + # we replace a trailing \ by a tokSPACE whose value is + # simply "\\". this allows us to detect them later when + # needed. + return tok.set(tokSPACE,"\\") + else: + # treat as a single token here ? + c +=self.getEscape() + return tok.set(c) + + if c == "'": # chars + c2 = self.nextChar() + c += c2 + if c2 == '\\': + c += self.getEscape() + + while 1: + c2 = self.nextChar() + if c2 == tokEOF: + break + c += c2 + if c2 == "'": + break + + return tok.set(tokSTRING, c) + + if c == '"': # strings + quote = 0 + while 1: + c2 = self.nextChar() + if c2 == tokEOF: + return tok.set(tokSTRING,c) + + c += c2 + if not quote: + if c2 == '"': + return tok.set(tokSTRING,c) + if c2 == "\\": + quote = 1 + else: + quote = 0 + + if c >= "0" and c <= "9": # integers ? + while 1: + c2 = self.peekChar() + if c2 == tokLN or (not c2.isalnum() and c2 != "_"): + break + c += c2 + self.skipChar() + return tok.set(tokNUMBER,c) + + if c.isalnum() or c == "_": # identifiers ? + while 1: + c2 = self.peekChar() + if c2 == tokLN or (not c2.isalnum() and c2 != "_"): + break + c += c2 + self.skipChar() + if c == tokDEFINED: + return tok.set(tokDEFINED) + else: + return tok.set(tokIDENT,c) + + # check special symbols + for sk in cppLongSymbols: + if c == sk[0]: + sklen = len(sk[1:]) + if self.pos + sklen <= self.len and \ + self.text[self.pos:self.pos+sklen] == sk[1:]: + self.pos += sklen + return tok.set(sk) + + return tok.set(c) + + def nextToken(self,tok): + """return the next token from the input text. this function + really updates 'tok', and does not return a new one""" + self.markPos(tok) + self.nextRealToken(tok) + + def getToken(self): + tok = Token() + self.nextToken(tok) + if debugTokens: + print "getTokens: %s" % repr(tok) + return tok + + def toTokenList(self): + """convert the input text of a CppTokenizer into a direct + list of token objects. 
tokEOF is stripped from the result""" + result = [] + while 1: + tok = Token() + self.nextToken(tok) + if tok.id == tokEOF: + break + result.append(tok) + return result + +class CppLineTokenizer(CppTokenizer): + """a CppTokenizer derived class that accepts a single line of text as input""" + def __init__(self,line,lineno=1): + CppTokenizer.__init__(self) + self.line = lineno + self.setLineText(line) + + +class CppLinesTokenizer(CppTokenizer): + """a CppTokenizer derived class that accepts a list of texdt lines as input. + the lines must not have a trailing \n""" + def __init__(self,lines=[],lineno=1): + """initialize a CppLinesTokenizer. you can later add lines using addLines()""" + CppTokenizer.__init__(self) + self.line = lineno + self.lines = lines + self.index = 0 + self.count = len(lines) + + if self.count > 0: + self.fillLineText() + else: + self.eof = True + + def addLine(self,line): + """add a line to a CppLinesTokenizer. this can be done after tokenization + happens""" + if self.count == 0: + self.setLineText(line) + self.index = 1 + self.lines.append(line) + self.count += 1 + self.eof = False + + def fillLineText(self): + if self.index < self.count: + self.setLineText(self.lines[self.index]) + self.index += 1 + else: + self.eof = True + + +class CppFileTokenizer(CppTokenizer): + def __init__(self,file,lineno=1): + CppTokenizer.__init__(self) + self.file = file + self.line = lineno + + def fillLineText(self): + line = self.file.readline() + if len(line) > 0: + if line[-1] == '\n': + line = line[:-1] + if len(line) > 0 and line[-1] == "\r": + line = line[:-1] + self.setLineText(line) + else: + self.eof = True + +# Unit testing +# +class CppTokenizerTester: + """a class used to test CppTokenizer classes""" + def __init__(self,tokenizer=None): + self.tokenizer = tokenizer + self.token = Token() + + def setTokenizer(self,tokenizer): + self.tokenizer = tokenizer + + def expect(self,id): + self.tokenizer.nextToken(self.token) + tokid = self.token.id + if tokid == id: + return + if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER): + return + raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id) + + def expectToken(self,id,line,col): + self.expect(id) + if self.token.lineno != line: + raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line) + if self.token.colno != col: + raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col) + + def expectTokenVal(self,id,value,line,col): + self.expectToken(id,line,col) + if self.token.value != value: + raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value) + + def expectList(self,list): + for item in list: + self.expect(item) + +def test_CppTokenizer(): + print "running CppTokenizer tests" + tester = CppTokenizerTester() + + tester.setTokenizer( CppLineTokenizer("#an/example && (01923_xy)") ) + tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \ + tokRPAREN, tokLN, tokEOF] ) + + tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") ) + tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE, + tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] ) + + tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) ) + tester.expectList( [ tokSPACE, tokLN, tokEOF ] ) + + tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) ) + tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] ) + 
+ tester.setTokenizer( CppLinesTokenizer( ["first second", " third"] ) ) + tester.expectToken( "first", 1, 0 ) + tester.expectToken( tokSPACE, 1, 5 ) + tester.expectToken( "second", 1, 6 ) + tester.expectToken( tokLN, 1, 12 ) + tester.expectToken( tokSPACE, 2, 0 ) + tester.expectToken( "third", 2, 2 ) + + tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) ) + tester.expectList( [ "boo", tokSPACE ] ) + tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 ) + tester.expectList( [ tokLN, tokEOF ] ) + + tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) ) + tester.expectToken( "an", 1, 0 ) + tester.expectToken( tokSPACE, 1, 2 ) + tester.expectTokenVal( tokSPACE, "\\", 1, 3 ) + tester.expectToken( tokSPACE, 2, 0 ) + tester.expectToken( "example", 2, 1 ) + tester.expectToken( tokLN, 2, 8 ) + + return True + + +##################################################################################### +##################################################################################### +##### ##### +##### C P P E X P R E S S I O N S ##### +##### ##### +##################################################################################### +##################################################################################### + +# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc.. +# op is an "operator" string + +class Expr: + """a class used to model a CPP expression""" + opInteger = "int" + opIdent = "ident" + opCall = "call" + opDefined = "defined" + opTest = "?" + opLogicNot = "!" + opNot = "~" + opNeg = "[-]" + opUnaryPlus = "[+]" + opAdd = "+" + opSub = "-" + opMul = "*" + opDiv = "/" + opMod = "%" + opAnd = "&" + opOr = "|" + opXor = "^" + opLogicAnd = "&&" + opLogicOr = "||" + opEqual = "==" + opNotEqual = "!=" + opLess = "<" + opLessEq = "<=" + opGreater = ">" + opGreaterEq = ">=" + opShl = "<<" + opShr = ">>" + + unaries = [ opLogicNot, opNot, opNeg, opUnaryPlus ] + binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr, + opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ] + + precedences = { + opTest: 0, + opLogicOr: 1, + opLogicNot: 2, + opOr : 3, + opXor: 4, + opAnd: 5, + opEqual: 6, opNotEqual: 6, + opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7, + opShl:8, opShr:8, + opAdd:9, opSub:9, + opMul:10, opDiv:10, opMod:10, + opLogicNot:11, + opNot: 12, + } + + def __init__(self,op): + self.op = op + + def __repr__(self): + return "(%s)" % self.op + + def __str__(self): + return "operator(%s)" % self.op + + def precedence(self): + """return the precedence of a given operator""" + return Expr.precedences.get(self.op, 1000) + + def isUnary(self): + return self.op in Expr.unaries + + def isBinary(self): + return self.op in Expr.binaries + + def isDefined(self): + return self.op is opDefined + + def toInt(self): + """return the integer value of a given expression. 
only valid for integer expressions + will return None otherwise""" + return None + +class IntExpr(Expr): + def __init__(self,value): + Expr.__init__(self,opInteger) + self.arg = value + + def __repr__(self): + return "(int %s)" % self.arg + + def __str__(self): + return self.arg + + def toInt(self): + s = self.arg # string value + # get rid of U or L suffixes + while len(s) > 0 and s[-1] in "LUlu": + s = s[:-1] + return string.atoi(s) + +class IdentExpr(Expr): + def __init__(self,name): + Expr.__init__(self,opIdent) + self.name = name + + def __repr__(self): + return "(ident %s)" % self.name + + def __str__(self): + return self.name + +class CallExpr(Expr): + def __init__(self,funcname,params): + Expr.__init__(self,opCall) + self.funcname = funcname + self.params = params + + def __repr__(self): + result = "(call %s [" % self.funcname + comma = "" + for param in self.params: + result += "%s%s" % (comma, repr(param)) + comma = "," + result += "])" + return result + + def __str__(self): + result = "%s(" % self.funcname + comma = "" + for param in self.params: + result += "%s%s" % (comma, str(param)) + comma = "," + + result += ")" + return result + +class TestExpr(Expr): + def __init__(self,cond,iftrue,iffalse): + Expr.__init__(self,opTest) + self.cond = cond + self.iftrue = iftrue + self.iffalse = iffalse + + def __repr__(self): + return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse)) + + def __str__(self): + return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse) + +class SingleArgExpr(Expr): + def __init__(self,op,arg): + Expr.__init__(self,op) + self.arg = arg + + def __repr__(self): + return "(%s %s)" % (self.op, repr(self.arg)) + +class DefinedExpr(SingleArgExpr): + def __init__(self,op,macroname): + SingleArgExpr.__init__(self.opDefined,macroname) + + def __str__(self): + return "defined(%s)" % self.arg + + +class UnaryExpr(SingleArgExpr): + def __init__(self,op,arg,opstr=None): + SingleArgExpr.__init__(self,op,arg) + if not opstr: + opstr = op + self.opstr = opstr + + def __str__(self): + arg_s = str(self.arg) + arg_prec = self.arg.precedence() + self_prec = self.precedence() + if arg_prec < self_prec: + return "%s(%s)" % (self.opstr,arg_s) + else: + return "%s%s" % (self.opstr, arg_s) + +class TwoArgExpr(Expr): + def __init__(self,op,arg1,arg2): + Expr.__init__(self,op) + self.arg1 = arg1 + self.arg2 = arg2 + + def __repr__(self): + return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2)) + +class BinaryExpr(TwoArgExpr): + def __init__(self,op,arg1,arg2,opstr=None): + TwoArgExpr.__init__(self,op,arg1,arg2) + if not opstr: + opstr = op + self.opstr = opstr + + def __str__(self): + arg1_s = str(self.arg1) + arg2_s = str(self.arg2) + arg1_prec = self.arg1.precedence() + arg2_prec = self.arg2.precedence() + self_prec = self.precedence() + + result = "" + if arg1_prec < self_prec: + result += "(%s)" % arg1_s + else: + result += arg1_s + + result += " %s " % self.opstr + + if arg2_prec < self_prec: + result += "(%s)" % arg2_s + else: + result += arg2_s + + return result + +##################################################################################### +##################################################################################### +##### ##### +##### C P P E X P R E S S I O N P A R S E R ##### +##### ##### +##################################################################################### +##################################################################################### + + +class ExprParser: + """a class used to convert a 
list of tokens into a cpp Expr object""" + + re_octal = re.compile(r"\s*\(0[0-7]+\).*") + re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*") + re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*") + + def __init__(self,tokens): + self.tok = tokens + self.n = len(self.tok) + self.i = 0 + + def mark(self): + return self.i + + def release(self,pos): + self.i = pos + + def peekId(self): + if self.i < self.n: + return self.tok[self.i].id + return None + + def peek(self): + if self.i < self.n: + return self.tok[self.i] + return None + + def skip(self): + if self.i < self.n: + self.i += 1 + + def skipOptional(self,id): + if self.i < self.n and self.tok[self.i].id == id: + self.i += 1 + + def skipSpaces(self): + i = self.i + n = self.n + tok = self.tok + while i < n and (tok[i] == tokSPACE or tok[i] == tokLN): + i += 1 + self.i = i + + # all the isXXX functions returns a (expr,nextpos) pair if a match is found + # or None if not + + def is_integer(self): + id = self.tok[self.i].id + c = id[0] + if c < '0' or c > '9': + return None + + m = ExprParser.re_octal.match(id) + if m: + return (IntExpr(id), m.end(1)) + + m = ExprParser.re_decimal.match(id) + if m: + return (IntExpr(id), m.end(1)) + + m = ExprParser.re_hexadecimal(id) + if m: + return (IntExpr(id), m.end(1)) + + return None + + def is_defined(self): + id = self.tok[self.i].id + if id != "defined": + return None + + pos = self.mark() + + use_paren = 0 + if self.peekId() == tokLPAREN: + self.skip() + use_paren = 1 + + if self.peekId() != tokIDENT: + self.throw( BadExpectedToken, "identifier expected") + + macroname = self.peek().value + self.skip() + if use_paren: + self.skipSpaces() + if self.peekId() != tokRPAREN: + self.throw( BadExpectedToken, "missing right-paren after 'defined' directive") + self.skip() + + i = self.i + return (DefinedExpr(macroname),i+1) + + def is_call_or_ident(self): + pass + + def parse(self, i): + return None + +##################################################################################### +##################################################################################### +##### ##### +##### C P P E X P R E S S I O N S ##### +##### ##### +##################################################################################### +##################################################################################### + +class CppInvalidExpression(Exception): + """an exception raised when an invalid/unsupported cpp expression is detected""" + pass + +class CppExpr: + """a class that models the condition of #if directives into + an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2) + where "op" is a string describing the operation""" + + unaries = [ "!", "~" ] + binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ] + precedences = { "||": 1, + "&&": 2, + "|": 3, + "^": 4, + "&": 5, + "==":6, "!=":6, + "<":7, "<=":7, ">":7, ">=":7, + "<<":8, ">>":8, + "+":9, "-":9, + "*":10, "/":10, "%":10, + "!":11, "~":12 + } + + def __init__(self, tokens): + """initialize a CppExpr. 
'tokens' must be a CppToken list""" + self.tok = tokens + self.n = len(tokens) + if debugCppExpr: + print "CppExpr: trying to parse %s" % repr(tokens) + expr = self.is_expr(0) + if debugCppExpr: + print "CppExpr: got " + repr(expr) + self.expr = expr[0] + + re_cpp_constant = re.compile(r"((\d|\w|_)+)") + + def throw(self,exception,i,msg): + if i < self.n: + tok = self.tok[i] + print "%d:%d: %s" % (tok.lineno,tok.colno,msg) + else: + print "EOF: %s" % msg + raise exception + + def skip_spaces(self,i): + """skip spaces in input token list""" + while i < self.n: + t = self.tok[i] + if t.id != tokSPACE and t.id != tokLN: + break + i += 1 + return i + + def expectId(self,i,id): + """check that a given token id is at the current position, then skip over it""" + i = self.skip_spaces(i) + if i >= self.n or self.tok[i].id != id: + self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id)) + return i+1 + + def expectIdent(self,i): + i = self.skip_spaces(i) + if i >= self.n or self.tok[i].id != tokIDENT: + self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id)) + return i+1 + + # the is_xxxxx function returns either None or a pair (e,nextpos) + # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is + # the corresponding next position in the input token list + # + + def is_decimal(self,i): + v = self.tok[i].value[:] + while len(v) > 0 and v[-1] in "ULul": + v = v[:-1] + for digit in v: + if not digit.isdigit(): + return None + + # for an integer expression tuple, the argument + # is simply the value as an integer + val = string.atoi(v) + return ("int", val), i+1 + + def is_hexadecimal(self,i): + v = self.tok[i].value[:] + while len(v) > 0 and v[-1] in "ULul": + v = v[:-1] + if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"): + for digit in v[2:]: + if not digit in "0123456789abcdefABCDEF": + return None + + # for an hex expression tuple, the argument + # is the value as an integer + val = int(v[2:], 16) + return ("hex", val), i+1 + + return None + + def is_integer(self,i): + if self.tok[i].id != tokNUMBER: + return None + + c = self.is_decimal(i) + if c: return c + + c = self.is_hexadecimal(i) + if c: return c + + return None + + def is_number(self,i): + t = self.tok[i] + if t.id == tokMINUS and i+1 < self.n: + c = self.is_integer(i+1) + if c: + e, i2 = c + op, val = e + return (op, -val), i2 + if t.id == tokPLUS and i+1 < self.n: + c = self.is_integer(i+1) + if c: return c + + return self.is_integer(i) + + + def is_alnum(self,i): + """test wether a given token is alpha-numeric""" + i = self.skip_spaces(i) + if i >= self.n: + return None + t = self.tok[i] + m = CppExpr.re_cpp_constant.match(t.id) + if m: + #print "... 
alnum '%s'" % m.group(1) + r = m.group(1) + return ("ident", r), i+1 + return None + + def is_defined(self,i): + t = self.tok[i] + if t.id != tokDEFINED: + return None + + # we have the defined keyword, check the rest + i = self.skip_spaces(i+1) + use_parens = 0 + if i < self.n and self.tok[i].id == tokLPAREN: + use_parens = 1 + i = self.skip_spaces(i+1) + + if i >= self.n: + self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren") + + t = self.tok[i] + if t.id != tokIDENT: + self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name") + + i += 1 + if use_parens: + i = self.expectId(i,tokRPAREN) + + return ("defined",t.value), i + + + def is_call_or_ident(self,i): + i = self.skip_spaces(i) + if i >= self.n: + return None + + t = self.tok[i] + if t.id != tokIDENT: + return None + + name = t.value + + i = self.skip_spaces(i+1) + if i >= self.n or self.tok[i].id != tokLPAREN: + return ("ident", name), i + + params = [] + depth = 1 + i += 1 + j = i + while i < self.n: + id = self.tok[i].id + if id == tokLPAREN: + depth += 1 + elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): + while j < i and self.tok[j].id == tokSPACE: + j += 1 + k = i + while k > j and self.tok[k-1].id == tokSPACE: + k -= 1 + param = self.tok[j:k] + params.append( param ) + if id == tokRPAREN: + break + j = i+1 + elif id == tokRPAREN: + depth -= 1 + i += 1 + + if i >= self.n: + return None + + return ("call", (name, params)), i+1 + + def is_token(self,i,token): + i = self.skip_spaces(i) + if i >= self.n or self.tok[i].id != token: + return None + return token, i+1 + + + def is_value(self,i): + t = self.tok[i] + if t.id == tokSTRING: + return ("string", t.value), i+1 + + c = self.is_number(i) + if c: return c + + c = self.is_defined(i) + if c: return c + + c = self.is_call_or_ident(i) + if c: return c + + i = self.skip_spaces(i) + if i >= self.n or self.tok[i].id != tokLPAREN: + return None + + popcount = 1 + i2 = i+1 + while i2 < self.n: + t = self.tok[i2] + if t.id == tokLPAREN: + popcount += 1 + elif t.id == tokRPAREN: + popcount -= 1 + if popcount == 0: + break + i2 += 1 + + if popcount != 0: + self.throw(CppInvalidExpression, i, "expression missing closing parenthesis") + + if debugCppExpr: + print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2]) + oldcount = self.n + self.n = i2 + c = self.is_expr(i+1) + self.n = oldcount + if not c: + self.throw(CppInvalidExpression, i, "invalid expression within parenthesis") + + e, i = c + return e, i2+1 + + def is_unary(self,i): + i = self.skip_spaces(i) + if i >= self.n: + return None + + t = self.tok[i] + if t.id in CppExpr.unaries: + c = self.is_unary(i+1) + if not c: + self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id) + e, i = c + return (t.id, e), i + + return self.is_value(i) + + def is_binary(self,i): + i = self.skip_spaces(i) + if i >= self.n: + return None + + c = self.is_unary(i) + if not c: + return None + + e1, i2 = c + i2 = self.skip_spaces(i2) + if i2 >= self.n: + return c + + t = self.tok[i2] + if t.id in CppExpr.binaries: + c = self.is_binary(i2+1) + if not c: + self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id ) + e2, i3 = c + return (t.id, e1, e2), i3 + + return None + + def is_expr(self,i): + return self.is_binary(i) + + def dump_node(self,e): + op = e[0] + line = "(" + op + if op == "int": + line += " %d)" % e[1] + elif op == "hex": + line += " 0x%x)" % e[1] + elif op == "ident": + line += " %s)" % e[1] + elif 
op == "defined": + line += " %s)" % e[1] + elif op == "call": + arg = e[1] + line += " %s [" % arg[0] + prefix = "" + for param in arg[1]: + par = "" + for tok in param: + par += str(tok) + line += "%s%s" % (prefix, par) + prefix = "," + line += "])" + elif op in CppExpr.unaries: + line += " %s)" % self.dump_node(e[1]) + elif op in CppExpr.binaries: + line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) + else: + line += " ?%s)" % repr(e[1]) + + return line + + def __repr__(self): + return self.dump_node(self.expr) + + def source_node(self,e): + op = e[0] + if op == "int": + return "%d" % e[1] + if op == "hex": + return "0x%x" % e[1] + if op == "ident": + # XXX: should try to expand + return e[1] + if op == "defined": + return "defined(%s)" % e[1] + + prec = CppExpr.precedences.get(op,1000) + arg = e[1] + if op in CppExpr.unaries: + arg_src = self.source_node(arg) + arg_op = arg[0] + arg_prec = CppExpr.precedences.get(arg[0],1000) + if arg_prec < prec: + return "!(" + arg_src + ")" + else: + return "!" + arg_src + if op in CppExpr.binaries: + arg2 = e[2] + arg1_op = arg[0] + arg2_op = arg2[0] + arg1_src = self.source_node(arg) + arg2_src = self.source_node(arg2) + if CppExpr.precedences.get(arg1_op,1000) < prec: + arg1_src = "(%s)" % arg1_src + if CppExpr.precedences.get(arg2_op,1000) < prec: + arg2_src = "(%s)" % arg2_src + + return "%s %s %s" % (arg1_src, op, arg2_src) + return "???" + + def __str__(self): + return self.source_node(self.expr) + + def int_node(self,e): + if e[0] == "int": + return e[1] + elif e[1] == "hex": + return int(e[1],16) + else: + return None + + def toInt(self): + return self.int_node(self.expr) + + def optimize_node(self,e,macros={}): + op = e[0] + if op == "defined": + name = e[1] + if macros.has_key(name): + if macros[name] == kCppUndefinedMacro: + return ("int", 0) + else: + return ("int", 1) + + if kernel_remove_config_macros and name.startswith("CONFIG_"): + return ("int", 0) + + elif op == "!": + op, v = e + v = self.optimize_node(v, macros) + if v[0] == "int": + if v[1] == 0: + return ("int", 1) + else: + return ("int", 0) + + elif op == "&&": + op, l, r = e + l = self.optimize_node(l, macros) + r = self.optimize_node(r, macros) + li = self.int_node(l) + ri = self.int_node(r) + if li != None: + if li == 0: + return ("int", 0) + else: + return r + + elif op == "||": + op, l, r = e + l = self.optimize_node(l, macros) + r = self.optimize_node(r, macros) + li = self.int_node(l) + ri = self.int_node(r) + if li != None: + if li == 0: + return r + else: + return ("int", 1) + elif ri != None: + if ri == 0: + return l + else: + return ("int", 1) + return e + + def optimize(self,macros={}): + self.expr = self.optimize_node(self.expr,macros) + + def removePrefixedNode(self,e,prefix,names): + op = e[0] + if op == "defined": + name = e[1] + if name.startswith(prefix): + if names.has_key[name] and names[name] == "y": + return ("int", 1) + else: + return ("int", 0) + + elif op in CppExpr.unaries: + op, v = e + v = self.removePrefixedNode(v,prefix,names) + return (op, v) + elif op in CppExpr.binaries: + op, v1, v2 = e + v1 = self.removePrefixedNode(v1,prefix,names) + v2 = self.removePrefixedNode(v2,prefix,names) + return (op, v1, v2) + elif op == "call": + func, params = e[1] + params2 = [] + for param in params: + params2.append( self.removePrefixedNode(param,prefix,names) ) + return (op, (func, params2)) + + return e + + def removePrefixed(self,prefix,names={}): + self.expr = self.removePrefixedNode(self.expr,prefix,names) + + def is_equal_node(self,e1,e2): 
+ if e1[0] != e2[0] or len(e1) != len(e2): + return False + + op = e1[0] + if op == "int" or op == "hex" or op == "!" or op == "defined": + return e1[0] == e2[0] + + return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2]) + + def is_equal(self,other): + return self.is_equal_node(self.expr,other.expr) + +def test_cpp_expr(expr, expected): + e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) + #print repr(e.expr) + s1 = repr(e) + if s1 != expected: + print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected) + else: + #print "OK: expression '%s'" % expr + pass + +def test_cpp_expr_optim(expr, expected, macros={}): + e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) + e.optimize(macros) + + s1 = repr(e) + if s1 != expected: + print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected) + else: + #print "OK: optmized expression '%s'" % expr + pass + +def test_cpp_expr_source(expr, expected): + e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) + s1 = str(e) + if s1 != expected: + print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected) + else: + #print "OK: source expression '%s'" % expr + pass + +def test_CppExpr(): + print "testing CppExpr" + test_cpp_expr( "0", "(int 0)" ) + test_cpp_expr( "1", "(int 1)" ) + test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" ) + test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" ) + test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" ) + test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" ) + test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" ) + test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" ) + test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" ) + test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" ) + + test_cpp_expr_optim( "0", "(int 0)" ) + test_cpp_expr_optim( "1", "(int 1)" ) + test_cpp_expr_optim( "1 && 1", "(int 1)" ) + test_cpp_expr_optim( "1 && 0", "(int 0)" ) + test_cpp_expr_optim( "0 && 1", "(int 0)" ) + test_cpp_expr_optim( "0 && 0", "(int 0)" ) + test_cpp_expr_optim( "1 || 1", "(int 1)" ) + test_cpp_expr_optim( "1 || 0", "(int 1)" ) + test_cpp_expr_optim( "0 || 1", "(int 1)" ) + test_cpp_expr_optim( "0 || 0", "(int 0)" ) + test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" ) + test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" ) + test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" ) + test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } ) + test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} ) + test_cpp_expr_optim( "!defined(EXAMPLE)", "(! 
(defined EXAMPLE))" ) + test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } ) + test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } ) + test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" ) + test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } ) + test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } ) + test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } ) + test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } ) + + test_cpp_expr_source( "0", "0" ) + test_cpp_expr_source( "1", "1" ) + test_cpp_expr_source( "1 && 1", "1 && 1" ) + test_cpp_expr_source( "1 && 0", "1 && 0" ) + test_cpp_expr_source( "0 && 1", "0 && 1" ) + test_cpp_expr_source( "0 && 0", "0 && 0" ) + test_cpp_expr_source( "1 || 1", "1 || 1" ) + test_cpp_expr_source( "1 || 0", "1 || 0" ) + test_cpp_expr_source( "0 || 1", "0 || 1" ) + test_cpp_expr_source( "0 || 0", "0 || 0" ) + test_cpp_expr_source( "EXAMPLE", "EXAMPLE" ) + test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" ) + test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" ) + test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" ) + + +##################################################################################### +##################################################################################### +##### ##### +##### C P P B L O C K ##### +##### ##### +##################################################################################### +##################################################################################### + +class Block: + """a class used to model a block of input source text. there are two block types: + - direcive blocks: contain the tokens of a single pre-processor directive (e.g. 
#if) + - text blocks, contain the tokens of non-directive blocks + + the cpp parser class below will transform an input source file into a list of Block + objects (grouped in a BlockList object for convenience)""" + + def __init__(self,tokens,directive=None,lineno=0): + """initialize a new block, if 'directive' is None, this is a text block + NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)' + and '#ifndef MACRO' into '#if !defined(MACRO)'""" + if directive == "ifdef": + tok = Token() + tok.set(tokDEFINED) + tokens = [ tok ] + tokens + directive = "if" + + elif directive == "ifndef": + tok1 = Token() + tok2 = Token() + tok1.set(tokNOT) + tok2.set(tokDEFINED) + tokens = [ tok1, tok2 ] + tokens + directive = "if" + + self.tokens = tokens + self.directive = directive + if lineno > 0: + self.lineno = lineno + else: + self.lineno = self.tokens[0].lineno + + if self.isIf(): + self.expr = CppExpr( self.tokens ) + + def isDirective(self): + """returns True iff this is a directive block""" + return self.directive != None + + def isConditional(self): + """returns True iff this is a conditional directive block""" + return self.directive in ["if","ifdef","ifndef","else","elif","endif"] + + def isDefine(self): + """returns the macro name in a #define directive, or None otherwise""" + if self.directive != "define": + return None + + return self.tokens[0].value + + def isIf(self): + """returns True iff this is an #if-like directive block""" + return self.directive in ["if","ifdef","ifndef","elif"] + + def isInclude(self): + """checks wether this is a #include directive. if true, then returns the + corresponding file name (with brackets or double-qoutes). None otherwise""" + if self.directive != "include": + return None + + #print "iii " + repr(self.tokens) + if self.tokens[0].id == tokSTRING: + # a double-quote include, that's easy + return self.tokens[0].value + + # we only want the bracket part, not any comments or junk after it + if self.tokens[0].id == "<": + i = 0 + tok = self.tokens + n = len(tok) + while i < n and tok[i].id != ">": + i += 1 + + if i >= n: + return None + + return string.join([ str(x) for x in tok[:i+1] ],"") + + else: + return None + + def __repr__(self): + """generate the representation of a given block""" + if self.directive: + result = "#%s " % self.directive + if self.isIf(): + result += repr(self.expr) + else: + for tok in self.tokens: + result += repr(tok) + else: + result = "" + for tok in self.tokens: + result += repr(tok) + + return result + + def __str__(self): + """generate the string representation of a given block""" + if self.directive: + if self.directive == "if": + # small optimization to re-generate #ifdef and #ifndef + e = self.expr.expr + op = e[0] + if op == "defined": + result = "#ifdef %s" % e[1] + elif op == "!" 
and e[1][0] == "defined": + result = "#ifndef %s" % e[1][1] + else: + result = "#if " + str(self.expr) + else: + result = "#%s" % self.directive + if len(self.tokens): + result += " " + for tok in self.tokens: + result += str(tok) + else: + result = "" + for tok in self.tokens: + result += str(tok) + + return result + + +class BlockList: + """a convenience class used to hold and process a list of blocks returned by + the cpp parser""" + def __init__(self,blocks): + self.blocks = blocks + + def __len__(self): + return len(self.blocks) + + def __getitem__(self,n): + return self.blocks[n] + + def __repr__(self): + return repr(self.blocks) + + def __str__(self): + result = "" + for b in self.blocks: + result += str(b) + if b.isDirective(): + result += '\n' + return result + + def optimizeIf01(self): + """remove the code between #if 0 .. #endif in a BlockList""" + self.blocks = optimize_if01(self.blocks) + + def optimizeMacros(self, macros): + """remove known defined and undefined macros from a BlockList""" + for b in self.blocks: + if b.isIf(): + b.expr.optimize(macros) + + def removeMacroDefines(self,macros): + """remove known macro definitions from a BlockList""" + self.blocks = remove_macro_defines(self.blocks,macros) + + def removePrefixed(self,prefix,names): + for b in self.blocks: + if b.isIf(): + b.expr.removePrefixed(prefix,names) + + def optimizeAll(self,macros): + self.optimizeMacros(macros) + self.optimizeIf01() + return + + def findIncludes(self): + """return the list of included files in a BlockList""" + result = [] + for b in self.blocks: + i = b.isInclude() + if i: + result.append(i) + + return result + + + def write(self,out): + out.write(str(self)) + + def removeComments(self): + for b in self.blocks: + for tok in b.tokens: + if tok.id == tokSPACE: + tok.value = " " + + def removeEmptyLines(self): + # state = 1 => previous line was tokLN + # state = 0 => previous line was directive + state = 1 + for b in self.blocks: + if b.isDirective(): + #print "$$$ directive %s" % str(b) + state = 0 + else: + # a tokLN followed by spaces is replaced by a single tokLN + # several successive tokLN are replaced by a single one + # + dst = [] + src = b.tokens + n = len(src) + i = 0 + #print "$$$ parsing %s" % repr(src) + while i < n: + # find final tokLN + j = i + while j < n and src[j].id != tokLN: + j += 1 + + if j >= n: + # uhhh + dst += src[i:] + break + + if src[i].id == tokSPACE: + k = i+1 + while src[k].id == tokSPACE: + k += 1 + + if k == j: # empty lines with spaces in it + i = j # remove the spaces + + if i == j: + # an empty line + if state == 1: + i += 1 # remove it + else: + state = 1 + dst.append(src[i]) + i += 1 + else: + # this line is not empty, remove trailing spaces + k = j + while k > i and src[k-1].id == tokSPACE: + k -= 1 + + nn = i + while nn < k: + dst.append(src[nn]) + nn += 1 + dst.append(src[j]) + state = 0 + i = j+1 + + b.tokens = dst + + def removeVarsAndFuncs(self,knownStatics=set()): + """remove all extern and static declarations corresponding + to variable and function declarations. we only accept typedefs + and enum/structs/union declarations. + + however, we keep the definitions corresponding to the set + of known static inline functions in the set 'knownStatics', + which is useful for optimized byteorder swap functions and + stuff like that. + """ + # state = 1 => typedef/struct encountered + # state = 2 => vars or func declaration encountered, skipping until ";" + # state = 0 => normal (i.e. 
LN + spaces) + state = 0 + depth = 0 + blocks2 = [] + for b in self.blocks: + if b.isDirective(): + blocks2.append(b) + else: + n = len(b.tokens) + i = 0 + first = 0 + if state == 2: + first = n + while i < n: + tok = b.tokens[i] + if state == 0: + bad = 0 + if tok.id in [tokLN, tokSPACE]: + pass + elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]: + state = 1 + else: + if tok.value in [ 'static', 'extern', '__KINLINE' ]: + j = i+1 + ident = "" + while j < n and not (b.tokens[j].id in [ '(', ';' ]): + if b.tokens[j].id == tokIDENT: + ident = b.tokens[j].value + j += 1 + if j < n and ident in knownStatics: + # this is a known static, we're going to keep its + # definition in the final output + state = 1 + else: + #print "### skip static '%s'" % ident + pass + + if state == 0: + if i > first: + #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i])) + blocks2.append( Block(b.tokens[first:i]) ) + state = 2 + first = n + + else: # state > 0 + if tok.id == '{': + depth += 1 + + elif tok.id == '}': + if depth > 0: + depth -= 1 + + elif depth == 0 and tok.id == ';': + if state == 2: + first = i+1 + state = 0 + + i += 1 + + if i > first: + #print "### final '%s'" % repr(b.tokens[first:i]) + blocks2.append( Block(b.tokens[first:i]) ) + + self.blocks = blocks2 + + def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"): + """insert your standard issue disclaimer that this is an + auto-generated file, etc..""" + tokens = CppLineTokenizer( disclaimer ).toTokenList() + tokens = tokens[:-1] # remove trailing tokLN + self.blocks = [ Block(tokens) ] + self.blocks + +class BlockParser: + """a class used to convert an input source file into a BlockList object""" + + def __init__(self,tokzer=None): + """initialize a block parser. the input source is provided through a Tokenizer + object""" + self.reset(tokzer) + + def reset(self,tokzer): + self.state = 1 + self.tokzer = tokzer + + def getBlocks(self,tokzer=None): + """tokenize and parse the input source, return a BlockList object + NOTE: empty and line-numbering directives are ignored and removed + from the result. 
as a consequence, it is possible to have + two successive text blocks in the result""" + # state 0 => in source code + # state 1 => in source code, after a LN + # state 2 => in source code, after LN then some space + state = 1 + lastLN = 0 + current = [] + blocks = [] + + if tokzer == None: + tokzer = self.tokzer + + while 1: + tok = tokzer.getToken() + if tok.id == tokEOF: + break + + if tok.id == tokLN: + state = 1 + current.append(tok) + lastLN = len(current) + + elif tok.id == tokSPACE: + if state == 1: + state = 2 + current.append(tok) + + elif tok.id == "#": + if state > 0: + # this is the start of a directive + + if lastLN > 0: + # record previous tokens as text block + block = Block(current[:lastLN]) + blocks.append(block) + lastLN = 0 + + current = [] + + # skip spaces after the # + while 1: + tok = tokzer.getToken() + if tok.id != tokSPACE: + break + + if tok.id != tokIDENT: + # empty or line-numbering, ignore it + if tok.id != tokLN and tok.id != tokEOF: + while 1: + tok = tokzer.getToken() + if tok.id == tokLN or tok.id == tokEOF: + break + continue + + directive = tok.value + lineno = tok.lineno + + # skip spaces + tok = tokzer.getToken() + while tok.id == tokSPACE: + tok = tokzer.getToken() + + # then record tokens until LN + dirtokens = [] + while tok.id != tokLN and tok.id != tokEOF: + dirtokens.append(tok) + tok = tokzer.getToken() + + block = Block(dirtokens,directive,lineno) + blocks.append(block) + state = 1 + + else: + state = 0 + current.append(tok) + + if len(current) > 0: + block = Block(current) + blocks.append(block) + + return BlockList(blocks) + + def parse(self,tokzer): + return self.getBlocks( tokzer ) + + def parseLines(self,lines): + """parse a list of text lines into a BlockList object""" + return self.getBlocks( CppLinesTokenizer(lines) ) + + def parseFile(self,path): + """parse a file into a BlockList object""" + file = open(path, "rt") + result = self.getBlocks( CppFileTokenizer(file) ) + file.close() + return result + + +def test_block_parsing(lines,expected): + blocks = BlockParser().parse( CppLinesTokenizer(lines) ) + if len(blocks) != len(expected): + raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \ + % (str(blocks), repr(expected)) + for n in range(len(blocks)): + if str(blocks[n]) != expected[n]: + raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \ + % (n, str(blocks[n]), expected[n]) + #for block in blocks: + # print block + +def test_BlockParser(): + test_block_parsing(["#error hello"],["#error hello"]) + test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ]) + test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ]) + test_block_parsing(\ + [ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ], + [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] ) + + +##################################################################################### +##################################################################################### +##### ##### +##### B L O C K L I S T O P T I M I Z A T I O N ##### +##### ##### +##################################################################################### +##################################################################################### + +def remove_macro_defines( blocks, excludedMacros=set() ): + """remove macro definitions like #define <macroName> ....""" + result = [] + for b in blocks: + macroName = b.isDefine() + if macroName == None or not macroName in excludedMacros: + result.append(b) + + return result + +def 
find_matching_endif( blocks, i ): + n = len(blocks) + depth = 1 + while i < n: + if blocks[i].isDirective(): + dir = blocks[i].directive + if dir in [ "if", "ifndef", "ifdef" ]: + depth += 1 + elif depth == 1 and dir in [ "else", "elif" ]: + return i + elif dir == "endif": + depth -= 1 + if depth == 0: + return i + i += 1 + return i + +def optimize_if01( blocks ): + """remove the code between #if 0 .. #endif in a list of CppBlocks""" + i = 0 + n = len(blocks) + result = [] + while i < n: + j = i + while j < n and not blocks[j].isIf(): + j += 1 + if j > i: + D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno)) + result += blocks[i:j] + if j >= n: + break + expr = blocks[j].expr + r = expr.toInt() + if r == None: + result.append(blocks[j]) + i = j + 1 + continue + + if r == 0: + # if 0 => skip everything until the corresponding #endif + j = find_matching_endif( blocks, j+1 ) + if j >= n: + # unterminated #if 0, finish here + break + dir = blocks[j].directive + if dir == "endif": + D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno)) + i = j + 1 + elif dir == "else": + # convert 'else' into 'if 1' + D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno)) + blocks[j].directive = "if" + blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() ) + i = j + elif dir == "elif": + # convert 'elif' into 'if' + D2("convert 'if 0' .. 'elif' into 'if'") + blocks[j].directive = "if" + i = j + continue + + # if 1 => find corresponding endif and remove/transform them + k = find_matching_endif( blocks, j+1 ) + if k >= n: + # unterminated #if 1, finish here + D2("unterminated 'if 1'") + result += blocks[j+1:k] + break + + dir = blocks[k].directive + if dir == "endif": + D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) + result += optimize_if01(blocks[j+1:k]) + i = k+1 + elif dir == "else": + # convert 'else' into 'if 0' + D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) + result += optimize_if01(blocks[j+1:k]) + blocks[k].directive = "if" + blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() ) + i = k + elif dir == "elif": + # convert 'elif' into 'if 0' + D2("convert 'if 1' .. 
'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) + result += optimize_if01(blocks[j+1:k]) + blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() ) + i = k + return result + +def test_optimizeAll(): + text = """\ +#if 1 +#define GOOD_1 +#endif +#if 0 +#define BAD_2 +#define BAD_3 +#endif + +#if 1 +#define GOOD_2 +#else +#define BAD_4 +#endif + +#if 0 +#define BAD_5 +#else +#define GOOD_3 +#endif + +#if 0 +#if 1 +#define BAD_6 +#endif +#endif\ +""" + + expected = """\ +#define GOOD_1 + +#define GOOD_2 + +#define GOOD_3 + +""" + + print "running test_BlockList.optimizeAll" + out = StringOutput() + lines = string.split(text, '\n') + list = BlockParser().parse( CppLinesTokenizer(lines) ) + #D_setlevel(2) + list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} ) + #print repr(list) + list.write(out) + if out.get() != expected: + print "KO: macro optimization failed\n" + print "<<<< expecting '", + print expected, + print "'\n>>>> result '" + print out.get(), + print "'\n----" + + +##################################################################################### +##################################################################################### +##### ##### +##### ##### +##### ##### +##################################################################################### +##################################################################################### + +def runUnitTests(): + """run all unit tests for this program""" + print "running unit tests" + test_CppTokenizer() + test_CppExpr() + test_optimizeAll() + test_BlockParser() + print "OK" + +if __name__ == "__main__": + runUnitTests() diff --git a/libc/kernel/tools/defaults.py b/libc/kernel/tools/defaults.py new file mode 100644 index 0000000..aad0092 --- /dev/null +++ b/libc/kernel/tools/defaults.py @@ -0,0 +1,101 @@ +# this module contains all the defaults used by the generation of cleaned-up headers +# for the Bionic C library +# + +import time, os, sys +from utils import * + +# the list of supported architectures +# +kernel_archs = [ 'arm', 'x86' ] + +# the list of include directories that belong to the kernel +# tree. used when looking for sources... +# +kernel_dirs = [ "linux", "asm", "asm-generic", "mtd" ] + +# path to the directory containing the original kernel headers +# +kernel_original_path = os.path.normpath( find_program_dir() + '/../original' ) + +# a special value that is used to indicate that a given macro is known to be +# undefined during optimization +kCppUndefinedMacro = "<<<undefined>>>" + +# this is the set of known macros we want to totally optimize out from the +# final headers +kernel_known_macros = { + "__KERNEL__": kCppUndefinedMacro, + "__KERNEL_STRICT_NAMES":"1", + "__CHECKER__": kCppUndefinedMacro, + "__CHECK_ENDIAN__": kCppUndefinedMacro, + } + +# define to true if you want to remove all defined(CONFIG_FOO) tests +# from the clean headers. testing shows that this is not strictly necessary +# but just generates cleaner results +kernel_remove_config_macros = True + +# maps an architecture to a set of default macros that would be provided by +# toolchain preprocessor +kernel_default_arch_macros = { + "arm": {}, + "x86": {"__i386__": "1"}, + } + +# this is the set of known static inline functions that we want to keep +# in the final ARM headers. this is only used to keep optimized byteswapping +# static functions and stuff like that. 
+kernel_known_arm_statics = set( + [ "___arch__swab32", # asm-arm/byteorder.h + ] + ) + +kernel_known_x86_statics = set( + [ "___arch__swab32", # asm-x86/byteorder.h + "___arch__swab64", # asm-x86/byteorder.h + ] + ) + +kernel_known_generic_statics = set( + [ "__invalid_size_argument_for_IOC", # asm-generic/ioctl.h + "__cmsg_nxthdr", # linux/socket.h + "cmsg_nxthdr", # linux/socket.h + "ipt_get_target", + ] + ) + +# this maps an architecture to the set of static inline functions that +# we want to keep in the final headers +# +kernel_known_statics = { + "arm" : kernel_known_arm_statics, + "x86" : kernel_known_x86_statics + } + +# this is a list of macros which we want to specifically exclude from +# the generated files. +# +kernel_ignored_macros = set( + [ "MAXHOSTNAMELEN", # for some reason, Linux defines it to 64 + # while most of the BSD code expects this to be 256 + # so ignore the kernel-provided definition and + # define it in the Bionic headers instead + ] + ) + +# this is the standard disclaimer +# +kernel_disclaimer = """\ +/**************************************************************************** + **************************************************************************** + *** + *** This header was automatically generated from a Linux kernel header + *** of the same name, to make information necessary for userspace to + *** call into the kernel available to libc. It contains only constants, + *** structures, and macros generated from the original header, and thus, + *** contains no copyrightable information. + *** + **************************************************************************** + ****************************************************************************/ +""" diff --git a/libc/kernel/tools/find_headers.py b/libc/kernel/tools/find_headers.py new file mode 100755 index 0000000..8e72bb6 --- /dev/null +++ b/libc/kernel/tools/find_headers.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# +# this program is used to find source code that includes linux kernel headers directly +# (e.g. with #include <linux/...> or #include <asm/...>) +# +# then it lists + +import sys, cpp, glob, os, re, getopt, kernel +from utils import * +from defaults import * + +program_dir = find_program_dir() + +wanted_archs = kernel_archs +wanted_include = os.path.normpath(program_dir + '/../original') +wanted_config = os.path.normpath(program_dir + '/../original/config') + +def usage(): + print """\ + usage: find_headers.py [options] (file|directory|@listfile)+ + + options: + -d <include-dir> specify alternate kernel headers + 'include' directory + ('%s' by default) + + -c <file> specify alternate .config file + ('%s' by default) + + -a <archs> used to specify an alternative list + of architectures to support + ('%s' by default) + + -v enable verbose mode + + this program is used to find all the kernel headers that are used + by a set of source files or directories containing them. the search + is recursive to find *all* required files. 
+ +""" % ( wanted_include, wanted_config, string.join(kernel_archs,",") ) + sys.exit(1) + + +try: + optlist, args = getopt.getopt( sys.argv[1:], 'vc:d:a:' ) +except: + # unrecognized option + print "error: unrecognized option" + usage() + +for opt, arg in optlist: + if opt == '-a': + wanted_archs = string.split(arg,',') + elif opt == '-d': + wanted_include = arg + elif opt == '-c': + wanted_config = arg + elif opt == '-v': + kernel.verboseSearch = 1 + kernel.verboseFind = 1 + verbose = 1 + else: + usage() + +if len(args) < 1: + usage() + +kernel_root = wanted_include +if not os.path.exists(kernel_root): + sys.stderr.write( "error: directory '%s' does not exist\n" % kernel_root ) + sys.exit(1) + +if not os.path.isdir(kernel_root): + sys.stderr.write( "error: '%s' is not a directory\n" % kernel_root ) + sys.exit(1) + +if not os.path.isdir(kernel_root+"/linux"): + sys.stderr.write( "error: '%s' does not have a 'linux' directory\n" % kernel_root ) + sys.exit(1) + +if not os.path.exists(wanted_config): + sys.stderr.write( "error: file '%s' does not exist\n" % wanted_config ) + sys.exit(1) + +if not os.path.isfile(wanted_config): + sys.stderr.write( "error: '%s' is not a file\n" % wanted_config ) + sys.exit(1) + +# find all architectures in the kernel tree +re_asm_ = re.compile(r"asm-(\w+)") +archs = [] +for dir in os.listdir(kernel_root): + m = re_asm_.match(dir) + if m: + if verbose: print ">> found kernel arch '%s'" % m.group(1) + archs.append(m.group(1)) + +# if we're using the 'kernel_headers' directory, there is only asm/ +# and no other asm-<arch> directories (arm is assumed, which sucks) +# +in_kernel_headers = False +if len(archs) == 0: + # this can happen when we're using the 'kernel_headers' directory + if os.path.isdir(kernel_root+"/asm"): + in_kernel_headers = True + archs = [ "arm" ] + +# if the user has specified some architectures with -a <archs> ensure that +# all those he wants are available from the kernel include tree +if wanted_archs != None: + if in_kernel_headers and wanted_archs != [ "arm" ]: + sys.stderr.write( "error: when parsing kernel_headers, 'arm' architecture only is supported at the moment\n" ) + sys.exit(1) + missing = [] + for arch in wanted_archs: + if arch not in archs: + missing.append(arch) + if len(missing) > 0: + sys.stderr.write( "error: the following requested architectures are not in the kernel tree: " ) + for a in missing: + sys.stderr.write( " %s" % a ) + sys.stderr.write( "\n" ) + sys.exit(1) + + archs = wanted_archs + +# helper function used to walk the user files +def parse_file(path, parser): + parser.parseFile(path) + + +# remove previous destination directory +#destdir = "/tmp/bionic-kernel-headers/" +#cleanup_dir(destdir) + +# try to read the config file +try: + cparser = kernel.ConfigParser() + cparser.parseFile( wanted_config ) +except: + sys.stderr.write( "error: can't parse '%s'" % wanted_config ) + sys.exit(1) + +kernel_config = cparser.getDefinitions() + +# first, obtain the list of kernel files used by our clients +fparser = kernel.HeaderScanner() +walk_source_files( args, parse_file, fparser, excludes=["kernel_headers"] ) +headers = fparser.getHeaders() +files = fparser.getFiles() + +# now recursively scan the kernel headers for additionnal sub-included headers +hparser = kernel.KernelHeaderFinder(headers,archs,kernel_root,kernel_config) +headers = hparser.scanForAllArchs() + +if 0: # just for debugging + dumpHeaderUsers = False + + print "the following %d headers:" % len(headers) + for h in sorted(headers): + if dumpHeaderUsers: + 
print " %s (%s)" % (h, repr(hparser.getHeaderUsers(h))) + else: + print " %s" % h + + print "are used by the following %d files:" % len(files) + for f in sorted(files): + print " %s" % f + + sys.exit(0) + +for h in sorted(headers): + print h + +sys.exit(0) diff --git a/libc/kernel/tools/find_users.py b/libc/kernel/tools/find_users.py new file mode 100755 index 0000000..5ee308c --- /dev/null +++ b/libc/kernel/tools/find_users.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# this program is used to find source code that includes linux kernel headers directly +# (e.g. with #include <linux/...> or #include <asm/...>) +# +# then it lists + +import sys, cpp, glob, os, re, getopt +import kernel +from utils import * +from defaults import * + + +def usage(): + print """\ + usage: find_users.py [-v] (file|directory|@listfile)+ + + this program is used to scan a list of files or directories for + sources that include kernel headers directly. the program prints + the list of said source files when it's done. + + when scanning directories, only files matching the following + extension will be searched: .c .cpp .S .h + + use -v to enable verbose output +""" + sys.exit(1) + + +try: + optlist, args = getopt.getopt( sys.argv[1:], 'v' ) +except: + # unrecognized option + print "error: unrecognized option" + usage() + +for opt, arg in optlist: + if opt == '-v': + kernel.verboseSearch = 1 + kernel.verboseFind = 1 + else: + usage() + +if len(args) < 1: + usage() + +# helper function used to walk the user files +def parse_file(path, parser): + parser.parseFile(path) + + +# first, obtain the list of kernel files used by our clients +# avoid parsing the 'kernel_headers' directory itself since we +# use this program with the Android source tree by default. +# +fparser = kernel.HeaderScanner() +walk_source_files( args, parse_file, fparser, excludes=["kernel_headers","original"] ) +files = fparser.getFiles() + +for f in sorted(files): + print f + +sys.exit(0) diff --git a/libc/kernel/tools/kernel.py b/libc/kernel/tools/kernel.py new file mode 100644 index 0000000..9d9b5f0 --- /dev/null +++ b/libc/kernel/tools/kernel.py @@ -0,0 +1,338 @@ +# this file contains definitions related to the Linux kernel itself +# + +# list here the macros that you know are always defined/undefined when including +# the kernel headers +# +import sys, cpp, re, os.path, string, time +from defaults import * + +verboseSearch = 0 +verboseFind = 0 + +######################################################################## +######################################################################## +##### ##### +##### H E A D E R S C A N N E R ##### +##### ##### +######################################################################## +######################################################################## + + +class HeaderScanner: + """a class used to non-recursively detect which Linux kernel headers are + used by a given set of input source files""" + + # to use the HeaderScanner, do the following: + # + # scanner = HeaderScanner() + # for path in <your list of files>: + # scanner.parseFile(path) + # + # # get the set of Linux headers included by your files + # headers = scanner.getHeaders() + # + # # get the set of of input files that do include Linux headers + # files = scanner.getFiles() + # + # note that the result of getHeaders() is a set of strings, each one + # corresponding to a non-bracketed path name, e.g.: + # + # set("linux/types","asm/types.h") + # + + # the default algorithm is pretty smart and will analyze the input + # files with 
a custom C pre-processor in order to optimize out macros, + # get rid of comments, empty lines, etc.. + # + # this avoids many annoying false positives... !! + # + + # this regular expression is used to detect include paths that relate to + # the kernel, by default, it selects one of: + # <linux/*> + # <asm/*> + # <asm-generic/*> + # <mtd/*> + # + re_combined =\ + re.compile(r"^.*<((%s)/[\d\w_\+\.\-/]*)>.*$" % string.join(kernel_dirs,"|") ) + # some kernel files choose to include files with relative paths (x86 32/64 + # dispatch for instance) + re_rel_dir = re.compile(r'^.*"([\d\w_\+\.\-/]+)".*$') + + def __init__(self,config={}): + """initialize a HeaderScanner""" + self.reset() + self.config = config + + def reset(self,config={}): + self.files = set() # set of files being parsed for headers + self.headers = {} # maps headers to set of users + self.config = config + + def checkInclude(self, line, from_file, kernel_root=None): + relative = False + m = HeaderScanner.re_combined.match(line) + if kernel_root and not m: + m = HeaderScanner.re_rel_dir.match(line) + relative = True + if not m: return + + header = m.group(1) + if from_file: + self.files.add(from_file) + if kernel_root and relative: + hdr_dir = os.path.realpath(os.path.dirname(from_file)) + hdr_dir = hdr_dir.replace("%s/" % os.path.realpath(kernel_root), + "") + if hdr_dir: + _prefix = "%s/" % hdr_dir + else: + _prefix = "" + header = "%s%s" % (_prefix, header) + + if not header in self.headers: + self.headers[header] = set() + + if from_file: + if verboseFind: + print "=== %s uses %s" % (from_file, header) + self.headers[header].add(from_file) + + def parseFile(self, path, arch=None, kernel_root=None): + """parse a given file for Linux headers""" + if not os.path.exists(path): + return + + # since tokenizing the file is very slow, we first try a quick grep + # to see if this returns any meaningful results. only if this is true + # do we do the tokenization""" + try: + f = open(path, "rt") + except: + print "!!! 
can't read '%s'" % path
+            return
+
+        hasIncludes = False
+        for line in f:
+            if (HeaderScanner.re_combined.match(line) or
+                (kernel_root and HeaderScanner.re_rel_dir.match(line))):
+                hasIncludes = True
+                break
+
+        if not hasIncludes:
+            if verboseSearch: print "::: " + path
+            return
+
+        if verboseSearch: print "*** " + path
+
+        list = cpp.BlockParser().parseFile(path)
+        if list:
+            #list.removePrefixed("CONFIG_",self.config)
+            macros = kernel_known_macros.copy()
+            if kernel_root:
+                macros.update(self.config)
+                if arch and arch in kernel_default_arch_macros:
+                    macros.update(kernel_default_arch_macros[arch])
+            list.optimizeMacros(macros)
+            list.optimizeIf01()
+            includes = list.findIncludes()
+            for inc in includes:
+                self.checkInclude(inc, path, kernel_root)
+
+    def getHeaders(self):
+        """return the set of all needed kernel headers"""
+        return set(self.headers.keys())
+
+    def getHeaderUsers(self,header):
+        """return the set of all users for a given header"""
+        return set(self.headers.get(header))
+
+    def getAllUsers(self):
+        """return a dictionary mapping headers to their user set"""
+        return self.headers.copy()
+
+    def getFiles(self):
+        """returns the set of files that do include kernel headers"""
+        return self.files.copy()
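For reference, the `re_combined` pattern above is what decides whether a line counts as a kernel include; group 1 captures the non-bracketed header path. A minimal, self-contained sketch of its behavior (the include line is hypothetical):

    import re, string
    kernel_dirs = [ "linux", "asm", "asm-generic", "mtd" ]
    re_combined = re.compile(r"^.*<((%s)/[\d\w_\+\.\-/]*)>.*$" % string.join(kernel_dirs,"|"))

    m = re_combined.match("#include <linux/socket.h>")
    if m:
        print m.group(1)   # -> 'linux/socket.h', the value checkInclude() records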
+
+##########################################################################
+##########################################################################
+#####                                                                #####
+#####           H E A D E R   F I N D E R                            #####
+#####                                                                #####
+##########################################################################
+##########################################################################
+
+
+class KernelHeaderFinder:
+    """a class used to scan the kernel headers themselves."""
+
+    # this is different from a HeaderScanner because we need to translate
+    # the paths returned by HeaderScanner.getHeaders() into possibly
+    # architecture-specific ones.
+    #
+    # for example, <asm/XXXX.h> needs to be translated into <asm-ARCH/XXXX.h>
+    # where ARCH is appropriately chosen
+
+    # here's how to use this:
+    #
+    #    scanner = HeaderScanner()
+    #    for path in <your list of user sources>:
+    #        scanner.parseFile(path)
+    #
+    #    used_headers = scanner.getHeaders()
+    #    finder = KernelHeaderFinder(used_headers, [ "arm", "x86" ],
+    #                                "<kernel_include_path>")
+    #    all_headers = finder.scanForAllArchs()
+    #
+    #    note that the result of scanForAllArchs() is a set of relative
+    #    header paths that are not bracketed
+    #
+
+    def __init__(self,headers,archs,kernel_root,kernel_config):
+        """init a KernelHeaderFinder,
+
+           'headers' is a list or set of headers,
+           'archs' is a list of architectures
+           'kernel_root' is the path to the 'include' directory
+           of your original kernel sources
+        """
+
+        if len(kernel_root) > 0 and kernel_root[-1] != "/":
+            kernel_root += "/"
+        #print "using kernel_root %s" % kernel_root
+        self.archs = archs
+        self.searched = set(headers)
+        self.kernel_root = kernel_root
+        self.kernel_config = kernel_config
+        self.needed = {}
+        self.setArch(arch=None)
+
+    def setArch(self,arch=None):
+        self.curr_arch = arch
+        self.arch_headers = set()
+        if arch:
+            self.prefix = "asm-%s/" % arch
+        else:
+            self.prefix = None
+
+    def pathFromHeader(self,header):
+        path = header
+        if self.prefix and path.startswith("asm/"):
+            path = "%s%s" % (self.prefix, path[4:])
+        return path
+
+    def pathToHeader(self,path):
+        if self.prefix and path.startswith(self.prefix):
+            path = "asm/%s" % path[len(self.prefix):]
+        return "%s" % path
+
+    def setSearchedHeaders(self,headers):
+        self.searched = set(headers)
+
+    def scanForArch(self):
+        fparser = HeaderScanner(config=self.kernel_config)
+        workqueue = []
+        needed = {}
+        for h in self.searched:
+            path = self.pathFromHeader(h)
+            if not path in needed:
+                needed[path] = set()
+                workqueue.append(path)
+
+        i = 0
+        while i < len(workqueue):
+            path = workqueue[i]
+            i += 1
+            fparser.parseFile(self.kernel_root + path,
+                              arch=self.curr_arch, kernel_root=self.kernel_root)
+            for used in fparser.getHeaders():
+                path = self.pathFromHeader(used)
+                if not path in needed:
+                    needed[path] = set()
+                    workqueue.append(path)
+                for user in fparser.getHeaderUsers(used):
+                    needed[path].add(user)
+
+        # now copy the arch-specific headers into the global list
+        for header in needed.keys():
+            users = needed[header]
+            if not header in self.needed:
+                self.needed[header] = set()
+
+            for user in users:
+                self.needed[header].add(user)
+
+    def scanForAllArchs(self):
+        """scan for all architectures and return the set of all needed kernel headers"""
+        for arch in self.archs:
+            self.setArch(arch)
+            self.scanForArch()
+
+        return set(self.needed.keys())
+
+    def getHeaderUsers(self,header):
+        """return the set of all users for a given header"""
+        return set(self.needed[header])
+
+    def getArchHeaders(self,arch):
+        """return the set of all <asm/...> headers required by a given architecture"""
+        return set()  # XXX: TODO
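To make the asm/ translation above concrete, here is a small, hedged illustration of pathFromHeader()/pathToHeader() round-tripping (the constructor arguments are made up):

    finder = KernelHeaderFinder([ "asm/types.h" ], [ "arm" ], "include", {})
    finder.setArch("arm")
    print finder.pathFromHeader("asm/types.h")     # -> 'asm-arm/types.h'
    print finder.pathToHeader("asm-arm/types.h")   # -> 'asm/types.h'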
+
+#####################################################################################
+#####################################################################################
+#####                                                                           #####
+#####           C O N F I G   P A R S E R                                       #####
+#####                                                                           #####
+#####################################################################################
+#####################################################################################
+
+class ConfigParser:
+    """a class used to parse the Linux kernel .config file"""
+    re_CONFIG_ = re.compile(r"^(CONFIG_\w+)=(.*)$")
+
+    def __init__(self):
+        self.items = {}
+        self.duplicates = False
+
+    def parseLine(self,line):
+        line = string.strip(line)
+
+        # skip empty and comment lines
+        if len(line) == 0 or line[0] == "#":
+            return
+
+        m = ConfigParser.re_CONFIG_.match(line)
+        if not m: return
+
+        name = m.group(1)
+        value = m.group(2)
+
+        if name in self.items:  # aarg, duplicate value
+            self.duplicates = True
+
+        self.items[name] = value
+
+    def parseFile(self,path):
+        f = open(path, "r")
+        for line in f:
+            if len(line) > 0:
+                if line[-1] == "\n":
+                    line = line[:-1]
+                    if len(line) > 0 and line[-1] == "\r":
+                        line = line[:-1]
+            self.parseLine(line)
+        f.close()
+
+    def getDefinitions(self):
+        """retrieve a dictionary containing definitions for CONFIG_XXX"""
+        return self.items.copy()
+
+    def __repr__(self):
+        return repr(self.items)
+
+    def __str__(self):
+        return str(self.items)
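As used by find_headers.py above, ConfigParser reduces a kernel .config to a plain dictionary. A minimal sketch (the config lines are hypothetical):

    cparser = ConfigParser()
    cparser.parseLine("CONFIG_SWAP=y")
    cparser.parseLine("# CONFIG_DEBUG is not set")   # comment lines are skipped
    print cparser.getDefinitions()                   # -> {'CONFIG_SWAP': 'y'}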
diff --git a/libc/kernel/tools/update_all.py b/libc/kernel/tools/update_all.py
new file mode 100755
index 0000000..6272fcf
--- /dev/null
+++ b/libc/kernel/tools/update_all.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#
+import sys, cpp, kernel, glob, os, re, getopt, clean_header
+from defaults import *
+from utils import *
+
+def usage():
+    print """\
+  usage: %(progname)s
+
+    this program is used to update all the auto-generated clean headers
+    used by the Bionic C library. it assumes the following:
+
+      - a set of source kernel headers is located in '../original',
+        relative to the program's directory
+
+      - the clean headers will be placed in '../arch-<arch>/asm',
+        '../common/linux', '../common/asm-generic', etc..
+
+      - if ANDROID_PRODUCT_OUT is defined in your environment, you're
+        using the Android build system, and the program will issue
+        p4 add / edit / delete commands to update the depot for you.
+        (you'll need to p4 submit manually though)
+""" % { "progname" : os.path.basename(sys.argv[0]) }
+    sys.exit(0)
+
+try:
+    optlist, args = getopt.getopt( sys.argv[1:], '' )
+except:
+    # unrecognized option
+    sys.stderr.write( "error: unrecognized option\n" )
+    usage()
+
+if len(optlist) > 0 or len(args) > 0:
+    usage()
+
+progdir = find_program_dir()
+original_dir = os.path.normpath( progdir + "/../original" )
+if not os.path.isdir( original_dir ):
+    panic( "required directory does not exist: %s\n" % original_dir )
+
+# find all source files in 'original'
+#
+sources = []
+for root, dirs, files in os.walk( original_dir ):
+    for file in files:
+        base, ext = os.path.splitext(file)
+        if ext == ".h":
+            sources.append( "%s/%s" % (root,file) )
+
+b = BatchFileUpdater()
+
+for arch in kernel_archs:
+    b.readDir( os.path.normpath( progdir + "/../arch-%s" % arch ) )
+
+b.readDir( os.path.normpath( progdir + "/../common" ) )
+
+#print "OLD " + repr(b.old_files)
+
+for path in sources:
+    dst_path, newdata = clean_header.cleanupFile(path)
+    if not dst_path:
+        continue
+
+    b.readFile( dst_path )
+    r = b.editFile( dst_path, newdata )
+    if r == 0:
+        r = "unchanged"
+    elif r == 1:
+        r = "edited"
+    else:
+        r = "added"
+
+    print "cleaning: %-*s -> %-*s (%s)" % ( 35, path, 35, dst_path, r )
+
+usePerforce = os.environ.has_key("ANDROID_PRODUCT_OUT")
+
+if usePerforce:
+    b.updateP4Files()
+else:
+    b.updateFiles()
+
+sys.exit(0)
diff --git a/libc/kernel/tools/utils.py b/libc/kernel/tools/utils.py
new file mode 100644
index 0000000..763c7d2
--- /dev/null
+++ b/libc/kernel/tools/utils.py
@@ -0,0 +1,397 @@
+# common python utility routines for the Bionic tool scripts
+
+import sys, os, commands, string
+
+# basic debugging trace support
+# call D_setlevel to set the verbosity level
+# and D(), D2(), D3(), D4() to add traces
+#
+verbose = 0
+
+def panic(msg):
+    sys.stderr.write( find_program_name() + ": error: " )
+    sys.stderr.write( msg )
+    sys.exit(1)
+
+def D(msg):
+    global verbose
+    if verbose > 0:
+        print msg
+
+def D2(msg):
+    global verbose
+    if verbose >= 2:
+        print msg
+
+def D3(msg):
+    global verbose
+    if verbose >= 3:
+        print msg
+
+def D4(msg):
+    global verbose
+    if verbose >= 4:
+        print msg
+
+def D_setlevel(level):
+    global verbose
+    verbose = level
+
+
+# other stuff
+#
+#
+def find_program_name():
+    return os.path.basename(sys.argv[0])
+
+def find_program_dir():
+    return os.path.dirname(sys.argv[0])
+
+def find_file_from_upwards(from_path,target_file):
+    """find a file in the current directory or its parents. if 'from_path' is None,
+       search from the current program's directory"""
+    path = from_path
+    if path == None:
+        path = os.path.realpath(sys.argv[0])
+        path = os.path.dirname(path)
+        D("this script seems to be located in: %s" % path)
+
+    while 1:
+        D("probing "+path)
+        if path == "":
+            file = target_file
+        else:
+            file = path + "/" + target_file
+
+        if os.path.isfile(file):
+            D("found %s in %s" % (target_file, path))
+            return file
+
+        if path == "":
+            return None
+
+        path = os.path.dirname(path)
+
+def find_bionic_root():
+    file = find_file_from_upwards(None, "SYSCALLS.TXT")
+    if file:
+        return os.path.dirname(file)
+    else:
+        return None
+
+def find_kernel_headers():
+    """try to find the directory containing the kernel headers for this machine"""
+    status, version = commands.getstatusoutput( "uname -r" )  # get Linux kernel version
+    if status != 0:
+        D("could not execute 'uname -r' command properly")
+        return None
+
+    # get rid of the "-xenU" suffix that is found in Xen virtual machines
+    if len(version) > 5 and version[-5:] == "-xenU":
+        version = version[:-5]
+
+    path = "/usr/src/linux-headers-" + version
+    D("probing %s for kernel headers" % (path+"/include"))
+    ret = os.path.isdir( path )
+    if ret:
+        D("found kernel headers in: %s" % (path + "/include"))
+        return path
+    return None
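The class defined just below parses SYSCALLS.TXT. Judging from its parse_line() method, each line holds a return type, an optional 'kernel:bionic' name pair, a parameter list, and one or two syscall numbers (or 'stub'). A hedged sketch with a hypothetical entry:

    parser = SysCallsTxtParser()
    parser.parse_line("int dup2:dup2(int, int) 63")   # made-up line, not a real table entry
    print parser.syscalls[0]["name"]    # -> 'dup2'
    print parser.syscalls[0]["id"]      # -> 63
    print parser.syscalls[0]["decl"]    # -> 'int             dup2 (int, int);'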
+
+# parser for the SYSCALLS.TXT file
+#
+class SysCallsTxtParser:
+    def __init__(self):
+        self.syscalls = []
+        self.lineno = 0
+
+    def E(self, msg):
+        print "%d: %s" % (self.lineno, msg)
+
+    def parse_line(self, line):
+        pos_lparen = line.find('(')
+        E = self.E
+        if pos_lparen < 0:
+            E("missing left parenthesis in '%s'" % line)
+            return
+
+        pos_rparen = line.rfind(')')
+        if pos_rparen < 0 or pos_rparen <= pos_lparen:
+            E("missing or misplaced right parenthesis in '%s'" % line)
+            return
+
+        return_type = line[:pos_lparen].strip().split()
+        if len(return_type) < 2:
+            E("missing return type in '%s'" % line)
+            return
+
+        syscall_func = return_type[-1]
+        return_type = string.join(return_type[:-1],' ')
+
+        pos_colon = syscall_func.find(':')
+        if pos_colon < 0:
+            syscall_name = syscall_func
+        else:
+            if pos_colon == 0 or pos_colon+1 >= len(syscall_func):
+                E("misplaced colon in '%s'" % line)
+                return
+            syscall_name = syscall_func[pos_colon+1:]
+            syscall_func = syscall_func[:pos_colon]
+
+        if pos_rparen > pos_lparen+1:
+            syscall_params = line[pos_lparen+1:pos_rparen].split(',')
+            params = string.join(syscall_params,',')
+        else:
+            syscall_params = []
+            params = "void"
+
+        number = line[pos_rparen+1:].strip()
+        if number == "stub":
+            syscall_id = -1
+            syscall_id2 = -1
+        else:
+            try:
+                if number[0] == '#':
+                    number = number[1:].strip()
+                numbers = string.split(number,',')
+                syscall_id = int(numbers[0])
+                syscall_id2 = syscall_id
+                if len(numbers) > 1:
+                    syscall_id2 = int(numbers[1])
+            except:
+                E("invalid syscall number in '%s'" % line)
+                return
+
+        t = { "id"     : syscall_id,
+              "id2"    : syscall_id2,
+              "name"   : syscall_name,
+              "func"   : syscall_func,
+              "params" : syscall_params,
+              "decl"   : "%-15s %s (%s);" % (return_type, syscall_func, params) }
+
+        self.syscalls.append(t)
+
+    def parse_file(self, file_path):
+        fp = open(file_path)
+        for line in fp.xreadlines():
+            self.lineno += 1
+            line = line.strip()
+            if not line: continue
+            if line[0] == '#': continue
+            self.parse_line(line)
+
+        fp.close()
+
+
+class Output:
+    def __init__(self,out=sys.stdout):
+        self.out = out
+
+    def write(self,msg):
+        self.out.write(msg)
+
+    def writeln(self,msg):
+        self.out.write(msg)
+        self.out.write("\n")
+
+class StringOutput:
+    def __init__(self):
+        self.line = ""
+
+    def write(self,msg):
+        self.line += msg
+        D2("write '%s'" % msg)
+
+    def writeln(self,msg):
+        self.line += msg + '\n'
+        D2("write '%s\\n'" % msg)
+
+    def get(self):
+        return self.line
+
+
+def create_file_path(path):
+    dirs = []
+    while 1:
+        parent = os.path.dirname(path)
+        #print "parent: %s <- %s" % (parent, path)
+        if parent == "/" or parent == "":
+            break
+        dirs.append(parent)
+        path = parent
+
+    dirs.reverse()
+    for dir in dirs:
+        #print "dir %s" % dir
+        if os.path.isdir(dir):
+            continue
+        os.mkdir(dir)
+
+def walk_source_files(paths,callback,args,excludes=[]):
+    """recursively walk a list of paths and files, only keeping the source files in directories"""
+    for path in paths:
+        if not os.path.isdir(path):
+            callback(path,args)
+        else:
+            for root, dirs, files in os.walk(path):
+                #print "w-- %s (ex: %s)" % (repr((root,dirs)), repr(excludes))
+                if len(excludes):
+                    for d in dirs[:]:
+                        if d in excludes:
+                            dirs.remove(d)
+                for f in files:
+                    r, ext = os.path.splitext(f)
+                    if ext in [ ".h", ".c", ".cpp", ".S" ]:
+                        callback( "%s/%s" % (root,f), args )
+
+def cleanup_dir(path):
+    """create a directory if needed, and ensure that it is totally empty
+       by removing any existing content in it"""
+    if not os.path.exists(path):
+        os.mkdir(path)
+    else:
+        for root, dirs, files in os.walk(path, topdown=False):
+            if root.endswith("kernel_headers/"):
+                # skip 'kernel_headers'
+                continue
+            for name in files:
+                os.remove(os.path.join(root, name))
+            for name in dirs:
+                os.rmdir(os.path.join(root, name))
+
+def update_file( path, newdata ):
+    """update a file on disk, only if its content has changed"""
+    if os.path.exists( path ):
+        try:
+            f = open( path, "r" )
+            olddata = f.read()
+            f.close()
+        except:
+            D("update_file: cannot read existing file '%s'" % path)
+            return 0
+
+        if olddata == newdata:
+            D2("update_file: no change to file '%s'" % path )
+            return 0
+
+        update = 1
+    else:
+        try:
+            create_file_path(path)
+        except:
+            D("update_file: cannot create path to '%s'" % path)
+            return 0
+
+    f = open( path, "w" )
+    f.write( newdata )
+    f.close()
+
+    return 1
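update_file() above returns 1 only when it actually (re)writes the target, so callers can count real changes. A small hedged sketch (the path and content are hypothetical):

    if update_file("/tmp/example.h", "/* new content */\n"):
        print "file written"
    else:
        print "file already up to date"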
+
+class BatchFileUpdater:
+    """a class used to edit several files at once"""
+    def __init__(self):
+        self.old_files = set()
+        self.new_files = set()
+        self.new_data = {}
+
+    def readFile(self,path):
+        #path = os.path.realpath(path)
+        if os.path.exists(path):
+            self.old_files.add(path)
+
+    def readDir(self,path):
+        #path = os.path.realpath(path)
+        for root, dirs, files in os.walk(path):
+            for f in files:
+                dst = "%s/%s" % (root,f)
+                self.old_files.add(dst)
+
+    def editFile(self,dst,data):
+        """edit a destination file. if the file is not mapped from a source,
+           it will be added. return 0 if the file content wasn't changed,
+           1 if it was edited, or 2 if the file is new"""
+        #dst = os.path.realpath(dst)
+        result = 1
+        if os.path.exists(dst):
+            f = open(dst, "r")
+            olddata = f.read()
+            f.close()
+            if olddata == data:
+                self.old_files.remove(dst)
+                return 0
+        else:
+            result = 2
+
+        self.new_data[dst] = data
+        self.new_files.add(dst)
+        return result
+
+    def getChanges(self):
+        """determine changes, returns (adds, deletes, edits)"""
+        adds = set()
+        edits = set()
+        deletes = set()
+
+        for dst in self.new_files:
+            if not (dst in self.old_files):
+                adds.add(dst)
+            else:
+                edits.add(dst)
+
+        for dst in self.old_files:
+            if not dst in self.new_files:
+                deletes.add(dst)
+
+        return (adds, deletes, edits)
+
+    def _writeFile(self,dst,data=None):
+        if not os.path.exists(os.path.dirname(dst)):
+            create_file_path(dst)
+        if data == None:
+            data = self.new_data[dst]
+        f = open(dst, "w")
+        f.write(data)
+        f.close()
+
+    def updateFiles(self):
+        adds, deletes, edits = self.getChanges()
+
+        for dst in sorted(adds):
+            self._writeFile(dst)
+
+        for dst in sorted(edits):
+            self._writeFile(dst)
+
+        for dst in sorted(deletes):
+            os.remove(dst)
+
+    def updateP4Files(self):
+        adds, deletes, edits = self.getChanges()
+
+        if len(adds):
+            files = string.join(sorted(adds)," ")
+            D( "%d new files will be p4 add-ed" % len(adds) )
+            for dst in adds:
+                self._writeFile(dst)
+            D2("P4 ADDS: %s" % files)
+            o = commands.getoutput( "p4 add " + files )
+            D2( o )
+
+        if len(edits):
+            files = string.join(sorted(edits)," ")
+            D( "%d files will be p4 edit-ed" % len(edits) )
+            D2("P4 EDITS: %s" % files)
+            o = commands.getoutput( "p4 edit " + files )
+            D2( o )
+            for dst in edits:
+                self._writeFile(dst)
+
+        if len(deletes):
+            files = string.join(sorted(deletes)," ")
+            D( "%d files will be p4 delete-d" % len(deletes) )
+            D2("P4 DELETES: %s" % files)
+            o = commands.getoutput( "p4 delete " + files )
+            D2( o )
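Finally, a hedged sketch of how BatchFileUpdater is meant to be driven, mirroring the update_all.py flow above (the paths are hypothetical):

    b = BatchFileUpdater()
    b.readDir("../common")                # snapshot the existing destination tree
    b.readFile("../common/linux/foo.h")
    r = b.editFile("../common/linux/foo.h", "/* cleaned */\n")
    # r is 0 (unchanged), 1 (edited) or 2 (new), as labeled in update_all.py
    b.updateFiles()                       # or b.updateP4Files() under the Android build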