author | noelallen@google.com <noelallen@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-08 15:19:04 +0000
committer | noelallen@google.com <noelallen@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-08 15:19:04 +0000
commit | d3864f5c1b43d97fa80f2007c2dc24b4002c3d0a (patch)
tree | d25074af1b20945edf9bdf79bbae9276fa594042 /ppapi/generators
parent | ab63c4781a5d2cd898364677bb21a0d62de9173d (diff)
download | chromium_src-d3864f5c1b43d97fa80f2007c2dc24b4002c3d0a.zip
         | chromium_src-d3864f5c1b43d97fa80f2007c2dc24b4002c3d0a.tar.gz
         | chromium_src-d3864f5c1b43d97fa80f2007c2dc24b4002c3d0a.tar.bz2
Add parser and test files
This CL adds the PLY-based parser, parser test files, and some additional
cleanup of the lexer. The parser is part of a generator which is still under
development. The parser can be run standalone, in which case it acts as a
test, verifying the success (OK) and failure (FAIL) tags it finds embedded
as comments in the source files. Adding the switch --output will print the AST.
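For illustration, a minimal sketch of driving the parser programmatically the
same way the standalone test mode in Main() does (TestNode and TestLog are the
builder and logger defined in idl_parser.py in this CL; the file path is only
an example):

    # Parse one test file and dump the resulting AST (Python 2, as in this CL).
    from idl_parser import IDLParser, TestNode, TestLog

    parser = IDLParser(TestNode, TestLog, {'verbose': True})
    nodes = parser.ParseFile('test_parser/enum.idl')
    for node in nodes:
      node.Dump(0)  # prints one indented "Class(Name)" line per node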
R=sehr@google.com
BUG=77551
TEST=python idl_parser.py test_parser/*.idl
Review URL: http://codereview.chromium.org/6754001
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@80933 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'ppapi/generators')
-rw-r--r-- | ppapi/generators/idl_lexer.py | 64
-rw-r--r-- | ppapi/generators/idl_parser.py | 829
-rw-r--r-- | ppapi/generators/test_lex.in | 11
-rw-r--r-- | ppapi/generators/test_parser/enum.idl | 50
-rw-r--r-- | ppapi/generators/test_parser/interface.idl | 56
-rw-r--r-- | ppapi/generators/test_parser/struct.idl | 34
-rw-r--r-- | ppapi/generators/test_parser/typedef.idl | 46
7 files changed, 1068 insertions, 22 deletions
diff --git a/ppapi/generators/idl_lexer.py b/ppapi/generators/idl_lexer.py
index 2eb1668..b8092ec 100644
--- a/ppapi/generators/idl_lexer.py
+++ b/ppapi/generators/idl_lexer.py
@@ -6,12 +6,30 @@ """ Lexer for PPAPI IDL """
+#
+# IDL Lexer
+#
+# The lexer uses the PLY lex library to build a tokenizer which understands
+# WebIDL tokens.
+#
+# WebIDL, and WebIDL regular expressions, can be found at:
+#   http://dev.w3.org/2006/webapi/WebIDL/
+# PLY can be found at:
+#   http://www.dabeaz.com/ply/
 
 import getopt
 import os.path
 import re
 import sys
 
+LEXER_OPTIONS = {
+  'output': False,
+  'test_expect': False,
+  'test_same': False,
+  'verbose': False
+}
+
+
 #
 # Try to load the ply module, if not, then assume it is in the third_party
 # directory, relative to ppapi
@@ -24,6 +42,8 @@ except:
   sys.path.append(third_party)
   from ply import lex
 
+
+
 #
 # IDL Lexer
 #
@@ -42,6 +62,7 @@ class IDLLexer(object):
 
     # Data types
     'FLOAT',
+    'OCT',
     'INT',
     'HEX',
     'STRING',
@@ -69,15 +90,17 @@ class IDLLexer(object):
   #
   # Lex assumes any value or function in the form of 't_<TYPE>' represents a
   # regular expression where a match will emit a token of type <TYPE>.  In the
-  # case of a function, the function is called when a match is made.
+  # case of a function, the function is called when a match is made.  These
+  # definitions come from WebIDL.
 
   # 't_ignore' is a special match of items to ignore
   t_ignore = ' \t'
 
   # Constant values
   t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
-  t_HEX = r'0x[a-fA-F0-9]+'
-  t_INT = r'-?\d+'
+  t_INT = r'-?[0-9]+'
+  t_OCT = r'-?0[0-7]+'
+  t_HEX = r'-?0[Xx][0-9A-Fa-f]+'
   t_LSHIFT = r'<<'
 
   # A line ending '\n', we use this to increment the line number
@@ -146,8 +169,11 @@ class IDLLexer(object):
     self.index = [0]
     self.lexobj.input(data)
 
-  def __init__(self):
+  def __init__(self, options = {}):
     self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    for k in options:
+      LEXER_OPTIONS[k] = True
+
 
 #
@@ -191,11 +217,13 @@ def TextToTokens(source):
 # single space.  The new source is then tokenized and compared against the
 # old set.
 #
-def TestSame(values, output=False, verbose=False):
+def TestSame(values):
+  global LEXER_OPTIONS
+
   src1 = ' '.join(values)
   src2 = ' '.join(TextToTokens(src1))
 
-  if output:
+  if LEXER_OPTIONS['output']:
     sys.stdout.write('Generating original.txt and tokenized.txt\n')
     open('original.txt', 'w').write(src1)
     open('tokenized.txt', 'w').write(src2)
@@ -227,7 +255,7 @@ def TestExpect(tokens):
       index += 2
 
     if type != token.type:
-      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.' %
+      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                        (type, token.type, token.value))
       errors += 1
@@ -242,6 +270,8 @@
 
 def Main(args):
+  global LEXER_OPTIONS
+
   try:
     long_opts = ['output', 'verbose', 'test_expect', 'test_same']
     usage = 'Usage: idl_lexer.py %s [<src.idl> ...]' % ' '.join(
@@ -258,27 +288,17 @@ def Main(args):
   verbose = False
 
   for opt, val in opts:
-    if opt == '--output':
-      output = True
-
-    if opt == '--test_expect':
-      test_expect = True
-
-    if opt == '--test_same':
-      test_same = True
-
-    if opt == '--verbose':
-      verbose = True
+    LEXER_OPTIONS[opt[2:]] = True
 
   try:
     tokens = FilesToTokens(filenames, verbose)
    values = [tok.value for tok in tokens]
-    if output: sys.stdout.write(' <> '.join(values) + '\n')
-    if test_same:
-      if TestSame(values, output = output, verbose = verbose):
+    if LEXER_OPTIONS['output']: sys.stdout.write(' <> '.join(values) + '\n')
+    if LEXER_OPTIONS['test_same']:
+      if TestSame(values):
         return -1
-    if test_expect:
+    if LEXER_OPTIONS['test_expect']:
       if TestExpect(tokens):
         return -1
     return 0
diff --git a/ppapi/generators/idl_parser.py b/ppapi/generators/idl_parser.py
new file mode 100644
index 0000000..8a5350f
--- /dev/null
+++ b/ppapi/generators/idl_parser.py
@@ -0,0 +1,829 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+""" Parser for PPAPI IDL """
+
+#
+# IDL Parser
+#
+# The parser uses the PLY yacc library to build a set of parsing rules based
+# on WebIDL.
+#
+# WebIDL, and WebIDL regular expressions, can be found at:
+#   http://dev.w3.org/2006/webapi/WebIDL/
+# PLY can be found at:
+#   http://www.dabeaz.com/ply/
+#
+# The parser generates a tree by recursively matching sets of items against
+# defined patterns.  When a match is made, that set of items is reduced
+# to a new item.  The new item can provide a match for parent patterns.
+# In this way an AST is built (reduced) depth first.
+
+
+import getopt
+import os.path
+import re
+import sys
+
+from idl_lexer import IDLLexer
+from ply import lex
+from ply import yacc
+
+PARSER_OPTIONS = {
+  'build_debug': False,
+  'parse_debug': False,
+  'token_debug': False,
+  'output': False,
+  'verbose': False
+}
+
+#
+# ERROR_REMAP
+#
+# Maps the standard error formula into a more friendly error message.
+#
+ERROR_REMAP = {
+  'Unexpected ")" after "(".' : 'Empty argument list.',
+  'Unexpected ")" after ",".' : 'Missing argument.',
+  'Unexpected "}" after ",".' : 'Trailing comma in block.',
+  'Unexpected "}" after "{".' : 'Unexpected empty block.',
+  'Unexpected comment "/*" after "}".' : 'Unexpected trailing comment.',
+  'Unexpected "{" after keyword "enum".' : 'Enum missing name.',
+  'Unexpected "{" after keyword "struct".' : 'Struct missing name.',
+  'Unexpected "{" after keyword "interface".' : 'Interface missing name.',
+}
+
+# DumpReduction
+#
+# Prints out the set of items which matched a particular pattern and the
+# new item or set it was reduced to.
+def DumpReduction(cls, p):
+  if p[0] is None:
+    print "OBJ: %s(%d) - None" % (cls, len(p))
+  else:
+    out = ""
+    for index in range(len(p) - 1):
+      out += " >%s< " % str(p[index + 1])
+    print "OBJ: %s(%d) - %s : %s" % (cls, len(p), str(p[0]), out)
+
+
+# CopyToList
+#
+# Takes an input item, list, or None, and returns a new list of that set.
+def CopyToList(item):
+  # If the item is 'Empty' make it an empty list
+  if not item: item = []
+
+  # If the item is not a list
+  if type(item) is not type([]): item = [item]
+
+  # Make a copy we can modify
+  return list(item)
+
+
+
+# ListFromConcat
+#
+# Generate a new List by joining two sets of inputs which can be an
+# individual item, a list of items, or None.
+def ListFromConcat(*items):
+  itemsout = []
+  for item in items:
+    itemlist = CopyToList(item)
+    itemsout.extend(itemlist)
+
+  return itemsout
+
+
+# TokenTypeName
+#
+# Generate a string which has the type and value of the token.
+def TokenTypeName(t):
+  if t.type == 'SYMBOL': return 'symbol %s' % t.value
+  if t.type in ['HEX', 'INT', 'OCT', 'FLOAT']:
+    return 'value %s' % t.value
+  if t.type == 'STRING': return 'string "%s"' % t.value
+  if t.type == 'COMMENT': return 'comment "%s"' % t.value[:2]
+  if t.type == t.value: return '"%s"' % t.value
+  return 'keyword "%s"' % t.value
+
+
+# Send a string to stdout
+def PrintInfo(text):
+  sys.stdout.write("%s\n" % text)
+
+# Send a string to stderr
+def PrintError(text):
+  sys.stderr.write("%s\n" % text)
+
+# Send a string to stderr containing a file, line number and error message
+def LogError(filename, lineno, pos, msg):
+  PrintError("%s(%d) : %s" % (filename, lineno + 1, msg))
+
+
+#
+# IDL Parser
+#
+# The Parser inherits from the Lexer to provide PLY with the tokenizing
+# definitions.  Parsing patterns are encoded as functions, where p_<name>
+# is called any time a pattern matching the function documentation is found.
+# Patterns are expressed in the form of:
+#   """ <new item> : <item> ....
+#                  | <item> ...."""
+#
+# Where new item is the result of a match against one or more sets of items
+# separated by the "|".
+#
+# The function is called with an object 'p' where p[0] is the output object
+# and p[n] is the set of inputs for positive values of 'n'.  Len(p) can be
+# used to distinguish between multiple item sets in the pattern.
+#
+# For more details on parsing refer to the PLY documentation at
+#   http://www.dabeaz.com/ply/
+#
+#
+# The parser uses the following conventions:
+#   a <type>_block defines a block of <type> definitions in the form of:
+#     [comment] [ext_attr_block] <type> <name> '{' <type>_list '}' ';'
+#   A block is reduced by returning an object of <type> with a name of <name>
+#   which in turn has <type>_list as children.
+#
+#   A [comment] is an optional C style comment block enclosed in /* ... */
+#   which is appended to the adjacent node as a child.
+#
+#   A [ext_attr_block] is an optional list of Extended Attributes which is
+#   appended to the adjacent node as a child.
+#
+#   a <type>_list defines a list of <type> items which will be passed as a
+#   list of children to the parent pattern.  A list is in the form of:
+#     [comment] [ext_attr_block] <...DEF...> ';' <type>_list | (empty)
+#   or
+#     [comment] [ext_attr_block] <...DEF...> <type>_cont
+#
+#   In the first form, the list is reduced recursively, where the right side
+#   <type>_list is first reduced then joined with the pattern currently being
+#   matched.  The list is terminated when the (empty) pattern is matched.
+#
+#   In the second form the list is reduced recursively, where the right side
+#   <type>_cont is first reduced then joined with the pattern currently being
+#   matched.  The <type>_cont is in the form of:
+#     ',' <type>_list | (empty)
+#   The <type>_cont form is used to consume the ',' which only occurs when
+#   there is more than one object in the list.  The <type>_cont also provides
+#   the terminating (empty) definition.
+#
+
+
+class IDLParser(IDLLexer):
+# TOP
+#
+# This pattern defines the top of the parse tree.  The parse tree is in the
+# form of:
+#
+# top
+#   *modifiers
+#     *comments
+#     *ext_attr_block
+#       ext_attr_list
+#         attr_arg_list
+#   *integer, value
+#   *param_list
+#   *typeref
+#
+#   top_list
+#     describe_block
+#       describe_list
+#     enum_block
+#       enum_type
+#     interface_block
+#     struct_block
+#       member
+#     typedef_decl
+#       typedef_data
+#       typedef_func
+#
+# (* sub matches found at multiple levels and are not truly children of top)
+#
+# We force all input files to start with two comments.  The first comment is
+# a Copyright notice followed by a set of file wide Extended Attributes,
+# followed by the file comment and finally by file level patterns.
+#
+  # Find the Copyright, File comment, and optional file wide attributes.  We
+  # use a match with COMMENT instead of comments to force the token to be
+  # present.  The extended attributes and the top_list become siblings which
+  # in turn are children of the file object created from the results of top.
+  def p_top(self, p):
+    """top : COMMENT COMMENT ext_attr_block top_list"""
+
+    Copyright = self.BuildProduction('Copyright', p, 1, None)
+    Filedoc = self.BuildProduction('Comment', p, 2, None)
+
+    out = ListFromConcat(p[3], p[4])
+    out = ListFromConcat(Filedoc, out)
+    p[0] = ListFromConcat(Copyright, out)
+    if self.parse_debug: DumpReduction('top', p)
+
+  # Build a list of top level items.
+  def p_top_list(self, p):
+    """top_list : describe_block top_list
+                | enum_block top_list
+                | interface_block top_list
+                | struct_block top_list
+                | typedef_def top_list
+                | """
+    if len(p) > 2:
+      p[0] = ListFromConcat(p[1], p[2])
+    if self.parse_debug: DumpReduction('top_list', p)
+
+  # Recover from error and continue parsing at the next top match.
+  def p_top_error(self, p):
+    """top_list : error top_list"""
+    p[0] = p[2]
+
+#
+# Modifier List
+#
+#
+  def p_modifiers(self, p):
+    """modifiers : comments ext_attr_block"""
+    p[0] = ListFromConcat(p[1], p[2])
+    if self.parse_debug: DumpReduction('modifiers', p)
+
+#
+# Comments
+#
+# Comments are an optional list of C style comment objects.  Comments are
+# returned as a list or None.
+#
+  def p_comments(self, p):
+    """comments : COMMENT comments
+                | """
+    if len(p) > 1:
+      child = self.BuildProduction('Comment', p, 1, None)
+      p[0] = ListFromConcat(child, p[2])
+      if self.parse_debug: DumpReduction('comments', p)
+    else:
+      if self.parse_debug: DumpReduction('no comments', p)
+
+#
+# Extended Attributes
+#
+# Extended Attributes denote properties which will be applied to a node in
+# the AST.  A list of extended attributes is denoted by brackets '[' ... ']'
+# enclosing a comma separated list of extended attributes in the form of:
+#
+#   Name
+#   Name=HEX | INT | OCT | FLOAT
+#   Name="STRING"
+#   Name=Function(arg ...)
+# TODO(noelallen) - Not currently supported:
+#   ** Name(arg ...) ...
+#   ** Name=Scope::Value
+#
+# Extended Attributes are returned as a list or None.
+
+  def p_ext_attr_block(self, p):
+    """ext_attr_block : '[' ext_attr_list ']'
+                      | """
+    if len(p) > 1:
+      p[0] = p[2]
+      if self.parse_debug: DumpReduction('ext_attr_block', p)
+    else:
+      if self.parse_debug: DumpReduction('no ext_attr_block', p)
+
+  def p_ext_attr_list(self, p):
+    """ext_attr_list : SYMBOL '=' value ext_attr_cont
+                     | SYMBOL '(' attr_arg_list ')' ext_attr_cont
+                     | SYMBOL ext_attr_cont """
+    if len(p) == 3:
+      p[0] = ListFromConcat(self.BuildExtAttribute(p[1], 'True'), p[2])
+    if len(p) == 5:
+      p[0] = ListFromConcat(self.BuildExtAttribute(p[1], p[3]), p[4])
+    if len(p) == 6:
+      p[0] = ListFromConcat(self.BuildExtAttribute(p[1], p[3]), p[5])
+    if self.parse_debug: DumpReduction('ext_attribute_list', p)
+
+  def p_ext_attr_cont(self, p):
+    """ext_attr_cont : ',' ext_attr_list
+                     |"""
+    if len(p) > 1:
+      p[0] = p[2]
+    if self.parse_debug: DumpReduction('ext_attribute_cont', p)
+
+  def p_attr_arg_list(self, p):
+    """attr_arg_list : SYMBOL attr_arg_cont
+                     | value attr_arg_cont """
+    p[0] = ','.join(ListFromConcat(p[1], p[2]))
+    if self.parse_debug: DumpReduction('attr_arg_list', p)
+
+  def p_attr_arg_cont(self, p):
+    """attr_arg_cont : ',' attr_arg_list
+                     | """
+    if len(p) > 1: p[0] = p[2]
+    if self.parse_debug: DumpReduction('attr_arg_cont', p)
+
+  def p_attr_arg_error(self, p):
+    """attr_arg_cont : error attr_arg_cont"""
+    p[0] = p[2]
+    if self.parse_debug: DumpReduction('attr_arg_error', p)
+
+
+#
+# Describe
+#
+# A describe block is defined at the top level.  It provides a mechanism for
+# attributing a group of ext_attr to a describe_list.  Members of the
+# describe list are language specific 'Type' declarations.
+#
+  def p_describe_block(self, p):
+    """describe_block : modifiers DESCRIBE '{' describe_list '}' ';'"""
+    children = ListFromConcat(p[1], p[4])
+    p[0] = self.BuildProduction('Describe', p, 2, children)
+    if self.parse_debug: DumpReduction('describe_block', p)
+
+  def p_describe_list(self, p):
+    """describe_list : modifiers SYMBOL ';' describe_list
+                     | modifiers ENUM ';' describe_list
+                     | modifiers STRUCT ';' describe_list
+                     | modifiers TYPEDEF ';' describe_list
+                     | """
+    if len(p) > 1:
+      Type = self.BuildProduction('Type', p, 2, p[1])
+      p[0] = ListFromConcat(Type, p[4])
+
+  def p_describe_error(self, p):
+    """describe_list : error describe_list"""
+    p[0] = p[2]
+
+#
+# Constant Values (integer, value)
+#
+# Constant values can be found at various levels.  A constant value is
+# returned as the string value after being validated against a FLOAT, HEX,
+# INT, OCT or STRING pattern as appropriate.
+#
+  def p_value(self, p):
+    """value : FLOAT
+             | HEX
+             | INT
+             | OCT
+             | STRING"""
+    p[0] = p[1]
+    if self.parse_debug: DumpReduction('value', p)
+
+  def p_value_lshift(self, p):
+    """value : integer LSHIFT INT"""
+    p[0] = "(%s << %s)" % (p[1], p[3])
+    if self.parse_debug: DumpReduction('value', p)
+
+# Integers are numbers which may not be floats, used in cases like array
+# sizes.
+  def p_integer(self, p):
+    """integer : HEX
+               | INT
+               | OCT"""
+    p[0] = p[1]
+
+#
+# Parameter List
+#
+# A parameter list is a collection of arguments which are passed to a
+# function.  In the case of PPAPI, it is illegal to have a function
+# which passes no parameters.
+#
+# NOTE: We currently do not support functions which take no arguments in
+# PPAPI.
+  def p_param_list(self, p):
+    """param_list : modifiers typeref SYMBOL param_cont"""
+    children = ListFromConcat(p[1], p[2])
+    param = self.BuildProduction('Param', p, 3, children)
+    p[0] = ListFromConcat(param, p[4])
+    if self.parse_debug: DumpReduction('param_list', p)
+
+  def p_param_cont(self, p):
+    """param_cont : ',' param_list
+                  | """
+    if len(p) > 1:
+      p[0] = p[2]
+      if self.parse_debug: DumpReduction('param_cont', p)
+
+  def p_param_error(self, p):
+    """param_cont : error param_cont"""
+    p[0] = p[2]
+
+#
+# Typeref
+#
+# A typeref is a reference to a type definition.  The type definition may
+# be a built in such as int32_t or a defined type such as an enum, or
+# struct, or typedef.  Part of the reference to the type is how it is
+# used, such as directly, a fixed size array, or unsized (pointer).  The
+# reference is reduced and becomes a property of the parent Node.
+#
+  def p_typeref_data(self, p):
+    """typeref : SYMBOL typeref_arrays"""
+
+    Type = self.BuildExtAttribute('TYPEREF', p[1])
+    p[0] = ListFromConcat(Type, p[2])
+    if self.parse_debug: DumpReduction('typeref', p)
+
+  def p_typeref_arrays(self, p):
+    """typeref_arrays : '[' ']' typeref_arrays
+                      | '[' integer ']' typeref_arrays
+                      | """
+    if len(p) == 1: return
+
+    if len(p) == 5:
+      count = self.BuildExtAttribute('FIXED', p[2])
+      array = self.BuildProduction('Array', p, 2, ListFromConcat(p[4], count))
+    else:
+      array = self.BuildProduction('Array', p, 1, p[3])
+
+    p[0] = [array]
+    if self.parse_debug: DumpReduction('arrays', p)
+
+#
+# Enumeration
+#
+# An enumeration is a set of named integer constants.  An enumeration
+# is a valid type which can be referenced in other definitions.
+#
+  def p_enum_block(self, p):
+    """enum_block : modifiers ENUM SYMBOL '{' enum_list '}' ';'"""
+    p[0] = self.BuildProduction('Enum', p, 3, ListFromConcat(p[1], p[5]))
+    if self.parse_debug: DumpReduction('enum_block', p)
+
+  def p_enum_list(self, p):
+    """enum_list : comments SYMBOL '=' value enum_cont"""
+    val = self.BuildExtAttribute('VALUE', p[4])
+    enum = self.BuildProduction('EnumItem', p, 2, ListFromConcat(val, p[1]))
+    p[0] = ListFromConcat(enum, p[5])
+    if self.parse_debug: DumpReduction('enum_list', p)
+
+  def p_enum_cont(self, p):
+    """enum_cont : ',' enum_list
+                 |"""
+    if len(p) > 1: p[0] = p[2]
+    if self.parse_debug: DumpReduction('enum_cont', p)
+
+  def p_enum_cont_error(self, p):
+    """enum_cont : error enum_cont"""
+    p[0] = p[2]
+    if self.parse_debug: DumpReduction('enum_error', p)
+
+
+#
+# Interface
+#
+# An interface is a named collection of functions.
+#
+  def p_interface_block(self, p):
+    """interface_block : modifiers INTERFACE SYMBOL '{' member_list '}' ';'"""
+    p[0] = self.BuildProduction('Interface', p, 3, ListFromConcat(p[1], p[5]))
+    if self.parse_debug: DumpReduction('interface_block', p)
+
+  def p_member_list(self, p):
+    """member_list : member_function member_list
+                   | """
+    if len(p) > 1:
+      p[0] = ListFromConcat(p[1], p[2])
+      if self.parse_debug: DumpReduction('member_list', p)
+
+  def p_member_function(self, p):
+    """member_function : modifiers typeref SYMBOL '(' param_list ')' ';'"""
+    params = self.BuildProduction('Callspec', p, 4, p[5])
+    p[0] = self.BuildProduction('Function', p, 3, ListFromConcat(p[1], params))
+    if self.parse_debug: DumpReduction('member_function', p)
+
+  def p_member_error(self, p):
+    """member_list : error member_list"""
+    p[0] = p[2]
+
+#
+# Struct
+#
+# A struct is a named collection of members which in turn reference other
+# types.  The struct is a referencable type.
+#
+  def p_struct_block(self, p):
+    """struct_block : modifiers STRUCT SYMBOL '{' struct_list '}' ';'"""
+    p[0] = self.BuildProduction('Struct', p, 3, ListFromConcat(p[1], p[5]))
+    if self.parse_debug: DumpReduction('struct_block', p)
+
+  def p_struct_list(self, p):
+    """struct_list : modifiers typeref SYMBOL ';' struct_list
+                   | """
+    if len(p) > 1:
+      member = self.BuildProduction('Member', p, 3, ListFromConcat(p[1], p[2]))
+      p[0] = ListFromConcat(member, p[5])
+      if self.parse_debug: DumpReduction('struct_list', p)
+
+#
+# Typedef
+#
+# A typedef creates a new referencable type.  The typedef can specify an
+# array definition as well as a function declaration.
+#
+  def p_typedef_data(self, p):
+    """typedef_def : modifiers TYPEDEF typeref SYMBOL ';' """
+    p[0] = self.BuildProduction('Typedef', p, 4, ListFromConcat(p[1], p[3]))
+    if self.parse_debug: DumpReduction('typedef_data', p)
+
+  def p_typedef_func(self, p):
+    """typedef_def : modifiers TYPEDEF typeref SYMBOL '(' param_list ')' ';'"""
+    params = self.BuildProduction('Callspec', p, 5, p[6])
+    children = ListFromConcat(p[1], p[3], params)
+    p[0] = self.BuildProduction('Typedef', p, 4, children)
+    if self.parse_debug: DumpReduction('typedef_func', p)
+
+
+#
+# Parser Errors
+#
+# p_error is called whenever the parser cannot find a pattern match for
+# a set of items from the current state.  The p_error function defined here
+# is triggered, logging an error, and parsing recovery happens as the
+# p_<type>_error functions defined above are called.  This allows the parser
+# to continue so as to capture more than one error per file.
+#
+  def p_error(self, t):
+    filename = self.lexobj.filename
+    self.parse_errors += 1
+    if t:
+      lineno = t.lineno
+      pos = t.lexpos
+      prev = self.yaccobj.symstack[-1]
+      if type(prev) == lex.LexToken:
+        msg = "Unexpected %s after %s." % (
+            TokenTypeName(t), TokenTypeName(prev))
+      else:
+        msg = "Unexpected %s." % (t.value)
+    else:
+      lineno = self.last.lineno
+      pos = self.last.lexpos
+      msg = "Unexpected end of file after %s." % TokenTypeName(self.last)
+      self.yaccobj.restart()
+
+    # Attempt to remap the error to a friendlier form
+    if msg in ERROR_REMAP:
+      msg = ERROR_REMAP[msg]
+
+    # Log the error
+    self.Logger(filename, lineno, pos, msg)
+
+
+  def __init__(self, builder, logger, options = {}):
+    global PARSER_OPTIONS
+
+    IDLLexer.__init__(self, options)
+    self.yaccobj = yacc.yacc(module=self, tabmodule=None, debug=False,
+                             optimize=0, write_tables=0)
+
+    for k in options:
+      PARSER_OPTIONS[k] = options[k]
+
+    self.build_debug = PARSER_OPTIONS['build_debug']
+    self.parse_debug = PARSER_OPTIONS['parse_debug']
+    self.token_debug = PARSER_OPTIONS['token_debug']
+    self.verbose = PARSER_OPTIONS['verbose']
+    self.Builder = builder
+    self.Logger = logger
+    self.parse_errors = 0
+
+#
+# Tokenizer
+#
+# The token function returns the next token provided by IDLLexer for
+# matching against the leaf patterns.
+#
+  def token(self):
+    tok = self.lexobj.token()
+    if tok:
+      self.last = tok
+      if self.token_debug:
+        PrintInfo("TOKEN %s(%s)" % (tok.type, tok.value))
+    return tok
+
+#
+# BuildProduction
+#
+# Production is the set of items sent to a grammar rule resulting in a new
+# item being returned.
+#
+# p         - The Yacc production object containing the stack of items
+# index     - Index into the production of the name for the item being
+#             produced.
+# cls       - The type of item being produced
+# childlist - The children of the new item
+  def BuildProduction(self, cls, p, index, childlist):
+    name = p[index]
+    filename = self.lexobj.filename
+    lineno = p.lineno(index)
+    pos = p.lexpos(index)
+    if self.build_debug:
+      PrintInfo("Building %s(%s)" % (cls, name))
+    return self.Builder(cls, name, filename, lineno, pos, childlist)
+
+#
+# BuildExtAttribute
+#
+# An ExtendedAttribute is a special production that results in a property
+# which is applied to the adjacent item.  Attributes have no children and
+# instead represent key/value pairs.
+#
+  def BuildExtAttribute(self, name, value):
+    if self.build_debug:
+      PrintInfo("Adding ExtAttribute %s = %s" % (name, str(value)))
+    return self.Builder('ExtAttribute', '%s=%s' % (name, value),
+                        self.lexobj.filename, self.last.lineno,
+                        self.last.lexpos, [])
+
+#
+# ParseData
+#
+# Attempts to parse the current data loaded in the lexer.
+#
+  def ParseData(self):
+    try:
+      return self.yaccobj.parse(lexer=self)
+
+    except lex.LexError as le:
+      PrintError(str(le))
+      return []
+
+#
+# ParseFile
+#
+# Loads a new file into the lexer and attempts to parse it.
+#
+  def ParseFile(self, filename):
+    data = open(filename).read()
+    self.SetData(filename, data)
+    if self.verbose:
+      PrintInfo("Parsing %s" % filename)
+    try:
+      out = self.ParseData()
+      return out
+
+    except Exception as e:
+      LogError(filename, self.last.lineno, self.last.lexpos,
+               'Internal parsing error - %s.' % str(e))
+      raise
+    return []
+
+
+
+
+class TestNode(object):
+  def __init__(self, cls, name, filename, lineno, pos, children):
+    self.cls = cls
+    self.name = name
+    if children:
+      self.childlist = children
+    else:
+      self.childlist = []
+
+  def __str__(self):
+    return "%s(%s)" % (self.cls, self.name)
+
+  def Dump(self, depth, comments = False, out=sys.stdout):
+    if not comments:
+      if self.cls == 'Comment' or self.cls == 'Copyright':
+        return
+
+    tab = ""
+    for t in range(depth):
+      tab += '  '
+    print >>out, "%s%s" % (tab, self)
+    for c in self.childlist:
+      c.Dump(depth + 1, out=out)
+
+def FlattenTree(node):
+  add_self = False
+  out = []
+  for c in node.childlist:
+    if c.cls == 'Comment':
+      add_self = True
+    else:
+      out.extend(FlattenTree(c))
+
+  if add_self:
+    out = [str(node)] + out
+  return out
+
+
+err_list = []
+def TestLog(filename, lineno, pos, msg):
+  global err_list
+  global PARSER_OPTIONS
+
+  err_list.append(msg)
+  if PARSER_OPTIONS['verbose']:
+    sys.stdout.write("%s(%d) : %s\n" % (filename, lineno + 1, msg))
+
+
+def Test(filename, nodes):
+  global err_list
+  lexer = IDLLexer()
+  data = open(filename).read()
+  lexer.SetData(filename, data)
+
+  pass_comments = []
+  fail_comments = []
+  while True:
+    tok = lexer.lexobj.token()
+    if tok == None: break
+    if tok.type == 'COMMENT':
+      args = tok.value.split()
+      if args[1] == 'OK':
+        pass_comments.append((tok.lineno, ' '.join(args[2:-1])))
+      else:
+        if args[1] == 'FAIL':
+          fail_comments.append((tok.lineno, ' '.join(args[2:-1])))
+
+  obj_list = []
+  for node in nodes:
+    obj_list.extend(FlattenTree(node))
+
+  errors = 0
+
+  #
+  # Check for expected successes
+  #
+  obj_cnt = len(obj_list)
+  pass_cnt = len(pass_comments)
+  if obj_cnt != pass_cnt:
+    PrintInfo("Mismatched pass (%d) vs. nodes built (%d)."
+              % (pass_cnt, obj_cnt))
+    PrintInfo("PASS: %s" % [x[1] for x in pass_comments])
+    PrintInfo("OBJS: %s" % obj_list)
+    errors += 1
+  if pass_cnt > obj_cnt: pass_cnt = obj_cnt
+
+  for i in range(pass_cnt):
+    line, comment = pass_comments[i]
+    if obj_list[i] != comment:
+      print "%s(%d) Error: OBJ %s : EXPECTED %s" % (
+          filename, line, obj_list[i], comment)
+      errors += 1
+
+  #
+  # Check for expected errors
+  #
+  err_cnt = len(err_list)
+  fail_cnt = len(fail_comments)
+  if err_cnt != fail_cnt:
+    PrintInfo("Mismatched fail (%d) vs. errors seen (%d)."
+              % (fail_cnt, err_cnt))
+    PrintInfo("FAIL: %s" % [x[1] for x in fail_comments])
+    PrintInfo("ERRS: %s" % err_list)
+    errors += 1
+  if fail_cnt > err_cnt: fail_cnt = err_cnt
+
+  for i in range(fail_cnt):
+    line, comment = fail_comments[i]
+    if err_list[i] != comment:
+      PrintError("%s(%d) Error\n\tERROR : %s\n\tEXPECT: %s" % (
+          filename, line, err_list[i], comment))
+      errors += 1
+
+  # Clear the error list for the next run
+  err_list = []
+  return errors
+
+
+def Main(args):
+  global PARSER_OPTIONS
+
+  long_opts = PARSER_OPTIONS.keys()
+  usage = 'Usage: idl_parser.py %s [<src.idl> ...]' % ' '.join(long_opts)
+  try:
+    opts, filenames = getopt.getopt(args, '', long_opts)
+
+  except getopt.error, e:
+    PrintError('Illegal option: %s\n\t%s' % (str(e), usage))
+    return 1
+
+  for opt, val in opts:
+    PARSER_OPTIONS[opt[2:]] = True
+    print 'Set %s to True.' % opt
+
+  parser = IDLParser(TestNode, TestLog, PARSER_OPTIONS)
+
+  total_errs = 0
+  for filename in filenames:
+    tokens = parser.ParseFile(filename)
+    errs = Test(filename, tokens)
+    total_errs += errs
+    if errs:
+      PrintError("%s test failed with %d error(s)." % (filename, errs))
+    else:
+      PrintInfo("%s passed." % filename)
+
+    if PARSER_OPTIONS['output']:
+      for token in tokens:
+        token.Dump(0)
+
+  return total_errs
+
+
+if __name__ == '__main__':
+  sys.exit(Main(sys.argv[1:]))
+
diff --git a/ppapi/generators/test_lex.in b/ppapi/generators/test_lex.in
index 4230843..31edfaa4 100644
--- a/ppapi/generators/test_lex.in
+++ b/ppapi/generators/test_lex.in
@@ -21,8 +21,19 @@ FLOAT -1.1
 FLOAT -1e1
 FLOAT 1e-1
 FLOAT -1e-1
+FLOAT 1.0e1
+FLOAT -1.0e-1
 
 HEX 0x1
 HEX 0x0
 HEX 0x10
 HEX 0x112312
+HEX 0x1ABCD0
+HEX 0xA0B0C0
+HEX 0xabcdef
+HEX 0x1ab2cd
+
+OCT 00
+OCT 01
+OCT 0123
+
diff --git a/ppapi/generators/test_parser/enum.idl b/ppapi/generators/test_parser/enum.idl
new file mode 100644
index 0000000..a545d1b
--- /dev/null
+++ b/ppapi/generators/test_parser/enum.idl
@@ -0,0 +1,50 @@
+/* Copyright (c) 2011 The Chromium Authors. All rights reserved.
+   Use of this source code is governed by a BSD-style license that can be
+   found in the LICENSE file. */
+
+/* This file tests parsing of enumerations under different conditions */
+
+/* OK Enum(Es1) */
+enum Es1 {
+  /* OK EnumItem(E1) */
+  E1 = 1,
+  /* OK EnumItem(E2) */
+  E2 = 2
+};
+
+/* FAIL Enum missing name. */
+enum {
+  E3 = 3,
+  E4 = 4
+};
+
+/* OK Enum(Es3) */
+enum Es3 {
+  E5 = 5,
+  E6 = 6
+};
+
+/* FAIL Unexpected empty block. */
+enum Es4 {
+};
+
+/* OK Enum(Es5) */
+enum Es5 {
+  /* OK EnumItem(E9) */
+  E9 = 9,
+  /* OK EnumItem(E10) */
+  /* FAIL Trailing comma in block. */
+  E10 = 10,
+};
+
+/* FAIL Unexpected trailing comment. */
+enum Es6 {
+  E5 = 11,
+  E6 = 12
+}
+
+/* OK Enum(Es7) */
+enum Es7 {
+  /* OK EnumItem(E11) */
+  E11 = 11
+};
diff --git a/ppapi/generators/test_parser/interface.idl b/ppapi/generators/test_parser/interface.idl
new file mode 100644
index 0000000..61dfd66
--- /dev/null
+++ b/ppapi/generators/test_parser/interface.idl
@@ -0,0 +1,56 @@
+/* Copyright (c) 2011 The Chromium Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/* Tests for interface */
+
+/* OK Interface(Interface_0_1) */
+interface Interface_0_1 {
+  /* OK Function(OneParam) */
+  PP_Bool OneParam(
+      /* OK Param(resource) */
+      [in] PP_Resource resource);
+
+  /* OK Function(TwoParam) */
+  PP_Resource TwoParam(
+      /* OK Param(instance) */
+      [in] PP_Instance instance,
+      /* OK Param(size) */
+      [in] PP_Size size);
+
+  /* OK Function(ThreeParam) */
+  PP_Bool ThreeParam(
+      /* OK Param(graphics_2d) */
+      [in] PP_Resource graphics_2d,
+      /* OK Param(size) */
+      [out] PP_Size size,
+      /* OK Param(is_always_opaque) */
+      [out] PP_Bool is_always_opaque);
+};
+
+
+/* OK Interface(Interface_0_2) */
+interface Interface_0_2 {
+  /* OK Function(OneParam) */
+  PP_Bool OneParam(
+      /* OK Param(resource) */
+      [in] PP_Resource resource);
+
+  /* OK Function(TwoParam) */
+  PP_Resource TwoParam(
+      /* OK Param(instance) */
+      [in] PP_Instance instance,
+      /* OK Param(size) */
+      /* FAIL Missing argument. */
+      [in] PP_Size size, );
+
+  /* OK Function(ThreeParam) */
+  PP_Bool ThreeParam(
+      /* OK Param(graphics_2d) */
+      [in] PP_Resource graphics_2d,
+      /* FAIL Unexpected "," after symbol PP_Size. */
+      [out] PP_Size,
+      /* OK Param(is_always_opaque) */
+      [out] PP_Bool is_always_opaque);
+};
diff --git a/ppapi/generators/test_parser/struct.idl b/ppapi/generators/test_parser/struct.idl
new file mode 100644
index 0000000..8633c4e
--- /dev/null
+++ b/ppapi/generators/test_parser/struct.idl
@@ -0,0 +1,34 @@
+/* Copyright (c) 2011 The Chromium Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/* Tests for structures */
+
+/* OK Struct(S1) */
+struct S1 {
+  /* OK Member(Mem1) */
+  PP_Bool Mem1;
+  /* OK Member(Mem2) */
+  PP_Resource Mem2;
+};
+
+typedef int[] func(int x, int y);
+
+/* OK Struct(S2) */
+struct S2 {
+  /* OK Member(Mem1) */
+  PP_Bool Mem1;
+  /* OK Member(Mem2) */
+  PP_Resource Mem2;
+  /* OK Member(Mem3) */
+  [ATTRIBUTE] PP_Resource Mem3;
+  /* OK Member(foo) */
+  FuncFoo foo;
+};
+
+/* FAIL Struct missing name. */
+struct {
+  PP_Bool Mem1;
+  PP_Resource Mem2;
+};
\ No newline at end of file
diff --git a/ppapi/generators/test_parser/typedef.idl b/ppapi/generators/test_parser/typedef.idl
new file mode 100644
index 0000000..6b48b58
--- /dev/null
+++ b/ppapi/generators/test_parser/typedef.idl
@@ -0,0 +1,46 @@
+/* Copyright (c) 2011 The Chromium Authors. All rights reserved.
+   Use of this source code is governed by a BSD-style license that can be
+   found in the LICENSE file. */
+
+/* This file tests parsing of typedefs under different conditions */
+
+/* OK Typedef(T1) */
+typedef int32_t T1;
+
+/* FAIL Unexpected comment "/*" after symbol T2. */
+typedef int32_t T2
+
+/* OK Typedef(T3) */
+typedef int32_t[] T3;
+
+/* OK Typedef(T4) */
+typedef int32_t[][4] T4;
+
+/* FAIL Empty argument list. */
+typedef int32_t[][4] T5();
+
+/* OK Typedef(T6) */
+typedef int32_t[][4] T6(int x);
+
+/* OK Typedef(T7) */
+typedef int32_t[][4] T7(
+    /* OK Param(x) */
+    int x,
+    /* OK Param(y) */
+    int y);
+
+/* OK Typedef(T8) */
+typedef int32_t[][4][5] T8(
+    /* OK Param(x) */
+    int x,
+    /* OK Param(y) */
+    int y,
+    /* OK Param(z) */
+    /* FAIL Missing argument. */
+    int z,);
+
+/* FAIL Unexpected keyword "enum" after symbol int32_t. */
+typedef int32_t enum;
+
+/* FAIL Unexpected ";" after symbol foo. */
+typedef foo;
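For a generator built on top of this parser (still under development, per the
description above), the Builder and Logger callbacks are the extension points.
A hypothetical minimal sketch (the Node and Log names are illustrative, not
part of this CL), matching the signatures that BuildProduction,
BuildExtAttribute, and p_error invoke:

    import sys
    from idl_parser import IDLParser

    class Node(object):
      # Signature matches self.Builder(cls, name, filename, lineno, pos, children).
      def __init__(self, cls, name, filename, lineno, pos, children):
        self.cls = cls
        self.name = name
        self.children = children or []

    def Log(filename, lineno, pos, msg):
      # Signature matches self.Logger(filename, lineno, pos, msg) in p_error.
      sys.stderr.write('%s(%d) : %s\n' % (filename, lineno + 1, msg))

    parser = IDLParser(Node, Log)
    nodes = parser.ParseFile('test_parser/struct.idl')
    print 'Parsed %d top-level nodes with %d error(s).' % (
        len(nodes), parser.parse_errors)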