
"""
This module defines the `DeclarationParser` class for parsing UPPAAL declarations 
and generating an Abstract Syntax Tree (AST). It leverages the PLY (Python Lex-Yacc) 
framework to build a lexer and parser, enabling the parsing of complex declaration 
rules and structures.

Based on the c_parser.py from the pycparser project by Eli Bendersky (https://eli.thegreenplace.net)
under the BSD license.

Classes:
    * `DeclarationParser`: Main parser class responsible for processing UPPAAL declarations.
    * `SingleStatementParser`: Parser for single statements with relaxed rules on typing.

Key Features:
    * Manages scope information with a stack-based system for accurate type and identifier resolution.
    * Supports lexer and parser customization, as well as optimization for performance.
    * Includes utility methods for handling specific parsing scenarios, such as typedef management.
    * Provides detailed debugging capabilities, such as generating lexer and parser tables and debug outputs.
"""
from uppaal2jetracer.declarations import declarations_ast
from uppaal2jetracer.parser.declarationparser.ply import yacc
from uppaal2jetracer.parser.declarationparser.declaration_lexer import DeclarationLexer
from uppaal2jetracer.parser.declarationparser.plyparser import PLYParser, parameterized, template


@template
class DeclarationParser(PLYParser):
    """
    Parses UPPAAL declarations and generates an Abstract Syntax Tree (AST).

    This class is responsible for parsing code from UPPAAL declarations, handling lexer
    and parser configurations, and managing scope information during parsing. It uses
    the PLY (Python Lex-Yacc) parser framework.

    It provides mechanisms to configure and optimize the lexer and parser, as well as
    utility methods for scope management and type lookup. The class maintains a
    stack-based scope system to resolve identifiers and types, supports typedef name
    management, and facilitates interaction with the lexer and parser.

    :ivar dlex: The lexer used by the parser. Configured during the initialization phase.
    :type dlex: DeclarationLexer
    :ivar tokens: List of tokens supported by the lexer for the parser.
    :type tokens: list
    :ivar _scope_stack: Maintains the stack of scopes for tracking symbols and types.
        Each scope is a dictionary of name-to-type mappings. A value of True indicates
        the name is a typedef; False means it is not a typedef.
    :type _scope_stack: list of dict
    :ivar _last_yielded_token: Stores the most recently seen token during parsing.
    :type _last_yielded_token: Token
    """

    def __init__(
            self,
            lex_optimize = False,
            lexer = DeclarationLexer,
            lextab = "pycparser.lextab",
            yacc_optimize = False,
            yacctab = "pycparser.yacctab",
            yacc_debug = False,
            taboutputdir = ""
    ):
        """
        Create a new DeclarationParser.

        Some arguments for controlling the debug/optimization level of the
        parser are provided. The defaults are tuned for release/performance
        mode. The simple rules for using them are:

        * When tweaking the parser/lexer, set these to False.
        * When releasing a stable parser, set them to True.

        lex_optimize:
            Set to False when you're modifying the lexer. Otherwise, changes
            in the lexer won't be used if some lextab.py file exists. When
            releasing with a stable lexer, set to True to save the
            re-generation of the lexer table on each run.

        lexer:
            Set this parameter to define the lexer to use if you're not using
            the default DeclarationLexer.

        lextab:
            Points to the lex table that's used for optimized mode. Only if
            you're modifying the lexer and want some tests to avoid
            re-generating the table, make this point to a local lex table file
            (that's been earlier generated with lex_optimize=True).

        yacc_optimize:
            Set to False when you're modifying the parser. Otherwise, changes
            in the parser won't be used if some parsetab.py file exists. When
            releasing with a stable parser, set to True to save the
            re-generation of the parser table on each run.

        yacctab:
            Points to the yacc table that's used for optimized mode. Only if
            you're modifying the parser, make this point to a local yacc table
            file.

        yacc_debug:
            Generate a parser.out file that explains how yacc built the
            parsing table from the grammar.

        taboutputdir:
            Set this parameter to control the location of generated lextab and
            yacctab files.
        """
        self.dlex = lexer(
            error_func = self._lex_error_func,
            on_lbrace_func = self._lex_on_lbrace_func,
            on_rbrace_func = self._lex_on_rbrace_func,
            type_lookup_func = self._lex_type_lookup_func
        )
        self.dlex.build(
            optimize = lex_optimize,
            lextab = lextab,
            outputdir = taboutputdir
        )
        self.tokens = self.dlex.tokens

        rules_with_opt = [
            "abstract_declarator",
            "assignment_expression",
            "declaration_list",
            "declaration_specifiers_no_type",
            "designation",
            "expression",
            "identifier_list",
            "init_declarator_list",
            "id_init_declarator_list",
            "initializer_list",
            "parameter_type_list",
            "block_item_list",
            "type_qualifier_list",
            "struct_declarator_list"
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        self.cparser = yacc.yacc(
            module = self,
            start = "translation_unit_or_empty",
            debug = yacc_debug,
            optimize = yacc_optimize,
            tabmodule = yacctab,
            outputdir = taboutputdir
        )

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, "name" is currently a type in the scope. If it's False,
        # "name" is used in the scope but not as a type (for instance, if we
        # saw: int name;).
        # If "name" is not a key in _scope_stack[n] then "name" was not
        # defined in this scope at all.
        self._scope_stack = [{}]

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None
    def parse(self, text, filename = "", debug = False):
        """
        Parses UPPAAL declaration code and returns an AST.

        text:
            A string containing the declaration source code.

        filename:
            Name of the file being parsed (for meaningful error messages).

        debug:
            Debug flag to YACC.
        """
        self.dlex.filename = filename
        self.dlex.reset_lineno()
        self._scope_stack = [{}]
        self._last_yielded_token = None
        return self.cparser.parse(
            input = text,
            lexer = self.dlex,
            debug = debug
        )
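    # A minimal usage sketch (an illustration, assuming a pycparser-style
    # FileAST whose children are exposed via `ext`; the exact node attributes
    # depend on declarations_ast):
    #
    #     parser = DeclarationParser()
    #     ast = parser.parse("const int N = 4; clock x;", filename = "<decl>")
    #     for node in ast.ext:
    #         print(type(node).__name__)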
    ######################-- PRIVATE --######################

    def _push_scope(self):
        self._scope_stack.append({})

    def _pop_scope(self):
        assert len(self._scope_stack) > 1
        self._scope_stack.pop()

    def _add_typedef_name(self, name, coord):
        """ Add a new typedef name (i.e. a TYPEID) to the current scope. """
        if not self._scope_stack[-1].get(name, True):
            self._parse_error(
                f"Typedef {name} previously declared as non-typedef in this scope",
                coord
            )
        self._scope_stack[-1][name] = True

    def _add_identifier(self, name, coord):
        """ Add a new object or function member name (i.e. an ID) to the current scope. """
        if self._scope_stack[-1].get(name, False):
            self._parse_error(
                f"Non-typedef {name} previously declared as typedef in this scope",
                coord
            )
        self._scope_stack[-1][name] = False

    def _is_type_in_scope(self, name):
        """ Is *name* a typedef-name in the current scope? """
        for scope in reversed(self._scope_stack):
            # If name is an identifier in this scope it shadows typedefs in
            # higher scopes.
            in_scope = scope.get(name)
            if in_scope is not None:
                return in_scope
        return False

    def _lex_error_func(self, msg, line, column):
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        self._push_scope()

    def _lex_on_rbrace_func(self):
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """
        Looks up types that were previously defined with typedef.
        Passed to the lexer for recognizing identifiers that are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """
        We need access to yacc's lookahead token in certain cases.
        This is the last token yacc requested from the lexer, so we ask
        the lexer.
        """
        return self.dlex.last_token

    # Due to the order in which declarators are constructed,
    # they have to be fixed in order to look like a normal AST.
    #
    # When a declaration arrives from syntax construction, it has
    # these problems:
    # * The innermost TypeDecl has no type (because the basic
    #   type is only known at the uppermost declaration level)
    # * The declaration has no variable name, since that is saved
    #   in the innermost TypeDecl
    # * The typename of the declaration is a list of type
    #   specifiers, and not a node. Here, basic identifier types
    #   should be separated from more complex types like structs.
    #
    # This method fixes these problems.
    def _fix_decl_name_type(self, decl, typename):
        """ Fixes a declaration. Modifies decl. """
        # Reach the underlying basic type
        #
        type = decl
        while not isinstance(type, declarations_ast.TypeDecl):
            type = type.type

        decl.name = type.declname
        type.quals = decl.quals[:]

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int struct ..").
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        for tn in typename:
            if not isinstance(tn, declarations_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error(
                        "Invalid multiple types specified", tn.coord)
                else:
                    type.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            #
            if not isinstance(decl.type, declarations_ast.FuncDecl):
                self._parse_error(
                    "Missing type in declaration", decl.coord)
            type.type = declarations_ast.IdentifierType(
                ["int"],
                coord = decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            #
            type.type = declarations_ast.IdentifierType(
                [name for id in typename for name in id.names],
                coord = typename[0].coord)
        return decl

    def _verify_declaration(self, decl, spec):
        decl_type = spec["type"][0].names[0]

        if "broadcast" in spec["qual"] and decl_type != "chan":
            self._parse_error(
                "Error: 'broadcast' qualifier only allowed for 'chan' type",
                decl["decl"].coord
            )

        if decl_type == "string" and "const" not in spec["qual"]:
            self._parse_error(
                "Error: 'string' type is not allowed to be used without 'const' qualifier.",
                decl["decl"].coord
            )

        if hasattr(decl.get("init"), "value"):
            init_value = decl.get("init").value

            if decl_type == "chan":
                self._parse_error(
                    "Error: 'chan' type cannot be initialized with any value.",
                    decl.get("init").coord
                )

            if decl_type == "bool":
                if init_value.strip().lower() not in ["true", "false"]:
                    self._parse_error("Error: 'bool' type can only be assigned "
                                      "value 'true' or 'false'.",
                                      decl.get("init").coord)

            if decl_type != "bool":
                if init_value.strip().lower() in ["true", "false"]:
                    self._parse_error("Error: 'true' or 'false' values can only be "
                                      "assigned to 'bool' type.",
                                      decl.get("init").coord)

            if decl_type == "clock":
                try:
                    int(init_value)
                except ValueError:
                    self._parse_error(
                        "Error: 'clock' type can only be assigned integer values.",
                        decl.get("init").coord
                    )

    def _build_declarations(self, spec, decls, typedef_namespace = False):
        """
        Builds a list of declarations all sharing the given specifiers.
        If typedef_namespace is true, each declared name is added to the
        "typedef namespace", which also includes objects and functions.
        """
        is_typedef = spec["is_typedef"]
        declarations = []

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec["type"], leaving decl as None. This can only occur for the
        # first declarator.
        if decls[0]["decl"] is None:
            if len(spec["type"]) < 2 or len(spec["type"][-1].names) != 1 or \
                    not self._is_type_in_scope(spec["type"][-1].names[0]):
                coord = "?"
                for t in spec["type"]:
                    if hasattr(t, "coord"):
                        coord = t.coord
                        break
                self._parse_error("Invalid declaration", coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]["decl"] = declarations_ast.TypeDecl(
                declname = spec["type"][-1].names[0],
                type = None,
                quals = None,
                coord = spec["type"][-1].coord
            )
            # Remove the "new" type's name from the end of spec["type"]
            del spec["type"][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator. Give it a name if this is the case.
        elif not isinstance(decls[0]["decl"],
                            (declarations_ast.Struct, declarations_ast.IdentifierType)):
            decls_0_tail = decls[0]["decl"]
            while not isinstance(decls_0_tail, declarations_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec["type"][-1].names[0]
                del spec["type"][-1]

        for decl in decls:
            assert decl["decl"] is not None
            if is_typedef:
                declaration = declarations_ast.Typedef(
                    name = None,
                    quals = spec["qual"],
                    is_typedef = spec["is_typedef"],
                    type = decl["decl"],
                    coord = decl["decl"].coord
                )
            else:
                self._verify_declaration(decl, spec)
                declaration = declarations_ast.Decl(
                    name = None,
                    quals = spec["qual"],
                    is_typedef = spec["is_typedef"],
                    funcspec = spec["function"],
                    type = decl["decl"],
                    init = decl.get("init"),
                    coord = decl["decl"].coord
                )

            if isinstance(declaration.type,
                          (declarations_ast.Struct, declarations_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec["type"])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            declarations.append(fixed_decl)

        return declarations

    def _build_function_definition(self, spec, decl, param_decls, body):
        """ Builds a function definition. """
        if spec["is_typedef"]:
            self._parse_error("Invalid typedef", decl.coord)

        declaration = self._build_declarations(
            spec = spec,
            decls = [{"decl": decl, "init": None}],
            typedef_namespace = True
        )[0]

        return declarations_ast.FuncDef(
            decl = declaration,
            param_decls = param_decls,
            body = body,
            coord = decl.coord
        )

    ##
    ## Precedence and associativity of operators
    ##
    precedence = (
        ("left", "LOR"),
        ("left", "LAND"),
        ("left", "OR"),
        ("left", "XOR"),
        ("left", "AND"),
        ("left", "EQ", "NE"),
        ("left", "GT", "GE", "LT", "LE"),
        ("left", "RSHIFT", "LSHIFT"),
        ("left", "PLUS", "MINUS"),
        ("left", "TIMES", "DIVIDE", "MOD")
    )

    ##
    ## Grammar productions
    ## Implementation of the BNF defined in K&R2 A.13
    ##

    # Wrapper around a translation unit, to allow for empty input.
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty : translation_unit
                                      | empty
        """
        if p[1] is None:
            p[0] = declarations_ast.FileAST([])
        else:
            p[0] = declarations_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit : external_declaration """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit : translation_unit external_declaration """
        p[1].extend(p[2])
        p[0] = p[1]
    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration : function_definition """
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration : declaration """
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration : SEMI """
        p[0] = []

    def p_function_definition(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement """
        spec = p[1]
        p[0] = self._build_function_definition(
            spec = spec,
            decl = p[2],
            param_decls = p[3],
            body = p[4]
        )

    def p_statement(self, p):
        """ statement : expression_statement
                      | compound_statement
                      | selection_statement
                      | iteration_statement
                      | jump_statement
        """
        p[0] = p[1]

    def p_decl_body(self, p):
        """ decl_body : declaration_specifiers init_declarator_list_opt
                      | declaration_specifiers_no_type id_init_declarator_list_opt
        """
        spec = p[1]

        # p[2] (init_declarator_list_opt) is either a list or None
        #
        if p[2] is None:
            # By the standard, you must have at least one declarator unless
            # declaring a structure tag.
            #
            ty = spec["type"]
            if len(ty) == 1 and isinstance(ty[0], declarations_ast.Struct):
                decls = [declarations_ast.Decl(
                    name = None,
                    quals = spec["qual"],
                    is_typedef = spec["is_typedef"],
                    funcspec = spec["function"],
                    type = ty[0],
                    init = None,
                    coord = ty[0].coord
                )]
            # However, this case can also occur on redeclared identifiers in
            # an inner scope. The trouble is that the redeclared type's name
            # gets grouped into declaration_specifiers; _build_declarations
            # compensates for this.
            #
            else:
                decls = self._build_declarations(
                    spec = spec,
                    decls = [{"decl": None, "init": None}],
                    typedef_namespace = True
                )
        else:
            decls = self._build_declarations(
                spec = spec,
                decls = p[2],
                typedef_namespace = True
            )

        p[0] = decls
    # The declaration has been split to a decl_body sub-rule and
    # SEMI, because having them in a single rule created a problem
    # for defining typedefs.
    #
    # If a typedef line was directly followed by a line using the
    # type defined with the typedef, the type would not be
    # recognized. This is because to reduce the declaration rule,
    # the parser's lookahead asked for the token after SEMI, which
    # was the type from the next line, and the lexer had no chance
    # to see the updated type symbol table.
    #
    # Splitting solves this problem, because after seeing SEMI,
    # the parser reduces decl_body, which actually adds the new
    # type into the table to be seen by the lexer before the next
    # line is reached.
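    # For illustration, a hypothetical two-line input exercising the case
    # described above:
    #
    #     typedef int length;
    #     length l;
    #
    # After the first SEMI, reducing decl_body registers "length" as a
    # typedef name, so the lexer can return TYPEID for it on the next line.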
    def p_declaration(self, p):
        """ declaration : decl_body SEMI """
        p[0] = p[1]

    # Since each declaration is a list of declarations, this
    # rule will combine all the declarations and return a single list
    def p_declaration_list(self, p):
        """ declaration_list : declaration
                             | declaration_list declaration
        """
        p[0] = p[1] if len(p) == 2 else p[1] + p[2]
    # To know when declaration-specifiers end and declarators begin,
    # we require declaration-specifiers to have at least one
    # type-specifier, and disallow typedef-names after we've seen any
    # type-specifier. These are both required by the spec.
    def p_declaration_specifiers_no_type_1(self, p):
        """ declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt """
        p[0] = _add_declaration_specifier(p[2], p[1], "qual")

    def p_declaration_specifiers_no_type_2(self, p):
        """ declaration_specifiers_no_type : TYPEDEF declaration_specifiers_no_type_opt """
        p[0] = _add_declaration_specifier(p[2], p[1], "is_typedef")

    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers : declaration_specifiers type_qualifier """
        p[0] = _add_declaration_specifier(p[1], p[2], "qual", append = True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers : declaration_specifiers TYPEDEF """
        p[0] = _add_declaration_specifier(p[1], p[2], "is_typedef", append = True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers : declaration_specifiers type_specifier_no_typeid """
        p[0] = _add_declaration_specifier(p[1], p[2], "type", append = True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers : type_specifier """
        p[0] = _add_declaration_specifier(None, p[1], "type")

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers : declaration_specifiers_no_type type_specifier """
        p[0] = _add_declaration_specifier(p[1], p[2], "type", append = True)

    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid : VOID
                                     | BOOL
                                     | CHAN
                                     | CHAR
                                     | STRING
                                     | CLOCK
                                     | INT
                                     | DOUBLE
        """
        p[0] = declarations_ast.IdentifierType([p[1]], coord = self._token_coord(p, 1))

    def p_type_specifier(self, p):
        """ type_specifier : typedef_name
                           | struct_specifier
                           | type_specifier_no_typeid
        """
        p[0] = p[1]

    def p_type_qualifier(self, p):
        """ type_qualifier : CONST
                           | BROADCAST
        """
        p[0] = p[1]

    def p_init_declarator_list(self, p):
        """ init_declarator_list : init_declarator
                                 | init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
    # Returns a {decl=<declarator> : init=<initializer>} dictionary
    # If there's no initializer, uses None
    def p_init_declarator(self, p):
        """ init_declarator : declarator
                            | declarator EQUALS initializer
        """
        p[0] = {"decl": p[1], "init": (p[3] if len(p) > 2 else None)}

    def p_id_init_declarator_list(self, p):
        """ id_init_declarator_list : id_init_declarator
                                    | id_init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    def p_id_init_declarator(self, p):
        """ id_init_declarator : id_declarator
                               | id_declarator EQUALS initializer
        """
        p[0] = {"decl": p[1], "init": (p[3] if len(p) > 2 else None)}
    # Require at least one type specifier in a specifier-qualifier-list
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid """
        p[0] = _add_declaration_specifier(p[1], p[2], "type", append = True)

    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list : specifier_qualifier_list type_qualifier """
        p[0] = _add_declaration_specifier(p[1], p[2], "qual", append = True)

    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list : type_specifier """
        p[0] = _add_declaration_specifier(None, p[1], "type")

    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list : type_qualifier_list type_specifier """
        p[0] = {"qual": p[1], "is_typedef": False, "type": [p[2]], "function": []}
    # TYPEID is allowed here (and in other struct related tag names), because
    # struct tags reside in their own namespace and can be named the same as types
    def p_struct_specifier_1(self, p):
        """ struct_specifier : STRUCT ID
                             | STRUCT TYPEID
        """
        p[0] = declarations_ast.Struct(
            name = p[2],
            decls = None,
            coord = self._token_coord(p, 2)
        )

    def p_struct_specifier_2(self, p):
        """ struct_specifier : STRUCT brace_open struct_declaration_list brace_close
                             | STRUCT brace_open brace_close
        """
        p[0] = declarations_ast.Struct(
            name = None,
            decls = [] if len(p) == 4 else p[3],
            coord = self._token_coord(p, 2)
        )

    def p_struct_specifier_3(self, p):
        """ struct_specifier : STRUCT ID brace_open struct_declaration_list brace_close
                             | STRUCT ID brace_open brace_close
                             | STRUCT TYPEID brace_open struct_declaration_list brace_close
                             | STRUCT TYPEID brace_open brace_close
        """
        p[0] = declarations_ast.Struct(
            name = p[2],
            decls = [] if len(p) == 5 else p[4],
            coord = self._token_coord(p, 2)
        )
    # Combine all declarations into a single list
    def p_struct_declaration_list(self, p):
        """ struct_declaration_list : struct_declaration
                                    | struct_declaration_list struct_declaration
        """
        p[0] = (p[1] or []) if len(p) == 2 else p[1] + (p[2] or [])

    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI """
        spec = p[1]
        assert not spec["is_typedef"]

        if p[2] is not None:
            decls = self._build_declarations(
                spec = spec,
                decls = p[2]
            )
        elif len(spec["type"]) == 1:
            node = spec["type"][0]
            if isinstance(node, declarations_ast.Node):
                decl_type = node
            else:
                decl_type = declarations_ast.IdentifierType(node)
            decls = self._build_declarations(
                spec = spec,
                decls = [{"decl": decl_type}]
            )
        else:
            # Struct members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            decls = self._build_declarations(
                spec = spec,
                decls = [{"decl": None, "init": None}]
            )

        p[0] = decls

    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI """
        p[0] = None

    def p_struct_declarator_list(self, p):
        """ struct_declarator_list : struct_declarator
                                   | struct_declarator_list COMMA struct_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
    # struct_declarator passes up a dict with the keys: decl (for
    # the underlying declarator)
    def p_struct_declarator(self, p):
        """ struct_declarator : declarator """
        p[0] = {"decl": p[1]}

    def p_declarator(self, p):
        """ declarator : id_declarator
                       | typeid_declarator
        """
        p[0] = p[1]
    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator : direct_xxx_declarator """
        p[0] = p[1]

    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator : yyy """
        p[0] = declarations_ast.TypeDecl(
            declname = p[1],
            type = None,
            quals = None,
            coord = self._token_coord(p, 1))

    @parameterized(("id", "ID"), ("typeid", "TYPEID"))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator : LPAREN xxx_declarator RPAREN """
        p[0] = p[2]

    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_3(self, p):
        """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """
        quals = (p[3] if len(p) > 5 else []) or []

        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = declarations_ast.ArrayDecl(
            type = None,
            dim = p[4] if len(p) > 5 else p[3],
            dim_quals = quals,
            coord = p[1].coord)
        p[0] = _type_modify_decl(decl = p[1], modifier = arr)

    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_4(self, p):
        """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression RBRACKET
                                  | direct_xxx_declarator LBRACKET type_qualifier_list assignment_expression RBRACKET
        """
        # Using slice notation for PLY objects doesn't work in Python 3 for the
        # version of PLY embedded with pycparser; see PLY Google Code issue 30.
        # Work around that here by listing the two elements separately.
        listed_quals = [item if isinstance(item, list) else [item]
                        for item in [p[3], p[4]]]
        dim_quals = [qual for sublist in listed_quals for qual in sublist
                     if qual is not None]
        arr = declarations_ast.ArrayDecl(
            type = None,
            dim = p[5],
            dim_quals = dim_quals,
            coord = p[1].coord
        )
        p[0] = _type_modify_decl(decl = p[1], modifier = arr)

    # Special for VLAs
    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_5(self, p):
        """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET """
        arr = declarations_ast.ArrayDecl(
            type = None,
            dim = declarations_ast.ID(p[4], self._token_coord(p, 4)),
            dim_quals = p[3] if p[3] is not None else [],
            coord = p[1].coord
        )
        p[0] = _type_modify_decl(decl = p[1], modifier = arr)

    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_6(self, p):
        """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                                  | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
        """
        func = declarations_ast.FuncDecl(
            args = p[3],
            type = None,
            coord = p[1].coord
        )

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter. The trouble begins with
        # yacc's lookahead token. We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID. We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    self._add_identifier(param.name, param.coord)

        p[0] = _type_modify_decl(decl = p[1], modifier = func)

    @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID"))
    def p_direct_xxx_declarator_7(self, p):
        """ direct_xxx_declarator : LBRACKET assignment_expression_opt COMMA assignment_expression_opt RBRACKET direct_xxx_declarator """
        declaration_code_line = p.lexer.lexer.lexdata
        l_bracket_index = declaration_code_line.find("[", p.lexpos(1))
        l_bracket_coord = self._token_coord(p, 1)

        # get three characters before "["
        range_type = declaration_code_line[l_bracket_index - 3:l_bracket_index]

        # "[" not found or wrong type (or whitespaces)
        if l_bracket_index < 0 or range_type != "int":
            self._parse_error("Error: Whitespace between 'int' and '[...]' is not allowed;"
                              " ranges can only be of type 'int'.", l_bracket_coord)

        rrange = declarations_ast.RangeDecl(
            type = None,
            lower = p[2],
            upper = p[4],
            coord = p[6].coord
        )
        p[0] = _type_modify_decl(decl = p[6], modifier = rrange)
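    # For illustration: the bounded-integer range syntax the rule above
    # accepts (a sketch; RangeDecl stores p[2] and p[4] as lower/upper bounds):
    #
    #     int[0, 10] level;
    #
    # Note that the check above rejects whitespace between "int" and "[",
    # e.g. "int [0, 10] level;" raises a parse error.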
    def p_type_qualifier_list(self, p):
        """ type_qualifier_list : type_qualifier
                                | type_qualifier_list type_qualifier
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list """
        p[0] = p[1]

    def p_parameter_list(self, p):
        """ parameter_list : parameter_declaration
                           | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2:
            # single parameter
            p[0] = declarations_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]
    # Inside a parameter declaration, once we've reduced declaration specifiers,
    # if we shift in an LPAREN and see a TYPEID, it could be either an abstract
    # declarator or a declarator nested inside parens. This rule tells us to
    # always treat it as an abstract declarator. Therefore, we only accept
    # `id_declarator`s and `typeid_noparen_declarator`s.
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration : declaration_specifiers id_declarator
                                  | declaration_specifiers typeid_noparen_declarator
        """
        spec = p[1]
        if not spec["type"]:
            spec["type"] = [declarations_ast.IdentifierType(
                ["int"],
                coord = self._token_coord(p, 1)
            )]
        p[0] = self._build_declarations(
            spec = spec,
            decls = [{"decl": p[2]}]
        )[0]

    def p_parameter_declaration_2(self, p):
        """ parameter_declaration : declaration_specifiers abstract_declarator_opt """
        spec = p[1]
        if not spec["type"]:
            spec["type"] = [declarations_ast.IdentifierType(
                ["int"],
                coord = self._token_coord(p, 1)
            )]

        # Parameters can have the same names as typedefs. The trouble is that
        # the parameter's name gets grouped into declaration_specifiers
        if len(spec["type"]) > 1 and len(spec["type"][-1].names) == 1 and \
                self._is_type_in_scope(spec["type"][-1].names[0]):
            decl = self._build_declarations(
                spec = spec,
                decls = [{"decl": p[2], "init": None}]
            )[0]
        else:
            decl = declarations_ast.Typename(
                name = "",
                quals = spec["qual"],
                type = p[2] or declarations_ast.TypeDecl(None, None, None),
                coord = self._token_coord(p, 2))
            typename = spec["type"]
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl

    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        # 1 parameter:
        if len(p) == 2:
            p[0] = declarations_ast.ParamList([p[1]], p[1].coord)
        # > 1 parameter:
        else:
            p[1].params.append(p[3])
            p[0] = p[1]

    def p_initializer_1(self, p):
        """ initializer : assignment_expression """
        p[0] = p[1]

    def p_initializer_2(self, p):
        """ initializer : brace_open initializer_list_opt brace_close
                        | brace_open initializer_list COMMA brace_close
        """
        if p[2] is None:
            p[0] = declarations_ast.InitList([], self._token_coord(p, 1))
        else:
            p[0] = p[2]

    def p_initializer_list(self, p):
        """ initializer_list : designation_opt initializer
                             | initializer_list COMMA designation_opt initializer
        """
        # 1 initializer
        if len(p) == 3:
            init = p[2] if p[1] is None else declarations_ast.NamedInitializer(p[1], p[2])
            p[0] = declarations_ast.InitList([init], p[2].coord)
        # > 1 initializer
        else:
            init = p[4] if p[3] is None else declarations_ast.NamedInitializer(p[3], p[4])
            p[1].exprs.append(init)
            p[0] = p[1]

    def p_designation(self, p):
        """ designation : designator_list EQUALS """
        p[0] = p[1]
    # Designators are represented as a list of nodes, in the order in which
    # they're written in the code.
    def p_designator_list(self, p):
        """ designator_list : designator
                            | designator_list designator
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_designator(self, p):
        """ designator : LBRACKET constant_expression RBRACKET
                       | PERIOD identifier
        """
        p[0] = p[2]

    def p_type_name(self, p):
        """ type_name : specifier_qualifier_list abstract_declarator_opt """
        typename = declarations_ast.Typename(
            name = "",
            quals = p[1]["qual"][:],
            type = p[2] or declarations_ast.TypeDecl(None, None, None),
            coord = self._token_coord(p, 2)
        )
        p[0] = self._fix_decl_name_type(typename, p[1]["type"])

    def p_abstract_declarator_3(self, p):
        """ abstract_declarator : direct_abstract_declarator """
        p[0] = p[1]
    # Creating and using direct_abstract_declarator_opt here
    # instead of listing both direct_abstract_declarator and the
    # lack of it in the beginning of _1 and _2 caused two
    # shift/reduce errors.
    def p_direct_abstract_declarator_1(self, p):
        """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
        p[0] = p[2]

    def p_direct_abstract_declarator_2(self, p):
        """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET """
        arr = declarations_ast.ArrayDecl(
            type = None,
            dim = p[3],
            dim_quals = [],
            coord = p[1].coord
        )
        p[0] = _type_modify_decl(decl = p[1], modifier = arr)

    def p_direct_abstract_declarator_3(self, p):
        """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """
        quals = (p[2] if len(p) > 4 else []) or []
        p[0] = declarations_ast.ArrayDecl(
            type = declarations_ast.TypeDecl(None, None, None),
            dim = p[3] if len(p) > 4 else p[2],
            dim_quals = quals,
            coord = self._token_coord(p, 1)
        )

    def p_direct_abstract_declarator_6(self, p):
        """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN """
        func = declarations_ast.FuncDecl(
            args = p[3],
            type = None,
            coord = p[1].coord
        )
        p[0] = _type_modify_decl(decl = p[1], modifier = func)

    def p_direct_abstract_declarator_7(self, p):
        """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN """
        p[0] = declarations_ast.FuncDecl(
            args = p[2],
            type = declarations_ast.TypeDecl(None, None, None),
            coord = self._token_coord(p, 1)
        )
    # declaration is a list, statement isn't. To make it consistent, block_item
    # will always be a list
    def p_block_item(self, p):
        """ block_item : declaration
                       | statement
        """
        p[0] = p[1] if isinstance(p[1], list) else [p[1]]

    # Since we made block_item a list, this just combines lists
    def p_block_item_list(self, p):
        """ block_item_list : block_item
                            | block_item_list block_item
        """
        # Empty block items (plain ";") produce [None], so ignore them
        p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]

    def p_compound_statement_1(self, p):
        """ compound_statement : brace_open block_item_list_opt brace_close """
        p[0] = declarations_ast.Compound(
            block_items = p[2],
            coord = self._token_coord(p, 1)
        )

    def p_selection_statement_1(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement """
        p[0] = declarations_ast.If(p[3], p[5], None, self._token_coord(p, 1))

    def p_selection_statement_2(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement ELSE statement """
        p[0] = declarations_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))

    def p_iteration_statement_1(self, p):
        """ iteration_statement : WHILE LPAREN expression RPAREN statement """
        p[0] = declarations_ast.While(p[3], p[5], self._token_coord(p, 1))

    def p_iteration_statement_2(self, p):
        """ iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI """
        p[0] = declarations_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))

    def p_iteration_statement_3(self, p):
        """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement """
        p[0] = declarations_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))

    def p_iteration_statement_4(self, p):
        """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN statement """
        p[0] = declarations_ast.For(
            declarations_ast.DeclList(p[3], self._token_coord(p, 1)),
            p[4],
            p[6],
            p[8],
            self._token_coord(p, 1)
        )

    def p_jump_statement(self, p):
        """ jump_statement : RETURN expression SEMI
                           | RETURN SEMI
        """
        p[0] = declarations_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))

    def p_expression_statement(self, p):
        """ expression_statement : expression_opt SEMI """
        p[0] = declarations_ast.EmptyStatement(self._token_coord(p, 2)) if p[1] is None else p[1]

    def p_expression(self, p):
        """ expression : assignment_expression
                       | expression COMMA assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            if not isinstance(p[1], declarations_ast.ExprList):
                p[1] = declarations_ast.ExprList([p[1]], p[1].coord)
            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_parenthesized_compound_expression(self, p):
        """ assignment_expression : LPAREN compound_statement RPAREN """
        p[0] = p[2]

    def p_typedef_name(self, p):
        """ typedef_name : TYPEID """
        p[0] = declarations_ast.IdentifierType([p[1]], coord = self._token_coord(p, 1))

    def p_assignment_expression(self, p):
        """ assignment_expression : conditional_expression
                                  | unary_expression assignment_operator assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = declarations_ast.Assignment(p[2], p[1], p[3], p[1].coord)
    # This rule relies on the built-in precedence/associativity specification
    # feature of PLY (see the precedence declaration above).
    def p_assignment_operator(self, p):
        """ assignment_operator : EQUALS
                                | XOREQUAL
                                | TIMESEQUAL
                                | DIVEQUAL
                                | MODEQUAL
                                | PLUSEQUAL
                                | MINUSEQUAL
                                | LSHIFTEQUAL
                                | RSHIFTEQUAL
                                | ANDEQUAL
                                | OREQUAL
        """
        p[0] = p[1]

    def p_constant_expression(self, p):
        """ constant_expression : conditional_expression """
        p[0] = p[1]

    def p_conditional_expression(self, p):
        """ conditional_expression : binary_expression """
        p[0] = p[1]

    def p_binary_expression(self, p):
        """ binary_expression : unary_expression
                              | binary_expression TIMES binary_expression
                              | binary_expression DIVIDE binary_expression
                              | binary_expression MOD binary_expression
                              | binary_expression PLUS binary_expression
                              | binary_expression MINUS binary_expression
                              | binary_expression RSHIFT binary_expression
                              | binary_expression LSHIFT binary_expression
                              | binary_expression LT binary_expression
                              | binary_expression LE binary_expression
                              | binary_expression GE binary_expression
                              | binary_expression GT binary_expression
                              | binary_expression EQ binary_expression
                              | binary_expression NE binary_expression
                              | binary_expression AND binary_expression
                              | binary_expression OR binary_expression
                              | binary_expression XOR binary_expression
                              | binary_expression LAND binary_expression
                              | binary_expression LOR binary_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = declarations_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
    def p_unary_expression_1(self, p):
        """ unary_expression : postfix_expression """
        p[0] = p[1]

    def p_unary_expression_2(self, p):
        """ unary_expression : unary_operator unary_expression """
        p[0] = declarations_ast.UnaryOp(p[1], p[2], p[2].coord)

    def p_unary_operator(self, p):
        """ unary_operator : AND
                           | TIMES
                           | PLUS
                           | MINUS
                           | NOT
                           | LNOT
        """
        p[0] = p[1]

    def p_postfix_expression_1(self, p):
        """ postfix_expression : primary_expression """
        p[0] = p[1]

    def p_postfix_expression_2(self, p):
        """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """
        p[0] = declarations_ast.ArrayRef(p[1], p[3], p[1].coord)

    def p_postfix_expression_3(self, p):
        """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
                               | postfix_expression LPAREN RPAREN
        """
        p[0] = declarations_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)

    def p_postfix_expression_4(self, p):
        """ postfix_expression : postfix_expression PERIOD ID
                               | postfix_expression PERIOD TYPEID
                               | postfix_expression ARROW ID
                               | postfix_expression ARROW TYPEID
        """
        field = declarations_ast.ID(p[3], self._token_coord(p, 3))
        p[0] = declarations_ast.StructRef(p[1], p[2], field, p[1].coord)

    def p_postfix_expression_6(self, p):
        """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
                               | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
        """
        p[0] = declarations_ast.CompoundLiteral(p[2], p[5])

    def p_primary_expression_1(self, p):
        """ primary_expression : identifier """
        p[0] = p[1]

    def p_primary_expression_2(self, p):
        """ primary_expression : constant """
        p[0] = p[1]

    def p_primary_expression_3(self, p):
        """ primary_expression : unified_string_literal """
        p[0] = p[1]

    def p_primary_expression_4(self, p):
        """ primary_expression : LPAREN expression RPAREN """
        p[0] = p[2]

    def p_argument_expression_list(self, p):
        """ argument_expression_list : assignment_expression
                                     | argument_expression_list COMMA assignment_expression
        """
        # 1 expression
        if len(p) == 2:
            p[0] = declarations_ast.ExprList([p[1]], p[1].coord)
        # > 1 expression
        else:
            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_identifier(self, p):
        """ identifier : ID """
        p[0] = declarations_ast.ID(p[1], self._token_coord(p, 1))

    def p_constant_1(self, p):
        """ constant : INT_CONST_DEC """
        p[0] = declarations_ast.Constant(
            "int", p[1], self._token_coord(p, 1))

    def p_constant_2(self, p):
        """ constant : DOUBLE_CONST """
        p[0] = declarations_ast.Constant(
            "double", p[1], self._token_coord(p, 1))

    def p_constant_3(self, p):
        """ constant : CHAR_CONST """
        p[0] = declarations_ast.Constant(
            "char", p[1], self._token_coord(p, 1))

    def p_constant_4(self, p):
        """ constant : BOOL_CONST """
        p[0] = declarations_ast.Constant(
            "bool", p[1], self._token_coord(p, 1))

    def p_unified_string_literal(self, p):
        """ unified_string_literal : STRING_LITERAL
                                   | unified_string_literal STRING_LITERAL
        """
        if len(p) == 2:
            # single literal
            p[0] = declarations_ast.Constant(
                "string", p[1], self._token_coord(p, 1))
        else:
            p[1].value = p[1].value[:-1] + p[2][1:]
            p[0] = p[1]

    def p_brace_open(self, p):
        """ brace_open : LBRACE """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))

    def p_brace_close(self, p):
        """ brace_close : RBRACE """
        p[0] = p[1]
        p.set_lineno(0, p.lineno(1))

    def p_empty(self, p):
        """ empty : """
        p[0] = None

    def p_error(self, p):
        """ Throws a ParseError. """
        # If error recovery is added here in the future, make sure
        # _get_yacc_lookahead_token still works!
        #
        if p:
            self._parse_error(
                f"before: {p.value}",
                self._coord(lineno = p.lineno,
                            column = self.dlex.find_tok_column(p)))
        else:
            self._parse_error("At end of input", self.dlex.filename)
##
## Help functions
##

# An UPPAAL type consists of a basic type declaration, with a list
# of modifiers. For example:
#
#   int c[5, N];
#
# The basic declaration here is "int c", and the range is the modifier.
#
# Basic declarations are represented by TypeDecl (from module
# declarations_ast) and the modifiers are FuncDecl, RangeDecl and ArrayDecl.
#
# The standard states that whenever a new modifier is parsed, it should be
# added to the end of the list of modifiers. For example: Array Declarators
#
#   In a declaration T D where D has the form
#     D1 [constant-expression-opt]
#   and the type of the identifier in the declaration T D1 is
#   "type-modifier T", the type of the identifier of D is
#   "type-modifier array of T"
#
# This is what this method does. The declarator it receives
# can be a list of declarators ending with TypeDecl. It
# tacks the modifier to the end of this list, just before
# the TypeDecl.
#
# Additionally, the modifier may be a list itself.
# In this case, the whole modifier list is spliced into the new location.
def _type_modify_decl(decl, modifier):
    """
    Tacks a type modifier on a declarator, and returns
    the modified declarator.

    Note: the declarator and modifier may be modified.
    """
    modifier_head = modifier
    modifier_tail = modifier

    # The modifier may be a nested list. Reach its tail.
    while modifier_tail.type:
        modifier_tail = modifier_tail.type

    # If the decl is a basic type, just tack the modifier onto it.
    if isinstance(decl, declarations_ast.TypeDecl):
        modifier_tail.type = decl
        return modifier

    # Otherwise, the decl is a list of modifiers. Reach
    # its tail and splice the modifier onto the tail,
    # pointing to the underlying basic type.
    decl_tail = decl
    while not isinstance(decl_tail.type, declarations_ast.TypeDecl):
        decl_tail = decl_tail.type

    modifier_tail.type = decl_tail.type
    decl_tail.type = modifier_head
    return decl


def _add_declaration_specifier(declspec, newspec, kind, append = False):
    """
    Declaration specifiers are represented by a dictionary with the entries:

    * qual: a list of type qualifiers
    * is_typedef: a flag if the variable originates from a typedef definition
    * type: a list of type specifiers
    * function: a list of function specifiers

    This method is given a declaration specifier, and a new specifier of a
    given kind. If `append` is True, the new specifier is added to the end of
    the specifiers list, otherwise it's added at the beginning.

    Returns the declaration specifier, with the new specifier incorporated.
    """
    spec = declspec or {"qual": [], "is_typedef": False, "type": [], "function": []}

    if spec[kind] is None:
        return spec

    if kind == "is_typedef":
        spec["is_typedef"] = True
        return spec

    if append:
        spec[kind].append(newspec)
    else:
        spec[kind].insert(0, newspec)

    return spec
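# For illustration, a sketch of how the grammar rules above build the
# specifier dictionary for "const int" (the IdentifierType construction is
# taken from its usage elsewhere in this module):
#
#     int_type = declarations_ast.IdentifierType(["int"], coord = None)
#     spec = _add_declaration_specifier(None, "const", "qual")
#     spec = _add_declaration_specifier(spec, int_type, "type", append = True)
#     # -> {"qual": ["const"], "is_typedef": False,
#     #     "type": [int_type], "function": []}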
class SingleStatementParser(DeclarationParser):
    """
    SingleStatementParser class.

    Parses single expressions or statements (without requiring a semicolon or
    declared variables). Supports simple assignments, function calls, and
    expressions. Ensures the root node of the generated AST is always a FileAST.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Create a parser configured for single statement parsing
        self.cparser = yacc.yacc(
            module = self,
            # Custom starting rule for a single line expression/statement
            start = "single_line_expression",
            debug = kwargs.get("yacc_debug", False),
            optimize = kwargs.get("yacc_optimize", True),
            tabmodule = kwargs.get("yacctab", "pycparser.yacctab"),
            outputdir = kwargs.get("taboutputdir", "")
        )

    def _is_type_in_scope(self, name):
        """ Disable type checking to allow undeclared variables. """
        return False

    def p_single_line_expression(self, p):
        """ single_line_expression : expression """
        # Wrap the single expression in a FileAST node as required
        p[0] = declarations_ast.FileAST([p[1]])

    def p_expression_statement(self, p):
        """ expression_statement : expression """
        p[0] = p[1]

    def p_external_declaration_6(self, p):
        """ external_declaration : expression_statement """
        p[0] = [p[1]]
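# A minimal usage sketch for SingleStatementParser (an illustration; it
# assumes the relaxed scope rules accept undeclared identifiers such as "x"
# and that FileAST exposes its children via `ext`, as in pycparser-style ASTs):
#
#     stmt_parser = SingleStatementParser()
#     ast = stmt_parser.parse("x = x + 1")
#     assignment = ast.ext[0]  # a declarations_ast.Assignment node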