Module src.frontend.AST_builder
This module takes in the parse tree, and produces an Abstract Syntax Tree.This is done using a Depth First Traversal. By taking the Concrete Syntax Tree (Parse Tree)and converting it to an Abstract Syntax Tree we can begin to move towards an intermediate form.
Expand source code
"""
This module takes in the parse tree, and produces an Abstract Syntax Tree.This is done using a Depth First Traversal. By taking the Concrete Syntax Tree (Parse Tree)and converting it to an Abstract Syntax Tree we can begin to move towards an intermediate form.
"""
import re
def buildAST(parseHead):
"""
Produces an AST given the head of the Parse Tree
Args:
parseHead: The head node of the parse tree.
Returns:
The head of the Abstract Syntax Tree.
"""
head = parseHead
ASTHead = None
ASTcurrent = None
ntv = [(head, ASTcurrent)]
#DFS by iterating through a stack of nodes to visit
while ntv != []:
c = ntv[0]
typ = c[0].token
ASTcurrent = c[1]
expansion = None
#This block checks the type of the ASTNode that is being visited, and acts as a monstrous switch statement, for each visited node a proper AST segment or node is built
if typ == "program":
ASTHead = ASTNode("program", ASTcurrent)
ASTcurrent = ASTHead
elif typ == "initialization":
ASTcurrent.children.append(ASTNode("=", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("var", ASTcurrent))
ASTcurrent.children[-1].children.append(ASTNode(c[0].content[1].value, ASTcurrent))
# Check if there is a value to initialize the variable to
if not [x for x in c[0].content if 'value' in x.__dict__ and x.value == "="]:
ASTcurrent.children.append(ASTNode("0", ASTcurrent.parent))
expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "var_type"]
expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token != "var_type"]
elif typ == "designation":
ASTcurrent.children.append(ASTNode(c[0].content[1].content[0].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("var", ASTcurrent))
ASTcurrent.children[-1].children.append(ASTNode(c[0].content[0].content[0].value, ASTcurrent))
# Skip the assignment node
expansion = [(x, ASTcurrent) for x in c[0].content[1:] if 'content' in x.__dict__]
elif typ == "value":
value = str(c[0].content[0].value)
if re.match(r"0b[01]+", value):
# Convert to Binary
value = str(int(value, 2))
elif re.match(r"0x[\dA-Fa-f]+", value):
# Convert to Hex
value = str(int(value, 16))
elif re.match(r"0[0-7]{1,3}", value):
# Convert to Octal
value = str(int(value, 8))
ASTcurrent.children.append(ASTNode(value, ASTcurrent))
elif typ == "function definition":
ASTcurrent.children.append(ASTNode("func", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent.children.append(ASTNode("param", ASTcurrent))
# split the content of the function definition up
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'func_type']
expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'args']
expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'block']
elif typ == "functionDeclaration":
ASTcurrent.children.append(ASTNode("decl", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent.children.append(ASTNode("param", ASTcurrent))
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'func_type']
expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'args']
elif typ == "arg_terminal":
ASTcurrent.children.append(ASTNode("var", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
elif typ == "if":
# Check if the node is already within a branch
if ASTcurrent.name != "branch":
ASTcurrent.children.append(ASTNode("branch", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("case", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("", ASTcurrent))
# split the condition from the body and further if statements
expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'collation']
expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'if_body']
expansion += [(x, ASTcurrent.parent) for x in c[0].content if 'content' in x.__dict__ and x.token == "if_expansion"]
elif typ == "if_expansion":
# Check if it's the last else in the if statement
if c[0].content[1].content[0].token == 'content_terminal' and c[0].content[1].content[0].content[0].token == "if":
expansion = [(x, ASTcurrent) for x in c[0].content[1].content[0].content if 'content' in x.__dict__ and x.token == "if"]
else:
ASTcurrent.children.append(ASTNode("default", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
elif typ.startswith('arithmetic'):
if len(c[0].content) == 2:
# To catch unary operations
if typ.endswith('unary'):
# All operations with two children are pre ops.
if 'value' in c[0].content[0].__dict__:
# If the first child is a terminal it can either be a pre increment or a sizeof call
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
if ASTcurrent.name == "sizeof":
pass
else:
ASTcurrent.children.append(ASTNode("NULL", ASTcurrent))
ASTcurrent.children.append(ASTNode("", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
else:
# if the first value is a terminal its an operation that is either dereferencing, referencing, positive, negative, complement, and logical negation
ASTcurrent.children.append(ASTNode(c[0].content[0].content[0].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
elif typ.endswith('post'):
# The only post operations with two children are increment and decrement
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("", ASTcurrent))
ASTcurrent.children.append(ASTNode("NULL", ASTcurrent))
ASTcurrent = ASTcurrent.children[0]
pass
else:
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__]
elif len(c[0].content) == 3:
# If there are binary operations
if 'value' in c[0].content[1].__dict__:
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__]
else:
pass
pass
elif typ.startswith('collation'):
if len(c[0].content) == 3:
# To ensure that there are no false positives since arithmetic is directly linked to collation
if 'value' in c[0].content[1].__dict__:
if c[0].content[1].value == "&&" or c[0].content[1].value == "||":
ASTcurrent.children.insert(0, ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[0]
else:
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__]
elif typ == "return":
ASTcurrent.children.append(ASTNode("return", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
elif typ == "string literal":
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
elif typ == "function call":
ASTcurrent.children.append(ASTNode("call", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == "parameter"]
elif typ == "while loop":
ASTcurrent.children.append(ASTNode("while", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("", ASTcurrent))
expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "collation"]
expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and (x.token == "block" or x.token == "content_terminal")]
elif typ == "break":
ASTcurrent.children.append(ASTNode("break", ASTcurrent))
elif typ == "continue":
ASTcurrent.children.append(ASTNode("continue", ASTcurrent))
elif typ == "jump":
ASTcurrent.children.append(ASTNode("goto", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent))
elif typ == "goto":
ASTcurrent.children.append(ASTNode("label", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(f"{c[0].content[0].value}", ASTcurrent))
elif typ == "do loop":
ASTcurrent.children.append(ASTNode("do_while", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode("", ASTcurrent))
expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "collation"]
expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and (x.token == "block" or x.token == "content_terminal")]
elif typ == "func_type":
idx = [c[0].content.index(x) for x in c[0].content if 'value' in x.__dict__][0]
ASTcurrent.children.insert(0, ASTNode(c[0].content[idx].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[0]
elif typ == "func_modif_terminal":
ASTcurrent.children.insert(0, ASTNode(c[0].content[0].value, ASTcurrent))
elif typ == "var_type":
idx = [c[0].content.index(x) for x in c[0].content if 'value' in x.__dict__][0]
ASTcurrent.children.insert(0, ASTNode(c[0].content[idx].value, ASTcurrent))
ASTcurrent = ASTcurrent.children[0]
elif typ == "var_modif_terminal":
ASTcurrent.children.insert(0, ASTNode(c[0].content[0].value, ASTcurrent))
elif typ == "param_terminal":
if [x for x in c[0].content if 'content' in x.__dict__] == []:
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
elif typ == "for loop":
ASTcurrent.children.append(ASTNode("for", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
# Prepare the nodes for the different parts of the for loop
ASTcurrent.children.append(ASTNode("", ASTcurrent))
ASTcurrent.children.append(ASTNode("", ASTcurrent))
ASTcurrent.children.append(ASTNode("", ASTcurrent))
ASTcurrent.children.append(ASTNode("", ASTcurrent))
expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 1"]
expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 2"]
expansion += [(x, ASTcurrent.children[2]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 3"]
expansion += [(x, ASTcurrent.children[3]) for x in c[0].content if 'content' in x.__dict__ and (x.token == "content_terminal" or x.token == "block")]
elif typ == "switch":
ASTcurrent.children.append(ASTNode("branch", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
cases = []
expansion = []
# Unravel the tree in a DFS manner to retrieve all the cases
v = [x for x in c[0].content if 'content' in x.__dict__ and x.token == "switch_body"]
while v != []:
n = v[0]
extra = []
if n.token == "case" or n.token == "default":
cases.append(n)
else:
extra = [x for x in n.content if 'content' in x.__dict__]
v = extra + v[1:]
# Store the last case body for use when there is fallthrough.
last_cases = []
for case in reversed(cases):
ASTcurrent.children.insert(0, ASTNode(case.token, ASTcurrent))
if case.token != "default":
ASTcurrent.children[0].children.append(ASTNode("==", ASTcurrent.children[-1]))
ASTcurrent.children[0].children[-1].children.append(ASTNode("var", ASTcurrent.children[-1].children[-1]))
ASTcurrent.children[0].children[-1].children[-1].children.append(ASTNode(c[0].content[2].content[0].content[0].content[0].content[0].content[0].content[0].content[0].content[0].value, ASTcurrent.children[0].children[-1].children[-1]))
ASTcurrent.children[0].children[-1].children.append(ASTNode(case.content[1].content[0].value, ASTcurrent.children[-1].children[-1]))
ASTcurrent.children[0].children.append(ASTNode("body", ASTcurrent.children[-1]))
if [x for x in case.content if 'content' in x.__dict__ and x.token == 'case_body'] == []:
# Add all the previous case bodies until there is a statement that break the fallthrough
expansion += [(x, ASTcurrent.children[0].children[-1]) for x in last_cases]
else:
j = [x.token if 'content' in x.__dict__ else x.value for x in case.content].index('case_body')
# Check if the case will fall through or not
v = [x for x in case.content if 'content' in x.__dict__]
b = False
while v != []:
n = v[0]
extra = []
if n.token in ["break", "return", "goto"]:
b = True
break;
else:
extra = [x for x in n.content if 'content' in x.__dict__]
v = extra + v[1:]
if b:
# If there is breaking statement in the case body reset last_cases
last_cases = [case.content[j]]
else:
# else prepend the current case body to it because of reverse traversal of the cases
last_cases = [case.content[j]] + last_cases
expansion += [(x, ASTcurrent.children[0].children[-1]) for x in last_cases]
elif typ == "block":
ASTcurrent.children.append(ASTNode("body", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
elif typ == "content_terminal":
if ASTcurrent.name != "body" and 'content' in c[0].content[0].__dict__ and c[0].content[0].token != "block":
ASTcurrent.children.append(ASTNode("body", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
elif typ == "var_access":
# Abstracted the access of a single value
ASTcurrent.children.append(ASTNode("var", ASTcurrent))
ASTcurrent = ASTcurrent.children[-1]
ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent))
else:
pass
if expansion == None:
expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__]
ntv = expansion + ntv[1:]
#Removes the blank parents from some nodes that end up blank as a result of our AST building process
ntv = [ASTHead]
while ntv != []:
c = ntv[0]
if c.parent and c.parent.name == "":
c.parent.name = c.name
c.parent.children = c.children + c.parent.children[1:]
ntv = [x for x in c.children] + ntv[1:]
return ASTHead
class ASTNode():
"""
A class that builds an object representing the node in an AST.
It has it's children, parent, and name.
"""
def __init__(self, name, parent):
"""
Constructs an ASTNode
Args:
name: The name of the Node (Its contents).
parent: A node that is the parent of this current node.
"""
self.name = name
self.parent = parent
self.children = []
def print_AST(self, file=None, _prefix="", _last=True):
"""
Prints the AST given the head
Args:
file: The file to be written to (Defaults to Stdout).
_prefix: A string indicating the spacing from the left side of the screen.
_last: A boolean that indicates if a node is the last in it's immediate surroundings.
"""
print(f"{_prefix}{'`-- ' if _last else '|-- '}{self.name}", file=file)
_prefix += " " if _last else "| "
for i, child in enumerate(self.children):
_last = i == len(self.children)-1
child.print_AST(file, _prefix, _last)
def list_DFS(self):
"""
Retrieves a list of the nodes from the node its called from that is ordered in a Depth-First Manner.
Returns:
A list of all the nodes
"""
l = []
ntv = [self]
while ntv != []:
c = ntv[0]
l.append(c)
ntv = c.children + ntv[1:]
return l
def list_BFS(self):
"""
Retrieves a list of the nodes from the node its called from that is ordered in a Breadth-First Manner.
Returns:
A list of all the nodes
"""
l = []
ntv = [self]
while ntv != []:
c = ntv[0]
l.append(c)
ntv = ntv[1:] + c.children
return l
def list_PRT(self):
"""
Retrieves a list of the nodes from the node its called from that is based on a pre-order tree traversal.
Returns:
A list of all the nodes
"""
l = []
ntv = [self]
while ntv != []:
cur = ntv[-1]
l.append(cur)
ntv = ntv[:-1] + cur.children
return l
def list_INT(self):
"""
Retrieves a list of the nodes from the node its called from that is based on a in-order tree traversal.
Returns:
A list of all the nodes
"""
cur = self
stack = []
l = []
while True:
while cur != None:
stack.append(cur)
if cur.children != []:
cur = cur.children[0]
else:
cur = None
if stack == []:
break
cur = stack.pop()
l.append(cur)
if len(cur.children) > 1:
cur = cur.children[1]
else:
cur = None
return l
def list_POT(self):
"""
Retrieves a list of the nodes from the node its called from that is based on a
post-order tree traversal.
Returns:
A list of all the nodes
"""
l = []
ntv = [self]
while ntv != []:
cur = ntv[-1]
l.insert(0, cur)
ntv = ntv[:-1] + cur.children
return l
def __eq__(self, other):
if type(other) != type(ASTNode):
return False
for x, y in zip(self.list_DFS(), other.list_DFS()):
if x.name != y.name:
return False
return True
def __str__(self):
"""
Produces a string representation of the AST
"""
li = []
ntv = [("", self, True)]
while ntv:
li.append(ntv[0])
ntv = [(f"{ntv[0][0]}{' ' if ntv[0][2] else '| '}", x, i == len(ntv[0][1].children)-1 ) for i, x in enumerate(ntv[0][1].children)] + ntv[1:]
return "\n".join([f"{x[0]}{'`-- ' if x[2] else '|-- '}{x[1].name}" for x in li]) + "\n"
Functions
def buildAST(parseHead)
-
Produces an AST given the head of the Parse Tree
Args
parseHead
- The head node of the parse tree.
Returns
The head of the Abstract Syntax Tree.
Expand source code
def buildAST(parseHead): """ Produces an AST given the head of the Parse Tree Args: parseHead: The head node of the parse tree. Returns: The head of the Abstract Syntax Tree. """ head = parseHead ASTHead = None ASTcurrent = None ntv = [(head, ASTcurrent)] #DFS by iterating through a stack of nodes to visit while ntv != []: c = ntv[0] typ = c[0].token ASTcurrent = c[1] expansion = None #This block checks the type of the ASTNode that is being visited, and acts as a monstrous switch statement, for each visited node a proper AST segment or node is built if typ == "program": ASTHead = ASTNode("program", ASTcurrent) ASTcurrent = ASTHead elif typ == "initialization": ASTcurrent.children.append(ASTNode("=", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("var", ASTcurrent)) ASTcurrent.children[-1].children.append(ASTNode(c[0].content[1].value, ASTcurrent)) # Check if there is a value to initialize the variable to if not [x for x in c[0].content if 'value' in x.__dict__ and x.value == "="]: ASTcurrent.children.append(ASTNode("0", ASTcurrent.parent)) expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "var_type"] expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token != "var_type"] elif typ == "designation": ASTcurrent.children.append(ASTNode(c[0].content[1].content[0].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("var", ASTcurrent)) ASTcurrent.children[-1].children.append(ASTNode(c[0].content[0].content[0].value, ASTcurrent)) # Skip the assignment node expansion = [(x, ASTcurrent) for x in c[0].content[1:] if 'content' in x.__dict__] elif typ == "value": value = str(c[0].content[0].value) if re.match(r"0b[01]+", value): # Convert to Binary value = str(int(value, 2)) elif re.match(r"0x[\dA-Fa-f]+", value): # Convert to Hex value = str(int(value, 16)) elif re.match(r"0[0-7]{1,3}", value): # Convert to Octal value = str(int(value, 8)) ASTcurrent.children.append(ASTNode(value, ASTcurrent)) elif typ == "function definition": ASTcurrent.children.append(ASTNode("func", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent.children.append(ASTNode("param", ASTcurrent)) # split the content of the function definition up expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'func_type'] expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'args'] expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'block'] elif typ == "functionDeclaration": ASTcurrent.children.append(ASTNode("decl", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent.children.append(ASTNode("param", ASTcurrent)) expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'func_type'] expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'args'] elif typ == "arg_terminal": ASTcurrent.children.append(ASTNode("var", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) elif typ == "if": # Check if the node is already within a branch if ASTcurrent.name != "branch": ASTcurrent.children.append(ASTNode("branch", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("case", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("", ASTcurrent)) # split the condition from the body and further if statements expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == 'collation'] expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == 'if_body'] expansion += [(x, ASTcurrent.parent) for x in c[0].content if 'content' in x.__dict__ and x.token == "if_expansion"] elif typ == "if_expansion": # Check if it's the last else in the if statement if c[0].content[1].content[0].token == 'content_terminal' and c[0].content[1].content[0].content[0].token == "if": expansion = [(x, ASTcurrent) for x in c[0].content[1].content[0].content if 'content' in x.__dict__ and x.token == "if"] else: ASTcurrent.children.append(ASTNode("default", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] elif typ.startswith('arithmetic'): if len(c[0].content) == 2: # To catch unary operations if typ.endswith('unary'): # All operations with two children are pre ops. if 'value' in c[0].content[0].__dict__: # If the first child is a terminal it can either be a pre increment or a sizeof call ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] if ASTcurrent.name == "sizeof": pass else: ASTcurrent.children.append(ASTNode("NULL", ASTcurrent)) ASTcurrent.children.append(ASTNode("", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] else: # if the first value is a terminal its an operation that is either dereferencing, referencing, positive, negative, complement, and logical negation ASTcurrent.children.append(ASTNode(c[0].content[0].content[0].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] elif typ.endswith('post'): # The only post operations with two children are increment and decrement ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("", ASTcurrent)) ASTcurrent.children.append(ASTNode("NULL", ASTcurrent)) ASTcurrent = ASTcurrent.children[0] pass else: ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__] elif len(c[0].content) == 3: # If there are binary operations if 'value' in c[0].content[1].__dict__: ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__] else: pass pass elif typ.startswith('collation'): if len(c[0].content) == 3: # To ensure that there are no false positives since arithmetic is directly linked to collation if 'value' in c[0].content[1].__dict__: if c[0].content[1].value == "&&" or c[0].content[1].value == "||": ASTcurrent.children.insert(0, ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[0] else: ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__] elif typ == "return": ASTcurrent.children.append(ASTNode("return", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] elif typ == "string literal": ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) elif typ == "function call": ASTcurrent.children.append(ASTNode("call", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and x.token == "parameter"] elif typ == "while loop": ASTcurrent.children.append(ASTNode("while", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("", ASTcurrent)) expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "collation"] expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and (x.token == "block" or x.token == "content_terminal")] elif typ == "break": ASTcurrent.children.append(ASTNode("break", ASTcurrent)) elif typ == "continue": ASTcurrent.children.append(ASTNode("continue", ASTcurrent)) elif typ == "jump": ASTcurrent.children.append(ASTNode("goto", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[1].value, ASTcurrent)) elif typ == "goto": ASTcurrent.children.append(ASTNode("label", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(f"{c[0].content[0].value}", ASTcurrent)) elif typ == "do loop": ASTcurrent.children.append(ASTNode("do_while", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode("", ASTcurrent)) expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "collation"] expansion += [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__ and (x.token == "block" or x.token == "content_terminal")] elif typ == "func_type": idx = [c[0].content.index(x) for x in c[0].content if 'value' in x.__dict__][0] ASTcurrent.children.insert(0, ASTNode(c[0].content[idx].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[0] elif typ == "func_modif_terminal": ASTcurrent.children.insert(0, ASTNode(c[0].content[0].value, ASTcurrent)) elif typ == "var_type": idx = [c[0].content.index(x) for x in c[0].content if 'value' in x.__dict__][0] ASTcurrent.children.insert(0, ASTNode(c[0].content[idx].value, ASTcurrent)) ASTcurrent = ASTcurrent.children[0] elif typ == "var_modif_terminal": ASTcurrent.children.insert(0, ASTNode(c[0].content[0].value, ASTcurrent)) elif typ == "param_terminal": if [x for x in c[0].content if 'content' in x.__dict__] == []: ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) elif typ == "for loop": ASTcurrent.children.append(ASTNode("for", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] # Prepare the nodes for the different parts of the for loop ASTcurrent.children.append(ASTNode("", ASTcurrent)) ASTcurrent.children.append(ASTNode("", ASTcurrent)) ASTcurrent.children.append(ASTNode("", ASTcurrent)) ASTcurrent.children.append(ASTNode("", ASTcurrent)) expansion = [(x, ASTcurrent.children[0]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 1"] expansion += [(x, ASTcurrent.children[1]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 2"] expansion += [(x, ASTcurrent.children[2]) for x in c[0].content if 'content' in x.__dict__ and x.token == "for param 3"] expansion += [(x, ASTcurrent.children[3]) for x in c[0].content if 'content' in x.__dict__ and (x.token == "content_terminal" or x.token == "block")] elif typ == "switch": ASTcurrent.children.append(ASTNode("branch", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] cases = [] expansion = [] # Unravel the tree in a DFS manner to retrieve all the cases v = [x for x in c[0].content if 'content' in x.__dict__ and x.token == "switch_body"] while v != []: n = v[0] extra = [] if n.token == "case" or n.token == "default": cases.append(n) else: extra = [x for x in n.content if 'content' in x.__dict__] v = extra + v[1:] # Store the last case body for use when there is fallthrough. last_cases = [] for case in reversed(cases): ASTcurrent.children.insert(0, ASTNode(case.token, ASTcurrent)) if case.token != "default": ASTcurrent.children[0].children.append(ASTNode("==", ASTcurrent.children[-1])) ASTcurrent.children[0].children[-1].children.append(ASTNode("var", ASTcurrent.children[-1].children[-1])) ASTcurrent.children[0].children[-1].children[-1].children.append(ASTNode(c[0].content[2].content[0].content[0].content[0].content[0].content[0].content[0].content[0].content[0].value, ASTcurrent.children[0].children[-1].children[-1])) ASTcurrent.children[0].children[-1].children.append(ASTNode(case.content[1].content[0].value, ASTcurrent.children[-1].children[-1])) ASTcurrent.children[0].children.append(ASTNode("body", ASTcurrent.children[-1])) if [x for x in case.content if 'content' in x.__dict__ and x.token == 'case_body'] == []: # Add all the previous case bodies until there is a statement that break the fallthrough expansion += [(x, ASTcurrent.children[0].children[-1]) for x in last_cases] else: j = [x.token if 'content' in x.__dict__ else x.value for x in case.content].index('case_body') # Check if the case will fall through or not v = [x for x in case.content if 'content' in x.__dict__] b = False while v != []: n = v[0] extra = [] if n.token in ["break", "return", "goto"]: b = True break; else: extra = [x for x in n.content if 'content' in x.__dict__] v = extra + v[1:] if b: # If there is breaking statement in the case body reset last_cases last_cases = [case.content[j]] else: # else prepend the current case body to it because of reverse traversal of the cases last_cases = [case.content[j]] + last_cases expansion += [(x, ASTcurrent.children[0].children[-1]) for x in last_cases] elif typ == "block": ASTcurrent.children.append(ASTNode("body", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] elif typ == "content_terminal": if ASTcurrent.name != "body" and 'content' in c[0].content[0].__dict__ and c[0].content[0].token != "block": ASTcurrent.children.append(ASTNode("body", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] elif typ == "var_access": # Abstracted the access of a single value ASTcurrent.children.append(ASTNode("var", ASTcurrent)) ASTcurrent = ASTcurrent.children[-1] ASTcurrent.children.append(ASTNode(c[0].content[0].value, ASTcurrent)) else: pass if expansion == None: expansion = [(x, ASTcurrent) for x in c[0].content if 'content' in x.__dict__] ntv = expansion + ntv[1:] #Removes the blank parents from some nodes that end up blank as a result of our AST building process ntv = [ASTHead] while ntv != []: c = ntv[0] if c.parent and c.parent.name == "": c.parent.name = c.name c.parent.children = c.children + c.parent.children[1:] ntv = [x for x in c.children] + ntv[1:] return ASTHead
Classes
class ASTNode (name, parent)
-
A class that builds an object representing the node in an AST. It has it's children, parent, and name.
Constructs an ASTNode
Args
name
- The name of the Node (Its contents).
parent
- A node that is the parent of this current node.
Expand source code
class ASTNode(): """ A class that builds an object representing the node in an AST. It has it's children, parent, and name. """ def __init__(self, name, parent): """ Constructs an ASTNode Args: name: The name of the Node (Its contents). parent: A node that is the parent of this current node. """ self.name = name self.parent = parent self.children = [] def print_AST(self, file=None, _prefix="", _last=True): """ Prints the AST given the head Args: file: The file to be written to (Defaults to Stdout). _prefix: A string indicating the spacing from the left side of the screen. _last: A boolean that indicates if a node is the last in it's immediate surroundings. """ print(f"{_prefix}{'`-- ' if _last else '|-- '}{self.name}", file=file) _prefix += " " if _last else "| " for i, child in enumerate(self.children): _last = i == len(self.children)-1 child.print_AST(file, _prefix, _last) def list_DFS(self): """ Retrieves a list of the nodes from the node its called from that is ordered in a Depth-First Manner. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: c = ntv[0] l.append(c) ntv = c.children + ntv[1:] return l def list_BFS(self): """ Retrieves a list of the nodes from the node its called from that is ordered in a Breadth-First Manner. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: c = ntv[0] l.append(c) ntv = ntv[1:] + c.children return l def list_PRT(self): """ Retrieves a list of the nodes from the node its called from that is based on a pre-order tree traversal. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: cur = ntv[-1] l.append(cur) ntv = ntv[:-1] + cur.children return l def list_INT(self): """ Retrieves a list of the nodes from the node its called from that is based on a in-order tree traversal. Returns: A list of all the nodes """ cur = self stack = [] l = [] while True: while cur != None: stack.append(cur) if cur.children != []: cur = cur.children[0] else: cur = None if stack == []: break cur = stack.pop() l.append(cur) if len(cur.children) > 1: cur = cur.children[1] else: cur = None return l def list_POT(self): """ Retrieves a list of the nodes from the node its called from that is based on a post-order tree traversal. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: cur = ntv[-1] l.insert(0, cur) ntv = ntv[:-1] + cur.children return l def __eq__(self, other): if type(other) != type(ASTNode): return False for x, y in zip(self.list_DFS(), other.list_DFS()): if x.name != y.name: return False return True def __str__(self): """ Produces a string representation of the AST """ li = [] ntv = [("", self, True)] while ntv: li.append(ntv[0]) ntv = [(f"{ntv[0][0]}{' ' if ntv[0][2] else '| '}", x, i == len(ntv[0][1].children)-1 ) for i, x in enumerate(ntv[0][1].children)] + ntv[1:] return "\n".join([f"{x[0]}{'`-- ' if x[2] else '|-- '}{x[1].name}" for x in li]) + "\n"
Methods
def list_BFS(self)
-
Retrieves a list of the nodes from the node its called from that is ordered in a Breadth-First Manner.
Returns
A
list
ofall
the
nodes
Expand source code
def list_BFS(self): """ Retrieves a list of the nodes from the node its called from that is ordered in a Breadth-First Manner. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: c = ntv[0] l.append(c) ntv = ntv[1:] + c.children return l
def list_DFS(self)
-
Retrieves a list of the nodes from the node its called from that is ordered in a Depth-First Manner.
Returns
A
list
ofall
the
nodes
Expand source code
def list_DFS(self): """ Retrieves a list of the nodes from the node its called from that is ordered in a Depth-First Manner. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: c = ntv[0] l.append(c) ntv = c.children + ntv[1:] return l
def list_INT(self)
-
Retrieves a list of the nodes from the node its called from that is based on a in-order tree traversal.
Returns
A
list
ofall
the
nodes
Expand source code
def list_INT(self): """ Retrieves a list of the nodes from the node its called from that is based on a in-order tree traversal. Returns: A list of all the nodes """ cur = self stack = [] l = [] while True: while cur != None: stack.append(cur) if cur.children != []: cur = cur.children[0] else: cur = None if stack == []: break cur = stack.pop() l.append(cur) if len(cur.children) > 1: cur = cur.children[1] else: cur = None return l
def list_POT(self)
-
Retrieves a list of the nodes from the node its called from that is based on a post-order tree traversal.
Returns
A
list
ofall
the
nodes
Expand source code
def list_POT(self): """ Retrieves a list of the nodes from the node its called from that is based on a post-order tree traversal. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: cur = ntv[-1] l.insert(0, cur) ntv = ntv[:-1] + cur.children return l
def list_PRT(self)
-
Retrieves a list of the nodes from the node its called from that is based on a pre-order tree traversal.
Returns
A
list
ofall
the
nodes
Expand source code
def list_PRT(self): """ Retrieves a list of the nodes from the node its called from that is based on a pre-order tree traversal. Returns: A list of all the nodes """ l = [] ntv = [self] while ntv != []: cur = ntv[-1] l.append(cur) ntv = ntv[:-1] + cur.children return l
def print_AST(self, file=None)
-
Prints the AST given the head
Args
file
- The file to be written to (Defaults to Stdout).
_prefix
- A string indicating the spacing from the left side of the screen.
_last
- A boolean that indicates if a node is the last in it's immediate surroundings.
Expand source code
def print_AST(self, file=None, _prefix="", _last=True): """ Prints the AST given the head Args: file: The file to be written to (Defaults to Stdout). _prefix: A string indicating the spacing from the left side of the screen. _last: A boolean that indicates if a node is the last in it's immediate surroundings. """ print(f"{_prefix}{'`-- ' if _last else '|-- '}{self.name}", file=file) _prefix += " " if _last else "| " for i, child in enumerate(self.children): _last = i == len(self.children)-1 child.print_AST(file, _prefix, _last)