Module src.optimizer.IR_Lv1_Builder

This module serves to construct the first linear intermediate representation in the compiler.

Expand source code
"""
This module serves to construct the first linear intermediate representation in the compiler.
"""
import os
import re
import sys
import math

from inspect import getsourcefile
from importlib.machinery import SourceFileLoader
from copy import deepcopy, copy

irl = SourceFileLoader("IRLine", f"{os.path.dirname(os.path.abspath(getsourcefile(lambda:0)))}/IRLine.py").load_module()
ast = SourceFileLoader("AST_builder", f"{os.path.dirname(os.path.abspath(getsourcefile(lambda:0)))}/../frontend/AST_builder.py").load_module()

class LevelOneIR():
    """
    Constructs the linear representation of the input program in order to allow for optimizations such as constant folding, constant proagation, as well as removal of unused variables and functions depending on the optimization level provided as a commandline argument.
    """
    def __init__(self,astHead,symTable):
        """
        Args:
            astHead: The root node of the AST
            symTable: The symbol table for the input
        """
        self.astHead = astHead
        self.symTable = symTable
        self.IR = []

    def construct(self):
        """
        Constructs the linear representation for the object.

        Returns:
            IR: A collection of IRLine objects which can be optimized and/or transformed into assembly.
        """
        sym = self.symTable
        ntv = self.astHead

        varIndex = 0
        lastVarName = "_" + str(varIndex)
        bodyList = []

        # list of all bodies within functions in our C program
        for x in self.astHead.children:
            if x.name == "func":
                # Each entry is the '(func_node, body_node)'
                bodyList.append((x,x.children[3]))

        # Digits are used for contructing unique labels
        returnDigit = 1234
        labelDigit = returnDigit + 1
        lines = []

        for i in bodyList:
            # Beginning of fuction wrapper
            lines.extend(beginWrapper(i, returnDigit))

            # Body of function
            tmp_lines , labelDigit = returnLines(i[1], returnDigit, labelDigit)
            lines.extend(tmp_lines)

            # add a return statement if function didnt have one
            if not isinstance(lines[-1].treeList[-1], irl.IRReturn):
                lines.append(irl.IRLine.singleEntry(irl.IRReturn(None)))

            # End of function wrapper, add closing bracket
            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False, functionDecl=True)))

            # NOTE: 'labelDigit' should be the newest and unused digit
            returnDigit = labelDigit

            self.IR = lines

        # RESET THE FRICKING GLOBAL
        irl.tmpVarIndex = 0

        return self.IR

    def optimize(self, opt):
        """
        Performs the optimizations based on the level of optimization

        Args:
            opt: The level of optimization to be performed.
        """
        if opt > 0:
            self.remove_unused_funcs()
            self.remove_unused_vars()
            pass

        # constant folding/propagation
        if opt > 1:
            # A dictionary containing all the values of variables that can at some point be reduced.
            self.var_values = {
                func.name : {
                    var.name : "Undef"
                    for var in self.symTable.symbols if var.entry_type == 0 and var.scope.startswith(f"/{func.name}")
                }
                for func in [sym for sym in self.symTable.symbols if sym.entry_type == 1]
            }

            # NOTE: As a method of knowing whether it is safe to continue on to the next node in the list the following measures have been implemented:
            # Both constant folding and constant propagation only consider a singular value.
            # They both return a boolean describing whether the node was altered as well as some descriptor of what it did.
                # In the case of constant folding, the descriptor is the new simplified node which then has to be assigned to the correct index.
                # In the case of constant propagation, the descriptor is a dictionary containing the updated values relevant to the scope of the IRLine object meaning that tempoary variables are stored within the IRLine but now within the scope as a whole.
            # The idea is that while either of these methods can alter the node you keep executing them on the node so that if the loop terminates the program is sure that the node cannot be reduced further.
            # Another final optimization that can be done would be to check if all the nodes in an IRLine object are `IRAssignment`, if so the last one is the only `IRAssignment` node needed, and the others can be removed.

            prev_maj = 0
            cur_scope = ""
            tmp_vals = {}
            for major, minor, node in [(major, minor, node) for major, tl in enumerate(self.IR) for minor, node in enumerate(tl.treeList)]:
                if major != prev_maj:
                    tmp_vals = {}

                if isinstance(node, irl.IRFunctionDecl):
                    cur_scope = node.name
                elif isinstance(node, irl.IRGoTo) or isinstance(node, irl.IRJump) or isinstance(node, irl.IRIf):
                    for val in tmp_vals.items():
                        if val[0] in self.var_values[cur_scope]:
                            self.var_values[cur_scope][val[0]] = "Undef"
                        tmp_vals[val[0]] = "Undef"
                else:
                    for val in self.var_values[cur_scope].items():
                        tmp_vals[val[0]] = val[1]

                ncf = False
                ncp = False

                while 1:
                    ncf, tmp = self.constant_folding(node)

                    if ncf:
                        self.IR[major].treeList[minor] = tmp
                        node = tmp

                    ncp, vals = self.constant_propagation(node, tmp_vals)

                    for val in vals.items():
                        if val[0] in self.var_values[cur_scope]:
                            self.var_values[cur_scope][val[0]] = val[1]
                        tmp_vals[val[0]] = val[1]

                    if not (ncf or ncp):
                        break
                prev_maj = major

            self.cleanup()

    def remove_unused_vars(self):
        """
        Removes unused variables based on the number of references found in the symbol table
        """
        ir = self.IR
        scope = ""

        #get variables to remove
        vars_temp = [[x.name, x.scope] for x in self.symTable.symbols if x.entry_type != 2 and x.entry_type != 1 and x.entry_type != 3 and len(x.references) == 0]
        #get function from symbol table.
        funcs = [x.name for x in self.symTable.symbols if x.entry_type == 1]

        final_ir = []

        for irLine in ir:

            for idx, irNode in enumerate(irLine.treeList):
                #check if function declaration, to set new scope
                if isinstance(irNode, irl.IRFunctionDecl):
                    scope = irNode.name

                #check if declaration
                if isinstance(irNode, irl.IRVariableInit):
                    #check if variable needs to be skippped
                    if [x for x in vars_temp if x[0] == irNode.var and scope in x[1]] != []:
                        del irLine.treeList[idx]

                #check if usage
                elif(isinstance(irNode, irl.IRAssignment)):
                    if [x for x in vars_temp if x[0] == irNode.lhs and scope in x[1]] != []:
                        del irLine.treeList[idx]

            #irLine has no irNode inside...so we dont add it to new IR list
            if irLine.treeList == []:
                pass
            else:
                final_ir.append(irLine)


        self.IR = final_ir

    def cleanup(self):
        """
        Cleans up the IR by looking at the considering the number of references withing the local `IRLine` object. This allows for the removal of unused tempoary variables.
        """

        class ref():
            def __init__(self, init_index, init_ref_type, side):
                self.refs = [init_index]
                self.ref_types = [init_ref_type]
                self.side = [side]

            def add(self, index, ref_type, side):
                self.refs.append(index)
                self.ref_types.append(ref_type)
                self.side.append(side)

            def __str__(self):
                return f"\t{self.refs}\t{self.ref_types}\t{self.side}"

            def __repr__(self):
                return self.__str__()

        gr = {}
        for line in self.IR:
            lr = {}

            for i, node in enumerate(line.treeList):
                if isinstance(node, irl.IRAssignment):
                    if node.lhs in lr:
                        lr[node.lhs].add(i, "assignment", 0)
                    else:
                        lr[node.lhs] = ref(i, "assignment", 0)

                    tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                    if node.rhs in lr and not tmp.isnumeric():
                        lr[node.rhs].add(i, "assignment", 1)
                    elif not tmp.isnumeric():
                        lr[node.rhs] = ref(i, "assignment", 1)
                elif isinstance(node, irl.IRArth):
                    if node.var in lr:
                        lr[node.var].add(i, "assignment", 0)
                    else:
                        lr[node.var] = ref(i, "assignment", 0)

                    tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                    if node.lhs in lr and not tmp.isnumeric():
                        lr[node.lhs].add(i, "arithmetic", 1)
                    elif not tmp.isnumeric():
                        lr[node.lhs] = ref(i, "arithmetic", 1)

                    if node.rhs:
                        tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                        if node.rhs in lr and not tmp.isnumeric():
                            lr[node.rhs].add(i, "arithmetic", 2)
                        elif not tmp.isnumeric():
                            lr[node.rhs] = ref(i, "arithmetic", 2)
                elif isinstance(node, irl.IRFunctionAssign):
                    if node.lhs in lr:
                        lr[node.lhs].add(i, "assignment", 0)
                    else:
                        lr[node.lhs] = ref(i, "assignment", 0)

                    for param in node.params:
                        tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                        if param in lr and not tmp.isnumeric():
                            lr[node.lhs].add(i, "param", 1)
                        elif not tmp.isnumeric():
                            lr[node.lhs] = ref(i, "param", 1)
                elif isinstance(node, irl.IRIf):
                    tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                    if node.lhs in lr and not tmp.isnumeric():
                        lr[node.lhs].add(i, "collation", 1)
                    elif not tmp.isnumeric():
                        lr[node.lhs] = ref(i, "collation", 1)

                    tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                    if node.rhs in lr and not tmp.isnumeric():
                        lr[node.rhs].add(i, "collation", 2)
                    elif not tmp.isnumeric():
                        lr[node.rhs] = ref(i, "collation", 2)
                elif isinstance(node, irl.IRSpecial):
                    if node.var in lr:
                        lr[node.var].add(i, "assignment", 0)
                        lr[node.var].add(i, "assignment", 1)
                    else:
                        lr[node.var] = ref(i, "assignment", 0)
                        lr[node.var].add(i, "assignment", 1)
                elif isinstance(node, irl.IRReturn):
                    if node.value:
                        tmp = node.value.lstrip('-+').replace('.', '', 1)
                        if node.value in lr:
                            lr[node.value].add(i, "return", 0)
                        elif not tmp.isnumeric():
                            lr[node.value] = ref(i, "return", 0)
            rem_list = []

            for var in lr.items():
                # print(var)
                for i, typ in enumerate(var[1].ref_types):
                    if (
                        i+1 < len(var[1].ref_types)
                        and
                        typ == "assignment"
                        and
                        var[1].side[i] == 0
                        and
                        var[1].side[i+1] == 0
                        and
                        var[1].ref_types[i+1] == "assignment"
                        ):
                        rem_list.append(var[1].refs[i])
                    elif (
                        i+1 == len(var[1].ref_types)
                        and
                        typ == "assignment"
                        and
                        var[1].side[i] == 0
                        # and
                        # var[1].side[i+1] == 0
                        and
                        (
                            var[1].refs[i]+1 != len(line.treeList)
                            or
                            len(var[1].ref_types) > 1)
                        ):
                        rem_list.append(var[1].refs[i])

                # iterate over reference types and see if there are two assignments after each other.


            for node in reversed(sorted(rem_list)):
                line.treeList.pop(node)

    def remove_unused_funcs(self):
        """
        Removes function that are not reference in the symbol table. They can be removed if not used.
        """



        ir = self.IR
        inFunction = False
        to_remove = []

        for idx, irLine in enumerate(ir):

            # Only need to focus on first Node in each IRLine
            ir_firstNode = irLine.treeList[0]

            # Delete IRLine if we are in a 'unused' function
            # Mark 'inFunction' to False if beginning of new function
            if inFunction == True:
                if isinstance(ir_firstNode, irl.IRFunctionDecl):
                    inFunction = False
                else:
                    to_remove.append(idx)
                    continue

            # Check for function
            if isinstance(ir_firstNode, irl.IRFunctionDecl):
                func_name = ir_firstNode.name
                referenceNum = len([x.references for x in self.symTable.symbols if func_name == x.name and x.entry_type == 1][0])

                # If reference is 0, it is unused
                if referenceNum == 0 and func_name != "main":
                    to_remove.append(idx)
                    inFunction = True

        # Actually delete unused function IRLine's
        for i in to_remove[::-1]:
            del ir[i]


        self.IR = ir

    def constant_folding(self,x):
        """
        Peforms constant folding  on a given line in the IR

        Args:
            x: an IRNode object

        Returns:
            A boolean indicating whether it changed anything, and the node that has been modified
        """
        changed = False
        if isinstance(x,irl.IRArth):
            notFound = True
            op = False
            #get the operator being used
            if(x.rhs != None and x.lhs != None):
                if(x.operator == "+"):
                    op = lambda lhs, rhs : lhs + rhs
                elif(x.operator == "-"):
                    op = lambda lhs, rhs : lhs - rhs
                elif(x.operator == "*"):
                    op = lambda lhs, rhs : lhs * rhs
                elif(x.operator == "/"):
                    op = lambda lhs, rhs : lhs / rhs
                elif(x.operator == "%"):
                    op = lambda lhs, rhs : math.fmod(lhs, rhs)
                elif(x.operator == "<<"):
                    # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 << 31 compared to 10 << 20
                    op = lambda lhs, rhs : lhs << rhs
                elif(x.operator == ">>"):
                    # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 >> 31 compared to 10 >> 20
                    op = lambda lhs, rhs : lhs >> rhs
                elif(x.operator == "|"):
                    op = lambda lhs, rhs : lhs | rhs
                elif(x.operator == "&"):
                    op = lambda lhs, rhs : lhs & rhs
                elif(x.operator == "^"):
                    op = lambda lhs, rhs : lhs ^ rhs
            else:
                if(x.operator == "~"):
                    op = lambda lhs, rhs : ~lhs
                elif(x.operator == "-"):
                    op = lambda lhs, rhs : 0 - lhs
                elif(x.operator == "+"):
                    op = lambda lhs, rhs : 0 + lhs
                elif(x.operator == "!"):
                    op = lambda lhs, rhs : not lhs
            #get the left hand side and the right hand side
            try:
                lhs = int(x.lhs)
                if(not x.rhs == None):
                    rhs = int(x.rhs)
                else:
                    rhs = None
                notFound = False
            except ValueError:
                try:
                    lhs = float(x.lhs)
                    if(not x.rhs == None):
                        rhs = float(x.rhs)
                    else:
                        rhs = None
                    notFound = False
                except ValueError:
                    pass

            #if we found all components, replace the node
            if(not notFound and op):
                if rhs and rhs < 0:
                    if x.operator == "<<" or x.operator == ">>":
                        raise ValueError("shifting by negative number is undefined behavior.")

                newValue = lambda lhs, rhs, op : op(lhs,rhs)
                val = newValue(lhs,rhs,op) if isinstance(lhs, float) or isinstance(rhs, float) else math.floor(newValue(lhs,rhs,op))
                newAss = irl.IRAssignment(x.var, str(val))
                changed = True
                return changed,newAss
        return changed,x

    def constant_propagation(self, node, var_val):
        """
        Performs constant propagation on a given IRNode if deemed possible.

        Args:
            node: The IRNode object to be considered
            var_val: A dictionary of all the variables and their values which are deemed safe to replace

        Returns:
            The return value describes whether any propogation occurred during the function call
        """
        # NOTE: The current issue is that the propogation is goint to have to exit and re enter the function every time to achieve the following:
        # Clear the tempoary variables per line as they are re used
        # Avoid propogating too far so that assignments in the future that may happen after certain other computations and assignments propogate the correct value

        # TODO: Fix to detect whether the reference is necessary or not. Eg, comparisons in loops compared to simple ifs
        # TODO: Ensure that when comming across a value that is not computable or propogatable such as after some control flow, the dictionary value becomes something distinguisable so that the expression is left alone
        changed = False
        if isinstance(node, irl.IRIf):
            if node.lhs in var_val and var_val[node.lhs] != "Undef":
                node.lhs = var_val[node.lhs]
                changed = True
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                changed = True

        elif isinstance(node, irl.IRArth):
            if node.lhs in var_val and var_val[node.lhs] != "Undef":
                node.lhs = var_val[node.lhs]
                changed = True
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                changed = True

        elif isinstance(node, irl.IRAssignment):
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                var_val[node.lhs] = node.rhs
                changed = True
            else:
                tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                if tmp.isnumeric():
                    var_val[node.lhs] = node.rhs

        elif isinstance(node, irl.IRFunctionAssign):
            for j, param in enumerate(node.params):
                if param in var_val and var_val[param] != None:
                    node.params[j] = var_val[param]
                    changed = True

        return changed, var_val

    def __str__(self):
        return "\n".join([str(x) for x in self.IR]) + "\n"

def beginWrapper(function_tuple, returnDigit):
    """
    Produces the function wrappers and initializes the return digit for the given function.

    Returns:
        lines: The lines of the start of the function
    """
    lines = []
    params = []
    func_type = function_tuple[0].children[0].name
    func_name = function_tuple[0].children[1].name

    for var in function_tuple[0].children[2].children:
        if var.name == "var":

            # list of all modifiers
            modifiers = f"{' '.join([x.name for x in var.children[0].children])}{' ' if [x.name for x in var.children[0].children] else ''}"

            # append entry for parameter
            params.append(f"{' '.join([modifiers])}{var.children[0].name} rV_{var.children[1].name}")

    lines.append(irl.IRLine.singleEntry(irl.IRFunctionDecl(func_name, params)))
    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True, functionDecl=True)))

    if func_type != "void":
        modifiers = f"{''.join([x.name for x in function_tuple[0].children[0].children if x.name in ['signed', 'unsigned']])}{' ' if [x.name for x in function_tuple[0].children[0].children if x.name in ['signed', 'unsigned']] else ''}"
        lines.append(irl.IRLine.singleEntry(irl.IRVariableInit(modifiers, func_type, f"D.{returnDigit}")))

    return lines

def returnLines(node,returnDigit,labelDigit,successDigit=None,failureDigit=None, breakDigit=None, continueDigit=None):
    """
    Produces a linear representation of the content nested within `node`.

    Args:
        node: The AST node.
        returnDigit: The variable to store the return value.
        labelDigit: A list of all previously used label values.
        successDigit: The label value to jump to if there is a `continue`.
        failureDigit: The label value to jump to if there is a `break`.
        breakDigit: The label used for break statements to have a correct label so the control flow is correct.

        continueDigit: The label used for a continue statement to know the 'ultimate' success of whatever block it is in

    Returns:
        lines: The lines produced from the content.
        labelDigit: The list of all used label values.
    """
    lines = []
    if node.name == "body":
        il = [x.children[0] for x in node.children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]

        # It is the scopes responsibility to ensure that the content is wrapped in braces
        if il:
            for x in il:
                modifiers = f"{' '.join([y.name for y in x.children[0].children])}{' ' if [y.name for y in x.children[0].children] else ''}"
                lines.append(irl.IRLine.singleEntry(irl.IRVariableInit(modifiers, x.children[0].name, f"rV_{x.children[1].name}"), [labelDigit]))

    for element in node.children:
        try:
            splits = [["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "|=", "&=", "^=", "<=", ">=", "="],["for"],["body"],["branch"],["return"],["call"],["while", "do_while"],["break"],["continue"],["goto"],["label"], ["++", "--"]]
            ind = [splits.index(x) for x in splits if element.name in x]
            ind = ind[0]
            if ind == 0:
                line = irl.IRLine(element, tvs=[], labelList=[labelDigit])
                tvs, labelList = line.retrieve()
                lines.append(line)

                if labelList != []:
                    labelDigit = labelList[-1]

            elif ind == 1:
                # For Loop
                ns = False
                # Initialize variable
                if element.children[0].children != []:
                    initNode = ast.ASTNode("body", None)
                    initNode.children.append(element.children[0])
                    if [x.children[0] for x in initNode.children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True
                    tmp, labelDigit = returnLines(initNode, returnDigit, labelDigit)
                    lines.extend(tmp)

                # Keep track of label for conditional block (if conditional exist)
                conditionLabel = None
                if element.children[1].children != []:

                    # Append an IRGoTo node
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                    conditionLabel = labelDigit
                    labelDigit += 1

                # Append an IRJump node for the label that belongs to the start of the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{labelDigit}>"), [labelDigit]))

                # Assign labels for start/end of loop
                loopStart = labelDigit
                loopEnd = labelDigit + 1
                labelDigit += 2

                # recursivly deal with the body of the loop
                tmp, labelDigit = returnLines(element.children[3], returnDigit, labelDigit, loopStart, loopEnd, breakDigit=loopEnd, continueDigit=loopStart)
                lines.extend(tmp)

                # Add the "end-of-loop" assignment/arithmetic
                if element.children[2].children != []:
                    initNode = ast.ASTNode("tmp", None)
                    initNode.children.append(element.children[2])
                    tmp, labelDigit = returnLines(initNode, returnDigit, labelDigit)
                    lines.extend(tmp)

                # Start of conditionals for the loop
                if conditionLabel != None:
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{conditionLabel}>"), [labelDigit]))

                    tmpNode = element.children[1]
                    
                    if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                        tmpNode.children == [] or
                        tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                        # We create an AST Node comparing the value to 0 if there 
                        # are no comparison/logical operators present in the condition.

                        tmpNode = ast.ASTNode("!=", None)
                        tmpNode.children.append(element.children[1])
                        tmpNode.children.append(ast.ASTNode("0", tmpNode))

                    line = irl.IRLine(tmpNode, tvs=[], success=loopStart, failure=loopEnd, labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1]

                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{loopEnd}>"), [labelDigit]))

                else:
                    # No conditional (jump to start of body...always True)
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{loopStart}>"), [labelDigit]))

                # increment twice for new index
                labelDigit += 2

                if ns:
                    # Append an IRBracket node
                    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

            elif ind == 2:
                # Append an IRBracket node
                lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))

                tmp, labelDigit = returnLines(element, returnDigit, labelDigit, successDigit, failureDigit)
                lines.extend(tmp)

                # Append an IRBracket node
                lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

            elif ind == 3:
                # If/Else statement(s)

                # list of goto labels to be appended at end of if blocks
                end_if = []

                #for each case in a branch
                for case in element.children:
                    ns = False
                    #create label for body if true and label to skip to correct place if false.
                    success_label = labelDigit
                    labelDigit += 1
                    failure_label = labelDigit
                    labelDigit += 1

                    #default is an 'else'. Only has one child, body
                    if case.name == "default":
                        if [x.children[0] for x in case.children[0].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                            # Append an IRBracket node
                            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                            ns = True

                        #Get lines for the body and assign new labeldigit
                        tmp, labelDigit = returnLines(case.children[0], returnDigit, labelDigit, success_label, failure_label, failureDigit, successDigit)
                        lines.extend(tmp)

                        if ns:
                            # Append an IRBracket node
                            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                        break

                    tmpNode = case.children[0]
                    if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                        tmpNode.children == [] or
                        tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                        # We create an AST Node comparing the value to 0 if there 
                        # are no comparison/logical operators present in the condition.

                        tmpNode = ast.ASTNode("!=", None)
                        tmpNode.children.append(case.children[0])
                        tmpNode.children.append(ast.ASTNode("0", tmpNode))

                    #break down argument for if statement into smaller if statements
                    line = irl.IRLine(tmpNode, tvs=[], success=success_label, failure=failure_label, labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1] + 1


                    #Add goto for body statement
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{success_label}>"), [labelDigit]))

                    if [x.children[0] for x in case.children[1].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True

                    #Get lines for the if body and assign new labeldigit
                    tmp, labelDigit = returnLines(case.children[1], returnDigit,  labelDigit, success_label, failure_label, failureDigit, successDigit)
                    lines.extend(tmp)

                    if ns:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                    #append goto for end of if body
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                    end_if.append(labelDigit)
                    labelDigit += 1

                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{failure_label}>"), [labelDigit]))

                for i in end_if:
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{i}>"), [labelDigit]))

            elif ind == 4:
                #Return

                # If returns some type of arithmetic expression, breaks it down.
                if len(element.children) > 0 and element.children[0].children != []:
                    line = irl.IRLine(element.children[0], tvs=[], labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    if line.treeList != []:
                        lines.append(line)
                        lines[-1].treeList.append(irl.IRAssignment(f"D.{returnDigit}", f"{tvs[-1]}"))

                    else:
                        lines.append(irl.IRLine.singleEntry(irl.IRAssignment(f"D.{returnDigit}", f"{tvs[-1]}"), [labelDigit]))

                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(f"D.{returnDigit}"), [labelDigit]))

                    if labelList != []:
                        labelDigit = labelList[-1]

                elif len(element.children) > 0 and element.children[0].children == []:
                    lines.append(irl.IRLine.singleEntry(irl.IRAssignment(f"D.{returnDigit}", f"{element.children[0].name}"), [labelDigit]))
                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(f"D.{returnDigit}"), [labelDigit]))

                # Returns nothing
                else:
                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(None), [labelDigit]))

            elif ind == 5:
                #Function Call
                func_call = element.children[0].name

                # function call has parameters
                if element.children[0] != []:
                    line = irl.IRLine(element.children[0], tvs=[], labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1] + 1

                # no parameters
                else:
                    # Append Empty function call node
                    lines.append(irl.IRLine.singleEntry(irl.IRFunctionCall(func_call, None), [labelDigit]))

            elif ind == 6:
                #While and Do While

                ns = False

                # Jump straight to conditionals for only 'While' statements
                if element.name == "while":
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                # Keep track of label for conditional block
                conditionLabel = labelDigit
                labelDigit += 1

                # Add the label that belongs to the start of the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{labelDigit}>"), [labelDigit]))

                if [x.children[0] for x in element.children[1].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True

                # Assign labels for start/end of loop
                loopStart = labelDigit
                loopEnd = labelDigit + 1
                labelDigit += 2

                # recursivly deal with the body of the loop
                tmp, labelDigit = returnLines(element.children[1], returnDigit, labelDigit, loopStart, loopEnd, breakDigit=loopEnd, continueDigit=loopStart)
                lines.extend(tmp)

                if ns:
                    # Append an IRBracket node
                    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                # Start of conditionals for the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{conditionLabel}>"), [labelDigit]))

                tmpNode = element.children[0]
                if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                    tmpNode.children == [] or
                    tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                    # We create an AST Node comparing the value to 0 if there 
                    # are no comparison/logical operators present in the condition.

                    tmpNode = ast.ASTNode("!=", None)
                    tmpNode.children.append(element.children[0])
                    tmpNode.children.append(ast.ASTNode("0", tmpNode))

                line = irl.IRLine(tmpNode, tvs=[], success=loopStart, failure=loopEnd, labelList=[labelDigit])
                tvs, labelList = line.retrieve()
                lines.append(line)

                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{loopEnd}>"), [labelDigit]))

                if labelList != []:
                    labelDigit = labelList[-1]
                # increment twice for new index (twce, in case it was a do while)
                labelDigit += 2

            elif ind == 7:
                # Break
                if breakDigit:
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{breakDigit}>"), [labelDigit]))

            elif ind == 8:
                # Continue
                if continueDigit:
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{continueDigit}>"), [labelDigit]))

            elif ind == 9:
                # Goto
                lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"{element.children[0].name}"), [labelDigit]))
            elif ind == 10:
                # Jump Label
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"{element.children[0].name}"), [labelDigit]))

                if (len(element.children) > 1):
                    temp_lines, labelDigit = returnLines(element.children[1], returnDigit, labelDigit)

                    lines.extend(temp_lines)

            elif ind == 11:
                # Special assignment (++, --)
                line = irl.IRLine(element, tvs=[])
                tvs, labelList = line.retrieve()
                lines.append(line)

            else:
                pass
                # print("Unsupported at this time")

        except Warning:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            pass

    return lines, labelDigit

Functions

def beginWrapper(function_tuple, returnDigit)

Produces the function wrappers and initializes the return digit for the given function.

Returns

lines
The lines of the start of the function
Expand source code
def beginWrapper(function_tuple, returnDigit):
    """
    Produces the function wrappers and initializes the return digit for the given function.

    Returns:
        lines: The lines of the start of the function
    """
    lines = []
    params = []
    func_type = function_tuple[0].children[0].name
    func_name = function_tuple[0].children[1].name

    for var in function_tuple[0].children[2].children:
        if var.name == "var":

            # list of all modifiers
            modifiers = f"{' '.join([x.name for x in var.children[0].children])}{' ' if [x.name for x in var.children[0].children] else ''}"

            # append entry for parameter
            params.append(f"{' '.join([modifiers])}{var.children[0].name} rV_{var.children[1].name}")

    lines.append(irl.IRLine.singleEntry(irl.IRFunctionDecl(func_name, params)))
    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True, functionDecl=True)))

    if func_type != "void":
        modifiers = f"{''.join([x.name for x in function_tuple[0].children[0].children if x.name in ['signed', 'unsigned']])}{' ' if [x.name for x in function_tuple[0].children[0].children if x.name in ['signed', 'unsigned']] else ''}"
        lines.append(irl.IRLine.singleEntry(irl.IRVariableInit(modifiers, func_type, f"D.{returnDigit}")))

    return lines
def returnLines(node, returnDigit, labelDigit, successDigit=None, failureDigit=None, breakDigit=None, continueDigit=None)

Produces a linear representation of the content nested within node.

Args

node
The AST node.
returnDigit
The variable to store the return value.
labelDigit
A list of all previously used label values.
successDigit
The label value to jump to if there is a continue.
failureDigit
The label value to jump to if there is a break.
breakDigit
The label used for break statements to have a correct label so the control flow is correct.
continueDigit
The label used for a continue statement to know the 'ultimate' success of whatever block it is in

Returns

lines
The lines produced from the content.
labelDigit
The list of all used label values.
Expand source code
def returnLines(node,returnDigit,labelDigit,successDigit=None,failureDigit=None, breakDigit=None, continueDigit=None):
    """
    Produces a linear representation of the content nested within `node`.

    Args:
        node: The AST node.
        returnDigit: The variable to store the return value.
        labelDigit: A list of all previously used label values.
        successDigit: The label value to jump to if there is a `continue`.
        failureDigit: The label value to jump to if there is a `break`.
        breakDigit: The label used for break statements to have a correct label so the control flow is correct.

        continueDigit: The label used for a continue statement to know the 'ultimate' success of whatever block it is in

    Returns:
        lines: The lines produced from the content.
        labelDigit: The list of all used label values.
    """
    lines = []
    if node.name == "body":
        il = [x.children[0] for x in node.children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]

        # It is the scopes responsibility to ensure that the content is wrapped in braces
        if il:
            for x in il:
                modifiers = f"{' '.join([y.name for y in x.children[0].children])}{' ' if [y.name for y in x.children[0].children] else ''}"
                lines.append(irl.IRLine.singleEntry(irl.IRVariableInit(modifiers, x.children[0].name, f"rV_{x.children[1].name}"), [labelDigit]))

    for element in node.children:
        try:
            splits = [["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "|=", "&=", "^=", "<=", ">=", "="],["for"],["body"],["branch"],["return"],["call"],["while", "do_while"],["break"],["continue"],["goto"],["label"], ["++", "--"]]
            ind = [splits.index(x) for x in splits if element.name in x]
            ind = ind[0]
            if ind == 0:
                line = irl.IRLine(element, tvs=[], labelList=[labelDigit])
                tvs, labelList = line.retrieve()
                lines.append(line)

                if labelList != []:
                    labelDigit = labelList[-1]

            elif ind == 1:
                # For Loop
                ns = False
                # Initialize variable
                if element.children[0].children != []:
                    initNode = ast.ASTNode("body", None)
                    initNode.children.append(element.children[0])
                    if [x.children[0] for x in initNode.children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True
                    tmp, labelDigit = returnLines(initNode, returnDigit, labelDigit)
                    lines.extend(tmp)

                # Keep track of label for conditional block (if conditional exist)
                conditionLabel = None
                if element.children[1].children != []:

                    # Append an IRGoTo node
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                    conditionLabel = labelDigit
                    labelDigit += 1

                # Append an IRJump node for the label that belongs to the start of the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{labelDigit}>"), [labelDigit]))

                # Assign labels for start/end of loop
                loopStart = labelDigit
                loopEnd = labelDigit + 1
                labelDigit += 2

                # recursivly deal with the body of the loop
                tmp, labelDigit = returnLines(element.children[3], returnDigit, labelDigit, loopStart, loopEnd, breakDigit=loopEnd, continueDigit=loopStart)
                lines.extend(tmp)

                # Add the "end-of-loop" assignment/arithmetic
                if element.children[2].children != []:
                    initNode = ast.ASTNode("tmp", None)
                    initNode.children.append(element.children[2])
                    tmp, labelDigit = returnLines(initNode, returnDigit, labelDigit)
                    lines.extend(tmp)

                # Start of conditionals for the loop
                if conditionLabel != None:
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{conditionLabel}>"), [labelDigit]))

                    tmpNode = element.children[1]
                    
                    if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                        tmpNode.children == [] or
                        tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                        # We create an AST Node comparing the value to 0 if there 
                        # are no comparison/logical operators present in the condition.

                        tmpNode = ast.ASTNode("!=", None)
                        tmpNode.children.append(element.children[1])
                        tmpNode.children.append(ast.ASTNode("0", tmpNode))

                    line = irl.IRLine(tmpNode, tvs=[], success=loopStart, failure=loopEnd, labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1]

                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{loopEnd}>"), [labelDigit]))

                else:
                    # No conditional (jump to start of body...always True)
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{loopStart}>"), [labelDigit]))

                # increment twice for new index
                labelDigit += 2

                if ns:
                    # Append an IRBracket node
                    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

            elif ind == 2:
                # Append an IRBracket node
                lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))

                tmp, labelDigit = returnLines(element, returnDigit, labelDigit, successDigit, failureDigit)
                lines.extend(tmp)

                # Append an IRBracket node
                lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

            elif ind == 3:
                # If/Else statement(s)

                # list of goto labels to be appended at end of if blocks
                end_if = []

                #for each case in a branch
                for case in element.children:
                    ns = False
                    #create label for body if true and label to skip to correct place if false.
                    success_label = labelDigit
                    labelDigit += 1
                    failure_label = labelDigit
                    labelDigit += 1

                    #default is an 'else'. Only has one child, body
                    if case.name == "default":
                        if [x.children[0] for x in case.children[0].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                            # Append an IRBracket node
                            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                            ns = True

                        #Get lines for the body and assign new labeldigit
                        tmp, labelDigit = returnLines(case.children[0], returnDigit, labelDigit, success_label, failure_label, failureDigit, successDigit)
                        lines.extend(tmp)

                        if ns:
                            # Append an IRBracket node
                            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                        break

                    tmpNode = case.children[0]
                    if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                        tmpNode.children == [] or
                        tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                        # We create an AST Node comparing the value to 0 if there 
                        # are no comparison/logical operators present in the condition.

                        tmpNode = ast.ASTNode("!=", None)
                        tmpNode.children.append(case.children[0])
                        tmpNode.children.append(ast.ASTNode("0", tmpNode))

                    #break down argument for if statement into smaller if statements
                    line = irl.IRLine(tmpNode, tvs=[], success=success_label, failure=failure_label, labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1] + 1


                    #Add goto for body statement
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{success_label}>"), [labelDigit]))

                    if [x.children[0] for x in case.children[1].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True

                    #Get lines for the if body and assign new labeldigit
                    tmp, labelDigit = returnLines(case.children[1], returnDigit,  labelDigit, success_label, failure_label, failureDigit, successDigit)
                    lines.extend(tmp)

                    if ns:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                    #append goto for end of if body
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                    end_if.append(labelDigit)
                    labelDigit += 1

                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{failure_label}>"), [labelDigit]))

                for i in end_if:
                    lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{i}>"), [labelDigit]))

            elif ind == 4:
                #Return

                # If returns some type of arithmetic expression, breaks it down.
                if len(element.children) > 0 and element.children[0].children != []:
                    line = irl.IRLine(element.children[0], tvs=[], labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    if line.treeList != []:
                        lines.append(line)
                        lines[-1].treeList.append(irl.IRAssignment(f"D.{returnDigit}", f"{tvs[-1]}"))

                    else:
                        lines.append(irl.IRLine.singleEntry(irl.IRAssignment(f"D.{returnDigit}", f"{tvs[-1]}"), [labelDigit]))

                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(f"D.{returnDigit}"), [labelDigit]))

                    if labelList != []:
                        labelDigit = labelList[-1]

                elif len(element.children) > 0 and element.children[0].children == []:
                    lines.append(irl.IRLine.singleEntry(irl.IRAssignment(f"D.{returnDigit}", f"{element.children[0].name}"), [labelDigit]))
                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(f"D.{returnDigit}"), [labelDigit]))

                # Returns nothing
                else:
                    lines.append(irl.IRLine.singleEntry(irl.IRReturn(None), [labelDigit]))

            elif ind == 5:
                #Function Call
                func_call = element.children[0].name

                # function call has parameters
                if element.children[0] != []:
                    line = irl.IRLine(element.children[0], tvs=[], labelList=[labelDigit])
                    tvs, labelList = line.retrieve()
                    lines.append(line)

                    if labelList != []:
                        labelDigit = labelList[-1] + 1

                # no parameters
                else:
                    # Append Empty function call node
                    lines.append(irl.IRLine.singleEntry(irl.IRFunctionCall(func_call, None), [labelDigit]))

            elif ind == 6:
                #While and Do While

                ns = False

                # Jump straight to conditionals for only 'While' statements
                if element.name == "while":
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{labelDigit}>"), [labelDigit]))

                # Keep track of label for conditional block
                conditionLabel = labelDigit
                labelDigit += 1

                # Add the label that belongs to the start of the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{labelDigit}>"), [labelDigit]))

                if [x.children[0] for x in element.children[1].children if x.name == "=" and x.children[0].children[0].name in ["auto", "long double", "double", "float", "long long", "long long int", "long", "int", "short", "char"]]:
                        # Append an IRBracket node
                        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=True), [labelDigit]))
                        ns = True

                # Assign labels for start/end of loop
                loopStart = labelDigit
                loopEnd = labelDigit + 1
                labelDigit += 2

                # recursivly deal with the body of the loop
                tmp, labelDigit = returnLines(element.children[1], returnDigit, labelDigit, loopStart, loopEnd, breakDigit=loopEnd, continueDigit=loopStart)
                lines.extend(tmp)

                if ns:
                    # Append an IRBracket node
                    lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False), [labelDigit]))

                # Start of conditionals for the loop
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{conditionLabel}>"), [labelDigit]))

                tmpNode = element.children[0]
                if tmpNode.name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="] and (
                    tmpNode.children == [] or
                    tmpNode.children[0].name not in ['||', '&&', "<=", "<", ">=", ">", "==", "!="]):
                    # We create an AST Node comparing the value to 0 if there 
                    # are no comparison/logical operators present in the condition.

                    tmpNode = ast.ASTNode("!=", None)
                    tmpNode.children.append(element.children[0])
                    tmpNode.children.append(ast.ASTNode("0", tmpNode))

                line = irl.IRLine(tmpNode, tvs=[], success=loopStart, failure=loopEnd, labelList=[labelDigit])
                tvs, labelList = line.retrieve()
                lines.append(line)

                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"<D.{loopEnd}>"), [labelDigit]))

                if labelList != []:
                    labelDigit = labelList[-1]
                # increment twice for new index (twce, in case it was a do while)
                labelDigit += 2

            elif ind == 7:
                # Break
                if breakDigit:
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{breakDigit}>"), [labelDigit]))

            elif ind == 8:
                # Continue
                if continueDigit:
                    lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"<D.{continueDigit}>"), [labelDigit]))

            elif ind == 9:
                # Goto
                lines.append(irl.IRLine.singleEntry(irl.IRGoTo(f"{element.children[0].name}"), [labelDigit]))
            elif ind == 10:
                # Jump Label
                lines.append(irl.IRLine.singleEntry(irl.IRJump(f"{element.children[0].name}"), [labelDigit]))

                if (len(element.children) > 1):
                    temp_lines, labelDigit = returnLines(element.children[1], returnDigit, labelDigit)

                    lines.extend(temp_lines)

            elif ind == 11:
                # Special assignment (++, --)
                line = irl.IRLine(element, tvs=[])
                tvs, labelList = line.retrieve()
                lines.append(line)

            else:
                pass
                # print("Unsupported at this time")

        except Warning:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            pass

    return lines, labelDigit

Classes

class LevelOneIR (astHead, symTable)

Constructs the linear representation of the input program in order to allow for optimizations such as constant folding, constant proagation, as well as removal of unused variables and functions depending on the optimization level provided as a commandline argument.

Args

astHead
The root node of the AST
symTable
The symbol table for the input
Expand source code
class LevelOneIR():
    """
    Constructs the linear representation of the input program in order to allow for optimizations such as constant folding, constant proagation, as well as removal of unused variables and functions depending on the optimization level provided as a commandline argument.
    """
    def __init__(self,astHead,symTable):
        """
        Args:
            astHead: The root node of the AST
            symTable: The symbol table for the input
        """
        self.astHead = astHead
        self.symTable = symTable
        self.IR = []

    def construct(self):
        """
        Constructs the linear representation for the object.

        Returns:
            IR: A collection of IRLine objects which can be optimized and/or transformed into assembly.
        """
        sym = self.symTable
        ntv = self.astHead

        varIndex = 0
        lastVarName = "_" + str(varIndex)
        bodyList = []

        # list of all bodies within functions in our C program
        for x in self.astHead.children:
            if x.name == "func":
                # Each entry is the '(func_node, body_node)'
                bodyList.append((x,x.children[3]))

        # Digits are used for contructing unique labels
        returnDigit = 1234
        labelDigit = returnDigit + 1
        lines = []

        for i in bodyList:
            # Beginning of fuction wrapper
            lines.extend(beginWrapper(i, returnDigit))

            # Body of function
            tmp_lines , labelDigit = returnLines(i[1], returnDigit, labelDigit)
            lines.extend(tmp_lines)

            # add a return statement if function didnt have one
            if not isinstance(lines[-1].treeList[-1], irl.IRReturn):
                lines.append(irl.IRLine.singleEntry(irl.IRReturn(None)))

            # End of function wrapper, add closing bracket
            lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False, functionDecl=True)))

            # NOTE: 'labelDigit' should be the newest and unused digit
            returnDigit = labelDigit

            self.IR = lines

        # RESET THE FRICKING GLOBAL
        irl.tmpVarIndex = 0

        return self.IR

    def optimize(self, opt):
        """
        Performs the optimizations based on the level of optimization

        Args:
            opt: The level of optimization to be performed.
        """
        if opt > 0:
            self.remove_unused_funcs()
            self.remove_unused_vars()
            pass

        # constant folding/propagation
        if opt > 1:
            # A dictionary containing all the values of variables that can at some point be reduced.
            self.var_values = {
                func.name : {
                    var.name : "Undef"
                    for var in self.symTable.symbols if var.entry_type == 0 and var.scope.startswith(f"/{func.name}")
                }
                for func in [sym for sym in self.symTable.symbols if sym.entry_type == 1]
            }

            # NOTE: As a method of knowing whether it is safe to continue on to the next node in the list the following measures have been implemented:
            # Both constant folding and constant propagation only consider a singular value.
            # They both return a boolean describing whether the node was altered as well as some descriptor of what it did.
                # In the case of constant folding, the descriptor is the new simplified node which then has to be assigned to the correct index.
                # In the case of constant propagation, the descriptor is a dictionary containing the updated values relevant to the scope of the IRLine object meaning that tempoary variables are stored within the IRLine but now within the scope as a whole.
            # The idea is that while either of these methods can alter the node you keep executing them on the node so that if the loop terminates the program is sure that the node cannot be reduced further.
            # Another final optimization that can be done would be to check if all the nodes in an IRLine object are `IRAssignment`, if so the last one is the only `IRAssignment` node needed, and the others can be removed.

            prev_maj = 0
            cur_scope = ""
            tmp_vals = {}
            for major, minor, node in [(major, minor, node) for major, tl in enumerate(self.IR) for minor, node in enumerate(tl.treeList)]:
                if major != prev_maj:
                    tmp_vals = {}

                if isinstance(node, irl.IRFunctionDecl):
                    cur_scope = node.name
                elif isinstance(node, irl.IRGoTo) or isinstance(node, irl.IRJump) or isinstance(node, irl.IRIf):
                    for val in tmp_vals.items():
                        if val[0] in self.var_values[cur_scope]:
                            self.var_values[cur_scope][val[0]] = "Undef"
                        tmp_vals[val[0]] = "Undef"
                else:
                    for val in self.var_values[cur_scope].items():
                        tmp_vals[val[0]] = val[1]

                ncf = False
                ncp = False

                while 1:
                    ncf, tmp = self.constant_folding(node)

                    if ncf:
                        self.IR[major].treeList[minor] = tmp
                        node = tmp

                    ncp, vals = self.constant_propagation(node, tmp_vals)

                    for val in vals.items():
                        if val[0] in self.var_values[cur_scope]:
                            self.var_values[cur_scope][val[0]] = val[1]
                        tmp_vals[val[0]] = val[1]

                    if not (ncf or ncp):
                        break
                prev_maj = major

            self.cleanup()

    def remove_unused_vars(self):
        """
        Removes unused variables based on the number of references found in the symbol table
        """
        ir = self.IR
        scope = ""

        #get variables to remove
        vars_temp = [[x.name, x.scope] for x in self.symTable.symbols if x.entry_type != 2 and x.entry_type != 1 and x.entry_type != 3 and len(x.references) == 0]
        #get function from symbol table.
        funcs = [x.name for x in self.symTable.symbols if x.entry_type == 1]

        final_ir = []

        for irLine in ir:

            for idx, irNode in enumerate(irLine.treeList):
                #check if function declaration, to set new scope
                if isinstance(irNode, irl.IRFunctionDecl):
                    scope = irNode.name

                #check if declaration
                if isinstance(irNode, irl.IRVariableInit):
                    #check if variable needs to be skippped
                    if [x for x in vars_temp if x[0] == irNode.var and scope in x[1]] != []:
                        del irLine.treeList[idx]

                #check if usage
                elif(isinstance(irNode, irl.IRAssignment)):
                    if [x for x in vars_temp if x[0] == irNode.lhs and scope in x[1]] != []:
                        del irLine.treeList[idx]

            #irLine has no irNode inside...so we dont add it to new IR list
            if irLine.treeList == []:
                pass
            else:
                final_ir.append(irLine)


        self.IR = final_ir

    def cleanup(self):
        """
        Cleans up the IR by looking at the considering the number of references withing the local `IRLine` object. This allows for the removal of unused tempoary variables.
        """

        class ref():
            def __init__(self, init_index, init_ref_type, side):
                self.refs = [init_index]
                self.ref_types = [init_ref_type]
                self.side = [side]

            def add(self, index, ref_type, side):
                self.refs.append(index)
                self.ref_types.append(ref_type)
                self.side.append(side)

            def __str__(self):
                return f"\t{self.refs}\t{self.ref_types}\t{self.side}"

            def __repr__(self):
                return self.__str__()

        gr = {}
        for line in self.IR:
            lr = {}

            for i, node in enumerate(line.treeList):
                if isinstance(node, irl.IRAssignment):
                    if node.lhs in lr:
                        lr[node.lhs].add(i, "assignment", 0)
                    else:
                        lr[node.lhs] = ref(i, "assignment", 0)

                    tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                    if node.rhs in lr and not tmp.isnumeric():
                        lr[node.rhs].add(i, "assignment", 1)
                    elif not tmp.isnumeric():
                        lr[node.rhs] = ref(i, "assignment", 1)
                elif isinstance(node, irl.IRArth):
                    if node.var in lr:
                        lr[node.var].add(i, "assignment", 0)
                    else:
                        lr[node.var] = ref(i, "assignment", 0)

                    tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                    if node.lhs in lr and not tmp.isnumeric():
                        lr[node.lhs].add(i, "arithmetic", 1)
                    elif not tmp.isnumeric():
                        lr[node.lhs] = ref(i, "arithmetic", 1)

                    if node.rhs:
                        tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                        if node.rhs in lr and not tmp.isnumeric():
                            lr[node.rhs].add(i, "arithmetic", 2)
                        elif not tmp.isnumeric():
                            lr[node.rhs] = ref(i, "arithmetic", 2)
                elif isinstance(node, irl.IRFunctionAssign):
                    if node.lhs in lr:
                        lr[node.lhs].add(i, "assignment", 0)
                    else:
                        lr[node.lhs] = ref(i, "assignment", 0)

                    for param in node.params:
                        tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                        if param in lr and not tmp.isnumeric():
                            lr[node.lhs].add(i, "param", 1)
                        elif not tmp.isnumeric():
                            lr[node.lhs] = ref(i, "param", 1)
                elif isinstance(node, irl.IRIf):
                    tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                    if node.lhs in lr and not tmp.isnumeric():
                        lr[node.lhs].add(i, "collation", 1)
                    elif not tmp.isnumeric():
                        lr[node.lhs] = ref(i, "collation", 1)

                    tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                    if node.rhs in lr and not tmp.isnumeric():
                        lr[node.rhs].add(i, "collation", 2)
                    elif not tmp.isnumeric():
                        lr[node.rhs] = ref(i, "collation", 2)
                elif isinstance(node, irl.IRSpecial):
                    if node.var in lr:
                        lr[node.var].add(i, "assignment", 0)
                        lr[node.var].add(i, "assignment", 1)
                    else:
                        lr[node.var] = ref(i, "assignment", 0)
                        lr[node.var].add(i, "assignment", 1)
                elif isinstance(node, irl.IRReturn):
                    if node.value:
                        tmp = node.value.lstrip('-+').replace('.', '', 1)
                        if node.value in lr:
                            lr[node.value].add(i, "return", 0)
                        elif not tmp.isnumeric():
                            lr[node.value] = ref(i, "return", 0)
            rem_list = []

            for var in lr.items():
                # print(var)
                for i, typ in enumerate(var[1].ref_types):
                    if (
                        i+1 < len(var[1].ref_types)
                        and
                        typ == "assignment"
                        and
                        var[1].side[i] == 0
                        and
                        var[1].side[i+1] == 0
                        and
                        var[1].ref_types[i+1] == "assignment"
                        ):
                        rem_list.append(var[1].refs[i])
                    elif (
                        i+1 == len(var[1].ref_types)
                        and
                        typ == "assignment"
                        and
                        var[1].side[i] == 0
                        # and
                        # var[1].side[i+1] == 0
                        and
                        (
                            var[1].refs[i]+1 != len(line.treeList)
                            or
                            len(var[1].ref_types) > 1)
                        ):
                        rem_list.append(var[1].refs[i])

                # iterate over reference types and see if there are two assignments after each other.


            for node in reversed(sorted(rem_list)):
                line.treeList.pop(node)

    def remove_unused_funcs(self):
        """
        Removes function that are not reference in the symbol table. They can be removed if not used.
        """



        ir = self.IR
        inFunction = False
        to_remove = []

        for idx, irLine in enumerate(ir):

            # Only need to focus on first Node in each IRLine
            ir_firstNode = irLine.treeList[0]

            # Delete IRLine if we are in a 'unused' function
            # Mark 'inFunction' to False if beginning of new function
            if inFunction == True:
                if isinstance(ir_firstNode, irl.IRFunctionDecl):
                    inFunction = False
                else:
                    to_remove.append(idx)
                    continue

            # Check for function
            if isinstance(ir_firstNode, irl.IRFunctionDecl):
                func_name = ir_firstNode.name
                referenceNum = len([x.references for x in self.symTable.symbols if func_name == x.name and x.entry_type == 1][0])

                # If reference is 0, it is unused
                if referenceNum == 0 and func_name != "main":
                    to_remove.append(idx)
                    inFunction = True

        # Actually delete unused function IRLine's
        for i in to_remove[::-1]:
            del ir[i]


        self.IR = ir

    def constant_folding(self,x):
        """
        Peforms constant folding  on a given line in the IR

        Args:
            x: an IRNode object

        Returns:
            A boolean indicating whether it changed anything, and the node that has been modified
        """
        changed = False
        if isinstance(x,irl.IRArth):
            notFound = True
            op = False
            #get the operator being used
            if(x.rhs != None and x.lhs != None):
                if(x.operator == "+"):
                    op = lambda lhs, rhs : lhs + rhs
                elif(x.operator == "-"):
                    op = lambda lhs, rhs : lhs - rhs
                elif(x.operator == "*"):
                    op = lambda lhs, rhs : lhs * rhs
                elif(x.operator == "/"):
                    op = lambda lhs, rhs : lhs / rhs
                elif(x.operator == "%"):
                    op = lambda lhs, rhs : math.fmod(lhs, rhs)
                elif(x.operator == "<<"):
                    # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 << 31 compared to 10 << 20
                    op = lambda lhs, rhs : lhs << rhs
                elif(x.operator == ">>"):
                    # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 >> 31 compared to 10 >> 20
                    op = lambda lhs, rhs : lhs >> rhs
                elif(x.operator == "|"):
                    op = lambda lhs, rhs : lhs | rhs
                elif(x.operator == "&"):
                    op = lambda lhs, rhs : lhs & rhs
                elif(x.operator == "^"):
                    op = lambda lhs, rhs : lhs ^ rhs
            else:
                if(x.operator == "~"):
                    op = lambda lhs, rhs : ~lhs
                elif(x.operator == "-"):
                    op = lambda lhs, rhs : 0 - lhs
                elif(x.operator == "+"):
                    op = lambda lhs, rhs : 0 + lhs
                elif(x.operator == "!"):
                    op = lambda lhs, rhs : not lhs
            #get the left hand side and the right hand side
            try:
                lhs = int(x.lhs)
                if(not x.rhs == None):
                    rhs = int(x.rhs)
                else:
                    rhs = None
                notFound = False
            except ValueError:
                try:
                    lhs = float(x.lhs)
                    if(not x.rhs == None):
                        rhs = float(x.rhs)
                    else:
                        rhs = None
                    notFound = False
                except ValueError:
                    pass

            #if we found all components, replace the node
            if(not notFound and op):
                if rhs and rhs < 0:
                    if x.operator == "<<" or x.operator == ">>":
                        raise ValueError("shifting by negative number is undefined behavior.")

                newValue = lambda lhs, rhs, op : op(lhs,rhs)
                val = newValue(lhs,rhs,op) if isinstance(lhs, float) or isinstance(rhs, float) else math.floor(newValue(lhs,rhs,op))
                newAss = irl.IRAssignment(x.var, str(val))
                changed = True
                return changed,newAss
        return changed,x

    def constant_propagation(self, node, var_val):
        """
        Performs constant propagation on a given IRNode if deemed possible.

        Args:
            node: The IRNode object to be considered
            var_val: A dictionary of all the variables and their values which are deemed safe to replace

        Returns:
            The return value describes whether any propogation occurred during the function call
        """
        # NOTE: The current issue is that the propogation is goint to have to exit and re enter the function every time to achieve the following:
        # Clear the tempoary variables per line as they are re used
        # Avoid propogating too far so that assignments in the future that may happen after certain other computations and assignments propogate the correct value

        # TODO: Fix to detect whether the reference is necessary or not. Eg, comparisons in loops compared to simple ifs
        # TODO: Ensure that when comming across a value that is not computable or propogatable such as after some control flow, the dictionary value becomes something distinguisable so that the expression is left alone
        changed = False
        if isinstance(node, irl.IRIf):
            if node.lhs in var_val and var_val[node.lhs] != "Undef":
                node.lhs = var_val[node.lhs]
                changed = True
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                changed = True

        elif isinstance(node, irl.IRArth):
            if node.lhs in var_val and var_val[node.lhs] != "Undef":
                node.lhs = var_val[node.lhs]
                changed = True
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                changed = True

        elif isinstance(node, irl.IRAssignment):
            if node.rhs in var_val and var_val[node.rhs] != "Undef":
                node.rhs = var_val[node.rhs]
                var_val[node.lhs] = node.rhs
                changed = True
            else:
                tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                if tmp.isnumeric():
                    var_val[node.lhs] = node.rhs

        elif isinstance(node, irl.IRFunctionAssign):
            for j, param in enumerate(node.params):
                if param in var_val and var_val[param] != None:
                    node.params[j] = var_val[param]
                    changed = True

        return changed, var_val

    def __str__(self):
        return "\n".join([str(x) for x in self.IR]) + "\n"

Methods

def cleanup(self)

Cleans up the IR by looking at the considering the number of references withing the local IRLine object. This allows for the removal of unused tempoary variables.

Expand source code
def cleanup(self):
    """
    Cleans up the IR by looking at the considering the number of references withing the local `IRLine` object. This allows for the removal of unused tempoary variables.
    """

    class ref():
        def __init__(self, init_index, init_ref_type, side):
            self.refs = [init_index]
            self.ref_types = [init_ref_type]
            self.side = [side]

        def add(self, index, ref_type, side):
            self.refs.append(index)
            self.ref_types.append(ref_type)
            self.side.append(side)

        def __str__(self):
            return f"\t{self.refs}\t{self.ref_types}\t{self.side}"

        def __repr__(self):
            return self.__str__()

    gr = {}
    for line in self.IR:
        lr = {}

        for i, node in enumerate(line.treeList):
            if isinstance(node, irl.IRAssignment):
                if node.lhs in lr:
                    lr[node.lhs].add(i, "assignment", 0)
                else:
                    lr[node.lhs] = ref(i, "assignment", 0)

                tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                if node.rhs in lr and not tmp.isnumeric():
                    lr[node.rhs].add(i, "assignment", 1)
                elif not tmp.isnumeric():
                    lr[node.rhs] = ref(i, "assignment", 1)
            elif isinstance(node, irl.IRArth):
                if node.var in lr:
                    lr[node.var].add(i, "assignment", 0)
                else:
                    lr[node.var] = ref(i, "assignment", 0)

                tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                if node.lhs in lr and not tmp.isnumeric():
                    lr[node.lhs].add(i, "arithmetic", 1)
                elif not tmp.isnumeric():
                    lr[node.lhs] = ref(i, "arithmetic", 1)

                if node.rhs:
                    tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                    if node.rhs in lr and not tmp.isnumeric():
                        lr[node.rhs].add(i, "arithmetic", 2)
                    elif not tmp.isnumeric():
                        lr[node.rhs] = ref(i, "arithmetic", 2)
            elif isinstance(node, irl.IRFunctionAssign):
                if node.lhs in lr:
                    lr[node.lhs].add(i, "assignment", 0)
                else:
                    lr[node.lhs] = ref(i, "assignment", 0)

                for param in node.params:
                    tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                    if param in lr and not tmp.isnumeric():
                        lr[node.lhs].add(i, "param", 1)
                    elif not tmp.isnumeric():
                        lr[node.lhs] = ref(i, "param", 1)
            elif isinstance(node, irl.IRIf):
                tmp = node.lhs.lstrip('-+').replace('.', '', 1)
                if node.lhs in lr and not tmp.isnumeric():
                    lr[node.lhs].add(i, "collation", 1)
                elif not tmp.isnumeric():
                    lr[node.lhs] = ref(i, "collation", 1)

                tmp = node.rhs.lstrip('-+').replace('.', '', 1)
                if node.rhs in lr and not tmp.isnumeric():
                    lr[node.rhs].add(i, "collation", 2)
                elif not tmp.isnumeric():
                    lr[node.rhs] = ref(i, "collation", 2)
            elif isinstance(node, irl.IRSpecial):
                if node.var in lr:
                    lr[node.var].add(i, "assignment", 0)
                    lr[node.var].add(i, "assignment", 1)
                else:
                    lr[node.var] = ref(i, "assignment", 0)
                    lr[node.var].add(i, "assignment", 1)
            elif isinstance(node, irl.IRReturn):
                if node.value:
                    tmp = node.value.lstrip('-+').replace('.', '', 1)
                    if node.value in lr:
                        lr[node.value].add(i, "return", 0)
                    elif not tmp.isnumeric():
                        lr[node.value] = ref(i, "return", 0)
        rem_list = []

        for var in lr.items():
            # print(var)
            for i, typ in enumerate(var[1].ref_types):
                if (
                    i+1 < len(var[1].ref_types)
                    and
                    typ == "assignment"
                    and
                    var[1].side[i] == 0
                    and
                    var[1].side[i+1] == 0
                    and
                    var[1].ref_types[i+1] == "assignment"
                    ):
                    rem_list.append(var[1].refs[i])
                elif (
                    i+1 == len(var[1].ref_types)
                    and
                    typ == "assignment"
                    and
                    var[1].side[i] == 0
                    # and
                    # var[1].side[i+1] == 0
                    and
                    (
                        var[1].refs[i]+1 != len(line.treeList)
                        or
                        len(var[1].ref_types) > 1)
                    ):
                    rem_list.append(var[1].refs[i])

            # iterate over reference types and see if there are two assignments after each other.


        for node in reversed(sorted(rem_list)):
            line.treeList.pop(node)
def constant_folding(self, x)

Peforms constant folding on a given line in the IR

Args

x
an IRNode object

Returns

A boolean indicating whether it changed anything, and the node that has been modified
 
Expand source code
def constant_folding(self,x):
    """
    Peforms constant folding  on a given line in the IR

    Args:
        x: an IRNode object

    Returns:
        A boolean indicating whether it changed anything, and the node that has been modified
    """
    changed = False
    if isinstance(x,irl.IRArth):
        notFound = True
        op = False
        #get the operator being used
        if(x.rhs != None and x.lhs != None):
            if(x.operator == "+"):
                op = lambda lhs, rhs : lhs + rhs
            elif(x.operator == "-"):
                op = lambda lhs, rhs : lhs - rhs
            elif(x.operator == "*"):
                op = lambda lhs, rhs : lhs * rhs
            elif(x.operator == "/"):
                op = lambda lhs, rhs : lhs / rhs
            elif(x.operator == "%"):
                op = lambda lhs, rhs : math.fmod(lhs, rhs)
            elif(x.operator == "<<"):
                # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 << 31 compared to 10 << 20
                op = lambda lhs, rhs : lhs << rhs
            elif(x.operator == ">>"):
                # There is an edge case around the changes of architectures so that values may differ depending on the architecture you are using. eg 10 >> 31 compared to 10 >> 20
                op = lambda lhs, rhs : lhs >> rhs
            elif(x.operator == "|"):
                op = lambda lhs, rhs : lhs | rhs
            elif(x.operator == "&"):
                op = lambda lhs, rhs : lhs & rhs
            elif(x.operator == "^"):
                op = lambda lhs, rhs : lhs ^ rhs
        else:
            if(x.operator == "~"):
                op = lambda lhs, rhs : ~lhs
            elif(x.operator == "-"):
                op = lambda lhs, rhs : 0 - lhs
            elif(x.operator == "+"):
                op = lambda lhs, rhs : 0 + lhs
            elif(x.operator == "!"):
                op = lambda lhs, rhs : not lhs
        #get the left hand side and the right hand side
        try:
            lhs = int(x.lhs)
            if(not x.rhs == None):
                rhs = int(x.rhs)
            else:
                rhs = None
            notFound = False
        except ValueError:
            try:
                lhs = float(x.lhs)
                if(not x.rhs == None):
                    rhs = float(x.rhs)
                else:
                    rhs = None
                notFound = False
            except ValueError:
                pass

        #if we found all components, replace the node
        if(not notFound and op):
            if rhs and rhs < 0:
                if x.operator == "<<" or x.operator == ">>":
                    raise ValueError("shifting by negative number is undefined behavior.")

            newValue = lambda lhs, rhs, op : op(lhs,rhs)
            val = newValue(lhs,rhs,op) if isinstance(lhs, float) or isinstance(rhs, float) else math.floor(newValue(lhs,rhs,op))
            newAss = irl.IRAssignment(x.var, str(val))
            changed = True
            return changed,newAss
    return changed,x
def constant_propagation(self, node, var_val)

Performs constant propagation on a given IRNode if deemed possible.

Args

node
The IRNode object to be considered
var_val
A dictionary of all the variables and their values which are deemed safe to replace

Returns

The return value describes whether any propogation occurred during the function call
 
Expand source code
def constant_propagation(self, node, var_val):
    """
    Performs constant propagation on a given IRNode if deemed possible.

    Args:
        node: The IRNode object to be considered
        var_val: A dictionary of all the variables and their values which are deemed safe to replace

    Returns:
        The return value describes whether any propogation occurred during the function call
    """
    # NOTE: The current issue is that the propogation is goint to have to exit and re enter the function every time to achieve the following:
    # Clear the tempoary variables per line as they are re used
    # Avoid propogating too far so that assignments in the future that may happen after certain other computations and assignments propogate the correct value

    # TODO: Fix to detect whether the reference is necessary or not. Eg, comparisons in loops compared to simple ifs
    # TODO: Ensure that when comming across a value that is not computable or propogatable such as after some control flow, the dictionary value becomes something distinguisable so that the expression is left alone
    changed = False
    if isinstance(node, irl.IRIf):
        if node.lhs in var_val and var_val[node.lhs] != "Undef":
            node.lhs = var_val[node.lhs]
            changed = True
        if node.rhs in var_val and var_val[node.rhs] != "Undef":
            node.rhs = var_val[node.rhs]
            changed = True

    elif isinstance(node, irl.IRArth):
        if node.lhs in var_val and var_val[node.lhs] != "Undef":
            node.lhs = var_val[node.lhs]
            changed = True
        if node.rhs in var_val and var_val[node.rhs] != "Undef":
            node.rhs = var_val[node.rhs]
            changed = True

    elif isinstance(node, irl.IRAssignment):
        if node.rhs in var_val and var_val[node.rhs] != "Undef":
            node.rhs = var_val[node.rhs]
            var_val[node.lhs] = node.rhs
            changed = True
        else:
            tmp = node.rhs.lstrip('-+').replace('.', '', 1)
            if tmp.isnumeric():
                var_val[node.lhs] = node.rhs

    elif isinstance(node, irl.IRFunctionAssign):
        for j, param in enumerate(node.params):
            if param in var_val and var_val[param] != None:
                node.params[j] = var_val[param]
                changed = True

    return changed, var_val
def construct(self)

Constructs the linear representation for the object.

Returns

IR
A collection of IRLine objects which can be optimized and/or transformed into assembly.
Expand source code
def construct(self):
    """
    Constructs the linear representation for the object.

    Returns:
        IR: A collection of IRLine objects which can be optimized and/or transformed into assembly.
    """
    sym = self.symTable
    ntv = self.astHead

    varIndex = 0
    lastVarName = "_" + str(varIndex)
    bodyList = []

    # list of all bodies within functions in our C program
    for x in self.astHead.children:
        if x.name == "func":
            # Each entry is the '(func_node, body_node)'
            bodyList.append((x,x.children[3]))

    # Digits are used for contructing unique labels
    returnDigit = 1234
    labelDigit = returnDigit + 1
    lines = []

    for i in bodyList:
        # Beginning of fuction wrapper
        lines.extend(beginWrapper(i, returnDigit))

        # Body of function
        tmp_lines , labelDigit = returnLines(i[1], returnDigit, labelDigit)
        lines.extend(tmp_lines)

        # add a return statement if function didnt have one
        if not isinstance(lines[-1].treeList[-1], irl.IRReturn):
            lines.append(irl.IRLine.singleEntry(irl.IRReturn(None)))

        # End of function wrapper, add closing bracket
        lines.append(irl.IRLine.singleEntry(irl.IRBracket(opening=False, functionDecl=True)))

        # NOTE: 'labelDigit' should be the newest and unused digit
        returnDigit = labelDigit

        self.IR = lines

    # RESET THE FRICKING GLOBAL
    irl.tmpVarIndex = 0

    return self.IR
def optimize(self, opt)

Performs the optimizations based on the level of optimization

Args

opt
The level of optimization to be performed.
Expand source code
def optimize(self, opt):
    """
    Performs the optimizations based on the level of optimization

    Args:
        opt: The level of optimization to be performed.
    """
    if opt > 0:
        self.remove_unused_funcs()
        self.remove_unused_vars()
        pass

    # constant folding/propagation
    if opt > 1:
        # A dictionary containing all the values of variables that can at some point be reduced.
        self.var_values = {
            func.name : {
                var.name : "Undef"
                for var in self.symTable.symbols if var.entry_type == 0 and var.scope.startswith(f"/{func.name}")
            }
            for func in [sym for sym in self.symTable.symbols if sym.entry_type == 1]
        }

        # NOTE: As a method of knowing whether it is safe to continue on to the next node in the list the following measures have been implemented:
        # Both constant folding and constant propagation only consider a singular value.
        # They both return a boolean describing whether the node was altered as well as some descriptor of what it did.
            # In the case of constant folding, the descriptor is the new simplified node which then has to be assigned to the correct index.
            # In the case of constant propagation, the descriptor is a dictionary containing the updated values relevant to the scope of the IRLine object meaning that tempoary variables are stored within the IRLine but now within the scope as a whole.
        # The idea is that while either of these methods can alter the node you keep executing them on the node so that if the loop terminates the program is sure that the node cannot be reduced further.
        # Another final optimization that can be done would be to check if all the nodes in an IRLine object are `IRAssignment`, if so the last one is the only `IRAssignment` node needed, and the others can be removed.

        prev_maj = 0
        cur_scope = ""
        tmp_vals = {}
        for major, minor, node in [(major, minor, node) for major, tl in enumerate(self.IR) for minor, node in enumerate(tl.treeList)]:
            if major != prev_maj:
                tmp_vals = {}

            if isinstance(node, irl.IRFunctionDecl):
                cur_scope = node.name
            elif isinstance(node, irl.IRGoTo) or isinstance(node, irl.IRJump) or isinstance(node, irl.IRIf):
                for val in tmp_vals.items():
                    if val[0] in self.var_values[cur_scope]:
                        self.var_values[cur_scope][val[0]] = "Undef"
                    tmp_vals[val[0]] = "Undef"
            else:
                for val in self.var_values[cur_scope].items():
                    tmp_vals[val[0]] = val[1]

            ncf = False
            ncp = False

            while 1:
                ncf, tmp = self.constant_folding(node)

                if ncf:
                    self.IR[major].treeList[minor] = tmp
                    node = tmp

                ncp, vals = self.constant_propagation(node, tmp_vals)

                for val in vals.items():
                    if val[0] in self.var_values[cur_scope]:
                        self.var_values[cur_scope][val[0]] = val[1]
                    tmp_vals[val[0]] = val[1]

                if not (ncf or ncp):
                    break
            prev_maj = major

        self.cleanup()
def remove_unused_funcs(self)

Removes function that are not reference in the symbol table. They can be removed if not used.

Expand source code
def remove_unused_funcs(self):
    """
    Removes function that are not reference in the symbol table. They can be removed if not used.
    """



    ir = self.IR
    inFunction = False
    to_remove = []

    for idx, irLine in enumerate(ir):

        # Only need to focus on first Node in each IRLine
        ir_firstNode = irLine.treeList[0]

        # Delete IRLine if we are in a 'unused' function
        # Mark 'inFunction' to False if beginning of new function
        if inFunction == True:
            if isinstance(ir_firstNode, irl.IRFunctionDecl):
                inFunction = False
            else:
                to_remove.append(idx)
                continue

        # Check for function
        if isinstance(ir_firstNode, irl.IRFunctionDecl):
            func_name = ir_firstNode.name
            referenceNum = len([x.references for x in self.symTable.symbols if func_name == x.name and x.entry_type == 1][0])

            # If reference is 0, it is unused
            if referenceNum == 0 and func_name != "main":
                to_remove.append(idx)
                inFunction = True

    # Actually delete unused function IRLine's
    for i in to_remove[::-1]:
        del ir[i]


    self.IR = ir
def remove_unused_vars(self)

Removes unused variables based on the number of references found in the symbol table

Expand source code
def remove_unused_vars(self):
    """
    Removes unused variables based on the number of references found in the symbol table
    """
    ir = self.IR
    scope = ""

    #get variables to remove
    vars_temp = [[x.name, x.scope] for x in self.symTable.symbols if x.entry_type != 2 and x.entry_type != 1 and x.entry_type != 3 and len(x.references) == 0]
    #get function from symbol table.
    funcs = [x.name for x in self.symTable.symbols if x.entry_type == 1]

    final_ir = []

    for irLine in ir:

        for idx, irNode in enumerate(irLine.treeList):
            #check if function declaration, to set new scope
            if isinstance(irNode, irl.IRFunctionDecl):
                scope = irNode.name

            #check if declaration
            if isinstance(irNode, irl.IRVariableInit):
                #check if variable needs to be skippped
                if [x for x in vars_temp if x[0] == irNode.var and scope in x[1]] != []:
                    del irLine.treeList[idx]

            #check if usage
            elif(isinstance(irNode, irl.IRAssignment)):
                if [x for x in vars_temp if x[0] == irNode.lhs and scope in x[1]] != []:
                    del irLine.treeList[idx]

        #irLine has no irNode inside...so we dont add it to new IR list
        if irLine.treeList == []:
            pass
        else:
            final_ir.append(irLine)


    self.IR = final_ir