Module `src.frontend.preprocessor`

This module serves as the pre-processor for any given C program. It searches the program text for pre-processor keywords and performs the manipulation of the C program needed to complete the corresponding pre-processing operations.

Expand source code

"""
This module serves as the pre-processor for any given C program. It searches the program text for pre-processor keywords and performs the manipulation of the C program needed to complete the corresponding pre-processing operations.
"""
import os
import re

def remove_comments(text):
    """
    Removes comments from the given text.

    Both '//' line comments and '/* */' block comments are deleted. String
    and character literals are left untouched, so a literal such as
    "http://example.com" is not mistaken for the start of a comment.

    Args:
        text: The string representation of C code.

    Returns:
        The text with all C comments removed.
    """

    # Literals are matched alongside comments so the scanner consumes them
    # whole; whichever construct starts first at a given position wins.
    pattern = re.compile(
        r"//[^\n]*"                      # '//' comment, newline itself is kept
        r"|/\*[\s\S]*?\*/"               # '/* */' comment, may span lines
        r'|"(?:\\[\s\S]|[^"\\\n])*"'     # string literal
        r"|'(?:\\[\s\S]|[^'\\\n])*'"     # character literal
    )

    def _strip(match):
        found = match.group(0)
        # Only the comment alternatives can start with a slash.
        return "" if found.startswith("/") else found

    return pattern.sub(_strip, text)



def find_preprocessors(text):
    """
    Locates every pre-processor directive in the given text.

    Args:
        text: The string representation of C code.

    Returns:
        A list with one 3-tuple per directive found: the keyword, the first
        argument, and the last additional argument (an empty string when
        there is none). Arguments keep their leading whitespace.
    """

    directive_pattern = re.compile(
        r"#\s*(warning|else|endif|include|undef|ifdef|ifndef|if|elif|pragma|define|if|elif|error|pragma|line)([\t\f ]+[^\s]+)([\t\f ]+[^\s]+)*"
    )
    return directive_pattern.findall(text)



def get_text(file_name, path):
    """
    Opens and retrieves the text from an import file and preprocesses this text.

    Args:
        file_name: The import file name.
        path: The directory in which the import file is looked up.

    Returns:
        Text of the import file after pre-processing.
    """
    full_path = os.path.join(path, file_name)

    # The context manager closes the file even if reading fails.
    with open(full_path, "r") as fi:
        text = fi.read()

    # Run preprocessing on imported file. run() resolves local includes
    # against the directory part of the path it is given, so it must receive
    # the imported file's own path rather than the bare directory.
    return run(text, full_path)



def cleanup(text):
    """
    Cleans up C code text by removing pre-processing lines.

    A line counts as a pre-processing line when its first non-whitespace
    character is '#', so indented directives are removed as well.

    Args:
        text: text format of C code to be cleaned up.

    Returns:
        New text of the C code after cleanup. Lines are re-joined with '\\n'
        and no trailing newline is added.
    """

    kept_lines = [
        line for line in text.splitlines()
        if not line.lstrip().startswith("#")
    ]
    return "\n".join(kept_lines)



def run(text, path):
    """
    Performs pre-processing on C code text.

    Comments are stripped, '#define' names are substituted, local
    '#include' files are pre-processed and prepended, and any directive
    lines that remain are removed.

    Args:
        text: text format of C code to be pre-processed.
        path: path of the file the text was read from; local includes are
            looked up in the directory part of this path.

    Returns:
        Text of the C code after pre-processing.

    Raises:
        BaseException: if the argument of an '#include' is not written as
            <file>, "file" or 'file'.
    """

    # Remove comments from text
    text = remove_comments(text)

    # Find all preprocessor elements (if any)
    # NOTE: Currently the list is sorted by keyword, which puts the
    #       '#define' statements first on the list. This prevents the
    #       manipulation of the imported C code.
    proc_list = sorted(find_preprocessors(text), key=lambda x: x[0])

    for pre_proc in proc_list:
        # Determine which are 'includes', 'defines', etc..
        keyword = pre_proc[0]

        if keyword == "define":
            # strip() also drops tabs, which the directive regex allows.
            variable = pre_proc[1].strip()
            value = pre_proc[2].strip()
            name = re.escape(variable)

            # Delete the whole directive line from the C code. Only the
            # line's own newline is consumed, so neighbouring lines are
            # never merged together.
            text = re.sub(
                rf"[\t\f ]*#[\t\f ]*define[\t\f ]+{name}(?![A-Za-z0-9_])[^\n]*\n?",
                "",
                text,
            )

            # Replace occurrences of VARIABLE with VALUE in C code. The
            # look-arounds restrict this to whole identifiers, and the
            # callable keeps backslashes in VALUE literal.
            text = re.sub(
                rf"(?<![A-Za-z0-9_]){name}(?![A-Za-z0-9_])",
                lambda _match: value,
                text,
            )

        elif keyword == "include":
            target = pre_proc[1]

            # Try to match to standard library import (i.e. <xyz.h>)
            std_names = re.findall('<([^"]*)>', target)
            if std_names:
                # TODO: add capability to search for standard libraries

                # Delete the pre-processor instruction from the C code
                std_name = re.escape(std_names[0])
                text = re.sub(
                    rf"[\t\f ]*#[\t\f ]*include[\t\f ]+<{std_name}>[^\n]*\n?",
                    "",
                    text,
                )
                continue

            # Try to match to local library import (i.e. "xyz.h", 'xyz.h')
            local_names = [
                "".join(x)
                for x in re.findall('["]([^"]*)["]|[\']([^"]*)[\']', target)
            ]
            if not local_names:
                raise BaseException("Invalid '#include' statement")

            # Delete the pre-processor instruction from the C code
            local_name = re.escape(local_names[0])
            text = re.sub(
                rf"[\t\f ]*#[\t\f ]*include[\t\f ]+[\"']{local_name}[\"'][^\n]*\n?",
                "",
                text,
            )
            include_dir = os.path.abspath(os.path.dirname(path))
            text = get_text(local_names[0], include_dir) + text

    # We are continuing to implement more "supplemental" pre-processor features, but cleanup for now.
    text = cleanup(text)

    return text


def main():
    """
    A function used when running the preprocessor as a standalone script.

    Reads "./test_files/test.c", pre-processes it and prints the result.
    """
    source_path = "./test_files/test.c"

    # The context manager closes the file even if reading fails.
    with open(source_path, "r") as fi:
        text = fi.read()

    # run() requires the source path in order to resolve local includes.
    text = run(text, source_path)

    print(text)

# Invoke main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Functions

def cleanup(text)

Cleans up C code text by removing pre-processing lines.

Args

text: text format of C code to be cleaned up.

Returns

New text of the C code after cleanup.

Expand source code

def cleanup(text):
    """
    Cleans up C code text by removing pre-processing lines.

    A line counts as a pre-processing line when its first non-whitespace
    character is '#', so indented directives are removed as well.

    Args:
        text: text format of C code to be cleaned up.

    Returns:
        New text of the C code after cleanup. Lines are re-joined with '\\n'
        and no trailing newline is added.
    """

    kept_lines = [
        line for line in text.splitlines()
        if not line.lstrip().startswith("#")
    ]
    return "\n".join(kept_lines)

def find_preprocessors(text)

Finds all preprocessor keywords from the given text.

Args

text: The string representation of C code.

Returns

List of the found preprocessor keywords and their corresponding values.

Expand source code

def find_preprocessors(text):
    """
    Locates every pre-processor directive in the given text.

    Args:
        text: The string representation of C code.

    Returns:
        A list with one 3-tuple per directive found: the keyword, the first
        argument, and the last additional argument (an empty string when
        there is none). Arguments keep their leading whitespace.
    """

    directive_pattern = re.compile(
        r"#\s*(warning|else|endif|include|undef|ifdef|ifndef|if|elif|pragma|define|if|elif|error|pragma|line)([\t\f ]+[^\s]+)([\t\f ]+[^\s]+)*"
    )
    return directive_pattern.findall(text)

def get_text(file_name, path)

Opens and retrieves the text from an import file and preprocesses this text.

Args

file_name: The import file name.
path: The directory in which the import file is looked up.

Returns

Text of the import file.

Expand source code

def get_text(file_name, path):
    """
    Opens and retrieves the text from an import file and preprocesses this text.

    Args:
        file_name: The import file name.
        path: The directory in which the import file is looked up.

    Returns:
        Text of the import file after pre-processing.
    """
    full_path = os.path.join(path, file_name)

    # The context manager closes the file even if reading fails.
    with open(full_path, "r") as fi:
        text = fi.read()

    # Run preprocessing on imported file. run() resolves local includes
    # against the directory part of the path it is given, so it must receive
    # the imported file's own path rather than the bare directory.
    return run(text, full_path)

def main()

A function used when running the preprocessor as a standalone script

Expand source code

def main():
    """
    A function used when running the preprocessor as a standalone script.

    Reads "./test_files/test.c", pre-processes it and prints the result.
    """
    source_path = "./test_files/test.c"

    # The context manager closes the file even if reading fails.
    with open(source_path, "r") as fi:
        text = fi.read()

    # run() requires the source path in order to resolve local includes.
    text = run(text, source_path)

    print(text)

def remove_comments(text)

Removes comments from the given text

Args

text: The string representation of C code.

Returns

The text with all C comments removed.

Expand source code

def remove_comments(text):
    """
    Removes comments from the given text.

    Both '//' line comments and '/* */' block comments are deleted. String
    and character literals are left untouched, so a literal such as
    "http://example.com" is not mistaken for the start of a comment.

    Args:
        text: The string representation of C code.

    Returns:
        The text with all C comments removed.
    """

    # Literals are matched alongside comments so the scanner consumes them
    # whole; whichever construct starts first at a given position wins.
    pattern = re.compile(
        r"//[^\n]*"                      # '//' comment, newline itself is kept
        r"|/\*[\s\S]*?\*/"               # '/* */' comment, may span lines
        r'|"(?:\\[\s\S]|[^"\\\n])*"'     # string literal
        r"|'(?:\\[\s\S]|[^'\\\n])*'"     # character literal
    )

    def _strip(match):
        found = match.group(0)
        # Only the comment alternatives can start with a slash.
        return "" if found.startswith("/") else found

    return pattern.sub(_strip, text)

def run(text, path)

Performs pre-processing on C code text.

Args

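text: text format of C code to be pre-processed.
path: path of the file the text was read from; local includes are looked up in the directory part of this path.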

Returns

Text of the C code after pre-processing.

Expand source code

def run(text, path):
    """
    Performs pre-processing on C code text.

    Comments are stripped, '#define' names are substituted, local
    '#include' files are pre-processed and prepended, and any directive
    lines that remain are removed.

    Args:
        text: text format of C code to be pre-processed.
        path: path of the file the text was read from; local includes are
            looked up in the directory part of this path.

    Returns:
        Text of the C code after pre-processing.

    Raises:
        BaseException: if the argument of an '#include' is not written as
            <file>, "file" or 'file'.
    """

    # Remove comments from text
    text = remove_comments(text)

    # Find all preprocessor elements (if any)
    # NOTE: Currently the list is sorted by keyword, which puts the
    #       '#define' statements first on the list. This prevents the
    #       manipulation of the imported C code.
    proc_list = sorted(find_preprocessors(text), key=lambda x: x[0])

    for pre_proc in proc_list:
        # Determine which are 'includes', 'defines', etc..
        keyword = pre_proc[0]

        if keyword == "define":
            # strip() also drops tabs, which the directive regex allows.
            variable = pre_proc[1].strip()
            value = pre_proc[2].strip()
            name = re.escape(variable)

            # Delete the whole directive line from the C code. Only the
            # line's own newline is consumed, so neighbouring lines are
            # never merged together.
            text = re.sub(
                rf"[\t\f ]*#[\t\f ]*define[\t\f ]+{name}(?![A-Za-z0-9_])[^\n]*\n?",
                "",
                text,
            )

            # Replace occurrences of VARIABLE with VALUE in C code. The
            # look-arounds restrict this to whole identifiers, and the
            # callable keeps backslashes in VALUE literal.
            text = re.sub(
                rf"(?<![A-Za-z0-9_]){name}(?![A-Za-z0-9_])",
                lambda _match: value,
                text,
            )

        elif keyword == "include":
            target = pre_proc[1]

            # Try to match to standard library import (i.e. <xyz.h>)
            std_names = re.findall('<([^"]*)>', target)
            if std_names:
                # TODO: add capability to search for standard libraries

                # Delete the pre-processor instruction from the C code
                std_name = re.escape(std_names[0])
                text = re.sub(
                    rf"[\t\f ]*#[\t\f ]*include[\t\f ]+<{std_name}>[^\n]*\n?",
                    "",
                    text,
                )
                continue

            # Try to match to local library import (i.e. "xyz.h", 'xyz.h')
            local_names = [
                "".join(x)
                for x in re.findall('["]([^"]*)["]|[\']([^"]*)[\']', target)
            ]
            if not local_names:
                raise BaseException("Invalid '#include' statement")

            # Delete the pre-processor instruction from the C code
            local_name = re.escape(local_names[0])
            text = re.sub(
                rf"[\t\f ]*#[\t\f ]*include[\t\f ]+[\"']{local_name}[\"'][^\n]*\n?",
                "",
                text,
            )
            include_dir = os.path.abspath(os.path.dirname(path))
            text = get_text(local_names[0], include_dir) + text

    # We are continuing to implement more "supplemental" pre-processor features, but cleanup for now.
    text = cleanup(text)

    return text