Module src.frontend.bnfToParser
This module reads a BNF definition and produces a parser for use with rply. Each BNF line is expected to have the form `X : Y Y #name`, where `name` determines what the resulting node is called in the parse tree.
Expand source code
"""
This module reads a BNF definition and produces a parser for use with rply. Each BNF line is expected to have the form `X : Y Y #name`, where `name` determines what the resulting node is called in the parse tree.
"""
import re
import os
# ---------------------------------------------------------------------------
# Code templates.
#
# Each template is pasted verbatim into the generated parser.py, so the
# leading whitespace inside the strings is significant: the production
# snippets land inside ``Parser.parse`` (8 spaces) and the ``__init__``
# snippet lands inside ``class Parser`` (4 spaces).
# ---------------------------------------------------------------------------

# Snippet emitted for every ordinary production.  BNFSPOT, FUNCNAMESPOT and
# NAMESPOT are substituted per rule.
funcTemp = """
        @self.pg.production('BNFSPOT')
        def FUNCNAMESPOT(p):
            newNode = ParseTree("NAMESPOT",p)
            self.Head = newNode
            return newNode
"""

# Snippet emitted for the rule whose name is "program" (the head of the tree).
headTemp = """
        @self.pg.production('BNFSPOT')
        def program(p):
            \"\"\"
            Tells the parser which BNF will be the head of the tree
            Args:
                p: The matching set of tokens.
            Returns:
                The node of the ParseTree.
            \"\"\"
            newNode = ParseTree("NAMESPOT",p)
            self.Head = newNode
            return newNode
"""

# ``Parser.__init__``; TOKENSPOT is replaced by the list of token names.
initTemp = """
    def __init__(self):
        \"\"\"
        Initializes the parser and tells it the allowed tokens
        \"\"\"
        self.pg = ParserGenerator(
            TOKENSPOT ,
        )
        #initialzie head and current node
        self.Head = None
"""

# Skeleton of the generated module.  INITSPOT and FUNCLISTSPOT are replaced by
# the rendered ``__init__`` and the rendered production functions.
# NOTE(review): the generated ``error_handle`` *returns* a ValueError instead
# of raising it; rply appears to expect the handler to raise - confirm what
# the callers of the generated parser rely on before changing it.
_OUTPUT_TEMPLATE = """
\"\"\"
This module contains definitions for the ParseTree and Parser classes, as well as some ansillary functions to assist.
\"\"\"
from rply import ParserGenerator
from rply.errors import ParserGeneratorWarning
from warnings import simplefilter
from rply.token import Token
#we get werid 'non-descriptive' warnings from ParserGenerator, this ignores those
simplefilter('ignore', ParserGeneratorWarning)
class ParseTree():
    \"\"\"
    ParseTree is a class that acts as each node in an ParseTree
    \"\"\"
    def __init__(self, token, content):
        \"\"\"
        Construct a new ParseTree object
        Args:
            token: The token type of the node.
            content: The content of that is tokenized.
        \"\"\"
        self.token = token
        self.content = content
    def print_ParseTree(self, file=None, _prefix="", _last=True):
        \"\"\"
        Prints the ParseTree in depth first order
        Args:
            file: The file to be written to (Defaults to Stdout).
            _prefix: A string indicating the spacing from the left side of the screen.
            _last: A boolean that indicates if a self is the last in it's immediate surroundings.
        \"\"\"
        print(f"{_prefix}{'`-- ' if _last else '|-- '}{self.token}", file=file)
        _prefix += " " if _last else "| "
        for i, child in enumerate(self.content):
            _last = i == len(self.content)-1
            if 'content' in child.__dict__:
                child.print_ParseTree(file, _prefix, _last)
            else:
                print(f"{_prefix}{'`-- ' if _last else '|-- '}{child}", file=file)
    def __str__(self):
        \"\"\"
        Produces a string representation of the Parse Tree
        \"\"\"
        li = []
        ntv = [("", self, True)]
        while ntv:
            li.append(ntv[0])
            ntv = [(f"{ntv[0][0]}{' ' if ntv[0][2] else '| '}", x, i == len(ntv[0][1].content)-1 ) for i, x in enumerate(ntv[0][1].content)] + ntv[1:] if 'content' in ntv[0][1].__dict__ else ntv[1:]
        return "\\n".join([f"{x[0]}{'`-- ' if x[2] else '|-- '}{x[1].token if 'token' in x[1].__dict__ else x[1]}" for x in li]) + "\\n"
    def __repr__(self):
        \"\"\"
        Constructs a list based string representation of the parse tree
        \"\"\"
        li = []
        ntv = [(1, self)]
        while ntv:
            li.append((ntv[0][0], ntv[0][1].content))
            ntv = [(ntv[0][0]+1, x) for x in ntv[0][1].content if 'content' in x.__dict__] + ntv[1:]
        return "\\n".join([f"{x[0]} : {[y.token if 'content' in y.__dict__ else y for y in x[1]]}" for x in li])
    def getListView(self, level):
        \"\"\"
        Prints a simple list version of the tree for output. Calls itself recursively
        Args:
            level: The current level of the tree.
        \"\"\"
        li = []
        li.append(f"{level+1} : {[x if 'content' not in x.__dict__ else x.token for x in self.content]}")
        for x in self.content:
            if "content" in x.__dict__:
                li.extend(x.getListView(level+1))
        if level == 0:
            return "\\n".join(li)
        return li
#setup parser class
class Parser():
    \"\"\"
    Definition for the Parser object, works off of rply. Contains rules for parsing.
    \"\"\"
    INITSPOT
    def parse(self):
        \"\"\"
        The list of BNF functions and their behavior
        \"\"\"
        FUNCLISTSPOT
        @self.pg.error
        def error_handle(token):
            \"\"\"
            Boilerplate error handling function
            Args:
                token: The token that caused an error.
            \"\"\"
            return ValueError(token)
    #boilerplate function
    def get_parser(self):
        \"\"\"
        Retrieves the built version of the parser.
        Returns:
            The built parser.
        \"\"\"
        return self.pg.build()
    #retrieve the trees head
    def getTree(self):
        \"\"\"
        Getter for the head of the tree.
        Returns:
            The head of the tree.
        \"\"\"
        return self.Head
    def print_error(self):
        \"\"\"
        Prints parser error message. This function ultimately iterates through the ParseTree that was returned after the parser found an error. ParseTree's consist of tokens as well as other ParseTree's so we need to iterate to find the first token and then print its source position.
        \"\"\"
        # TODO: add some more in-depth error processing to print
        # out a more detailed description of what went wrong, and possibly some suggestions
        # at to why there was a parse/syntax error. (i.e. suggest a missing semicolon)
        head = self.getTree()
        token = 0 # token hasn't been found yet, so we set value to 0
        while True and head:
            # Iterate through list of elements
            for i in head.content:
                # Could be a Token
                if(type(i) == type(Token("sample", "sample"))):
                    # Found a Token
                    token = i
                    break
            # Check again (to break out of while loop and not iterate again)
            if (type(token) == type(Token("sample", "sample"))):
                break
            else:
                # Set head to last element.
                # If this code executes then I can assume that the
                # last element is an ParseTree.
                head = head.content[len(head.content)-1]
        if token:
            print(f"ParsingError: Last token \\\'{token.value}\\\' parsed successfully at, {token.source_pos}\\n")
        else:
            # Never found a token to report, need to exit
            print("ParsingError: No ParseTree obtained\\n")
        exit()
"""

# A token name is two or more characters, upper-case letters with optional
# inner underscores.  ``|`` is deliberately NOT part of the class: inside
# ``[...]`` it is a literal pipe, which would glue alternatives such as
# ``A_B|C_D`` into a single bogus token.
_TOKEN_RE = re.compile(r"[A-Z][A-Z_]*[A-Z]")


def _collect_tokens(text):
    """Return the distinct token names in ``text``, quoted, in first-seen order."""
    return list(dict.fromkeys("'" + name + "'" for name in _TOKEN_RE.findall(text)))


def _render_productions(lines):
    """Render one production function per non-blank ``X : Y Y #name`` line.

    Args:
        lines: The lines of the BNF definition (without line terminators).
    Returns:
        The concatenated source of all production functions.
    Raises:
        ValueError: If a non-blank line carries no ``#name`` suffix.
    """
    rendered = []
    for lineno, line in enumerate(lines, start=1):
        if not line.strip():
            # Blank or whitespace-only lines separate rules; nothing to emit.
            continue
        bnf, marker, rest = line.partition("#")
        if not marker:
            raise ValueError(
                f"line {lineno}: expected 'X : Y Y #name', got {line.strip()!r}"
            )
        # Only the text between the first and a possible second '#' is the name.
        name = rest.split("#", 1)[0].strip()
        # Every non-identifier character (space, ':', '|', ...) becomes '_'.
        funcname = re.sub(r"\W", "_", bnf)
        template = headTemp if name == "program" else funcTemp
        # FUNCNAMESPOT must be substituted before NAMESPOT, which it contains.
        snippet = template.replace("BNFSPOT", bnf)
        snippet = snippet.replace("FUNCNAMESPOT", funcname)
        snippet = snippet.replace("NAMESPOT", name)
        rendered.append(snippet)
    return "".join(rendered)


def main(path):
    """
    Creates a parser.py file from input. While the default is more or less BNF_definition (as it is passed in inside this file's __main__ block) the function assumes no default.

    The generated file is written next to this module, overwriting any
    existing parser.py.

    Args:
        path: the path to the input file
    Raises:
        ValueError: If a non-blank line of the input has no ``#name`` suffix.
    """
    # Read the definition once; it feeds both token discovery and rule rendering.
    with open(path, "r") as fi:
        cont = fi.read()

    tokenList = "[" + ",".join(_collect_tokens(cont)) + "]"
    initFunc = initTemp.replace("TOKENSPOT", tokenList)
    functionList = _render_productions(cont.split("\n"))

    totalOutput = _OUTPUT_TEMPLATE.replace("INITSPOT", initFunc)
    totalOutput = totalOutput.replace("FUNCLISTSPOT", functionList)

    print("Overwriting ")
    # abspath guards against an empty dirname (which would target "/parser.py").
    out_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parser.py")
    with open(out_path, "w") as f:
        f.write(totalOutput)
if __name__ == "__main__":
    # When run as a script, fall back to the BNF definition file named
    # "BNF_definition" (resolved relative to the current working directory).
    default_bnf_path = "BNF_definition"
    main(default_bnf_path)
Functions
def main(path)
-
Creates a parser.py file from the given input file. Although BNF_definition is the de facto default (it is what this file's `__main__` block passes in), the function itself assumes no default.
Args
path
- the path to the input file
Expand source code
def main(path): """ Creates a parser.py file from input. While the default is more or less BNF_definition (as it is passed in inside this files __main__ block) the function assumes no default Args: path: the path to the input file """ fi = open(path,"r") cont = fi.read() fi.close() reg = r'([A-Z][A-Z|_]*[A-Z])' reg = re.compile(reg) tokens = [] for group in reg.findall(cont): tokens.append("'" + group + "'") tokens = list(dict.fromkeys(tokens)) tokenList = "[" jon = "," join = jon.join(tokens) tokenList += join + "]" initFunc = initTemp.replace("TOKENSPOT",tokenList) fi = open(path,"r") cont = fi.readlines() fi.close() functionList = "" for line in cont: if(line != "\n"): spl = line.split("#") bnf = spl[0] funcname = bnf.replace(" ","_") funcname = funcname.replace(":","_") name = spl[1].strip() if(name == "program"): newFunc = headTemp else: newFunc = funcTemp newFunc = newFunc.replace("BNFSPOT",bnf) newFunc = newFunc.replace("FUNCNAMESPOT",funcname) newFunc = newFunc.replace("NAMESPOT",name) functionList += newFunc totalOutput = """ \"\"\" This module contains definitions for the ParseTree and Parser classes, as well as some ansillary functions to assist. \"\"\" from rply import ParserGenerator from rply.errors import ParserGeneratorWarning from warnings import simplefilter from rply.token import Token #we get werid 'non-descriptive' warnings from ParserGenerator, this ignores those simplefilter('ignore', ParserGeneratorWarning) class ParseTree(): \"\"\" ParseTree is a class that acts as each node in an ParseTree \"\"\" def __init__(self, token, content): \"\"\" Construct a new ParseTree object Args: token: The token type of the node. content: The content of that is tokenized. \"\"\" self.token = token self.content = content def print_ParseTree(self, file=None, _prefix="", _last=True): \"\"\" Prints the ParseTree in depth first order Args: file: The file to be written to (Defaults to Stdout). 
_prefix: A string indicating the spacing from the left side of the screen. _last: A boolean that indicates if a self is the last in it's immediate surroundings. \"\"\" print(f"{_prefix}{'`-- ' if _last else '|-- '}{self.token}", file=file) _prefix += " " if _last else "| " for i, child in enumerate(self.content): _last = i == len(self.content)-1 if 'content' in child.__dict__: child.print_ParseTree(file, _prefix, _last) else: print(f"{_prefix}{'`-- ' if _last else '|-- '}{child}", file=file) def __str__(self): \"\"\" Produces a string representation of the Parse Tree \"\"\" li = [] ntv = [("", self, True)] while ntv: li.append(ntv[0]) ntv = [(f"{ntv[0][0]}{' ' if ntv[0][2] else '| '}", x, i == len(ntv[0][1].content)-1 ) for i, x in enumerate(ntv[0][1].content)] + ntv[1:] if 'content' in ntv[0][1].__dict__ else ntv[1:] return "\\n".join([f"{x[0]}{'`-- ' if x[2] else '|-- '}{x[1].token if 'token' in x[1].__dict__ else x[1]}" for x in li]) + "\\n" def __repr__(self): \"\"\" Constructs a list based string representation of the parse tree \"\"\" li = [] ntv = [(1, self)] while ntv: li.append((ntv[0][0], ntv[0][1].content)) ntv = [(ntv[0][0]+1, x) for x in ntv[0][1].content if 'content' in x.__dict__] + ntv[1:] return "\\n".join([f"{x[0]} : {[y.token if 'content' in y.__dict__ else y for y in x[1]]}" for x in li]) def getListView(self, level): \"\"\" Prints a simple list version of the tree for output. Calls itself recursively Args: level: The current level of the tree. \"\"\" li = [] li.append(f"{level+1} : {[x if 'content' not in x.__dict__ else x.token for x in self.content]}") for x in self.content: if "content" in x.__dict__: li.extend(x.getListView(level+1)) if level == 0: return "\\n".join(li) return li #setup parser class class Parser(): \"\"\" Definition for the Parser object, works off of rply. Contains rules for parsing. 
\"\"\" INITSPOT def parse(self): \"\"\" The list of BNF functions and their behavior \"\"\" FUNCLISTSPOT @self.pg.error def error_handle(token): \"\"\" Boilerplate error handling function Args: token: The token that caused an error. \"\"\" return ValueError(token) #boilerplate function def get_parser(self): \"\"\" Retrieves the built version of the parser. Returns: The built parser. \"\"\" return self.pg.build() #retrieve the trees head def getTree(self): \"\"\" Getter for the head of the tree. Returns: The head of the tree. \"\"\" return self.Head def print_error(self): \"\"\" Prints parser error message. This function ultimately iterates through the ParseTree that was returned after the parser found an error. ParseTree's consist of tokens as well as other ParseTree's so we need to iterate to find the first token and then print its source position. \"\"\" # TODO: add some more in-depth error processing to print # out a more detailed description of what went wrong, and possibly some suggestions # at to why there was a parse/syntax error. (i.e. suggest a missing semicolon) head = self.getTree() token = 0 # token hasn't been found yet, so we set value to 0 while True and head: # Iterate through list of elements for i in head.content: # Could be a Token if(type(i) == type(Token("sample", "sample"))): # Found a Token token = i break # Check again (to break out of while loop and not iterate again) if (type(token) == type(Token("sample", "sample"))): break else: # Set head to last element. # If this code executes then I can assume that the # last element is an ParseTree. 
head = head.content[len(head.content)-1] if token: print(f"ParsingError: Last token \\\'{token.value}\\\' parsed successfully at, {token.source_pos}\\n") else: # Never found a token to report, need to exit print("ParsingError: No ParseTree obtained\\n") exit() """ totalOutput = totalOutput.replace("INITSPOT",initFunc) totalOutput = totalOutput.replace("FUNCLISTSPOT",functionList) print("Overwriting ") with open(os.path.dirname(__file__) + "/parser.py", 'w') as f: f.write(totalOutput)