Source code for pyaiml21.parser.parser

"""The AIML parser."""
from .logger import Logger
from ._xml_lxml_parser import LXMLParser as XMLParser
from ._category_parser import CategoryParser
from ._walker import Walker, TagSyntaxCheckerFn
from pyaiml21.ast import Category
from typing import Optional, List, Tuple, Dict


AIMLIF_SUBSTITUTIONS = {r"#\Newline": "\n", r"#\Comma": ","}


def _sub_aimlif(txt: str):
    for k, v in AIMLIF_SUBSTITUTIONS.items():
        txt = txt.replace(k, v)
    return txt


[docs]class AIMLParser: """ Allows parsing XML files into tree-like objects. AIMLParser represents a parsing phase when loading AIML categories into the interpreter. Loads file, parses it as a XML file (ignoring all AIML syntax rules), and finds and returns the basic unit of knowledge, category elements, which are then validated. To enable the parser to validate other tags, use `add_tag` to pass the name of the tag and parsing function. Note: As category elements might be wrapped with <topic>, AIMLParser tries to correctly assign child node <topic> to all categories wrapped, i.e. .. code-block:: xml <topic name="..."> <category>..</category> </topic> is transformed, internally, into .. code-block:: xml <category> ... <topic>topic_name</topic> </category> """ def __init__(self): """Create empty parser without any AIML tags knowledge.""" self.known_tags: Dict[str, TagSyntaxCheckerFn] = {}
[docs] def add_tag(self, name: str, fn: TagSyntaxCheckerFn) -> None: """Enable the parser to correctly validate the <`name`> tag.""" self.known_tags[name] = fn
def _parse_aiml(self, source: str, is_file: bool) \ -> Tuple[Optional[List[Category]], Logger]: """Parse .aiml file or text based on the `is_file` flag.""" logger = Logger() root = XMLParser.parse(source, logger) \ if is_file \ else XMLParser.parse_text(source, logger) if not root or logger.has_errors(): assert logger.has_errors() return None, logger categories = CategoryParser.get_categories(root, logger) for category in categories: Walker.walk(category, logger, self.known_tags) return categories, logger
[docs] def parse_category_aiml(self, text: str) \ -> Tuple[Optional[Category], Logger]: """ Parse one <category> from the text. The only requirement is that <category> is the root element and text is well-formed xml document. :param text: string with <category> element :return: parsed category and the logger with reported errors, optionally the category might be None, in this case the logger contains at least one error """ logger = Logger() # reuse the parser, create "AIML file" with one category text = f"<aiml>{text}</aiml>" root = XMLParser.parse_text(text, logger) if not root or logger.has_errors(): assert logger.has_errors() return None, logger categories = CategoryParser.get_categories(root, logger) if not categories: return None, logger assert len(categories) == 1 Walker.walk(categories[0], logger, self.known_tags) return categories[0], logger
[docs] def parse_aiml_text(self, text: str)\ -> Tuple[Optional[List[Category]], Logger]: """ Parse AIML content from the `text`. The requirements on the text content are similar to the ones presented in the `parse_aiml_text` method. :param text: string with AIML content :return: list of categories and the logger with error messages; optionally the list of categories might be None, in this case the logger contains at least one error """ return self._parse_aiml(text, is_file=False)
[docs] def parse_aiml_file(self, filename: str)\ -> Tuple[Optional[List[Category]], Logger]: """ Parse .aiml file given by the `filename`. .aiml file is an XML file containing AIML elements, and <aiml> as the root element. Any syntactic errors are reported via the returned logger. :param filename: name of file to parse (.aiml) :return: list of categories and the logger with error messages; optionally the list of categories might be None, in this case the logger contains at least one error """ return self._parse_aiml(filename, is_file=True)
[docs] def parse_aimlif_file(self, filename: str)\ -> Tuple[Optional[List[Category]], Logger]: """ Parse .aimlif file given by the `filename`. The AIMLIF format is an alternative format to represent AIML files, where each category is stored on a single line. The elements are separated with a comma, in this order: activation count, pattern, that, topic, template, filename Notes: * this method completely ignores activation count as it does not seem to have any purpose; * the filename present on the line is also ignored; :param filename: .aimlif file to parse :return: list of categories and the logger with error messages; optionally the list of categories might be None, in this case the logger contains at least one error """ logger = Logger() shadow_xml = [] with open(filename, "r", encoding="utf-8") as f: for lineno, line in enumerate(f): if line.count(",") != 5: logger.error_xml_syntax("AIMLIF: expected five commas", filename, lineno) return None, logger _, pat, tha, top, tem, _ = map(_sub_aimlif, line.split(",")) category_children = [] for text, tag in zip([pat, tem, tha, top], ["pattern", "template", "that", "topic"]): tagged_text = f"<{tag}>{text}</{tag}>" category_children.append(tagged_text) cat_inner = "".join(category_children) cat = f"<category>{cat_inner}</category>" shadow_xml.append(cat) return self.parse_aiml_text("<aiml>" + "".join(shadow_xml) + "</aiml>")