Skip to content

rigging.model

Models are the core datatypes for structured parsing.

Answer #

Bases: Model

Quick model for answers.

CommaDelimitedAnswer #

Bases: DelimitedAnswer

Comma delimited answer (,)

DelimitedAnswer #

Bases: Model

Mixed support delimited answer (- | / ,) selected based on most-matches

items: list[str] property #

Parsed items from the content.

Description #

Bases: Model

Quick model for descriptions.

Instructions #

Bases: Model

Quick model for instructions.

Model #

Bases: BaseXmlModel

from_text(content: str) -> list[tuple[ModelT, slice]] classmethod #

The core parsing method which attempts to extract and parse as many valid instances of a model from semi-structured text.

Parameters:

  • content (str) –

    The text content to parse.

Returns:

  • list[tuple[ModelT, slice]]

    A list of tuples containing the extracted models and their corresponding slices.

Raises:

  • MissingModelError

    If the specified model tags are not found in the message.

  • ValidationError

    If an error occurs while parsing the content.

Source code in rigging/model.py
@classmethod
def from_text(cls, content: str) -> list[tuple[ModelT, slice]]:
    """
    The core parsing method which attempts to extract and parse as many
    valid instances of a model from semi-structured text.

    Candidate elements are located with a regular expression, filtered down to
    those whose tag name equals `cls.__xml_tag__`, and then parsed one by one.
    Candidates that fail to parse are skipped as long as at least one succeeds.

    Args:
        content: The text content to parse.

    Returns:
        A list of tuples containing the extracted models and their corresponding slices.
        Entries are ordered by the length of the matched interior text (longest first),
        not by their position in `content`. Each slice covers the span of the
        top-level regex match in `content`.

    Raises:
        MissingModelError: If the specified model tags are not found in the message.
        ValidationError: If an error occurs while parsing the content.
            More precisely, when no candidate parses successfully, the first
            exception captured (from the longest candidate) is re-raised as-is,
            whatever its type.
    """
    cls.ensure_valid()

    # Regex groups:
    #   1: the whole element, from opening tag through closing tag
    #   2: the tag name (the closing tag must repeat it via the \2 backreference)
    #   3: the interior text followed by the closing tag
    #   4: the interior text only
    # The opening tag may carry extra text (e.g. attributes) before its ">".
    # re.DOTALL lets the interior span multiple lines.
    pattern = r"(<([\w-]+).*?>((.*?)</\2>))"
    matches = [m for m in re.finditer(pattern, content, flags=re.DOTALL) if m.group(2) == cls.__xml_tag__]

    if not matches:
        raise MissingModelError(f"Failed to find '{cls.xml_tags()}' in message")

    # Sort matches based on the length of the interior text,
    # longest first. This should help us avoid matching the model
    # supplying hollow tags before the actual data.

    sorted_matches = sorted(matches, key=lambda m: len(m.group(4)), reverse=True)

    extracted: list[tuple[ModelT, slice]] = []
    exceptions: list[Exception] = []
    for match in sorted_matches:
        full_text, _, inner_with_end_tag, inner = match.groups()

        # The model might trip up regex by including partial tags
        # in passing before actually using them. We'll continually try
        # to parse the inner text until we can't extract our model anymore.
        #
        # Example: "Sure I'll use <answer> tags: <answer>hello</answer>"
        #
        # TODO: The opposite could be true, and we could greedily parse
        # backwards if we get failures. This is a simple solution for now.

        # Each pass re-runs the pattern on "interior + closing tag" of the current
        # candidate and, if a nested element with our tag is found, narrows
        # full_text / inner to it. The loop ends when no nested element remains.
        inner_match: re.Match[str] | None = match
        while inner_match is not None:
            inner_matches = re.finditer(pattern, inner_with_end_tag, flags=re.DOTALL)
            inner_match = next((m for m in inner_matches if m.group(2) == cls.__xml_tag__), None)
            if inner_match is not None:
                full_text, _, inner_with_end_tag, inner = inner_match.groups()

        try:
            # "Simple" models are built directly from the unescaped interior text
            # assigned to their first declared field; all other models are parsed
            # from the escaped element text via from_xml().
            model = (
                cls(**{next(iter(cls.model_fields)): unescape_xml(inner)})
                if cls.is_simple()
                else cls.from_xml(escape_xml(full_text))
            )
            # NOTE(review): the slice is taken from the outer `match`, so after the
            # narrowing loop above it can be wider than the text that was actually
            # parsed - confirm callers expect the outer span.
            extracted.append((model, slice(match.start(), match.end())))  # type: ignore [arg-type]
        except Exception as e:
            # Keep going: another candidate may still parse successfully.
            exceptions.append(e)
            continue

    # TODO: This is poor form atm, but the exception stacking
    # and final error should involve some careful thought

    # `matches` was non-empty, so if nothing was extracted every candidate
    # failed and `exceptions` holds at least one entry.
    if not extracted:
        raise exceptions[0]

    return extracted

is_simple() -> bool classmethod #

Check if the model is "simple", meaning it has a single field with a basic datatype.

Until we refactor our XML parsing, this helps make the parsing more consistent for models which can support it.

Returns:

  • bool

    True if the model is simple, False otherwise.

Source code in rigging/model.py
@classmethod
def is_simple(cls) -> bool:
    """
    Report whether this model is "simple": exactly one field, of a basic datatype.

    Until we refactor our XML parsing, this helps make the parsing more consistent for models
    which can support it.

    Returns:
        True if the model is simple, False otherwise.
    """
    declared_fields = cls.model_fields
    if len(declared_fields) != 1:
        return False

    (only_field,) = declared_fields.values()
    field_type = only_field.annotation

    # Look through an Annotated[...] wrapper to the underlying type.
    is_annotated = t.get_origin(field_type) == t.Annotated
    if is_annotated:
        field_type = t.get_args(field_type)[0]

    return field_type in BASIC_TYPES

one_from_text(content: str, *, fail_on_many: bool = False) -> tuple[ModelT, slice] classmethod #

Finds and returns a single match from the given text content.

Parameters:

  • content (str) –

    The text content to search for matches.

  • fail_on_many (bool, default: False ) –

    If True, raises a ValidationError if multiple matches are found.

Returns:

  • tuple[ModelT, slice]

    A tuple containing the matched model and the slice indicating the match location.

Raises:

  • ValidationError

    If multiple matches are found and fail_on_many is True.

Source code in rigging/model.py
@classmethod
def one_from_text(cls, content: str, *, fail_on_many: bool = False) -> tuple[ModelT, slice]:
    """
    Extract a single model instance from the given text content.

    All candidates are collected with `from_text()`; the one whose slice spans
    the most characters is returned.

    Args:
        content: The text content to search for matches.
        fail_on_many: If True, raises a ValidationError if multiple matches are found.

    Returns:
        A tuple containing the matched model and the slice indicating the match location.

    Raises:
        ValidationError: If multiple matches are found and fail_on_many is True.

    Any exception raised by `from_text()` is propagated unchanged.
    """
    candidates = cls.from_text(content)  # type: ignore [var-annotated]

    too_many = fail_on_many and len(candidates) > 1
    if too_many:
        # NOTE(review): if ValidationError here is pydantic's, it may not accept
        # a bare message argument - confirm which class is imported.
        raise ValidationError("Multiple matches found with 'fail_on_many=True'")

    # Prefer the candidate covering the widest span of the source text.
    return max(candidates, key=lambda pair: pair[1].stop - pair[1].start)

to_pretty_xml() -> str #

Converts the model to a pretty XML string with indents and newlines.

Returns:

  • str

    The pretty XML representation of the model.

Source code in rigging/model.py
def to_pretty_xml(self) -> str:
    """
    Render the model as an indented, multi-line XML string.

    Empty elements are written with explicit start and end tags, and nested
    elements are indented by two spaces per level.

    Returns:
        The pretty XML representation of the model.
    """
    root = self.to_xml_tree()
    ET.indent(root, "  ")
    encoded = ET.tostring(root, short_empty_elements=False, encoding="utf-8")
    rendered = encoded.decode()

    if not self.__class__.is_simple():
        return rendered  # type: ignore [no-any-return]

    # We only expect to use this in our "simple"
    # models, but I'd like a better long-term solution
    return unescape_xml(rendered)

xml_end_tag() -> str classmethod #

Helper method which wraps the class tag in XML braces with a leading slash.

Source code in rigging/model.py
@classmethod
def xml_end_tag(cls) -> str:
    """Helper method which wraps the class tag in XML braces with a leading slash."""
    tag = cls.__xml_tag__
    return f"</{tag}>"

xml_example() -> str classmethod #

Returns an example XML representation of the given class.

Models should typically override this method to provide a more complex example.

By default, this method returns a hollow XML scaffold one layer deep.

Returns:

  • str

    A string containing the XML representation of the class.

Source code in rigging/model.py
@classmethod
def xml_example(cls) -> str:
    """
    Build an example XML representation of this class.

    Models should typically override this method to provide a more complex example.

    By default, simple models yield just their empty start/end tag pair; all
    other models yield a hollow XML scaffold one layer deep, with one empty
    entry per property of the model's JSON schema.

    Returns:
        A string containing the XML representation of the class.
    """
    if cls.is_simple():
        return cls.xml_tags()

    field_names = cls.model_json_schema()["properties"]

    # One level deep only: every property maps to None under the root tag.
    skeleton = {cls.__xml_tag__: dict.fromkeys(field_names)}
    rendered = xmltodict.unparse(
        skeleton,
        pretty=True,
        full_document=False,
        indent="  ",
        short_empty_elements=True,
    )
    return t.cast(str, rendered)  # Bad type hints in xmltodict

xml_start_tag() -> str classmethod #

Helper method which wraps the class tag in XML braces.

Source code in rigging/model.py
@classmethod
def xml_start_tag(cls) -> str:
    """Helper method which wraps the class tag in XML braces."""
    tag = cls.__xml_tag__
    return f"<{tag}>"

xml_tags() -> str classmethod #

Helper method which returns the full XML tags for the class.

Source code in rigging/model.py
@classmethod
def xml_tags(cls) -> str:
    """Helper method which returns the class's start tag immediately followed by its end tag."""
    opening = cls.xml_start_tag()
    closing = cls.xml_end_tag()
    return opening + closing

NewlineDelimitedAnswer #

Bases: DelimitedAnswer

Newline delimited answer (\n)

Question #

Bases: Model

Quick model for questions.

QuestionAnswer #

Bases: Model

Quick model for question-answer pairs.

answer: Answer = element() class-attribute instance-attribute #

The answer

question: Question = element() class-attribute instance-attribute #

The question

Thinking #

Bases: Model

Quick model for thinking messages.

YesNoAnswer #

Bases: Model

Yes/No answer with coercion

boolean: bool instance-attribute #

The boolean value of the answer.

make_primitive(name: str, type_: type[PrimitiveT] = str, *, tag: str | None = None, doc: str | None = None, validator: t.Callable[[str], str | None] | None = None, strip_content: bool = True) -> type[Primitive[PrimitiveT]] #

Helper to create a simple primitive model with an optional content validator.

Note

This API is experimental and may change in the future.

Parameters:

  • name (str) –

    The name of the model.

  • type_ (type[PrimitiveT], default: str ) –

    The type of the model's content field.

  • tag (str | None, default: None ) –

    The XML tag for the model.

  • doc (str | None, default: None ) –

    The documentation for the model.

  • validator (Callable[[str], str | None] | None, default: None ) –

    An optional content validator for the model.

  • strip_content (bool, default: True ) –

    Whether to strip the content string before pydantic validation.

Returns:

  • type[Primitive[PrimitiveT]]

    The primitive model class.

Source code in rigging/model.py
def make_primitive(
    name: str,
    type_: type[PrimitiveT] = str,  # type: ignore [assignment]
    *,
    tag: str | None = None,
    doc: str | None = None,
    validator: t.Callable[[str], str | None] | None = None,
    strip_content: bool = True,
) -> type[Primitive[PrimitiveT]]:
    """
    Build a simple primitive model class, optionally with a content validator.

    Note:
        This API is experimental and may change in the future.

    Args:
        name: The name of the model.
        type_: The type of the model's `content` field.
        tag: The XML tag for the model.
        doc: The documentation for the model.
        validator: An optional content validator for the model. It receives the
            content and may return a replacement; a falsy return value
            (such as None) keeps the original content.
        strip_content: Whether to strip the content string before pydantic validation.

    Returns:
        The primitive model class.
    """

    def _validate(value: str) -> str:
        if validator is None:
            return value
        return validator(value) or value

    content_type = type_
    if strip_content:
        # Strip only string inputs; anything else is passed through untouched.
        content_type = t.Annotated[type_, BeforeValidator(lambda x: x.strip() if isinstance(x, str) else x)]  # type: ignore

    # The field validator is only registered when a validator was supplied.
    validators = {"content_validator": field_validator("content")(_validate)} if validator else {}

    return create_model(
        name,
        __base__=Primitive[content_type],  # type: ignore
        __doc__=doc,
        __cls_kwargs__={"tag": tag},
        content=(content_type, ...),
        __validators__=validators,
    )