Skip to content

API Reference

apatch(client, mode=Mode.TOOLS)

No longer necessary, use patch instead.

Patch the client.chat.completions.create method

Enables the following features:

  • response_model parameter to parse the response from OpenAI's API
  • max_retries parameter to retry the function if the response is not valid
  • validation_context parameter to validate the response using the pydantic model
  • strict parameter to use strict json parsing
Source code in instructor/patch.py
def apatch(client: AsyncOpenAI, mode: Mode = Mode.TOOLS):
    """
    Deprecated alias for `patch` — no longer necessary, use `patch` instead.

    Patch the `client.chat.completions.create` method

    Enables the following features:

    - `response_model` parameter to parse the response from OpenAI's API
    - `max_retries` parameter to retry the function if the response is not valid
    - `validation_context` parameter to validate the response using the pydantic model
    - `strict` parameter to use strict json parsing
    """
    import warnings

    # `patch` now handles both sync and async clients, so this wrapper only
    # exists for backwards compatibility and emits a deprecation warning.
    deprecation_message = "apatch is deprecated, use patch instead"
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return patch(client, mode=mode)

patch(client=None, create=None, mode=Mode.TOOLS)

Patch the client.chat.completions.create method

Enables the following features:

  • response_model parameter to parse the response from OpenAI's API
  • max_retries parameter to retry the function if the response is not valid
  • validation_context parameter to validate the response using the pydantic model
  • strict parameter to use strict json parsing
Source code in instructor/patch.py
def patch(
    client: Union[OpenAI, AsyncOpenAI] = None,
    create: Callable[T_ParamSpec, T_Retval] = None,
    mode: Mode = Mode.TOOLS,
) -> Union[OpenAI, AsyncOpenAI]:
    """
    Patch the `client.chat.completions.create` method

    Enables the following features:

    - `response_model` parameter to parse the response from OpenAI's API
    - `max_retries` parameter to retry the function if the response is not valid
    - `validation_context` parameter to validate the response using the pydantic model
    - `strict` parameter to use strict json parsing

    Exactly one of `client` or `create` should be provided. When `client` is
    given, its `chat.completions.create` is replaced in place and the client is
    returned; when only `create` is given, the wrapped callable is returned.

    Raises:
        ValueError: If neither `client` nor `create` is provided.
    """

    logger.debug(f"Patching `client.chat.completions.create` with {mode=}")

    # Prefer an explicitly supplied `create` callable; otherwise pull the
    # method off the client. One of the two must be present.
    if create is not None:
        func = create
    elif client is not None:
        func = client.chat.completions.create
    else:
        raise ValueError("Either client or create must be provided")

    # Decide once, up front, whether to install the async or sync wrapper.
    func_is_async = is_async(func)

    @wraps(func)
    async def new_create_async(
        response_model: Type[T_Model] = None,
        validation_context: dict = None,
        max_retries: int = 1,
        *args: T_ParamSpec.args,
        **kwargs: T_ParamSpec.kwargs,
    ) -> T_Model:
        # Translate `response_model` into the mode-specific request kwargs
        # (tools / functions / json schema) before calling the API.
        response_model, new_kwargs = handle_response_model(
            response_model=response_model, mode=mode, **kwargs
        )
        # retry_async re-asks the model until the response validates or
        # `max_retries` is exhausted.
        response = await retry_async(
            func=func,
            response_model=response_model,
            validation_context=validation_context,
            max_retries=max_retries,
            args=args,
            kwargs=new_kwargs,
            mode=mode,
        )  # type: ignore
        return response

    @wraps(func)
    def new_create_sync(
        response_model: Type[T_Model] = None,
        validation_context: dict = None,
        max_retries: int = 1,
        *args: T_ParamSpec.args,
        **kwargs: T_ParamSpec.kwargs,
    ) -> T_Model:
        # Same flow as the async wrapper, but for a synchronous client.
        response_model, new_kwargs = handle_response_model(
            response_model=response_model, mode=mode, **kwargs
        )
        response = retry_sync(
            func=func,
            response_model=response_model,
            validation_context=validation_context,
            max_retries=max_retries,
            args=args,
            kwargs=new_kwargs,
            mode=mode,
        )
        return response

    new_create = new_create_async if func_is_async else new_create_sync

    # Mutate the client in place when one was provided; otherwise hand back
    # the wrapped callable for the caller to use directly.
    if client is not None:
        client.chat.completions.create = new_create
        return client
    else:
        return new_create

Validator

Bases: OpenAISchema

Validate if an attribute is correct and if not, return a new value with an error message

Source code in instructor/dsl/validators.py
class Validator(OpenAISchema):
    """
    Validate if an attribute is correct and if not,
    return a new value with an error message
    """

    # NOTE: the Field descriptions below are serialized into the schema the
    # LLM sees, so they are part of the prompt — do not edit them casually.

    # Whether the attribute satisfied the validation statement.
    is_valid: bool = Field(
        default=True,
        description="Whether the attribute is valid based on the requirements",
    )
    # LLM-written explanation of the failure; None when valid.
    reason: Optional[str] = Field(
        default=None,
        description="The error message if the attribute is not valid, otherwise None",
    )
    # LLM-suggested replacement value; None when valid or no fix offered.
    fixed_value: Optional[str] = Field(
        default=None,
        description="If the attribute is not valid, suggest a new value for the attribute",
    )

llm_validator(statement, allow_override=False, model='gpt-3.5-turbo', temperature=0, openai_client=None)

Create a validator that uses the LLM to validate an attribute

Usage

from instructor import llm_validator
from pydantic import BaseModel, Field, field_validator

class User(BaseModel):
    name: Annotated[str, llm_validator("The name must be a full name all lowercase")]
    age: int = Field(description="The age of the person")

try:
    user = User(name="Jason Liu", age=20)
except ValidationError as e:
    print(e)
1 validation error for User
name
  The name is valid but not all lowercase (type=value_error.llm_validator)

Note that here the error message is written by the LLM, and the error type is value_error.llm_validator.

Parameters:

Name Type Description Default
statement str

The statement to validate

required
model str

The LLM to use for validation (default: "gpt-3.5-turbo")

'gpt-3.5-turbo'
temperature float

The temperature to use for the LLM (default: 0)

0
openai_client OpenAI

The OpenAI client to use (default: None)

None
Source code in instructor/dsl/validators.py
def llm_validator(
    statement: str,
    allow_override: bool = False,
    model: str = "gpt-3.5-turbo",
    temperature: float = 0,
    openai_client: Optional[OpenAI] = None,
) -> Callable[[str], str]:
    """
    Create a validator that uses the LLM to validate an attribute

    ## Usage

    ```python
    from instructor import llm_validator
    from pydantic import BaseModel, Field, field_validator

    class User(BaseModel):
        name: Annotated[str, llm_validator("The name must be a full name all lowercase")]
        age: int = Field(description="The age of the person")

    try:
        user = User(name="Jason Liu", age=20)
    except ValidationError as e:
        print(e)
    ```

    ```
    1 validation error for User
    name
      The name is valid but not all lowercase (type=value_error.llm_validator)
    ```

    Note that here the error message is written by the LLM, and the error type is `value_error.llm_validator`.

    Parameters:
        statement (str): The statement to validate
        allow_override (bool): If True, return the LLM-suggested `fixed_value`
            instead of raising when the value is invalid (default: False)
        model (str): The LLM to use for validation (default: "gpt-3.5-turbo")
        temperature (float): The temperature to use for the LLM (default: 0)
        openai_client (OpenAI): The OpenAI client to use (default: None)
    """

    # Default to a patched client so the `response_model` kwarg is available.
    openai_client = openai_client if openai_client else instructor.patch(OpenAI())

    def llm(v: str) -> str:
        resp = openai_client.chat.completions.create(
            response_model=Validator,
            messages=[
                {
                    "role": "system",
                    "content": "You are a world class validation model. Capable to determine if the following value is valid for the statement, if it is not, explain why and suggest a new value.",
                },
                {
                    "role": "user",
                    "content": f"Does `{v}` follow the rules: {statement}",
                },
            ],
            model=model,
            temperature=temperature,
        )  # type: ignore[all]

        # BUGFIX: honor the override *before* asserting validity. Previously
        # the assert ran first, so the `allow_override` branch was unreachable
        # and a fixed value could never be returned.
        if not resp.is_valid and allow_override and resp.fixed_value is not None:
            # The value is not valid, but we allow override: return the fix.
            return resp.fixed_value

        # If the response is not valid, surface the LLM-written reason. This
        # could be used in the future to generate a better response via a
        # reasking mechanism.
        assert resp.is_valid, resp.reason

        return v

    return llm

openai_moderation(client=None)

Validates a message using OpenAI moderation model.

Should only be used for monitoring inputs and outputs of OpenAI APIs Other use cases are disallowed as per: https://platform.openai.com/docs/guides/moderation/overview

Example:

from instructor import OpenAIModeration

class Response(BaseModel):
    message: Annotated[str, AfterValidator(OpenAIModeration(openai_client=client))]

Response(message="I hate you")

 ValidationError: 1 validation error for Response
 message
Value error, `I hate you.` was flagged for ['harassment'] [type=value_error, input_value='I hate you.', input_type=str]

client (OpenAI): The OpenAI client to use, must be sync (default: None)

Source code in instructor/dsl/validators.py
def openai_moderation(client: Optional[OpenAI] = None) -> Callable[[str], str]:
    """
    Validates a message using OpenAI moderation model.

    Should only be used for monitoring inputs and outputs of OpenAI APIs
    Other use cases are disallowed as per:
    https://platform.openai.com/docs/guides/moderation/overview

    Example:
    ```python
    from instructor import OpenAIModeration

    class Response(BaseModel):
        message: Annotated[str, AfterValidator(OpenAIModeration(openai_client=client))]

    Response(message="I hate you")
    ```

    ```
     ValidationError: 1 validation error for Response
     message
    Value error, `I hate you.` was flagged for ['harassment'] [type=value_error, input_value='I hate you.', input_type=str]
    ```

    client (OpenAI): The OpenAI client to use, must be sync (default: None)
    """

    # Fall back to a default sync client when none is supplied.
    client = client or OpenAI()

    def validate_message_with_openai_mod(v: str) -> str:
        result = client.moderations.create(input=v).results[0]
        if result.flagged:
            # Collect the names of every category the model flagged.
            category_flags = result.categories.model_dump()
            flagged_names = [name for name, hit in category_flags.items() if hit]
            raise ValueError(f"`{v}` was flagged for {', '.join(flagged_names)}")
        return v

    return validate_message_with_openai_mod

IterableModel(subtask_class, name=None, description=None)

Dynamically create an IterableModel OpenAISchema that can be used to segment multiple tasks given a base class. This creates a class that can be used to create a toolkit for a specific task; names and descriptions are automatically generated, but they can be overridden.

Usage

from pydantic import BaseModel, Field
from instructor import IterableModel

class User(BaseModel):
    name: str = Field(description="The name of the person")
    age: int = Field(description="The age of the person")
    role: str = Field(description="The role of the person")

MultiUser = IterableModel(User)

Result

class MultiUser(OpenAISchema, MultiTaskBase):
    tasks: List[User] = Field(
        default_factory=list,
        repr=False,
        description="Correctly segmented list of `User` tasks",
    )

    @classmethod
    def from_streaming_response(cls, completion) -> Generator[User]:
        '''
        Parse the streaming response from OpenAI and yield a `User` object
        for each task in the response
        '''
        json_chunks = cls.extract_json(completion)
        yield from cls.tasks_from_chunks(json_chunks)

Parameters:

Name Type Description Default
subtask_class Type[OpenAISchema]

The base class to use for the MultiTask

required
name Optional[str]

The name of the MultiTask class, if None then the name of the subtask class is used as Multi{subtask_class.__name__}

None
description Optional[str]

The description of the MultiTask class, if None then the description is set to `Correct segmentation of {subtask_class.__name__} tasks`

None

Returns:

Name Type Description
schema OpenAISchema

A new class that can be used to segment multiple tasks

Source code in instructor/dsl/iterable.py
def IterableModel(
    subtask_class: Type[BaseModel],
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> Type[BaseModel]:
    """
    Dynamically build an `Iterable{Model}` OpenAISchema that segments multiple
    tasks of a given base class. Names and descriptions are generated
    automatically but can be overridden via the `name` and `description`
    arguments.

    ## Usage

    ```python
    from pydantic import BaseModel, Field
    from instructor import IterableModel

    class User(BaseModel):
        name: str = Field(description="The name of the person")
        age: int = Field(description="The age of the person")
        role: str = Field(description="The role of the person")

    MultiUser = IterableModel(User)
    ```

    The result behaves like an `OpenAISchema` subclass with a single
    `tasks: List[User]` field and streaming helpers that yield one `User`
    per segmented task.

    Parameters:
        subtask_class (Type[OpenAISchema]): The base class to use for the MultiTask
        name (Optional[str]): Overrides the generated class name
            (`Iterable{subtask_class.__name__}` by default)
        description (Optional[str]): Overrides the generated docstring
            (`Correct segmentation of `{name}` tasks` by default)

    Returns:
        schema (OpenAISchema): A new class that can be used to segment multiple tasks
    """
    base_name = name if name is not None else subtask_class.__name__

    tasks_field = Field(
        default_factory=list,
        repr=False,
        description=f"Correctly segmented list of `{base_name}` tasks",
    )

    new_cls = create_model(
        f"Iterable{base_name}",
        tasks=(List[subtask_class], tasks_field),  # type: ignore[valid-type]
        __base__=(OpenAISchema, IterableBase),  # type: ignore
    )

    # Remember the element type so the streaming helpers on IterableBase can
    # re-validate each parsed chunk against it.
    new_cls.task_type = subtask_class

    if description is None:
        new_cls.__doc__ = f"Correct segmentation of `{base_name}` tasks"
    else:
        new_cls.__doc__ = description

    assert issubclass(
        new_cls, OpenAISchema
    ), "The new class should be a subclass of OpenAISchema"
    return new_cls

Partial

Bases: Generic[T_Model]

Generate a new class which has PartialBase as a base class.

Notes

This will enable partial validation of the model while streaming.

Example

Partial[SomeModel]

Source code in instructor/dsl/partial.py
class Partial(Generic[T_Model]):
    """Generate a new class which has PartialBase as a base class.

    Notes:
        This will enable partial validation of the model while streaming.

    Example:
        Partial[SomeModel]
    """

    def __new__(
        cls,
        *args: object,  # noqa :ARG003
        **kwargs: object,  # noqa :ARG003
    ) -> "Partial[T_Model]":
        """Cannot instantiate.

        Raises:
            TypeError: Direct instantiation not allowed.
        """
        raise TypeError("Cannot instantiate abstract Partial class.")

    def __init_subclass__(
        cls,
        *args: object,
        **kwargs: object,
    ) -> NoReturn:
        """Cannot subclass.

        Raises:
           TypeError: Subclassing not allowed.
        """
        raise TypeError("Cannot subclass {}.Partial".format(cls.__module__))

    def __class_getitem__(  # type: ignore[override]
        cls,
        wrapped_class: type[T_Model] | tuple[type[T_Model], type[MakeFieldsOptional]],
    ) -> type[T_Model]:
        """Convert model to one that inherits from PartialBase.

        We don't make the fields optional at this point, we just wrap them with `Partial` so the names of the nested models will be
        `Partial{ModelName}`. We want the output of `model_json_schema()` to
        reflect the name change, but everything else should be the same as the
        original model. During validation, we'll generate a true partial model
        to support partially defined fields.

        """

        # Subscripting with a 2-tuple, e.g. Partial[Model, MakeFieldsOptional],
        # switches on the make-fields-optional behavior below.
        make_fields_optional = None
        if isinstance(wrapped_class, tuple):
            wrapped_class, make_fields_optional = wrapped_class

        def _wrap_models(field: FieldInfo) -> tuple[object, FieldInfo]:
            # Work on a copy so the original model's FieldInfo is untouched.
            tmp_field = deepcopy(field)

            annotation = field.annotation

            # Handle generics (like List, Dict, etc.)
            if get_origin(annotation) is not None:
                # Get the generic base (like List, Dict) and its arguments (like User in List[User])
                generic_base = get_origin(annotation)
                generic_args = get_args(annotation)

                # Recursively apply Partial to each of the generic arguments
                modified_args = tuple(
                    (
                        Partial[arg]  # type: ignore[valid-type]
                        if isinstance(arg, type) and issubclass(arg, BaseModel)
                        else arg
                    )
                    for arg in generic_args
                )

                # Reconstruct the generic type with modified arguments
                tmp_field.annotation = (
                    generic_base[modified_args] if generic_base else None
                )
            # If the field is a BaseModel, then recursively convert it's
            # attributes to optionals.
            elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
                tmp_field.annotation = Partial[annotation]  # type: ignore[assignment, valid-type]
            return tmp_field.annotation, tmp_field

        # Build Partial{ModelName} (avoiding a double "Partial" prefix) that
        # inherits from both the wrapped model and PartialBase.
        return create_model(
            __model_name=(
                wrapped_class.__name__
                if wrapped_class.__name__.startswith("Partial")
                else f"Partial{wrapped_class.__name__}"
            ),
            __base__=(wrapped_class, PartialBase),
            __module__=wrapped_class.__module__,
            **{
                field_name: (
                    _make_field_optional(field_info)
                    if make_fields_optional is not None
                    else _wrap_models(field_info)
                )
                for field_name, field_info in wrapped_class.model_fields.items()
            },
        )  # type: ignore[all]

__class_getitem__(wrapped_class)

Convert model to one that inherits from PartialBase.

We don't make the fields optional at this point, we just wrap them with Partial so the names of the nested models will be Partial{ModelName}. We want the output of model_json_schema() to reflect the name change, but everything else should be the same as the original model. During validation, we'll generate a true partial model to support partially defined fields.

Source code in instructor/dsl/partial.py
def __class_getitem__(  # type: ignore[override]
    cls,
    wrapped_class: type[T_Model] | tuple[type[T_Model], type[MakeFieldsOptional]],
) -> type[T_Model]:
    """Convert model to one that inherits from PartialBase.

    We don't make the fields optional at this point, we just wrap them with `Partial` so the names of the nested models will be
    `Partial{ModelName}`. We want the output of `model_json_schema()` to
    reflect the name change, but everything else should be the same as the
    original model. During validation, we'll generate a true partial model
    to support partially defined fields.

    """

    # Subscripting with a 2-tuple, e.g. Partial[Model, MakeFieldsOptional],
    # switches on the make-fields-optional behavior below.
    make_fields_optional = None
    if isinstance(wrapped_class, tuple):
        wrapped_class, make_fields_optional = wrapped_class

    def _wrap_models(field: FieldInfo) -> tuple[object, FieldInfo]:
        # Work on a copy so the original model's FieldInfo is untouched.
        tmp_field = deepcopy(field)

        annotation = field.annotation

        # Handle generics (like List, Dict, etc.)
        if get_origin(annotation) is not None:
            # Get the generic base (like List, Dict) and its arguments (like User in List[User])
            generic_base = get_origin(annotation)
            generic_args = get_args(annotation)

            # Recursively apply Partial to each of the generic arguments
            modified_args = tuple(
                (
                    Partial[arg]  # type: ignore[valid-type]
                    if isinstance(arg, type) and issubclass(arg, BaseModel)
                    else arg
                )
                for arg in generic_args
            )

            # Reconstruct the generic type with modified arguments
            tmp_field.annotation = (
                generic_base[modified_args] if generic_base else None
            )
        # If the field is a BaseModel, then recursively convert it's
        # attributes to optionals.
        elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
            tmp_field.annotation = Partial[annotation]  # type: ignore[assignment, valid-type]
        return tmp_field.annotation, tmp_field

    # Build Partial{ModelName} (avoiding a double "Partial" prefix) that
    # inherits from both the wrapped model and PartialBase.
    return create_model(
        __model_name=(
            wrapped_class.__name__
            if wrapped_class.__name__.startswith("Partial")
            else f"Partial{wrapped_class.__name__}"
        ),
        __base__=(wrapped_class, PartialBase),
        __module__=wrapped_class.__module__,
        **{
            field_name: (
                _make_field_optional(field_info)
                if make_fields_optional is not None
                else _wrap_models(field_info)
            )
            for field_name, field_info in wrapped_class.model_fields.items()
        },
    )  # type: ignore[all]

__init_subclass__(*args, **kwargs)

Cannot subclass.

Raises:

Type Description
TypeError

Subclassing not allowed.

Source code in instructor/dsl/partial.py
def __init_subclass__(
    cls,
    *args: object,
    **kwargs: object,
) -> NoReturn:
    """Disallow subclassing.

    Raises:
       TypeError: Subclassing not allowed.
    """
    # Partial is purely a generic factory (used via Partial[Model]); a
    # subclass would never behave correctly, so reject it eagerly.
    message = "Cannot subclass {}.Partial".format(cls.__module__)
    raise TypeError(message)

__new__(*args, **kwargs)

Cannot instantiate.

Raises:

Type Description
TypeError

Direct instantiation not allowed.

Source code in instructor/dsl/partial.py
def __new__(
    cls,
    *args: object,  # noqa :ARG003
    **kwargs: object,  # noqa :ARG003
) -> "Partial[T_Model]":
    """Disallow direct instantiation.

    Raises:
        TypeError: Direct instantiation not allowed.
    """
    # Partial only makes sense as a subscripted factory (Partial[Model]);
    # creating an instance of Partial itself is always a mistake.
    message = "Cannot instantiate abstract Partial class."
    raise TypeError(message)

PartialBase

Bases: Generic[T_Model]

Source code in instructor/dsl/partial.py
class PartialBase(Generic[T_Model]):
    """Mixin that adds streaming/partial-validation helpers to a model.

    Mixed into the classes produced by `Partial[...]`; parses incremental
    JSON from streamed completions and yields progressively more complete
    model instances.
    """

    @classmethod
    @lru_cache(maxsize=None)
    def get_partial_model(cls) -> type[T_Model]:
        """Return a partial model we can use to validate partial results."""
        # NOTE(review): lru_cache on a classmethod keys on `cls` and keeps
        # each class alive for the cache's lifetime; acceptable here since
        # model classes are long-lived.
        assert issubclass(
            cls, BaseModel
        ), f"{cls.__name__} must be a subclass of BaseModel"

        # Clone the class with every field made optional, reusing the name
        # (avoiding a double "Partial" prefix).
        return create_model(
            __model_name=(
                cls.__name__
                if cls.__name__.startswith("Partial")
                else f"Partial{cls.__name__}"
            ),
            __base__=cls,
            __module__=cls.__module__,
            **{
                field_name: _make_field_optional(field_info)
                for field_name, field_info in cls.model_fields.items()
            },
        )  # type: ignore[all]

    @classmethod
    def from_streaming_response(
        cls, completion: Iterable[Any], mode: Mode, **kwargs: Any
    ) -> Generator[T_Model, None, None]:
        """Yield partial model instances from a sync streaming completion."""
        json_chunks = cls.extract_json(completion, mode)

        # MD_JSON wraps the payload in a markdown code fence; strip it.
        if mode == Mode.MD_JSON:
            json_chunks = extract_json_from_stream(json_chunks)

        yield from cls.model_from_chunks(json_chunks, **kwargs)

    @classmethod
    async def from_streaming_response_async(
        cls, completion: AsyncGenerator[Any, None], mode: Mode, **kwargs: Any
    ) -> AsyncGenerator[T_Model, None]:
        """Async counterpart of `from_streaming_response`."""
        json_chunks = cls.extract_json_async(completion, mode)

        if mode == Mode.MD_JSON:
            json_chunks = extract_json_from_stream_async(json_chunks)

        return cls.model_from_chunks_async(json_chunks, **kwargs)

    @classmethod
    def model_from_chunks(
        cls, json_chunks: Iterable[Any], **kwargs: Any
    ) -> Generator[T_Model, None, None]:
        """Accumulate JSON text chunks and yield a model whenever it changes."""
        prev_obj = None
        potential_object = ""
        partial_model = cls.get_partial_model()
        for chunk in json_chunks:
            potential_object += chunk

            # Avoid parsing incomplete json when its just whitespace otherwise parser throws an exception
            task_json = (
                parser.parse(potential_object) if potential_object.strip() else None
            )
            if task_json:
                obj = partial_model.model_validate(task_json, strict=None, **kwargs)  # type: ignore[attr-defined]
                # Only yield when the parsed object actually changed, so
                # consumers don't see duplicate intermediate states.
                if obj != prev_obj:
                    obj.__dict__["chunk"] = (
                        chunk  # Provide the raw chunk for debugging and benchmarking
                    )
                    prev_obj = obj
                    yield obj

    @classmethod
    async def model_from_chunks_async(
        cls, json_chunks: AsyncGenerator[str, None], **kwargs: Any
    ) -> AsyncGenerator[T_Model, None]:
        """Async counterpart of `model_from_chunks`."""
        potential_object = ""
        prev_obj = None
        partial_model = cls.get_partial_model()
        async for chunk in json_chunks:
            potential_object += chunk

            # Avoid parsing incomplete json when its just whitespace otherwise parser throws an exception
            task_json = (
                parser.parse(potential_object) if potential_object.strip() else None
            )
            if task_json:
                obj = partial_model.model_validate(task_json, strict=None, **kwargs)  # type: ignore[attr-defined]
                if obj != prev_obj:
                    obj.__dict__["chunk"] = (
                        chunk  # Provide the raw chunk for debugging and benchmarking
                    )
                    prev_obj = obj
                    yield obj

    @staticmethod
    def extract_json(
        completion: Iterable[Any], mode: Mode
    ) -> Generator[str, None, None]:
        """Pull raw JSON fragments out of streamed chunks for the given mode."""
        for chunk in completion:
            try:
                if chunk.choices:
                    if mode == Mode.FUNCTIONS:
                        if json_chunk := chunk.choices[0].delta.function_call.arguments:
                            yield json_chunk
                    elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
                        if json_chunk := chunk.choices[0].delta.content:
                            yield json_chunk
                    elif mode == Mode.TOOLS:
                        if json_chunk := chunk.choices[0].delta.tool_calls:
                            yield json_chunk[0].function.arguments
                    else:
                        raise NotImplementedError(
                            f"Mode {mode} is not supported for MultiTask streaming"
                        )
            except AttributeError:
                # Some chunks (e.g. role-only deltas) lack these attributes;
                # skip them rather than abort the stream.
                pass

    @staticmethod
    async def extract_json_async(
        completion: AsyncGenerator[Any, None], mode: Mode
    ) -> AsyncGenerator[str, None]:
        """Async counterpart of `extract_json`."""
        async for chunk in completion:
            try:
                if chunk.choices:
                    if mode == Mode.FUNCTIONS:
                        if json_chunk := chunk.choices[0].delta.function_call.arguments:
                            yield json_chunk
                    elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
                        if json_chunk := chunk.choices[0].delta.content:
                            yield json_chunk
                    elif mode == Mode.TOOLS:
                        if json_chunk := chunk.choices[0].delta.tool_calls:
                            yield json_chunk[0].function.arguments
                    else:
                        raise NotImplementedError(
                            f"Mode {mode} is not supported for MultiTask streaming"
                        )
            except AttributeError:
                pass

get_partial_model() cached classmethod

Return a partial model we can use to validate partial results.

Source code in instructor/dsl/partial.py
@classmethod
@lru_cache(maxsize=None)
def get_partial_model(cls) -> type[T_Model]:
    """Return a partial model we can use to validate partial results."""
    # NOTE(review): lru_cache on a classmethod keys on `cls` and keeps each
    # class alive for the cache's lifetime; acceptable for long-lived models.
    assert issubclass(
        cls, BaseModel
    ), f"{cls.__name__} must be a subclass of BaseModel"

    # Clone the class with every field made optional, reusing the name when
    # it already carries the "Partial" prefix.
    return create_model(
        __model_name=(
            cls.__name__
            if cls.__name__.startswith("Partial")
            else f"Partial{cls.__name__}"
        ),
        __base__=cls,
        __module__=cls.__module__,
        **{
            field_name: _make_field_optional(field_info)
            for field_name, field_info in cls.model_fields.items()
        },
    )  # type: ignore[all]

MaybeBase

Bases: BaseModel, Generic[T]

Extract a result from a model, if any, otherwise set the error and message fields.

Source code in instructor/dsl/maybe.py
class MaybeBase(BaseModel, Generic[T]):  # type: ignore[misc]
    """
    Extract a result from a model, if any, otherwise set the error and message fields.
    """

    # The successfully extracted value, or None when nothing was found.
    result: Optional[T]
    # True when extraction failed.
    error: bool = Field(default=False)
    # Human-readable explanation when no result was found.
    message: Optional[str]

    def __bool__(self) -> bool:
        # Truthiness mirrors whether a result was actually extracted.
        return self.result is not None

Maybe(model)

Create a Maybe model for a given Pydantic model. This allows you to return a model that includes fields for result, error, and message for situations where the data may not be present in the context.

Usage

from pydantic import BaseModel, Field
from instructor import Maybe

class User(BaseModel):
    name: str = Field(description="The name of the person")
    age: int = Field(description="The age of the person")
    role: str = Field(description="The role of the person")

MaybeUser = Maybe(User)

Result

class MaybeUser(BaseModel):
    result: Optional[User]
    error: bool = Field(default=False)
    message: Optional[str]

    def __bool__(self):
        return self.result is not None

Parameters:

Name Type Description Default
model Type[BaseModel]

The Pydantic model to wrap with Maybe.

required

Returns:

Name Type Description
MaybeModel Type[BaseModel]

A new Pydantic model that includes fields for result, error, and message.

Source code in instructor/dsl/maybe.py
def Maybe(model: Type[T]) -> Type[MaybeBase[T]]:
    """
    Create a Maybe model for a given Pydantic model. This allows you to return a model that includes fields for `result`, `error`, and `message` for situations where the data may not be present in the context.

    ## Usage

    ```python
    from pydantic import BaseModel, Field
    from instructor import Maybe

    class User(BaseModel):
        name: str = Field(description="The name of the person")
        age: int = Field(description="The age of the person")
        role: str = Field(description="The role of the person")

    MaybeUser = Maybe(User)
    ```

    ## Result

    ```python
    class MaybeUser(BaseModel):
        result: Optional[User]
        error: bool = Field(default=False)
        message: Optional[str]

        def __bool__(self):
            return self.result is not None
    ```

    Parameters:
        model (Type[BaseModel]): The Pydantic model to wrap with Maybe.

    Returns:
        MaybeModel (Type[BaseModel]): A new Pydantic model that includes fields for `result`, `error`, and `message`.
    """

    # Field descriptions below are serialized into the schema the LLM sees,
    # so they double as prompt instructions.
    fields = {
        "result": (
            Optional[model],
            Field(
                default=None,
                description="Correctly extracted result from the model, if any, otherwise None",
            ),
        ),
        "error": (bool, Field(default=False)),
        "message": (
            Optional[str],
            Field(
                default=None,
                description="Error message if no result was found, should be short and concise",
            ),
        ),
    }

    # Inherit from MaybeBase so the generated class keeps __bool__ semantics.
    return create_model(f"Maybe{model.__name__}", __base__=MaybeBase, **fields)

OpenAISchema

Bases: BaseModel

Source code in instructor/function_calls.py
class _classproperty:
    """Read-only descriptor exposing a method as a class-level property.

    Replaces the chained ``@classmethod`` + ``@property`` idiom, which was
    deprecated in Python 3.11 and removed in Python 3.13, while behaving
    identically on older versions (``Model.attr`` and ``instance.attr``
    both call the wrapped function with the class).
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, _instance, owner):
        # Always bind to the owning class, never the instance, so access
        # through an instance matches access through the class.
        return self.func(owner)


class OpenAISchema(BaseModel):  # type: ignore[misc]
    @_classproperty
    def openai_schema(cls) -> Dict[str, Any]:
        """
        Return the schema in the format of OpenAI's schema as jsonschema

        Note:
            It's important to add a docstring to describe how to best use this class, it will be included in the description attribute and be part of the prompt.

        Returns:
            model_json_schema (dict): A dictionary in the format of OpenAI's schema as jsonschema
        """
        schema = cls.model_json_schema()
        docstring = parse(cls.__doc__ or "")
        # Everything except title/description becomes the function's
        # `parameters` object.
        parameters = {
            k: v for k, v in schema.items() if k not in ("title", "description")
        }
        # Backfill per-property descriptions from the class docstring's
        # parameter docs when the json schema doesn't already provide one.
        for param in docstring.params:
            if (name := param.arg_name) in parameters["properties"] and (
                description := param.description
            ):
                if "description" not in parameters["properties"][name]:
                    parameters["properties"][name]["description"] = description

        # A field is required iff it declares no default value.
        parameters["required"] = sorted(
            k for k, v in parameters["properties"].items() if "default" not in v
        )

        if "description" not in schema:
            if docstring.short_description:
                schema["description"] = docstring.short_description
            else:
                schema["description"] = (
                    f"Correctly extracted `{cls.__name__}` with all "
                    f"the required parameters with correct types"
                )

        return {
            "name": schema["title"],
            "description": schema["description"],
            "parameters": parameters,
        }

    @_classproperty
    def anthropic_schema(cls) -> str:
        """Return the schema rendered as pretty-printed XML for Anthropic prompts."""
        from instructor.anthropic_utils import json_to_xml

        # splitlines()[1:] drops the XML declaration; lstrip() flattens the
        # pretty-printer's indentation to keep the prompt compact.
        return "\n".join(
            line.lstrip()
            for line in parseString(json_to_xml(cls)).toprettyxml().splitlines()[1:]
        )

    @classmethod
    def from_response(
        cls,
        completion: ChatCompletion,
        validation_context: Optional[Dict[str, Any]] = None,
        strict: Optional[bool] = None,
        mode: Mode = Mode.TOOLS,
    ) -> BaseModel:
        """Parse and validate an instance of `cls` from an openai chat completion

        Parameters:
            completion (openai.ChatCompletion): The response from an openai chat completion
            validation_context (dict): The validation context to use for validating the response
            strict (bool): Whether to use strict json parsing
            mode (Mode): The openai completion mode

        Returns:
            cls (OpenAISchema): An instance of the class

        Raises:
            IncompleteOutputException: If the completion was truncated (`finish_reason == "length"`).
            ImportError: If `mode` is ANTHROPIC_TOOLS and the `anthropic` package is missing.
            ValueError: If `mode` is not a recognised patch mode.
        """
        if mode == Mode.ANTHROPIC_TOOLS:
            # Anthropic responses carry XML in `content`, not OpenAI `choices`.
            try:
                from instructor.anthropic_utils import extract_xml, xml_to_model
            except ImportError as err:
                raise ImportError(
                    "Please 'pip install anthropic' package to proceed."
                ) from err
            assert hasattr(completion, "content")
            return xml_to_model(cls, extract_xml(completion.content[0].text))  # type:ignore

        assert hasattr(completion, "choices")

        # A "length" finish means the model ran out of tokens mid-output, so
        # the payload cannot be parsed; fail fast with a clear exception.
        if completion.choices[0].finish_reason == "length":
            logger.error("Incomplete output detected, should increase max_tokens")
            raise IncompleteOutputException()

        message = completion.choices[0].message

        if mode == Mode.FUNCTIONS:
            # Legacy function-calling API: arguments live on `message.function_call`.
            assert (
                message.function_call.name == cls.openai_schema["name"]  # type: ignore[index]
            ), "Function name does not match"
            model_response = cls.model_validate_json(
                message.function_call.arguments,  # type: ignore[attr-defined]
                context=validation_context,
                strict=strict,
            )
        elif mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS}:
            # Tool-calling API: exactly one tool call is expected per response.
            assert (
                len(message.tool_calls or []) == 1
            ), "Instructor does not support multiple tool calls, use List[Model] instead."
            tool_call = message.tool_calls[0]  # type: ignore
            assert (
                tool_call.function.name == cls.openai_schema["name"]  # type: ignore[index]
            ), "Tool name does not match"
            model_response = cls.model_validate_json(
                tool_call.function.arguments,
                context=validation_context,
                strict=strict,
            )
        elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
            # JSON modes: the payload is the message content itself; MD_JSON
            # first unwraps the ```json ...``` code block.
            if mode == Mode.MD_JSON:
                message.content = extract_json_from_codeblock(message.content or "")

            model_response = cls.model_validate_json(
                message.content,  # type: ignore
                context=validation_context,
                strict=strict,
            )
        else:
            raise ValueError(f"Invalid patch mode: {mode}")

        # TODO: add logging or response handler
        return model_response

openai_schema: Dict[str, Any] classmethod property

Return the schema in the format of OpenAI's schema as jsonschema

Note

It's important to add a docstring describing how best to use this class; it will be included in the description attribute and become part of the prompt.

Returns:

Name Type Description
model_json_schema dict

A dictionary in the format of OpenAI's schema as jsonschema

from_response(completion, validation_context=None, strict=None, mode=Mode.TOOLS) classmethod

Execute the function from the response of an openai chat completion

Parameters:

Name Type Description Default
completion ChatCompletion

The response from an openai chat completion

required
throw_error bool

Whether to throw an error if the function call is not detected

required
validation_context dict

The validation context to use for validating the response

None
strict bool

Whether to use strict json parsing

None
mode Mode

The openai completion mode

TOOLS

Returns:

Name Type Description
cls OpenAISchema

An instance of the class

Source code in instructor/function_calls.py
@classmethod
def from_response(
    cls,
    completion: ChatCompletion,
    validation_context: Optional[Dict[str, Any]] = None,
    strict: Optional[bool] = None,
    mode: Mode = Mode.TOOLS,
) -> BaseModel:
    """Parse and validate an instance of `cls` from an openai chat completion

    Parameters:
        completion (openai.ChatCompletion): The response from an openai chat completion
        validation_context (dict): The validation context to use for validating the response
        strict (bool): Whether to use strict json parsing
        mode (Mode): The openai completion mode

    Returns:
        cls (OpenAISchema): An instance of the class

    Raises:
        IncompleteOutputException: If the completion was truncated (`finish_reason == "length"`).
        ImportError: If `mode` is ANTHROPIC_TOOLS and the `anthropic` package is missing.
        ValueError: If `mode` is not a recognised patch mode.
    """
    if mode == Mode.ANTHROPIC_TOOLS:
        # Anthropic responses carry XML in `content`, not OpenAI `choices`.
        try:
            from instructor.anthropic_utils import extract_xml, xml_to_model
        except ImportError as err:
            raise ImportError(
                "Please 'pip install anthropic' package to proceed."
            ) from err
        assert hasattr(completion, "content")
        return xml_to_model(cls, extract_xml(completion.content[0].text))  # type:ignore

    assert hasattr(completion, "choices")

    # A "length" finish means the model ran out of tokens mid-output, so the
    # payload cannot be parsed; fail fast with a clear exception.
    if completion.choices[0].finish_reason == "length":
        logger.error("Incomplete output detected, should increase max_tokens")
        raise IncompleteOutputException()

    # If Anthropic, this should be different
    message = completion.choices[0].message

    if mode == Mode.FUNCTIONS:
        # Legacy function-calling API: arguments live on `message.function_call`.
        assert (
            message.function_call.name == cls.openai_schema["name"]  # type: ignore[index]
        ), "Function name does not match"
        model_response = cls.model_validate_json(
            message.function_call.arguments,  # type: ignore[attr-defined]
            context=validation_context,
            strict=strict,
        )
    elif mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS}:
        # Tool-calling API: exactly one tool call is expected per response.
        assert (
            len(message.tool_calls or []) == 1
        ), "Instructor does not support multiple tool calls, use List[Model] instead."
        tool_call = message.tool_calls[0]  # type: ignore
        assert (
            tool_call.function.name == cls.openai_schema["name"]  # type: ignore[index]
        ), "Tool name does not match"
        model_response = cls.model_validate_json(
            tool_call.function.arguments,
            context=validation_context,
            strict=strict,
        )
    elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
        # JSON modes: the payload is the message content itself; MD_JSON
        # first unwraps the ```json ...``` code block.
        if mode == Mode.MD_JSON:
            message.content = extract_json_from_codeblock(message.content or "")

        model_response = cls.model_validate_json(
            message.content,  # type: ignore
            context=validation_context,
            strict=strict,
        )
    else:
        raise ValueError(f"Invalid patch mode: {mode}")

    # TODO: add logging or response handler
    return model_response