HuggingFaceLLM#
- pydantic model llama_index.llms.huggingface.HuggingFaceLLM#
HuggingFace LLM.
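A minimal construction sketch; the model and tokenizer names below are this class's documented defaults, and the generate_kwargs values are illustrative rather than required:

    from llama_index.llms.huggingface import HuggingFaceLLM

    llm = HuggingFaceLLM(
        model_name="StabilityAI/stablelm-tuned-alpha-3b",
        tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
        context_window=3900,  # max tokens available for input (default)
        max_new_tokens=256,   # max tokens to generate (default)
        device_map="auto",
        generate_kwargs={"temperature": 0.7, "do_sample": True},  # forwarded to generate()
    )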
- Config
arbitrary_types_allowed: bool = True
- Fields
    context_window (int)
    device_map (str)
    generate_kwargs (dict)
    is_chat_model (bool)
    max_new_tokens (int)
    model_kwargs (dict)
    model_name (str)
    query_wrapper_prompt (PromptTemplate)
    stopping_ids (List[int])
    system_prompt (str)
    tokenizer_kwargs (dict)
    tokenizer_name (str)
    tokenizer_outputs_to_remove (list)
- Validators
    _validate_callback_manager » callback_manager
    set_completion_to_prompt » completion_to_prompt
    set_messages_to_prompt » messages_to_prompt
- field context_window: int = 3900#
The maximum number of tokens available for input.
- Constraints
exclusiveMinimum = 0
- field device_map: str = 'auto'#
The device_map to use. Defaults to 'auto'.
- field generate_kwargs: dict [Optional]#
The kwargs to pass to the model during generation.
- field is_chat_model: bool = False#
Set to True if the model exposes a chat interface (i.e., it can be passed a sequence of messages rather than text), like OpenAI's /v1/chat/completions endpoint. Be sure to verify that you either pass an appropriate tokenizer that can convert prompts to properly formatted chat messages or a messages_to_prompt that does so.
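A hedged sketch of such a messages_to_prompt callable; the chat tags are placeholders, and the real template comes from the model card:

    def messages_to_prompt(messages):
        # Render each ChatMessage (role + content) with hypothetical chat tags.
        prompt = ""
        for message in messages:
            prompt += f"<|{message.role.value}|>\n{message.content}\n"
        # Leave the assistant tag open so the model continues from here.
        prompt += "<|assistant|>\n"
        return prompt

Pass it to the constructor as messages_to_prompt=messages_to_prompt together with is_chat_model=True.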
- field max_new_tokens: int = 256#
The maximum number of tokens to generate.
- Constraints
exclusiveMinimum = 0
- field model_kwargs: dict [Optional]#
The kwargs to pass to the model during initialization.
- field model_name: str = 'StabilityAI/stablelm-tuned-alpha-3b'#
The model name to use from HuggingFace. Unused if model is passed in directly.
- field query_wrapper_prompt: PromptTemplate = PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='{query_str}')#
The query wrapper prompt, containing the query placeholder. The model card on HuggingFace should specify if this is needed. Should contain a {query_str} placeholder.
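For example, a sketch using StableLM-style tags (illustrative; the PromptTemplate import path can differ across llama-index versions):

    from llama_index.core import PromptTemplate

    # Keep the {query_str} placeholder; the surrounding tags are model-specific.
    query_wrapper_prompt = PromptTemplate(
        "<|SYSTEM|>You are a helpful assistant.<|USER|>{query_str}<|ASSISTANT|>"
    )
    llm = HuggingFaceLLM(query_wrapper_prompt=query_wrapper_prompt)  # imported as in the sketch above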
- field stopping_ids: List[int] [Optional]#
The stopping ids to use. Generation stops when these token IDs are predicted.
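For example (the ids below are stop tokens sometimes quoted for the stablelm-tuned-alpha models; verify them against your tokenizer):

    llm = HuggingFaceLLM(
        model_name="StabilityAI/stablelm-tuned-alpha-3b",
        stopping_ids=[50278, 50279, 50277, 1, 0],  # model-specific, check the model card
    )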
- field system_prompt: str = ''#
The system prompt, containing any extra instructions or context. The model card on HuggingFace should specify if this is needed.
- field tokenizer_kwargs: dict [Optional]#
The kwargs to pass to the tokenizer.
- field tokenizer_name: str = 'StabilityAI/stablelm-tuned-alpha-3b'#
The name of the tokenizer to use from HuggingFace. Unused if tokenizer is passed in directly.
- field tokenizer_outputs_to_remove: list [Optional]#
The outputs to remove from the tokenizer. Sometimes HuggingFace tokenizers return extra inputs that cause errors.
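A common case is dropping token_type_ids, which some tokenizers emit but many causal models' generate() does not accept:

    llm = HuggingFaceLLM(
        tokenizer_outputs_to_remove=["token_type_ids"],
    )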
- chat(messages: Sequence[ChatMessage], **kwargs: Any) → Any#
Chat endpoint for LLM.
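Example, reusing the llm from the construction sketch near the top (the ChatMessage import path assumes a recent llama-index; older releases expose it from llama_index.llms):

    from llama_index.core.llms import ChatMessage

    response = llm.chat([ChatMessage(role="user", content="Hello, who are you?")])
    print(response.message.content)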
- classmethod class_name() → str#
Get the class name, used as a unique ID in serialization.
This provides a key that makes serialization robust against actual class name changes.
- complete(*args: Any, **kwargs: Any) → Any#
Completion endpoint for LLM.
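Example, again reusing the llm constructed above; the response object exposes the generated text as .text:

    response = llm.complete("Q: What is the capital of France?\nA:")
    print(response.text)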
- stream_chat(messages: Sequence[ChatMessage], **kwargs: Any) → Any#
Streaming chat endpoint for LLM.
- stream_complete(*args: Any, **kwargs: Any) → Any#
Streaming completion endpoint for LLM.
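Streaming example; each yielded response carries the newly generated text in .delta:

    for chunk in llm.stream_complete("Tell me a short story about a llama: "):
        print(chunk.delta, end="", flush=True)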
- property metadata: LLMMetadata#
LLM metadata.
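Example of inspecting the reported metadata (attribute names follow LLMMetadata):

    meta = llm.metadata
    print(meta.context_window, meta.num_output, meta.is_chat_model)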