Skip to content

Chat memory buffer

ChatMemoryBuffer #

Bases: BaseChatStoreMemory

Simple buffer for storing chat history.

Parameters:

Name Type Description Default
token_limit int
required
tokenizer_fn Callable[list, List]
functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all')
Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
class ChatMemoryBuffer(BaseChatStoreMemory):
    """Simple buffer for storing chat history."""

    token_limit: int
    tokenizer_fn: Callable[[str], List] = Field(
        default_factory=get_tokenizer,
        exclude=True,
    )

    @classmethod
    def class_name(cls) -> str:
        """Get class name."""
        return "ChatMemoryBuffer"

    @model_validator(mode="before")
    @classmethod
    def validate_memory(cls, values: dict) -> dict:
        # Validate token limit
        token_limit = values.get("token_limit", -1)
        if token_limit < 1:
            raise ValueError("Token limit must be set and greater than 0.")

        # Validate tokenizer -- this avoids errors when loading from json/dict
        tokenizer_fn = values.get("tokenizer_fn", None)
        if tokenizer_fn is None:
            values["tokenizer_fn"] = get_tokenizer()

        return values

    @classmethod
    def from_defaults(
        cls,
        chat_history: Optional[List[ChatMessage]] = None,
        llm: Optional[LLM] = None,
        chat_store: Optional[BaseChatStore] = None,
        chat_store_key: str = DEFAULT_CHAT_STORE_KEY,
        token_limit: Optional[int] = None,
        tokenizer_fn: Optional[Callable[[str], List]] = None,
        **kwargs: Any,
    ) -> "ChatMemoryBuffer":
        """Create a chat memory buffer from an LLM."""
        if kwargs:
            raise ValueError(f"Unexpected kwargs: {kwargs}")

        if llm is not None:
            context_window = llm.metadata.context_window
            token_limit = token_limit or int(context_window * DEFAULT_TOKEN_LIMIT_RATIO)
        elif token_limit is None:
            token_limit = DEFAULT_TOKEN_LIMIT

        if chat_history is not None:
            chat_store = chat_store or SimpleChatStore()
            chat_store.set_messages(chat_store_key, chat_history)

        return cls(
            token_limit=token_limit,
            tokenizer_fn=tokenizer_fn or get_tokenizer(),
            chat_store=chat_store or SimpleChatStore(),
            chat_store_key=chat_store_key,
        )

    def to_string(self) -> str:
        """Convert memory to string."""
        return self.json()

    @classmethod
    def from_string(cls, json_str: str) -> "ChatMemoryBuffer":
        """Create a chat memory buffer from a string."""
        dict_obj = json.loads(json_str)
        return cls.from_dict(dict_obj)

    def to_dict(self, **kwargs: Any) -> dict:
        """Convert memory to dict."""
        return self.dict()

    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> "ChatMemoryBuffer":
        from llama_index.core.storage.chat_store.loading import load_chat_store

        # NOTE: this handles backwards compatibility with the old chat history
        if "chat_history" in data:
            chat_history = data.pop("chat_history")
            simple_store = SimpleChatStore(store={DEFAULT_CHAT_STORE_KEY: chat_history})
            data["chat_store"] = simple_store
        elif "chat_store" in data:
            chat_store_dict = data.pop("chat_store")
            chat_store = load_chat_store(chat_store_dict)
            data["chat_store"] = chat_store

        return cls(**data)

    def get(
        self, input: Optional[str] = None, initial_token_count: int = 0, **kwargs: Any
    ) -> List[ChatMessage]:
        """Get chat history."""
        chat_history = self.get_all()

        if initial_token_count > self.token_limit:
            raise ValueError("Initial token count exceeds token limit")

        message_count = len(chat_history)

        cur_messages = chat_history[-message_count:]
        token_count = self._token_count_for_messages(cur_messages) + initial_token_count

        while token_count > self.token_limit and message_count > 1:
            message_count -= 1
            while chat_history[-message_count].role in (
                MessageRole.TOOL,
                MessageRole.ASSISTANT,
            ):
                # we cannot have an assistant message at the start of the chat history
                # if after removal of the first, we have an assistant message,
                # we need to remove the assistant message too
                #
                # all tool messages should be preceded by an assistant message
                # if we remove a tool message, we need to remove the assistant message too
                message_count -= 1

            cur_messages = chat_history[-message_count:]
            token_count = (
                self._token_count_for_messages(cur_messages) + initial_token_count
            )

        # catch one message longer than token limit
        if token_count > self.token_limit or message_count <= 0:
            return []

        return chat_history[-message_count:]

    def _token_count_for_messages(self, messages: List[ChatMessage]) -> int:
        if len(messages) <= 0:
            return 0

        msg_str = " ".join(str(m.content) for m in messages)
        return len(self.tokenizer_fn(msg_str))

class_name classmethod #

class_name() -> str

Get class name.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
27
28
29
30
@classmethod
def class_name(cls) -> str:
    """Get class name."""
    return "ChatMemoryBuffer"

from_defaults classmethod #

from_defaults(chat_history: Optional[List[ChatMessage]] = None, llm: Optional[LLM] = None, chat_store: Optional[BaseChatStore] = None, chat_store_key: str = DEFAULT_CHAT_STORE_KEY, token_limit: Optional[int] = None, tokenizer_fn: Optional[Callable[[str], List]] = None, **kwargs: Any) -> ChatMemoryBuffer

Create a chat memory buffer from an LLM.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
@classmethod
def from_defaults(
    cls,
    chat_history: Optional[List[ChatMessage]] = None,
    llm: Optional[LLM] = None,
    chat_store: Optional[BaseChatStore] = None,
    chat_store_key: str = DEFAULT_CHAT_STORE_KEY,
    token_limit: Optional[int] = None,
    tokenizer_fn: Optional[Callable[[str], List]] = None,
    **kwargs: Any,
) -> "ChatMemoryBuffer":
    """Create a chat memory buffer from an LLM."""
    if kwargs:
        raise ValueError(f"Unexpected kwargs: {kwargs}")

    if llm is not None:
        context_window = llm.metadata.context_window
        token_limit = token_limit or int(context_window * DEFAULT_TOKEN_LIMIT_RATIO)
    elif token_limit is None:
        token_limit = DEFAULT_TOKEN_LIMIT

    if chat_history is not None:
        chat_store = chat_store or SimpleChatStore()
        chat_store.set_messages(chat_store_key, chat_history)

    return cls(
        token_limit=token_limit,
        tokenizer_fn=tokenizer_fn or get_tokenizer(),
        chat_store=chat_store or SimpleChatStore(),
        chat_store_key=chat_store_key,
    )

to_string #

to_string() -> str

Convert memory to string.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
79
80
81
def to_string(self) -> str:
    """Convert memory to string."""
    return self.json()

from_string classmethod #

from_string(json_str: str) -> ChatMemoryBuffer

Create a chat memory buffer from a string.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
83
84
85
86
87
@classmethod
def from_string(cls, json_str: str) -> "ChatMemoryBuffer":
    """Create a chat memory buffer from a string."""
    dict_obj = json.loads(json_str)
    return cls.from_dict(dict_obj)

to_dict #

to_dict(**kwargs: Any) -> dict

Convert memory to dict.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
89
90
91
def to_dict(self, **kwargs: Any) -> dict:
    """Convert memory to dict."""
    return self.dict()

get #

get(input: Optional[str] = None, initial_token_count: int = 0, **kwargs: Any) -> List[ChatMessage]

Get chat history.

Source code in llama-index-core/llama_index/core/memory/chat_memory_buffer.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get(
    self, input: Optional[str] = None, initial_token_count: int = 0, **kwargs: Any
) -> List[ChatMessage]:
    """Get chat history."""
    chat_history = self.get_all()

    if initial_token_count > self.token_limit:
        raise ValueError("Initial token count exceeds token limit")

    message_count = len(chat_history)

    cur_messages = chat_history[-message_count:]
    token_count = self._token_count_for_messages(cur_messages) + initial_token_count

    while token_count > self.token_limit and message_count > 1:
        message_count -= 1
        while chat_history[-message_count].role in (
            MessageRole.TOOL,
            MessageRole.ASSISTANT,
        ):
            # we cannot have an assistant message at the start of the chat history
            # if after removal of the first, we have an assistant message,
            # we need to remove the assistant message too
            #
            # all tool messages should be preceded by an assistant message
            # if we remove a tool message, we need to remove the assistant message too
            message_count -= 1

        cur_messages = chat_history[-message_count:]
        token_count = (
            self._token_count_for_messages(cur_messages) + initial_token_count
        )

    # catch one message longer than token limit
    if token_count > self.token_limit or message_count <= 0:
        return []

    return chat_history[-message_count:]