class CondenseQuestionChatEngine(BaseChatEngine):
    """Condense Question Chat Engine.

    First generate a standalone question from conversation context and last
    message, then query the query engine for a response.
    """

    def __init__(
        self,
        query_engine: BaseQueryEngine,
        condense_question_prompt: BasePromptTemplate,
        memory: BaseMemory,
        llm: LLMPredictorType,
        verbose: bool = False,
        callback_manager: Optional[CallbackManager] = None,
    ) -> None:
        self._query_engine = query_engine
        self._condense_question_prompt = condense_question_prompt
        self._memory = memory
        self._llm = llm
        self._verbose = verbose
        self.callback_manager = callback_manager or CallbackManager([])

    @classmethod
    def from_defaults(
        cls,
        query_engine: BaseQueryEngine,
        condense_question_prompt: Optional[BasePromptTemplate] = None,
        chat_history: Optional[List[ChatMessage]] = None,
        memory: Optional[BaseMemory] = None,
        memory_cls: Type[BaseMemory] = ChatMemoryBuffer,
        service_context: Optional[ServiceContext] = None,
        verbose: bool = False,
        system_prompt: Optional[str] = None,
        prefix_messages: Optional[List[ChatMessage]] = None,
        llm: Optional[LLM] = None,
        **kwargs: Any,
    ) -> "CondenseQuestionChatEngine":
        """Initialize a CondenseQuestionChatEngine from default parameters.

        Raises:
            NotImplementedError: if ``system_prompt`` or ``prefix_messages``
                is provided; this engine does not support either.
        """
        condense_question_prompt = condense_question_prompt or DEFAULT_PROMPT

        llm = llm or llm_from_settings_or_context(Settings, service_context)

        chat_history = chat_history or []
        memory = memory or memory_cls.from_defaults(
            chat_history=chat_history, llm=llm
        )

        if system_prompt is not None:
            raise NotImplementedError(
                "system_prompt is not supported for CondenseQuestionChatEngine."
            )
        if prefix_messages is not None:
            raise NotImplementedError(
                "prefix_messages is not supported for CondenseQuestionChatEngine."
            )

        return cls(
            query_engine,
            condense_question_prompt,
            memory,
            llm,
            verbose=verbose,
            callback_manager=callback_manager_from_settings_or_context(
                Settings, service_context
            ),
        )

    def _condense_question(
        self, chat_history: List[ChatMessage], last_message: str
    ) -> str:
        """
        Generate standalone question from conversation context and last message.
        """
        chat_history_str = messages_to_history_str(chat_history)
        logger.debug(chat_history_str)

        return self._llm.predict(
            self._condense_question_prompt,
            question=last_message,
            chat_history=chat_history_str,
        )

    async def _acondense_question(
        self, chat_history: List[ChatMessage], last_message: str
    ) -> str:
        """
        Generate standalone question from conversation context and last message.
        """
        chat_history_str = messages_to_history_str(chat_history)
        logger.debug(chat_history_str)

        return await self._llm.apredict(
            self._condense_question_prompt,
            question=last_message,
            chat_history=chat_history_str,
        )

    def _get_tool_output_from_response(
        self, query: str, response: RESPONSE_TYPE
    ) -> ToolOutput:
        """Wrap a query-engine response as a ToolOutput.

        Streaming responses get an empty ``content`` since the text is not yet
        materialized; the raw response is always kept in ``raw_output``.
        """
        if isinstance(response, StreamingResponse):
            return ToolOutput(
                content="",
                tool_name="query_engine",
                raw_input={"query": query},
                raw_output=response,
            )
        else:
            return ToolOutput(
                content=str(response),
                tool_name="query_engine",
                raw_input={"query": query},
                raw_output=response,
            )

    @trace_method("chat")
    def chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AgentChatResponse:
        """Condense the conversation into a standalone question and query it."""
        chat_history = chat_history or self._memory.get()

        # Generate standalone question from conversation context and last message
        condensed_question = self._condense_question(chat_history, message)

        log_str = f"Querying with: {condensed_question}"
        logger.info(log_str)
        if self._verbose:
            print(log_str)

        # TODO: right now, query engine uses class attribute to configure streaming,
        #       we are moving towards separate streaming and non-streaming methods.
        #       In the meanwhile, use this hack to toggle streaming.
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        if isinstance(self._query_engine, RetrieverQueryEngine):
            is_streaming = self._query_engine._response_synthesizer._streaming
            self._query_engine._response_synthesizer._streaming = False

        # Query with standalone question
        query_response = self._query_engine.query(condensed_question)

        # NOTE: reset streaming flag
        if isinstance(self._query_engine, RetrieverQueryEngine):
            self._query_engine._response_synthesizer._streaming = is_streaming

        tool_output = self._get_tool_output_from_response(
            condensed_question, query_response
        )

        # Record response
        self._memory.put(ChatMessage(role=MessageRole.USER, content=message))
        self._memory.put(
            ChatMessage(role=MessageRole.ASSISTANT, content=str(query_response))
        )

        return AgentChatResponse(response=str(query_response), sources=[tool_output])

    @trace_method("chat")
    def stream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """Streaming variant of :meth:`chat`.

        Raises:
            ValueError: if the underlying query engine did not produce a
                streaming response.
        """
        chat_history = chat_history or self._memory.get()

        # Generate standalone question from conversation context and last message
        condensed_question = self._condense_question(chat_history, message)

        log_str = f"Querying with: {condensed_question}"
        logger.info(log_str)
        if self._verbose:
            print(log_str)

        # TODO: right now, query engine uses class attribute to configure streaming,
        #       we are moving towards separate streaming and non-streaming methods.
        #       In the meanwhile, use this hack to toggle streaming.
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        if isinstance(self._query_engine, RetrieverQueryEngine):
            is_streaming = self._query_engine._response_synthesizer._streaming
            self._query_engine._response_synthesizer._streaming = True

        # Query with standalone question
        query_response = self._query_engine.query(condensed_question)

        # NOTE: reset streaming flag
        if isinstance(self._query_engine, RetrieverQueryEngine):
            self._query_engine._response_synthesizer._streaming = is_streaming

        tool_output = self._get_tool_output_from_response(
            condensed_question, query_response
        )

        # Record response
        if (
            isinstance(query_response, StreamingResponse)
            and query_response.response_gen is not None
        ):
            # override the generator to include writing to chat history
            self._memory.put(ChatMessage(role=MessageRole.USER, content=message))
            response = StreamingAgentChatResponse(
                chat_stream=response_gen_from_query_engine(
                    query_response.response_gen
                ),
                sources=[tool_output],
            )
            thread = Thread(
                target=response.write_response_to_history,
                args=(self._memory,),
            )
            thread.start()
        else:
            raise ValueError("Streaming is not enabled. Please use chat() instead.")

        return response

    @trace_method("chat")
    async def achat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AgentChatResponse:
        """Async variant of :meth:`chat`."""
        chat_history = chat_history or self._memory.get()

        # Generate standalone question from conversation context and last message
        condensed_question = await self._acondense_question(chat_history, message)

        log_str = f"Querying with: {condensed_question}"
        logger.info(log_str)
        if self._verbose:
            print(log_str)

        # TODO: right now, query engine uses class attribute to configure streaming,
        #       we are moving towards separate streaming and non-streaming methods.
        #       In the meanwhile, use this hack to toggle streaming.
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        if isinstance(self._query_engine, RetrieverQueryEngine):
            is_streaming = self._query_engine._response_synthesizer._streaming
            self._query_engine._response_synthesizer._streaming = False

        # Query with standalone question
        query_response = await self._query_engine.aquery(condensed_question)

        # NOTE: reset streaming flag
        if isinstance(self._query_engine, RetrieverQueryEngine):
            self._query_engine._response_synthesizer._streaming = is_streaming

        tool_output = self._get_tool_output_from_response(
            condensed_question, query_response
        )

        # Record response
        self._memory.put(ChatMessage(role=MessageRole.USER, content=message))
        self._memory.put(
            ChatMessage(role=MessageRole.ASSISTANT, content=str(query_response))
        )

        return AgentChatResponse(response=str(query_response), sources=[tool_output])

    @trace_method("chat")
    async def astream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """Async streaming variant of :meth:`chat`.

        Raises:
            ValueError: if the underlying query engine did not produce a
                streaming response.
        """
        chat_history = chat_history or self._memory.get()

        # Generate standalone question from conversation context and last message
        condensed_question = await self._acondense_question(chat_history, message)

        log_str = f"Querying with: {condensed_question}"
        logger.info(log_str)
        if self._verbose:
            print(log_str)

        # TODO: right now, query engine uses class attribute to configure streaming,
        #       we are moving towards separate streaming and non-streaming methods.
        #       In the meanwhile, use this hack to toggle streaming.
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        if isinstance(self._query_engine, RetrieverQueryEngine):
            is_streaming = self._query_engine._response_synthesizer._streaming
            self._query_engine._response_synthesizer._streaming = True

        # Query with standalone question
        query_response = await self._query_engine.aquery(condensed_question)

        # NOTE: reset streaming flag
        if isinstance(self._query_engine, RetrieverQueryEngine):
            self._query_engine._response_synthesizer._streaming = is_streaming

        tool_output = self._get_tool_output_from_response(
            condensed_question, query_response
        )

        # Record response
        if (
            isinstance(query_response, StreamingResponse)
            and query_response.response_gen is not None
        ):
            # override the generator to include writing to chat history
            # TODO: query engine does not support async generator yet
            self._memory.put(ChatMessage(role=MessageRole.USER, content=message))
            response = StreamingAgentChatResponse(
                chat_stream=response_gen_from_query_engine(
                    query_response.response_gen
                ),
                sources=[tool_output],
            )
            thread = Thread(
                target=response.write_response_to_history, args=(self._memory,)
            )
            thread.start()
        else:
            raise ValueError("Streaming is not enabled. Please use achat() instead.")

        return response

    def reset(self) -> None:
        # Clear chat history
        self._memory.reset()

    @property
    def chat_history(self) -> List[ChatMessage]:
        """Get chat history."""
        return self._memory.get_all()