class PIINodePostprocessor(BaseNodePostprocessor):
    """
    PII Node processor.

    Masks personally identifiable information (PII) in node content by
    asking an LLM to replace each PII occurrence with a tag, and stores the
    tag mapping returned by the LLM in the node metadata.

    NOTE: this is a beta feature, the API might change.

    Args:
        llm (LLM): The local LLM to use for prediction.
        pii_str_tmpl (str): Template string wrapped in a ``PromptTemplate``
            for the masking request; ``context_str`` and ``query_str`` are
            passed to the LLM alongside it.
        pii_node_info_key (str): Metadata key under which the PII mapping is
            stored on each returned node.

    """

    llm: LLM
    pii_str_tmpl: str = DEFAULT_PII_TMPL
    pii_node_info_key: str = "__pii_node_info__"

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this postprocessor class."""
        return "PIINodePostprocessor"

    def mask_pii(self, text: str) -> Tuple[str, Dict]:
        """
        Mask PII in text.

        The LLM response is expected to contain the masked text, then the
        literal marker ``Output Mapping:``, then a JSON object.

        Args:
            text (str): The text to mask.

        Returns:
            Tuple[str, Dict]: The masked text (everything before the marker,
            stripped) and the parsed JSON mapping (what follows the marker).

        Raises:
            IndexError: If the response has no ``Output Mapping:`` section.
            json.JSONDecodeError: If no JSON can be parsed from the mapping
                section.

        """
        pii_prompt = PromptTemplate(self.pii_str_tmpl)
        # TODO: allow customization
        task_str = (
            "Mask out the PII, replace each PII with a tag, and return the text. "
            "Return the mapping in JSON."
        )

        response = self.llm.predict(pii_prompt, context_str=text, query_str=task_str)

        marker = "Output Mapping:"
        splits = response.split(marker)
        if len(splits) < 2:
            # Same exception type as the former unguarded ``splits[1]``, but
            # with an actionable message. The response itself is deliberately
            # not echoed: it may still contain unmasked PII.
            raise IndexError(
                f"LLM response does not contain the expected {marker!r} "
                "section; cannot extract the PII mapping."
            )

        text_output = splits[0].strip()
        json_str_output = splits[1].strip()
        try:
            json_dict = json.loads(json_str_output)
        except json.JSONDecodeError:
            # The JSON may be wrapped in extra text (e.g. a Markdown code
            # fence). Retry on the outermost ``{...}`` span; if there is none,
            # surface the original parse error unchanged.
            start = json_str_output.find("{")
            end = json_str_output.rfind("}")
            if start == -1 or end < start:
                raise
            json_dict = json.loads(json_str_output[start : end + 1])
        return text_output, json_dict

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """
        Postprocess nodes.

        Each input node is deep-copied, so the originals are left untouched.
        The copy gets the masked text as its content and the PII mapping under
        ``pii_node_info_key`` in its metadata; that key is excluded from both
        the embedding and the LLM metadata views.

        Args:
            nodes (List[NodeWithScore]): Nodes whose content should be masked.
            query_bundle (Optional[QueryBundle]): Unused by this postprocessor.

        Returns:
            List[NodeWithScore]: New nodes with masked content and the
            original scores.

        """
        # swap out text from nodes, with the original node mappings
        new_nodes = []
        for node_with_score in nodes:
            node = node_with_score.node
            new_text, mapping_info = self.mask_pii(
                node.get_content(metadata_mode=MetadataMode.LLM)
            )
            new_node = deepcopy(node)
            # Guard against duplicates when a node has already been processed
            # by this postprocessor.
            for excluded_keys in (
                new_node.excluded_embed_metadata_keys,
                new_node.excluded_llm_metadata_keys,
            ):
                if self.pii_node_info_key not in excluded_keys:
                    excluded_keys.append(self.pii_node_info_key)
            new_node.metadata[self.pii_node_info_key] = mapping_info
            new_node.set_content(new_text)
            new_nodes.append(NodeWithScore(node=new_node, score=node_with_score.score))

        return new_nodes
Source code in `llama-index-core/llama_index/core/postprocessor/pii.py`, lines 57–71:
def mask_pii(self, text: str) -> Tuple[str, Dict]:
    """
    Mask PII in text.

    The LLM response is expected to contain the masked text, then the
    literal marker ``Output Mapping:``, then a JSON object.

    Args:
        text (str): The text to mask.

    Returns:
        Tuple[str, Dict]: The masked text (everything before the marker,
        stripped) and the parsed JSON mapping (what follows the marker).

    Raises:
        IndexError: If the response has no ``Output Mapping:`` section.
        json.JSONDecodeError: If no JSON can be parsed from the mapping
            section.

    """
    pii_prompt = PromptTemplate(self.pii_str_tmpl)
    # TODO: allow customization
    task_str = (
        "Mask out the PII, replace each PII with a tag, and return the text. "
        "Return the mapping in JSON."
    )

    response = self.llm.predict(pii_prompt, context_str=text, query_str=task_str)

    marker = "Output Mapping:"
    splits = response.split(marker)
    if len(splits) < 2:
        # Same exception type as the former unguarded ``splits[1]``, but with
        # an actionable message. The response itself is deliberately not
        # echoed: it may still contain unmasked PII.
        raise IndexError(
            f"LLM response does not contain the expected {marker!r} "
            "section; cannot extract the PII mapping."
        )

    text_output = splits[0].strip()
    json_str_output = splits[1].strip()
    try:
        json_dict = json.loads(json_str_output)
    except json.JSONDecodeError:
        # The JSON may be wrapped in extra text (e.g. a Markdown code fence).
        # Retry on the outermost ``{...}`` span; if there is none, surface the
        # original parse error unchanged.
        start = json_str_output.find("{")
        end = json_str_output.rfind("}")
        if start == -1 or end < start:
            raise
        json_dict = json.loads(json_str_output[start : end + 1])
    return text_output, json_dict