class NERPIINodePostprocessor(BaseNodePostprocessor):
    """
    NER PII Node processor.

    Uses a HF transformers model.

    Each node's LLM-mode content is run through a ``"ner"`` pipeline, every
    detected entity is replaced by a ``[<entity_group>_<start>]`` tag, and
    the tag -> original-word mapping is stored in the node metadata under
    ``pii_node_info_key``.
    """

    # Metadata key under which the tag -> word mapping is stored on each node.
    pii_node_info_key: str = "__pii_node_info__"

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used to identify this postprocessor."""
        return "NERPIINodePostprocessor"

    def mask_pii(self, ner: Callable, text: str) -> Tuple[str, Dict]:
        """
        Mask PII in text.

        Args:
            ner: Callable that takes the text and returns an iterable of
                entity dicts with ``entity_group``, ``word`` and ``start``
                keys, and optionally ``end``.
            text: The text to mask.

        Returns:
            A tuple ``(masked_text, mapping)`` where ``mapping`` maps each
            ``[<entity_group>_<start>]`` tag to the entity's ``word``.

        """
        entries = list(ner(text))
        mapping: Dict = {}
        spans = []  # (start, end, tag) for entities with usable offsets
        by_word = []  # (word, tag) for entities without usable offsets

        for entry in entries:
            start = entry["start"]
            entity_group_tag = f"[{entry['entity_group']}_{start}]"
            mapping[entity_group_tag] = entry["word"]

            end = entry.get("end")
            if (
                isinstance(start, int)
                and isinstance(end, int)
                and 0 <= start < end <= len(text)
            ):
                spans.append((start, end, entity_group_tag))
            else:
                by_word.append((entry["word"], entity_group_tag))

        # Replace by character offsets so that each occurrence gets its own
        # tag and text that merely contains the entity word is left alone.
        pieces = []
        cursor = 0
        for start, end, tag in sorted(spans):
            if start < cursor:
                # Overlaps a span that was already masked; skip it so the
                # text is not corrupted (its mapping entry is still kept).
                continue
            pieces.append(text[cursor:start])
            pieces.append(tag)
            cursor = end
        pieces.append(text[cursor:])
        new_text = "".join(pieces)

        # Entities without offsets can only be masked by word replacement.
        for word, tag in by_word:
            new_text = new_text.replace(word, tag)

        # Surrounding whitespace is trimmed only when something was detected;
        # text with no entities is returned untouched.
        if entries:
            new_text = new_text.strip()

        return new_text, mapping

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """
        Postprocess nodes.

        Args:
            nodes: Scored nodes whose content should be masked.
            query_bundle: Unused by this postprocessor.

        Returns:
            New ``NodeWithScore`` objects wrapping deep copies of the input
            nodes with masked content; the input nodes are not modified.

        """
        from transformers import pipeline  # pants: no-infer-dep

        # NOTE: the pipeline is constructed on every call.
        ner = pipeline("ner", grouped_entities=True)

        # swap out text from nodes, with the original node mappings
        new_nodes = []
        for node_with_score in nodes:
            node = node_with_score.node
            new_text, mapping_info = self.mask_pii(
                ner, node.get_content(metadata_mode=MetadataMode.LLM)
            )
            new_node = deepcopy(node)
            # Keep the mapping (which holds the raw PII) out of both the
            # embedding text and the LLM text.
            new_node.excluded_embed_metadata_keys.append(self.pii_node_info_key)
            new_node.excluded_llm_metadata_keys.append(self.pii_node_info_key)
            new_node.metadata[self.pii_node_info_key] = mapping_info
            new_node.set_content(new_text)
            new_nodes.append(
                NodeWithScore(node=new_node, score=node_with_score.score)
            )

        return new_nodes
Source code in `llama-index-core/llama_index/core/postprocessor/pii.py` (lines 109–118)
def mask_pii(self, ner: Callable, text: str) -> Tuple[str, Dict]:
    """
    Mask PII in text.

    Args:
        ner: Callable that takes the text and returns an iterable of entity
            dicts with ``entity_group``, ``word`` and ``start`` keys, and
            optionally ``end``.
        text: The text to mask.

    Returns:
        A tuple ``(masked_text, mapping)`` where ``mapping`` maps each
        ``[<entity_group>_<start>]`` tag to the entity's ``word``.

    """
    entries = list(ner(text))
    mapping: Dict = {}
    spans = []  # (start, end, tag) for entities with usable offsets
    by_word = []  # (word, tag) for entities without usable offsets

    for entry in entries:
        start = entry["start"]
        entity_group_tag = f"[{entry['entity_group']}_{start}]"
        mapping[entity_group_tag] = entry["word"]

        end = entry.get("end")
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
        ):
            spans.append((start, end, entity_group_tag))
        else:
            by_word.append((entry["word"], entity_group_tag))

    # Replace by character offsets so that each occurrence gets its own tag
    # and text that merely contains the entity word is left alone.
    pieces = []
    cursor = 0
    for start, end, tag in sorted(spans):
        if start < cursor:
            # Overlaps a span that was already masked; skip it so the text
            # is not corrupted (its mapping entry is still kept).
            continue
        pieces.append(text[cursor:start])
        pieces.append(tag)
        cursor = end
    pieces.append(text[cursor:])
    new_text = "".join(pieces)

    # Entities without offsets can only be masked by word replacement.
    for word, tag in by_word:
        new_text = new_text.replace(word, tag)

    # Surrounding whitespace is trimmed only when something was detected;
    # text with no entities is returned untouched.
    if entries:
        new_text = new_text.strip()

    return new_text, mapping