Bases: BaseNodePostprocessor
Keyword-based Node processor.
Source code in llama-index-core/llama_index/core/postprocessor/node.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class KeywordNodePostprocessor(BaseNodePostprocessor):
    """Keyword-based Node processor.

    Filters nodes by running spaCy ``PhraseMatcher`` patterns (built with the
    matcher's default settings) over each node's content.

    Attributes:
        required_keywords: A node is kept only if its content matches at
            least one of these phrases. An empty list disables this check.
        exclude_keywords: A node is dropped if its content matches any of
            these phrases. An empty list disables this check.
        lang: Language code passed to ``spacy.blank`` to build the tokenizer.
    """

    required_keywords: List[str] = Field(default_factory=list)
    exclude_keywords: List[str] = Field(default_factory=list)
    lang: str = Field(default="en")

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this postprocessor class."""
        return "KeywordNodePostprocessor"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Return the nodes that pass the keyword filters.

        Args:
            nodes: Candidate nodes to filter.
            query_bundle: Unused by this postprocessor.

        Returns:
            A new list holding the nodes, in their original order, that
            match at least one required keyword (when any are configured)
            and match no excluded keyword (when any are configured).

        Raises:
            ImportError: If spaCy is not installed.
        """
        try:
            import spacy
        except ImportError as err:
            # Chain the original failure so the root cause stays visible.
            raise ImportError(
                "Spacy is not installed, please install it with `pip install spacy`."
            ) from err
        from spacy.matcher import PhraseMatcher

        # Created before the shortcut below so that an unsupported `lang`
        # still surfaces exactly as it did before.
        nlp = spacy.blank(self.lang)

        # With no filters configured every node passes; skip tokenizing them.
        if not self.required_keywords and not self.exclude_keywords:
            return list(nodes)

        # Only build the matchers that will actually be consulted.
        required_matcher = None
        if self.required_keywords:
            required_matcher = PhraseMatcher(nlp.vocab)
            required_matcher.add(
                "RequiredKeywords", list(nlp.pipe(self.required_keywords))
            )

        exclude_matcher = None
        if self.exclude_keywords:
            exclude_matcher = PhraseMatcher(nlp.vocab)
            exclude_matcher.add(
                "ExcludeKeywords", list(nlp.pipe(self.exclude_keywords))
            )

        new_nodes = []
        for node_with_score in nodes:
            doc = nlp(node_with_score.node.get_content())
            # A matcher call returns a list of matches; empty means no hit.
            if required_matcher is not None and not required_matcher(doc):
                continue
            if exclude_matcher is not None and exclude_matcher(doc):
                continue
            new_nodes.append(node_with_score)

        return new_nodes
|