class PreprocessReader(BaseReader):
    """Reader that obtains document chunks through the ``pypreprocess`` client.

    The reader is configured with either a ``filepath`` (the client's
    ``chunk()`` call is issued and the returned process id is remembered) or
    an existing ``process_id`` (only the client's ``wait()`` call is issued).
    Chunks are cached on the instance after the first successful retrieval,
    so repeated calls to :meth:`load_data` / :meth:`get_nodes` reuse them.
    """

    def __init__(self, api_key: str, *args, **kwargs):
        """Create the reader and configure the underlying client.

        Args:
            api_key: Key handed to ``pypreprocess.Preprocess``. Must be a
                non-empty value.
            *args: Accepted for signature compatibility; not used.
            **kwargs: Recognised keys are ``filepath``, ``process_id``,
                ``table_output_format`` (alias ``table_output``),
                ``repeat_table_header`` (alias ``table_header``), ``merge``,
                ``repeat_title``, ``keep_header``, ``keep_footer``,
                ``smart_header`` and ``image_text``. Any other key is
                silently ignored.

        Raises:
            ValueError: If ``api_key`` is ``None``/empty, or if neither
                ``filepath`` nor ``process_id`` was supplied.
            ImportError: If the ``pypreprocess`` package is not installed.
        """
        if api_key is None or api_key == "":
            raise ValueError(
                "Please provide an api key to be used while doing the auth with the system."
            )

        try:
            # Imported lazily so the dependency is only needed when the
            # reader is actually instantiated.
            from pypreprocess import Preprocess
        except ImportError as err:
            raise ImportError(
                "`pypreprocess` package not found, please run `pip install"
                " pypreprocess`"
            ) from err

        # Maps every accepted keyword (including aliases) to the key under
        # which its value is forwarded to ``Preprocess.set_info``.
        option_aliases = {
            "table_output_format": "table_output_format",
            "table_output": "table_output_format",
            "repeat_table_header": "repeat_table_header",
            "table_header": "repeat_table_header",
            "merge": "merge",
            "repeat_title": "repeat_title",
            "keep_header": "keep_header",
            "keep_footer": "keep_footer",
            "smart_header": "smart_header",
            "image_text": "image_text",
        }

        info = {}
        self._preprocess = Preprocess(api_key)
        self._filepath = None
        self._process_id = None

        # kwargs are walked in call order, so when both an option and its
        # alias are supplied the one given last wins.
        for key, value in kwargs.items():
            if key == "filepath":
                self._filepath = value
                self._preprocess.set_filepath(value)
            elif key == "process_id":
                self._process_id = value
                self._preprocess.set_process_id(value)
            elif key in option_aliases:
                info[option_aliases[key]] = value

        if info:
            self._preprocess.set_info(info)

        if self._filepath is None and self._process_id is None:
            raise ValueError(
                "Please provide either filepath or process_id to handle the resutls."
            )

        # Populated by the first successful retrieval.
        self._chunks = None

    def load_data(self, return_whole_document=False) -> List[Document]:
        """Return the chunks as ``Document`` objects, fetching them if needed.

        When no chunks are cached yet, they are retrieved by process id if
        one is known, otherwise by file path.

        Args:
            return_whole_document: When exactly ``True``, all chunks are
                joined with a single space into one ``Document``; otherwise
                one ``Document`` is produced per chunk.

        Returns:
            A list of ``Document`` objects whose metadata carries the base
            name of the file under the ``"filename"`` key.

        Raises:
            Exception: If no chunks are available after the retrieval attempt.
        """
        if self._chunks is None:
            if self._process_id is not None:
                self._get_data_by_process()
            elif self._filepath is not None:
                self._get_data_by_filepath()

            if self._chunks is None:
                raise Exception(
                    "There is error happened during handling your file, please try again."
                )

        filename = os.path.basename(self._filepath)

        # Identity check kept on purpose: only the literal ``True`` selects
        # the single-document mode.
        if return_whole_document is True:
            return [
                Document(
                    text=" ".join(self._chunks),
                    metadata={"filename": filename},
                )
            ]

        return [
            Document(text=chunk, metadata={"filename": filename})
            for chunk in self._chunks
        ]

    def get_process_id(self):
        """Return the stored process id.

        This is the value passed as ``process_id`` at construction, or the id
        recorded after a successful ``chunk()`` call; ``None`` if neither has
        happened.
        """
        return self._process_id

    def get_nodes(self) -> List[TextNode]:
        """Return the chunks as ``TextNode`` objects linked in reading order.

        Chunks are loaded through :meth:`load_data` when none are cached.
        Each node id is the MD5 hex digest of the chunk text, so identical
        text always yields the same id. Every node except the last gets a
        ``NEXT`` relationship and every node except the first gets a
        ``PREVIOUS`` relationship; a single node gets no relationships.

        Returns:
            The list of nodes, one per chunk, in chunk order.
        """
        if self._chunks is None:
            self.load_data()

        nodes = []
        for chunk in self._chunks:
            text = str(chunk)
            # MD5 is used as a content fingerprint here, not for security.
            node_id = hashlib.md5(text.encode()).hexdigest()
            nodes.append(TextNode(text=text, id_=node_id))

        # Nothing to link with fewer than two nodes.
        if len(nodes) < 2:
            return nodes

        filename = os.path.basename(self._filepath)
        last_index = len(nodes) - 1
        for index, node in enumerate(nodes):
            if index < last_index:
                node.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
                    node_id=nodes[index + 1].node_id,
                    metadata={"filename": filename},
                )
            if index > 0:
                node.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
                    node_id=nodes[index - 1].node_id,
                    metadata={"filename": filename},
                )

        return nodes

    def _get_data_by_filepath(self) -> None:
        """Call the client's ``chunk()`` then ``wait()`` and cache the chunks.

        The process id from the ``chunk()`` response is stored on the
        instance. ``self._chunks`` is left untouched when either response is
        not reported as successful.
        """
        pp_response = self._preprocess.chunk()
        if not (pp_response.status == "OK" and pp_response.success is True):
            return

        self._process_id = pp_response.data["process"]["id"]

        response = self._preprocess.wait()
        if response.status == "OK" and response.success is True:
            # NOTE: unlike _get_data_by_process, the file name reported in
            # the response is not copied into self._filepath here.
            self._chunks = response.data["chunks"]

    def _get_data_by_process(self) -> None:
        """Call the client's ``wait()`` and cache file name and chunks.

        On a successful response, ``self._filepath`` is replaced by the file
        name found in the response and ``self._chunks`` by its chunks; on
        any other response the instance is left unchanged.
        """
        response = self._preprocess.wait()
        if response.status == "OK" and response.success is True:
            self._filepath = response.data["info"]["file"]["name"]
            self._chunks = response.data["chunks"]