class RayyanReader(BaseReader):
    """Rayyan reader. Reads articles from a Rayyan review.

    Args:
        credentials_path (str): Rayyan credentials path.
        rayyan_url (str, optional): Rayyan URL. Defaults to https://rayyan.ai.
            Set to an alternative URL if you are using a non-production
            Rayyan instance.
    """

    def __init__(
        self, credentials_path: str, rayyan_url: str = "https://rayyan.ai"
    ) -> None:
        """Initialize Rayyan reader.

        Builds the Rayyan client from the credentials file, then fetches the
        current user's info and logs the display name.
        """
        # Imported lazily so the module can be imported without the SDK.
        from rayyan import Rayyan
        from rayyan.user import User

        logging.debug("Initializing Rayyan reader...")
        self.rayyan = Rayyan(credentials_path, url=rayyan_url)
        user = User(self.rayyan).get_info()
        logging.info(f"Signed in successfully to Rayyan as: {user['displayName']}!")

    def load_data(self, review_id: str, filters: dict = None) -> List[Document]:
        """Load articles from a review.

        Args:
            review_id (str): Rayyan review ID.
            filters (dict, optional): Filters to apply to the review.
                Defaults to None (no filters). Merged into the request
                parameters and passed to the Rayyan review results method
                as is, so keys such as "start" or "length" override the
                paging defaults.

        Returns:
            List[Document]: One document per article whose title and
            abstracts are not both empty. The text is the title, a newline,
            and the abstracts joined by newlines and cut to 1024 characters.
        """
        from tenacity import (
            retry,
            stop_after_attempt,
            stop_after_delay,
            stop_all,
            wait_random_exponential,
        )
        from tqdm import tqdm

        from rayyan.review import Review

        rayyan_review = Review(self.rayyan)
        my_review = rayyan_review.get(review_id)
        logging.info(
            f"Working on review: '{my_review['title']}' with {my_review['total_articles']} total articles."
        )

        result_params = {"start": 0, "length": 100}
        # `filters` is optional; None (the documented default) means no filters.
        if filters:
            result_params.update(filters)

        # stop_all gives up only once BOTH limits are exceeded
        # (at least 3 attempts and at least 30 seconds elapsed).
        @retry(
            wait=wait_random_exponential(min=1, max=10),
            stop=stop_all(stop_after_attempt(3), stop_after_delay(30)),
        )
        def fetch_results_with_retry():
            logging.debug("Fetch parameters: %s", result_params)
            return rayyan_review.results(review_id, result_params)

        articles = []
        logging.info("Fetching articles from Rayyan...")
        total = my_review["total_articles"]
        with tqdm(total=total) as pbar:
            while len(articles) < total:
                # retrieve articles in batches
                review_results = fetch_results_with_retry()
                fetched_articles = review_results["data"]
                articles.extend(fetched_articles)

                # update total in case filters are applied
                if total != review_results["recordsFiltered"]:
                    total = review_results["recordsFiltered"]
                    pbar.total = total

                if not fetched_articles:
                    # An empty page makes no progress; without this guard the
                    # loop would spin forever (e.g. when a caller-supplied
                    # "start" offset skips part of the review).
                    if len(articles) < total:
                        logging.warning(
                            "Rayyan returned an empty page after %d of %d articles; "
                            "stopping early.",
                            len(articles),
                            total,
                        )
                    break

                result_params["start"] += len(fetched_articles)
                pbar.update(len(fetched_articles))

        results = []
        for article in articles:
            # Join all abstracts into one string, capped at 1024 characters.
            abstracts = ""
            if article["abstracts"] is not None:
                abstracts = (
                    "\n".join(abstract["content"] for abstract in article["abstracts"])[
                        0:1024
                    ].strip()
                )

            title = article["title"]
            if title is not None:
                title = title.strip()

            # A missing title must not be rendered as the literal text "None".
            body = f"{title or ''}\n{abstracts}"
            if body.strip() == "":
                # Nothing to index for this article.
                continue

            # The metadata keeps the title as received (None stays None).
            extra_info = {"id": article["id"], "title": title}
            results.append(
                Document(
                    text=body,
                    extra_info=extra_info,
                )
            )
        return results
def load_data(self, review_id: str, filters: dict = None) -> List[Document]:
    """Load articles from a review.

    Args:
        review_id (str): Rayyan review ID.
        filters (dict, optional): Filters to apply to the review.
            Defaults to None (no filters). Merged into the request
            parameters and passed to the Rayyan review results method as is,
            so keys such as "start" or "length" override the paging defaults.

    Returns:
        List[Document]: One document per article whose title and abstracts
        are not both empty. The text is the title, a newline, and the
        abstracts joined by newlines and cut to 1024 characters.
    """
    from tenacity import (
        retry,
        stop_after_attempt,
        stop_after_delay,
        stop_all,
        wait_random_exponential,
    )
    from tqdm import tqdm

    from rayyan.review import Review

    rayyan_review = Review(self.rayyan)
    my_review = rayyan_review.get(review_id)
    logging.info(
        f"Working on review: '{my_review['title']}' with {my_review['total_articles']} total articles."
    )

    result_params = {"start": 0, "length": 100}
    # `filters` is optional; None (the documented default) means no filters.
    if filters:
        result_params.update(filters)

    # stop_all gives up only once BOTH limits are exceeded
    # (at least 3 attempts and at least 30 seconds elapsed).
    @retry(
        wait=wait_random_exponential(min=1, max=10),
        stop=stop_all(stop_after_attempt(3), stop_after_delay(30)),
    )
    def fetch_results_with_retry():
        logging.debug("Fetch parameters: %s", result_params)
        return rayyan_review.results(review_id, result_params)

    articles = []
    logging.info("Fetching articles from Rayyan...")
    total = my_review["total_articles"]
    with tqdm(total=total) as pbar:
        while len(articles) < total:
            # retrieve articles in batches
            review_results = fetch_results_with_retry()
            fetched_articles = review_results["data"]
            articles.extend(fetched_articles)

            # update total in case filters are applied
            if total != review_results["recordsFiltered"]:
                total = review_results["recordsFiltered"]
                pbar.total = total

            if not fetched_articles:
                # An empty page makes no progress; without this guard the
                # loop would spin forever (e.g. when a caller-supplied
                # "start" offset skips part of the review).
                if len(articles) < total:
                    logging.warning(
                        "Rayyan returned an empty page after %d of %d articles; "
                        "stopping early.",
                        len(articles),
                        total,
                    )
                break

            result_params["start"] += len(fetched_articles)
            pbar.update(len(fetched_articles))

    results = []
    for article in articles:
        # Join all abstracts into one string, capped at 1024 characters.
        abstracts = ""
        if article["abstracts"] is not None:
            abstracts = (
                "\n".join(abstract["content"] for abstract in article["abstracts"])[
                    0:1024
                ].strip()
            )

        title = article["title"]
        if title is not None:
            title = title.strip()

        # A missing title must not be rendered as the literal text "None".
        body = f"{title or ''}\n{abstracts}"
        if body.strip() == "":
            # Nothing to index for this article.
            continue

        # The metadata keeps the title as received (None stays None).
        extra_info = {"id": article["id"], "title": title}
        results.append(
            Document(
                text=body,
                extra_info=extra_info,
            )
        )
    return results