class OpenAlexReader(BaseReader):
    """Search OpenAlex works and load the results as documents.

    Parameters
    ----------
    email : str
        Email address sent as the ``mailto`` parameter of every OpenAlex
        API request.

    Attributes
    ----------
    email : str
        The email address passed to the constructor.
    """

    def __init__(self, email) -> None:
        self.email = email

    def _request_works(self, params):
        """Query the OpenAlex ``/works`` endpoint with the given parameters.

        Parameters
        ----------
        params : dict
            Query-string parameters. They are handed to ``requests`` so that
            reserved characters (``&``, ``#``, ``+``, ...) in a value are
            percent-encoded instead of corrupting the URL.

        Returns
        -------
        dict or None
            The parsed JSON body, or ``None`` when the request fails, the
            server answers with an HTTP error status, or the body contains
            an ``"error"`` entry. Every failure is logged, not raised.
        """
        try:
            response = requests.get(
                "https://api.openalex.org/works", params=params, timeout=10
            )
            response.raise_for_status()  # Check if request is successful
            data = response.json()  # Parse JSON data
            if "error" in data:
                raise ValueError(f"API returned error: {data['error']}")
            return data
        except requests.exceptions.HTTPError as http_error:
            logger.error(f"HTTP error occurred: {http_error}")
        except requests.exceptions.RequestException as request_error:
            logger.error(f"Error occurred: {request_error}")
        except ValueError as value_error:
            logger.error(value_error)
        return None

    def _search_openalex(self, query, fields):
        """Run a ``search=`` query against OpenAlex works.

        Parameters
        ----------
        query : str
            Search string.
        fields : str
            Comma-separated field names passed as ``select``.

        Returns
        -------
        dict or None
            Parsed API response, or ``None`` if the request failed.
        """
        return self._request_works(
            {"search": query, "select": fields, "mailto": self.email}
        )

    def _fulltext_search_openalex(self, query, fields):
        """Run a ``filter=fulltext.search:`` query against OpenAlex works.

        Parameters
        ----------
        query : str
            Search string.
        fields : str
            Comma-separated field names passed as ``select``.

        Returns
        -------
        dict or None
            Parsed API response, or ``None`` if the request failed.
        """
        return self._request_works(
            {
                "filter": f"fulltext.search:{query}",
                "select": fields,
                "mailto": self.email,
            }
        )

    def _invert_abstract(self, inv_index):
        """Rebuild an abstract from its inverted index.

        Parameters
        ----------
        inv_index : dict or None
            Mapping of each word to the list of positions at which it
            occurs.

        Returns
        -------
        str or None
            The words joined by single spaces in ascending position order,
            or ``None`` when ``inv_index`` is ``None``.
        """
        if inv_index is None:
            return None
        word_positions = [
            (word, position)
            for word, positions in inv_index.items()
            for position in positions
        ]
        word_positions.sort(key=lambda pair: pair[1])
        return " ".join(word for word, _ in word_positions)

    def _work_to_document(self, work):
        """Convert one OpenAlex work record (a dict) into a ``Document``."""
        # ``.get`` keeps a custom ``fields`` selection that omits the
        # abstract from raising KeyError; _invert_abstract maps None to None.
        abstract = self._invert_abstract(work.get("abstract_inverted_index"))
        title = work.get("title", None)

        # concat title and abstract, falling back to whichever one exists
        if abstract and title:
            text = title + " " + abstract
        elif abstract:
            text = abstract
        else:
            text = title

        try:
            primary_location = work["primary_location"]["source"]["display_name"]
        except (KeyError, TypeError):
            # Any level of the nested lookup may be missing or None.
            primary_location = None

        metadata = {
            "title": title,
            "keywords": work.get("keywords", None),
            "primary_location": primary_location,
            "publication_year": work.get("publication_year", None),
            "authorships": [
                item["author"]["display_name"]
                for item in work.get("authorships") or []
            ],
        }
        return Document(text=text, extra_info=metadata)

    def load_data(self, query: str, full_text=False, fields=None) -> List[Document]:
        """Search OpenAlex and return one document per matching work.

        Parameters
        ----------
        query : str
            Search string.
        full_text : bool, optional
            When true, use the full-text search filter instead of the
            default ``search`` parameter.
        fields : str, optional
            Comma-separated field names to request. Defaults to title,
            abstract, publication year, keywords, authorships and primary
            location.

        Returns
        -------
        List[Document]
            One document per work in the response's ``"results"``. The text
            is the title followed by the abstract (or whichever of the two
            is available). The list is empty when the search request failed.
        """
        if fields is None:
            fields = "title,abstract_inverted_index,publication_year,keywords,authorships,primary_location"

        if full_text:
            works = self._fulltext_search_openalex(query, fields)
        else:
            works = self._search_openalex(query, fields)

        # The search helpers return None after logging a failure.
        if not works:
            return []

        return [
            self._work_to_document(work) for work in works.get("results") or []
        ]