Coverage for mindsdb / integrations / handlers / mendeley_handler / mendeley_handler.py: 0%
111 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1from mindsdb_sql_parser import parse_sql
2import pandas as pd
3from mendeley import Mendeley
4from mindsdb.integrations.libs.api_handler import APIHandler
5from mendeley.session import MendeleySession
6from mindsdb.integrations.handlers.mendeley_handler.mendeley_tables import CatalogSearchTable
7from mindsdb.utilities import log
8from typing import Dict
9from mindsdb.integrations.libs.response import (
10 HandlerStatusResponse as StatusResponse
11)
13logger = log.getLogger(__name__)
16class MendeleyHandler(APIHandler):
def __init__(self, name, **kwargs):
    """Store the Mendeley credentials and register the handler's tables.

    No session is opened here; `connect` authenticates on demand.

    Args:
        name (str): the handler name
        **kwargs: may contain `connection_data`, a dict from which
            `client_id` and `client_secret` are read (None when absent).
    """
    super().__init__(name)

    # `or {}` also covers an explicit connection_data=None.
    self.connection_args = kwargs.get('connection_data') or {}

    self.client_id = self.connection_args.get('client_id')
    self.client_secret = self.connection_args.get('client_secret')

    # Start disconnected. An eager `self.connect()` used to run here, but its
    # result was overwritten with None right away, so it only cost an
    # authenticate() call and could make construction itself fail.
    self.session = None
    self.is_connected = False

    catalog_search_data = CatalogSearchTable(self)
    self.catalog_search_data = catalog_search_data
    self._register_table('catalog_search_data', catalog_search_data)
def connect(self) -> MendeleySession:
    """Return the Mendeley session, authenticating first if necessary.

    A connection needs the client id and client secret obtained by
    registering an application at https://dev.mendeley.com/myapps.html
    (see https://dev.mendeley.com/reference/topics/application_registration.html).
    The client-credentials flow is used and the resulting session is kept on
    the handler so later calls reuse it.

    Returns:
        MendeleySession: the stored session
    """
    if not self.is_connected:
        client = Mendeley(self.client_id, self.client_secret)
        self.session = client.start_client_credentials_flow().authenticate()
        self.is_connected = True

    return self.session
def check_connection(self) -> StatusResponse:
    """Try to connect and report whether it worked.

    Returns:
        HandlerStatusResponse: `success` is True when `connect` did not
        raise; otherwise `error_message` holds the exception text.
    """
    status = StatusResponse(False)

    try:
        self.connect()
    except Exception as e:
        logger.error(f'Error connecting to Mendeley: {e}!')
        status.error_message = str(e)
    else:
        status.success = True

    # Keep the handler's flag in step with the outcome of this check.
    self.is_connected = status.success
    return status
def native_query(self, query_string: str):
    """Parse a raw SQL string and hand the resulting AST to `self.query`.

    Args:
        query_string (str): query in native format
    Returns:
        whatever `self.query` returns for the parsed statement
    """
    return self.query(parse_sql(query_string))
def get_authors(self, data):
    """Join a document's author names into one comma-separated string.

    Authors missing either a first or a last name are skipped. Joining with
    ", " means no dangling separator is left behind when the final author is
    one of the skipped ones.

    Args:
        data (CatalogDocument): document returned by API; its `authors`
            attribute may be None
    Returns:
        str: "First Last, First Last, ..." or "" when there are no usable
        authors
    """
    if data.authors is None:
        return ""

    return ", ".join(
        author.first_name + " " + author.last_name
        for author in data.authors
        if author.first_name is not None and author.last_name is not None
    )
def get_keywords(self, data):
    """Join a document's keywords into one comma-separated string.

    None entries are skipped wherever they occur (a None in last position
    used to raise TypeError), and the result carries no trailing space or
    separator.

    Args:
        data (CatalogDocument): document returned by the API; its `keywords`
            attribute may be None
    Returns:
        str: "kw1, kw2, ..." or "" when there are no keywords
    """
    if data.keywords is None:
        return ""

    return ", ".join(keyword for keyword in data.keywords if keyword is not None)
def create_dict(self, data):
    """Flatten a document returned by the API into a plain dictionary.

    The identifier fields (pmid, sgr, issn, scopus, doi, pui) are always
    present in the result, set to None when the document has no identifiers,
    so every row built from this method has the same set of keys.

    Args:
        data (CatalogDocument): document returned by API
    Returns:
        dict: keys title, type, source, year, pmid, sgr, issn, scopus, doi,
        pui, authors, keywords, link and id, in that order
    """
    identifiers = data.identifiers if data.identifiers is not None else {}

    record = {
        "title": data.title,
        "type": data.type,
        "source": data.source,
        "year": data.year,
    }
    for key in ("pmid", "sgr", "issn", "scopus", "doi", "pui"):
        record[key] = identifiers.get(key)

    record["authors"] = self.get_authors(data)
    # Keep None (rather than "") when the document has no keyword list.
    record["keywords"] = self.get_keywords(data) if data.keywords is not None else None
    record["link"] = data.link
    record["id"] = data.id
    return record
def call_mendeley_api(self, method_name: str, params: Dict) -> pd.DataFrame:
    """Run one of the supported Mendeley catalog searches and return a DataFrame.

    Three methods are supported:
    - 'advanced_search': searches the catalog by title, author, source,
      abstract, min_year, max_year and open_access; one row per document on
      the first page of results, whose size is params["limit"].
    - 'identifier_search': looks a document up by arxiv, doi, isbn, issn,
      pmid, scopus or filehash; one row.
    - 'get': fetches the document whose id is params["id"]; one row.

    Args:
        method_name (str): name of method
        params (Dict): parameters used in the search; 'advanced_search'
            requires a "limit" entry
    Returns:
        DataFrame: one row per document, columns as produced by create_dict
    Raises:
        NotImplementedError: if method_name is not supported, or if an
            advanced search yields no rows
        KeyError: if "limit" is missing for 'advanced_search'
    """
    self.session = self.connect()

    if method_name == 'advanced_search':
        search_params = {
            'title': params.get("title"),
            'author': params.get("author"),
            'source': params.get("source"),
            'abstract': params.get("abstract"),
            'min_year': params.get("min_year"),
            'max_year': params.get("max_year"),
            'open_access': params.get("open_access")
        }
        data = self.session.catalog.advanced_search(**search_params)
        # Collect every row first and build the frame once:
        # DataFrame.append no longer exists in pandas 2.x.
        rows = [self.create_dict(doc) for doc in data.list(page_size=params["limit"]).items]
        df = pd.DataFrame(rows)
        if df.empty:
            raise NotImplementedError(('Insufficient or wrong input given'))
        return df

    elif method_name == 'identifier_search':
        search_params = {
            'arxiv': params.get("arxiv"),
            'doi': params.get("doi"),
            'isbn': params.get("isbn"),
            'issn': params.get("issn"),
            'pmid': params.get("pmid"),
            'scopus': params.get("scopus"),
            'filehash': params.get("filehash")
        }
        data = self.session.catalog.by_identifier(**search_params)
        return pd.DataFrame(self.create_dict(data), index=[0])

    elif method_name == 'get':
        data = self.session.catalog.get(params.get("id"))
        return pd.DataFrame(self.create_dict(data), index=[0])

    raise NotImplementedError('Method name {} not supported by Mendeley API Handler'.format(method_name))