Coverage for mindsdb / integrations / handlers / mendeley_handler / mendeley_handler.py: 0%

111 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from mindsdb_sql_parser import parse_sql 

2import pandas as pd 

3from mendeley import Mendeley 

4from mindsdb.integrations.libs.api_handler import APIHandler 

5from mendeley.session import MendeleySession 

6from mindsdb.integrations.handlers.mendeley_handler.mendeley_tables import CatalogSearchTable 

7from mindsdb.utilities import log 

8from typing import Dict 

9from mindsdb.integrations.libs.response import ( 

10 HandlerStatusResponse as StatusResponse 

11) 

12 

13logger = log.getLogger(__name__) 

14 

15 

16class MendeleyHandler(APIHandler): 

17 

def __init__(self, name, **kwargs):
    """Initialize the Mendeley handler and register its tables.

    No request is made to Mendeley here: the handler starts disconnected
    and a session is only created when ``connect()`` is called. The
    previous implementation authenticated in the constructor and then
    immediately discarded the resulting session.

    Args:
        name (str): the handler name
        **kwargs: may contain ``connection_data``, a dict holding the
            ``client_id`` and ``client_secret`` of the registered
            Mendeley application.
    """
    super().__init__(name)

    # Tolerate an explicit ``connection_data=None`` as well as a missing key.
    self.connection_args = kwargs.get('connection_data') or {}

    self.client_id = self.connection_args.get('client_id')
    self.client_secret = self.connection_args.get('client_secret')

    # Start disconnected; connect() fills these in on first use.
    self.session = None
    self.is_connected = False

    self.catalog_search_data = CatalogSearchTable(self)
    self._register_table('catalog_search_data', self.catalog_search_data)

37 

def connect(self) -> MendeleySession:
    """Return the Mendeley session, authenticating first if needed.

    Connecting to the Mendeley API requires the client id and client
    secret created when registering an application at
    https://dev.mendeley.com/myapps.html (see
    https://dev.mendeley.com/reference/topics/application_registration.html
    for details). Access to Mendeley data goes through the session
    obtained from the client-credentials flow.

    Returns:
        MendeleySession: the stored session when the handler is already
        flagged as connected, otherwise a freshly authenticated one.
    """
    if not self.is_connected:
        client = Mendeley(self.client_id, self.client_secret)
        self.session = client.start_client_credentials_flow().authenticate()
        # Only flag the handler as connected once authentication succeeded.
        self.is_connected = True

    return self.session

57 

def check_connection(self) -> StatusResponse:
    """Check whether a connection to Mendeley can be established.

    Any exception raised by ``connect()`` is logged and reported through
    the response instead of being propagated.

    Returns:
        HandlerStatusResponse: ``success`` is True when ``connect()``
        completed without raising; otherwise ``error_message`` holds the
        text of the exception.
    """
    status = StatusResponse(False)

    try:
        self.connect()
    except Exception as e:
        logger.error(f'Error connecting to Mendeley: {e}!')
        status.error_message = str(e)
    else:
        status.success = True

    # Keep the handler flag in sync with the outcome of this check.
    self.is_connected = status.success
    return status

75 

def native_query(self, query_string: str):
    """Parse a raw SQL string and run it through ``query``.

    Args:
        query_string (str): query in native format
    Returns:
        Whatever ``self.query`` returns for the parsed statement.
    """
    parsed_query = parse_sql(query_string)
    return self.query(parsed_query)

85 

def get_authors(self, data):
    """Combine the authors of a document into one comma-separated string.

    Flattening the author list into a string lets the document be stored
    in a single DataFrame cell. Authors missing either a first or a last
    name are skipped. Names are joined with ", " and no trailing
    separator is produced, even when the last author is skipped.

    Args:
        data (CatalogDocument): document returned by API
    Returns:
        str: "First Last, First Last, ..." or an empty string when the
        document has no authors.
    """
    if data.authors is None:
        return ""

    return ", ".join(
        f"{author.first_name} {author.last_name}"
        for author in data.authors
        if author.first_name is not None and author.last_name is not None
    )

105 

def get_keywords(self, data):
    """Combine the keywords of a document into one comma-separated string.

    Flattening the keyword list into a string lets the document be stored
    in a single DataFrame cell. ``None`` entries are skipped wherever they
    appear in the list, and the result carries no trailing separator or
    trailing space.

    Args:
        data (CatalogDocument): document returned by the API
    Returns:
        str: "kw1, kw2, ..." or an empty string when the document has no
        keywords.
    """
    if data.keywords is None:
        return ""

    return ", ".join(keyword for keyword in data.keywords if keyword is not None)

125 

def create_dict(self, data):
    """Flatten a catalog document into a plain dictionary.

    The returned dictionary always has the same keys in the same order,
    so rows built from different documents share one schema. When the
    document carries no identifiers, every identifier field is ``None``
    instead of being left out.

    Args:
        data (CatalogDocument): document returned by API
    Returns:
        dict: the fields title, type, source, year, pmid, sgr, issn,
        scopus, doi, pui, authors, keywords, link and id.
    """
    # An empty mapping makes every identifier lookup below yield None.
    identifiers = data.identifiers if data.identifiers is not None else {}

    document = {
        "title": data.title,
        "type": data.type,
        "source": data.source,
        "year": data.year,
    }
    for key in ("pmid", "sgr", "issn", "scopus", "doi", "pui"):
        document[key] = identifiers.get(key)

    document["authors"] = self.get_authors(data)
    # Missing keywords are reported as None rather than an empty string.
    document["keywords"] = self.get_keywords(data) if data.keywords is not None else None
    document["link"] = data.link
    document["id"] = data.id
    return document

155 

def call_mendeley_api(self, method_name: str, params: Dict) -> pd.DataFrame:
    """Run a Mendeley catalog lookup and return the result as a DataFrame.

    Three kinds of lookup are supported, selected by ``method_name``:

    * ``advanced_search`` - searches the catalog by title, author, source,
      abstract, min_year, max_year and open_access, and yields one row per
      document found. ``params["limit"]`` is required and is used as the
      page size.
    * ``identifier_search`` - looks a document up by arxiv, doi, isbn,
      issn, pmid, scopus or filehash and yields a single row.
    * ``get`` - fetches the document whose id is ``params["id"]`` and
      yields a single row.

    Args:
        method_name (str): name of method
        params (Dict): Dictionary containing the parameters used in the search
    Returns:
        DataFrame: one row per document, with the columns produced by
        ``create_dict``.
    Raises:
        NotImplementedError: if ``method_name`` is not one of the methods
            above, or if an advanced search returned no documents.
        KeyError: if ``advanced_search`` is requested without ``limit``.
    """
    self.session = self.connect()

    if method_name == 'advanced_search':
        search_fields = (
            'title', 'author', 'source', 'abstract',
            'min_year', 'max_year', 'open_access',
        )
        search_params = {field: params.get(field) for field in search_fields}
        search = self.session.catalog.advanced_search(**search_params)
        page = search.list(page_size=params["limit"])

        # Collect all rows first and build the frame once. DataFrame.append
        # was removed in pandas 2.0, and growing a frame row by row is
        # quadratic anyway.
        rows = [self.create_dict(document) for document in page.items]
        if not rows:
            raise NotImplementedError('Insufficient or wrong input given')
        return pd.DataFrame(rows)

    if method_name == 'identifier_search':
        identifier_fields = (
            'arxiv', 'doi', 'isbn', 'issn', 'pmid', 'scopus', 'filehash',
        )
        search_params = {field: params.get(field) for field in identifier_fields}
        document = self.session.catalog.by_identifier(**search_params)
        return pd.DataFrame(self.create_dict(document), index=[0])

    if method_name == 'get':
        document = self.session.catalog.get(params.get("id"))
        return pd.DataFrame(self.create_dict(document), index=[0])

    raise NotImplementedError('Method name {} not supported by Mendeley API Handler'.format(method_name))