Coverage for mindsdb / integrations / handlers / ms_one_drive_handler / ms_one_drive_handler.py: 0%

93 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from typing import Any, Dict, Text 

2 

3import msal 

4from mindsdb_sql_parser.ast.base import ASTNode 

5from mindsdb_sql_parser.ast import Constant, Identifier, Select, Star 

6from mindsdb_sql_parser import parse_sql 

7import pandas as pd 

8from requests.exceptions import RequestException 

9 

10from mindsdb.integrations.handlers.ms_one_drive_handler.ms_graph_api_one_drive_client import MSGraphAPIOneDriveClient 

11from mindsdb.integrations.handlers.ms_one_drive_handler.ms_one_drive_tables import FileTable, ListFilesTable 

12from mindsdb.integrations.utilities.handlers.auth_utilities.microsoft import MSGraphAPIDelegatedPermissionsManager 

13from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException 

14from mindsdb.integrations.libs.response import ( 

15 HandlerResponse as Response, 

16 HandlerStatusResponse as StatusResponse, 

17 RESPONSE_TYPE 

18) 

19from mindsdb.integrations.libs.api_handler import APIHandler 

20from mindsdb.utilities import log 

21 

22logger = log.getLogger(__name__) 

23 

24 

class MSOneDriveHandler(APIHandler):
    """
    Handler that connects to Microsoft OneDrive through the Microsoft Graph API
    and executes SELECT statements against it.

    Two kinds of tables are exposed by `query`:
      - 'files': handled by `ListFilesTable`.
      - any other name: treated as a file name and handled by `FileTable`,
        provided its extension is listed in `supported_file_formats`.
    """

    name = 'one_drive'
    # File extensions (text after the last '.') that can be queried as tables.
    supported_file_formats = ['csv', 'tsv', 'json', 'parquet', 'pdf', 'txt']

    def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None:
        """
        Initializes the handler.

        Args:
            name (Text): The name of the handler instance.
            connection_data (Dict): The connection data required to connect to the Microsoft Graph API.
                `connect` requires the keys 'client_id', 'client_secret' and 'tenant_id';
                'code' is optional.
            kwargs: Arbitrary keyword arguments. Must contain 'handler_storage', the storage object
                used to persist the MSAL token cache.

        Raises:
            KeyError: If 'handler_storage' is not present in kwargs.
        """
        super().__init__(name)
        self.connection_data = connection_data
        self.handler_storage = kwargs['handler_storage']
        self.kwargs = kwargs

        self.connection = None
        self.is_connected = False

    def connect(self):
        """
        Establishes a connection to Microsoft OneDrive via the Microsoft Graph API.

        An already established connection is reused as long as its own
        `check_connection()` still succeeds; otherwise a new access token is
        obtained and a new client is created.

        Raises:
            ValueError: If the required connection parameters are not provided.
            AuthException: If an error occurs during the authentication process
                (this is the exception type `check_connection` handles for this call).

        Returns:
            MSGraphAPIOneDriveClient: An instance of the Microsoft Graph API client for Microsoft OneDrive.
        """
        if self.is_connected and self.connection.check_connection():
            return self.connection

        # Mandatory connection parameters.
        required_keys = ['client_id', 'client_secret', 'tenant_id']
        if not all(key in self.connection_data for key in required_keys):
            raise ValueError("Required parameters (client_id, client_secret, tenant_id) must be provided.")

        # Initialize the token cache.
        cache = msal.SerializableTokenCache()

        # Load the cache from the handler storage if it exists.
        cache_file = 'cache.bin'
        try:
            cache_content = self.handler_storage.file_get(cache_file)
        except FileNotFoundError:
            cache_content = None

        if cache_content:
            cache.deserialize(cache_content)

        # Initialize the permissions manager that wraps the MSAL authentication flow.
        permissions_manager = MSGraphAPIDelegatedPermissionsManager(
            client_id=self.connection_data['client_id'],
            client_secret=self.connection_data['client_secret'],
            tenant_id=self.connection_data['tenant_id'],
            cache=cache,
            code=self.connection_data.get('code')
        )

        access_token = permissions_manager.get_access_token()

        # Save the cache back to the handler storage if it has changed.
        if cache.has_state_changed:
            self.handler_storage.file_set(cache_file, cache.serialize().encode('utf-8'))

        # Pass the access token to the Microsoft Graph API client for Microsoft OneDrive.
        self.connection = MSGraphAPIOneDriveClient(
            access_token=access_token,
        )

        self.is_connected = True

        return self.connection

    def check_connection(self) -> StatusResponse:
        """
        Checks the status of the connection to the Microsoft Graph API for Microsoft OneDrive.

        No exception escapes this method: every failure is reported through the
        returned response. `self.is_connected` is always updated to match the
        outcome, including when authentication fails.

        Returns:
            StatusResponse: An object containing the success status and an error message if an error occurs.
                When authentication fails with an AuthException, `redirect_url` is set to the
                exception's `auth_url`.
        """
        response = StatusResponse(False)

        try:
            connection = self.connect()
            if connection.check_connection():
                response.success = True
                # NOTE(review): presumably tells the caller to copy the handler storage
                # (which holds the token cache) - confirm against StatusResponse.
                response.copy_storage = True
            else:
                raise RequestException("Connection check failed!")
        except (ValueError, RequestException) as known_error:
            logger.error(f'Connection check to Microsoft OneDrive failed, {known_error}!')
            response.error_message = str(known_error)
        except AuthException as error:
            # Deliberately no early return here: falling through keeps
            # `self.is_connected` in sync (it used to stay stale on this path).
            response.error_message = str(error)
            response.redirect_url = error.auth_url
        except Exception as unknown_error:
            logger.error(f'Connection check to Microsoft OneDrive failed due to an unknown error, {unknown_error}!')
            response.error_message = str(unknown_error)

        self.is_connected = response.success

        return response

    def query(self, query: ASTNode) -> Response:
        """
        Executes a SQL query represented by an ASTNode and retrieves the data.

        Args:
            query (ASTNode): An ASTNode representing the SQL query to be executed.

        Raises:
            ValueError: If the file format is not supported.
            NotImplementedError: If the query type is not supported.

        Returns:
            Response: A table response whose data frame holds the result of the query.
        """
        if not isinstance(query, Select):
            raise NotImplementedError(
                "Only SELECT queries are supported by the Microsoft OneDrive handler."
            )

        # Only the last part of the identifier is used as the table name.
        table_name = query.from_table.parts[-1]

        # If the table name is 'files', query the 'files' table.
        if table_name == "files":
            table = ListFilesTable(self)
            df = table.select(query)

        # For any other table name, query the file content via the 'FileTable' class.
        # Only the supported file formats can be queried.
        else:
            extension = table_name.split('.')[-1]
            if extension not in self.supported_file_formats:
                logger.error(f'The file format {extension} is not supported!')
                raise ValueError(f'The file format {extension} is not supported!')

            table = FileTable(self, table_name=table_name)
            df = table.select(query)

        return Response(
            RESPONSE_TYPE.TABLE,
            data_frame=df
        )

    def native_query(self, query: Text) -> Response:
        """
        Parses a SQL query string and executes it via `query`.

        Args:
            query (str): The SQL query to be executed.

        Returns:
            Response: A response object containing the result of the query.
        """
        query_ast = parse_sql(query)
        return self.query(query_ast)

    def get_tables(self) -> Response:
        """
        Retrieves a list of tables (files) in the user's Microsoft OneDrive.
        Each file is considered a table. Only the supported file formats are included in the list,
        preceded by the special 'files' table.

        Returns:
            Response: A table response with a single 'table_name' column.
        """
        connection = self.connect()

        # Get only the supported file formats.
        # Wrap the file names with backticks to prevent SQL syntax errors.
        supported_files = [
            f"`{file['path']}`"
            for file in connection.get_all_items()
            if file['path'].split('.')[-1] in self.supported_file_formats
        ]

        # Add the 'files' table to the list of supported tables.
        supported_files.insert(0, 'files')

        response = Response(
            RESPONSE_TYPE.TABLE,
            data_frame=pd.DataFrame(
                supported_files,
                columns=['table_name']
            )
        )

        return response

    def get_columns(self, table_name: str) -> Response:
        """
        Retrieves column details for a specified table (file) in the user's Microsoft OneDrive.

        Args:
            table_name (Text): The name of the table for which to retrieve column information.

        Returns:
            Response: A table response with the columns 'column_name' and 'data_type'.
                The 'object' dtype is reported as 'string'; other dtypes are passed through as-is.
        """
        # Get the columns (and their data types) by querying a single row from the table.
        query = Select(
            targets=[Star()],
            from_table=Identifier(parts=[table_name]),
            limit=Constant(1)
        )

        result = self.query(query)

        response = Response(
            RESPONSE_TYPE.TABLE,
            data_frame=pd.DataFrame(
                {
                    'column_name': result.data_frame.columns,
                    'data_type': [
                        data_type if data_type != 'object' else 'string'
                        for data_type in result.data_frame.dtypes
                    ]
                }
            )
        )

        return response