Coverage for mindsdb / integrations / handlers / ms_one_drive_handler / ms_one_drive_handler.py: 0%
93 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1from typing import Any, Dict, Text
3import msal
4from mindsdb_sql_parser.ast.base import ASTNode
5from mindsdb_sql_parser.ast import Constant, Identifier, Select, Star
6from mindsdb_sql_parser import parse_sql
7import pandas as pd
8from requests.exceptions import RequestException
10from mindsdb.integrations.handlers.ms_one_drive_handler.ms_graph_api_one_drive_client import MSGraphAPIOneDriveClient
11from mindsdb.integrations.handlers.ms_one_drive_handler.ms_one_drive_tables import FileTable, ListFilesTable
12from mindsdb.integrations.utilities.handlers.auth_utilities.microsoft import MSGraphAPIDelegatedPermissionsManager
13from mindsdb.integrations.utilities.handlers.auth_utilities.exceptions import AuthException
14from mindsdb.integrations.libs.response import (
15 HandlerResponse as Response,
16 HandlerStatusResponse as StatusResponse,
17 RESPONSE_TYPE
18)
19from mindsdb.integrations.libs.api_handler import APIHandler
20from mindsdb.utilities import log
22logger = log.getLogger(__name__)
class MSOneDriveHandler(APIHandler):
    """
    This handler handles the connection and execution of SQL statements on Microsoft OneDrive.
    """

    name = 'one_drive'
    supported_file_formats = ['csv', 'tsv', 'json', 'parquet', 'pdf', 'txt']

    def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None:
        """
        Initializes the handler.

        Args:
            name (Text): The name of the handler instance.
            connection_data (Dict): The connection data required to connect to the Microsoft Graph API.
            kwargs: Arbitrary keyword arguments. Must contain 'handler_storage', which `connect` uses
                to read and write the serialized token cache.

        Raises:
            KeyError: If 'handler_storage' is not present in kwargs.
        """
        super().__init__(name)
        self.connection_data = connection_data
        self.handler_storage = kwargs['handler_storage']
        self.kwargs = kwargs

        self.connection = None
        self.is_connected = False

    def connect(self) -> MSGraphAPIOneDriveClient:
        """
        Establishes a connection to Microsoft OneDrive via the Microsoft Graph API.

        An existing connection is reused when the handler is flagged as connected and the client's
        own connection check passes; otherwise a new access token is obtained and a new client is built.

        Raises:
            ValueError: If the required connection parameters are not provided.
            AuthException: May propagate while obtaining the access token
                (NOTE(review): presumably raised by the permissions manager; confirm).

        Returns:
            MSGraphAPIOneDriveClient: An instance of the Microsoft Graph API client for Microsoft OneDrive.
        """
        if self.is_connected and self.connection.check_connection():
            return self.connection

        # Mandatory connection parameters.
        if not all(key in self.connection_data for key in ['client_id', 'client_secret', 'tenant_id']):
            raise ValueError("Required parameters (client_id, client_secret, tenant_id) must be provided.")

        # Initialize the token cache.
        cache = msal.SerializableTokenCache()

        # Load the cache from the handler storage if it exists.
        cache_file = 'cache.bin'
        try:
            cache_content = self.handler_storage.file_get(cache_file)
        except FileNotFoundError:
            cache_content = None

        if cache_content:
            cache.deserialize(cache_content)

        # Obtain an access token; the optional 'code' connection parameter is passed through as-is.
        permissions_manager = MSGraphAPIDelegatedPermissionsManager(
            client_id=self.connection_data['client_id'],
            client_secret=self.connection_data['client_secret'],
            tenant_id=self.connection_data['tenant_id'],
            cache=cache,
            code=self.connection_data.get('code')
        )

        access_token = permissions_manager.get_access_token()

        # Save the cache back to the handler storage only if it has changed.
        if cache.has_state_changed:
            self.handler_storage.file_set(cache_file, cache.serialize().encode('utf-8'))

        # Pass the access token to the Microsoft Graph API client for Microsoft OneDrive.
        self.connection = MSGraphAPIOneDriveClient(
            access_token=access_token,
        )

        self.is_connected = True

        return self.connection

    def check_connection(self) -> StatusResponse:
        """
        Checks the status of the connection to the Microsoft Graph API for Microsoft OneDrive.

        On every path, including an authentication failure, `self.is_connected` is updated to
        match the outcome before the response is returned.

        Returns:
            StatusResponse: An object containing the success status and an error message if an error occurs.
                When an AuthException is caught, its `auth_url` is exposed as `redirect_url`.
        """
        response = StatusResponse(False)

        try:
            connection = self.connect()
            if connection.check_connection():
                response.success = True
                response.copy_storage = True
            else:
                # Raised so that the failure is logged and reported by the handler below.
                raise RequestException("Connection check failed!")
        except (ValueError, RequestException) as known_error:
            logger.error(f'Connection check to Microsoft OneDrive failed, {known_error}!')
            response.error_message = str(known_error)
        except AuthException as error:
            # No early return here: falling through keeps `is_connected` in sync with the
            # failed authentication instead of leaving a stale `True` from an earlier success.
            response.error_message = str(error)
            response.redirect_url = error.auth_url
        except Exception as unknown_error:
            logger.error(f'Connection check to Microsoft OneDrive failed due to an unknown error, {unknown_error}!')
            response.error_message = str(unknown_error)

        self.is_connected = response.success

        return response

    def query(self, query: ASTNode) -> Response:
        """
        Executes a SQL query represented by an ASTNode and retrieves the data.

        Args:
            query (ASTNode): An ASTNode representing the SQL query to be executed.

        Raises:
            ValueError: If the file format is not supported.
            NotImplementedError: If the query type is not supported.

        Returns:
            Response: A response object containing the result of the query or an error message.
        """
        if not isinstance(query, Select):
            raise NotImplementedError(
                "Only SELECT queries are supported by the Microsoft OneDrive handler."
            )

        table_name = query.from_table.parts[-1]

        # If the table name is 'files', query the 'files' table.
        if table_name == "files":
            table = ListFilesTable(self)
            df = table.select(query)

        # For any other table name, query the file content via the 'FileTable' class.
        # Only the supported file formats can be queried.
        else:
            # The text after the last '.' in the table name is treated as the file extension.
            extension = table_name.split('.')[-1]
            if extension not in self.supported_file_formats:
                logger.error(f'The file format {extension} is not supported!')
                raise ValueError(f'The file format {extension} is not supported!')

            table = FileTable(self, table_name=table_name)
            df = table.select(query)

        return Response(
            RESPONSE_TYPE.TABLE,
            data_frame=df
        )

    def native_query(self, query: Text) -> Response:
        """
        Executes a SQL query and returns the result.

        The query string is parsed into an AST and delegated to `query`.

        Args:
            query (str): The SQL query to be executed.

        Returns:
            Response: A response object containing the result of the query or an error message.
        """
        query_ast = parse_sql(query)
        return self.query(query_ast)

    def get_tables(self) -> Response:
        """
        Retrieves a list of tables (files) in the user's Microsoft OneDrive.
        Each file is considered a table. Only the supported file formats are included in the list.

        Returns:
            Response: A response object containing the list of tables and views, formatted as per the `Response` class.
        """
        connection = self.connect()

        # Get only the supported file formats.
        # Wrap the file names with backticks to prevent SQL syntax errors.
        supported_files = [
            f"`{file['path']}`"
            for file in connection.get_all_items()
            if file['path'].split('.')[-1] in self.supported_file_formats
        ]

        # Add the 'files' table to the list of supported tables.
        supported_files.insert(0, 'files')

        response = Response(
            RESPONSE_TYPE.TABLE,
            data_frame=pd.DataFrame(
                supported_files,
                columns=['table_name']
            )
        )

        return response

    def get_columns(self, table_name: str) -> Response:
        """
        Retrieves column details for a specified table (file) in the user's Microsoft OneDrive.

        Args:
            table_name (Text): The name of the table for which to retrieve column information.

        Returns:
            Response: A response object containing the column details, formatted as per the `Response` class.
        """
        # Get the columns (and their data types) by querying a single row from the table.
        query = Select(
            targets=[Star()],
            from_table=Identifier(parts=[table_name]),
            limit=Constant(1)
        )

        result = self.query(query)

        response = Response(
            RESPONSE_TYPE.TABLE,
            data_frame=pd.DataFrame(
                {
                    'column_name': result.data_frame.columns,
                    # Columns with the pandas 'object' dtype are reported as 'string'.
                    'data_type': [data_type if data_type != 'object' else 'string' for data_type in result.data_frame.dtypes]
                }
            )
        )

        return response