Coverage for mindsdb / api / http / namespaces / file.py: 63%
160 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import os
2import shutil
3import tarfile
4import tempfile
5import zipfile
6from pathlib import Path
7from urllib.parse import urlparse
9import multipart
10import requests
11from flask import current_app as ca
12from flask import request
13from flask_restx import Resource
15from mindsdb.api.http.namespaces.configs.files import ns_conf
16from mindsdb.api.http.utils import http_error
17from mindsdb.metrics.metrics import api_endpoint_metrics
18from mindsdb.utilities.config import config
19from mindsdb.utilities.context import context as ctx
20from mindsdb.utilities import log
21from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
22from mindsdb.utilities.fs import safe_extract
23from mindsdb.integrations.utilities.files.file_reader import FileProcessingError
# Logger for this module, obtained from the project logging helper.
logger = log.getLogger(__name__)

# Size limit, in bytes, checked against the remote Content-Length of URL uploads: 100 MiB.
MAX_FILE_SIZE = 100 * 1024 * 1024
@ns_conf.route("/")
class FilesList(Resource):
    """Collection resource for files, mounted at the namespace root."""

    @ns_conf.doc("get_files_list")
    @api_endpoint_metrics("GET", "/files")
    def get(self):
        """List all files

        Returns whatever the application's file controller reports from
        ``get_files()``, unmodified.
        """
        controller = ca.file_controller
        files = controller.get_files()
        return files
@ns_conf.route("/<name>")
@ns_conf.param("name", "MindsDB's name for file")
class File(Resource):
    """Resource for a single file: upload it (PUT) or remove it (DELETE)."""

    @ns_conf.doc("put_file")
    @api_endpoint_metrics("PUT", "/files/file")
    def put(self, name: str):
        """add new file as table

        The table name is <name> path parameter

        Data is provided as json or form data. File can be provided with FormData or via URL.

        If file is in FormData, then the form contain:
            - source_type (str) - 'file'
            - file (binary) - the file itself
            - original_file_name (str, optional) - the name with which the file will be saved

        If file should be uploaded from URL:
            - source_type (str) - 'url'
            - source (str) - the URL
            - original_file_name (str, optional) - the name with which the file will be saved
        """
        temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_")
        try:
            return self._store_upload(name, temp_dir_path)
        finally:
            # Single cleanup point: the scratch directory is removed on every
            # exit path, including early error responses and exceptions.
            shutil.rmtree(temp_dir_path, ignore_errors=True)

    def _store_upload(self, name: str, temp_dir_path: str):
        """Receive the file into ``temp_dir_path`` and hand it to the file controller.

        Args:
            name: table name taken from the URL path.
            temp_dir_path: scratch directory owned (created and removed) by ``put``.

        Returns:
            ``("", 200)`` on success, otherwise the response built by ``http_error``.
        """
        mindsdb_file_name = name.lower()

        # A missing Content-Type header is treated like any non-multipart request.
        content_type = request.headers.get("Content-Type", "")
        if content_type.startswith("multipart/form-data"):
            data = self._read_multipart_form(temp_dir_path)
        else:
            data = request.json
            if not isinstance(data, dict):
                return http_error(
                    400,
                    "Wrong request body",
                    "Request body must be a JSON object or multipart form data.",
                )

        existing_file_names = ca.file_controller.get_files_names(lower=True)
        if mindsdb_file_name in existing_file_names:
            return http_error(
                400,
                "File already exists",
                f"File with name '{mindsdb_file_name}' already exists",
            )

        source_type = data.get("source_type", "file")
        if source_type not in ("file", "url"):
            return http_error(
                400,
                "Wrong file source type",
                f'Only "file" and "url" supported as file source, got "{source_type}"',
            )

        if source_type == "url":
            error_response = self._fetch_from_url(data, mindsdb_file_name, temp_dir_path)
            if error_response is not None:
                return error_response

        if "file" not in data:
            return http_error(
                400,
                "File field is missed",
                'The "file" field is missed in the form',
            )

        # SECURITY: data["file"] may come straight from the client (JSON key or a
        # plain form field). Accept it only if it names an entry located directly
        # inside the scratch directory; otherwise the join below could point at
        # an arbitrary path on the server.
        file_name = data["file"]
        temp_root = Path(temp_dir_path).resolve()
        if not isinstance(file_name, str) or (temp_root / file_name).resolve().parent != temp_root:
            return http_error(400, "Wrong file name", f"Wrong file name: {file_name}")

        original_file_name = clear_filename(data.get("original_file_name"))

        file_path = os.path.join(temp_dir_path, file_name)
        lp = file_path.lower()
        if lp.endswith((".zip", ".tar.gz")):
            if lp.endswith(".zip"):
                with zipfile.ZipFile(file_path) as f:
                    f.extractall(temp_dir_path)
            else:
                with tarfile.open(file_path) as f:
                    safe_extract(f, temp_dir_path)
            os.remove(file_path)
            files = os.listdir(temp_dir_path)
            if len(files) != 1:
                return http_error(400, "Wrong content.", "Archive must contain only one data file.")
            file_path = os.path.join(temp_dir_path, files[0])
            # NOTE(review): the table name is replaced by the extracted entry's
            # name, so the extension check below is applied to that name --
            # confirm this is intended.
            mindsdb_file_name = files[0]
            if not os.path.isfile(file_path):
                return http_error(400, "Wrong content.", "Archive must contain data file in root.")

        if Path(mindsdb_file_name).suffix != "":
            return http_error(400, "Error", "File name cannot contain extension.")

        try:
            ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name)
        except FileProcessingError as e:
            return http_error(400, "Error", str(e))
        except Exception as e:
            return http_error(500, "Error", str(e))

        return "", 200

    @staticmethod
    def _read_multipart_form(temp_dir_path: str) -> dict:
        """Stream the multipart request body, storing the uploaded file in ``temp_dir_path``.

        Returns:
            Dict of decoded form fields. When a file part was received, ``"file"``
            holds its validated name and the file is stored under that name
            inside ``temp_dir_path``.

        Raises:
            ValueError: if the uploaded file name contains path components.
        """
        data = {}
        file_object = None
        uploaded_name = None

        def on_field(field):
            data[field.field_name.decode()] = field.value.decode()

        def on_file(file):
            nonlocal file_object, uploaded_name
            file_name = file.file_name.decode()
            if Path(file_name).name != file_name:
                raise ValueError(f"Wrong file name: {file_name}")
            uploaded_name = file_name
            file_object = file.file_object

        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                "UPLOAD_DIR": temp_dir_path.encode(),  # bytes required
                "UPLOAD_KEEP_FILENAME": False,
                "UPLOAD_KEEP_EXTENSIONS": True,
                "UPLOAD_DELETE_TMP": False,
                "MAX_MEMORY_FILE_SIZE": 0,
            },
        )

        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()

        if file_object is not None:
            if not file_object.closed:
                try:
                    file_object.flush()
                except (AttributeError, ValueError, OSError):
                    logger.debug("Failed to flush file_object before closing.", exc_info=True)
                file_object.close()
            stored_path = Path(file_object.name)
            # Rename to the name validated in on_file, and let it take precedence
            # over any text field that is also called "file".
            stored_path.rename(stored_path.parent / uploaded_name)
            data["file"] = uploaded_name

        return data

    @staticmethod
    def _fetch_from_url(data: dict, mindsdb_file_name: str, temp_dir_path: str):
        """Download ``data["source"]`` into ``temp_dir_path`` and set ``data["file"]``.

        Returns:
            ``None`` on success, otherwise the response built by ``http_error``.
        """
        if config["url_file_upload"]["enabled"] is False:
            return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")

        if "file" in data:
            return http_error(
                400,
                "Fields conflict",
                'URL source type can not be used together with "file" field.',
            )
        if "source" not in data:
            return http_error(
                400,
                "Missed file source",
                'If the file\'s source type is URL, the "source" field should be specified.',
            )

        try:
            parsed = urlparse(data["source"])
            if not (parsed.scheme and parsed.netloc):
                raise ValueError()
            url = parsed.geturl()
        except Exception:
            return http_error(
                400,
                "Invalid URL",
                f"The URL is not valid: {data['source']}",
            )

        allowed_origins = config["url_file_upload"]["allowed_origins"]
        disallowed_origins = config["url_file_upload"]["disallowed_origins"]

        if validate_urls(url, allowed_origins, disallowed_origins) is False:
            allowed_hosts = ", ".join(allowed_origins) if allowed_origins else "not specified"
            return http_error(
                400,
                "Invalid URL",
                f"URL is not allowed for security reasons. Allowed hosts are: {allowed_hosts}.",
            )

        data["file"] = clear_filename(mindsdb_file_name)
        if config.is_cloud:
            if is_private_url(url):
                return http_error(400, f"URL is private: {url}")

            if ctx.user_class != 1:
                info = requests.head(url, timeout=30)
                try:
                    file_size = int(info.headers.get("Content-Length"))
                except (TypeError, ValueError):
                    # Header absent or not a number: the size is unknown.
                    file_size = None

                if file_size is None:
                    return http_error(
                        400,
                        "Error getting file info",
                        "Can't determine remote file size",
                    )
                if file_size > MAX_FILE_SIZE:
                    return http_error(
                        400,
                        "File is too big",
                        f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB",
                    )

        # Same timeout as the HEAD request above, so an unresponsive server
        # cannot block this request indefinitely.
        with requests.get(url, stream=True, timeout=30) as r:
            if r.status_code != 200:
                return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
            file_path = os.path.join(temp_dir_path, data["file"])
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        return None

    @ns_conf.doc("delete_file")
    @api_endpoint_metrics("DELETE", "/files/file")
    def delete(self, name: str):
        """delete file"""

        try:
            ca.file_controller.delete_file(name)
        except FileNotFoundError:
            logger.exception(f"Error when deleting file '{name}'")
            return http_error(
                400,
                "Error deleting file",
                f"There was an error while trying to delete file with name '{name}'",
            )
        except Exception:
            # logger.exception keeps the traceback, matching the handler above.
            logger.exception(f"Error when deleting file '{name}'")
            return http_error(
                500,
                "Error occured while deleting file",
                f"There was an error while trying to delete file with name '{name}'",
            )
        return "", 200