Coverage for mindsdb / utilities / security.py: 77%
37 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1from urllib.parse import urlparse
2import socket
3import ipaddress
def is_private_url(url: str) -> bool:
    """
    Tell whether a URL points at a private network address.

    The host name is resolved with ``socket.getaddrinfo`` so that IPv4 and
    IPv6 targets are both handled, and every address returned by the
    resolver is inspected rather than only the first one.

    :param url: url to check
    :return: True if no host name can be extracted from the url, or if any
        address the host resolves to is private; False otherwise
    :raises socket.gaierror: if the host name cannot be resolved
    """
    hostname = urlparse(url).hostname
    if not hostname:
        # Unable to find a hostname in the url: treat it as private (fail closed).
        return True

    # sockaddr is the 5th item of each result; its first field is the textual address.
    resolved = {info[4][0] for info in socket.getaddrinfo(hostname, None)}
    if not resolved:
        # Nothing to verify against, so do not vouch for the url.
        return True

    # An IPv6 sockaddr may carry a "%scope" suffix; drop it before parsing.
    return any(ipaddress.ip_address(address.split("%", 1)[0]).is_private for address in resolved)
def clear_filename(filename: str) -> str:
    """
    Strip characters that could be used for path injection from a filename.

    The characters removed are: backslash, slash, colon, asterisk, question
    mark, double quote, angle brackets and pipe.

    :param filename: input filename
    :return: the filename without those characters; an empty (falsy) value
        is returned unchanged
    """
    if filename:
        # One translation pass deletes every forbidden character.
        forbidden = str.maketrans("", "", '\\/:*?"<>|')
        return filename.translate(forbidden)
    return filename
36def _split_url(url: str) -> tuple[str, str]:
37 """
38 Splits the URL into scheme and netloc.
40 Args:
41 url (str): The URL to split.
43 Returns:
44 tuple[str, str]: The scheme and netloc of the URL.
46 Raises:
47 ValueError: If the URL does not include protocol and host name.
48 """
49 parsed_url = urlparse(url)
50 if not (parsed_url.scheme and parsed_url.netloc):
51 raise ValueError(f"URL must include protocol and host name: {url}")
52 return parsed_url.scheme.lower(), parsed_url.netloc.lower()
55def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
56 """
57 Checks if the provided URL(s) is/are from an allowed host.
59 This function parses the URL(s) and checks the origin (scheme + netloc)
60 against a list of allowed hosts.
62 Examples:
63 validate_urls("http://site.com/file", ["site.com"]) -> Exception
64 validate_urls("https://site.com/file", ["https://site.com"]) -> True
65 validate_urls("http://site.com/file", ["https://site.com"]) -> False
66 validate_urls("https://site.com/file", ["https://example.com"]) -> False
67 validate_urls("site.com/file", ["https://site.com"]) -> Exception
69 Args:
70 urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
71 allowed_urls (list[str]): The list of allowed URLs.
72 disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
73 will return False if the URL is in the disallowed list.
75 Returns:
76 bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise.
77 """
78 if disallowed_urls is None:
79 disallowed_urls = []
81 allowed_origins = [_split_url(url) for url in allowed_urls]
82 disallowed_origins = [_split_url(url) for url in disallowed_urls]
84 if isinstance(urls, str):
85 urls = [urls]
87 if allowed_origins:
88 for url in urls:
89 if _split_url(url) not in allowed_origins:
90 return False
92 if disallowed_origins:
93 for url in urls:
94 if _split_url(url) in disallowed_origins:
95 return False
97 return True