Coverage for mindsdb / utilities / security.py: 77%

37 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1from urllib.parse import urlparse 

2import socket 

3import ipaddress 

4 

5 

def is_private_url(url: str) -> bool:
    """
    Checks whether the host of a URL is a private address.

    Nothing is raised for a private URL: the verdict is returned as a boolean.
    A URL without a host name is reported as private.

    :param url: url to check
    :return: True if the url has no host name or if any address of its host
        is private (as defined by ``ipaddress``), False otherwise
    :raises socket.gaierror: if the host name cannot be resolved
    """

    hostname = urlparse(url).hostname
    if not hostname:
        # Unable to find hostname in url
        return True

    try:
        # IPv4/IPv6 literals are classified directly, without a resolver call.
        addresses = [ipaddress.ip_address(hostname)]
    except ValueError:
        # Not an IP literal: resolve the name. getaddrinfo returns IPv4 and
        # IPv6 results, and every one of them is checked so that a name with
        # several records cannot hide a private address behind a public one.
        resolved = socket.getaddrinfo(hostname, None)
        addresses = [ipaddress.ip_address(info[4][0]) for info in resolved]

    return any(address.is_private for address in addresses)

19 

20 

def clear_filename(filename: str) -> str:
    """
    Strips characters that could be used for path injection from a filename.

    The removed characters are: \\ / : * ? " < > |

    :param filename: input filename
    :return: the filename without those characters; an empty (falsy) input is returned as is
    """

    if not filename:
        return filename
    # Map every forbidden character to "deleted" and drop them all in one pass.
    forbidden = str.maketrans("", "", '\\/:*?"<>|')
    return filename.translate(forbidden)

34 

35 

36def _split_url(url: str) -> tuple[str, str]: 

37 """ 

38 Splits the URL into scheme and netloc. 

39 

40 Args: 

41 url (str): The URL to split. 

42 

43 Returns: 

44 tuple[str, str]: The scheme and netloc of the URL. 

45 

46 Raises: 

47 ValueError: If the URL does not include protocol and host name. 

48 """ 

49 parsed_url = urlparse(url) 

50 if not (parsed_url.scheme and parsed_url.netloc): 

51 raise ValueError(f"URL must include protocol and host name: {url}") 

52 return parsed_url.scheme.lower(), parsed_url.netloc.lower() 

53 

54 

def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
    """
    Tells whether every given URL comes from an allowed origin and none from a disallowed one.

    Each URL is reduced to its origin (scheme + netloc) and compared with the
    origins of the allowed and disallowed lists. An empty allowed list skips
    the allow check; an empty or missing disallowed list skips the deny check.

    Examples:
        validate_urls("http://site.com/file", ["site.com"]) -> Exception
        validate_urls("https://site.com/file", ["https://site.com"]) -> True
        validate_urls("http://site.com/file", ["https://site.com"]) -> False
        validate_urls("https://site.com/file", ["https://example.com"]) -> False
        validate_urls("site.com/file", ["https://site.com"]) -> Exception

    Args:
        urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
        allowed_urls (list[str]): The list of allowed URLs.
        disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
            will return False if the URL is in the disallowed list.

    Returns:
        bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise.

    Raises:
        ValueError: Propagated from _split_url when a URL that gets parsed lacks protocol or host name.
    """
    permitted = [_split_url(entry) for entry in allowed_urls]
    blocked = [_split_url(entry) for entry in ([] if disallowed_urls is None else disallowed_urls)]

    candidates = [urls] if isinstance(urls, str) else urls

    # First pass: with a non-empty allow list, every URL must match one of its origins.
    if permitted and any(_split_url(candidate) not in permitted for candidate in candidates):
        return False

    # Second pass: no URL may match an origin from the deny list.
    if blocked and any(_split_url(candidate) in blocked for candidate in candidates):
        return False

    return True