Coverage for mindsdb / integrations / handlers / dockerhub_handler / dockerhub_tables.py: 0%
171 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import pandas as pd
2from typing import List
3from mindsdb.integrations.libs.api_handler import APITable
4from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor
5from mindsdb.utilities import log
6from mindsdb_sql_parser import ast
8logger = log.getLogger(__name__)
class DockerHubRepoImagesSummaryTable(APITable):
    """The DockerHub Repo Images Summary Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://docs.docker.com/docker-hub/api/latest/#tag/images API

        Parameters
        ----------
        query : ast.Select
           Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            repo images summary matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other
            than '=', or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_images_summary',
            self.get_columns()
        )
        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # namespace/repository are consumed as API arguments; every other
        # known column is filtered locally on the resulting DataFrame.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if not all(param in search_params for param in api_params):
            raise NotImplementedError("Both namespace and repository columns have to be present in WHERE clause.")

        response = self.handler.docker_client.get_images_summary(
            search_params["namespace"],
            search_params["repository"]
        )
        self.check_res(res=response)

        content = response["content"]
        statistics = content["statistics"]

        # The API nests the counters under "statistics"; flatten them into a
        # single row whose keys match get_columns().
        repo_images_summary_df = pd.json_normalize({
            "active_from": content["active_from"],
            "total": statistics["total"],
            "active": statistics["active"],
            "inactive": statistics["inactive"],
        })

        select_statement_executor = SELECTQueryExecutor(
            repo_images_summary_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )
        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an Exception when the response code is not 200

        Parameters
        ----------
        res : dict
            Response of the docker client, read through its "code" and
            "error" keys
        """
        if res["code"] != 200:
            # f-string instead of '+' so a non-string error payload cannot
            # turn the real failure into a TypeError.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "active_from",
            "total",
            "active",
            "inactive"
        ]
class DockerHubOrgSettingsTable(APITable):
    """The DockerHub Repo Org Settings Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/orgs/{name}/settings API

        Parameters
        ----------
        query : ast.Select
           Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            org settings matching the query

        Raises
        ------
        NotImplementedError
            If organization is compared with an operator other than '=',
            or if it is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'org_settings',
            self.get_columns()
        )
        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 == 'organization':
                # organization is passed to the API call, so only an exact
                # match can be expressed.
                if op != '=':
                    raise NotImplementedError("Only '=' operator is supported for organization column.")
                search_params["organization"] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if "organization" not in search_params:
            raise NotImplementedError("organization column has to be present in where clause.")

        response = self.handler.docker_client.get_org_settings(search_params["organization"])
        self.check_res(res=response)

        restricted_images = response["content"]["restricted_images"]

        # Flatten the nested "restricted_images" object into one row whose
        # keys match get_columns().
        organization_df = pd.json_normalize({
            "restricted_images_enabled": restricted_images["enabled"],
            "restricted_images_allow_official_images": restricted_images["allow_official_images"],
            "restricted_images_allow_verified_publishers": restricted_images["allow_verified_publishers"],
        })

        select_statement_executor = SELECTQueryExecutor(
            organization_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )
        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an Exception when the response code is not 200

        Parameters
        ----------
        res : dict
            Response of the docker client, read through its "code" and
            "error" keys
        """
        if res["code"] != 200:
            # f-string instead of '+' so a non-string error payload cannot
            # turn the real failure into a TypeError.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "restricted_images_enabled",
            "restricted_images_allow_official_images",
            "restricted_images_allow_verified_publishers"
        ]
class DockerHubRepoImagesTable(APITable):
    """The DockerHub Repo Images Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/images API

        Parameters
        ----------
        query : ast.Select
           Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Images matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other
            than '=', or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_images',
            self.get_columns()
        )
        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # namespace/repository are consumed as API arguments; every other
        # known column is filtered locally on the resulting DataFrame.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if not all(param in search_params for param in api_params):
            raise NotImplementedError("namespace and repository column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_images(
            search_params["namespace"],
            search_params["repository"]
        )
        self.check_res(res=response)

        content = response["content"]

        repo_images_df = pd.json_normalize(content["results"])
        if repo_images_df.empty:
            # json_normalize([]) yields a frame without any columns; fall
            # back to an empty frame that still carries the table schema so
            # column selection downstream has something to select from.
            repo_images_df = pd.DataFrame(columns=columns)

        select_statement_executor = SELECTQueryExecutor(
            repo_images_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )
        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an Exception when the response code is not 200

        Parameters
        ----------
        res : dict
            Response of the docker client, read through its "code" and
            "error" keys
        """
        if res["code"] != 200:
            # f-string instead of '+' so a non-string error payload cannot
            # turn the real failure into a TypeError.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "namespace",
            "repository",
            "digest",
            "tags",
            "last_pushed",
            "last_pulled",
            "status"
        ]
class DockerHubRepoTagTable(APITable):
    """The DockerHub Repo Tag Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls the details of a single repository tag via the handler's
        docker_client.get_repo_tag call

        Parameters
        ----------
        query : ast.Select
           Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Tag matching the query

        Raises
        ------
        NotImplementedError
            If namespace, repository or tag is compared with an operator
            other than '=', or if any of them is missing from the WHERE
            clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_tag_details',
            self.get_columns()
        )
        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # These three are consumed as API arguments. "repository" is also a
        # result column, but it is checked here first and therefore never
        # applied as a local DataFrame filter.
        api_params = ("namespace", "repository", "tag")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if not all(param in search_params for param in api_params):
            raise NotImplementedError("namespace, repository and tag column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_tag(
            search_params["namespace"],
            search_params["repository"],
            search_params["tag"]
        )
        self.check_res(res=response)

        content = response["content"]

        # Build the single result row straight from the declared columns so
        # each column is read from the response field of the same name.
        # (Previously "content_type" was filled from content["media_type"].)
        repo_tag_df = pd.json_normalize({column: content[column] for column in columns})

        select_statement_executor = SELECTQueryExecutor(
            repo_tag_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )
        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an Exception when the response code is not 200

        Parameters
        ----------
        res : dict
            Response of the docker client, read through its "code" and
            "error" keys
        """
        if res["code"] != 200:
            # f-string instead of '+' so a non-string error payload cannot
            # turn the real failure into a TypeError.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "creator",
            "id",
            "images",
            "last_updated",
            "last_updater",
            "last_updater_username",
            "name",
            "repository",
            "full_size",
            "v2",
            "tag_status",
            "tag_last_pulled",
            "tag_last_pushed",
            "media_type",
            "content_type"
        ]
class DockerHubRepoTagsTable(APITable):
    """The DockerHub Repo Tags Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/tags API

        Parameters
        ----------
        query : ast.Select
           Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Tags matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other
            than '=', or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_tags',
            self.get_columns()
        )
        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # namespace/repository are consumed as API arguments. "repository"
        # is also a result column, but it is checked here first and
        # therefore never applied as a local DataFrame filter.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if not all(param in search_params for param in api_params):
            raise NotImplementedError("namespace and repository column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_tags(
            search_params["namespace"],
            search_params["repository"]
        )
        self.check_res(res=response)

        content = response["content"]

        repo_tags_df = pd.json_normalize(content["results"])
        if repo_tags_df.empty:
            # json_normalize([]) yields a frame without any columns; fall
            # back to an empty frame that still carries the table schema so
            # column selection downstream has something to select from.
            repo_tags_df = pd.DataFrame(columns=columns)

        select_statement_executor = SELECTQueryExecutor(
            repo_tags_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )
        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an Exception when the response code is not 200

        Parameters
        ----------
        res : dict
            Response of the docker client, read through its "code" and
            "error" keys
        """
        if res["code"] != 200:
            # f-string instead of '+' so a non-string error payload cannot
            # turn the real failure into a TypeError.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "creator",
            "id",
            "images",
            "last_updated",
            "last_updater",
            "last_updater_username",
            "name",
            "repository",
            "full_size",
            "v2",
            "tag_status",
            "tag_last_pulled",
            "tag_last_pushed",
            "media_type",
            "content_type"
        ]