Coverage for mindsdb / integrations / handlers / dockerhub_handler / dockerhub_tables.py: 0%

171 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import pandas as pd 

2from typing import List 

3from mindsdb.integrations.libs.api_handler import APITable 

4from mindsdb.integrations.utilities.handlers.query_utilities import SELECTQueryParser, SELECTQueryExecutor 

5from mindsdb.utilities import log 

6from mindsdb_sql_parser import ast 

7 

8logger = log.getLogger(__name__) 

9 

10 

class DockerHubRepoImagesSummaryTable(APITable):
    """The DockerHub Repo Images Summary Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://docs.docker.com/docker-hub/api/latest/#tag/images API

        The WHERE clause must contain ``namespace = ...`` and ``repository = ...``;
        both values are forwarded to ``docker_client.get_images_summary``. Conditions
        on the columns listed by ``get_columns`` are applied to the fetched data.
        Conditions on any other column are not applied.

        Parameters
        ----------
        query : ast.Select
            Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            repo images summary matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other than '=',
            or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_images_summary',
            self.get_columns()
        )

        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # Columns that are consumed as API arguments instead of DataFrame filters.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if any(param not in search_params for param in api_params):
            raise NotImplementedError("Both namespace and repository columns have to be present in WHERE clause.")

        response = self.handler.docker_client.get_images_summary(search_params["namespace"], search_params["repository"])

        self.check_res(res=response)

        content = response["content"]
        statistics = content["statistics"]

        # The summary is a single row: flatten the nested statistics next to active_from.
        repo_images_summary_df = pd.json_normalize({
            "active_from": content["active_from"],
            "total": statistics["total"],
            "active": statistics["active"],
            "inactive": statistics["inactive"],
        })

        select_statement_executor = SELECTQueryExecutor(
            repo_images_summary_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )

        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an exception when the API response code is not 200

        Parameters
        ----------
        res : dict
            Response with a "code" entry and, on failure, an "error" entry

        Raises
        ------
        Exception
            If ``res["code"]`` is not 200
        """
        if res["code"] != 200:
            # Format instead of concatenating so a non-string error payload
            # does not raise TypeError and hide the actual API failure.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "active_from",
            "total",
            "active",
            "inactive"
        ]

103 

104 

class DockerHubOrgSettingsTable(APITable):
    """The DockerHub Org Settings Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/orgs/{name}/settings API

        The WHERE clause must contain ``organization = ...``; the value is forwarded
        to ``docker_client.get_org_settings``. Conditions on the columns listed by
        ``get_columns`` are applied to the fetched data. Conditions on any other
        column are not applied.

        Parameters
        ----------
        query : ast.Select
            Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            org settings matching the query

        Raises
        ------
        NotImplementedError
            If organization is compared with an operator other than '=',
            or if it is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'org_settings',
            self.get_columns()
        )

        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            if arg1 == 'organization':
                if op != '=':
                    raise NotImplementedError("Only '=' operator is supported for organization column.")
                search_params["organization"] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if "organization" not in search_params:
            raise NotImplementedError("organization column has to be present in where clause.")

        response = self.handler.docker_client.get_org_settings(search_params["organization"])

        self.check_res(res=response)

        restricted_images = response["content"]["restricted_images"]

        # The settings are a single row: expose each restricted_images flag as its own column.
        organization_df = pd.json_normalize({
            "restricted_images_enabled": restricted_images["enabled"],
            "restricted_images_allow_official_images": restricted_images["allow_official_images"],
            "restricted_images_allow_verified_publishers": restricted_images["allow_verified_publishers"],
        })

        select_statement_executor = SELECTQueryExecutor(
            organization_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )

        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an exception when the API response code is not 200

        Parameters
        ----------
        res : dict
            Response with a "code" entry and, on failure, an "error" entry

        Raises
        ------
        Exception
            If ``res["code"]`` is not 200
        """
        if res["code"] != 200:
            # Format instead of concatenating so a non-string error payload
            # does not raise TypeError and hide the actual API failure.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "restricted_images_enabled",
            "restricted_images_allow_official_images",
            "restricted_images_allow_verified_publishers"
        ]

189 

190 

class DockerHubRepoImagesTable(APITable):
    """The DockerHub Repo Images Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/images API

        The WHERE clause must contain ``namespace = ...`` and ``repository = ...``;
        both values are forwarded to ``docker_client.get_repo_images``. Conditions
        on the remaining columns listed by ``get_columns`` are applied to the
        fetched data. Conditions on any other column are not applied.

        Parameters
        ----------
        query : ast.Select
            Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Images matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other than '=',
            or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_images',
            self.get_columns()
        )

        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # Columns that are consumed as API arguments instead of DataFrame filters.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            # Checked first: both names also appear in get_columns().
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if any(param not in search_params for param in api_params):
            raise NotImplementedError("namespace and repository column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_images(search_params["namespace"], search_params["repository"])

        self.check_res(res=response)

        results = response["content"]["results"]

        if results:
            repo_images_df = pd.json_normalize(results)
        else:
            # json_normalize of an empty list yields a frame without columns;
            # keep the declared columns so column selection still has something
            # to select from (assumes the executor selects by column name).
            repo_images_df = pd.DataFrame(columns=columns)

        select_statement_executor = SELECTQueryExecutor(
            repo_images_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )

        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an exception when the API response code is not 200

        Parameters
        ----------
        res : dict
            Response with a "code" entry and, on failure, an "error" entry

        Raises
        ------
        Exception
            If ``res["code"]`` is not 200
        """
        if res["code"] != 200:
            # Format instead of concatenating so a non-string error payload
            # does not raise TypeError and hide the actual API failure.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "namespace",
            "repository",
            "digest",
            "tags",
            "last_pushed",
            "last_pulled",
            "status"
        ]

285 

286 

class DockerHubRepoTagTable(APITable):
    """The DockerHub Repo Tag Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls the details of a single repository tag from the DockerHub API

        The WHERE clause must contain ``namespace = ...``, ``repository = ...`` and
        ``tag = ...``; the three values are forwarded to
        ``docker_client.get_repo_tag``. Conditions on the remaining columns listed
        by ``get_columns`` are applied to the fetched data. Conditions on any other
        column are not applied.

        Parameters
        ----------
        query : ast.Select
            Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Tag matching the query

        Raises
        ------
        NotImplementedError
            If namespace, repository or tag is compared with an operator other
            than '=', or if any of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_tag_details',
            self.get_columns()
        )

        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # Columns that are consumed as API arguments instead of DataFrame filters.
        api_params = ("namespace", "repository", "tag")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            # Checked first: "repository" also appears in get_columns().
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if any(param not in search_params for param in api_params):
            raise NotImplementedError("namespace, repository and tag column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_tag(search_params["namespace"], search_params["repository"], search_params["tag"])

        self.check_res(res=response)

        content = response["content"]

        # One row, one entry per declared column, each read from the response
        # field of the same name. Previously "content_type" was filled from
        # content["media_type"], duplicating the media type.
        repo_tag_summary_df = pd.json_normalize({column: content[column] for column in columns})

        select_statement_executor = SELECTQueryExecutor(
            repo_tag_summary_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )

        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an exception when the API response code is not 200

        Parameters
        ----------
        res : dict
            Response with a "code" entry and, on failure, an "error" entry

        Raises
        ------
        Exception
            If ``res["code"]`` is not 200
        """
        if res["code"] != 200:
            # Format instead of concatenating so a non-string error payload
            # does not raise TypeError and hide the actual API failure.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "creator",
            "id",
            "images",
            "last_updated",
            "last_updater",
            "last_updater_username",
            "name",
            "repository",
            "full_size",
            "v2",
            "tag_status",
            "tag_last_pulled",
            "tag_last_pushed",
            "media_type",
            "content_type"
        ]

409 

410 

class DockerHubRepoTagsTable(APITable):
    """The DockerHub Repo Tags Table implementation"""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/tags API

        The WHERE clause must contain ``namespace = ...`` and ``repository = ...``;
        both values are forwarded to ``docker_client.get_repo_tags``. Conditions
        on the remaining columns listed by ``get_columns`` are applied to the
        fetched data. Conditions on any other column are not applied.

        Parameters
        ----------
        query : ast.Select
            Given SQL SELECT query

        Returns
        -------
        pd.DataFrame
            Repo Tags matching the query

        Raises
        ------
        NotImplementedError
            If namespace or repository is compared with an operator other than '=',
            or if either of them is missing from the WHERE clause
        Exception
            If the API response code is not 200
        """
        select_statement_parser = SELECTQueryParser(
            query,
            'repo_tags',
            self.get_columns()
        )

        selected_columns, where_conditions, order_by_conditions, result_limit = select_statement_parser.parse_query()

        # Columns that are consumed as API arguments instead of DataFrame filters.
        api_params = ("namespace", "repository")
        columns = self.get_columns()

        search_params = {}
        subset_where_conditions = []
        for op, arg1, arg2 in where_conditions:
            # Checked first: "repository" also appears in get_columns().
            if arg1 in api_params:
                if op != '=':
                    raise NotImplementedError(f"Only '=' operator is supported for {arg1} column.")
                search_params[arg1] = arg2
            elif arg1 in columns:
                subset_where_conditions.append([op, arg1, arg2])

        if any(param not in search_params for param in api_params):
            raise NotImplementedError("namespace and repository column has to be present in where clause.")

        response = self.handler.docker_client.get_repo_tags(search_params["namespace"], search_params["repository"])

        self.check_res(res=response)

        results = response["content"]["results"]

        if results:
            repo_tags_summary_df = pd.json_normalize(results)
        else:
            # json_normalize of an empty list yields a frame without columns;
            # keep the declared columns so column selection still has something
            # to select from (assumes the executor selects by column name).
            repo_tags_summary_df = pd.DataFrame(columns=columns)

        select_statement_executor = SELECTQueryExecutor(
            repo_tags_summary_df,
            selected_columns,
            subset_where_conditions,
            order_by_conditions,
            result_limit
        )

        return select_statement_executor.execute_query()

    def check_res(self, res):
        """Raises an exception when the API response code is not 200

        Parameters
        ----------
        res : dict
            Response with a "code" entry and, on failure, an "error" entry

        Raises
        ------
        Exception
            If ``res["code"]`` is not 200
        """
        if res["code"] != 200:
            # Format instead of concatenating so a non-string error payload
            # does not raise TypeError and hide the actual API failure.
            raise Exception(f"Error fetching results - {res['error']}")

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "creator",
            "id",
            "images",
            "last_updated",
            "last_updater",
            "last_updater_username",
            "name",
            "repository",
            "full_size",
            "v2",
            "tag_status",
            "tag_last_pulled",
            "tag_last_pushed",
            "media_type",
            "content_type"
        ]