Coverage for mindsdb / integrations / handlers / github_handler / github_tables.py: 0%

287 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import re 

2from typing import List 

3import pandas as pd 

4 

5from mindsdb.integrations.libs.api_handler import APIResource 

6from mindsdb.integrations.utilities.sql_utils import ( 

7 FilterCondition, FilterOperator, SortColumn) 

8from mindsdb.utilities import log 

9 

10 

11logger = log.getLogger(__name__) 

12 

13 

class GithubIssuesTable(APIResource):
    """The GitHub Issue Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository issues" API

        Native filters (sent to the API and marked as applied):
        - state, assignee, creator, mentioned, milestone with ``=``
        - labels with ``=`` (comma-separated string) or ``IN``
        - updated with ``>`` (sent as the ``since`` argument)

        Native sorts:
        - created, updated, comments (only the first sort column is examined)

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            WHERE conditions; the ones handled here get ``applied = True``
        limit : int, optional
            Maximum number of rows to return, 20 when omitted
        sort : list of SortColumn, optional
            ORDER BY columns
        targets : list of str, optional
            Requested columns; ``closed_by`` is only filled in when listed here

        Returns
        -------
        pd.DataFrame
            GitHub issues matching the query
        """

        if limit is None:
            limit = 20

        # All arguments default to None; normalise so the loop and the
        # membership test below do not fail on a missing argument.
        conditions = conditions or []
        targets = targets or []

        issues_kwargs = {'state': 'all'}

        if sort is not None:
            for col in sort:
                if col.column in ('created', 'updated', 'comments'):
                    issues_kwargs['sort'] = col.column
                    issues_kwargs['direction'] = 'asc' if col.ascending else 'desc'
                    # Flag the column itself: `sort` is a plain list and
                    # cannot carry an `applied` attribute.
                    col.applied = True

                # supported only 1 column
                break

        for condition in conditions:
            if (condition.column in ('state', 'assignee', 'creator', 'mentioned', 'milestone')
                    and condition.op == FilterOperator.EQUAL):
                issues_kwargs[condition.column] = condition.value
                condition.applied = True

            elif condition.column == 'labels':
                # Only mark the condition as applied for the operators that
                # are actually translated into an API argument.
                if condition.op == FilterOperator.IN:
                    issues_kwargs['labels'] = condition.value
                    condition.applied = True
                elif condition.op == FilterOperator.EQUAL:
                    issues_kwargs['labels'] = condition.value.split(",")
                    condition.applied = True

            elif condition.column == 'updated' and condition.op == FilterOperator.GREATER_THAN:
                # NOTE(review): PyGithub appears to expect a datetime for
                # `since` - confirm the value type supplied by the caller.
                issues_kwargs['since'] = condition.value
                condition.applied = True

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        include_closed_by = 'closed_by' in targets

        data = []
        for an_issue in repo.get_issues(**issues_kwargs):
            item = {
                "number": an_issue.number,
                "title": an_issue.title,
                "state": an_issue.state,
                "creator": an_issue.user.login,
                "labels": ",".join(label.name for label in an_issue.labels),
                "assignees": ",".join(assignee.login for assignee in an_issue.assignees),
                "comments": an_issue.comments,
                "body": an_issue.body,
                "created": an_issue.created_at,
                "updated": an_issue.updated_at,
                "closed": an_issue.closed_at,
            }

            # Only read `closed_by` when the column was explicitly requested.
            if include_closed_by:
                item['closed_by'] = an_issue.closed_by.login if an_issue.closed_by else None

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def add(self, issues: List[dict]):
        """Inserts data into the GitHub "Create an issue" API

        Parameters
        ----------
        issues : list of dict
            One dict per issue; see ``_add`` for the keys that are read

        Raises
        ------
        ValueError
            If the connection has no ``api_key``, or if an issue cannot be created
        """

        if self.handler.connection_data.get("api_key", None) is None:
            raise ValueError(
                "Need an authenticated connection in order to insert a GitHub issue"
            )

        self.handler.connect()
        for issue in issues:
            self._add(issue)

    def _add(self, issue: dict):
        """Creates one GitHub issue from a row dict

        Reads the keys ``title`` (required), ``body``, ``assignees``
        (comma-separated logins), ``milestone`` (title) and ``labels``
        (comma-separated names). A milestone or label that does not exist
        yet is created first.

        Raises
        ------
        ValueError
            If ``title`` is missing, an assignee lookup fails, or GitHub
            rejects the new issue
        """
        # Validate before talking to the API.
        if issue.get("title", None) is None:
            raise ValueError("Title parameter is required to insert a GitHub issue")

        current_repo = self.handler.connection.get_repo(self.handler.repository)

        insert_kwargs = {}

        if issue.get("body", None):
            insert_kwargs["body"] = issue["body"]

        if issue.get("assignees", None):
            insert_kwargs["assignees"] = []
            for an_assignee in issue["assignees"].split(","):
                an_assignee = an_assignee.replace(" ", "")
                try:
                    github_user = self.handler.connection.get_user(an_assignee)
                except Exception as e:
                    raise ValueError(
                        f'Encountered an exception looking up assignee "{an_assignee}" in GitHub: '
                        f"{type(e).__name__} - {e}"
                    ) from e

                insert_kwargs["assignees"].append(github_user)

        if issue.get("milestone", None):
            existing_milestone = None
            for a_milestone in current_repo.get_milestones():
                if a_milestone.title == issue["milestone"]:
                    existing_milestone = a_milestone
                    break

            if existing_milestone is None:
                logger.debug(
                    f"Milestone \"{issue['milestone']}\" not found, creating it"
                )
                insert_kwargs["milestone"] = current_repo.create_milestone(
                    issue["milestone"]
                )
            else:
                logger.debug(f"Milestone \"{issue['milestone']}\" already exists")
                insert_kwargs["milestone"] = existing_milestone

        if issue.get("labels", None):
            # NOTE(review): every space is removed, so a multi-word label such
            # as "good first issue" becomes "goodfirstissue" - confirm intended.
            requested_names = []
            for a_label in issue["labels"].split(","):
                a_label = a_label.replace(" ", "")
                # Skip empty entries (e.g. "a,,b") and duplicates.
                if a_label and a_label not in requested_names:
                    requested_names.append(a_label)

            labels_by_name = {label.name: label for label in current_repo.get_labels()}

            new_label_names = [name for name in requested_names if name not in labels_by_name]
            if new_label_names:
                logger.debug(
                    "Inserting new labels: " + ", ".join(new_label_names)
                )
                for a_new_label in new_label_names:
                    # Keep the created label so it is attached to the issue;
                    # the listing fetched above does not contain it.
                    labels_by_name[a_new_label] = current_repo.create_label(a_new_label, "000000")

            insert_kwargs["labels"] = [labels_by_name[name] for name in requested_names]

        try:
            current_repo.create_issue(issue["title"], **insert_kwargs)
        except Exception as e:
            raise ValueError(
                f"Encountered an exception creating an issue in GitHub: "
                f"{type(e).__name__} - {e}"
            ) from e

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "number",
            "title",
            "state",
            "creator",
            "closed_by",
            "labels",
            "assignees",
            "comments",
            "body",
            "created",
            "updated",
            "closed",
        ]

230 

231 

class GithubPullRequestsTable(APIResource):
    """The GitHub Pull Requests Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository pull requests" API

        Native filters:
        - state: open, closed, or all (default)
        - head: Filter pulls by head user or head organization and branch name
          in the format of user:ref-name or organization:ref-name
        - base: Filter pulls by base branch name

        Native sorts:
        - created, updated, popularity (only the first sort column is examined)

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            WHERE conditions; the ones handled here get ``applied = True``
        limit : int, optional
            Maximum number of rows to return, 20 when omitted
        sort : list of SortColumn, optional
            ORDER BY columns
        targets : list of str, optional
            Requested columns; several columns are only filled in when
            listed here (see the loop at the end of the row construction)

        Returns
        -------
        pd.DataFrame
            GitHub pull requests matching the query
        """

        if limit is None:
            limit = 20

        # All arguments default to None; normalise so the loop and the
        # membership tests below do not fail on a missing argument.
        conditions = conditions or []
        targets = targets or []

        issues_kwargs = {'state': 'all'}

        if sort is not None:
            for col in sort:
                if col.column in ('created', 'updated', 'popularity'):
                    issues_kwargs['sort'] = col.column
                    issues_kwargs['direction'] = 'asc' if col.ascending else 'desc'
                    # Flag the column itself: `sort` is a plain list and
                    # cannot carry an `applied` attribute.
                    col.applied = True

                # supported only 1 column
                break

        for condition in conditions:
            if (condition.column in ('state', 'head', 'base')
                    and condition.op == FilterOperator.EQUAL):
                issues_kwargs[condition.column] = condition.value
                condition.applied = True

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        data = []
        for a_pull in repo.get_pulls(**issues_kwargs):

            item = {
                "number": a_pull.number,
                "title": a_pull.title,
                "state": a_pull.state,
                "creator": a_pull.user.login,
                "labels": ",".join(label.name for label in a_pull.labels),
                "milestone": a_pull.milestone.title if a_pull.milestone else None,
                "assignees": ",".join(assignee.login for assignee in a_pull.assignees),
                "reviewers": ",".join(reviewer.login for reviewer in a_pull.requested_reviewers),
                "teams": ",".join(team.name for team in a_pull.requested_teams),
                "draft": a_pull.draft,
                "body": a_pull.body,
                "base": a_pull.base.ref if a_pull.base else None,
                "head": a_pull.head.ref if a_pull.head else None,
                "created": a_pull.created_at,
                "updated": a_pull.updated_at,
                "merged": a_pull.merged_at,
                "closed": a_pull.closed_at,
            }

            # downloaded columns, use them only if explicitly requested
            for field in ('comments', 'review_comments', 'mergeable', 'mergeable_state', 'rebaseable',
                          'commits', 'additions', 'deletions', 'changed_files'):
                if field in targets:
                    item[field] = getattr(a_pull, field)
            if 'is_merged' in targets:
                item['is_merged'] = a_pull.merged
            if 'merged_by' in targets:
                # Stored under its own column; it must not overwrite `is_merged`.
                item['merged_by'] = a_pull.merged_by.login if a_pull.merged_by else None

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "number",
            "title",
            "state",
            "creator",
            "labels",
            "milestone",
            "assignees",
            "reviewers",
            "teams",
            "comments",
            "review_comments",
            "draft",
            "is_merged",
            "mergeable",
            "mergeable_state",
            "merged_by",
            "rebaseable",
            "body",
            "base",
            "head",
            "commits",
            "additions",
            "deletions",
            "changed_files",
            "created",
            "updated",
            "merged",
            "closed",
        ]

384 

385 

class GithubCommitsTable(APIResource):
    """The GitHub Commits Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List commits" API

        Native filters:
        - author with ``=``

        Sorting is not sent to the API: no sort column is marked as applied,
        so ordering is left to the caller.

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            WHERE conditions; an ``author`` condition gets ``applied = True``
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)
        sort : list of SortColumn, optional
            ORDER BY columns, not used here
        targets : list of str, optional
            Requested columns, not used here

        Returns
        -------
        pd.DataFrame
            GitHub commits matching the query

        Raises
        ------
        ValueError
            If the ``author`` column is filtered with an operator other than ``=``
        """

        limit = limit or 20

        commits_kwargs = {}

        # `sort` / `direction` are deliberately not forwarded: PyGithub's
        # `Repository.get_commits` takes sha, path, since, until and author
        # only, so extra keyword arguments would raise a TypeError.

        for condition in conditions or []:
            if condition.column == 'author':
                if condition.op != FilterOperator.EQUAL:
                    raise ValueError("Unsupported where operation for author")
                commits_kwargs["author"] = condition.value
                condition.applied = True

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        data = []
        for a_commit in repo.get_commits(**commits_kwargs):

            item = {
                "sha": a_commit.sha,
                "author": a_commit.commit.author.name,
                "date": a_commit.commit.author.date,
                "message": a_commit.commit.message,
            }

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        return ["sha", "author", "date", "message"]

459 

460 

class GithubReleasesTable(APIResource):
    """The GitHub Releases Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository releases" API

        ``conditions``, ``sort`` and ``targets`` are not used here; nothing is
        marked as applied.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)

        Returns
        -------
        pd.DataFrame
            GitHub releases; missing values are returned as empty strings
        """

        limit = limit or 20

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        data = []
        for a_release in repo.get_releases():
            author = a_release.author

            item = {
                "id": self.check_none(a_release.id),
                # Guard the attribute access: a release without an author
                # would otherwise raise on `.login`.
                "author": "" if author is None else self.check_none(author.login),
                "body": self.check_none(a_release.body),
                "created_at": self._timestamp_to_str(a_release.created_at),
                "html_url": self.check_none(a_release.html_url),
                "published_at": self._timestamp_to_str(a_release.published_at),
                "tag_name": self.check_none(a_release.tag_name),
                "title": self.check_none(a_release.title),
                "url": self.check_none(a_release.url),
                "zipball_url": self.check_none(a_release.zipball_url)
            }

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def check_none(self, val):
        """Returns an empty string for None, otherwise the value unchanged"""
        return "" if val is None else val

    def _timestamp_to_str(self, val):
        """Returns str(val), or an empty string when val is None

        The None test has to happen before the conversion: str(None) is the
        non-empty string "None", which check_none cannot detect afterwards.
        """
        return "" if val is None else str(val)

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        return [
            "id",
            "author",
            "body",
            "created_at",
            "html_url",
            "published_at",
            "tag_name",
            "title",
            "url",
            "zipball_url"
        ]

535 

536 

class GithubBranchesTable(APIResource):
    """The GitHub Branches Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository branches" API

        ``conditions``, ``sort`` and ``targets`` are not used here; nothing is
        marked as applied.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)

        Returns
        -------
        pd.DataFrame
            One row per branch, with the columns named by ``get_columns``
        """

        max_rows = limit or 20

        self.handler.connect()
        branches = self.handler.connection.get_repo(self.handler.repository).get_branches()

        rows = []
        for branch in branches:
            rows.append(self._branch_row(branch.raw_data))

            # Stop iterating as soon as enough rows have been collected.
            if len(rows) >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def _branch_row(self, raw_data: dict) -> dict:
        """Builds the row dict for one branch from its raw API payload"""
        row = {}
        row["name"] = self.check_none(raw_data["name"])
        # The browser URL is assembled from the repository name and branch name.
        row["url"] = "https://github.com/" + self.handler.repository + "/tree/" + raw_data["name"]
        row["commit_sha"] = self.check_none(raw_data["commit"]["sha"])
        row["commit_url"] = self.check_none(raw_data["commit"]["url"])
        row["protected"] = self.check_none(raw_data["protected"])
        return row

    def check_none(self, val):
        """Returns an empty string for None, otherwise the value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        columns = ["name", "url", "commit_sha", "commit_url", "protected"]
        return columns

600 

601 

class GithubContributorsTable(APIResource):
    """The GitHub Contributors Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository contributors" API

        ``conditions``, ``sort`` and ``targets`` are not used here; nothing is
        marked as applied.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)

        Returns
        -------
        pd.DataFrame
            GitHub contributors; missing values are returned as empty strings
        """

        limit = limit or 20

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        columns = self.get_columns()

        data = []
        for contributor in repo.get_contributors():
            raw_data = contributor.raw_data

            # Every column is read from the raw payload key of the same name.
            # Deriving the row from the column list keeps the two in step and
            # means `public_gists` is read from its own key.
            item = {column: self.check_none(raw_data[column]) for column in columns}

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=columns)

    def check_none(self, val):
        """Returns an empty string for None, otherwise the value unchanged"""
        return "" if val is None else val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        return [
            "avatar_url",
            "html_url",
            "followers_url",
            "subscriptions_url",
            "organizations_url",
            "repos_url",
            "events_url",
            "received_events_url",
            "site_admin",
            "name",
            "company",
            "blog",
            "location",
            "email",
            "hireable",
            "bio",
            "twitter_username",
            "public_repos",
            "public_gists",
            "followers",
            "following",
            "created_at",
            "updated_at"
        ]

701 

702 

class GithubProjectsTable(APIResource):
    """The GitHub Projects Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository projects" API

        ``conditions``, ``sort`` and ``targets`` are not used here; nothing is
        marked as applied.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)

        Returns
        -------
        pd.DataFrame
            One row per project, with the columns named by ``get_columns``
        """

        max_rows = limit or 20

        self.handler.connect()
        projects = self.handler.connection.get_repo(self.handler.repository).get_projects()

        rows = []
        for project in projects:
            rows.append(self._project_row(project.raw_data))

            # Stop iterating as soon as enough rows have been collected.
            if len(rows) >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def _project_row(self, raw_data: dict) -> dict:
        """Builds the row dict for one project from its raw API payload"""
        # Fields copied straight from the top level of the payload.
        top_level_keys = (
            "owner_url",
            "url",
            "html_url",
            "columns_url",
            "id",
            "node_id",
            "name",
            "body",
            "number",
            "state",
            "created_at",
            "updated_at",
        )
        # (row key, key inside the nested "creator" object)
        creator_keys = (
            ("creator_login", "login"),
            ("creator_id", "id"),
            ("creator_url", "url"),
            ("creator_html_url", "html_url"),
            ("creator_site_admin", "site_admin"),
        )

        row = {}
        for key in top_level_keys:
            row[key] = self.check_none(raw_data[key])
        for row_key, creator_key in creator_keys:
            row[row_key] = self.check_none(raw_data["creator"][creator_key])
        return row

    def check_none(self, val):
        """Returns an empty string for None, otherwise the value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        project_columns = [
            "owner_url",
            "url",
            "html_url",
            "columns_url",
            "id",
            "node_id",
            "name",
            "body",
            "number",
            "state",
            "created_at",
            "updated_at",
        ]
        creator_columns = [
            "creator_login",
            "creator_id",
            "creator_url",
            "creator_html_url",
            "creator_site_admin",
        ]
        return project_columns + creator_columns

790 

791 

class GithubMilestonesTable(APIResource):
    """The GitHub Milestones Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository milestones" API

        ``conditions``, ``sort`` and ``targets`` are not used here; nothing is
        marked as applied.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when omitted (or 0)

        Returns
        -------
        pd.DataFrame
            One row per milestone, with the columns named by ``get_columns``
        """

        max_rows = limit or 20

        self.handler.connect()
        milestones = self.handler.connection.get_repo(self.handler.repository).get_milestones()

        rows = []
        for milestone in milestones:
            rows.append(self._milestone_row(milestone.raw_data))

            # Stop iterating as soon as enough rows have been collected.
            if len(rows) >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def _milestone_row(self, raw_data: dict) -> dict:
        """Builds the row dict for one milestone from its raw API payload"""
        # Each row key is read from the payload key of the same name.
        payload_keys = (
            "url",
            "html_url",
            "labels_url",
            "id",
            "node_id",
            "number",
            "title",
            "description",
            "creator",
            "open_issues",
            "closed_issues",
            "state",
            "created_at",
            "updated_at",
            "due_on",
            "closed_at",
        )
        row = {}
        for key in payload_keys:
            row[key] = self.check_none(raw_data[key])
        return row

    def check_none(self, val):
        """Returns an empty string for None, otherwise the value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """

        identity = ["url", "html_url", "labels_url", "id", "node_id", "number"]
        details = ["title", "description", "creator", "open_issues", "closed_issues", "state"]
        timestamps = ["created_at", "updated_at", "due_on", "closed_at"]
        return identity + details + timestamps

877 

878 

class GithubFilesTable(APIResource):
    """The GitHub Files Table implementation"""

    def get_path(self, repo, path, file_matches=None, file_not_matches=None, limit=None):
        """Recursively collects the files found under a repository path

        Parameters
        ----------
        repo
            Repository object providing ``get_contents(path)``
        path : str
            Directory (or single file) to read; sub-directories are descended into
        file_matches : list of str, optional
            Regex patterns; when given, a file name must match at least one
        file_not_matches : list of str, optional
            Regex patterns; when given, a file name must match none of them
        limit : int, optional
            Maximum number of files to return; None means no limit

        Returns
        -------
        list of dict
            One dict per file with the keys ``path``, ``name`` and ``content``
        """
        if limit is not None and limit <= 0:
            return []

        contents = repo.get_contents(path)
        # A path that points at a single file yields one object, not a list.
        if not isinstance(contents, list):
            contents = [contents]

        res = []
        for item in contents:
            if item.type == "dir":
                # Hand the remaining budget down so that nested directories
                # stop as soon as the overall limit is reached.
                remaining = None if limit is None else limit - len(res)
                res.extend(self.get_path(repo, item.path, file_matches, file_not_matches, remaining))
            elif self._name_is_selected(item.name, file_matches, file_not_matches):
                res.append({
                    'path': item.path,
                    'name': item.name,
                    'content': item.decoded_content,
                })

            if limit is not None and len(res) >= limit:
                break
        return res

    @staticmethod
    def _name_is_selected(name, file_matches, file_not_matches) -> bool:
        """Tells whether a file name passes the include and exclude patterns"""
        if file_matches is not None and not any(re.match(pattern, name) for pattern in file_matches):
            return False
        if file_not_matches is not None and any(re.match(pattern, name) for pattern in file_not_matches):
            return False
        return True

    @staticmethod
    def _like_to_regex(value) -> str:
        """Converts a SQL LIKE value into an anchored regex

        ``%`` matches any run of characters; every other character is matched
        literally.
        """
        return '^' + '.*'.join(re.escape(part) for part in str(value).split('%')) + '$'

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls files from the repository, descending into sub-directories

        Native filters:
        - path with ``=``: directory to start from (repository root by default)
        - name with ``=``, ``LIKE`` or ``NOT LIKE``

        ``sort`` and ``targets`` are not used here.

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            WHERE conditions; the ones handled here get ``applied = True``
        limit : int, optional
            Maximum number of files to return, 10 when omitted

        Returns
        -------
        pd.DataFrame
            One row per file with the columns ``path``, ``name`` and ``content``
        """

        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        # TODO sort

        path = ''
        file_matches = []
        file_not_matches = []
        for condition in conditions or []:
            if condition.column == 'path' and condition.op == FilterOperator.EQUAL:
                path = condition.value
                condition.applied = True
            elif condition.column == 'name':
                # The value is escaped so that regex metacharacters in a file
                # name (most commonly ".") are matched literally.
                if condition.op == FilterOperator.EQUAL:
                    file_matches.append(f'^{re.escape(str(condition.value))}$')
                elif condition.op == FilterOperator.LIKE:
                    file_matches.append(self._like_to_regex(condition.value))
                elif condition.op == FilterOperator.NOT_LIKE:
                    file_not_matches.append(self._like_to_regex(condition.value))
                else:
                    # Unsupported operator: leave the condition unapplied.
                    continue
                condition.applied = True

        if limit is None:
            limit = 10

        res = self.get_path(repo, path, file_matches or None, file_not_matches or None, limit)
        return pd.DataFrame(res, columns=self.get_columns())

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return ['path', 'name', 'content']

955 

956 def get_columns(self) -> list: 

957 return ['path', 'name', 'content']