Coverage for mindsdb/integrations/handlers/github_handler/github_tables.py

1import re

2from typing import List

3import pandas as pd

5from mindsdb.integrations.libs.api_handler import APIResource

6from mindsdb.integrations.utilities.sql_utils import (

7 FilterCondition, FilterOperator, SortColumn)

8from mindsdb.utilities import log

11logger = log.getLogger(__name__)

class GithubIssuesTable(APIResource):
    """The GitHub Issue Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository issues" API

        Filters forwarded to the API call (and flagged ``applied``):

        - ``state``, ``assignee``, ``creator``, ``mentioned``, ``milestone``
          with the equality operator
        - ``labels`` with ``IN`` (value passed as is) or equality
          (comma-separated string, split into a list)
        - ``updated`` with greater-than, forwarded as ``since``

        Sorts forwarded to the API call: the first of ``created``,
        ``updated`` or ``comments`` found in ``sort``.

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            Query filters; ``None`` is treated as no filters.
        limit : int, optional
            Maximum number of rows to return, 20 when not given.
        sort : list of SortColumn, optional
            Requested ordering.
        targets : list of str, optional
            Requested columns; ``closed_by`` is only filled in when it is
            listed here. ``None`` is treated as an empty list.

        Returns
        -------
        pd.DataFrame
            GitHub issues matching the query
        """
        if limit is None:
            limit = 20

        # All three arguments default to None; normalise them so the loops
        # and membership tests below cannot fail on a missing argument.
        conditions = conditions or []
        sort = sort or []
        targets = targets or []

        issues_kwargs = {'state': 'all'}

        for col in sort:
            if col.column in ('created', 'updated', 'comments'):
                issues_kwargs['sort'] = col.column
                issues_kwargs['direction'] = 'asc' if col.ascending else 'desc'
                # The flag belongs to the individual column; ``sort`` is a
                # plain list and cannot carry attributes.
                col.applied = True

                # supported only 1 column
                break

        for condition in conditions:
            if (condition.column in ('state', 'assignee', 'creator', 'mentioned', 'milestone')
                    and condition.op == FilterOperator.EQUAL):
                issues_kwargs[condition.column] = condition.value
                condition.applied = True

            elif condition.column == 'labels':
                # Only flag the condition when it was really forwarded; any
                # other operator on ``labels`` stays unapplied.
                if condition.op == FilterOperator.IN:
                    issues_kwargs['labels'] = condition.value
                    condition.applied = True
                elif condition.op == FilterOperator.EQUAL:
                    issues_kwargs['labels'] = condition.value.split(",")
                    condition.applied = True

            elif condition.column == 'updated' and condition.op == FilterOperator.GREATER_THAN:
                issues_kwargs['since'] = condition.value
                condition.applied = True

        self.handler.connect()

        want_closed_by = 'closed_by' in targets

        data = []
        repo = self.handler.connection.get_repo(self.handler.repository)
        for an_issue in repo.get_issues(**issues_kwargs):
            item = {
                "number": an_issue.number,
                "title": an_issue.title,
                "state": an_issue.state,
                "creator": an_issue.user.login,
                "labels": ",".join(label.name for label in an_issue.labels),
                "assignees": ",".join(
                    assignee.login for assignee in an_issue.assignees
                ),
                "comments": an_issue.comments,
                "body": an_issue.body,
                "created": an_issue.created_at,
                "updated": an_issue.updated_at,
                "closed": an_issue.closed_at,
            }

            # Read only when explicitly requested.
            if want_closed_by:
                item['closed_by'] = an_issue.closed_by.login if an_issue.closed_by else None

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def add(self, issues: List[dict]):
        """Inserts data into the GitHub "Create an issue" API

        Parameters
        ----------
        issues : list of dict
            One dict per issue to create; see ``_add`` for the keys read.

        Raises
        ------
        ValueError
            If the connection has no ``api_key``, or if an individual issue
            cannot be created (see ``_add``).
        """
        if self.handler.connection_data.get("api_key") is None:
            raise ValueError(
                "Need an authenticated connection in order to insert a GitHub issue"
            )

        self.handler.connect()
        for issue in issues:
            self._add(issue)

    def _add(self, issue: dict):
        """Creates a single GitHub issue from a row dict

        Keys read from ``issue``: ``title`` (required), ``body``,
        ``assignees`` (comma-separated logins), ``milestone`` (title, created
        when no milestone with that title is found) and ``labels``
        (comma-separated names, missing ones are created with colour
        ``000000``).

        Raises
        ------
        ValueError
            If ``title`` is missing, an assignee lookup fails, or the issue
            creation call fails.
        """
        current_repo = self.handler.connection.get_repo(self.handler.repository)

        insert_kwargs = {}

        if issue.get("title") is None:
            raise ValueError("Title parameter is required to insert a GitHub issue")

        if issue.get("body"):
            insert_kwargs["body"] = issue["body"]

        if issue.get("assignees"):
            insert_kwargs["assignees"] = []
            for an_assignee in issue["assignees"].split(","):
                an_assignee = an_assignee.replace(" ", "")
                if not an_assignee:
                    # Tolerate stray commas such as "alice,,bob" or "alice,".
                    continue
                try:
                    github_user = self.handler.connection.get_user(an_assignee)
                except Exception as e:
                    raise ValueError(
                        f'Encountered an exception looking up assignee "{an_assignee}" in GitHub: '
                        f"{type(e).__name__} - {e}"
                    ) from e

                insert_kwargs["assignees"].append(github_user)

        if issue.get("milestone"):
            existing_milestone = next(
                (
                    a_milestone
                    for a_milestone in current_repo.get_milestones()
                    if a_milestone.title == issue["milestone"]
                ),
                None,
            )

            if existing_milestone is None:
                logger.debug(
                    f"Milestone \"{issue['milestone']}\" not found, creating it"
                )
                insert_kwargs["milestone"] = current_repo.create_milestone(
                    issue["milestone"]
                )
            else:
                logger.debug(f"Milestone \"{issue['milestone']}\" already exists")
                insert_kwargs["milestone"] = existing_milestone

        if issue.get("labels"):
            # Trim surrounding whitespace only: label names may contain
            # spaces. De-duplicate while keeping the requested order.
            requested_labels = list(dict.fromkeys(
                name.strip()
                for name in issue["labels"].split(",")
                if name.strip()
            ))

            labels_by_name = {
                label.name: label for label in current_repo.get_labels()
            }

            new_labels = [
                name for name in requested_labels if name not in labels_by_name
            ]
            if new_labels:
                logger.debug("Inserting new labels: " + ", ".join(new_labels))
                for a_new_label in new_labels:
                    # Keep the created label so it is attached to the issue;
                    # the listing above was fetched before it existed.
                    labels_by_name[a_new_label] = current_repo.create_label(
                        a_new_label, "000000"
                    )

            insert_kwargs["labels"] = [
                labels_by_name[name] for name in requested_labels
            ]

        try:
            current_repo.create_issue(issue["title"], **insert_kwargs)
        except Exception as e:
            raise ValueError(
                f"Encountered an exception creating an issue in GitHub: "
                f"{type(e).__name__} - {e}"
            ) from e

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "number",
            "title",
            "state",
            "creator",
            "closed_by",
            "labels",
            "assignees",
            "comments",
            "body",
            "created",
            "updated",
            "closed",
        ]

230

231

class GithubPullRequestsTable(APIResource):
    """The GitHub Pull Requests Table implementation"""

    # Columns that are read from the pull request object only when the query
    # explicitly asks for them.
    _ON_DEMAND_FIELDS = (
        'comments', 'review_comments', 'mergeable', 'mergeable_state', 'rebaseable',
        'commits', 'additions', 'deletions', 'changed_files',
    )

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository pull requests" API

        Native filters:
        - state: open, closed, or all (default)
        - head: Filter pulls by head user or head organization and branch name
          in the format of user:ref-name or organization:ref-name
        - base: Filter pulls by base branch name

        Native sorts:
        - created, updated, popularity

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            Query filters; ``None`` is treated as no filters.
        limit : int, optional
            Maximum number of rows to return, 20 when not given.
        sort : list of SortColumn, optional
            Requested ordering; only the first natively supported column is
            forwarded.
        targets : list of str, optional
            Requested columns; the on-demand columns (``comments``,
            ``review_comments``, ``mergeable``, ``mergeable_state``,
            ``rebaseable``, ``commits``, ``additions``, ``deletions``,
            ``changed_files``, ``is_merged``, ``merged_by``) are only filled
            in when listed here. ``None`` is treated as an empty list.

        Returns
        -------
        pd.DataFrame
            GitHub pull requests matching the query
        """
        if limit is None:
            limit = 20

        # The arguments default to None; normalise them before iterating.
        conditions = conditions or []
        sort = sort or []
        targets = targets or []

        pulls_kwargs = {'state': 'all'}

        for col in sort:
            if col.column in ('created', 'updated', 'popularity'):
                pulls_kwargs['sort'] = col.column
                pulls_kwargs['direction'] = 'asc' if col.ascending else 'desc'
                # Flag the column itself; ``sort`` is a plain list.
                col.applied = True

                # supported only 1 column
                break

        for condition in conditions:
            if (condition.column in ('state', 'head', 'base')
                    and condition.op == FilterOperator.EQUAL):
                pulls_kwargs[condition.column] = condition.value
                condition.applied = True

        self.handler.connect()

        on_demand = [name for name in self._ON_DEMAND_FIELDS if name in targets]
        want_is_merged = 'is_merged' in targets
        want_merged_by = 'merged_by' in targets

        data = []
        repo = self.handler.connection.get_repo(self.handler.repository)
        for a_pull in repo.get_pulls(**pulls_kwargs):
            item = {
                "number": a_pull.number,
                "title": a_pull.title,
                "state": a_pull.state,
                "creator": a_pull.user.login,
                "labels": ",".join(label.name for label in a_pull.labels),
                "milestone": a_pull.milestone.title if a_pull.milestone else None,
                "assignees": ",".join(
                    assignee.login for assignee in a_pull.assignees
                ),
                "reviewers": ",".join(
                    reviewer.login for reviewer in a_pull.requested_reviewers
                ),
                "teams": ",".join(
                    team.name for team in a_pull.requested_teams
                ),
                "draft": a_pull.draft,
                "body": a_pull.body,
                "base": a_pull.base.ref if a_pull.base else None,
                "head": a_pull.head.ref if a_pull.head else None,
                "created": a_pull.created_at,
                "updated": a_pull.updated_at,
                "merged": a_pull.merged_at,
                "closed": a_pull.closed_at,
            }

            # downloaded columns, use them only if explicitly requested
            for field in on_demand:
                item[field] = getattr(a_pull, field)
            if want_is_merged:
                item['is_merged'] = a_pull.merged
            if want_merged_by:
                # Stored under its own column; writing it to 'is_merged'
                # would clobber that column and leave 'merged_by' empty.
                item['merged_by'] = a_pull.merged_by.login if a_pull.merged_by else None

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "number",
            "title",
            "state",
            "creator",
            "labels",
            "milestone",
            "assignees",
            "reviewers",
            "teams",
            "comments",
            "review_comments",
            "draft",
            "is_merged",
            "mergeable",
            "mergeable_state",
            "merged_by",
            "rebaseable",
            "body",
            "base",
            "head",
            "commits",
            "additions",
            "deletions",
            "changed_files",
            "created",
            "updated",
            "merged",
            "closed",
        ]

384

385

class GithubCommitsTable(APIResource):
    """The GitHub Commits Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List commits" API

        Only an equality filter on ``author`` is forwarded to the API call.

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            Query filters; ``None`` is treated as no filters.
        limit : int, optional
            Maximum number of rows to return, 20 when not given.
        sort : list of SortColumn, optional
            Not forwarded to the API and never flagged as applied here.
        targets : list of str, optional
            Not used by this table.

        Returns
        -------
        pd.DataFrame
            GitHub commits matching the query

        Raises
        ------
        ValueError
            If ``author`` is filtered with an operator other than equality
        """
        limit = limit or 20

        commits_kwargs = {}

        # NOTE(review): no sort push-down here. Per the PyGithub reference,
        # ``Repository.get_commits`` takes sha/path/since/until/author only,
        # so ``sort``/``direction`` keywords would be rejected. The sort
        # columns are left unflagged; presumably the caller orders the
        # returned frame for unapplied sorts -- confirm against APIResource.

        for condition in conditions or []:
            if condition.column == 'author':
                if condition.op != FilterOperator.EQUAL:
                    raise ValueError("Unsupported where operation for author")
                commits_kwargs["author"] = condition.value
                condition.applied = True

        self.handler.connect()

        data = []
        repo = self.handler.connection.get_repo(self.handler.repository)
        for a_commit in repo.get_commits(**commits_kwargs):
            git_author = a_commit.commit.author
            data.append({
                "sha": a_commit.sha,
                "author": git_author.name,
                "date": git_author.date,
                "message": a_commit.commit.message,
            })

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return ["sha", "author", "date", "message"]

459

460

class GithubReleasesTable(APIResource):
    """The GitHub Releases Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository releases" API

        ``conditions``, ``sort`` and ``targets`` are accepted for interface
        compatibility but are not used by this table.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when not given.

        Returns
        -------
        pd.DataFrame
            GitHub releases; missing values are returned as empty strings
            and the two timestamps as strings.
        """
        limit = limit or 20

        self.handler.connect()

        data = []
        repo = self.handler.connection.get_repo(self.handler.repository)
        for a_release in repo.get_releases():
            author = a_release.author
            item = {
                "id": self.check_none(a_release.id),
                "author": self.check_none(author.login if author else None),
                "body": self.check_none(a_release.body),
                "created_at": self._stringify(a_release.created_at),
                "html_url": self.check_none(a_release.html_url),
                "published_at": self._stringify(a_release.published_at),
                "tag_name": self.check_none(a_release.tag_name),
                "title": self.check_none(a_release.title),
                "url": self.check_none(a_release.url),
                "zipball_url": self.check_none(a_release.zipball_url),
            }

            data.append(item)

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=self.get_columns())

    def check_none(self, val):
        """Returns an empty string in place of ``None``, any other value unchanged"""
        return "" if val is None else val

    def _stringify(self, val):
        """Returns ``str(val)``, or an empty string when ``val`` is ``None``

        The ``None`` test has to happen before the conversion: ``str(None)``
        is the non-empty text ``"None"``, which ``check_none`` cannot detect.
        """
        return "" if val is None else str(val)

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "id",
            "author",
            "body",
            "created_at",
            "html_url",
            "published_at",
            "tag_name",
            "title",
            "url",
            "zipball_url",
        ]

535

536

class GithubBranchesTable(APIResource):
    """The GitHub Branches Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository branches" API

        ``conditions``, ``sort`` and ``targets`` are accepted but not used
        by this table.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when not given.

        Returns
        -------
        pd.DataFrame
            One row per branch, with ``None`` values replaced by ``""``
        """
        max_rows = limit or 20

        self.handler.connect()
        repository = self.handler.connection.get_repo(self.handler.repository)

        rows = []
        # ``fetched`` counts rows collected so far (one per iteration).
        for fetched, branch in enumerate(repository.get_branches(), start=1):
            payload = branch.raw_data

            name_value = self.check_none(payload["name"])
            # Web URL of the branch, assembled from the repository name.
            tree_url = "".join(
                ("https://github.com/", self.handler.repository, "/tree/", payload["name"])
            )

            rows.append({
                "name": name_value,
                "url": tree_url,
                "commit_sha": self.check_none(payload["commit"]["sha"]),
                "commit_url": self.check_none(payload["commit"]["url"]),
                "protected": self.check_none(payload["protected"]),
            })

            if fetched >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def check_none(self, val):
        """Returns an empty string in place of ``None``, any other value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        column_names = ("name", "url", "commit_sha", "commit_url", "protected")
        return [*column_names]

600

601

class GithubContributorsTable(APIResource):
    """The GitHub Contributors Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository contributors" API

        ``conditions``, ``sort`` and ``targets`` are accepted for interface
        compatibility but are not used by this table.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when not given.

        Returns
        -------
        pd.DataFrame
            One row per contributor, with ``None`` values replaced by ``""``
        """
        limit = limit or 20

        self.handler.connect()

        columns = self.get_columns()

        data = []
        repo = self.handler.connection.get_repo(self.handler.repository)
        for contributor in repo.get_contributors():
            raw_data = contributor.raw_data

            # Every column is read from the raw payload key of the same
            # name, so each column is guaranteed to be filled from its own
            # key (``public_gists`` included).
            data.append(
                {column: self.check_none(raw_data[column]) for column in columns}
            )

            if limit <= len(data):
                break

        return pd.DataFrame(data, columns=columns)

    def check_none(self, val):
        """Returns an empty string in place of ``None``, any other value unchanged"""
        return "" if val is None else val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        return [
            "avatar_url",
            "html_url",
            "followers_url",
            "subscriptions_url",
            "organizations_url",
            "repos_url",
            "events_url",
            "received_events_url",
            "site_admin",
            "name",
            "company",
            "blog",
            "location",
            "email",
            "hireable",
            "bio",
            "twitter_username",
            "public_repos",
            "public_gists",
            "followers",
            "following",
            "created_at",
            "updated_at",
        ]

701

702

class GithubProjectsTable(APIResource):
    """The GitHub Projects Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository projects" API

        ``conditions``, ``sort`` and ``targets`` are accepted but not used
        by this table.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when not given.

        Returns
        -------
        pd.DataFrame
            One row per project, with ``None`` values replaced by ``""``
        """
        # Columns copied from the payload key of the same name.
        direct_keys = (
            "owner_url", "url", "html_url", "columns_url", "id", "node_id",
            "name", "body", "number", "state", "created_at", "updated_at",
        )
        # (output column, key inside the nested "creator" object)
        creator_keys = (
            ("creator_login", "login"),
            ("creator_id", "id"),
            ("creator_url", "url"),
            ("creator_html_url", "html_url"),
            ("creator_site_admin", "site_admin"),
        )

        max_rows = limit or 20

        self.handler.connect()
        repository = self.handler.connection.get_repo(self.handler.repository)

        rows = []
        for fetched, project in enumerate(repository.get_projects(), start=1):
            payload = project.raw_data

            row = {}
            for key in direct_keys:
                row[key] = self.check_none(payload[key])
            for column, key in creator_keys:
                row[column] = self.check_none(payload["creator"][key])
            rows.append(row)

            if fetched >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def check_none(self, val):
        """Returns an empty string in place of ``None``, any other value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        project_columns = [
            "owner_url", "url", "html_url", "columns_url", "id", "node_id",
            "name", "body", "number", "state", "created_at", "updated_at",
        ]
        creator_columns = [
            "creator_login", "creator_id", "creator_url",
            "creator_html_url", "creator_site_admin",
        ]
        return project_columns + creator_columns

790

791

class GithubMilestonesTable(APIResource):
    """The GitHub Milestones Table implementation"""

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls data from the GitHub "List repository milestones" API

        ``conditions``, ``sort`` and ``targets`` are accepted but not used
        by this table.

        Parameters
        ----------
        limit : int, optional
            Maximum number of rows to return, 20 when not given.

        Returns
        -------
        pd.DataFrame
            One row per milestone, with ``None`` values replaced by ``""``
        """
        # Every output column is copied from the payload key of the same name.
        payload_keys = (
            "url", "html_url", "labels_url", "id", "node_id", "number",
            "title", "description", "creator", "open_issues", "closed_issues",
            "state", "created_at", "updated_at", "due_on", "closed_at",
        )

        max_rows = limit or 20

        self.handler.connect()
        repository = self.handler.connection.get_repo(self.handler.repository)

        rows = []
        for fetched, milestone in enumerate(repository.get_milestones(), start=1):
            payload = milestone.raw_data

            row = {}
            for key in payload_keys:
                row[key] = self.check_none(payload[key])
            rows.append(row)

            if fetched >= max_rows:
                break

        return pd.DataFrame(rows, columns=self.get_columns())

    def check_none(self, val):
        """Returns an empty string in place of ``None``, any other value unchanged"""
        if val is None:
            return ""
        return val

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses

        Returns
        -------
        List[str]
            List of columns
        """
        link_columns = ["url", "html_url", "labels_url"]
        identity_columns = ["id", "node_id", "number", "title", "description", "creator"]
        status_columns = ["open_issues", "closed_issues", "state"]
        date_columns = ["created_at", "updated_at", "due_on", "closed_at"]
        return link_columns + identity_columns + status_columns + date_columns

877

878

class GithubFilesTable(APIResource):
    """The GitHub Files Table implementation"""

    def get_path(self, repo, path, file_matches=None, file_not_matches=None, limit=None):
        """Recursively collects files below ``path``

        Parameters
        ----------
        repo
            Repository object providing ``get_contents(path)``.
        path : str
            Directory (or single file) to start from.
        file_matches : list of str, optional
            Regex patterns; when given, a file is kept only if its name
            matches at least one of them.
        file_not_matches : list of str, optional
            Regex patterns; when given, a file is dropped if its name
            matches any of them.
        limit : int, optional
            Maximum number of files to return; ``None`` means no limit.

        Returns
        -------
        list of dict
            One dict per file with the keys ``path``, ``name``, ``content``
        """
        contents = repo.get_contents(path)
        if not isinstance(contents, (list, tuple)):
            # A path that points at a single file yields one object rather
            # than a listing.
            contents = [contents]

        res = []
        remaining = limit
        for item in contents:
            if remaining is not None and remaining <= 0:
                break

            if item.type == "dir":
                found = self.get_path(
                    repo, item.path, file_matches, file_not_matches, remaining
                )
            elif self._name_selected(item.name, file_matches, file_not_matches):
                found = [{
                    'path': item.path,
                    'name': item.name,
                    'content': item.decoded_content,
                }]
            else:
                found = []

            res.extend(found)
            if remaining is not None:
                remaining -= len(found)

        return res

    @staticmethod
    def _name_selected(name, file_matches, file_not_matches):
        """Tells whether ``name`` passes the include and exclude patterns"""
        if file_matches is not None and not any(
            re.match(pattern, name) for pattern in file_matches
        ):
            return False
        if file_not_matches is not None and any(
            re.match(pattern, name) for pattern in file_not_matches
        ):
            return False
        return True

    @staticmethod
    def _to_regex(value, wildcard=False):
        """Builds an anchored regex matching ``value`` literally

        With ``wildcard=True`` every ``%`` in ``value`` matches any run of
        characters; all other characters are always matched literally.
        """
        text = str(value)
        if wildcard:
            body = '.*'.join(re.escape(part) for part in text.split('%'))
        else:
            body = re.escape(text)
        return f'^{body}$'

    def list(self,
             conditions: List[FilterCondition] = None,
             limit: int = None,
             sort: List[SortColumn] = None,
             targets: List[str] = None) -> pd.DataFrame:
        """Pulls files from the repository contents

        Supported filters (flagged ``applied`` when handled):

        - ``path`` with equality: directory to start from
        - ``name`` with equality, ``LIKE`` or ``NOT LIKE``

        Parameters
        ----------
        conditions : list of FilterCondition, optional
            Query filters; ``None`` is treated as no filters.
        limit : int, optional
            Maximum number of files to return, 10 when not given.
        sort : list of SortColumn, optional
            Not used by this table.
        targets : list of str, optional
            Not used by this table.

        Returns
        -------
        pd.DataFrame
            Columns ``path``, ``name`` and ``content``
        """
        self.handler.connect()
        repo = self.handler.connection.get_repo(self.handler.repository)

        # TODO sort

        path = ''
        file_matches = []
        file_not_matches = []
        for condition in conditions or []:
            if condition.column == 'path' and condition.op == FilterOperator.EQUAL:
                path = condition.value
                condition.applied = True
            elif condition.column == 'name':
                if condition.op == FilterOperator.EQUAL:
                    file_matches.append(self._to_regex(condition.value))
                elif condition.op == FilterOperator.LIKE:
                    file_matches.append(self._to_regex(condition.value, wildcard=True))
                elif condition.op == FilterOperator.NOT_LIKE:
                    file_not_matches.append(self._to_regex(condition.value, wildcard=True))
                else:
                    # Unsupported operator: leave the condition unapplied.
                    continue
                condition.applied = True

        if limit is None:
            limit = 10

        res = self.get_path(
            repo,
            path,
            file_matches or None,
            file_not_matches or None,
            limit,
        )
        return pd.DataFrame(res, columns=self.get_columns())

    def get_columns(self) -> list:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return ['path', 'name', 'content']

Coverage for mindsdb / integrations / handlers / github_handler / github_tables.py: 0%

287 statements