Coverage for mindsdb/integrations/handlers/npm_handler/npm_tables.py: 0%
139 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
from typing import List, Optional

import pandas as pd
from mindsdb_sql_parser import ast

from mindsdb.integrations.utilities.handlers.query_utilities import (
    SELECTQueryExecutor,
    SELECTQueryParser,
)
from mindsdb.integrations.libs.api_handler import APIHandler, APITable
from mindsdb.integrations.utilities.sql_utils import conditions_to_filter
def rename_key(d, new_key, old_key):
    """Move the value stored under ``old_key`` to ``new_key``, in place.

    The ``old_key`` entry is removed from ``d`` and its value is stored under
    ``new_key``. The mapping is mutated; nothing is returned.

    Raises:
        KeyError: if ``old_key`` is not present in ``d``.
    """
    # Pop first so that renaming a key onto itself keeps its value.
    moved_value = d.pop(old_key)
    d[new_key] = moved_value
class CustomAPITable(APITable):
    """Base class holding the SELECT helpers shared by the NPM tables.

    The helpers read ``self.name`` and ``self.columns``, so subclasses are
    expected to define both.
    """

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``connect()`` on its handler."""
        super().__init__(handler)
        self.handler.connect()

    def get_columns(self, ignore: Optional[List[str]] = None) -> List[str]:
        """Return the table's column names, skipping any listed in ``ignore``.

        Args:
            ignore: Column names to leave out. ``None`` (the default) leaves
                nothing out.

        Returns:
            A new list with the remaining column names, in declaration order.
        """
        # A None sentinel replaces the former mutable ``[]`` default so that no
        # list object is shared between calls.
        excluded = () if ignore is None else ignore
        return [item for item in self.columns if item not in excluded]

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Run a SELECT against the table; must be implemented by subclasses."""
        raise NotImplementedError()

    def parse_select(self, query: ast.Select, table_name: str):
        """Parse ``query`` and store its parts on the instance.

        Sets ``selected_columns``, ``where_conditions``,
        ``order_by_conditions`` and ``result_limit`` from the parser's result.
        """
        select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns())
        (
            self.selected_columns,
            self.where_conditions,
            self.order_by_conditions,
            self.result_limit,
        ) = select_statement_parser.parse_query()

    def get_package_name(self, query: ast.Select):
        """Return the value of the ``package`` selector in the WHERE clause.

        Raises:
            Exception: if the WHERE clause carries no ``package`` condition.
        """
        # presumably maps WHERE-clause column names to their compared values;
        # verify against conditions_to_filter.
        params = conditions_to_filter(query.where)
        if "package" not in params:
            raise Exception("Where condition does not have 'package' selector")
        return params["package"]

    def apply_query_params(self, df, query):
        """Apply the query's column selection, ordering and limit to ``df``.

        Returns whatever ``SELECTQueryExecutor.execute_query()`` produces.
        """
        select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns())
        selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query()
        # The parsed WHERE conditions are dropped and an empty list is passed
        # instead. NOTE(review): presumably because ``package`` is a selector
        # rather than a real column of the frame; confirm.
        select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit)
        return select_statement_executor.execute_query()
class NPMMetadataTable(CustomAPITable):
    """Single-row table with a package's metadata, download/star counts and score."""

    name: str = "metadata"
    columns: List[str] = [
        "name",
        "scope",
        "version",
        "description",
        "author_name",
        "author_email",
        "publisher_username",
        "publisher_email",
        "repository_url",
        "license",
        "num_releases",
        "num_downloads",
        "num_stars",
        "score",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Build the metadata row for the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)

        # presumably get_cols_in returns a dict keyed by the requested names;
        # verify against the handler's connection class.
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["name", "scope", "version", "description", "author", "publisher", "repository", "license", "releases"],
        )
        # Nested objects may be absent or None for some packages; fall back to
        # empty containers so the row is still produced with default values.
        author = metadata.get("author") or {}
        publisher = metadata.get("publisher") or {}
        repository = metadata.get("repository") or {}
        # The former ``[0]`` fallback made the summation call ``.get`` on an
        # int; an empty list sums to 0 without error.
        releases = metadata.get("releases") or []

        npm_data = connection.get_cols_in(
            ["collected", "npm"],
            ["downloads", "starsCount"],
        )
        downloads = npm_data.get("downloads") or []

        score = connection.get_cols_in(["score"], ["final"]).get("final")

        row = {
            "name": metadata.get("name"),
            "scope": metadata.get("scope"),
            "version": metadata.get("version"),
            "description": metadata.get("description"),
            "author_name": author.get("name", ""),
            "author_email": author.get("email", ""),
            "publisher_username": publisher.get("username", ""),
            "publisher_email": publisher.get("email", ""),
            "repository_url": repository.get("url", ""),
            "license": metadata.get("license"),
            "num_releases": sum(x.get("count", 0) for x in releases),
            "num_downloads": sum(x.get("count", 0) for x in downloads),
            "num_stars": npm_data.get("starsCount"),
            "score": score,
        }
        # Pin the columns so every declared column exists in the frame.
        df = pd.DataFrame([row], columns=self.columns)
        return self.apply_query_params(df, query)
class NPMMaintainersTable(CustomAPITable):
    """Table with one row per maintainer of a package."""

    name: str = "maintainers"
    columns: List[str] = [
        "username",
        "email",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the maintainers of the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["maintainers"],
        )
        maintainers = metadata.get("maintainers") or []
        # ``.get`` tolerates a maintainer entry that lacks one of the fields.
        records = [{col: entry.get(col) for col in self.columns} for entry in maintainers]
        # Passing the columns explicitly keeps them on the frame when there
        # are no maintainers, instead of building it from ``[{}]``.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)
class NPMKeywordsTable(CustomAPITable):
    """Table with one row per keyword of a package."""

    name: str = "keywords"
    columns: List[str] = [
        "keyword",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the keywords of the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["keywords"],
        )
        keywords = metadata.get("keywords") or []
        records = [{"keyword": keyword} for keyword in keywords]
        # Passing the columns explicitly keeps ``keyword`` on the frame when
        # the package has no keywords, instead of building it from ``[{}]``.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)
class NPMDependenciesTable(CustomAPITable):
    """Table with one row per dependency of a package."""

    name: str = "dependencies"
    columns: List[str] = [
        "dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the dependencies of the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["dependencies"],
        )
        dependencies = metadata.get("dependencies") or {}
        records = [{"dependency": dep, "version": ver} for dep, ver in dependencies.items()]
        # Passing the columns explicitly keeps them on the frame when the
        # package has no dependencies, instead of building it from ``[{}]``.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)
class NPMDevDependenciesTable(CustomAPITable):
    """Table with one row per development dependency of a package."""

    name: str = "dev_dependencies"
    columns: List[str] = [
        "dev_dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the dev dependencies of the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["devDependencies"],
        )
        dev_dependencies = metadata.get("devDependencies") or {}
        records = [{"dev_dependency": dep, "version": ver} for dep, ver in dev_dependencies.items()]
        # Passing the columns explicitly keeps them on the frame when the
        # package has no dev dependencies, instead of building it from ``[{}]``.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)
class NPMOptionalDependenciesTable(CustomAPITable):
    """Table with one row per optional dependency of a package."""

    name: str = "optional_dependencies"
    columns: List[str] = [
        "optional_dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the optional dependencies of the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["optionalDependencies"],
        )
        optional_dependencies = metadata.get("optionalDependencies") or {}
        records = [
            {"optional_dependency": dep, "version": ver}
            for dep, ver in optional_dependencies.items()
        ]
        # Passing the columns explicitly keeps them on the frame when the
        # package has no optional dependencies, instead of building it from
        # ``[{}]``.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)
class NPMGithubStatsTable(CustomAPITable):
    """Single-row table with the GitHub statistics recorded for a package."""

    name: str = "github_stats"
    columns: List[str] = [
        "homepage",
        "num_stars",
        "num_forks",
        "num_subscribers",
        "num_issues",
        "num_open_issues",
    ]

    def __init__(self, handler: APIHandler):
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call looks redundant but is kept to preserve behaviour.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Build the GitHub statistics row for the package named in the WHERE clause.

        Raises:
            Exception: if the query has no ``package`` selector.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        # presumably get_cols_in returns a dict keyed by the requested names;
        # verify against the handler's connection class.
        github_data = connection.get_cols_in(
            ["collected", "github"],
            ["homepage", "starsCount", "forksCount", "subscribersCount", "issues"],
        )
        # ``issues`` may be absent or None; an empty dict yields zero counts
        # instead of raising on the nested lookup.
        issues = github_data.get("issues") or {}
        row = {
            "homepage": github_data.get("homepage"),
            "num_stars": github_data.get("starsCount"),
            "num_forks": github_data.get("forksCount"),
            "num_subscribers": github_data.get("subscribersCount"),
            "num_issues": issues.get("count", 0),
            "num_open_issues": issues.get("openCount", 0),
        }
        # Pin the columns so every declared column exists in the frame.
        df = pd.DataFrame([row], columns=self.columns)
        return self.apply_query_params(df, query)