Coverage for mindsdb / integrations / handlers / npm_handler / npm_tables.py: 0%

139 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

from typing import List, Optional

import pandas as pd
from mindsdb_sql_parser import ast

from mindsdb.integrations.utilities.handlers.query_utilities import (
    SELECTQueryExecutor,
    SELECTQueryParser,
)
from mindsdb.integrations.libs.api_handler import APIHandler, APITable
from mindsdb.integrations.utilities.sql_utils import conditions_to_filter

12 

13 

def rename_key(d, new_key, old_key):
    """Move the value stored under ``old_key`` to ``new_key``, in place.

    ``old_key`` is removed from ``d`` before ``new_key`` is written, so the
    renamed entry ends up last in the mapping's insertion order.

    Raises:
        KeyError: if ``old_key`` is not present in ``d``.
    """
    value = d.pop(old_key)
    d[new_key] = value

16 

17 

class CustomAPITable(APITable):
    """Shared base for the tables defined in this module.

    Provides column listing, extraction of the mandatory ``package`` filter
    from a query's WHERE clause, and application of the SELECT clauses
    (column selection, ORDER BY, LIMIT) to an already-built DataFrame.
    The helpers read ``self.name`` and ``self.columns``, which the concrete
    tables in this module define as class attributes.
    """

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        self.handler.connect()

    def get_columns(self, ignore: Optional[List[str]] = None) -> List[str]:
        """Return the table's column names, minus any listed in ``ignore``.

        Args:
            ignore: column names to leave out; ``None`` (the default) keeps
                every column. A ``None`` default replaces the former mutable
                ``[]`` default so no list object is shared between calls.

        Returns:
            A new list, in the order of ``self.columns``.
        """
        excluded = () if ignore is None else ignore
        return [item for item in self.columns if item not in excluded]

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Run a SELECT against the table; concrete tables must override this."""
        raise NotImplementedError()

    def parse_select(self, query: ast.Select, table_name: str):
        """Parse ``query`` and store its parts on the instance.

        Sets ``selected_columns``, ``where_conditions``,
        ``order_by_conditions`` and ``result_limit`` from the parser output.
        """
        select_statement_parser = SELECTQueryParser(query, table_name, self.get_columns())
        (
            self.selected_columns,
            self.where_conditions,
            self.order_by_conditions,
            self.result_limit,
        ) = select_statement_parser.parse_query()

    def get_package_name(self, query: ast.Select):
        """Return the value of the ``package`` filter in the WHERE clause.

        Raises:
            ValueError: if the WHERE clause carries no ``package`` condition.
                ``ValueError`` subclasses ``Exception``, so callers catching
                the previously raised bare ``Exception`` keep working.
        """
        params = conditions_to_filter(query.where)
        if "package" not in params:
            raise ValueError("Where condition does not have 'package' selector")
        return params["package"]

    def apply_query_params(self, df, query):
        """Apply the query's column selection, ORDER BY and LIMIT to ``df``.

        WHERE conditions are deliberately not applied here (an empty list is
        handed to the executor); the tables in this module use the WHERE
        clause only to pick the package.
        """
        select_statement_parser = SELECTQueryParser(query, self.name, self.get_columns())
        selected_columns, _, order_by_conditions, result_limit = select_statement_parser.parse_query()
        select_statement_executor = SELECTQueryExecutor(df, selected_columns, [], order_by_conditions, result_limit)
        return select_statement_executor.execute_query()

45 

46 

class NPMMetadataTable(CustomAPITable):
    """Single-row table with general metadata for one package.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "metadata"
    columns: List[str] = [
        "name",
        "scope",
        "version",
        "description",
        "author_name",
        "author_email",
        "publisher_username",
        "publisher_email",
        "repository_url",
        "license",
        "num_releases",
        "num_downloads",
        "num_stars",
        "score",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Build the one-row metadata frame for the requested package.

        Every declared column is always present in the row. Fields missing
        from the fetched data become ``None`` (``""`` for the nested
        author/publisher/repository fields, ``0`` for the summed counters)
        instead of raising ``KeyError``/``AttributeError``.

        Raises:
            ValueError: propagated from ``get_package_name`` when the query
                has no ``package`` condition.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)

        # NOTE(review): the original code read these results with ``.get``,
        # so get_cols_in presumably omits keys it cannot find - confirm.
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["name", "scope", "version", "description", "author", "publisher", "repository", "license", "releases"],
        )
        npm_data = connection.get_cols_in(
            ["collected", "npm"],
            ["downloads", "starsCount"],
        )
        score = connection.get_cols_in(["score"], ["final"])["final"]

        # Nested objects may be absent (or None); fall back to an empty dict
        # so the per-field lookups below yield their defaults.
        author = metadata.get("author") or {}
        publisher = metadata.get("publisher") or {}
        repository = metadata.get("repository") or {}

        record = {
            "name": metadata.get("name"),
            "scope": metadata.get("scope"),
            "version": metadata.get("version"),
            "description": metadata.get("description"),
            "author_name": author.get("name", ""),
            "author_email": author.get("email", ""),
            "publisher_username": publisher.get("username", ""),
            "publisher_email": publisher.get("email", ""),
            "repository_url": repository.get("url", ""),
            "license": metadata.get("license"),
            "num_releases": self._total_count(metadata.get("releases")),
            "num_downloads": self._total_count(npm_data.get("downloads")),
            "num_stars": npm_data.get("starsCount"),
            "score": score,
        }
        df = pd.DataFrame.from_records([record])
        return self.apply_query_params(df, query)

    @staticmethod
    def _total_count(entries) -> int:
        """Sum the ``count`` field over ``entries``; missing/empty input gives 0.

        The previous fallback was ``[0]``, which raised ``AttributeError``
        because ``.get`` was then called on the integer ``0``.
        """
        return sum(entry.get("count", 0) for entry in entries or [])

97 

98 

class NPMMaintainersTable(CustomAPITable):
    """Table listing the maintainers of one package, one row per maintainer.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "maintainers"
    columns: List[str] = [
        "username",
        "email",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the maintainers of the requested package.

        A package without maintainers yields an empty frame that still has
        the declared columns. The previous ``[{}]`` fallback produced one row
        with no columns at all, which does not match the table schema.
        A maintainer entry lacking one of the fields gets ``None`` there.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["maintainers"],
        )
        maintainers = metadata.get("maintainers") or []
        records = [{col: entry.get(col) for col in self.columns} for entry in maintainers]
        # Passing ``columns`` keeps the schema even when ``records`` is empty.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)

120 

121 

class NPMKeywordsTable(CustomAPITable):
    """Table listing the keywords of one package, one row per keyword.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "keywords"
    columns: List[str] = [
        "keyword",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the keywords of the requested package.

        A package without keywords yields an empty frame that still has the
        ``keyword`` column. The previous ``[{}]`` fallback produced one row
        with no columns at all, which does not match the table schema.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["keywords"],
        )
        keywords = metadata.get("keywords") or []
        records = [{"keyword": keyword} for keyword in keywords]
        # Passing ``columns`` keeps the schema even when ``records`` is empty.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)

142 

143 

class NPMDependenciesTable(CustomAPITable):
    """Table listing the dependencies of one package, one row per dependency.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "dependencies"
    columns: List[str] = [
        "dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the dependency/version pairs of the requested package.

        A package without dependencies yields an empty frame that still has
        the declared columns. The previous ``[{}]`` fallback produced one row
        with no columns at all, which does not match the table schema.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["dependencies"],
        )
        dependencies = metadata.get("dependencies") or {}
        records = [{"dependency": dep, "version": version} for dep, version in dependencies.items()]
        # Passing ``columns`` keeps the schema even when ``records`` is empty.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)

165 

166 

class NPMDevDependenciesTable(CustomAPITable):
    """Table listing the dev dependencies of one package, one row each.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "dev_dependencies"
    columns: List[str] = [
        "dev_dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the dev-dependency/version pairs of the requested package.

        A package without dev dependencies yields an empty frame that still
        has the declared columns. The previous ``[{}]`` fallback produced one
        row with no columns at all, which does not match the table schema.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["devDependencies"],
        )
        dev_dependencies = metadata.get("devDependencies") or {}
        records = [{"dev_dependency": dep, "version": version} for dep, version in dev_dependencies.items()]
        # Passing ``columns`` keeps the schema even when ``records`` is empty.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)

188 

189 

class NPMOptionalDependenciesTable(CustomAPITable):
    """Table listing the optional dependencies of one package, one row each.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "optional_dependencies"
    columns: List[str] = [
        "optional_dependency",
        "version",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        # NOTE(review): CustomAPITable.__init__ already calls connect(); this
        # second call is kept to preserve behavior - confirm it is redundant.
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Return the optional-dependency/version pairs of the requested package.

        A package without optional dependencies yields an empty frame that
        still has the declared columns. The previous ``[{}]`` fallback
        produced one row with no columns at all, which does not match the
        table schema.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        metadata = connection.get_cols_in(
            ["collected", "metadata"],
            ["optionalDependencies"],
        )
        optional_dependencies = metadata.get("optionalDependencies") or {}
        records = [
            {"optional_dependency": dep, "version": version}
            for dep, version in optional_dependencies.items()
        ]
        # Passing ``columns`` keeps the schema even when ``records`` is empty.
        df = pd.DataFrame(records, columns=self.columns)
        return self.apply_query_params(df, query)

211 

212 

class NPMGithubStatsTable(CustomAPITable):
    """Single-row table with the GitHub statistics collected for one package.

    The package is chosen through a ``package = '...'`` condition in the
    WHERE clause.
    """

    name: str = "github_stats"
    columns: List[str] = [
        "homepage",
        "num_stars",
        "num_forks",
        "num_subscribers",
        "num_issues",
        "num_open_issues",
    ]

    def __init__(self, handler: APIHandler):
        """Initialise the table and call ``handler.connect()``."""
        super().__init__(handler)
        self.handler.connect()

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Build the one-row GitHub statistics frame for the requested package.

        The fetched mapping is rewritten in place: the three counter keys are
        renamed to their column names and the nested ``issues`` object is
        flattened into ``num_issues`` and ``num_open_issues``.

        NOTE(review): a ``KeyError`` propagates if any of the fetched keys is
        absent - confirm whether get_cols_in guarantees their presence.
        """
        package_name = self.get_package_name(query)
        connection = self.handler.connection(package_name)
        stats = connection.get_cols_in(
            ["collected", "github"],
            ["homepage", "starsCount", "forksCount", "subscribersCount", "issues"],
        )

        # (source key, column name) pairs, renamed in this exact order.
        counter_renames = (
            ("starsCount", "num_stars"),
            ("forksCount", "num_forks"),
            ("subscribersCount", "num_subscribers"),
        )
        for source_key, column in counter_renames:
            rename_key(stats, column, source_key)

        issues = stats["issues"]
        stats["num_issues"] = issues.get("count", 0)
        open_issue_count = issues.get("openCount", 0)
        # Drop the nested object only after both counters were read from it.
        del stats["issues"]
        stats["num_open_issues"] = open_issue_count

        frame = pd.DataFrame.from_records([stats])
        return self.apply_query_params(frame, query)