Coverage for mindsdb / integrations / handlers / serpstack_handler / serpstack_tables.py: 0%
103 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import pandas as pd
2import requests
3from mindsdb.integrations.libs.api_handler import APITable
4from mindsdb_sql_parser import ast
5from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
class BaseResultsTable(APITable):
    """
    Common SELECT implementation shared by the result tables in this module.

    ``select`` reads ``self.results_key`` (the key of the result list in the
    API response) and, when present, ``self.default_type``; it also calls
    ``self.get_columns()``. None of these are defined on this class, so
    subclasses are expected to provide them along with ``extract_data``.
    """

    # Seconds to wait on the API before giving up. Without a timeout
    # ``requests`` can block forever on a stalled connection.
    _REQUEST_TIMEOUT_SECONDS = 30

    def select(self, query: ast.Select) -> pd.DataFrame:
        """
        Selects data from the results table and returns it as a pandas DataFrame.

        Every ``column = value`` comparison in the WHERE clause is forwarded
        to the API as a request parameter; other comparison operators are
        ignored.

        Args:
            query (ast.Select): The SQL query to be executed.

        Returns:
            pandas.DataFrame: A pandas DataFrame containing the selected data.
            When the API returns no results, a single placeholder row with
            "No results found" in every column is returned instead (column
            selection and LIMIT are not applied to that placeholder).

        Raises:
            ValueError: If the WHERE clause has no ``query = ...`` condition.
            SystemError: If the HTTP request fails or the response body is
                not valid JSON.
        """
        base_url = self.handler.connect()

        params = {'access_key': self.handler.access_key}
        conditions = extract_comparison_conditions(query.where)
        params.update({condition[1]: condition[2] for condition in conditions if condition[0] == '='})

        if 'query' not in params:
            raise ValueError('Query is missing in the SQL query')
        if 'type' not in params and hasattr(self, 'default_type'):
            params['type'] = self.default_type

        try:
            api_response = requests.get(
                base_url,
                params=params,
                timeout=self._REQUEST_TIMEOUT_SECONDS,
            )
            api_response.raise_for_status()  # raises HTTPError for bad responses
        except requests.exceptions.HTTPError as e:
            raise SystemError(f"HTTP error occurred: {e.response.status_code} - {e.response.reason}") from e
        except requests.exceptions.ConnectionError as e:
            raise SystemError(f"Connection error occurred: {str(e)}") from e
        except requests.exceptions.Timeout as e:
            raise SystemError(f"Request timeout: {str(e)}") from e
        except requests.exceptions.RequestException as e:
            raise SystemError(f"Request exception occurred: {str(e)}") from e

        # Parsed in its own try block: in recent ``requests`` releases the
        # JSON decode error also derives from RequestException, so sharing a
        # try with the request would report it as a generic request failure.
        try:
            api_result = api_response.json()
        except ValueError as e:
            raise SystemError(f"Failed to parse JSON response: {str(e)}") from e

        # Treat a non-object payload or an explicit null list as "no results"
        # instead of failing on ``.get`` / iteration.
        results = []
        if isinstance(api_result, dict):
            results = api_result.get(self.results_key) or []
        processed_results = [self.extract_data(result) for result in results]

        if len(processed_results) == 0:
            columns = self.get_columns()
            empty_data = {col: ["No results found"] for col in columns}
            return pd.DataFrame(empty_data, columns=columns)

        result_df = pd.DataFrame(processed_results)
        result_df = self.filter_columns(result_df, query)
        return result_df

    def extract_data(self, data):
        """
        Extracts the required data from the result.

        Args:
            data (dict): The result data.

        Returns:
            dict: A dictionary containing the extracted data.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def filter_columns(self, result: pd.DataFrame, query: ast.Select = None):
        """
        Filters the columns of the result DataFrame.

        The output contains exactly the columns requested by the query
        targets (all table columns for ``*`` or when no query is given),
        lower-cased and in that order. Requested columns absent from
        ``result`` are added with ``None`` values. The query's LIMIT, if any,
        is applied last. The input DataFrame is not modified.

        Args:
            result (pandas.DataFrame): The result DataFrame.
            query (ast.Select): The SQL query to be executed.

        Returns:
            pandas.DataFrame: A pandas DataFrame containing the filtered data.

        Raises:
            NotImplementedError: If a query target is neither ``*`` nor a
                plain identifier.
        """
        columns = []
        if query is not None:
            for target in query.targets:
                if isinstance(target, ast.Star):
                    # ``*`` overrides anything collected so far.
                    columns = self.get_columns()
                    break
                elif isinstance(target, ast.Identifier):
                    columns.append(target.parts[-1])
                else:
                    raise NotImplementedError(
                        f"Unsupported select target: {type(target).__name__}"
                    )
        else:
            columns = self.get_columns()

        columns = [name.lower() for name in columns]

        if len(result) == 0:
            result = pd.DataFrame([], columns=columns)
        else:
            # Requested columns the data does not have are filled with None.
            missing = [col for col in columns if col not in result.columns]
            if missing:
                # ``assign`` returns a new frame, leaving the caller's intact.
                result = result.assign(**{col: None for col in missing})

            result = result[columns]

        if query is not None and query.limit is not None:
            result = result.head(query.limit.value)

        return result
class OrganicResultsTable(BaseResultsTable):
    """Results table backed by the ``organic_results`` list of the API response."""

    results_key = 'organic_results'

    def extract_data(self, organic):
        """
        Builds one table row from a single organic result.

        Args:
            organic (dict): One entry of the ``organic_results`` list.

        Returns:
            dict: The row, keyed by column name. Fields missing from the
            input are ``None``; ``sitelinks`` and ``rich_snippet`` are
            reduced to nested dicts (or ``None`` when absent).
        """
        return {
            'position': organic.get('position'),
            'title': organic.get('title'),
            'url': organic.get('url'),
            'domain': organic.get('domain'),
            'displayed_url': organic.get('displayed_url'),
            'snippet': organic.get('snippet'),
            'cached_page_url': organic.get('cached_page_url'),
            'related_pages_url': organic.get('related_pages_url'),
            'prerender': organic.get('prerender'),
            'sitelinks': self._extract_sitelinks(organic.get('sitelinks')),
            'rich_snippet': self._extract_rich_snippet(organic.get('rich_snippet')),
        }

    def _extract_sitelinks(self, sitelinks):
        """
        Reduces the sitelinks structure to title/url pairs.

        Args:
            sitelinks (dict | None): The ``sitelinks`` value of a result.

        Returns:
            dict | None: ``{'inline': [...], 'expanded': [...]}`` where each
            item is ``{'title': ..., 'url': ...}``, or ``None`` when there
            are no sitelinks.
        """
        if not sitelinks:
            return None
        # ``.get`` (rather than indexing) keeps a link that lacks a title or
        # url from raising KeyError, matching how every other field is read;
        # ``or []`` tolerates an explicit null group.
        return {
            group: [
                {'title': link.get('title'), 'url': link.get('url')}
                for link in sitelinks.get(group) or []
            ]
            for group in ('inline', 'expanded')
        }

    def _extract_rich_snippet(self, rich_snippet):
        """
        Extracts the extension lists from a rich snippet.

        The ``top`` section is used when that key is present, otherwise the
        ``bottom`` section.

        Args:
            rich_snippet (dict | None): The ``rich_snippet`` value of a result.

        Returns:
            dict | None: ``detected_extensions`` and ``extensions`` of the
            chosen section (empty lists when missing), or ``None`` when
            there is no rich snippet.
        """
        if not rich_snippet:
            return None
        snippet_type = 'top' if 'top' in rich_snippet else 'bottom'
        # ``or {}`` covers both a missing section and an explicit null one.
        section = rich_snippet.get(snippet_type) or {}
        return {
            'detected_extensions': section.get('detected_extensions', []),
            'extensions': section.get('extensions', []),
        }

    def get_columns(self):
        """
        Lists the column names of this table.

        Returns:
            list: Column names, in table order.
        """
        return [
            'position',
            'title',
            'url',
            'domain',
            'displayed_url',
            'snippet',
            'cached_page_url',
            'related_pages_url',
            'prerender',
            'sitelinks',
            'rich_snippet',
        ]
class ImageResultsTable(BaseResultsTable):
    """Results table backed by the ``image_results`` list of the API response."""

    results_key = 'image_results'
    default_type = 'images'

    def extract_data(self, image):
        """
        Builds one table row from a single image result.

        Args:
            image (dict): One entry of the ``image_results`` list.

        Returns:
            dict: The row, keyed by column name; missing fields are ``None``.
        """
        wanted = (
            'position', 'title', 'width', 'height',
            'image_url', 'type', 'url', 'source',
        )
        return {field: image.get(field) for field in wanted}

    def get_columns(self):
        """
        Lists the column names of this table.

        Returns:
            list: Column names, in table order.
        """
        column_names = [
            'position', 'title', 'width', 'height',
            'image_url', 'type', 'url', 'source',
        ]
        return column_names
class VideoResultsTable(BaseResultsTable):
    """Results table backed by the ``video_results`` list of the API response."""

    results_key = 'video_results'
    default_type = 'videos'

    def extract_data(self, video):
        """
        Builds one table row from a single video result.

        Args:
            video (dict): One entry of the ``video_results`` list.

        Returns:
            dict: The row, keyed by column name; missing fields are ``None``.
        """
        wanted = (
            'position', 'title', 'url', 'displayed_url',
            'uploaded', 'snippet', 'length',
        )
        return {field: video.get(field) for field in wanted}

    def get_columns(self):
        """
        Lists the column names of this table.

        Returns:
            list: Column names, in table order.
        """
        column_names = [
            'position', 'title', 'url', 'displayed_url',
            'uploaded', 'snippet', 'length',
        ]
        return column_names
class NewsResultsTable(BaseResultsTable):
    """Results table backed by the ``news_results`` list of the API response."""

    results_key = 'news_results'
    default_type = 'news'

    def extract_data(self, news):
        """
        Builds one table row from a single news result.

        Args:
            news (dict): One entry of the ``news_results`` list.

        Returns:
            dict: The row, keyed by column name; missing fields are ``None``.
        """
        wanted = (
            'position', 'title', 'url', 'source_name',
            'uploaded', 'uploaded_utc', 'snippet', 'thumbnail_url',
        )
        return {field: news.get(field) for field in wanted}

    def get_columns(self):
        """
        Lists the column names of this table.

        Returns:
            list: Column names, in table order.
        """
        column_names = [
            'position', 'title', 'url', 'source_name',
            'uploaded', 'uploaded_utc', 'snippet', 'thumbnail_url',
        ]
        return column_names
class ShoppingResultsTable(BaseResultsTable):
    """Results table backed by the ``shopping_results`` list of the API response."""

    results_key = 'shopping_results'
    default_type = 'shopping'

    def extract_data(self, shopping):
        """
        Builds one table row from a single shopping result.

        Args:
            shopping (dict): One entry of the ``shopping_results`` list.

        Returns:
            dict: The row, keyed by column name; missing fields are ``None``.
        """
        wanted = ('position', 'title', 'url')
        return {field: shopping.get(field) for field in wanted}

    def get_columns(self):
        """
        Lists the column names of this table.

        Returns:
            list: Column names, in table order.
        """
        column_names = ['position', 'title', 'url']
        return column_names