Coverage for mindsdb / integrations / handlers / google_search_handler / google_search_tables.py: 0%
102 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import pandas as pd
2from mindsdb_sql_parser import ast
3from pandas import DataFrame
5from mindsdb.integrations.libs.api_handler import APITable
6from mindsdb.integrations.utilities.date_utils import parse_utc_date
7from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
class SearchAnalyticsTable(APITable):
    """
    Table class for the Google Search Console Search Analytics table.
    """

    def select(self, query: ast.Select) -> DataFrame:
        """
        Gets traffic data from the Search Console.

        WHERE conditions are turned into request parameters (equality only),
        GROUP BY columns become the requested dimensions, and LIMIT becomes
        ``rowLimit``. The result is projected onto the selected columns.

        Args:
            query (ast.Select): SQL query to parse.

        Returns:
            DataFrame: The traffic data restricted to the selected columns,
            in the order given by ``get_columns``.

        Raises:
            NotImplementedError: If a WHERE condition uses an operator other
                than '=', references an unsupported column, or if GROUP BY /
                ORDER BY reference an unsupported column.
            ValueError: If a query target is neither ``*`` nor an identifier.
        """
        date_params = ('startDate', 'endDate')
        accepted_params = ('siteUrl', 'dimensions', 'type', 'rowLimit', 'aggregationType')
        supported_dimensions = ('query', 'page', 'device', 'country')

        # Validate the projection first so an invalid query never reaches the API.
        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                selected_columns = self.get_columns()
                break
            elif isinstance(target, ast.Identifier):
                selected_columns.append(target.parts[-1])
            else:
                raise ValueError(f"Unknown query target {type(target)}")

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if op != '=':
                raise NotImplementedError(
                    f"Only '=' is supported in WHERE conditions, got '{op}' for '{arg1}'"
                )
            if arg1 in date_params:
                params[arg1] = parse_utc_date(arg2)
            elif arg1 in accepted_params:
                params[arg1] = arg2
            else:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")

        # GROUP BY entries are plain column references, not comparisons, so
        # they are read directly instead of going through
        # extract_comparison_conditions (which only yields comparison triples).
        group_by_dimensions = []
        for target in query.group_by or []:
            name = target.parts[-1] if isinstance(target, ast.Identifier) else None
            if name not in supported_dimensions:
                raise NotImplementedError(
                    f"Unsupported GROUP BY column; expected one of {supported_dimensions}"
                )
            if name not in group_by_dimensions:
                group_by_dimensions.append(name)

        if group_by_dimensions:
            # GROUP BY takes precedence over a 'dimensions' value given in WHERE.
            params['dimensions'] = group_by_dimensions
        else:
            # Keep a WHERE-supplied value; otherwise send an empty list so the
            # key is always present, as it was before.
            params.setdefault('dimensions', [])

        # Get the order by from the query (an empty ORDER BY list is ignored).
        # NOTE(review): this reads `.value` from the first ORDER BY entry, as
        # the code always has - confirm the parser's order-by node exposes that
        # attribute, and that 'start_time' / 'updated' are meaningful here.
        if query.order_by:
            order_fields = {'start_time': 'startTime', 'updated': 'updated'}
            order_key = query.order_by[0].value
            if order_key not in order_fields:
                raise NotImplementedError(f"Unsupported ORDER BY column '{order_key}'")
            params['orderBy'] = order_fields[order_key]

        # LIMIT overrides any rowLimit supplied in WHERE.
        if query.limit is not None:
            params['rowLimit'] = query.limit.value

        # Get the traffic data from the Google Search Console API.
        traffic_data = self.handler.call_application_api(
            method_name='get_traffic_data', params=params
        )

        if len(traffic_data) == 0:
            return pd.DataFrame([], columns=selected_columns)

        # Name the columns, then drop everything that was not selected.
        traffic_data.columns = self.get_columns()
        unselected = [col for col in traffic_data.columns if col not in selected_columns]
        return traffic_data.drop(columns=unselected)

    def get_columns(self) -> list:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return [
            'keys',
            'clicks',
            'impressions',
            'ctr',
            'position',
        ]
class SiteMapsTable(APITable):
    """
    Table class for the Google Search Console Site Maps table.
    """

    def select(self, query: ast.Select) -> DataFrame:
        """
        Gets the sitemaps from the Search Console.

        WHERE conditions on ``siteUrl`` and ``sitemapIndex`` are forwarded as
        request parameters (equality only) and LIMIT becomes ``rowLimit``.
        The result is projected onto the selected columns.

        Args:
            query (ast.Select): SQL query to parse.

        Returns:
            DataFrame: The sitemaps restricted to the selected columns, in the
            order given by ``get_columns``.

        Raises:
            NotImplementedError: If a WHERE condition uses an operator other
                than '=' or references an unsupported column.
            ValueError: If a query target is neither ``*`` nor an identifier.
        """
        accepted_params = ('siteUrl', 'sitemapIndex')

        # Validate the projection first so an invalid query never reaches the API.
        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                selected_columns = self.get_columns()
                break
            elif isinstance(target, ast.Identifier):
                selected_columns.append(target.parts[-1])
            else:
                raise ValueError(f"Unknown query target {type(target)}")

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if op != '=':
                raise NotImplementedError(
                    f"Only '=' is supported in WHERE conditions, got '{op}' for '{arg1}'"
                )
            if arg1 not in accepted_params:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")
            params[arg1] = arg2

        if query.limit is not None:
            params['rowLimit'] = query.limit.value

        # Get the sitemaps from the Google Search Console API.
        sitemaps = self.handler.call_application_api(
            method_name='get_sitemaps', params=params
        )

        if len(sitemaps) == 0:
            return pd.DataFrame([], columns=selected_columns)

        # Name the columns, then drop everything that was not selected.
        sitemaps.columns = self.get_columns()
        unselected = [col for col in sitemaps.columns if col not in selected_columns]
        return sitemaps.drop(columns=unselected)

    def insert(self, query: ast.Insert):
        """
        Submits one sitemap per inserted row.

        Only the ``siteUrl`` and ``feedpath`` columns are supported. Every row
        of the INSERT is submitted, not just the first.

        Args:
            query (ast.Insert): SQL query to parse.

        Raises:
            NotImplementedError: If the INSERT names an unsupported column.
        """
        accepted_columns = ('siteUrl', 'feedpath')

        # Column entries may be plain strings or parser column objects that
        # expose the column name as `.name`; accept both.
        column_names = [getattr(col, 'name', col) for col in query.columns]
        for name in column_names:
            if name not in accepted_columns:
                raise NotImplementedError(f"Unsupported INSERT column '{name}'")

        # Submit each row as its own sitemap to the Google Search Console API.
        for row in query.values:
            params = dict(zip(column_names, row))
            self.handler.call_application_api(method_name='submit_sitemap', params=params)

    def delete(self, query: ast.Delete):
        """
        Deletes a sitemap for a site.

        The sitemap is identified by equality conditions on ``siteUrl`` and
        ``feedpath`` in the WHERE clause.

        Args:
            query (ast.Delete): SQL query to parse.

        Raises:
            NotImplementedError: If a WHERE condition uses an operator other
                than '=' or references an unsupported column.
        """
        accepted_params = ('siteUrl', 'feedpath')

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if op != '=':
                raise NotImplementedError(
                    f"Only '=' is supported in WHERE conditions, got '{op}' for '{arg1}'"
                )
            if arg1 not in accepted_params:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")
            params[arg1] = arg2

        # Delete the sitemap through the Google Search Console API.
        self.handler.call_application_api(method_name='delete_sitemap', params=params)

    def get_columns(self) -> list:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return [
            'path',
            'lastSubmitted',
            'isPending',
            'isSitemapsIndex',
            'type',
            'lastDownloaded',
            'warnings',
            'errors',
            'contents',
        ]