Coverage for mindsdb / integrations / handlers / google_search_handler / google_search_tables.py: 0%

102 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1import pandas as pd 

2from mindsdb_sql_parser import ast 

3from pandas import DataFrame 

4 

5from mindsdb.integrations.libs.api_handler import APITable 

6from mindsdb.integrations.utilities.date_utils import parse_utc_date 

7from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions 

8 

9 

class SearchAnalyticsTable(APITable):
    """
    Table class for the Google Search Console Search Analytics table.
    """

    def select(self, query: ast.Select) -> DataFrame:
        """
        Gets traffic data from the Search Console.

        Equality conditions in WHERE are forwarded as request parameters to
        the handler's ``get_traffic_data`` method, GROUP BY columns are added
        to the requested ``dimensions`` and LIMIT is sent as ``rowLimit``.

        Args:
            query (ast.Select): SQL query to parse.

        Returns:
            DataFrame: the traffic data restricted to the selected columns.

        Raises:
            NotImplementedError: if WHERE uses an unsupported column or an
                operator other than '=', or if GROUP BY / ORDER BY reference
                an unsupported column.
            ValueError: if a query target is neither ``*`` nor a column.
        """
        date_params = ('startDate', 'endDate')
        accepted_params = ['siteUrl', 'dimensions', 'type', 'rowLimit', 'aggregationType']

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if arg1 in date_params:
                value = parse_utc_date(arg2)
            elif arg1 in accepted_params:
                value = arg2
            else:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")
            if op != '=':
                raise NotImplementedError(
                    f"Unsupported operator '{op}' for column '{arg1}'; only '=' is supported"
                )
            params[arg1] = value

        # Keep a `dimensions` value given in WHERE (it used to be overwritten
        # unconditionally) and extend it with the GROUP BY columns.
        params['dimensions'] = self._collect_dimensions(params.get('dimensions'), query.group_by)

        # NOTE(review): the 'start_time' / 'updated' columns and the `orderBy`
        # parameter look carried over from another handler - confirm that
        # `get_traffic_data` actually consumes them.
        if query.order_by:
            order_column = query.order_by[0].value
            if order_column == 'start_time':
                params['orderBy'] = 'startTime'
            elif order_column == 'updated':
                params['orderBy'] = 'updated'
            else:
                raise NotImplementedError(f"Unsupported ORDER BY column '{order_column}'")

        if query.limit is not None:
            params['rowLimit'] = query.limit.value

        # Get the traffic data from the Google Search Console API.
        traffic_data = self.handler.call_application_api(
            method_name='get_traffic_data', params=params
        )

        selected_columns = self._get_selected_columns(query)

        if len(traffic_data) == 0:
            return pd.DataFrame([], columns=selected_columns)

        # Rename the returned columns to the table's schema, then drop every
        # column that was not asked for (the schema order is preserved).
        traffic_data.columns = self.get_columns()
        unselected = [col for col in traffic_data.columns if col not in selected_columns]
        return traffic_data.drop(columns=unselected)

    @staticmethod
    def _collect_dimensions(where_value, group_by) -> list:
        """Builds the list of requested dimensions from WHERE and GROUP BY.

        Args:
            where_value: value of a ``dimensions = ...`` WHERE condition, or
                None when there was none. A string is split on commas.
            group_by: the query's GROUP BY targets, or None.

        Returns:
            list: dimension names without duplicates, WHERE values first.

        Raises:
            NotImplementedError: if the WHERE value has an unsupported type or
                a GROUP BY target is not one of the groupable columns.
        """
        groupable = ['query', 'page', 'device', 'country']

        # NOTE(review): the dimensions are sent as a list of names, which is
        # the shape the Search Console API documents - confirm the handler
        # does not rely on the dict that was sent before.
        if where_value is None:
            dimensions = []
        elif isinstance(where_value, str):
            dimensions = [name.strip() for name in where_value.split(',') if name.strip()]
        elif isinstance(where_value, (list, tuple)):
            dimensions = list(where_value)
        else:
            raise NotImplementedError(
                f"Unsupported value for 'dimensions': {where_value!r}"
            )

        # GROUP BY targets are identifiers, not comparisons, so they are read
        # directly instead of going through extract_comparison_conditions.
        for target in group_by or []:
            name = target.parts[-1] if isinstance(target, ast.Identifier) else None
            if name not in groupable:
                raise NotImplementedError(f"Unsupported GROUP BY target '{target}'")
            if name not in dimensions:
                dimensions.append(name)
        return dimensions

    def _get_selected_columns(self, query: ast.Select) -> list:
        """Returns the column names requested by the query targets."""
        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                # `*` selects every column of the table.
                return self.get_columns()
            if isinstance(target, ast.Identifier):
                selected_columns.append(target.parts[-1])
            else:
                raise ValueError(f"Unknown query target {type(target)}")
        return selected_columns

    def get_columns(self) -> list:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return [
            'keys',
            'clicks',
            'impressions',
            'ctr',
            'position',
        ]

99 

100 

class SiteMapsTable(APITable):
    """
    Table class for the Google Search Console Site Maps table.
    """

    def select(self, query: ast.Select) -> DataFrame:
        """
        Gets the sitemaps from the Search Console.

        Equality conditions on ``siteUrl`` and ``sitemapIndex`` are forwarded
        as request parameters to the handler's ``get_sitemaps`` method and
        LIMIT is sent as ``rowLimit``.

        Args:
            query (ast.Select): SQL query to parse.

        Returns:
            DataFrame: the sitemaps restricted to the selected columns.

        Raises:
            NotImplementedError: if WHERE uses an unsupported column or an
                operator other than '='.
            ValueError: if a query target is neither ``*`` nor a column.
        """
        accepted_params = ['siteUrl', 'sitemapIndex']

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if op != '=':
                raise NotImplementedError(
                    f"Unsupported operator '{op}' for column '{arg1}'; only '=' is supported"
                )
            if arg1 not in accepted_params:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")
            params[arg1] = arg2

        if query.limit is not None:
            params['rowLimit'] = query.limit.value

        # Get the sitemaps from the Google Search Console API.
        sitemaps = self.handler.call_application_api(
            method_name='get_sitemaps', params=params
        )

        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                # `*` selects every column of the table.
                selected_columns = self.get_columns()
                break
            if isinstance(target, ast.Identifier):
                selected_columns.append(target.parts[-1])
            else:
                raise ValueError(f"Unknown query target {type(target)}")

        if len(sitemaps) == 0:
            return pd.DataFrame([], columns=selected_columns)

        # Rename the returned columns to the table's schema, then drop every
        # column that was not asked for (the schema order is preserved).
        sitemaps.columns = self.get_columns()
        unselected = [col for col in sitemaps.columns if col not in selected_columns]
        return sitemaps.drop(columns=unselected)

    def insert(self, query: ast.Insert):
        """
        Submits one sitemap per inserted row.

        Every row of the VALUES clause is submitted through the handler's
        ``submit_sitemap`` method; previously only the first row was used and
        the remaining rows were silently dropped.

        Args:
            query (ast.Insert): SQL query to parse.

        Raises:
            NotImplementedError: if the query has no column list or no VALUES
                rows, or names a column other than ``siteUrl`` / ``feedpath``.
        """
        accepted_columns = ('siteUrl', 'feedpath')

        if not query.columns or not query.values:
            raise NotImplementedError(
                "Only INSERT statements with an explicit column list and VALUES are supported"
            )

        # NOTE(review): the parser presumably yields column nodes exposing
        # `.name`; plain strings are accepted as well - confirm against the
        # parser version in use.
        column_names = [getattr(col, 'name', col) for col in query.columns]
        for name in column_names:
            if name not in accepted_columns:
                raise NotImplementedError(f"Unsupported INSERT column '{name}'")

        # Submit each row to the Google Search Console API.
        for row in query.values:
            params = dict(zip(column_names, row))
            self.handler.call_application_api(method_name='submit_sitemap', params=params)

    def delete(self, query: ast.Delete):
        """
        Deletes a sitemap for a site.

        Equality conditions on ``siteUrl`` and ``feedpath`` are forwarded as
        parameters to the handler's ``delete_sitemap`` method.

        Args:
            query (ast.Delete): SQL query to parse.

        Raises:
            NotImplementedError: if WHERE uses an unsupported column or an
                operator other than '='.
        """
        accepted_columns = ('siteUrl', 'feedpath')

        # Translate the WHERE conditions into request parameters.
        params = {}
        for op, arg1, arg2 in extract_comparison_conditions(query.where):
            if op != '=':
                raise NotImplementedError(
                    f"Unsupported operator '{op}' for column '{arg1}'; only '=' is supported"
                )
            if arg1 not in accepted_columns:
                raise NotImplementedError(f"Unsupported WHERE column '{arg1}'")
            params[arg1] = arg2

        # Delete the sitemap through the Google Search Console API.
        self.handler.call_application_api(method_name='delete_sitemap', params=params)

    def get_columns(self) -> list:
        """Gets all columns to be returned in pandas DataFrame responses"""
        return [
            'path',
            'lastSubmitted',
            'isPending',
            'isSitemapsIndex',
            'type',
            'lastDownloaded',
            'warnings',
            'errors',
            'contents',
        ]