Coverage for mindsdb / integrations / handlers / hackernews_handler / hn_table.py: 0%
76 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import pandas as pd
2from mindsdb.integrations.libs.api_handler import APITable
3from mindsdb_sql_parser import ast
4from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
5from typing import List
class StoriesTable(APITable):
    """Table of Hacker News stories fetched through the handler.

    Subclasses override ``json_endpoint`` and ``columns`` to describe other
    story listings.
    """

    # Endpoint name for this listing; presumably read by the handler's
    # ``get_df_from_class`` -- confirm against the handler implementation.
    json_endpoint = "topstories.json"
    # Columns returned when the query selects ``*``.
    columns = ['id', 'time', 'title', 'url', 'score', 'descendants']

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Select data from the stories table and return it as a DataFrame.

        Args:
            query (ast.Select): The SQL query to be executed.

        Returns:
            pandas.DataFrame: The rows matching the supported WHERE
            conditions, restricted to the columns named in the query.
        """
        hn_handler = self.handler

        # Extract the limit value from the SQL query, if it exists.
        limit = query.limit.value if query.limit is not None else None

        df = hn_handler.get_df_from_class(self, limit)

        # Only ``id = <value>`` and ``time > <value>`` are applied here;
        # any other condition is ignored by this method.
        for condition in extract_comparison_conditions(query.where):
            operator, column, value = condition[0], condition[1], condition[2]
            if operator == '=' and column == 'id':
                df = df[df['id'] == int(value)]
            elif operator == '>' and column == 'time':
                df = df[df['time'] > int(value)]

        # filter_columns returns a new DataFrame rather than mutating its
        # argument, so the result must be captured for the projection to
        # take effect.
        df = self.filter_columns(df, query)

        return df

    def get_columns(self):
        """Get the list of column names for the stories table.

        Returns:
            list: A list of column names for the stories table.
        """
        return self.columns

    def filter_columns(self, df, query):
        """Project the DataFrame onto the columns named in the SQL query.

        Args:
            df (pandas.DataFrame): The DataFrame to filter.
            query (ast.Select): The SQL query whose targets pick the columns.

        Returns:
            pandas.DataFrame: A DataFrame holding only the selected columns.
            A ``*`` target selects every column from ``get_columns()``.
        """
        columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                # ``*`` overrides any explicitly listed identifiers.
                columns = self.get_columns()
                break
            elif isinstance(target, ast.Identifier):
                columns.append(target.value)
        return df[columns]
class HNStoriesTable(StoriesTable):
    """Stories table variant bound to the ``askstories.json`` endpoint."""

    json_endpoint = 'askstories.json'
    columns = [
        'id',
        'time',
        'title',
        'text',
        'score',
        'descendants',
    ]
class JobStoriesTable(StoriesTable):
    """Stories table variant bound to the ``jobstories.json`` endpoint."""

    json_endpoint = 'jobstories.json'
    columns = [
        'id',
        'time',
        'title',
        'url',
        'score',
        'type',
    ]
class ShowStoriesTable(StoriesTable):
    """Stories table variant bound to the ``showstories.json`` endpoint."""

    json_endpoint = 'showstories.json'
    columns = [
        'id',
        'time',
        'title',
        'text',
        'score',
        'descendants',
    ]
class CommentsTable(APITable):
    """Table of the comments attached to a single Hacker News item."""

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Select data from the comments table and return it as a DataFrame.

        Args:
            query (ast.Select): The SQL query to be executed. Its WHERE
                clause must contain an ``item_id = <value>`` condition.

        Returns:
            pandas.DataFrame: The comments for the item, restricted to the
            columns named in the query and truncated to the query's LIMIT.

        Raises:
            ValueError: If the query has no ``item_id = <value>`` condition.
        """
        hn_handler = self.handler

        # Get the limit value from the SQL query, if it exists.
        limit = query.limit.value if query.limit is not None else None

        # Get the item ID from the SQL query; the last matching condition
        # wins when several are present.
        item_id = None
        for condition in extract_comparison_conditions(query.where):
            if condition[0] == '=' and condition[1] == 'item_id':
                item_id = condition[2]

        if item_id is None:
            raise ValueError('Item ID is missing in the SQL query')

        # Call the Hacker News API to get the comments for the specified item.
        comments_df = hn_handler.call_hackernews_api('get_comments', params={'item_id': item_id})

        # Fill NaN values with 'deleted'.
        comments_df = comments_df.fillna('deleted')

        # filter_columns returns the projected DataFrame; it does not mutate
        # its argument, so the result has to be captured here.
        comments_df = self.filter_columns(comments_df, query)

        # Limit the number of results if necessary.
        if limit is not None:
            comments_df = comments_df.head(limit)

        return comments_df

    def get_columns(self) -> List[str]:
        """Get the list of column names for the comments table.

        Returns:
            list: A list of column names for the comments table.
        """
        return [
            'id',
            'by',
            'parent',
            'text',
            'time',
            'type',
        ]

    def filter_columns(self, result: pd.DataFrame, query: ast.Select = None) -> pd.DataFrame:
        """Project a DataFrame onto the columns named in an SQL query.

        Args:
            result (pandas.DataFrame): The DataFrame to filter.
            query (ast.Select): The SQL query containing the column names to
                filter on. When omitted, no filtering is done.

        Returns:
            pandas.DataFrame: ``result`` unchanged when there is no query,
            when the query selects ``*``, or when it names no identifiers;
            otherwise a DataFrame holding only the named columns.
        """
        if query is None:
            return result

        columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                # ``*`` keeps every column, regardless of other targets.
                return result
            elif isinstance(target, ast.Identifier):
                columns.append(target.value)

        if columns:
            return result[columns]
        return result