Coverage for mindsdb/integrations/handlers/hackernews_handler/hn_table.py

1import pandas as pd

2from mindsdb.integrations.libs.api_handler import APITable

3from mindsdb_sql_parser import ast

4from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions

5from typing import List

class StoriesTable(APITable):
    """Table of Hacker News stories listed by the ``topstories.json`` endpoint.

    Subclasses can override ``json_endpoint`` and ``columns`` to expose a
    different story listing through the same query logic.
    """

    # Listing endpoint for this table. NOTE(review): presumably read by the
    # handler's ``get_df_from_class`` from the table object passed to it --
    # confirm against the handler.
    json_endpoint = "topstories.json"

    # Column names reported by get_columns() and used to expand ``SELECT *``.
    columns = ['id', 'time', 'title', 'url', 'score', 'descendants']

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Select data from the stories table and return it as a pandas DataFrame.

        Only ``id = <value>`` and ``time > <value>`` conditions are applied
        here; every other WHERE condition is skipped by this method.

        Args:
            query (ast.Select): The SQL query to be executed.

        Returns:
            pandas.DataFrame: The rows that satisfy the supported conditions,
                restricted to the columns requested by the query.
        """
        # The LIMIT value is handed to the handler rather than applied here.
        limit = query.limit.value if query.limit is not None else None
        df = self.handler.get_df_from_class(self, limit)

        # Apply the supported WHERE conditions to the DataFrame.
        for condition in extract_comparison_conditions(query.where):
            op, column = condition[0], condition[1]
            if op == '=' and column == 'id':
                df = df[df['id'] == int(condition[2])]
            elif op == '>' and column == 'time':
                df = df[df['time'] > int(condition[2])]

        # filter_columns() returns a new DataFrame instead of modifying ``df``
        # in place, so its result has to be kept for the projection to apply.
        return self.filter_columns(df, query)

    def get_columns(self):
        """Get the list of column names for the stories table.

        Returns:
            list: A list of column names for the stories table.
        """
        return self.columns

    def filter_columns(self, df, query):
        """Filter the columns in the DataFrame according to the SQL query.

        A ``*`` target selects every column from get_columns(); identifier
        targets select the named columns. Targets of any other kind are
        skipped.

        Args:
            df (pandas.DataFrame): The DataFrame to filter.
            query (ast.Select): The SQL query to apply to the DataFrame.

        Returns:
            pandas.DataFrame: ``df`` restricted to the requested columns, or
                ``df`` unchanged when the query names no plain columns.
        """
        columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                columns = self.get_columns()
                break
            if isinstance(target, ast.Identifier):
                columns.append(target.value)

        # With no star and no identifier targets there is nothing to project
        # on; return the frame as-is rather than a frame with zero columns.
        if not columns:
            return df
        return df[columns]

class HNStoriesTable(StoriesTable):
    # Stories table variant bound to the "askstories.json" listing; its
    # column list carries 'text' where the parent class lists 'url'.
    columns = [
        'id',
        'time',
        'title',
        'text',
        'score',
        'descendants',
    ]
    json_endpoint = "askstories.json"

class JobStoriesTable(StoriesTable):
    # Stories table variant bound to the "jobstories.json" listing; its
    # column list ends with 'type' where the parent class lists 'descendants'.
    columns = [
        'id',
        'time',
        'title',
        'url',
        'score',
        'type',
    ]
    json_endpoint = "jobstories.json"

class ShowStoriesTable(StoriesTable):
    # Stories table variant bound to the "showstories.json" listing; its
    # column list carries 'text' where the parent class lists 'url'.
    columns = [
        'id',
        'time',
        'title',
        'text',
        'score',
        'descendants',
    ]
    json_endpoint = "showstories.json"

class CommentsTable(APITable):
    """Table of Hacker News comments fetched for a single item.

    Queries must include an ``item_id = <value>`` condition; that value is
    passed to the handler's ``get_comments`` call.
    """

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Select data from the comments table and return it as a pandas DataFrame.

        Args:
            query (ast.Select): The SQL query to be executed.

        Returns:
            pandas.DataFrame: The comments for the requested item, restricted
                to the requested columns and truncated to the LIMIT, if any.

        Raises:
            ValueError: If the WHERE clause has no ``item_id = <value>``
                condition.
        """
        limit = query.limit.value if query.limit is not None else None

        # Find the item ID in the WHERE clause. If several ``item_id = ...``
        # conditions are present, the last one wins.
        item_id = None
        for condition in extract_comparison_conditions(query.where):
            if condition[0] == '=' and condition[1] == 'item_id':
                item_id = condition[2]

        if item_id is None:
            raise ValueError('Item ID is missing in the SQL query')

        # Call the Hacker News API to get the comments for the specified item.
        comments_df = self.handler.call_hackernews_api(
            'get_comments', params={'item_id': item_id}
        )

        # Fill NaN values with 'deleted'.
        comments_df = comments_df.fillna('deleted')

        # filter_columns() cannot narrow the caller's frame in place, so the
        # frame it returns must be kept for the projection to take effect.
        comments_df = self.filter_columns(comments_df, query)

        # Limit the number of results if necessary.
        if limit is not None:
            comments_df = comments_df.head(limit)

        return comments_df

    def get_columns(self) -> List[str]:
        """Get the list of column names for the comments table.

        Returns:
            list: A list of column names for the comments table.
        """
        return [
            'id',
            'by',
            'parent',
            'text',
            'time',
            'type',
        ]

    def filter_columns(self, result: pd.DataFrame, query: ast.Select = None) -> pd.DataFrame:
        """Filter the columns of a DataFrame to those specified in an SQL query.

        Args:
            result (pandas.DataFrame): The DataFrame to filter.
            query (ast.Select): The SQL query containing the column names to
                filter on. When omitted, no filtering is done.

        Returns:
            pandas.DataFrame: ``result`` restricted to the identifier targets
                of ``query``. ``result`` is returned unchanged when ``query``
                is None, contains a ``*`` target, or names no plain columns.
        """
        if query is None:
            return result

        columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                # ``SELECT *`` keeps every column.
                return result
            if isinstance(target, ast.Identifier):
                columns.append(target.value)

        if not columns:
            return result
        return result[columns]

Coverage for mindsdb / integrations / handlers / hackernews_handler / hn_table.py: 0%

76 statements